Merge pull request #15968 from eileenmcnaughton/bom
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
5 | |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
10 */
11
12 /**
13 * @package CRM
14 * @copyright CiviCRM LLC https://civicrm.org/licensing
15 */
16
17 /**
18 * Class CRM_Utils_QueryFormatter
19 *
20 * This class is a bad idea. It exists for the unholy reason that a single installation
21 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
22 * query-text. It labors* to take the user's search expression and provide similar search
23 * semantics in different contexts. It is unknown whether this labor will be fruitful
24 * or in vain.
25 */
26 class CRM_Utils_QueryFormatter {
27 /**
28 * Generate queries using SQL LIKE expressions.
29 */
30 const LANG_SQL_LIKE = 'like';
31
32 /**
33 * Generate queries using MySQL FTS expressions.
34 */
35 const LANG_SQL_FTS = 'fts';
36
37 /**
38 * Generate queries using MySQL's boolean FTS expressions.
39 */
40 const LANG_SQL_FTSBOOL = 'ftsbool';
41
42 /**
43 * Generate queries using Solr expressions.
44 */
45 const LANG_SOLR = 'solr';
46
47 /**
48 * Attempt to leave the text as-is.
49 */
50 const MODE_NONE = 'simple';
51
52 /**
53 * Attempt to treat the input text as a phrase
54 */
55 const MODE_PHRASE = 'phrase';
56
57 /**
58 * Attempt to treat the input text as a phrase with
59 * wildcards on each end.
60 */
61 const MODE_WILDPHRASE = 'wildphrase';
62
63 /**
64 * Attempt to treat individual word as if it
65 * had wildcards at the start and end.
66 */
67 const MODE_WILDWORDS = 'wildwords';
68
69 /**
70 * Attempt to treat individual word as if it
71 * had a wildcard at the end.
72 */
73 const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';
74
75 /**
76 * Singleton object.
77 *
78 * @var \CRM_Utils_QueryFormatter|null
79 */
80 static protected $singleton;
81
82 /**
83 * @param bool $fresh
84 * @return CRM_Utils_QueryFormatter
85 */
86 public static function singleton($fresh = FALSE) {
87 if ($fresh || self::$singleton === NULL) {
88 $mode = Civi::settings()->get('fts_query_mode');
89 self::$singleton = new CRM_Utils_QueryFormatter($mode);
90 }
91 return self::$singleton;
92 }
93
94 /**
95 * @var string
96 * eg MODE_NONE
97 */
98 protected $mode;
99
100 /**
101 * @param string $mode
102 * Eg MODE_NONE.
103 */
104 public function __construct($mode) {
105 $this->mode = $mode;
106 }
107
108 /**
109 * @param mixed $mode
110 */
111 public function setMode($mode) {
112 $this->mode = $mode;
113 }
114
115 /**
116 * @return mixed
117 */
118 public function getMode() {
119 return $this->mode;
120 }
121
122 /**
123 * @param string $text
124 * @param string $language
125 * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR.
126 * @throws CRM_Core_Exception
127 * @return string
128 */
129 public function format($text, $language) {
130 $text = trim($text);
131
132 switch ($language) {
133 case self::LANG_SOLR:
134 case self::LANG_SQL_FTS:
135 $text = $this->_formatFts($text, $this->mode);
136 break;
137
138 case self::LANG_SQL_FTSBOOL:
139 $text = $this->_formatFtsBool($text, $this->mode);
140 break;
141
142 case self::LANG_SQL_LIKE:
143 $text = $this->_formatLike($text, $this->mode);
144 break;
145
146 default:
147 $text = NULL;
148 }
149
150 if ($text === NULL) {
151 throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
152 }
153
154 return $text;
155 }
156
157 /**
158 * Create a SQL WHERE expression for matching against a list of
159 * text columns.
160 *
161 * @param string $table
162 * Eg "civicrm_note" or "civicrm_note mynote".
163 * @param array|string $columns
164 * List of columns to search against.
165 * Eg "first_name" or "activity_details".
166 * @param string $queryText
167 * @return string
168 * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'"
169 */
170 public function formatSql($table, $columns, $queryText) {
171 if ($queryText === '*' || $queryText === '%' || empty($queryText)) {
172 return '(1)';
173 }
174
175 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
176
177 if (strpos($table, ' ') === FALSE) {
178 $tableName = $tableAlias = $table;
179 }
180 else {
181 list ($tableName, $tableAlias) = explode(' ', $table);
182 }
183 if (is_scalar($columns)) {
184 $columns = [$columns];
185 }
186
187 $clauses = [];
188 if (CRM_Core_InnoDBIndexer::singleton()
189 ->hasDeclaredIndex($tableName, $columns)
190 ) {
191 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL);
192
193 $prefixedFieldNames = [];
194 foreach ($columns as $fieldName) {
195 $prefixedFieldNames[] = "$tableAlias.$fieldName";
196 }
197
198 $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)",
199 implode(',', $prefixedFieldNames),
200 $strtolower(CRM_Core_DAO::escapeString($formattedQuery))
201 );
202 }
203 else {
204 //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array(
205 // 1 => $table,
206 // 2 => implode(', ', $fullTextFields),
207 //)));
208
209 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE);
210 $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery));
211 foreach ($columns as $fieldName) {
212 $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'";
213 }
214 }
215 return implode(' OR ', $clauses);
216 }
217
218 /**
219 * Format Fts.
220 *
221 * @param string $text
222 * @param $mode
223 *
224 * @return mixed
225 */
226 protected function _formatFts($text, $mode) {
227 $result = NULL;
228
229 // normalize user-inputted wildcards
230 $text = str_replace('%', '*', $text);
231
232 if (empty($text)) {
233 $result = '*';
234 }
235 elseif (strpos($text, '*') !== FALSE) {
236 // if user supplies their own wildcards, then don't do any sophisticated changes
237 $result = $text;
238 }
239 else {
240 switch ($mode) {
241 case self::MODE_NONE:
242 $result = $text;
243 break;
244
245 case self::MODE_PHRASE:
246 $result = '"' . $text . '"';
247 break;
248
249 case self::MODE_WILDPHRASE:
250 $result = '"*' . $text . '*"';
251 break;
252
253 case self::MODE_WILDWORDS:
254 $result = $this->mapWords($text, '*word*');
255 break;
256
257 case self::MODE_WILDWORDS_SUFFIX:
258 $result = $this->mapWords($text, 'word*');
259 break;
260
261 default:
262 $result = NULL;
263 }
264 }
265
266 return $this->dedupeWildcards($result, '%');
267 }
268
269 /**
270 * Format FTS.
271 *
272 * @param string $text
273 * @param $mode
274 *
275 * @return mixed
276 */
277 protected function _formatFtsBool($text, $mode) {
278 $result = NULL;
279 $operators = ['+', '-', '~', '(', ')'];
280 $wildCards = ['@', '%', '*'];
281 $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/');
282
283 //Return if searched string ends with an unsupported operator.
284 //Or if the string contains an invalid joint occurrence of operators.
285 foreach ($operators as $val) {
286 if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) {
287 $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name');
288 $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1");
289 $operators = implode("', '", $operators);
290 CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String');
291 }
292 }
293
294 // normalize user-inputted wildcards
295 $text = str_replace('%', '*', $text);
296
297 if (empty($text)) {
298 $result = '*';
299 }
300 elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
301 // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
302 $result = $this->mapWords($text, 'word*');
303 }
304 elseif (strpos($text, '*') !== FALSE) {
305 // if user supplies their own wildcards, then don't do any sophisticated changes
306 $result = $this->mapWords($text, '+word');
307 }
308 elseif (preg_match('/^(["\']).*\1$/m', $text)) {
309 // if surrounded by quotes, use term as is
310 $result = $text;
311 }
312 else {
313 switch ($mode) {
314 case self::MODE_NONE:
315 $result = $this->mapWords($text, '+word', TRUE);
316 break;
317
318 case self::MODE_PHRASE:
319 $result = '+"' . $text . '"';
320 break;
321
322 case self::MODE_WILDPHRASE:
323 $result = '+"*' . $text . '*"';
324 break;
325
326 case self::MODE_WILDWORDS:
327 $result = $this->mapWords($text, '+*word*');
328 break;
329
330 case self::MODE_WILDWORDS_SUFFIX:
331 $result = $this->mapWords($text, '+word*');
332 break;
333
334 default:
335 $result = NULL;
336 }
337 }
338
339 return $this->dedupeWildcards($result, '%');
340 }
341
342 /**
343 * Format like.
344 *
345 * @param $text
346 * @param $mode
347 *
348 * @return mixed
349 */
350 protected function _formatLike($text, $mode) {
351 $result = NULL;
352
353 if (empty($text)) {
354 $result = '%';
355 }
356 elseif (strpos($text, '%') !== FALSE) {
357 // if user supplies their own wildcards, then don't do any sophisticated changes
358 $result = $text;
359 }
360 else {
361 switch ($mode) {
362 case self::MODE_NONE:
363 case self::MODE_PHRASE:
364 case self::MODE_WILDPHRASE:
365 $result = "%" . $text . "%";
366 break;
367
368 case self::MODE_WILDWORDS:
369 case self::MODE_WILDWORDS_SUFFIX:
370 $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%';
371 break;
372
373 default:
374 $result = NULL;
375 }
376 }
377
378 return $this->dedupeWildcards($result, '%');
379 }
380
381 /**
382 * @param string $text
383 * User-supplied query string.
384 * @param string $template
385 * A prototypical description of each word, eg "word%" or "word*" or "*word*".
386 * @param bool $quotes
387 * True if each searched keyword need to be surrounded with quotes.
388 * @return string
389 */
390 protected function mapWords($text, $template, $quotes = FALSE) {
391 $result = [];
392 foreach ($this->parseWords($text, $quotes) as $word) {
393 $result[] = str_replace('word', $word, $template);
394 }
395 return implode(' ', $result);
396 }
397
398 /**
399 * @param string $text
400 * @param bool $quotes
401 * @return array
402 */
403 protected function parseWords($text, $quotes) {
404 //NYSS 9692 special handling for emails
405 if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) {
406 $parts = explode('@', $text);
407 $parts[1] = stristr($parts[1], '.', TRUE);
408 $text = implode(' ', $parts);
409 }
410
411 //NYSS also replace other occurrences of @
412 $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text));
413 //filter empty values if any
414 $keywords = array_filter(explode(' ', $replacedText));
415
416 //Ensure each searched keywords are wrapped in double quotes.
417 if ($quotes) {
418 foreach ($keywords as &$val) {
419 if (!is_numeric($val)) {
420 $val = "\"{$val}\"";
421 }
422 }
423 }
424 return $keywords;
425 }
426
427 /**
428 * @param $text
429 * @param $wildcard
430 * @return mixed
431 */
432 protected function dedupeWildcards($text, $wildcard) {
433 if ($text === NULL) {
434 return NULL;
435 }
436
437 // don't use preg_replace because $wildcard might be special char
438 while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
439 $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
440 }
441 return $text;
442 }
443
444 /**
445 * Get modes.
446 *
447 * @return array
448 */
449 public static function getModes() {
450 return [
451 self::MODE_NONE,
452 self::MODE_PHRASE,
453 self::MODE_WILDPHRASE,
454 self::MODE_WILDWORDS,
455 self::MODE_WILDWORDS_SUFFIX,
456 ];
457 }
458
459 /**
460 * Get languages.
461 *
462 * @return array
463 */
464 public static function getLanguages() {
465 return [
466 self::LANG_SOLR,
467 self::LANG_SQL_FTS,
468 self::LANG_SQL_FTSBOOL,
469 self::LANG_SQL_LIKE,
470 ];
471 }
472
473 /**
474 * @param $text
475 *
476 * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
477 */
478 public static function dumpExampleTable($text) {
479 $width = strlen($text) + 8;
480 $buf = '';
481
482 $buf .= sprintf("%-{$width}s", 'mode');
483 foreach (self::getLanguages() as $lang) {
484 $buf .= sprintf("%-{$width}s", $lang);
485 }
486 $buf .= "\n";
487
488 foreach (self::getModes() as $mode) {
489 $formatter = new CRM_Utils_QueryFormatter($mode);
490 $buf .= sprintf("%-{$width}s", $mode);
491 foreach (self::getLanguages() as $lang) {
492 $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
493 }
494 $buf .= "\n";
495 }
496
497 echo $buf;
498 }
499
500 }