Commit | Line | Data |
---|---|---|
ea74069c | 1 | <?php |
ea74069c TO |
2 | /* |
3 | +--------------------------------------------------------------------+ | |
bc77d7c0 | 4 | | Copyright CiviCRM LLC. All rights reserved. | |
ea74069c | 5 | | | |
bc77d7c0 TO |
6 | | This work is published under the GNU AGPLv3 license with some | |
7 | | permitted exceptions and without any warranty. For full license | | |
8 | | and copyright information, see https://civicrm.org/licensing | | |
ea74069c | 9 | +--------------------------------------------------------------------+ |
d25dd0ee | 10 | */ |
ea74069c | 11 | |
50bfb460 | 12 | /** |
5a84c50e | 13 | * @package CRM |
ca5cec67 | 14 | * @copyright CiviCRM LLC https://civicrm.org/licensing |
5a84c50e | 15 | */ |
50bfb460 | 16 | |
ea74069c TO |
17 | /** |
18 | * Class CRM_Utils_QueryFormatter | |
19 | * | |
20 | * This class is a bad idea. It exists for the unholy reason that a single installation | |
21 | * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same | |
22 | * query-text. It labors* to take the user's search expression and provide similar search | |
23 | * semantics in different contexts. It is unknown whether this labor will be fruitful | |
24 | * or in vain. | |
25 | */ | |
26 | class CRM_Utils_QueryFormatter { | |
7ee0cd30 TO |
27 | /** |
28 | * Generate queries using SQL LIKE expressions. | |
29 | */ | |
ea74069c | 30 | const LANG_SQL_LIKE = 'like'; |
7ee0cd30 TO |
31 | |
32 | /** | |
33 | * Generate queries using MySQL FTS expressions. | |
34 | */ | |
ea74069c | 35 | const LANG_SQL_FTS = 'fts'; |
7ee0cd30 TO |
36 | |
37 | /** | |
38 | * Generate queries using MySQL's boolean FTS expressions. | |
39 | */ | |
3196b7a5 | 40 | const LANG_SQL_FTSBOOL = 'ftsbool'; |
7ee0cd30 TO |
41 | |
42 | /** | |
43 | * Generate queries using Solr expressions. | |
44 | */ | |
ea74069c TO |
45 | const LANG_SOLR = 'solr'; |
46 | ||
47 | /** | |
48 | * Attempt to leave the text as-is. | |
49 | */ | |
50 | const MODE_NONE = 'simple'; | |
51 | ||
52 | /** | |
53 | * Attempt to treat the input text as a phrase | |
54 | */ | |
55 | const MODE_PHRASE = 'phrase'; | |
56 | ||
57 | /** | |
58 | * Attempt to treat the input text as a phrase with | |
59 | * wildcards on each end. | |
60 | */ | |
61 | const MODE_WILDPHRASE = 'wildphrase'; | |
62 | ||
63 | /** | |
64 | * Attempt to treat individual word as if it | |
65 | * had wildcards at the start and end. | |
66 | */ | |
67 | const MODE_WILDWORDS = 'wildwords'; | |
68 | ||
69 | /** | |
70 | * Attempt to treat individual word as if it | |
71 | * had a wildcard at the end. | |
72 | */ | |
73 | const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix'; | |
74 | ||
6714d8d2 | 75 | /** |
041ecc95 | 76 | * Singleton object. |
77 | * | |
78 | * @var \CRM_Utils_QueryFormatter|null | |
6714d8d2 | 79 | */ |
ea74069c TO |
80 | static protected $singleton; |
81 | ||
82 | /** | |
83 | * @param bool $fresh | |
84 | * @return CRM_Utils_QueryFormatter | |
85 | */ | |
86 | public static function singleton($fresh = FALSE) { | |
87 | if ($fresh || self::$singleton === NULL) { | |
aaffa79f | 88 | $mode = Civi::settings()->get('fts_query_mode'); |
ea74069c TO |
89 | self::$singleton = new CRM_Utils_QueryFormatter($mode); |
90 | } | |
91 | return self::$singleton; | |
92 | } | |
93 | ||
94 | /** | |
aaffa79f | 95 | * @var string |
50bfb460 | 96 | * eg MODE_NONE |
ea74069c TO |
97 | */ |
98 | protected $mode; | |
99 | ||
100 | /** | |
77855840 TO |
101 | * @param string $mode |
102 | * Eg MODE_NONE. | |
ea74069c | 103 | */ |
00be9182 | 104 | public function __construct($mode) { |
ea74069c TO |
105 | $this->mode = $mode; |
106 | } | |
107 | ||
108 | /** | |
109 | * @param mixed $mode | |
110 | */ | |
111 | public function setMode($mode) { | |
112 | $this->mode = $mode; | |
113 | } | |
114 | ||
115 | /** | |
116 | * @return mixed | |
117 | */ | |
118 | public function getMode() { | |
119 | return $this->mode; | |
120 | } | |
121 | ||
122 | /** | |
123 | * @param string $text | |
77855840 TO |
124 | * @param string $language |
125 | * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR. | |
ea74069c TO |
126 | * @throws CRM_Core_Exception |
127 | * @return string | |
128 | */ | |
129 | public function format($text, $language) { | |
130 | $text = trim($text); | |
131 | ||
132 | switch ($language) { | |
133 | case self::LANG_SOLR: | |
134 | case self::LANG_SQL_FTS: | |
135 | $text = $this->_formatFts($text, $this->mode); | |
136 | break; | |
e7292422 | 137 | |
3196b7a5 TO |
138 | case self::LANG_SQL_FTSBOOL: |
139 | $text = $this->_formatFtsBool($text, $this->mode); | |
140 | break; | |
e7292422 | 141 | |
ea74069c TO |
142 | case self::LANG_SQL_LIKE: |
143 | $text = $this->_formatLike($text, $this->mode); | |
144 | break; | |
e7292422 | 145 | |
ea74069c TO |
146 | default: |
147 | $text = NULL; | |
148 | } | |
149 | ||
150 | if ($text === NULL) { | |
151 | throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]"); | |
152 | } | |
153 | ||
154 | return $text; | |
155 | } | |
156 | ||
2395ef81 TO |
157 | /** |
158 | * Create a SQL WHERE expression for matching against a list of | |
159 | * text columns. | |
160 | * | |
161 | * @param string $table | |
162 | * Eg "civicrm_note" or "civicrm_note mynote". | |
163 | * @param array|string $columns | |
164 | * List of columns to search against. | |
165 | * Eg "first_name" or "activity_details". | |
166 | * @param string $queryText | |
167 | * @return string | |
168 | * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'" | |
169 | */ | |
170 | public function formatSql($table, $columns, $queryText) { | |
171 | if ($queryText === '*' || $queryText === '%' || empty($queryText)) { | |
172 | return '(1)'; | |
173 | } | |
174 | ||
175 | $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower'; | |
176 | ||
177 | if (strpos($table, ' ') === FALSE) { | |
178 | $tableName = $tableAlias = $table; | |
179 | } | |
180 | else { | |
181 | list ($tableName, $tableAlias) = explode(' ', $table); | |
182 | } | |
183 | if (is_scalar($columns)) { | |
be2fb01f | 184 | $columns = [$columns]; |
2395ef81 TO |
185 | } |
186 | ||
be2fb01f | 187 | $clauses = []; |
2395ef81 TO |
188 | if (CRM_Core_InnoDBIndexer::singleton() |
189 | ->hasDeclaredIndex($tableName, $columns) | |
190 | ) { | |
191 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL); | |
192 | ||
be2fb01f | 193 | $prefixedFieldNames = []; |
2395ef81 TO |
194 | foreach ($columns as $fieldName) { |
195 | $prefixedFieldNames[] = "$tableAlias.$fieldName"; | |
196 | } | |
197 | ||
198 | $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)", | |
199 | implode(',', $prefixedFieldNames), | |
200 | $strtolower(CRM_Core_DAO::escapeString($formattedQuery)) | |
201 | ); | |
202 | } | |
203 | else { | |
204 | //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array( | |
205 | // 1 => $table, | |
206 | // 2 => implode(', ', $fullTextFields), | |
207 | //))); | |
208 | ||
209 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE); | |
210 | $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery)); | |
211 | foreach ($columns as $fieldName) { | |
212 | $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'"; | |
213 | } | |
214 | } | |
215 | return implode(' OR ', $clauses); | |
216 | } | |
217 | ||
2e2605fe EM |
218 | /** |
219 | * Format Fts. | |
220 | * | |
221 | * @param string $text | |
222 | * @param $mode | |
223 | * | |
224 | * @return mixed | |
225 | */ | |
ea74069c TO |
226 | protected function _formatFts($text, $mode) { |
227 | $result = NULL; | |
228 | ||
229 | // normalize user-inputted wildcards | |
230 | $text = str_replace('%', '*', $text); | |
231 | ||
232 | if (empty($text)) { | |
3196b7a5 | 233 | $result = '*'; |
ea74069c TO |
234 | } |
235 | elseif (strpos($text, '*') !== FALSE) { | |
236 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
3196b7a5 | 237 | $result = $text; |
ea74069c TO |
238 | } |
239 | else { | |
240 | switch ($mode) { | |
241 | case self::MODE_NONE: | |
242 | $result = $text; | |
243 | break; | |
244 | ||
245 | case self::MODE_PHRASE: | |
246 | $result = '"' . $text . '"'; | |
247 | break; | |
248 | ||
249 | case self::MODE_WILDPHRASE: | |
250 | $result = '"*' . $text . '*"'; | |
251 | break; | |
252 | ||
253 | case self::MODE_WILDWORDS: | |
254 | $result = $this->mapWords($text, '*word*'); | |
255 | break; | |
256 | ||
257 | case self::MODE_WILDWORDS_SUFFIX: | |
258 | $result = $this->mapWords($text, 'word*'); | |
259 | break; | |
260 | ||
261 | default: | |
262 | $result = NULL; | |
263 | } | |
264 | } | |
265 | ||
266 | return $this->dedupeWildcards($result, '%'); | |
267 | } | |
268 | ||
2e2605fe EM |
269 | /** |
270 | * Format FTS. | |
271 | * | |
272 | * @param string $text | |
273 | * @param $mode | |
274 | * | |
275 | * @return mixed | |
276 | */ | |
3196b7a5 TO |
277 | protected function _formatFtsBool($text, $mode) { |
278 | $result = NULL; | |
be2fb01f CW |
279 | $operators = ['+', '-', '~', '(', ')']; |
280 | $wildCards = ['@', '%', '*']; | |
e0b51656 | 281 | $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/'); |
408fe090 | 282 | |
283 | //Return if searched string ends with an unsupported operator. | |
e0b51656 | 284 | //Or if the string contains an invalid joint occurrence of operators. |
408fe090 | 285 | foreach ($operators as $val) { |
e0b51656 | 286 | if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) { |
408fe090 | 287 | $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name'); |
288 | $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1"); | |
289 | $operators = implode("', '", $operators); | |
e0b51656 | 290 | CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String'); |
408fe090 | 291 | } |
292 | } | |
3196b7a5 TO |
293 | |
294 | // normalize user-inputted wildcards | |
295 | $text = str_replace('%', '*', $text); | |
296 | ||
297 | if (empty($text)) { | |
298 | $result = '*'; | |
299 | } | |
7bfcf18f TO |
300 | elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) { |
301 | // if user supplies their own include/exclude operators, use text as is (with trailing wildcard) | |
302 | $result = $this->mapWords($text, 'word*'); | |
303 | } | |
3196b7a5 TO |
304 | elseif (strpos($text, '*') !== FALSE) { |
305 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
306 | $result = $this->mapWords($text, '+word'); | |
307 | } | |
7bfcf18f TO |
308 | elseif (preg_match('/^(["\']).*\1$/m', $text)) { |
309 | // if surrounded by quotes, use term as is | |
310 | $result = $text; | |
311 | } | |
3196b7a5 TO |
312 | else { |
313 | switch ($mode) { | |
314 | case self::MODE_NONE: | |
408fe090 | 315 | $result = $this->mapWords($text, '+word', TRUE); |
3196b7a5 TO |
316 | break; |
317 | ||
318 | case self::MODE_PHRASE: | |
319 | $result = '+"' . $text . '"'; | |
320 | break; | |
321 | ||
322 | case self::MODE_WILDPHRASE: | |
323 | $result = '+"*' . $text . '*"'; | |
324 | break; | |
325 | ||
326 | case self::MODE_WILDWORDS: | |
327 | $result = $this->mapWords($text, '+*word*'); | |
328 | break; | |
329 | ||
330 | case self::MODE_WILDWORDS_SUFFIX: | |
331 | $result = $this->mapWords($text, '+word*'); | |
332 | break; | |
333 | ||
334 | default: | |
335 | $result = NULL; | |
336 | } | |
337 | } | |
338 | ||
339 | return $this->dedupeWildcards($result, '%'); | |
340 | } | |
341 | ||
2e2605fe EM |
342 | /** |
343 | * Format like. | |
344 | * | |
345 | * @param $text | |
346 | * @param $mode | |
347 | * | |
348 | * @return mixed | |
349 | */ | |
ea74069c TO |
350 | protected function _formatLike($text, $mode) { |
351 | $result = NULL; | |
352 | ||
353 | if (empty($text)) { | |
354 | $result = '%'; | |
355 | } | |
356 | elseif (strpos($text, '%') !== FALSE) { | |
357 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
358 | $result = $text; | |
359 | } | |
360 | else { | |
361 | switch ($mode) { | |
362 | case self::MODE_NONE: | |
363 | case self::MODE_PHRASE: | |
364 | case self::MODE_WILDPHRASE: | |
365 | $result = "%" . $text . "%"; | |
366 | break; | |
367 | ||
368 | case self::MODE_WILDWORDS: | |
369 | case self::MODE_WILDWORDS_SUFFIX: | |
370 | $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%'; | |
371 | break; | |
372 | ||
373 | default: | |
374 | $result = NULL; | |
375 | } | |
376 | } | |
377 | ||
378 | return $this->dedupeWildcards($result, '%'); | |
379 | } | |
380 | ||
381 | /** | |
77855840 TO |
382 | * @param string $text |
383 | * User-supplied query string. | |
384 | * @param string $template | |
385 | * A prototypical description of each word, eg "word%" or "word*" or "*word*". | |
408fe090 | 386 | * @param bool $quotes |
387 | * True if each searched keyword need to be surrounded with quotes. | |
ea74069c TO |
388 | * @return string |
389 | */ | |
408fe090 | 390 | protected function mapWords($text, $template, $quotes = FALSE) { |
be2fb01f | 391 | $result = []; |
408fe090 | 392 | foreach ($this->parseWords($text, $quotes) as $word) { |
ea74069c TO |
393 | $result[] = str_replace('word', $word, $template); |
394 | } | |
395 | return implode(' ', $result); | |
396 | } | |
397 | ||
398 | /** | |
6714d8d2 SL |
399 | * @param string $text |
400 | * @param bool $quotes | |
ea74069c TO |
401 | * @return array |
402 | */ | |
408fe090 | 403 | protected function parseWords($text, $quotes) { |
895e8529 TO |
404 | //NYSS 9692 special handling for emails |
405 | if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) { | |
406 | $parts = explode('@', $text); | |
407 | $parts[1] = stristr($parts[1], '.', TRUE); | |
408 | $text = implode(' ', $parts); | |
409 | } | |
895e8529 TO |
410 | |
411 | //NYSS also replace other occurrences of @ | |
408fe090 | 412 | $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text)); |
413 | //filter empty values if any | |
414 | $keywords = array_filter(explode(' ', $replacedText)); | |
415 | ||
416 | //Ensure each searched keywords are wrapped in double quotes. | |
417 | if ($quotes) { | |
418 | foreach ($keywords as &$val) { | |
419 | if (!is_numeric($val)) { | |
420 | $val = "\"{$val}\""; | |
421 | } | |
422 | } | |
423 | } | |
424 | return $keywords; | |
ea74069c TO |
425 | } |
426 | ||
427 | /** | |
428 | * @param $text | |
429 | * @param $wildcard | |
430 | * @return mixed | |
431 | */ | |
432 | protected function dedupeWildcards($text, $wildcard) { | |
433 | if ($text === NULL) { | |
434 | return NULL; | |
435 | } | |
436 | ||
437 | // don't use preg_replace because $wildcard might be special char | |
438 | while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) { | |
439 | $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text); | |
440 | } | |
441 | return $text; | |
442 | } | |
443 | ||
2e2605fe EM |
444 | /** |
445 | * Get modes. | |
446 | * | |
447 | * @return array | |
448 | */ | |
ea74069c | 449 | public static function getModes() { |
be2fb01f | 450 | return [ |
ea74069c TO |
451 | self::MODE_NONE, |
452 | self::MODE_PHRASE, | |
453 | self::MODE_WILDPHRASE, | |
454 | self::MODE_WILDWORDS, | |
455 | self::MODE_WILDWORDS_SUFFIX, | |
be2fb01f | 456 | ]; |
ea74069c TO |
457 | } |
458 | ||
2e2605fe EM |
459 | /** |
460 | * Get languages. | |
461 | * | |
462 | * @return array | |
463 | */ | |
ea74069c | 464 | public static function getLanguages() { |
be2fb01f | 465 | return [ |
ea74069c TO |
466 | self::LANG_SOLR, |
467 | self::LANG_SQL_FTS, | |
3196b7a5 | 468 | self::LANG_SQL_FTSBOOL, |
ea74069c | 469 | self::LANG_SQL_LIKE, |
be2fb01f | 470 | ]; |
ea74069c TO |
471 | } |
472 | ||
473 | /** | |
474 | * @param $text | |
475 | * | |
476 | * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");' | |
477 | */ | |
478 | public static function dumpExampleTable($text) { | |
479 | $width = strlen($text) + 8; | |
480 | $buf = ''; | |
481 | ||
482 | $buf .= sprintf("%-{$width}s", 'mode'); | |
483 | foreach (self::getLanguages() as $lang) { | |
484 | $buf .= sprintf("%-{$width}s", $lang); | |
485 | } | |
486 | $buf .= "\n"; | |
487 | ||
488 | foreach (self::getModes() as $mode) { | |
489 | $formatter = new CRM_Utils_QueryFormatter($mode); | |
490 | $buf .= sprintf("%-{$width}s", $mode); | |
491 | foreach (self::getLanguages() as $lang) { | |
492 | $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang)); | |
493 | } | |
494 | $buf .= "\n"; | |
495 | } | |
496 | ||
497 | echo $buf; | |
498 | } | |
96025800 | 499 | |
ef10e0b5 | 500 | } |