Commit | Line | Data |
---|---|---|
ea74069c | 1 | <?php |
ea74069c TO |
2 | /* |
3 | +--------------------------------------------------------------------+ | |
fee14197 | 4 | | CiviCRM version 5 | |
ea74069c | 5 | +--------------------------------------------------------------------+ |
6b83d5bd | 6 | | Copyright CiviCRM LLC (c) 2004-2019 | |
ea74069c TO |
7 | +--------------------------------------------------------------------+ |
8 | | This file is a part of CiviCRM. | | |
9 | | | | |
10 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
11 | | under the terms of the GNU Affero General Public License | | |
12 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
13 | | | | |
14 | | CiviCRM is distributed in the hope that it will be useful, but | | |
15 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
17 | | See the GNU Affero General Public License for more details. | | |
18 | | | | |
19 | | You should have received a copy of the GNU Affero General Public | | |
20 | | License and the CiviCRM Licensing Exception along | | |
21 | | with this program; if not, contact CiviCRM LLC | | |
22 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
23 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
24 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
25 | +--------------------------------------------------------------------+ | |
d25dd0ee | 26 | */ |
ea74069c | 27 | |
50bfb460 | 28 | /** |
5a84c50e | 29 | * @package CRM |
6b83d5bd | 30 | * @copyright CiviCRM LLC (c) 2004-2019 |
5a84c50e | 31 | */ |
50bfb460 | 32 | |
ea74069c TO |
33 | /** |
34 | * Class CRM_Utils_QueryFormatter | |
35 | * | |
36 | * This class is a bad idea. It exists for the unholy reason that a single installation | |
37 | * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same | |
38 | * query-text. It labors* to take the user's search expression and provide similar search | |
39 | * semantics in different contexts. It is unknown whether this labor will be fruitful | |
40 | * or in vain. | |
41 | */ | |
42 | class CRM_Utils_QueryFormatter { | |
7ee0cd30 TO |
43 | /** |
44 | * Generate queries using SQL LIKE expressions. | |
45 | */ | |
ea74069c | 46 | const LANG_SQL_LIKE = 'like'; |
7ee0cd30 TO |
47 | |
48 | /** | |
49 | * Generate queries using MySQL FTS expressions. | |
50 | */ | |
ea74069c | 51 | const LANG_SQL_FTS = 'fts'; |
7ee0cd30 TO |
52 | |
53 | /** | |
54 | * Generate queries using MySQL's boolean FTS expressions. | |
55 | */ | |
3196b7a5 | 56 | const LANG_SQL_FTSBOOL = 'ftsbool'; |
7ee0cd30 TO |
57 | |
58 | /** | |
59 | * Generate queries using Solr expressions. | |
60 | */ | |
ea74069c TO |
61 | const LANG_SOLR = 'solr'; |
62 | ||
63 | /** | |
64 | * Attempt to leave the text as-is. | |
65 | */ | |
66 | const MODE_NONE = 'simple'; | |
67 | ||
68 | /** | |
69 | * Attempt to treat the input text as a phrase | |
70 | */ | |
71 | const MODE_PHRASE = 'phrase'; | |
72 | ||
73 | /** | |
74 | * Attempt to treat the input text as a phrase with | |
75 | * wildcards on each end. | |
76 | */ | |
77 | const MODE_WILDPHRASE = 'wildphrase'; | |
78 | ||
79 | /** | |
80 | * Attempt to treat individual word as if it | |
81 | * had wildcards at the start and end. | |
82 | */ | |
83 | const MODE_WILDWORDS = 'wildwords'; | |
84 | ||
85 | /** | |
86 | * Attempt to treat individual word as if it | |
87 | * had a wildcard at the end. | |
88 | */ | |
89 | const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix'; | |
90 | ||
6714d8d2 | 91 | /** |
041ecc95 | 92 | * Singleton object. |
93 | * | |
94 | * @var \CRM_Utils_QueryFormatter|null | |
6714d8d2 | 95 | */ |
ea74069c TO |
96 | static protected $singleton; |
97 | ||
98 | /** | |
99 | * @param bool $fresh | |
100 | * @return CRM_Utils_QueryFormatter | |
101 | */ | |
102 | public static function singleton($fresh = FALSE) { | |
103 | if ($fresh || self::$singleton === NULL) { | |
aaffa79f | 104 | $mode = Civi::settings()->get('fts_query_mode'); |
ea74069c TO |
105 | self::$singleton = new CRM_Utils_QueryFormatter($mode); |
106 | } | |
107 | return self::$singleton; | |
108 | } | |
109 | ||
110 | /** | |
aaffa79f | 111 | * @var string |
50bfb460 | 112 | * eg MODE_NONE |
ea74069c TO |
113 | */ |
114 | protected $mode; | |
115 | ||
116 | /** | |
77855840 TO |
117 | * @param string $mode |
118 | * Eg MODE_NONE. | |
ea74069c | 119 | */ |
00be9182 | 120 | public function __construct($mode) { |
ea74069c TO |
121 | $this->mode = $mode; |
122 | } | |
123 | ||
124 | /** | |
125 | * @param mixed $mode | |
126 | */ | |
127 | public function setMode($mode) { | |
128 | $this->mode = $mode; | |
129 | } | |
130 | ||
131 | /** | |
132 | * @return mixed | |
133 | */ | |
134 | public function getMode() { | |
135 | return $this->mode; | |
136 | } | |
137 | ||
138 | /** | |
139 | * @param string $text | |
77855840 TO |
140 | * @param string $language |
141 | * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR. | |
ea74069c TO |
142 | * @throws CRM_Core_Exception |
143 | * @return string | |
144 | */ | |
145 | public function format($text, $language) { | |
146 | $text = trim($text); | |
147 | ||
148 | switch ($language) { | |
149 | case self::LANG_SOLR: | |
150 | case self::LANG_SQL_FTS: | |
151 | $text = $this->_formatFts($text, $this->mode); | |
152 | break; | |
e7292422 | 153 | |
3196b7a5 TO |
154 | case self::LANG_SQL_FTSBOOL: |
155 | $text = $this->_formatFtsBool($text, $this->mode); | |
156 | break; | |
e7292422 | 157 | |
ea74069c TO |
158 | case self::LANG_SQL_LIKE: |
159 | $text = $this->_formatLike($text, $this->mode); | |
160 | break; | |
e7292422 | 161 | |
ea74069c TO |
162 | default: |
163 | $text = NULL; | |
164 | } | |
165 | ||
166 | if ($text === NULL) { | |
167 | throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]"); | |
168 | } | |
169 | ||
170 | return $text; | |
171 | } | |
172 | ||
2395ef81 TO |
173 | /** |
174 | * Create a SQL WHERE expression for matching against a list of | |
175 | * text columns. | |
176 | * | |
177 | * @param string $table | |
178 | * Eg "civicrm_note" or "civicrm_note mynote". | |
179 | * @param array|string $columns | |
180 | * List of columns to search against. | |
181 | * Eg "first_name" or "activity_details". | |
182 | * @param string $queryText | |
183 | * @return string | |
184 | * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'" | |
185 | */ | |
186 | public function formatSql($table, $columns, $queryText) { | |
187 | if ($queryText === '*' || $queryText === '%' || empty($queryText)) { | |
188 | return '(1)'; | |
189 | } | |
190 | ||
191 | $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower'; | |
192 | ||
193 | if (strpos($table, ' ') === FALSE) { | |
194 | $tableName = $tableAlias = $table; | |
195 | } | |
196 | else { | |
197 | list ($tableName, $tableAlias) = explode(' ', $table); | |
198 | } | |
199 | if (is_scalar($columns)) { | |
be2fb01f | 200 | $columns = [$columns]; |
2395ef81 TO |
201 | } |
202 | ||
be2fb01f | 203 | $clauses = []; |
2395ef81 TO |
204 | if (CRM_Core_InnoDBIndexer::singleton() |
205 | ->hasDeclaredIndex($tableName, $columns) | |
206 | ) { | |
207 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL); | |
208 | ||
be2fb01f | 209 | $prefixedFieldNames = []; |
2395ef81 TO |
210 | foreach ($columns as $fieldName) { |
211 | $prefixedFieldNames[] = "$tableAlias.$fieldName"; | |
212 | } | |
213 | ||
214 | $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)", | |
215 | implode(',', $prefixedFieldNames), | |
216 | $strtolower(CRM_Core_DAO::escapeString($formattedQuery)) | |
217 | ); | |
218 | } | |
219 | else { | |
220 | //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array( | |
221 | // 1 => $table, | |
222 | // 2 => implode(', ', $fullTextFields), | |
223 | //))); | |
224 | ||
225 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE); | |
226 | $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery)); | |
227 | foreach ($columns as $fieldName) { | |
228 | $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'"; | |
229 | } | |
230 | } | |
231 | return implode(' OR ', $clauses); | |
232 | } | |
233 | ||
2e2605fe EM |
234 | /** |
235 | * Format Fts. | |
236 | * | |
237 | * @param string $text | |
238 | * @param $mode | |
239 | * | |
240 | * @return mixed | |
241 | */ | |
ea74069c TO |
242 | protected function _formatFts($text, $mode) { |
243 | $result = NULL; | |
244 | ||
245 | // normalize user-inputted wildcards | |
246 | $text = str_replace('%', '*', $text); | |
247 | ||
248 | if (empty($text)) { | |
3196b7a5 | 249 | $result = '*'; |
ea74069c TO |
250 | } |
251 | elseif (strpos($text, '*') !== FALSE) { | |
252 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
3196b7a5 | 253 | $result = $text; |
ea74069c TO |
254 | } |
255 | else { | |
256 | switch ($mode) { | |
257 | case self::MODE_NONE: | |
258 | $result = $text; | |
259 | break; | |
260 | ||
261 | case self::MODE_PHRASE: | |
262 | $result = '"' . $text . '"'; | |
263 | break; | |
264 | ||
265 | case self::MODE_WILDPHRASE: | |
266 | $result = '"*' . $text . '*"'; | |
267 | break; | |
268 | ||
269 | case self::MODE_WILDWORDS: | |
270 | $result = $this->mapWords($text, '*word*'); | |
271 | break; | |
272 | ||
273 | case self::MODE_WILDWORDS_SUFFIX: | |
274 | $result = $this->mapWords($text, 'word*'); | |
275 | break; | |
276 | ||
277 | default: | |
278 | $result = NULL; | |
279 | } | |
280 | } | |
281 | ||
282 | return $this->dedupeWildcards($result, '%'); | |
283 | } | |
284 | ||
2e2605fe EM |
285 | /** |
286 | * Format FTS. | |
287 | * | |
288 | * @param string $text | |
289 | * @param $mode | |
290 | * | |
291 | * @return mixed | |
292 | */ | |
3196b7a5 TO |
293 | protected function _formatFtsBool($text, $mode) { |
294 | $result = NULL; | |
be2fb01f CW |
295 | $operators = ['+', '-', '~', '(', ')']; |
296 | $wildCards = ['@', '%', '*']; | |
e0b51656 | 297 | $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/'); |
408fe090 | 298 | |
299 | //Return if searched string ends with an unsupported operator. | |
e0b51656 | 300 | //Or if the string contains an invalid joint occurrence of operators. |
408fe090 | 301 | foreach ($operators as $val) { |
e0b51656 | 302 | if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) { |
408fe090 | 303 | $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name'); |
304 | $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1"); | |
305 | $operators = implode("', '", $operators); | |
e0b51656 | 306 | CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String'); |
408fe090 | 307 | } |
308 | } | |
3196b7a5 TO |
309 | |
310 | // normalize user-inputted wildcards | |
311 | $text = str_replace('%', '*', $text); | |
312 | ||
313 | if (empty($text)) { | |
314 | $result = '*'; | |
315 | } | |
7bfcf18f TO |
316 | elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) { |
317 | // if user supplies their own include/exclude operators, use text as is (with trailing wildcard) | |
318 | $result = $this->mapWords($text, 'word*'); | |
319 | } | |
3196b7a5 TO |
320 | elseif (strpos($text, '*') !== FALSE) { |
321 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
322 | $result = $this->mapWords($text, '+word'); | |
323 | } | |
7bfcf18f TO |
324 | elseif (preg_match('/^(["\']).*\1$/m', $text)) { |
325 | // if surrounded by quotes, use term as is | |
326 | $result = $text; | |
327 | } | |
3196b7a5 TO |
328 | else { |
329 | switch ($mode) { | |
330 | case self::MODE_NONE: | |
408fe090 | 331 | $result = $this->mapWords($text, '+word', TRUE); |
3196b7a5 TO |
332 | break; |
333 | ||
334 | case self::MODE_PHRASE: | |
335 | $result = '+"' . $text . '"'; | |
336 | break; | |
337 | ||
338 | case self::MODE_WILDPHRASE: | |
339 | $result = '+"*' . $text . '*"'; | |
340 | break; | |
341 | ||
342 | case self::MODE_WILDWORDS: | |
343 | $result = $this->mapWords($text, '+*word*'); | |
344 | break; | |
345 | ||
346 | case self::MODE_WILDWORDS_SUFFIX: | |
347 | $result = $this->mapWords($text, '+word*'); | |
348 | break; | |
349 | ||
350 | default: | |
351 | $result = NULL; | |
352 | } | |
353 | } | |
354 | ||
355 | return $this->dedupeWildcards($result, '%'); | |
356 | } | |
357 | ||
2e2605fe EM |
358 | /** |
359 | * Format like. | |
360 | * | |
361 | * @param $text | |
362 | * @param $mode | |
363 | * | |
364 | * @return mixed | |
365 | */ | |
ea74069c TO |
366 | protected function _formatLike($text, $mode) { |
367 | $result = NULL; | |
368 | ||
369 | if (empty($text)) { | |
370 | $result = '%'; | |
371 | } | |
372 | elseif (strpos($text, '%') !== FALSE) { | |
373 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
374 | $result = $text; | |
375 | } | |
376 | else { | |
377 | switch ($mode) { | |
378 | case self::MODE_NONE: | |
379 | case self::MODE_PHRASE: | |
380 | case self::MODE_WILDPHRASE: | |
381 | $result = "%" . $text . "%"; | |
382 | break; | |
383 | ||
384 | case self::MODE_WILDWORDS: | |
385 | case self::MODE_WILDWORDS_SUFFIX: | |
386 | $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%'; | |
387 | break; | |
388 | ||
389 | default: | |
390 | $result = NULL; | |
391 | } | |
392 | } | |
393 | ||
394 | return $this->dedupeWildcards($result, '%'); | |
395 | } | |
396 | ||
397 | /** | |
77855840 TO |
398 | * @param string $text |
399 | * User-supplied query string. | |
400 | * @param string $template | |
401 | * A prototypical description of each word, eg "word%" or "word*" or "*word*". | |
408fe090 | 402 | * @param bool $quotes |
403 | * True if each searched keyword need to be surrounded with quotes. | |
ea74069c TO |
404 | * @return string |
405 | */ | |
408fe090 | 406 | protected function mapWords($text, $template, $quotes = FALSE) { |
be2fb01f | 407 | $result = []; |
408fe090 | 408 | foreach ($this->parseWords($text, $quotes) as $word) { |
ea74069c TO |
409 | $result[] = str_replace('word', $word, $template); |
410 | } | |
411 | return implode(' ', $result); | |
412 | } | |
413 | ||
414 | /** | |
6714d8d2 SL |
415 | * @param string $text |
416 | * @param bool $quotes | |
ea74069c TO |
417 | * @return array |
418 | */ | |
408fe090 | 419 | protected function parseWords($text, $quotes) { |
895e8529 TO |
420 | //NYSS 9692 special handling for emails |
421 | if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) { | |
422 | $parts = explode('@', $text); | |
423 | $parts[1] = stristr($parts[1], '.', TRUE); | |
424 | $text = implode(' ', $parts); | |
425 | } | |
895e8529 TO |
426 | |
427 | //NYSS also replace other occurrences of @ | |
408fe090 | 428 | $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text)); |
429 | //filter empty values if any | |
430 | $keywords = array_filter(explode(' ', $replacedText)); | |
431 | ||
432 | //Ensure each searched keywords are wrapped in double quotes. | |
433 | if ($quotes) { | |
434 | foreach ($keywords as &$val) { | |
435 | if (!is_numeric($val)) { | |
436 | $val = "\"{$val}\""; | |
437 | } | |
438 | } | |
439 | } | |
440 | return $keywords; | |
ea74069c TO |
441 | } |
442 | ||
443 | /** | |
444 | * @param $text | |
445 | * @param $wildcard | |
446 | * @return mixed | |
447 | */ | |
448 | protected function dedupeWildcards($text, $wildcard) { | |
449 | if ($text === NULL) { | |
450 | return NULL; | |
451 | } | |
452 | ||
453 | // don't use preg_replace because $wildcard might be special char | |
454 | while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) { | |
455 | $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text); | |
456 | } | |
457 | return $text; | |
458 | } | |
459 | ||
2e2605fe EM |
460 | /** |
461 | * Get modes. | |
462 | * | |
463 | * @return array | |
464 | */ | |
ea74069c | 465 | public static function getModes() { |
be2fb01f | 466 | return [ |
ea74069c TO |
467 | self::MODE_NONE, |
468 | self::MODE_PHRASE, | |
469 | self::MODE_WILDPHRASE, | |
470 | self::MODE_WILDWORDS, | |
471 | self::MODE_WILDWORDS_SUFFIX, | |
be2fb01f | 472 | ]; |
ea74069c TO |
473 | } |
474 | ||
2e2605fe EM |
475 | /** |
476 | * Get languages. | |
477 | * | |
478 | * @return array | |
479 | */ | |
ea74069c | 480 | public static function getLanguages() { |
be2fb01f | 481 | return [ |
ea74069c TO |
482 | self::LANG_SOLR, |
483 | self::LANG_SQL_FTS, | |
3196b7a5 | 484 | self::LANG_SQL_FTSBOOL, |
ea74069c | 485 | self::LANG_SQL_LIKE, |
be2fb01f | 486 | ]; |
ea74069c TO |
487 | } |
488 | ||
489 | /** | |
490 | * @param $text | |
491 | * | |
492 | * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");' | |
493 | */ | |
494 | public static function dumpExampleTable($text) { | |
495 | $width = strlen($text) + 8; | |
496 | $buf = ''; | |
497 | ||
498 | $buf .= sprintf("%-{$width}s", 'mode'); | |
499 | foreach (self::getLanguages() as $lang) { | |
500 | $buf .= sprintf("%-{$width}s", $lang); | |
501 | } | |
502 | $buf .= "\n"; | |
503 | ||
504 | foreach (self::getModes() as $mode) { | |
505 | $formatter = new CRM_Utils_QueryFormatter($mode); | |
506 | $buf .= sprintf("%-{$width}s", $mode); | |
507 | foreach (self::getLanguages() as $lang) { | |
508 | $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang)); | |
509 | } | |
510 | $buf .= "\n"; | |
511 | } | |
512 | ||
513 | echo $buf; | |
514 | } | |
96025800 | 515 | |
ef10e0b5 | 516 | } |