Commit | Line | Data |
---|---|---|
ea74069c | 1 | <?php |
ea74069c TO |
2 | /* |
3 | +--------------------------------------------------------------------+ | |
fee14197 | 4 | | CiviCRM version 5 | |
ea74069c | 5 | +--------------------------------------------------------------------+ |
6b83d5bd | 6 | | Copyright CiviCRM LLC (c) 2004-2019 | |
ea74069c TO |
7 | +--------------------------------------------------------------------+ |
8 | | This file is a part of CiviCRM. | | |
9 | | | | |
10 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
11 | | under the terms of the GNU Affero General Public License | | |
12 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
13 | | | | |
14 | | CiviCRM is distributed in the hope that it will be useful, but | | |
15 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
17 | | See the GNU Affero General Public License for more details. | | |
18 | | | | |
19 | | You should have received a copy of the GNU Affero General Public | | |
20 | | License and the CiviCRM Licensing Exception along | | |
21 | | with this program; if not, contact CiviCRM LLC | | |
22 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
23 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
24 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
25 | +--------------------------------------------------------------------+ | |
d25dd0ee | 26 | */ |
ea74069c | 27 | |
50bfb460 | 28 | /** |
5a84c50e | 29 | * @package CRM |
6b83d5bd | 30 | * @copyright CiviCRM LLC (c) 2004-2019 |
5a84c50e | 31 | */ |
50bfb460 | 32 | |
ea74069c TO |
33 | /** |
34 | * Class CRM_Utils_QueryFormatter | |
35 | * | |
36 | * This class is a bad idea. It exists for the unholy reason that a single installation | |
37 | * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same | |
38 | * query-text. It labors* to take the user's search expression and provide similar search | |
39 | * semantics in different contexts. It is unknown whether this labor will be fruitful | |
40 | * or in vain. | |
41 | */ | |
42 | class CRM_Utils_QueryFormatter { | |
7ee0cd30 TO |
43 | /** |
44 | * Generate queries using SQL LIKE expressions. | |
45 | */ | |
ea74069c | 46 | const LANG_SQL_LIKE = 'like'; |
7ee0cd30 TO |
47 | |
48 | /** | |
49 | * Generate queries using MySQL FTS expressions. | |
50 | */ | |
ea74069c | 51 | const LANG_SQL_FTS = 'fts'; |
7ee0cd30 TO |
52 | |
53 | /** | |
54 | * Generate queries using MySQL's boolean FTS expressions. | |
55 | */ | |
3196b7a5 | 56 | const LANG_SQL_FTSBOOL = 'ftsbool'; |
7ee0cd30 TO |
57 | |
58 | /** | |
59 | * Generate queries using Solr expressions. | |
60 | */ | |
ea74069c TO |
61 | const LANG_SOLR = 'solr'; |
62 | ||
63 | /** | |
64 | * Attempt to leave the text as-is. | |
65 | */ | |
66 | const MODE_NONE = 'simple'; | |
67 | ||
68 | /** | |
69 | * Attempt to treat the input text as a phrase | |
70 | */ | |
71 | const MODE_PHRASE = 'phrase'; | |
72 | ||
73 | /** | |
74 | * Attempt to treat the input text as a phrase with | |
75 | * wildcards on each end. | |
76 | */ | |
77 | const MODE_WILDPHRASE = 'wildphrase'; | |
78 | ||
79 | /** | |
80 | * Attempt to treat individual word as if it | |
81 | * had wildcards at the start and end. | |
82 | */ | |
83 | const MODE_WILDWORDS = 'wildwords'; | |
84 | ||
85 | /** | |
86 | * Attempt to treat individual word as if it | |
87 | * had a wildcard at the end. | |
88 | */ | |
89 | const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix'; | |
90 | ||
6714d8d2 SL |
91 | /** |
92 | * @var \CRM_Utils_QueryFormatter|NULL | |
93 | */ | |
ea74069c TO |
94 | static protected $singleton; |
95 | ||
96 | /** | |
97 | * @param bool $fresh | |
98 | * @return CRM_Utils_QueryFormatter | |
99 | */ | |
100 | public static function singleton($fresh = FALSE) { | |
101 | if ($fresh || self::$singleton === NULL) { | |
aaffa79f | 102 | $mode = Civi::settings()->get('fts_query_mode'); |
ea74069c TO |
103 | self::$singleton = new CRM_Utils_QueryFormatter($mode); |
104 | } | |
105 | return self::$singleton; | |
106 | } | |
107 | ||
108 | /** | |
aaffa79f | 109 | * @var string |
50bfb460 | 110 | * eg MODE_NONE |
ea74069c TO |
111 | */ |
112 | protected $mode; | |
113 | ||
114 | /** | |
77855840 TO |
115 | * @param string $mode |
116 | * Eg MODE_NONE. | |
ea74069c | 117 | */ |
00be9182 | 118 | public function __construct($mode) { |
ea74069c TO |
119 | $this->mode = $mode; |
120 | } | |
121 | ||
122 | /** | |
123 | * @param mixed $mode | |
124 | */ | |
125 | public function setMode($mode) { | |
126 | $this->mode = $mode; | |
127 | } | |
128 | ||
129 | /** | |
130 | * @return mixed | |
131 | */ | |
132 | public function getMode() { | |
133 | return $this->mode; | |
134 | } | |
135 | ||
136 | /** | |
137 | * @param string $text | |
77855840 TO |
138 | * @param string $language |
139 | * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR. | |
ea74069c TO |
140 | * @throws CRM_Core_Exception |
141 | * @return string | |
142 | */ | |
143 | public function format($text, $language) { | |
144 | $text = trim($text); | |
145 | ||
146 | switch ($language) { | |
147 | case self::LANG_SOLR: | |
148 | case self::LANG_SQL_FTS: | |
149 | $text = $this->_formatFts($text, $this->mode); | |
150 | break; | |
e7292422 | 151 | |
3196b7a5 TO |
152 | case self::LANG_SQL_FTSBOOL: |
153 | $text = $this->_formatFtsBool($text, $this->mode); | |
154 | break; | |
e7292422 | 155 | |
ea74069c TO |
156 | case self::LANG_SQL_LIKE: |
157 | $text = $this->_formatLike($text, $this->mode); | |
158 | break; | |
e7292422 | 159 | |
ea74069c TO |
160 | default: |
161 | $text = NULL; | |
162 | } | |
163 | ||
164 | if ($text === NULL) { | |
165 | throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]"); | |
166 | } | |
167 | ||
168 | return $text; | |
169 | } | |
170 | ||
2395ef81 TO |
171 | /** |
172 | * Create a SQL WHERE expression for matching against a list of | |
173 | * text columns. | |
174 | * | |
175 | * @param string $table | |
176 | * Eg "civicrm_note" or "civicrm_note mynote". | |
177 | * @param array|string $columns | |
178 | * List of columns to search against. | |
179 | * Eg "first_name" or "activity_details". | |
180 | * @param string $queryText | |
181 | * @return string | |
182 | * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'" | |
183 | */ | |
184 | public function formatSql($table, $columns, $queryText) { | |
185 | if ($queryText === '*' || $queryText === '%' || empty($queryText)) { | |
186 | return '(1)'; | |
187 | } | |
188 | ||
189 | $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower'; | |
190 | ||
191 | if (strpos($table, ' ') === FALSE) { | |
192 | $tableName = $tableAlias = $table; | |
193 | } | |
194 | else { | |
195 | list ($tableName, $tableAlias) = explode(' ', $table); | |
196 | } | |
197 | if (is_scalar($columns)) { | |
be2fb01f | 198 | $columns = [$columns]; |
2395ef81 TO |
199 | } |
200 | ||
be2fb01f | 201 | $clauses = []; |
2395ef81 TO |
202 | if (CRM_Core_InnoDBIndexer::singleton() |
203 | ->hasDeclaredIndex($tableName, $columns) | |
204 | ) { | |
205 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL); | |
206 | ||
be2fb01f | 207 | $prefixedFieldNames = []; |
2395ef81 TO |
208 | foreach ($columns as $fieldName) { |
209 | $prefixedFieldNames[] = "$tableAlias.$fieldName"; | |
210 | } | |
211 | ||
212 | $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)", | |
213 | implode(',', $prefixedFieldNames), | |
214 | $strtolower(CRM_Core_DAO::escapeString($formattedQuery)) | |
215 | ); | |
216 | } | |
217 | else { | |
218 | //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array( | |
219 | // 1 => $table, | |
220 | // 2 => implode(', ', $fullTextFields), | |
221 | //))); | |
222 | ||
223 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE); | |
224 | $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery)); | |
225 | foreach ($columns as $fieldName) { | |
226 | $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'"; | |
227 | } | |
228 | } | |
229 | return implode(' OR ', $clauses); | |
230 | } | |
231 | ||
2e2605fe EM |
232 | /** |
233 | * Format Fts. | |
234 | * | |
235 | * @param string $text | |
236 | * @param $mode | |
237 | * | |
238 | * @return mixed | |
239 | */ | |
ea74069c TO |
240 | protected function _formatFts($text, $mode) { |
241 | $result = NULL; | |
242 | ||
243 | // normalize user-inputted wildcards | |
244 | $text = str_replace('%', '*', $text); | |
245 | ||
246 | if (empty($text)) { | |
3196b7a5 | 247 | $result = '*'; |
ea74069c TO |
248 | } |
249 | elseif (strpos($text, '*') !== FALSE) { | |
250 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
3196b7a5 | 251 | $result = $text; |
ea74069c TO |
252 | } |
253 | else { | |
254 | switch ($mode) { | |
255 | case self::MODE_NONE: | |
256 | $result = $text; | |
257 | break; | |
258 | ||
259 | case self::MODE_PHRASE: | |
260 | $result = '"' . $text . '"'; | |
261 | break; | |
262 | ||
263 | case self::MODE_WILDPHRASE: | |
264 | $result = '"*' . $text . '*"'; | |
265 | break; | |
266 | ||
267 | case self::MODE_WILDWORDS: | |
268 | $result = $this->mapWords($text, '*word*'); | |
269 | break; | |
270 | ||
271 | case self::MODE_WILDWORDS_SUFFIX: | |
272 | $result = $this->mapWords($text, 'word*'); | |
273 | break; | |
274 | ||
275 | default: | |
276 | $result = NULL; | |
277 | } | |
278 | } | |
279 | ||
280 | return $this->dedupeWildcards($result, '%'); | |
281 | } | |
282 | ||
2e2605fe EM |
283 | /** |
284 | * Format FTS. | |
285 | * | |
286 | * @param string $text | |
287 | * @param $mode | |
288 | * | |
289 | * @return mixed | |
290 | */ | |
3196b7a5 TO |
291 | protected function _formatFtsBool($text, $mode) { |
292 | $result = NULL; | |
be2fb01f CW |
293 | $operators = ['+', '-', '~', '(', ')']; |
294 | $wildCards = ['@', '%', '*']; | |
e0b51656 | 295 | $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/'); |
408fe090 | 296 | |
297 | //Return if searched string ends with an unsupported operator. | |
e0b51656 | 298 | //Or if the string contains an invalid joint occurrence of operators. |
408fe090 | 299 | foreach ($operators as $val) { |
e0b51656 | 300 | if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) { |
408fe090 | 301 | $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name'); |
302 | $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1"); | |
303 | $operators = implode("', '", $operators); | |
e0b51656 | 304 | CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String'); |
408fe090 | 305 | } |
306 | } | |
3196b7a5 TO |
307 | |
308 | // normalize user-inputted wildcards | |
309 | $text = str_replace('%', '*', $text); | |
310 | ||
311 | if (empty($text)) { | |
312 | $result = '*'; | |
313 | } | |
7bfcf18f TO |
314 | elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) { |
315 | // if user supplies their own include/exclude operators, use text as is (with trailing wildcard) | |
316 | $result = $this->mapWords($text, 'word*'); | |
317 | } | |
3196b7a5 TO |
318 | elseif (strpos($text, '*') !== FALSE) { |
319 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
320 | $result = $this->mapWords($text, '+word'); | |
321 | } | |
7bfcf18f TO |
322 | elseif (preg_match('/^(["\']).*\1$/m', $text)) { |
323 | // if surrounded by quotes, use term as is | |
324 | $result = $text; | |
325 | } | |
3196b7a5 TO |
326 | else { |
327 | switch ($mode) { | |
328 | case self::MODE_NONE: | |
408fe090 | 329 | $result = $this->mapWords($text, '+word', TRUE); |
3196b7a5 TO |
330 | break; |
331 | ||
332 | case self::MODE_PHRASE: | |
333 | $result = '+"' . $text . '"'; | |
334 | break; | |
335 | ||
336 | case self::MODE_WILDPHRASE: | |
337 | $result = '+"*' . $text . '*"'; | |
338 | break; | |
339 | ||
340 | case self::MODE_WILDWORDS: | |
341 | $result = $this->mapWords($text, '+*word*'); | |
342 | break; | |
343 | ||
344 | case self::MODE_WILDWORDS_SUFFIX: | |
345 | $result = $this->mapWords($text, '+word*'); | |
346 | break; | |
347 | ||
348 | default: | |
349 | $result = NULL; | |
350 | } | |
351 | } | |
352 | ||
353 | return $this->dedupeWildcards($result, '%'); | |
354 | } | |
355 | ||
2e2605fe EM |
356 | /** |
357 | * Format like. | |
358 | * | |
359 | * @param $text | |
360 | * @param $mode | |
361 | * | |
362 | * @return mixed | |
363 | */ | |
ea74069c TO |
364 | protected function _formatLike($text, $mode) { |
365 | $result = NULL; | |
366 | ||
367 | if (empty($text)) { | |
368 | $result = '%'; | |
369 | } | |
370 | elseif (strpos($text, '%') !== FALSE) { | |
371 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
372 | $result = $text; | |
373 | } | |
374 | else { | |
375 | switch ($mode) { | |
376 | case self::MODE_NONE: | |
377 | case self::MODE_PHRASE: | |
378 | case self::MODE_WILDPHRASE: | |
379 | $result = "%" . $text . "%"; | |
380 | break; | |
381 | ||
382 | case self::MODE_WILDWORDS: | |
383 | case self::MODE_WILDWORDS_SUFFIX: | |
384 | $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%'; | |
385 | break; | |
386 | ||
387 | default: | |
388 | $result = NULL; | |
389 | } | |
390 | } | |
391 | ||
392 | return $this->dedupeWildcards($result, '%'); | |
393 | } | |
394 | ||
395 | /** | |
77855840 TO |
396 | * @param string $text |
397 | * User-supplied query string. | |
398 | * @param string $template | |
399 | * A prototypical description of each word, eg "word%" or "word*" or "*word*". | |
408fe090 | 400 | * @param bool $quotes |
401 | * True if each searched keyword need to be surrounded with quotes. | |
ea74069c TO |
402 | * @return string |
403 | */ | |
408fe090 | 404 | protected function mapWords($text, $template, $quotes = FALSE) { |
be2fb01f | 405 | $result = []; |
408fe090 | 406 | foreach ($this->parseWords($text, $quotes) as $word) { |
ea74069c TO |
407 | $result[] = str_replace('word', $word, $template); |
408 | } | |
409 | return implode(' ', $result); | |
410 | } | |
411 | ||
412 | /** | |
6714d8d2 SL |
413 | * @param string $text |
414 | * @param bool $quotes | |
ea74069c TO |
415 | * @return array |
416 | */ | |
408fe090 | 417 | protected function parseWords($text, $quotes) { |
895e8529 TO |
418 | //NYSS 9692 special handling for emails |
419 | if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) { | |
420 | $parts = explode('@', $text); | |
421 | $parts[1] = stristr($parts[1], '.', TRUE); | |
422 | $text = implode(' ', $parts); | |
423 | } | |
895e8529 TO |
424 | |
425 | //NYSS also replace other occurrences of @ | |
408fe090 | 426 | $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text)); |
427 | //filter empty values if any | |
428 | $keywords = array_filter(explode(' ', $replacedText)); | |
429 | ||
430 | //Ensure each searched keywords are wrapped in double quotes. | |
431 | if ($quotes) { | |
432 | foreach ($keywords as &$val) { | |
433 | if (!is_numeric($val)) { | |
434 | $val = "\"{$val}\""; | |
435 | } | |
436 | } | |
437 | } | |
438 | return $keywords; | |
ea74069c TO |
439 | } |
440 | ||
441 | /** | |
442 | * @param $text | |
443 | * @param $wildcard | |
444 | * @return mixed | |
445 | */ | |
446 | protected function dedupeWildcards($text, $wildcard) { | |
447 | if ($text === NULL) { | |
448 | return NULL; | |
449 | } | |
450 | ||
451 | // don't use preg_replace because $wildcard might be special char | |
452 | while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) { | |
453 | $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text); | |
454 | } | |
455 | return $text; | |
456 | } | |
457 | ||
2e2605fe EM |
458 | /** |
459 | * Get modes. | |
460 | * | |
461 | * @return array | |
462 | */ | |
ea74069c | 463 | public static function getModes() { |
be2fb01f | 464 | return [ |
ea74069c TO |
465 | self::MODE_NONE, |
466 | self::MODE_PHRASE, | |
467 | self::MODE_WILDPHRASE, | |
468 | self::MODE_WILDWORDS, | |
469 | self::MODE_WILDWORDS_SUFFIX, | |
be2fb01f | 470 | ]; |
ea74069c TO |
471 | } |
472 | ||
2e2605fe EM |
473 | /** |
474 | * Get languages. | |
475 | * | |
476 | * @return array | |
477 | */ | |
ea74069c | 478 | public static function getLanguages() { |
be2fb01f | 479 | return [ |
ea74069c TO |
480 | self::LANG_SOLR, |
481 | self::LANG_SQL_FTS, | |
3196b7a5 | 482 | self::LANG_SQL_FTSBOOL, |
ea74069c | 483 | self::LANG_SQL_LIKE, |
be2fb01f | 484 | ]; |
ea74069c TO |
485 | } |
486 | ||
487 | /** | |
488 | * @param $text | |
489 | * | |
490 | * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");' | |
491 | */ | |
492 | public static function dumpExampleTable($text) { | |
493 | $width = strlen($text) + 8; | |
494 | $buf = ''; | |
495 | ||
496 | $buf .= sprintf("%-{$width}s", 'mode'); | |
497 | foreach (self::getLanguages() as $lang) { | |
498 | $buf .= sprintf("%-{$width}s", $lang); | |
499 | } | |
500 | $buf .= "\n"; | |
501 | ||
502 | foreach (self::getModes() as $mode) { | |
503 | $formatter = new CRM_Utils_QueryFormatter($mode); | |
504 | $buf .= sprintf("%-{$width}s", $mode); | |
505 | foreach (self::getLanguages() as $lang) { | |
506 | $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang)); | |
507 | } | |
508 | $buf .= "\n"; | |
509 | } | |
510 | ||
511 | echo $buf; | |
512 | } | |
96025800 | 513 | |
ef10e0b5 | 514 | } |