Merge pull request #14062 from civicrm/5.13
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
CommitLineData
ea74069c 1<?php
ea74069c
TO
2/*
3 +--------------------------------------------------------------------+
fee14197 4 | CiviCRM version 5 |
ea74069c 5 +--------------------------------------------------------------------+
6b83d5bd 6 | Copyright CiviCRM LLC (c) 2004-2019 |
ea74069c
TO
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
d25dd0ee 26 */
ea74069c 27
50bfb460 28 /**
5a84c50e 29 * @package CRM
6b83d5bd 30 * @copyright CiviCRM LLC (c) 2004-2019
5a84c50e 31 */
50bfb460 32
ea74069c
TO
33/**
34 * Class CRM_Utils_QueryFormatter
35 *
36 * This class is a bad idea. It exists for the unholy reason that a single installation
37 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
38 * query-text. It labors* to take the user's search expression and provide similar search
39 * semantics in different contexts. It is unknown whether this labor will be fruitful
40 * or in vain.
41 */
42class CRM_Utils_QueryFormatter {
7ee0cd30
TO
43 /**
44 * Generate queries using SQL LIKE expressions.
45 */
ea74069c 46 const LANG_SQL_LIKE = 'like';
7ee0cd30
TO
47
48 /**
49 * Generate queries using MySQL FTS expressions.
50 */
ea74069c 51 const LANG_SQL_FTS = 'fts';
7ee0cd30
TO
52
53 /**
54 * Generate queries using MySQL's boolean FTS expressions.
55 */
3196b7a5 56 const LANG_SQL_FTSBOOL = 'ftsbool';
7ee0cd30
TO
57
58 /**
59 * Generate queries using Solr expressions.
60 */
ea74069c
TO
61 const LANG_SOLR = 'solr';
62
63 /**
64 * Attempt to leave the text as-is.
65 */
66 const MODE_NONE = 'simple';
67
68 /**
69 * Attempt to treat the input text as a phrase
70 */
71 const MODE_PHRASE = 'phrase';
72
73 /**
74 * Attempt to treat the input text as a phrase with
75 * wildcards on each end.
76 */
77 const MODE_WILDPHRASE = 'wildphrase';
78
79 /**
80 * Attempt to treat individual word as if it
81 * had wildcards at the start and end.
82 */
83 const MODE_WILDWORDS = 'wildwords';
84
85 /**
86 * Attempt to treat individual word as if it
87 * had a wildcard at the end.
88 */
89 const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';
90
6714d8d2
SL
91 /**
92 * @var \CRM_Utils_QueryFormatter|NULL
93 */
ea74069c
TO
94 static protected $singleton;
95
96 /**
97 * @param bool $fresh
98 * @return CRM_Utils_QueryFormatter
99 */
100 public static function singleton($fresh = FALSE) {
101 if ($fresh || self::$singleton === NULL) {
aaffa79f 102 $mode = Civi::settings()->get('fts_query_mode');
ea74069c
TO
103 self::$singleton = new CRM_Utils_QueryFormatter($mode);
104 }
105 return self::$singleton;
106 }
107
108 /**
aaffa79f 109 * @var string
50bfb460 110 * eg MODE_NONE
ea74069c
TO
111 */
112 protected $mode;
113
114 /**
77855840
TO
115 * @param string $mode
116 * Eg MODE_NONE.
ea74069c 117 */
00be9182 118 public function __construct($mode) {
ea74069c
TO
119 $this->mode = $mode;
120 }
121
122 /**
123 * @param mixed $mode
124 */
125 public function setMode($mode) {
126 $this->mode = $mode;
127 }
128
129 /**
130 * @return mixed
131 */
132 public function getMode() {
133 return $this->mode;
134 }
135
136 /**
137 * @param string $text
77855840
TO
138 * @param string $language
139 * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR.
ea74069c
TO
140 * @throws CRM_Core_Exception
141 * @return string
142 */
143 public function format($text, $language) {
144 $text = trim($text);
145
146 switch ($language) {
147 case self::LANG_SOLR:
148 case self::LANG_SQL_FTS:
149 $text = $this->_formatFts($text, $this->mode);
150 break;
e7292422 151
3196b7a5
TO
152 case self::LANG_SQL_FTSBOOL:
153 $text = $this->_formatFtsBool($text, $this->mode);
154 break;
e7292422 155
ea74069c
TO
156 case self::LANG_SQL_LIKE:
157 $text = $this->_formatLike($text, $this->mode);
158 break;
e7292422 159
ea74069c
TO
160 default:
161 $text = NULL;
162 }
163
164 if ($text === NULL) {
165 throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
166 }
167
168 return $text;
169 }
170
2395ef81
TO
171 /**
172 * Create a SQL WHERE expression for matching against a list of
173 * text columns.
174 *
175 * @param string $table
176 * Eg "civicrm_note" or "civicrm_note mynote".
177 * @param array|string $columns
178 * List of columns to search against.
179 * Eg "first_name" or "activity_details".
180 * @param string $queryText
181 * @return string
182 * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'"
183 */
184 public function formatSql($table, $columns, $queryText) {
185 if ($queryText === '*' || $queryText === '%' || empty($queryText)) {
186 return '(1)';
187 }
188
189 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
190
191 if (strpos($table, ' ') === FALSE) {
192 $tableName = $tableAlias = $table;
193 }
194 else {
195 list ($tableName, $tableAlias) = explode(' ', $table);
196 }
197 if (is_scalar($columns)) {
be2fb01f 198 $columns = [$columns];
2395ef81
TO
199 }
200
be2fb01f 201 $clauses = [];
2395ef81
TO
202 if (CRM_Core_InnoDBIndexer::singleton()
203 ->hasDeclaredIndex($tableName, $columns)
204 ) {
205 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL);
206
be2fb01f 207 $prefixedFieldNames = [];
2395ef81
TO
208 foreach ($columns as $fieldName) {
209 $prefixedFieldNames[] = "$tableAlias.$fieldName";
210 }
211
212 $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)",
213 implode(',', $prefixedFieldNames),
214 $strtolower(CRM_Core_DAO::escapeString($formattedQuery))
215 );
216 }
217 else {
218 //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array(
219 // 1 => $table,
220 // 2 => implode(', ', $fullTextFields),
221 //)));
222
223 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE);
224 $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery));
225 foreach ($columns as $fieldName) {
226 $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'";
227 }
228 }
229 return implode(' OR ', $clauses);
230 }
231
2e2605fe
EM
232 /**
233 * Format Fts.
234 *
235 * @param string $text
236 * @param $mode
237 *
238 * @return mixed
239 */
ea74069c
TO
240 protected function _formatFts($text, $mode) {
241 $result = NULL;
242
243 // normalize user-inputted wildcards
244 $text = str_replace('%', '*', $text);
245
246 if (empty($text)) {
3196b7a5 247 $result = '*';
ea74069c
TO
248 }
249 elseif (strpos($text, '*') !== FALSE) {
250 // if user supplies their own wildcards, then don't do any sophisticated changes
3196b7a5 251 $result = $text;
ea74069c
TO
252 }
253 else {
254 switch ($mode) {
255 case self::MODE_NONE:
256 $result = $text;
257 break;
258
259 case self::MODE_PHRASE:
260 $result = '"' . $text . '"';
261 break;
262
263 case self::MODE_WILDPHRASE:
264 $result = '"*' . $text . '*"';
265 break;
266
267 case self::MODE_WILDWORDS:
268 $result = $this->mapWords($text, '*word*');
269 break;
270
271 case self::MODE_WILDWORDS_SUFFIX:
272 $result = $this->mapWords($text, 'word*');
273 break;
274
275 default:
276 $result = NULL;
277 }
278 }
279
280 return $this->dedupeWildcards($result, '%');
281 }
282
2e2605fe
EM
283 /**
284 * Format FTS.
285 *
286 * @param string $text
287 * @param $mode
288 *
289 * @return mixed
290 */
3196b7a5
TO
291 protected function _formatFtsBool($text, $mode) {
292 $result = NULL;
be2fb01f
CW
293 $operators = ['+', '-', '~', '(', ')'];
294 $wildCards = ['@', '%', '*'];
e0b51656 295 $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/');
408fe090 296
297 //Return if searched string ends with an unsupported operator.
e0b51656 298 //Or if the string contains an invalid joint occurrence of operators.
408fe090 299 foreach ($operators as $val) {
e0b51656 300 if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) {
408fe090 301 $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name');
302 $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1");
303 $operators = implode("', '", $operators);
e0b51656 304 CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String');
408fe090 305 }
306 }
3196b7a5
TO
307
308 // normalize user-inputted wildcards
309 $text = str_replace('%', '*', $text);
310
311 if (empty($text)) {
312 $result = '*';
313 }
7bfcf18f
TO
314 elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
315 // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
316 $result = $this->mapWords($text, 'word*');
317 }
3196b7a5
TO
318 elseif (strpos($text, '*') !== FALSE) {
319 // if user supplies their own wildcards, then don't do any sophisticated changes
320 $result = $this->mapWords($text, '+word');
321 }
7bfcf18f
TO
322 elseif (preg_match('/^(["\']).*\1$/m', $text)) {
323 // if surrounded by quotes, use term as is
324 $result = $text;
325 }
3196b7a5
TO
326 else {
327 switch ($mode) {
328 case self::MODE_NONE:
408fe090 329 $result = $this->mapWords($text, '+word', TRUE);
3196b7a5
TO
330 break;
331
332 case self::MODE_PHRASE:
333 $result = '+"' . $text . '"';
334 break;
335
336 case self::MODE_WILDPHRASE:
337 $result = '+"*' . $text . '*"';
338 break;
339
340 case self::MODE_WILDWORDS:
341 $result = $this->mapWords($text, '+*word*');
342 break;
343
344 case self::MODE_WILDWORDS_SUFFIX:
345 $result = $this->mapWords($text, '+word*');
346 break;
347
348 default:
349 $result = NULL;
350 }
351 }
352
353 return $this->dedupeWildcards($result, '%');
354 }
355
2e2605fe
EM
356 /**
357 * Format like.
358 *
359 * @param $text
360 * @param $mode
361 *
362 * @return mixed
363 */
ea74069c
TO
364 protected function _formatLike($text, $mode) {
365 $result = NULL;
366
367 if (empty($text)) {
368 $result = '%';
369 }
370 elseif (strpos($text, '%') !== FALSE) {
371 // if user supplies their own wildcards, then don't do any sophisticated changes
372 $result = $text;
373 }
374 else {
375 switch ($mode) {
376 case self::MODE_NONE:
377 case self::MODE_PHRASE:
378 case self::MODE_WILDPHRASE:
379 $result = "%" . $text . "%";
380 break;
381
382 case self::MODE_WILDWORDS:
383 case self::MODE_WILDWORDS_SUFFIX:
384 $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%';
385 break;
386
387 default:
388 $result = NULL;
389 }
390 }
391
392 return $this->dedupeWildcards($result, '%');
393 }
394
395 /**
77855840
TO
396 * @param string $text
397 * User-supplied query string.
398 * @param string $template
399 * A prototypical description of each word, eg "word%" or "word*" or "*word*".
408fe090 400 * @param bool $quotes
401 * True if each searched keyword need to be surrounded with quotes.
ea74069c
TO
402 * @return string
403 */
408fe090 404 protected function mapWords($text, $template, $quotes = FALSE) {
be2fb01f 405 $result = [];
408fe090 406 foreach ($this->parseWords($text, $quotes) as $word) {
ea74069c
TO
407 $result[] = str_replace('word', $word, $template);
408 }
409 return implode(' ', $result);
410 }
411
412 /**
6714d8d2
SL
413 * @param string $text
414 * @param bool $quotes
ea74069c
TO
415 * @return array
416 */
408fe090 417 protected function parseWords($text, $quotes) {
895e8529
TO
418 //NYSS 9692 special handling for emails
419 if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) {
420 $parts = explode('@', $text);
421 $parts[1] = stristr($parts[1], '.', TRUE);
422 $text = implode(' ', $parts);
423 }
895e8529
TO
424
425 //NYSS also replace other occurrences of @
408fe090 426 $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text));
427 //filter empty values if any
428 $keywords = array_filter(explode(' ', $replacedText));
429
430 //Ensure each searched keywords are wrapped in double quotes.
431 if ($quotes) {
432 foreach ($keywords as &$val) {
433 if (!is_numeric($val)) {
434 $val = "\"{$val}\"";
435 }
436 }
437 }
438 return $keywords;
ea74069c
TO
439 }
440
441 /**
442 * @param $text
443 * @param $wildcard
444 * @return mixed
445 */
446 protected function dedupeWildcards($text, $wildcard) {
447 if ($text === NULL) {
448 return NULL;
449 }
450
451 // don't use preg_replace because $wildcard might be special char
452 while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
453 $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
454 }
455 return $text;
456 }
457
2e2605fe
EM
458 /**
459 * Get modes.
460 *
461 * @return array
462 */
ea74069c 463 public static function getModes() {
be2fb01f 464 return [
ea74069c
TO
465 self::MODE_NONE,
466 self::MODE_PHRASE,
467 self::MODE_WILDPHRASE,
468 self::MODE_WILDWORDS,
469 self::MODE_WILDWORDS_SUFFIX,
be2fb01f 470 ];
ea74069c
TO
471 }
472
2e2605fe
EM
473 /**
474 * Get languages.
475 *
476 * @return array
477 */
ea74069c 478 public static function getLanguages() {
be2fb01f 479 return [
ea74069c
TO
480 self::LANG_SOLR,
481 self::LANG_SQL_FTS,
3196b7a5 482 self::LANG_SQL_FTSBOOL,
ea74069c 483 self::LANG_SQL_LIKE,
be2fb01f 484 ];
ea74069c
TO
485 }
486
487 /**
488 * @param $text
489 *
490 * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
491 */
492 public static function dumpExampleTable($text) {
493 $width = strlen($text) + 8;
494 $buf = '';
495
496 $buf .= sprintf("%-{$width}s", 'mode');
497 foreach (self::getLanguages() as $lang) {
498 $buf .= sprintf("%-{$width}s", $lang);
499 }
500 $buf .= "\n";
501
502 foreach (self::getModes() as $mode) {
503 $formatter = new CRM_Utils_QueryFormatter($mode);
504 $buf .= sprintf("%-{$width}s", $mode);
505 foreach (self::getLanguages() as $lang) {
506 $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
507 }
508 $buf .= "\n";
509 }
510
511 echo $buf;
512 }
96025800 513
ef10e0b5 514}