Merge pull request #15833 from yashodha/participant_edit
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
CommitLineData
ea74069c 1<?php
ea74069c
TO
2/*
3 +--------------------------------------------------------------------+
fee14197 4 | CiviCRM version 5 |
ea74069c 5 +--------------------------------------------------------------------+
f299f7db 6 | Copyright CiviCRM LLC (c) 2004-2020 |
ea74069c
TO
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
d25dd0ee 26 */
ea74069c 27
50bfb460 28 /**
5a84c50e 29 * @package CRM
f299f7db 30 * @copyright CiviCRM LLC (c) 2004-2020
5a84c50e 31 */
50bfb460 32
ea74069c
TO
33/**
34 * Class CRM_Utils_QueryFormatter
35 *
36 * This class is a bad idea. It exists for the unholy reason that a single installation
37 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
38 * query-text. It labors* to take the user's search expression and provide similar search
39 * semantics in different contexts. It is unknown whether this labor will be fruitful
40 * or in vain.
41 */
42class CRM_Utils_QueryFormatter {
7ee0cd30
TO
43 /**
44 * Generate queries using SQL LIKE expressions.
45 */
ea74069c 46 const LANG_SQL_LIKE = 'like';
7ee0cd30
TO
47
48 /**
49 * Generate queries using MySQL FTS expressions.
50 */
ea74069c 51 const LANG_SQL_FTS = 'fts';
7ee0cd30
TO
52
53 /**
54 * Generate queries using MySQL's boolean FTS expressions.
55 */
3196b7a5 56 const LANG_SQL_FTSBOOL = 'ftsbool';
7ee0cd30
TO
57
58 /**
59 * Generate queries using Solr expressions.
60 */
ea74069c
TO
61 const LANG_SOLR = 'solr';
62
63 /**
64 * Attempt to leave the text as-is.
65 */
66 const MODE_NONE = 'simple';
67
68 /**
69 * Attempt to treat the input text as a phrase
70 */
71 const MODE_PHRASE = 'phrase';
72
73 /**
74 * Attempt to treat the input text as a phrase with
75 * wildcards on each end.
76 */
77 const MODE_WILDPHRASE = 'wildphrase';
78
79 /**
80 * Attempt to treat individual word as if it
81 * had wildcards at the start and end.
82 */
83 const MODE_WILDWORDS = 'wildwords';
84
85 /**
86 * Attempt to treat individual word as if it
87 * had a wildcard at the end.
88 */
89 const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';
90
6714d8d2 91 /**
041ecc95 92 * Singleton object.
93 *
94 * @var \CRM_Utils_QueryFormatter|null
6714d8d2 95 */
ea74069c
TO
96 static protected $singleton;
97
98 /**
99 * @param bool $fresh
100 * @return CRM_Utils_QueryFormatter
101 */
102 public static function singleton($fresh = FALSE) {
103 if ($fresh || self::$singleton === NULL) {
aaffa79f 104 $mode = Civi::settings()->get('fts_query_mode');
ea74069c
TO
105 self::$singleton = new CRM_Utils_QueryFormatter($mode);
106 }
107 return self::$singleton;
108 }
109
110 /**
aaffa79f 111 * @var string
50bfb460 112 * eg MODE_NONE
ea74069c
TO
113 */
114 protected $mode;
115
116 /**
77855840
TO
117 * @param string $mode
118 * Eg MODE_NONE.
ea74069c 119 */
00be9182 120 public function __construct($mode) {
ea74069c
TO
121 $this->mode = $mode;
122 }
123
124 /**
125 * @param mixed $mode
126 */
127 public function setMode($mode) {
128 $this->mode = $mode;
129 }
130
131 /**
132 * @return mixed
133 */
134 public function getMode() {
135 return $this->mode;
136 }
137
138 /**
139 * @param string $text
77855840
TO
140 * @param string $language
141 * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR.
ea74069c
TO
142 * @throws CRM_Core_Exception
143 * @return string
144 */
145 public function format($text, $language) {
146 $text = trim($text);
147
148 switch ($language) {
149 case self::LANG_SOLR:
150 case self::LANG_SQL_FTS:
151 $text = $this->_formatFts($text, $this->mode);
152 break;
e7292422 153
3196b7a5
TO
154 case self::LANG_SQL_FTSBOOL:
155 $text = $this->_formatFtsBool($text, $this->mode);
156 break;
e7292422 157
ea74069c
TO
158 case self::LANG_SQL_LIKE:
159 $text = $this->_formatLike($text, $this->mode);
160 break;
e7292422 161
ea74069c
TO
162 default:
163 $text = NULL;
164 }
165
166 if ($text === NULL) {
167 throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
168 }
169
170 return $text;
171 }
172
2395ef81
TO
173 /**
174 * Create a SQL WHERE expression for matching against a list of
175 * text columns.
176 *
177 * @param string $table
178 * Eg "civicrm_note" or "civicrm_note mynote".
179 * @param array|string $columns
180 * List of columns to search against.
181 * Eg "first_name" or "activity_details".
182 * @param string $queryText
183 * @return string
184 * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'"
185 */
186 public function formatSql($table, $columns, $queryText) {
187 if ($queryText === '*' || $queryText === '%' || empty($queryText)) {
188 return '(1)';
189 }
190
191 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
192
193 if (strpos($table, ' ') === FALSE) {
194 $tableName = $tableAlias = $table;
195 }
196 else {
197 list ($tableName, $tableAlias) = explode(' ', $table);
198 }
199 if (is_scalar($columns)) {
be2fb01f 200 $columns = [$columns];
2395ef81
TO
201 }
202
be2fb01f 203 $clauses = [];
2395ef81
TO
204 if (CRM_Core_InnoDBIndexer::singleton()
205 ->hasDeclaredIndex($tableName, $columns)
206 ) {
207 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL);
208
be2fb01f 209 $prefixedFieldNames = [];
2395ef81
TO
210 foreach ($columns as $fieldName) {
211 $prefixedFieldNames[] = "$tableAlias.$fieldName";
212 }
213
214 $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)",
215 implode(',', $prefixedFieldNames),
216 $strtolower(CRM_Core_DAO::escapeString($formattedQuery))
217 );
218 }
219 else {
220 //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array(
221 // 1 => $table,
222 // 2 => implode(', ', $fullTextFields),
223 //)));
224
225 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE);
226 $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery));
227 foreach ($columns as $fieldName) {
228 $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'";
229 }
230 }
231 return implode(' OR ', $clauses);
232 }
233
2e2605fe
EM
234 /**
235 * Format Fts.
236 *
237 * @param string $text
238 * @param $mode
239 *
240 * @return mixed
241 */
ea74069c
TO
242 protected function _formatFts($text, $mode) {
243 $result = NULL;
244
245 // normalize user-inputted wildcards
246 $text = str_replace('%', '*', $text);
247
248 if (empty($text)) {
3196b7a5 249 $result = '*';
ea74069c
TO
250 }
251 elseif (strpos($text, '*') !== FALSE) {
252 // if user supplies their own wildcards, then don't do any sophisticated changes
3196b7a5 253 $result = $text;
ea74069c
TO
254 }
255 else {
256 switch ($mode) {
257 case self::MODE_NONE:
258 $result = $text;
259 break;
260
261 case self::MODE_PHRASE:
262 $result = '"' . $text . '"';
263 break;
264
265 case self::MODE_WILDPHRASE:
266 $result = '"*' . $text . '*"';
267 break;
268
269 case self::MODE_WILDWORDS:
270 $result = $this->mapWords($text, '*word*');
271 break;
272
273 case self::MODE_WILDWORDS_SUFFIX:
274 $result = $this->mapWords($text, 'word*');
275 break;
276
277 default:
278 $result = NULL;
279 }
280 }
281
282 return $this->dedupeWildcards($result, '%');
283 }
284
2e2605fe
EM
285 /**
286 * Format FTS.
287 *
288 * @param string $text
289 * @param $mode
290 *
291 * @return mixed
292 */
3196b7a5
TO
293 protected function _formatFtsBool($text, $mode) {
294 $result = NULL;
be2fb01f
CW
295 $operators = ['+', '-', '~', '(', ')'];
296 $wildCards = ['@', '%', '*'];
e0b51656 297 $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/');
408fe090 298
299 //Return if searched string ends with an unsupported operator.
e0b51656 300 //Or if the string contains an invalid joint occurrence of operators.
408fe090 301 foreach ($operators as $val) {
e0b51656 302 if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) {
408fe090 303 $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name');
304 $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1");
305 $operators = implode("', '", $operators);
e0b51656 306 CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String');
408fe090 307 }
308 }
3196b7a5
TO
309
310 // normalize user-inputted wildcards
311 $text = str_replace('%', '*', $text);
312
313 if (empty($text)) {
314 $result = '*';
315 }
7bfcf18f
TO
316 elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
317 // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
318 $result = $this->mapWords($text, 'word*');
319 }
3196b7a5
TO
320 elseif (strpos($text, '*') !== FALSE) {
321 // if user supplies their own wildcards, then don't do any sophisticated changes
322 $result = $this->mapWords($text, '+word');
323 }
7bfcf18f
TO
324 elseif (preg_match('/^(["\']).*\1$/m', $text)) {
325 // if surrounded by quotes, use term as is
326 $result = $text;
327 }
3196b7a5
TO
328 else {
329 switch ($mode) {
330 case self::MODE_NONE:
408fe090 331 $result = $this->mapWords($text, '+word', TRUE);
3196b7a5
TO
332 break;
333
334 case self::MODE_PHRASE:
335 $result = '+"' . $text . '"';
336 break;
337
338 case self::MODE_WILDPHRASE:
339 $result = '+"*' . $text . '*"';
340 break;
341
342 case self::MODE_WILDWORDS:
343 $result = $this->mapWords($text, '+*word*');
344 break;
345
346 case self::MODE_WILDWORDS_SUFFIX:
347 $result = $this->mapWords($text, '+word*');
348 break;
349
350 default:
351 $result = NULL;
352 }
353 }
354
355 return $this->dedupeWildcards($result, '%');
356 }
357
2e2605fe
EM
358 /**
359 * Format like.
360 *
361 * @param $text
362 * @param $mode
363 *
364 * @return mixed
365 */
ea74069c
TO
366 protected function _formatLike($text, $mode) {
367 $result = NULL;
368
369 if (empty($text)) {
370 $result = '%';
371 }
372 elseif (strpos($text, '%') !== FALSE) {
373 // if user supplies their own wildcards, then don't do any sophisticated changes
374 $result = $text;
375 }
376 else {
377 switch ($mode) {
378 case self::MODE_NONE:
379 case self::MODE_PHRASE:
380 case self::MODE_WILDPHRASE:
381 $result = "%" . $text . "%";
382 break;
383
384 case self::MODE_WILDWORDS:
385 case self::MODE_WILDWORDS_SUFFIX:
386 $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%';
387 break;
388
389 default:
390 $result = NULL;
391 }
392 }
393
394 return $this->dedupeWildcards($result, '%');
395 }
396
397 /**
77855840
TO
398 * @param string $text
399 * User-supplied query string.
400 * @param string $template
401 * A prototypical description of each word, eg "word%" or "word*" or "*word*".
408fe090 402 * @param bool $quotes
403 * True if each searched keyword need to be surrounded with quotes.
ea74069c
TO
404 * @return string
405 */
408fe090 406 protected function mapWords($text, $template, $quotes = FALSE) {
be2fb01f 407 $result = [];
408fe090 408 foreach ($this->parseWords($text, $quotes) as $word) {
ea74069c
TO
409 $result[] = str_replace('word', $word, $template);
410 }
411 return implode(' ', $result);
412 }
413
414 /**
6714d8d2
SL
415 * @param string $text
416 * @param bool $quotes
ea74069c
TO
417 * @return array
418 */
408fe090 419 protected function parseWords($text, $quotes) {
895e8529
TO
420 //NYSS 9692 special handling for emails
421 if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) {
422 $parts = explode('@', $text);
423 $parts[1] = stristr($parts[1], '.', TRUE);
424 $text = implode(' ', $parts);
425 }
895e8529
TO
426
427 //NYSS also replace other occurrences of @
408fe090 428 $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text));
429 //filter empty values if any
430 $keywords = array_filter(explode(' ', $replacedText));
431
432 //Ensure each searched keywords are wrapped in double quotes.
433 if ($quotes) {
434 foreach ($keywords as &$val) {
435 if (!is_numeric($val)) {
436 $val = "\"{$val}\"";
437 }
438 }
439 }
440 return $keywords;
ea74069c
TO
441 }
442
443 /**
444 * @param $text
445 * @param $wildcard
446 * @return mixed
447 */
448 protected function dedupeWildcards($text, $wildcard) {
449 if ($text === NULL) {
450 return NULL;
451 }
452
453 // don't use preg_replace because $wildcard might be special char
454 while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
455 $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
456 }
457 return $text;
458 }
459
2e2605fe
EM
460 /**
461 * Get modes.
462 *
463 * @return array
464 */
ea74069c 465 public static function getModes() {
be2fb01f 466 return [
ea74069c
TO
467 self::MODE_NONE,
468 self::MODE_PHRASE,
469 self::MODE_WILDPHRASE,
470 self::MODE_WILDWORDS,
471 self::MODE_WILDWORDS_SUFFIX,
be2fb01f 472 ];
ea74069c
TO
473 }
474
2e2605fe
EM
475 /**
476 * Get languages.
477 *
478 * @return array
479 */
ea74069c 480 public static function getLanguages() {
be2fb01f 481 return [
ea74069c
TO
482 self::LANG_SOLR,
483 self::LANG_SQL_FTS,
3196b7a5 484 self::LANG_SQL_FTSBOOL,
ea74069c 485 self::LANG_SQL_LIKE,
be2fb01f 486 ];
ea74069c
TO
487 }
488
489 /**
490 * @param $text
491 *
492 * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
493 */
494 public static function dumpExampleTable($text) {
495 $width = strlen($text) + 8;
496 $buf = '';
497
498 $buf .= sprintf("%-{$width}s", 'mode');
499 foreach (self::getLanguages() as $lang) {
500 $buf .= sprintf("%-{$width}s", $lang);
501 }
502 $buf .= "\n";
503
504 foreach (self::getModes() as $mode) {
505 $formatter = new CRM_Utils_QueryFormatter($mode);
506 $buf .= sprintf("%-{$width}s", $mode);
507 foreach (self::getLanguages() as $lang) {
508 $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
509 }
510 $buf .= "\n";
511 }
512
513 echo $buf;
514 }
96025800 515
ef10e0b5 516}