Merge pull request #12340 from eileenmcnaughton/merge_cleanup
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
CommitLineData
ea74069c 1<?php
ea74069c
TO
2/*
3 +--------------------------------------------------------------------+
fee14197 4 | CiviCRM version 5 |
ea74069c 5 +--------------------------------------------------------------------+
8c9251b3 6 | Copyright CiviCRM LLC (c) 2004-2018 |
ea74069c
TO
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
d25dd0ee 26 */
ea74069c 27
50bfb460 28 /**
5a84c50e 29 * @package CRM
8c9251b3 30 * @copyright CiviCRM LLC (c) 2004-2018
5a84c50e 31 */
50bfb460 32
ea74069c
TO
33/**
34 * Class CRM_Utils_QueryFormatter
35 *
36 * This class is a bad idea. It exists for the unholy reason that a single installation
37 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
38 * query-text. It labors* to take the user's search expression and provide similar search
39 * semantics in different contexts. It is unknown whether this labor will be fruitful
40 * or in vain.
41 */
42class CRM_Utils_QueryFormatter {
7ee0cd30
TO
43 /**
44 * Generate queries using SQL LIKE expressions.
45 */
ea74069c 46 const LANG_SQL_LIKE = 'like';
7ee0cd30
TO
47
48 /**
49 * Generate queries using MySQL FTS expressions.
50 */
ea74069c 51 const LANG_SQL_FTS = 'fts';
7ee0cd30
TO
52
53 /**
54 * Generate queries using MySQL's boolean FTS expressions.
55 */
3196b7a5 56 const LANG_SQL_FTSBOOL = 'ftsbool';
7ee0cd30
TO
57
58 /**
59 * Generate queries using Solr expressions.
60 */
ea74069c
TO
61 const LANG_SOLR = 'solr';
62
63 /**
64 * Attempt to leave the text as-is.
65 */
66 const MODE_NONE = 'simple';
67
68 /**
69 * Attempt to treat the input text as a phrase
70 */
71 const MODE_PHRASE = 'phrase';
72
73 /**
74 * Attempt to treat the input text as a phrase with
75 * wildcards on each end.
76 */
77 const MODE_WILDPHRASE = 'wildphrase';
78
79 /**
80 * Attempt to treat individual word as if it
81 * had wildcards at the start and end.
82 */
83 const MODE_WILDWORDS = 'wildwords';
84
85 /**
86 * Attempt to treat individual word as if it
87 * had a wildcard at the end.
88 */
89 const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';
90
91 static protected $singleton;
92
93 /**
94 * @param bool $fresh
95 * @return CRM_Utils_QueryFormatter
96 */
97 public static function singleton($fresh = FALSE) {
98 if ($fresh || self::$singleton === NULL) {
aaffa79f 99 $mode = Civi::settings()->get('fts_query_mode');
ea74069c
TO
100 self::$singleton = new CRM_Utils_QueryFormatter($mode);
101 }
102 return self::$singleton;
103 }
104
105 /**
aaffa79f 106 * @var string
50bfb460 107 * eg MODE_NONE
ea74069c
TO
108 */
109 protected $mode;
110
111 /**
77855840
TO
112 * @param string $mode
113 * Eg MODE_NONE.
ea74069c 114 */
00be9182 115 public function __construct($mode) {
ea74069c
TO
116 $this->mode = $mode;
117 }
118
119 /**
120 * @param mixed $mode
121 */
122 public function setMode($mode) {
123 $this->mode = $mode;
124 }
125
126 /**
127 * @return mixed
128 */
129 public function getMode() {
130 return $this->mode;
131 }
132
133 /**
134 * @param string $text
77855840
TO
135 * @param string $language
136 * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR.
ea74069c
TO
137 * @throws CRM_Core_Exception
138 * @return string
139 */
140 public function format($text, $language) {
141 $text = trim($text);
142
143 switch ($language) {
144 case self::LANG_SOLR:
145 case self::LANG_SQL_FTS:
146 $text = $this->_formatFts($text, $this->mode);
147 break;
e7292422 148
3196b7a5
TO
149 case self::LANG_SQL_FTSBOOL:
150 $text = $this->_formatFtsBool($text, $this->mode);
151 break;
e7292422 152
ea74069c
TO
153 case self::LANG_SQL_LIKE:
154 $text = $this->_formatLike($text, $this->mode);
155 break;
e7292422 156
ea74069c
TO
157 default:
158 $text = NULL;
159 }
160
161 if ($text === NULL) {
162 throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
163 }
164
165 return $text;
166 }
167
2395ef81
TO
168 /**
169 * Create a SQL WHERE expression for matching against a list of
170 * text columns.
171 *
172 * @param string $table
173 * Eg "civicrm_note" or "civicrm_note mynote".
174 * @param array|string $columns
175 * List of columns to search against.
176 * Eg "first_name" or "activity_details".
177 * @param string $queryText
178 * @return string
179 * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'"
180 */
181 public function formatSql($table, $columns, $queryText) {
182 if ($queryText === '*' || $queryText === '%' || empty($queryText)) {
183 return '(1)';
184 }
185
186 $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';
187
188 if (strpos($table, ' ') === FALSE) {
189 $tableName = $tableAlias = $table;
190 }
191 else {
192 list ($tableName, $tableAlias) = explode(' ', $table);
193 }
194 if (is_scalar($columns)) {
195 $columns = array($columns);
196 }
197
198 $clauses = array();
199 if (CRM_Core_InnoDBIndexer::singleton()
200 ->hasDeclaredIndex($tableName, $columns)
201 ) {
202 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL);
203
204 $prefixedFieldNames = array();
205 foreach ($columns as $fieldName) {
206 $prefixedFieldNames[] = "$tableAlias.$fieldName";
207 }
208
209 $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)",
210 implode(',', $prefixedFieldNames),
211 $strtolower(CRM_Core_DAO::escapeString($formattedQuery))
212 );
213 }
214 else {
215 //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array(
216 // 1 => $table,
217 // 2 => implode(', ', $fullTextFields),
218 //)));
219
220 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE);
221 $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery));
222 foreach ($columns as $fieldName) {
223 $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'";
224 }
225 }
226 return implode(' OR ', $clauses);
227 }
228
2e2605fe
EM
229 /**
230 * Format Fts.
231 *
232 * @param string $text
233 * @param $mode
234 *
235 * @return mixed
236 */
ea74069c
TO
237 protected function _formatFts($text, $mode) {
238 $result = NULL;
239
240 // normalize user-inputted wildcards
241 $text = str_replace('%', '*', $text);
242
243 if (empty($text)) {
3196b7a5 244 $result = '*';
ea74069c
TO
245 }
246 elseif (strpos($text, '*') !== FALSE) {
247 // if user supplies their own wildcards, then don't do any sophisticated changes
3196b7a5 248 $result = $text;
ea74069c
TO
249 }
250 else {
251 switch ($mode) {
252 case self::MODE_NONE:
253 $result = $text;
254 break;
255
256 case self::MODE_PHRASE:
257 $result = '"' . $text . '"';
258 break;
259
260 case self::MODE_WILDPHRASE:
261 $result = '"*' . $text . '*"';
262 break;
263
264 case self::MODE_WILDWORDS:
265 $result = $this->mapWords($text, '*word*');
266 break;
267
268 case self::MODE_WILDWORDS_SUFFIX:
269 $result = $this->mapWords($text, 'word*');
270 break;
271
272 default:
273 $result = NULL;
274 }
275 }
276
277 return $this->dedupeWildcards($result, '%');
278 }
279
2e2605fe
EM
280 /**
281 * Format FTS.
282 *
283 * @param string $text
284 * @param $mode
285 *
286 * @return mixed
287 */
3196b7a5
TO
288 protected function _formatFtsBool($text, $mode) {
289 $result = NULL;
408fe090 290 $operators = array('+', '-', '~', '(', ')');
e0b51656 291 $wildCards = array('@', '%', '*');
292 $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/');
408fe090 293
294 //Return if searched string ends with an unsupported operator.
e0b51656 295 //Or if the string contains an invalid joint occurrence of operators.
408fe090 296 foreach ($operators as $val) {
e0b51656 297 if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) {
408fe090 298 $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name');
299 $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1");
300 $operators = implode("', '", $operators);
e0b51656 301 CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String');
408fe090 302 }
303 }
3196b7a5
TO
304
305 // normalize user-inputted wildcards
306 $text = str_replace('%', '*', $text);
307
308 if (empty($text)) {
309 $result = '*';
310 }
7bfcf18f
TO
311 elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
312 // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
313 $result = $this->mapWords($text, 'word*');
314 }
3196b7a5
TO
315 elseif (strpos($text, '*') !== FALSE) {
316 // if user supplies their own wildcards, then don't do any sophisticated changes
317 $result = $this->mapWords($text, '+word');
318 }
7bfcf18f
TO
319 elseif (preg_match('/^(["\']).*\1$/m', $text)) {
320 // if surrounded by quotes, use term as is
321 $result = $text;
322 }
3196b7a5
TO
323 else {
324 switch ($mode) {
325 case self::MODE_NONE:
408fe090 326 $result = $this->mapWords($text, '+word', TRUE);
3196b7a5
TO
327 break;
328
329 case self::MODE_PHRASE:
330 $result = '+"' . $text . '"';
331 break;
332
333 case self::MODE_WILDPHRASE:
334 $result = '+"*' . $text . '*"';
335 break;
336
337 case self::MODE_WILDWORDS:
338 $result = $this->mapWords($text, '+*word*');
339 break;
340
341 case self::MODE_WILDWORDS_SUFFIX:
342 $result = $this->mapWords($text, '+word*');
343 break;
344
345 default:
346 $result = NULL;
347 }
348 }
349
350 return $this->dedupeWildcards($result, '%');
351 }
352
2e2605fe
EM
353 /**
354 * Format like.
355 *
356 * @param $text
357 * @param $mode
358 *
359 * @return mixed
360 */
ea74069c
TO
361 protected function _formatLike($text, $mode) {
362 $result = NULL;
363
364 if (empty($text)) {
365 $result = '%';
366 }
367 elseif (strpos($text, '%') !== FALSE) {
368 // if user supplies their own wildcards, then don't do any sophisticated changes
369 $result = $text;
370 }
371 else {
372 switch ($mode) {
373 case self::MODE_NONE:
374 case self::MODE_PHRASE:
375 case self::MODE_WILDPHRASE:
376 $result = "%" . $text . "%";
377 break;
378
379 case self::MODE_WILDWORDS:
380 case self::MODE_WILDWORDS_SUFFIX:
381 $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%';
382 break;
383
384 default:
385 $result = NULL;
386 }
387 }
388
389 return $this->dedupeWildcards($result, '%');
390 }
391
392 /**
77855840
TO
393 * @param string $text
394 * User-supplied query string.
395 * @param string $template
396 * A prototypical description of each word, eg "word%" or "word*" or "*word*".
408fe090 397 * @param bool $quotes
398 * True if each searched keyword need to be surrounded with quotes.
ea74069c
TO
399 * @return string
400 */
408fe090 401 protected function mapWords($text, $template, $quotes = FALSE) {
ea74069c 402 $result = array();
408fe090 403 foreach ($this->parseWords($text, $quotes) as $word) {
ea74069c
TO
404 $result[] = str_replace('word', $word, $template);
405 }
406 return implode(' ', $result);
407 }
408
409 /**
410 * @param $text
408fe090 411 * @bool $quotes
ea74069c
TO
412 * @return array
413 */
408fe090 414 protected function parseWords($text, $quotes) {
895e8529
TO
415 //NYSS 9692 special handling for emails
416 if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) {
417 $parts = explode('@', $text);
418 $parts[1] = stristr($parts[1], '.', TRUE);
419 $text = implode(' ', $parts);
420 }
895e8529
TO
421
422 //NYSS also replace other occurrences of @
408fe090 423 $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text));
424 //filter empty values if any
425 $keywords = array_filter(explode(' ', $replacedText));
426
427 //Ensure each searched keywords are wrapped in double quotes.
428 if ($quotes) {
429 foreach ($keywords as &$val) {
430 if (!is_numeric($val)) {
431 $val = "\"{$val}\"";
432 }
433 }
434 }
435 return $keywords;
ea74069c
TO
436 }
437
438 /**
439 * @param $text
440 * @param $wildcard
441 * @return mixed
442 */
443 protected function dedupeWildcards($text, $wildcard) {
444 if ($text === NULL) {
445 return NULL;
446 }
447
448 // don't use preg_replace because $wildcard might be special char
449 while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
450 $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
451 }
452 return $text;
453 }
454
2e2605fe
EM
455 /**
456 * Get modes.
457 *
458 * @return array
459 */
ea74069c
TO
460 public static function getModes() {
461 return array(
462 self::MODE_NONE,
463 self::MODE_PHRASE,
464 self::MODE_WILDPHRASE,
465 self::MODE_WILDWORDS,
466 self::MODE_WILDWORDS_SUFFIX,
467 );
468 }
469
2e2605fe
EM
470 /**
471 * Get languages.
472 *
473 * @return array
474 */
ea74069c
TO
475 public static function getLanguages() {
476 return array(
477 self::LANG_SOLR,
478 self::LANG_SQL_FTS,
3196b7a5 479 self::LANG_SQL_FTSBOOL,
ea74069c
TO
480 self::LANG_SQL_LIKE,
481 );
482 }
483
484 /**
485 * @param $text
486 *
487 * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
488 */
489 public static function dumpExampleTable($text) {
490 $width = strlen($text) + 8;
491 $buf = '';
492
493 $buf .= sprintf("%-{$width}s", 'mode');
494 foreach (self::getLanguages() as $lang) {
495 $buf .= sprintf("%-{$width}s", $lang);
496 }
497 $buf .= "\n";
498
499 foreach (self::getModes() as $mode) {
500 $formatter = new CRM_Utils_QueryFormatter($mode);
501 $buf .= sprintf("%-{$width}s", $mode);
502 foreach (self::getLanguages() as $lang) {
503 $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
504 }
505 $buf .= "\n";
506 }
507
508 echo $buf;
509 }
96025800 510
ef10e0b5 511}