Commit | Line | Data |
---|---|---|
ea74069c | 1 | <?php |
ea74069c TO |
2 | /* |
3 | +--------------------------------------------------------------------+ | |
7e9e8871 | 4 | | CiviCRM version 4.7 | |
ea74069c | 5 | +--------------------------------------------------------------------+ |
0f03f337 | 6 | | Copyright CiviCRM LLC (c) 2004-2017 | |
ea74069c TO |
7 | +--------------------------------------------------------------------+ |
8 | | This file is a part of CiviCRM. | | |
9 | | | | |
10 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
11 | | under the terms of the GNU Affero General Public License | | |
12 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
13 | | | | |
14 | | CiviCRM is distributed in the hope that it will be useful, but | | |
15 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
17 | | See the GNU Affero General Public License for more details. | | |
18 | | | | |
19 | | You should have received a copy of the GNU Affero General Public | | |
20 | | License and the CiviCRM Licensing Exception along | | |
21 | | with this program; if not, contact CiviCRM LLC | | |
22 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
23 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
24 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
25 | +--------------------------------------------------------------------+ | |
d25dd0ee | 26 | */ |
ea74069c | 27 | |
50bfb460 | 28 | /** |
5a84c50e | 29 | * @package CRM |
0f03f337 | 30 | * @copyright CiviCRM LLC (c) 2004-2017 |
5a84c50e | 31 | */ |
50bfb460 | 32 | |
ea74069c TO |
33 | /** |
34 | * Class CRM_Utils_QueryFormatter | |
35 | * | |
36 | * This class is a bad idea. It exists for the unholy reason that a single installation | |
37 | * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same | |
38 | * query-text. It labors* to take the user's search expression and provide similar search | |
39 | * semantics in different contexts. It is unknown whether this labor will be fruitful | |
40 | * or in vain. | |
41 | */ | |
42 | class CRM_Utils_QueryFormatter { | |
7ee0cd30 TO |
43 | /** |
44 | * Generate queries using SQL LIKE expressions. | |
45 | */ | |
ea74069c | 46 | const LANG_SQL_LIKE = 'like'; |
7ee0cd30 TO |
47 | |
48 | /** | |
49 | * Generate queries using MySQL FTS expressions. | |
50 | */ | |
ea74069c | 51 | const LANG_SQL_FTS = 'fts'; |
7ee0cd30 TO |
52 | |
53 | /** | |
54 | * Generate queries using MySQL's boolean FTS expressions. | |
55 | */ | |
3196b7a5 | 56 | const LANG_SQL_FTSBOOL = 'ftsbool'; |
7ee0cd30 TO |
57 | |
58 | /** | |
59 | * Generate queries using Solr expressions. | |
60 | */ | |
ea74069c TO |
61 | const LANG_SOLR = 'solr'; |
62 | ||
63 | /** | |
64 | * Attempt to leave the text as-is. | |
65 | */ | |
66 | const MODE_NONE = 'simple'; | |
67 | ||
68 | /** | |
69 | * Attempt to treat the input text as a phrase | |
70 | */ | |
71 | const MODE_PHRASE = 'phrase'; | |
72 | ||
73 | /** | |
74 | * Attempt to treat the input text as a phrase with | |
75 | * wildcards on each end. | |
76 | */ | |
77 | const MODE_WILDPHRASE = 'wildphrase'; | |
78 | ||
79 | /** | |
80 | * Attempt to treat individual word as if it | |
81 | * had wildcards at the start and end. | |
82 | */ | |
83 | const MODE_WILDWORDS = 'wildwords'; | |
84 | ||
85 | /** | |
86 | * Attempt to treat individual word as if it | |
87 | * had a wildcard at the end. | |
88 | */ | |
89 | const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix'; | |
90 | ||
91 | static protected $singleton; | |
92 | ||
93 | /** | |
94 | * @param bool $fresh | |
95 | * @return CRM_Utils_QueryFormatter | |
96 | */ | |
97 | public static function singleton($fresh = FALSE) { | |
98 | if ($fresh || self::$singleton === NULL) { | |
aaffa79f | 99 | $mode = Civi::settings()->get('fts_query_mode'); |
ea74069c TO |
100 | self::$singleton = new CRM_Utils_QueryFormatter($mode); |
101 | } | |
102 | return self::$singleton; | |
103 | } | |
104 | ||
105 | /** | |
aaffa79f | 106 | * @var string |
50bfb460 | 107 | * eg MODE_NONE |
ea74069c TO |
108 | */ |
109 | protected $mode; | |
110 | ||
111 | /** | |
77855840 TO |
112 | * @param string $mode |
113 | * Eg MODE_NONE. | |
ea74069c | 114 | */ |
00be9182 | 115 | public function __construct($mode) { |
ea74069c TO |
116 | $this->mode = $mode; |
117 | } | |
118 | ||
119 | /** | |
120 | * @param mixed $mode | |
121 | */ | |
122 | public function setMode($mode) { | |
123 | $this->mode = $mode; | |
124 | } | |
125 | ||
126 | /** | |
127 | * @return mixed | |
128 | */ | |
129 | public function getMode() { | |
130 | return $this->mode; | |
131 | } | |
132 | ||
133 | /** | |
134 | * @param string $text | |
77855840 TO |
135 | * @param string $language |
136 | * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR. | |
ea74069c TO |
137 | * @throws CRM_Core_Exception |
138 | * @return string | |
139 | */ | |
140 | public function format($text, $language) { | |
141 | $text = trim($text); | |
142 | ||
143 | switch ($language) { | |
144 | case self::LANG_SOLR: | |
145 | case self::LANG_SQL_FTS: | |
146 | $text = $this->_formatFts($text, $this->mode); | |
147 | break; | |
e7292422 | 148 | |
3196b7a5 TO |
149 | case self::LANG_SQL_FTSBOOL: |
150 | $text = $this->_formatFtsBool($text, $this->mode); | |
151 | break; | |
e7292422 | 152 | |
ea74069c TO |
153 | case self::LANG_SQL_LIKE: |
154 | $text = $this->_formatLike($text, $this->mode); | |
155 | break; | |
e7292422 | 156 | |
ea74069c TO |
157 | default: |
158 | $text = NULL; | |
159 | } | |
160 | ||
161 | if ($text === NULL) { | |
162 | throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]"); | |
163 | } | |
164 | ||
165 | return $text; | |
166 | } | |
167 | ||
2395ef81 TO |
168 | /** |
169 | * Create a SQL WHERE expression for matching against a list of | |
170 | * text columns. | |
171 | * | |
172 | * @param string $table | |
173 | * Eg "civicrm_note" or "civicrm_note mynote". | |
174 | * @param array|string $columns | |
175 | * List of columns to search against. | |
176 | * Eg "first_name" or "activity_details". | |
177 | * @param string $queryText | |
178 | * @return string | |
179 | * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'" | |
180 | */ | |
181 | public function formatSql($table, $columns, $queryText) { | |
182 | if ($queryText === '*' || $queryText === '%' || empty($queryText)) { | |
183 | return '(1)'; | |
184 | } | |
185 | ||
186 | $strtolower = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower'; | |
187 | ||
188 | if (strpos($table, ' ') === FALSE) { | |
189 | $tableName = $tableAlias = $table; | |
190 | } | |
191 | else { | |
192 | list ($tableName, $tableAlias) = explode(' ', $table); | |
193 | } | |
194 | if (is_scalar($columns)) { | |
195 | $columns = array($columns); | |
196 | } | |
197 | ||
198 | $clauses = array(); | |
199 | if (CRM_Core_InnoDBIndexer::singleton() | |
200 | ->hasDeclaredIndex($tableName, $columns) | |
201 | ) { | |
202 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_FTSBOOL); | |
203 | ||
204 | $prefixedFieldNames = array(); | |
205 | foreach ($columns as $fieldName) { | |
206 | $prefixedFieldNames[] = "$tableAlias.$fieldName"; | |
207 | } | |
208 | ||
209 | $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)", | |
210 | implode(',', $prefixedFieldNames), | |
211 | $strtolower(CRM_Core_DAO::escapeString($formattedQuery)) | |
212 | ); | |
213 | } | |
214 | else { | |
215 | //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array( | |
216 | // 1 => $table, | |
217 | // 2 => implode(', ', $fullTextFields), | |
218 | //))); | |
219 | ||
220 | $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter::LANG_SQL_LIKE); | |
221 | $escapedText = $strtolower(CRM_Core_DAO::escapeString($formattedQuery)); | |
222 | foreach ($columns as $fieldName) { | |
223 | $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'"; | |
224 | } | |
225 | } | |
226 | return implode(' OR ', $clauses); | |
227 | } | |
228 | ||
2e2605fe EM |
229 | /** |
230 | * Format Fts. | |
231 | * | |
232 | * @param string $text | |
233 | * @param $mode | |
234 | * | |
235 | * @return mixed | |
236 | */ | |
ea74069c TO |
237 | protected function _formatFts($text, $mode) { |
238 | $result = NULL; | |
239 | ||
240 | // normalize user-inputted wildcards | |
241 | $text = str_replace('%', '*', $text); | |
242 | ||
243 | if (empty($text)) { | |
3196b7a5 | 244 | $result = '*'; |
ea74069c TO |
245 | } |
246 | elseif (strpos($text, '*') !== FALSE) { | |
247 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
3196b7a5 | 248 | $result = $text; |
ea74069c TO |
249 | } |
250 | else { | |
251 | switch ($mode) { | |
252 | case self::MODE_NONE: | |
253 | $result = $text; | |
254 | break; | |
255 | ||
256 | case self::MODE_PHRASE: | |
257 | $result = '"' . $text . '"'; | |
258 | break; | |
259 | ||
260 | case self::MODE_WILDPHRASE: | |
261 | $result = '"*' . $text . '*"'; | |
262 | break; | |
263 | ||
264 | case self::MODE_WILDWORDS: | |
265 | $result = $this->mapWords($text, '*word*'); | |
266 | break; | |
267 | ||
268 | case self::MODE_WILDWORDS_SUFFIX: | |
269 | $result = $this->mapWords($text, 'word*'); | |
270 | break; | |
271 | ||
272 | default: | |
273 | $result = NULL; | |
274 | } | |
275 | } | |
276 | ||
277 | return $this->dedupeWildcards($result, '%'); | |
278 | } | |
279 | ||
2e2605fe EM |
280 | /** |
281 | * Format FTS. | |
282 | * | |
283 | * @param string $text | |
284 | * @param $mode | |
285 | * | |
286 | * @return mixed | |
287 | */ | |
3196b7a5 TO |
288 | protected function _formatFtsBool($text, $mode) { |
289 | $result = NULL; | |
408fe090 | 290 | $operators = array('+', '-', '~', '(', ')'); |
e0b51656 | 291 | $wildCards = array('@', '%', '*'); |
292 | $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/'); | |
408fe090 | 293 | |
294 | //Return if searched string ends with an unsupported operator. | |
e0b51656 | 295 | //Or if the string contains an invalid joint occurrence of operators. |
408fe090 | 296 | foreach ($operators as $val) { |
e0b51656 | 297 | if ($text == '@' || CRM_Utils_String::endsWith($text, $val) || preg_match("/[{$expression}]{2,}/", $text)) { |
408fe090 | 298 | $csid = CRM_Core_DAO::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name'); |
299 | $url = CRM_Utils_System::url("civicrm/contact/search/custom", "csid={$csid}&reset=1"); | |
300 | $operators = implode("', '", $operators); | |
e0b51656 | 301 | CRM_Core_Error::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String'); |
408fe090 | 302 | } |
303 | } | |
3196b7a5 TO |
304 | |
305 | // normalize user-inputted wildcards | |
306 | $text = str_replace('%', '*', $text); | |
307 | ||
308 | if (empty($text)) { | |
309 | $result = '*'; | |
310 | } | |
7bfcf18f TO |
311 | elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) { |
312 | // if user supplies their own include/exclude operators, use text as is (with trailing wildcard) | |
313 | $result = $this->mapWords($text, 'word*'); | |
314 | } | |
3196b7a5 TO |
315 | elseif (strpos($text, '*') !== FALSE) { |
316 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
317 | $result = $this->mapWords($text, '+word'); | |
318 | } | |
7bfcf18f TO |
319 | elseif (preg_match('/^(["\']).*\1$/m', $text)) { |
320 | // if surrounded by quotes, use term as is | |
321 | $result = $text; | |
322 | } | |
3196b7a5 TO |
323 | else { |
324 | switch ($mode) { | |
325 | case self::MODE_NONE: | |
408fe090 | 326 | $result = $this->mapWords($text, '+word', TRUE); |
3196b7a5 TO |
327 | break; |
328 | ||
329 | case self::MODE_PHRASE: | |
330 | $result = '+"' . $text . '"'; | |
331 | break; | |
332 | ||
333 | case self::MODE_WILDPHRASE: | |
334 | $result = '+"*' . $text . '*"'; | |
335 | break; | |
336 | ||
337 | case self::MODE_WILDWORDS: | |
338 | $result = $this->mapWords($text, '+*word*'); | |
339 | break; | |
340 | ||
341 | case self::MODE_WILDWORDS_SUFFIX: | |
342 | $result = $this->mapWords($text, '+word*'); | |
343 | break; | |
344 | ||
345 | default: | |
346 | $result = NULL; | |
347 | } | |
348 | } | |
349 | ||
350 | return $this->dedupeWildcards($result, '%'); | |
351 | } | |
352 | ||
2e2605fe EM |
353 | /** |
354 | * Format like. | |
355 | * | |
356 | * @param $text | |
357 | * @param $mode | |
358 | * | |
359 | * @return mixed | |
360 | */ | |
ea74069c TO |
361 | protected function _formatLike($text, $mode) { |
362 | $result = NULL; | |
363 | ||
364 | if (empty($text)) { | |
365 | $result = '%'; | |
366 | } | |
367 | elseif (strpos($text, '%') !== FALSE) { | |
368 | // if user supplies their own wildcards, then don't do any sophisticated changes | |
369 | $result = $text; | |
370 | } | |
371 | else { | |
372 | switch ($mode) { | |
373 | case self::MODE_NONE: | |
374 | case self::MODE_PHRASE: | |
375 | case self::MODE_WILDPHRASE: | |
376 | $result = "%" . $text . "%"; | |
377 | break; | |
378 | ||
379 | case self::MODE_WILDWORDS: | |
380 | case self::MODE_WILDWORDS_SUFFIX: | |
381 | $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%'; | |
382 | break; | |
383 | ||
384 | default: | |
385 | $result = NULL; | |
386 | } | |
387 | } | |
388 | ||
389 | return $this->dedupeWildcards($result, '%'); | |
390 | } | |
391 | ||
392 | /** | |
77855840 TO |
393 | * @param string $text |
394 | * User-supplied query string. | |
395 | * @param string $template | |
396 | * A prototypical description of each word, eg "word%" or "word*" or "*word*". | |
408fe090 | 397 | * @param bool $quotes |
398 | * True if each searched keyword need to be surrounded with quotes. | |
ea74069c TO |
399 | * @return string |
400 | */ | |
408fe090 | 401 | protected function mapWords($text, $template, $quotes = FALSE) { |
ea74069c | 402 | $result = array(); |
408fe090 | 403 | foreach ($this->parseWords($text, $quotes) as $word) { |
ea74069c TO |
404 | $result[] = str_replace('word', $word, $template); |
405 | } | |
406 | return implode(' ', $result); | |
407 | } | |
408 | ||
409 | /** | |
410 | * @param $text | |
408fe090 | 411 | * @bool $quotes |
ea74069c TO |
412 | * @return array |
413 | */ | |
408fe090 | 414 | protected function parseWords($text, $quotes) { |
895e8529 TO |
415 | //NYSS 9692 special handling for emails |
416 | if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) { | |
417 | $parts = explode('@', $text); | |
418 | $parts[1] = stristr($parts[1], '.', TRUE); | |
419 | $text = implode(' ', $parts); | |
420 | } | |
895e8529 TO |
421 | |
422 | //NYSS also replace other occurrences of @ | |
408fe090 | 423 | $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text)); |
424 | //filter empty values if any | |
425 | $keywords = array_filter(explode(' ', $replacedText)); | |
426 | ||
427 | //Ensure each searched keywords are wrapped in double quotes. | |
428 | if ($quotes) { | |
429 | foreach ($keywords as &$val) { | |
430 | if (!is_numeric($val)) { | |
431 | $val = "\"{$val}\""; | |
432 | } | |
433 | } | |
434 | } | |
435 | return $keywords; | |
ea74069c TO |
436 | } |
437 | ||
438 | /** | |
439 | * @param $text | |
440 | * @param $wildcard | |
441 | * @return mixed | |
442 | */ | |
443 | protected function dedupeWildcards($text, $wildcard) { | |
444 | if ($text === NULL) { | |
445 | return NULL; | |
446 | } | |
447 | ||
448 | // don't use preg_replace because $wildcard might be special char | |
449 | while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) { | |
450 | $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text); | |
451 | } | |
452 | return $text; | |
453 | } | |
454 | ||
2e2605fe EM |
455 | /** |
456 | * Get modes. | |
457 | * | |
458 | * @return array | |
459 | */ | |
ea74069c TO |
460 | public static function getModes() { |
461 | return array( | |
462 | self::MODE_NONE, | |
463 | self::MODE_PHRASE, | |
464 | self::MODE_WILDPHRASE, | |
465 | self::MODE_WILDWORDS, | |
466 | self::MODE_WILDWORDS_SUFFIX, | |
467 | ); | |
468 | } | |
469 | ||
2e2605fe EM |
470 | /** |
471 | * Get languages. | |
472 | * | |
473 | * @return array | |
474 | */ | |
ea74069c TO |
475 | public static function getLanguages() { |
476 | return array( | |
477 | self::LANG_SOLR, | |
478 | self::LANG_SQL_FTS, | |
3196b7a5 | 479 | self::LANG_SQL_FTSBOOL, |
ea74069c TO |
480 | self::LANG_SQL_LIKE, |
481 | ); | |
482 | } | |
483 | ||
484 | /** | |
485 | * @param $text | |
486 | * | |
487 | * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");' | |
488 | */ | |
489 | public static function dumpExampleTable($text) { | |
490 | $width = strlen($text) + 8; | |
491 | $buf = ''; | |
492 | ||
493 | $buf .= sprintf("%-{$width}s", 'mode'); | |
494 | foreach (self::getLanguages() as $lang) { | |
495 | $buf .= sprintf("%-{$width}s", $lang); | |
496 | } | |
497 | $buf .= "\n"; | |
498 | ||
499 | foreach (self::getModes() as $mode) { | |
500 | $formatter = new CRM_Utils_QueryFormatter($mode); | |
501 | $buf .= sprintf("%-{$width}s", $mode); | |
502 | foreach (self::getLanguages() as $lang) { | |
503 | $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang)); | |
504 | } | |
505 | $buf .= "\n"; | |
506 | } | |
507 | ||
508 | echo $buf; | |
509 | } | |
96025800 | 510 | |
ef10e0b5 | 511 | } |