3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
14 * @copyright CiviCRM LLC https://civicrm.org/licensing
18 * Class CRM_Utils_QueryFormatter
20 * This class is a bad idea. It exists for the unholy reason that a single installation
21 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
22 * query-text. It labors* to take the user's search expression and provide similar search
23 * semantics in different contexts. It is unknown whether this labor will be fruitful
26 class CRM_Utils_QueryFormatter
{
28 * Generate queries using SQL LIKE expressions.
30 const LANG_SQL_LIKE
= 'like';
33 * Generate queries using MySQL FTS expressions.
35 const LANG_SQL_FTS
= 'fts';
38 * Generate queries using MySQL's boolean FTS expressions.
40 const LANG_SQL_FTSBOOL
= 'ftsbool';
43 * Generate queries using Solr expressions.
45 const LANG_SOLR
= 'solr';
48 * Attempt to leave the text as-is.
50 const MODE_NONE
= 'simple';
53 * Attempt to treat the input text as a phrase
55 const MODE_PHRASE
= 'phrase';
58 * Attempt to treat the input text as a phrase with
59 * wildcards on each end.
61 const MODE_WILDPHRASE
= 'wildphrase';
64 * Attempt to treat individual word as if it
65 * had wildcards at the start and end.
67 const MODE_WILDWORDS
= 'wildwords';
70 * Attempt to treat individual word as if it
71 * had a wildcard at the end.
73 const MODE_WILDWORDS_SUFFIX
= 'wildwords-suffix';
78 * @var \CRM_Utils_QueryFormatter|null
80 static protected $singleton;
84 * @return CRM_Utils_QueryFormatter
86 public static function singleton($fresh = FALSE) {
87 if ($fresh || self
::$singleton === NULL) {
88 $mode = Civi
::settings()->get('fts_query_mode');
89 self
::$singleton = new CRM_Utils_QueryFormatter($mode);
91 return self
::$singleton;
101 * @param string $mode
104 public function __construct($mode) {
111 public function setMode($mode) {
118 public function getMode() {
123 * @param string $text
124 * @param string $language
125 * Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR.
126 * @throws CRM_Core_Exception
129 public function format($text, $language) {
133 case self
::LANG_SOLR
:
134 case self
::LANG_SQL_FTS
:
135 $text = $this->_formatFts($text, $this->mode
);
138 case self
::LANG_SQL_FTSBOOL
:
139 $text = $this->_formatFtsBool($text, $this->mode
);
142 case self
::LANG_SQL_LIKE
:
143 $text = $this->_formatLike($text, $this->mode
);
150 if ($text === NULL) {
151 throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
158 * Create a SQL WHERE expression for matching against a list of
161 * @param string $table
162 * Eg "civicrm_note" or "civicrm_note mynote".
163 * @param array|string $columns
164 * List of columns to search against.
165 * Eg "first_name" or "activity_details".
166 * @param string $queryText
168 * SQL, eg "MATCH (col1) AGAINST (queryText)" or "col1 LIKE '%queryText%'"
170 public function formatSql($table, $columns, $queryText) {
171 if ($queryText === '*' ||
$queryText === '%' ||
empty($queryText)) {
175 $strtolower = function_exists('mb_strtolower') ?
'mb_strtolower' : 'strtolower';
177 if (strpos($table, ' ') === FALSE) {
178 $tableName = $tableAlias = $table;
181 list ($tableName, $tableAlias) = explode(' ', $table);
183 if (is_scalar($columns)) {
184 $columns = [$columns];
188 if (CRM_Core_InnoDBIndexer
::singleton()
189 ->hasDeclaredIndex($tableName, $columns)
191 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter
::LANG_SQL_FTSBOOL
);
193 $prefixedFieldNames = [];
194 foreach ($columns as $fieldName) {
195 $prefixedFieldNames[] = "$tableAlias.$fieldName";
198 $clauses[] = sprintf("MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)",
199 implode(',', $prefixedFieldNames),
200 $strtolower(CRM_Core_DAO
::escapeString($formattedQuery))
204 //CRM_Core_Session::setStatus(ts('Cannot use FTS for %1 (%2)', array(
206 // 2 => implode(', ', $fullTextFields),
209 $formattedQuery = $this->format($queryText, CRM_Utils_QueryFormatter
::LANG_SQL_LIKE
);
210 $escapedText = $strtolower(CRM_Core_DAO
::escapeString($formattedQuery));
211 foreach ($columns as $fieldName) {
212 $clauses[] = "$tableAlias.$fieldName LIKE '{$escapedText}'";
215 return implode(' OR ', $clauses);
221 * @param string $text
222 * @param string $mode
226 protected function _formatFts($text, $mode) {
229 // normalize user-inputted wildcards
230 $text = str_replace('%', '*', $text);
235 elseif (strpos($text, '*') !== FALSE) {
236 // if user supplies their own wildcards, then don't do any sophisticated changes
241 case self
::MODE_NONE
:
245 case self
::MODE_PHRASE
:
246 $result = '"' . $text . '"';
249 case self
::MODE_WILDPHRASE
:
250 $result = '"*' . $text . '*"';
253 case self
::MODE_WILDWORDS
:
254 $result = $this->mapWords($text, '*word*');
257 case self
::MODE_WILDWORDS_SUFFIX
:
258 $result = $this->mapWords($text, 'word*');
266 return $this->dedupeWildcards($result, '%');
272 * @param string $text
273 * @param string $mode
277 protected function _formatFtsBool($text, $mode) {
279 $operators = ['+', '-', '~', '(', ')'];
280 $wildCards = ['@', '%', '*'];
281 $expression = preg_quote(implode('', array_merge($operators, $wildCards)), '/');
283 //Return if searched string ends with an unsupported operator.
284 //Or if the string contains an invalid joint occurrence of operators.
285 foreach ($operators as $val) {
286 if ($text == '@' || CRM_Utils_String
::endsWith($text, $val) ||
preg_match("/[{$expression}]{2,}/", $text)) {
287 $csid = CRM_Core_DAO
::getFieldValue('CRM_Core_DAO_OptionValue', 'CRM_Contact_Form_Search_Custom_FullText', 'value', 'name');
288 $url = CRM_Utils_System
::url("civicrm/contact/search/custom", "csid={$csid}&reset=1");
289 $operators = implode("', '", $operators);
290 CRM_Core_Error
::statusBounce("Full-Text Search does not support the use of a search with two attached operators or string ending with any of these operators ('{$operators}' or a single '@'). Please adjust your search term and try again.", $url, 'Invalid Search String');
294 // normalize user-inputted wildcards
295 $text = str_replace('%', '*', $text);
300 elseif (strpos($text, '+') !== FALSE ||
strpos($text, '-') !== FALSE) {
301 // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
302 $result = $this->mapWords($text, 'word*');
304 elseif (strpos($text, '*') !== FALSE) {
305 // if user supplies their own wildcards, then don't do any sophisticated changes
306 $result = $this->mapWords($text, '+word');
308 elseif (preg_match('/^(["\']).*\1$/m', $text)) {
309 // if surrounded by quotes, use term as is
314 case self
::MODE_NONE
:
315 $result = $this->mapWords($text, '+word', TRUE);
318 case self
::MODE_PHRASE
:
319 $result = '+"' . $text . '"';
322 case self
::MODE_WILDPHRASE
:
323 $result = '+"*' . $text . '*"';
326 case self
::MODE_WILDWORDS
:
327 $result = $this->mapWords($text, '+*word*');
330 case self
::MODE_WILDWORDS_SUFFIX
:
331 $result = $this->mapWords($text, '+word*');
339 return $this->dedupeWildcards($result, '%');
345 * @param string $text
346 * @param string $mode
350 protected function _formatLike($text, $mode) {
356 elseif (strpos($text, '%') !== FALSE) {
357 // if user supplies their own wildcards, then don't do any sophisticated changes
362 case self
::MODE_NONE
:
363 case self
::MODE_PHRASE
:
364 case self
::MODE_WILDPHRASE
:
365 $result = "%" . $text . "%";
368 case self
::MODE_WILDWORDS
:
369 case self
::MODE_WILDWORDS_SUFFIX
:
370 $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%';
378 return $this->dedupeWildcards($result, '%');
382 * @param string $text
383 * User-supplied query string.
384 * @param string $template
385 * A prototypical description of each word, eg "word%" or "word*" or "*word*".
386 * @param bool $quotes
387 * True if each searched keyword need to be surrounded with quotes.
390 protected function mapWords($text, $template, $quotes = FALSE) {
392 foreach ($this->parseWords($text, $quotes) as $word) {
393 $result[] = str_replace('word', $word, $template);
395 return implode(' ', $result);
399 * @param string $text
400 * @param bool $quotes
403 protected function parseWords($text, $quotes) {
404 //NYSS 9692 special handling for emails
405 if (preg_match('/^([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})$/', $text)) {
406 $parts = explode('@', $text);
407 $parts[1] = stristr($parts[1], '.', TRUE);
408 $text = implode(' ', $parts);
411 //NYSS also replace other occurrences of @
412 $replacedText = preg_replace('/[ \r\n\t\@]+/', ' ', trim($text));
413 //filter empty values if any
414 $keywords = array_filter(explode(' ', $replacedText));
416 //Ensure each searched keywords are wrapped in double quotes.
418 foreach ($keywords as &$val) {
419 if (!is_numeric($val)) {
428 * @param string|null $text
429 * @param string $wildcard
430 * @return string|null
432 protected function dedupeWildcards($text, $wildcard) {
433 if ($text === NULL) {
437 // don't use preg_replace because $wildcard might be special char
438 while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
439 $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
449 public static function getModes() {
453 self
::MODE_WILDPHRASE
,
454 self
::MODE_WILDWORDS
,
455 self
::MODE_WILDWORDS_SUFFIX
,
464 public static function getLanguages() {
468 self
::LANG_SQL_FTSBOOL
,
474 * @param string $text
476 * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
478 public static function dumpExampleTable($text) {
479 $width = strlen($text) +
8;
482 $buf .= sprintf("%-{$width}s", 'mode');
483 foreach (self
::getLanguages() as $lang) {
484 $buf .= sprintf("%-{$width}s", $lang);
488 foreach (self
::getModes() as $mode) {
489 $formatter = new CRM_Utils_QueryFormatter($mode);
490 $buf .= sprintf("%-{$width}s", $mode);
491 foreach (self
::getLanguages() as $lang) {
492 $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));