Merge pull request #5250 from jitendrapurohit/CRM-15934update
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
CommitLineData
<?php
/*
 +--------------------------------------------------------------------+
 | CiviCRM version 4.6                                                |
 +--------------------------------------------------------------------+
 | Copyright CiviCRM LLC (c) 2004-2014                                |
 +--------------------------------------------------------------------+
 | This file is a part of CiviCRM.                                    |
 |                                                                    |
 | CiviCRM is free software; you can copy, modify, and distribute it  |
 | under the terms of the GNU Affero General Public License           |
 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
 |                                                                    |
 | CiviCRM is distributed in the hope that it will be useful, but     |
 | WITHOUT ANY WARRANTY; without even the implied warranty of         |
 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
 | See the GNU Affero General Public License for more details.        |
 |                                                                    |
 | You should have received a copy of the GNU Affero General Public   |
 | License and the CiviCRM Licensing Exception along                  |
 | with this program; if not, contact CiviCRM LLC                     |
 | at info[AT]civicrm[DOT]org. If you have questions about the        |
 | GNU Affero General Public License or the licensing of CiviCRM,     |
 | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
 +--------------------------------------------------------------------+
 */

/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 */
class CRM_Utils_QueryFormatter {
  const LANG_SQL_LIKE = 'like';
  const LANG_SQL_FTS = 'fts';
  const LANG_SQL_FTSBOOL = 'ftsbool';
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * @var CRM_Utils_QueryFormatter|NULL
   *   Lazily-created shared instance; see singleton().
   */
  protected static $singleton;

  /**
   * Get the shared formatter, configured from the 'fts_query_mode' search preference.
   *
   * @param bool $fresh
   *   TRUE to discard any cached instance and re-read the setting.
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = CRM_Core_BAO_Setting::getItem(CRM_Core_BAO_Setting::SEARCH_PREFERENCES_NAME, 'fts_query_mode', NULL, self::MODE_NONE);
      self::$singleton = new CRM_Utils_QueryFormatter($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode
   *   Eg MODE_NONE.
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * @param mixed $mode
   *   Eg MODE_NONE.
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return mixed
   *   The currently configured mode, eg MODE_NONE.
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Rewrite a user-supplied search expression for the given query language,
   * applying the formatter's current mode.
   *
   * @param string $text
   *   User-supplied query string; surrounding whitespace is trimmed.
   * @param string $language
   *   Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR.
   * @throws CRM_Core_Exception
   *   If the language or the current mode is not recognized.
   * @return string
   */
  public function format($text, $language) {
    $text = trim($text);

    switch ($language) {
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // The per-language formatters return NULL for an unknown mode.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format Fts.
   *
   * Produces an expression using "*" as the wildcard character.
   *
   * @param string $text
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Compare against '' explicitly: empty() would also treat the
    // legitimate search term "0" as blank and match everything.
    if ($text === '') {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // All "%" were converted to "*" above, so "*" is the wildcard to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format FTS (boolean mode).
   *
   * Produces an expression using "*" as the wildcard character and "+"
   * as the required-term operator.
   *
   * @param string $text
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Compare against '' explicitly: empty() would also treat the
    // legitimate search term "0" as blank and match everything.
    if ($text === '') {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, keep them and only mark each word as required
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // All "%" were converted to "*" above, so "*" is the wildcard to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format like.
   *
   * Produces an expression using "%" as the wildcard character.
   *
   * @param string $text
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    // Compare explicitly: empty() would also treat the legitimate
    // search term "0" as blank and match everything.
    if ($text === NULL || $text === '') {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Each run of whitespace between words becomes a single wildcard.
          $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Apply a template to every word of the query and re-join with single spaces.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $template
   *   A prototypical description of each word, eg "word%" or "word*" or "*word*".
   * @return string
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split the query into words on runs of spaces, tabs and line breaks.
   *
   * @param string $text
   * @return array
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse any run of consecutive wildcard characters into a single one.
   *
   * @param string|NULL $text
   * @param string $wildcard
   *   The wildcard character, eg "%" or "*".
   * @return string|NULL
   *   NULL is passed through unchanged.
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }

    // don't use preg_replace because $wildcard might be special char
    while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
      $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
    }
    return $text;
  }

  /**
   * Get modes.
   *
   * @return array
   *   List of all MODE_* values.
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * Get languages.
   *
   * @return array
   *   List of all LANG_* values.
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how $text is formatted for every mode/language pair.
   *
   * @param string $text
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    $width = strlen($text) + 8;
    $buf = '';

    // Header row: one column per language.
    $buf .= sprintf("%-{$width}s", 'mode');
    foreach (self::getLanguages() as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    // One row per mode.
    foreach (self::getModes() as $mode) {
      $formatter = new CRM_Utils_QueryFormatter($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach (self::getLanguages() as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}