Merge pull request #6987 from xurizaemon/CRM-17212
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.7 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2015 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 * @package CRM
30 * @copyright CiviCRM LLC (c) 2004-2015
31 */
32
/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 *
 * Usage: construct the formatter with one of the MODE_* constants (or call singleton())
 * and pass the user's text plus one of the LANG_* constants to format().
 */
class CRM_Utils_QueryFormatter {
  /**
   * Target query languages accepted by format().
   */
  const LANG_SQL_LIKE = 'like';
  const LANG_SQL_FTS = 'fts';
  const LANG_SQL_FTSBOOL = 'ftsbool';
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * Cached instance handed out by singleton().
   *
   * @var CRM_Utils_QueryFormatter|NULL
   */
  static protected $singleton;

  /**
   * Get the shared formatter, creating it on first use.
   *
   * The mode is read from the 'fts_query_mode' item of the search preferences
   * settings group; MODE_NONE is passed as the last argument of getItem()
   * (presumably the fallback used when the setting is not defined).
   *
   * @param bool $fresh
   *   TRUE to discard any cached instance and build a new one.
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = CRM_Core_BAO_Setting::getItem(CRM_Core_BAO_Setting::SEARCH_PREFERENCES_NAME, 'fts_query_mode', NULL, self::MODE_NONE);
      self::$singleton = new CRM_Utils_QueryFormatter($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string
   *   eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode
   *   Eg MODE_NONE.
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * Change the mode used by subsequent format() calls.
   *
   * @param mixed $mode
   *   Eg MODE_NONE.
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return mixed
   *   The mode currently in effect, eg MODE_NONE.
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Translate the user's search text into an expression for the given query language,
   * applying the formatter's current mode.
   *
   * @param string $text
   *   User-supplied query string; surrounding whitespace is trimmed.
   * @param string $language
   *   Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SQL_FTSBOOL, LANG_SOLR.
   * @throws CRM_Core_Exception
   *   If the language is not recognized, or the language-specific formatter
   *   does not recognize the current mode.
   * @return string
   */
  public function format($text, $language) {
    $text = trim($text);

    switch ($language) {
      // Solr and plain MySQL FTS share the same formatting rules.
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // The language-specific formatters return NULL for an unknown mode.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format an expression for full-text search (used for LANG_SQL_FTS and LANG_SOLR).
   *
   * '%' in the input is normalized to '*'. Blank input becomes '*'; input that
   * already contains a wildcard is passed through; otherwise the mode decides
   * how the text is quoted or wildcarded.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Compare against '' explicitly: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Wildcards were normalized to '*' above, so that is the character to dedupe.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format an expression for boolean-mode full-text search (LANG_SQL_FTSBOOL).
   *
   * '%' in the input is normalized to '*'. Blank input becomes '*'. Input that
   * carries its own '+'/'-' operators, wildcards or surrounding quotes gets only
   * light-touch handling; otherwise the mode decides the output and every
   * word/phrase is marked as required with '+'.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Compare against '' explicitly: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, keep them and just mark each word as required
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Wildcards were normalized to '*' above, so that is the character to dedupe
    // (eg "+foo*" with an appended trailing wildcard must not become "+foo**").
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format an expression for use as a SQL LIKE pattern (LANG_SQL_LIKE).
   *
   * Blank input becomes '%'; input that already contains '%' is passed through;
   * otherwise the text is wrapped in '%' and, for the word-based modes, the
   * whitespace between words is replaced with '%' as well.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted pattern, or NULL if $mode is not recognized.
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    // Compare against '' explicitly: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Split words exactly as the FTS formatters do (spaces, tabs, CR, LF).
          $result = "%" . implode('%', $this->parseWords($text)) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Apply a template to every word of the text and re-join the words with single spaces.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $template
   *   A prototypical description of each word, eg "word%" or "word*" or "*word*".
   *   Each occurrence of the literal "word" is replaced by the actual word.
   * @return string
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split text into words on runs of spaces, tabs, carriage returns and newlines.
   *
   * @param string $text
   * @return array
   *   List of words; for blank text this is a single empty string, as explode() returns.
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse every run of consecutive wildcard characters into a single wildcard.
   *
   * @param string|NULL $text
   * @param string $wildcard
   *   The wildcard character, eg '%' or '*'.
   * @return string|NULL
   *   The cleaned text; NULL is passed through unchanged.
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }

    // don't use preg_replace because $wildcard might be special char
    while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
      $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
    }
    return $text;
  }

  /**
   * Get modes.
   *
   * @return array
   *   List of all MODE_* values.
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * Get languages.
   *
   * @return array
   *   List of all LANG_* values.
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how the text is formatted for every mode (rows)
   * and every language (columns). Intended as a debugging aid.
   *
   * @param string $text
   *   Example query string.
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    // Column width: room for the text plus some decoration added by the formatters.
    $width = strlen($text) + 8;
    $buf = '';

    // Header row.
    $buf .= sprintf("%-{$width}s", 'mode');
    foreach (self::getLanguages() as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    // One row per mode.
    foreach (self::getModes() as $mode) {
      $formatter = new CRM_Utils_QueryFormatter($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach (self::getLanguages() as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}