Commit | Line | Data |
---|---|---|
ea74069c | 1 | <?php |
ea74069c TO |
2 | /* |
3 | +--------------------------------------------------------------------+ | |
7e9e8871 | 4 | | CiviCRM version 4.7 | |
ea74069c | 5 | +--------------------------------------------------------------------+ |
e7112fa7 | 6 | | Copyright CiviCRM LLC (c) 2004-2015 | |
ea74069c TO |
7 | +--------------------------------------------------------------------+ |
8 | | This file is a part of CiviCRM. | | |
9 | | | | |
10 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
11 | | under the terms of the GNU Affero General Public License | | |
12 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
13 | | | | |
14 | | CiviCRM is distributed in the hope that it will be useful, but | | |
15 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
17 | | See the GNU Affero General Public License for more details. | | |
18 | | | | |
19 | | You should have received a copy of the GNU Affero General Public | | |
20 | | License and the CiviCRM Licensing Exception along | | |
21 | | with this program; if not, contact CiviCRM LLC | | |
22 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
23 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
24 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
25 | +--------------------------------------------------------------------+ | |
d25dd0ee | 26 | */ |
ea74069c | 27 | |
50bfb460 | 28 | /** |
5a84c50e | 29 | * @package CRM |
30 | * @copyright CiviCRM LLC (c) 2004-2015 | |
31 | */ | |
50bfb460 | 32 | |
ea74069c TO |
/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 *
 * Typical use: obtain an instance (singleton() or the constructor), then call
 * format($text, $language) with one of the LANG_* constants. The instance's mode
 * (one of the MODE_* constants) decides how un-decorated text is wrapped.
 */
class CRM_Utils_QueryFormatter {
  const LANG_SQL_LIKE = 'like';
  const LANG_SQL_FTS = 'fts';
  const LANG_SQL_FTSBOOL = 'ftsbool';
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * Cached shared instance, created lazily by singleton().
   *
   * @var CRM_Utils_QueryFormatter|null
   */
  static protected $singleton;

  /**
   * Get the shared formatter, configured from the 'fts_query_mode' setting.
   *
   * @param bool $fresh
   *   TRUE to discard any cached instance and build a new one.
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = Civi::settings()->get('fts_query_mode');
      self::$singleton = new CRM_Utils_QueryFormatter($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string
   *   eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode
   *   Eg MODE_NONE.
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * @param mixed $mode
   *   Eg MODE_NONE.
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return mixed
   *   The mode currently in effect, eg MODE_NONE.
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Rewrite a user-supplied search expression for the given query language.
   *
   * The text is trimmed, then handed to the language-specific formatter together
   * with the current mode.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $language
   *   Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SQL_FTSBOOL, LANG_SOLR.
   * @throws CRM_Core_Exception
   *   If the language is not recognized, or the current mode is not one the
   *   chosen formatter handles.
   * @return string
   */
  public function format($text, $language) {
    // Cast first so that a NULL input is handled as an empty string.
    $text = trim((string) $text);

    switch ($language) {
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // The formatters signal "unsupported mode" by returning NULL.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format Fts.
   *
   * Used for both LANG_SQL_FTS and LANG_SOLR. The wildcard character is '*'.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|null
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Every '%' was converted to '*' above, so '*' is the wildcard to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format FTS (boolean flavour).
   *
   * Words are prefixed with '+' unless the user supplied their own '+'/'-'
   * operators or wrapped the whole expression in quotes. The wildcard
   * character is '*'.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|null
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, keep them and only mark each word as required
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Every '%' was converted to '*' above, so '*' is the wildcard to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format like.
   *
   * The wildcard character is '%'. Every supported mode wraps the text in
   * wildcards; the word-oriented modes additionally replace each run of
   * whitespace between words with a wildcard.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|null
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Same whitespace set as parseWords(), so tabs also separate words.
          $result = "%" . preg_replace('/[ \r\n\t]+/', '%', $text) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Apply a template to every word of the text and re-join with single spaces.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $template
   *   A prototypical description of each word, eg "word%" or "word*" or "*word*".
   *   Each occurrence of the literal "word" is replaced by the actual word.
   * @return string
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split text into words on runs of spaces, tabs, CR and LF.
   *
   * @param string $text
   * @return array
   *   List of words. For blank input this is a single empty string, because
   *   explode() never returns an empty list.
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse runs of the wildcard character into a single occurrence.
   *
   * @param string|null $text
   * @param string $wildcard
   *   The wildcard character, eg '%' or '*'.
   * @return string|null
   *   NULL is passed through untouched so callers can keep using it as an
   *   "unsupported" marker.
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }

    // don't use preg_replace because $wildcard might be special char
    while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
      $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
    }
    return $text;
  }

  /**
   * Get modes.
   *
   * @return array
   *   List of all MODE_* values.
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * Get languages.
   *
   * @return array
   *   List of all LANG_* values.
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how the text is formatted for every mode/language pair.
   *
   * @param string $text
   *   Sample query string.
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    // Column width: the sample text plus room for the decorations added to it.
    $width = strlen($text) + 8;
    $buf = '';

    // Header row: one column per language.
    $buf .= sprintf("%-{$width}s", 'mode');
    foreach (self::getLanguages() as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    // One row per mode.
    foreach (self::getModes() as $mode) {
      $formatter = new CRM_Utils_QueryFormatter($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach (self::getLanguages() as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}