Commit | Line | Data |
---|---|---|
ea74069c | 1 | <?php |
ea74069c TO |
2 | /* |
3 | +--------------------------------------------------------------------+ | |
39de6fd5 | 4 | | CiviCRM version 4.6 | |
ea74069c TO |
5 | +--------------------------------------------------------------------+ |
6 | | Copyright CiviCRM LLC (c) 2004-2014 | | |
7 | +--------------------------------------------------------------------+ | |
8 | | This file is a part of CiviCRM. | | |
9 | | | | |
10 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
11 | | under the terms of the GNU Affero General Public License | | |
12 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
13 | | | | |
14 | | CiviCRM is distributed in the hope that it will be useful, but | | |
15 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
16 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
17 | | See the GNU Affero General Public License for more details. | | |
18 | | | | |
19 | | You should have received a copy of the GNU Affero General Public | | |
20 | | License and the CiviCRM Licensing Exception along | | |
21 | | with this program; if not, contact CiviCRM LLC | | |
22 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
23 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
24 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
25 | +--------------------------------------------------------------------+ | |
d25dd0ee | 26 | */ |
ea74069c TO |
27 | |
/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 *
 * Usage: build a formatter for a "mode" (one of the MODE_* constants) and call
 * format() with the user's text and a target "language" (one of the LANG_*
 * constants). The returned string is the rewritten search expression; it is NOT
 * escaped for SQL, so callers remain responsible for escaping/parameterizing it.
 */
class CRM_Utils_QueryFormatter {
  /**
   * Target language: SQL LIKE pattern, using "%" as the wildcard.
   */
  const LANG_SQL_LIKE = 'like';

  /**
   * Target language: full-text expression, using "*" as the wildcard.
   */
  const LANG_SQL_FTS = 'fts';

  /**
   * Target language: full-text expression with "+"/"-" word operators
   * (presumably MySQL's boolean full-text mode; confirm against callers).
   */
  const LANG_SQL_FTSBOOL = 'ftsbool';

  /**
   * Target language: Solr. Formatted exactly like LANG_SQL_FTS.
   */
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * @var CRM_Utils_QueryFormatter|NULL
   *   Instance cached by singleton().
   */
  protected static $singleton;

  /**
   * Get the shared formatter, configured from the 'fts_query_mode' setting.
   *
   * @param bool $fresh
   *   TRUE to discard the cached instance and re-read the setting.
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = CRM_Core_BAO_Setting::getItem(CRM_Core_BAO_Setting::SEARCH_PREFERENCES_NAME, 'fts_query_mode', NULL, self::MODE_NONE);
      self::$singleton = new self($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode
   *   Eg MODE_NONE.
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * @param mixed $mode
   *   Eg MODE_NONE. Not validated here; an unknown mode makes format() throw.
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return mixed
   *   The mode given to the constructor or to setMode().
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Rewrite a user-supplied search expression for the given query language.
   *
   * The text is trimmed and then handed to the language-specific formatter
   * together with the current mode.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $language
   *   Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SQL_FTSBOOL, LANG_SOLR.
   * @throws CRM_Core_Exception
   *   If the language is unknown, or the formatter does not recognize the mode.
   * @return string
   */
  public function format($text, $language) {
    $text = trim($text);

    switch ($language) {
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // Formatters signal "unknown mode" with NULL; an unknown language lands here too.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format a full-text expression ("*" wildcards); used for FTS and Solr.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ($text === '') {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Every "%" was rewritten to "*" above, so "*" is the wildcard to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format a full-text expression that uses "+"/"-" word operators.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ($text === '') {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, keep them and only mark each word as required
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Every "%" was rewritten to "*" above, so "*" is the wildcard to collapse
    // (eg "+foo*" passing through the operator branch would otherwise become "+foo**").
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format a SQL LIKE pattern ("%" wildcards).
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   *
   * @return string|NULL
   *   The LIKE pattern, or NULL if $mode is not recognized.
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Split with parseWords() so LIKE agrees with the FTS formatters
          // about what separates words (spaces, tabs and newlines).
          $result = '%' . implode('%', $this->parseWords($text)) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Apply a per-word template to every word of the text.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $template
   *   A prototypical description of each word, eg "word%" or "word*" or "*word*".
   *   The literal "word" in the template is replaced by each actual word.
   * @return string
   *   The rewritten words, joined by single spaces.
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split text into words on runs of spaces, tabs, CRs and LFs.
   *
   * @param string $text
   * @return array
   *   List of words. Blank input yields a single empty string.
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse runs of a wildcard character into a single occurrence.
   *
   * @param string|NULL $text
   * @param string $wildcard
   *   The wildcard character, eg "%" or "*".
   * @return string|NULL
   *   NULL is passed through unchanged so callers can signal "unknown mode".
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }

    // don't use preg_replace because $wildcard might be special char
    $doubled = $wildcard . $wildcard;
    while (strpos($text, $doubled) !== FALSE) {
      $text = str_replace($doubled, $wildcard, $text);
    }
    return $text;
  }

  /**
   * Get modes.
   *
   * @return array
   *   List of all MODE_* values.
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * Get languages.
   *
   * @return array
   *   List of all LANG_* values.
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how the text is formatted for every mode/language pair.
   *
   * @param string $text
   *   Example query string.
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    // Pad each column so the longest expected decoration still fits.
    $width = strlen($text) + 8;
    $languages = self::getLanguages();
    $buf = '';

    // Header row: one column per language.
    $buf .= sprintf("%-{$width}s", 'mode');
    foreach ($languages as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    // One row per mode.
    foreach (self::getModes() as $mode) {
      $formatter = new self($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach ($languages as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}