Commit | Line | Data |
---|---|---|
ea74069c TO |
1 | <?php |
2 | ||
3 | /* | |
4 | +--------------------------------------------------------------------+ | |
39de6fd5 | 5 | | CiviCRM version 4.6 | |
ea74069c TO |
6 | +--------------------------------------------------------------------+ |
7 | | Copyright CiviCRM LLC (c) 2004-2014 | | |
8 | +--------------------------------------------------------------------+ | |
9 | | This file is a part of CiviCRM. | | |
10 | | | | |
11 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
12 | | under the terms of the GNU Affero General Public License | | |
13 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
14 | | | | |
15 | | CiviCRM is distributed in the hope that it will be useful, but | | |
16 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
17 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
18 | | See the GNU Affero General Public License for more details. | | |
19 | | | | |
20 | | You should have received a copy of the GNU Affero General Public | | |
21 | | License and the CiviCRM Licensing Exception along | | |
22 | | with this program; if not, contact CiviCRM LLC | | |
23 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
24 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
25 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
26 | +--------------------------------------------------------------------+ | |
27 | */ | |
28 | ||
/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 */
class CRM_Utils_QueryFormatter {
  const LANG_SQL_LIKE = 'like';
  const LANG_SQL_FTS = 'fts';
  const LANG_SQL_FTSBOOL = 'ftsbool';
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * @var CRM_Utils_QueryFormatter|NULL
   *   Cached instance handed out by singleton().
   */
  static protected $singleton;

  /**
   * Get the shared formatter, configured from the 'fts_query_mode' search preference.
   *
   * @param bool $fresh
   *   TRUE to discard the cached instance and build a new one.
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = CRM_Core_BAO_Setting::getItem(CRM_Core_BAO_Setting::SEARCH_PREFERENCES_NAME, 'fts_query_mode', NULL, self::MODE_NONE);
      self::$singleton = new CRM_Utils_QueryFormatter($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode
   *   Eg MODE_NONE.
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * @param mixed $mode
   *   Eg MODE_NONE.
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return mixed
   *   The mode currently in effect, eg MODE_NONE.
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Rewrite a user-supplied search expression for the given query language,
   * applying the formatter's current mode.
   *
   * @param string $text
   *   User-supplied query string; surrounding whitespace is ignored.
   * @param string $language
   *   Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SQL_FTSBOOL, LANG_SOLR.
   * @throws CRM_Core_Exception
   *   If the language or the current mode is not recognized.
   * @return string
   */
  public function format($text, $language) {
    $text = trim($text);

    switch ($language) {
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // The per-language formatters signal an unknown mode by returning NULL.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format an expression using "*" as the wildcard (used for LANG_SQL_FTS and LANG_SOLR).
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Every "%" was rewritten to "*" above, so "*" is the wildcard to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format an expression for LANG_SQL_FTSBOOL, using "*" as the wildcard
   * and "+" / "-" as include / exclude operators.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, keep them and only mark each word as required
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Appending "*" to a word that already ends in "*" (eg "+foo*") yields "**";
    // collapse on "*", since every "%" was rewritten to "*" above.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format an expression for LANG_SQL_LIKE, using "%" as the wildcard.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   * @return string|NULL
   *   The formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    // Strict comparison: empty() would also treat the search term "0" as blank.
    if ((string) $text === '') {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Each run of spaces/newlines between words becomes a single wildcard.
          $result = "%" . preg_replace('/[ \r\n]+/', '%', $text) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Apply a template to every word of the query and join the results with spaces.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $template
   *   A prototypical description of each word, eg "word%" or "word*" or "*word*".
   * @return string
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split text into words on runs of spaces, tabs, CRs and LFs.
   *
   * @param string $text
   * @return array
   *   List of words. Blank input yields a single empty-string element.
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse every run of consecutive wildcard characters into a single one.
   *
   * @param string|NULL $text
   * @param string $wildcard
   *   The wildcard character to collapse, eg "%" or "*".
   * @return string|NULL
   *   NULL is passed through untouched so callers can keep using it as
   *   their "unrecognized mode" marker.
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }

    // don't use preg_replace because $wildcard might be special char
    while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
      $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
    }
    return $text;
  }

  /**
   * @return array
   *   List of all MODE_* values.
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * @return array
   *   List of all LANG_* values.
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how $text is formatted for every mode/language combination.
   *
   * @param string $text
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    // Pad each column so the widest expected output still lines up.
    $width = strlen($text) + 8;
    $buf = '';

    // Header row: one column per language.
    $buf .= sprintf("%-{$width}s", 'mode');
    foreach (self::getLanguages() as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    // One row per mode.
    foreach (self::getModes() as $mode) {
      $formatter = new CRM_Utils_QueryFormatter($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach (self::getLanguages() as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}