Commit | Line | Data |
---|---|---|
ea74069c TO |
1 | <?php |
2 | ||
3 | /* | |
4 | +--------------------------------------------------------------------+ | |
5 | | CiviCRM version 4.5 | | |
6 | +--------------------------------------------------------------------+ | |
7 | | Copyright CiviCRM LLC (c) 2004-2014 | | |
8 | +--------------------------------------------------------------------+ | |
9 | | This file is a part of CiviCRM. | | |
10 | | | | |
11 | | CiviCRM is free software; you can copy, modify, and distribute it | | |
12 | | under the terms of the GNU Affero General Public License | | |
13 | | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. | | |
14 | | | | |
15 | | CiviCRM is distributed in the hope that it will be useful, but | | |
16 | | WITHOUT ANY WARRANTY; without even the implied warranty of | | |
17 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | | |
18 | | See the GNU Affero General Public License for more details. | | |
19 | | | | |
20 | | You should have received a copy of the GNU Affero General Public | | |
21 | | License and the CiviCRM Licensing Exception along | | |
22 | | with this program; if not, contact CiviCRM LLC | | |
23 | | at info[AT]civicrm[DOT]org. If you have questions about the | | |
24 | | GNU Affero General Public License or the licensing of CiviCRM, | | |
25 | | see the CiviCRM license FAQ at http://civicrm.org/licensing | | |
26 | +--------------------------------------------------------------------+ | |
27 | */ | |
28 | ||
/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 *
 * Usage: construct with one of the MODE_* constants (or use singleton()), then call
 * format($text, $language) with one of the LANG_* constants.
 */
class CRM_Utils_QueryFormatter {
  const LANG_SQL_LIKE = 'like';
  const LANG_SQL_FTS = 'fts';
  const LANG_SQL_FTSBOOL = 'ftsbool';
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * @var CRM_Utils_QueryFormatter|NULL lazily-created shared instance
   */
  protected static $singleton;

  /**
   * Get the shared formatter, configured from the 'fts_query_mode' setting.
   *
   * @param bool $fresh TRUE to discard any cached instance and re-read the setting
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = CRM_Core_BAO_Setting::getItem(CRM_Core_BAO_Setting::SEARCH_PREFERENCES_NAME, 'fts_query_mode', NULL, self::MODE_NONE);
      self::$singleton = new CRM_Utils_QueryFormatter($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode eg MODE_NONE
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * @param mixed $mode eg MODE_NONE
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return mixed the current mode, eg MODE_NONE
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Rewrite a user-supplied search expression for the given query language,
   * applying the formatter's current mode.
   *
   * @param string $text
   * @param string $language eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR
   * @throws CRM_Core_Exception if the language or the mode is not recognized
   * @return string
   */
  public function format($text, $language) {
    $text = trim($text);

    switch ($language) {
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // The per-language helpers return NULL for an unknown mode.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format for engines that use '*' as the wildcard (used for LANG_SQL_FTS and LANG_SOLR).
   *
   * @param string $text trimmed user query
   * @param string $mode eg MODE_NONE
   * @return string|NULL NULL if $mode is not recognized
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    if ($this->isBlank($text)) {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // '%' was normalized to '*' above, so '*' is the wildcard that may be doubled.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format for boolean-mode full-text search (LANG_SQL_FTSBOOL), where words
   * are prefixed with '+' and '*' is the wildcard.
   *
   * @param string $text trimmed user query
   * @param string $mode eg MODE_NONE
   * @return string|NULL NULL if $mode is not recognized
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    if ($this->isBlank($text)) {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, only mark each word as required
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Appending '*' to a word that already ends in '*' yields '**'; collapse it.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format for SQL LIKE (LANG_SQL_LIKE), where '%' is the wildcard.
   *
   * @param string $text trimmed user query
   * @param string $mode eg MODE_NONE
   * @return string|NULL NULL if $mode is not recognized
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    if ($this->isBlank($text)) {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Same whitespace set as parseWords(), so words split identically in every language.
          $result = "%" . preg_replace('/[ \r\n\t]+/', '%', $text) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Test for an empty query without the pitfalls of empty(), which would
   * also treat the legitimate search string "0" as empty.
   *
   * @param mixed $text
   * @return bool TRUE if $text casts to the empty string
   */
  protected function isBlank($text) {
    return (string) $text === '';
  }

  /**
   * Apply a template to every whitespace-separated word of the query.
   *
   * @param string $text user-supplied query string
   * @param string $template a prototypical description of each word, eg "word%" or "word*" or "*word*"
   * @return string the rewritten words joined by single spaces
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split text into words on runs of spaces, tabs, CR and LF.
   *
   * @param string $text
   * @return array list of words
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse every run of consecutive wildcard characters into a single one.
   *
   * @param string|NULL $text
   * @param string $wildcard the wildcard character, eg '%' or '*'
   * @return string|NULL NULL is passed through unchanged
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }
    // An empty needle can never be "doubled", and would make the loop below spin forever.
    if ((string) $wildcard === '') {
      return $text;
    }

    // don't use preg_replace because $wildcard might be special char
    $doubled = "{$wildcard}{$wildcard}";
    while (strpos($text, $doubled) !== FALSE) {
      $text = str_replace($doubled, "{$wildcard}", $text);
    }
    return $text;
  }

  /**
   * @return array list of all MODE_* values
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * @return array list of all LANG_* values
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how $text is formatted for every mode (rows)
   * and every language (columns).
   *
   * @param string $text
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    // Pad each column to fit the input plus some room for added wildcards/quotes.
    $width = strlen($text) + 8;
    $buf = '';

    $buf .= sprintf("%-{$width}s", 'mode');
    foreach (self::getLanguages() as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    foreach (self::getModes() as $mode) {
      $formatter = new CRM_Utils_QueryFormatter($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach (self::getLanguages() as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}