INFRA-132 - CRM/Upgrade - phpcbf (plus fixup)
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
CommitLineData
ea74069c
TO
1<?php
2
3/*
4 +--------------------------------------------------------------------+
39de6fd5 5 | CiviCRM version 4.6 |
ea74069c
TO
6 +--------------------------------------------------------------------+
7 | Copyright CiviCRM LLC (c) 2004-2014 |
8 +--------------------------------------------------------------------+
9 | This file is a part of CiviCRM. |
10 | |
11 | CiviCRM is free software; you can copy, modify, and distribute it |
12 | under the terms of the GNU Affero General Public License |
13 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
14 | |
15 | CiviCRM is distributed in the hope that it will be useful, but |
16 | WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
18 | See the GNU Affero General Public License for more details. |
19 | |
20 | You should have received a copy of the GNU Affero General Public |
21 | License and the CiviCRM Licensing Exception along |
22 | with this program; if not, contact CiviCRM LLC |
23 | at info[AT]civicrm[DOT]org. If you have questions about the |
24 | GNU Affero General Public License or the licensing of CiviCRM, |
25 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
26 +--------------------------------------------------------------------+
27*/
28
/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 *
 * Usage: construct with one of the MODE_* constants (or use singleton(), which reads the
 * mode from the "fts_query_mode" setting) and call format() with one of the LANG_*
 * constants.
 */
class CRM_Utils_QueryFormatter {
  const LANG_SQL_LIKE = 'like';
  const LANG_SQL_FTS = 'fts';
  const LANG_SQL_FTSBOOL = 'ftsbool';
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * @var CRM_Utils_QueryFormatter|NULL
   *   Shared instance created lazily by singleton().
   */
  static protected $singleton;

  /**
   * Get the shared formatter, configured from the "fts_query_mode" setting.
   *
   * @param bool $fresh
   *   TRUE to discard any cached instance and re-read the setting.
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = CRM_Core_BAO_Setting::getItem(CRM_Core_BAO_Setting::SEARCH_PREFERENCES_NAME, 'fts_query_mode', NULL, self::MODE_NONE);
      self::$singleton = new CRM_Utils_QueryFormatter($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode
   *   Eg MODE_NONE.
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * @param mixed $mode
   *   Eg MODE_NONE.
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return mixed
   *   The currently configured mode, eg MODE_NONE.
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Rewrite a user-supplied search expression for the given query language,
   * applying the formatter's current mode.
   *
   * @param string $text
   *   User-supplied query string; surrounding whitespace is trimmed.
   * @param string $language
   *   Eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SQL_FTSBOOL, LANG_SOLR.
   * @throws CRM_Core_Exception
   *   If the language is unknown, or the mode is not supported for it.
   * @return string
   */
  public function format($text, $language) {
    $text = trim($text);

    switch ($language) {
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // Each helper returns NULL for an unsupported mode.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format text for the FTS / Solr languages, where "*" is the wildcard.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   * @return string|NULL
   *   Formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Explicit comparison rather than empty(): the literal search term "0" is not empty.
    if ($text === NULL || $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Wildcards were normalized to "*" above, so that is the character to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format text for the boolean full-text language, where "*" is the wildcard
   * and "+" / "-" are include / exclude operators.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   * @return string|NULL
   *   Formatted expression, or NULL if $mode is not recognized.
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Explicit comparison rather than empty(): the literal search term "0" is not empty.
    if ($text === NULL || $text === '') {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, keep them and only mark each word as required
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Wildcards were normalized to "*" above; appending "*" to a word that already
    // ends in one (eg "+foo*") would otherwise leave "**" in the expression.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format text for SQL LIKE, where "%" is the wildcard.
   *
   * @param string $text
   *   Trimmed user-supplied query string.
   * @param string $mode
   *   Eg MODE_NONE.
   * @return string|NULL
   *   Formatted pattern, or NULL if $mode is not recognized.
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    // Explicit comparison rather than empty(): the literal search term "0" is not empty.
    if ($text === NULL || $text === '') {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Same word separators as parseWords(), so tabs split words here too.
          $result = "%" . preg_replace('/[ \r\n\t]+/', '%', $text) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Apply a template to every word of the text and re-join with single spaces.
   *
   * @param string $text
   *   User-supplied query string.
   * @param string $template
   *   A prototypical description of each word, eg "word%" or "word*" or "*word*".
   * @return string
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split text into words on runs of spaces, tabs, CRs and LFs.
   *
   * @param string $text
   * @return array
   *   List of words; an empty $text yields a single empty-string element.
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse every run of consecutive wildcard characters into a single one.
   *
   * @param string|NULL $text
   * @param string $wildcard
   *   The wildcard character, eg "%" or "*".
   * @return string|NULL
   *   NULL is passed through unchanged so callers can signal "unsupported mode".
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }

    // don't use preg_replace because $wildcard might be special char
    while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
      $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
    }
    return $text;
  }

  /**
   * @return array
   *   List of all supported MODE_* values.
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * @return array
   *   List of all supported LANG_* values.
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how $text is formatted for every mode/language pair.
   *
   * @param string $text
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    // Column width leaves room for the added quotes, operators and wildcards.
    $width = strlen($text) + 8;
    $buf = '';

    $buf .= sprintf("%-{$width}s", 'mode');
    foreach (self::getLanguages() as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    foreach (self::getModes() as $mode) {
      $formatter = new CRM_Utils_QueryFormatter($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach (self::getLanguages() as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}