Cleanup phpdoc comments
[civicrm-core.git] / CRM / Utils / QueryFormatter.php
1 <?php
2
3 /*
4 +--------------------------------------------------------------------+
5 | CiviCRM version 4.5 |
6 +--------------------------------------------------------------------+
7 | Copyright CiviCRM LLC (c) 2004-2014 |
8 +--------------------------------------------------------------------+
9 | This file is a part of CiviCRM. |
10 | |
11 | CiviCRM is free software; you can copy, modify, and distribute it |
12 | under the terms of the GNU Affero General Public License |
13 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
14 | |
15 | CiviCRM is distributed in the hope that it will be useful, but |
16 | WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
18 | See the GNU Affero General Public License for more details. |
19 | |
20 | You should have received a copy of the GNU Affero General Public |
21 | License and the CiviCRM Licensing Exception along |
22 | with this program; if not, contact CiviCRM LLC |
23 | at info[AT]civicrm[DOT]org. If you have questions about the |
24 | GNU Affero General Public License or the licensing of CiviCRM, |
25 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
26 +--------------------------------------------------------------------+
27 */
28
/**
 * Class CRM_Utils_QueryFormatter
 *
 * This class is a bad idea. It exists for the unholy reason that a single installation
 * may have up to three query engines (MySQL LIKE, MySQL FTS, Solr) processing the same
 * query-text. It labors to take the user's search expression and provide similar search
 * semantics in different contexts. It is unknown whether this labor will be fruitful
 * or in vain.
 *
 * Typical use: build (or fetch via singleton()) a formatter configured with one of the
 * MODE_* constants, then call format() with the user's text and one of the LANG_*
 * constants to get the expression to hand to that query engine.
 */
class CRM_Utils_QueryFormatter {
  const LANG_SQL_LIKE = 'like';
  const LANG_SQL_FTS = 'fts';
  const LANG_SQL_FTSBOOL = 'ftsbool';
  const LANG_SOLR = 'solr';

  /**
   * Attempt to leave the text as-is.
   */
  const MODE_NONE = 'simple';

  /**
   * Attempt to treat the input text as a phrase
   */
  const MODE_PHRASE = 'phrase';

  /**
   * Attempt to treat the input text as a phrase with
   * wildcards on each end.
   */
  const MODE_WILDPHRASE = 'wildphrase';

  /**
   * Attempt to treat individual word as if it
   * had wildcards at the start and end.
   */
  const MODE_WILDWORDS = 'wildwords';

  /**
   * Attempt to treat individual word as if it
   * had a wildcard at the end.
   */
  const MODE_WILDWORDS_SUFFIX = 'wildwords-suffix';

  /**
   * @var CRM_Utils_QueryFormatter|NULL lazily-created shared instance
   */
  static protected $singleton;

  /**
   * Get the shared formatter, creating it on first use.
   *
   * The mode of a newly created instance is read from the 'fts_query_mode'
   * search-preference setting, falling back to MODE_NONE.
   *
   * @param bool $fresh TRUE to discard any existing instance and build a new one
   * @return CRM_Utils_QueryFormatter
   */
  public static function singleton($fresh = FALSE) {
    if ($fresh || self::$singleton === NULL) {
      $mode = CRM_Core_BAO_Setting::getItem(CRM_Core_BAO_Setting::SEARCH_PREFERENCES_NAME, 'fts_query_mode', NULL, self::MODE_NONE);
      self::$singleton = new CRM_Utils_QueryFormatter($mode);
    }
    return self::$singleton;
  }

  /**
   * @var string eg MODE_NONE
   */
  protected $mode;

  /**
   * @param string $mode eg MODE_NONE
   */
  public function __construct($mode) {
    $this->mode = $mode;
  }

  /**
   * @param string $mode eg MODE_NONE
   */
  public function setMode($mode) {
    $this->mode = $mode;
  }

  /**
   * @return string eg MODE_NONE
   */
  public function getMode() {
    return $this->mode;
  }

  /**
   * Rewrite a user-supplied search expression for a particular query engine,
   * applying the wildcard/phrase policy of the current mode.
   *
   * @param string $text user-supplied query string
   * @param string $language eg LANG_SQL_LIKE, LANG_SQL_FTS, LANG_SOLR
   * @throws CRM_Core_Exception if the language or the current mode is not recognized
   * @return string
   */
  public function format($text, $language) {
    // Cast first so that a NULL input is treated as an empty query.
    $text = trim((string) $text);

    switch ($language) {
      case self::LANG_SOLR:
      case self::LANG_SQL_FTS:
        $text = $this->_formatFts($text, $this->mode);
        break;

      case self::LANG_SQL_FTSBOOL:
        $text = $this->_formatFtsBool($text, $this->mode);
        break;

      case self::LANG_SQL_LIKE:
        $text = $this->_formatLike($text, $this->mode);
        break;

      default:
        $text = NULL;
    }

    // The per-language helpers return NULL for a mode they do not know.
    if ($text === NULL) {
      throw new CRM_Core_Exception("Unrecognized combination: language=[{$language}] mode=[{$this->mode}]");
    }

    return $text;
  }

  /**
   * Format text for a full-text engine that uses "*" as its wildcard
   * (used for both LANG_SQL_FTS and LANG_SOLR).
   *
   * @param string $text trimmed user-supplied query string
   * @param string $mode eg MODE_NONE
   * @return string|NULL formatted expression, or NULL if $mode is not recognized
   */
  protected function _formatFts($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Compare against the empty string explicitly: empty() would also treat
    // the legitimate search term "0" as blank and turn it into match-everything.
    if ($text === '' || $text === NULL) {
      $result = '*';
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $text;
          break;

        case self::MODE_PHRASE:
          $result = '"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, 'word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Wildcards were normalized to "*" above, so that is the character to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format text for a boolean-mode full-text query (LANG_SQL_FTSBOOL), where
   * words are prefixed with "+" to make them required and "*" is the wildcard.
   *
   * @param string $text trimmed user-supplied query string
   * @param string $mode eg MODE_NONE
   * @return string|NULL formatted expression, or NULL if $mode is not recognized
   */
  protected function _formatFtsBool($text, $mode) {
    $result = NULL;

    // normalize user-inputted wildcards
    $text = str_replace('%', '*', $text);

    // Explicit comparison so that the search term "0" is not mistaken for a blank query.
    if ($text === '' || $text === NULL) {
      $result = '*';
    }
    elseif (strpos($text, '+') !== FALSE || strpos($text, '-') !== FALSE) {
      // if user supplies their own include/exclude operators, use text as is (with trailing wildcard)
      $result = $this->mapWords($text, 'word*');
    }
    elseif (strpos($text, '*') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $this->mapWords($text, '+word');
    }
    elseif (preg_match('/^(["\']).*\1$/m', $text)) {
      // if surrounded by quotes, use term as is
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
          $result = $this->mapWords($text, '+word');
          break;

        case self::MODE_PHRASE:
          $result = '+"' . $text . '"';
          break;

        case self::MODE_WILDPHRASE:
          $result = '+"*' . $text . '*"';
          break;

        case self::MODE_WILDWORDS:
          $result = $this->mapWords($text, '+*word*');
          break;

        case self::MODE_WILDWORDS_SUFFIX:
          $result = $this->mapWords($text, '+word*');
          break;

        default:
          $result = NULL;
      }
    }

    // Wildcards were normalized to "*" above, so that is the character to collapse.
    return $this->dedupeWildcards($result, '*');
  }

  /**
   * Format text as a pattern for SQL LIKE (LANG_SQL_LIKE), where "%" is the wildcard.
   *
   * @param string $text trimmed user-supplied query string
   * @param string $mode eg MODE_NONE
   * @return string|NULL formatted pattern, or NULL if $mode is not recognized
   */
  protected function _formatLike($text, $mode) {
    $result = NULL;

    // Explicit comparison so that the search term "0" is not mistaken for a blank query.
    if ($text === '' || $text === NULL) {
      $result = '%';
    }
    elseif (strpos($text, '%') !== FALSE) {
      // if user supplies their own wildcards, then don't do any sophisticated changes
      $result = $text;
    }
    else {
      switch ($mode) {
        case self::MODE_NONE:
        case self::MODE_PHRASE:
        case self::MODE_WILDPHRASE:
          $result = "%" . $text . "%";
          break;

        case self::MODE_WILDWORDS:
        case self::MODE_WILDWORDS_SUFFIX:
          // Split with parseWords() so LIKE agrees with the FTS formatters on
          // what separates words (spaces, tabs and line breaks).
          $result = "%" . implode('%', $this->parseWords($text)) . '%';
          break;

        default:
          $result = NULL;
      }
    }

    return $this->dedupeWildcards($result, '%');
  }

  /**
   * Apply a per-word template to every word of the text.
   *
   * @param string $text user-supplied query string
   * @param string $template a prototypical description of each word, eg "word%" or "word*" or "*word*"
   * @return string the templated words joined by single spaces
   */
  protected function mapWords($text, $template) {
    $result = array();
    foreach ($this->parseWords($text) as $word) {
      $result[] = str_replace('word', $word, $template);
    }
    return implode(' ', $result);
  }

  /**
   * Split text into words on runs of spaces, tabs and line breaks.
   *
   * @param string $text
   * @return array list of words; a blank $text yields a single empty string
   */
  protected function parseWords($text) {
    return explode(' ', preg_replace('/[ \r\n\t]+/', ' ', trim($text)));
  }

  /**
   * Collapse every run of consecutive wildcard characters into a single one.
   *
   * @param string|NULL $text
   * @param string $wildcard the wildcard character to collapse, eg "%" or "*"
   * @return string|NULL NULL is passed through unchanged
   */
  protected function dedupeWildcards($text, $wildcard) {
    if ($text === NULL) {
      return NULL;
    }

    // don't use preg_replace because $wildcard might be special char
    while (strpos($text, "{$wildcard}{$wildcard}") !== FALSE) {
      $text = str_replace("{$wildcard}{$wildcard}", "{$wildcard}", $text);
    }
    return $text;
  }

  /**
   * @return array list of all MODE_* values
   */
  public static function getModes() {
    return array(
      self::MODE_NONE,
      self::MODE_PHRASE,
      self::MODE_WILDPHRASE,
      self::MODE_WILDWORDS,
      self::MODE_WILDWORDS_SUFFIX,
    );
  }

  /**
   * @return array list of all LANG_* values
   */
  public static function getLanguages() {
    return array(
      self::LANG_SOLR,
      self::LANG_SQL_FTS,
      self::LANG_SQL_FTSBOOL,
      self::LANG_SQL_LIKE,
    );
  }

  /**
   * Print a table showing how the given text is formatted for every
   * combination of mode (rows) and language (columns).
   *
   * @param string $text sample query text
   *
   * Ex: drush eval 'civicrm_initialize(); CRM_Utils_QueryFormatter::dumpExampleTable("firstword secondword");'
   */
  public static function dumpExampleTable($text) {
    // Leave room in each column for the wildcards/operators/quotes added around the text.
    $width = strlen($text) + 8;
    $buf = '';

    $buf .= sprintf("%-{$width}s", 'mode');
    foreach (self::getLanguages() as $lang) {
      $buf .= sprintf("%-{$width}s", $lang);
    }
    $buf .= "\n";

    foreach (self::getModes() as $mode) {
      $formatter = new CRM_Utils_QueryFormatter($mode);
      $buf .= sprintf("%-{$width}s", $mode);
      foreach (self::getLanguages() as $lang) {
        $buf .= sprintf("%-{$width}s", $formatter->format($text, $lang));
      }
      $buf .= "\n";
    }

    echo $buf;
  }

}