Merge pull request #3114 from Edzelopez/CRM-14621
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.5 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 require_once 'HTML/QuickForm/Rule/Email.php';
37
38 /**
39 * This class contains string functions
40 *
41 */
42 class CRM_Utils_String {
// Common delimiter and line-ending sequences.
const COMMA = ",";
const SEMICOLON = ";";
const SPACE = " ";
const TAB = "\t";
const LINEFEED = "\n";
const CARRIAGELINE = "\r\n";
const LINECARRIAGE = "\n\r";
const CARRIAGERETURN = "\r";

/**
 * Every ASCII letter (lower case, then upper case) followed by the digits
 */
const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
49
/**
 * Convert a display name into a potential variable name that can be
 * used in forms/code.
 *
 * The title is munged with '_' as the replacement character. When
 * CRM_Utils_Rule::title() does not accept the munged value, a prefix of
 * the md5 hash of the original title is returned instead.
 *
 * @param string $title the display name to convert
 * @param int $maxLength maximum length of the returned name
 *
 * @return string an equivalent variable name
 * @access public
 * @static
 */
static function titleToVar($title, $maxLength = 31) {
  $candidate = self::munge($title, '_', $maxLength);

  // fall back to a slice of the md5 hash to prevent errors downstream
  return CRM_Utils_Rule::title($candidate, $maxLength)
    ? $candidate
    : substr(md5($title), 0, $maxLength);
}
74
/**
 * Replace every run of characters outside a-z, A-Z and 0-9 in the trimmed
 * input with the replacement string, then optionally truncate the result.
 *
 * @param string $name the name to be worked on
 * @param string $char the replacement for each run of non-valid chars
 * @param int $len maximum length of the result; a falsy value disables truncation
 *
 * @access public
 *
 * @return string returns the manipulated string
 * @static
 */
static function munge($name, $char = '_', $len = 63) {
  // CRM-11744: only the ascii character set is kept, since mysql does not
  // create table names / field names otherwise
  $munged = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

  // keep variable names short when a length limit was given
  return $len ? substr($munged, 0, $len) : $munged;
}
102
/**
 * Take a variable name and overwrite its tail with the leading characters
 * of a uniqid() value, producing another variable name.
 *
 * @param string $name Initial Variable Name
 * @param int $len number of trailing characters to replace
 *
 * @return string Randomized Variable Name
 * @access public
 * @static
 */
static function rename($name, $len = 4) {
  // the last $len characters of $name are swapped for the uniqid() prefix
  return substr_replace($name, substr(uniqid(), 0, $len), -$len, $len);
}
118
/**
 * Return the last component of a delimited string.
 * Useful while converting file names to class names etc.
 *
 * @param string $string the input string; nothing is returned for an array
 * @param string $char the delimiter separating the components
 *
 * @access public
 *
 * @return string|null the last component, or NULL when no components are available
 * @static
 */
static function getClassName($string, $char = '_') {
  if (is_array($string)) {
    return NULL;
  }

  $parts = explode($char, $string);
  return empty($parts) ? NULL : array_pop($parts);
}
140
/**
 * Append a name (or each non-empty name of a list) to a string, separated
 * by a delimiter. An empty target string is simply replaced by the first
 * name, so no leading delimiter is produced.
 *
 * @param string $str the string to be appended to (modified in place)
 * @param string $delim the delimiter to use
 * @param mixed $name the string (or array of strings) to append
 *
 * @return void
 * @access public
 * @static
 */
static function append(&$str, $delim, $name) {
  if (empty($name)) {
    return;
  }

  // treat a single name as a one-element list so both cases share one loop
  $pieces = is_array($name) ? $name : array($name);
  foreach ($pieces as $piece) {
    if (empty($piece)) {
      continue;
    }
    if (empty($str)) {
      $str = $piece;
      continue;
    }
    $str .= $delim . $piece;
  }
}
180
/**
 * Determine if the string is composed only of ascii characters
 * (optionally also accepting utf-8).
 *
 * @param string $str input string
 * @param boolean $utf8 also accept utf-8 content (default yes)
 *
 * @return boolean true if the string is ascii (or utf-8 when $utf8 is set)
 * @access public
 * @static
 */
static function isAscii($str, $utf8 = TRUE) {
  if (function_exists('mb_detect_encoding')) {
    $candidates = $utf8 ? array('ASCII', 'UTF-8') : array('ASCII');
    $detected = mb_detect_encoding($str, $candidates, TRUE);
    return ($detected == 'ASCII' || $detected == 'UTF-8');
  }

  // No mbstring: fall back to pattern matching on the string with all
  // white space removed.
  // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
  $compact = preg_replace('/\s+/', '', $str);

  // no low- or high-ascii characters at all: plain ascii
  if (!preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $compact)) {
    return TRUE;
  }

  // otherwise the string only passes when utf-8 is acceptable and valid
  return $utf8 ? self::isUtf8($compact) : FALSE;
}
222
/**
 * Determine the string replacements for redaction on the basis of the
 * regular expressions.
 *
 * Every text fragment of $str matched by one of the patterns becomes a key
 * of the result; its value is the replacement of the first pattern that
 * matches the fragment, followed by the first five characters of the
 * fragment's md5 hash.
 *
 * Only matches found in the current $str are returned; nothing is carried
 * over from earlier calls.
 *
 * @param string $str input string
 * @param array $regexRules map of regular expression => replacement prefix
 *
 * @return array matched fragment => redacted output; CRM_Core_DAO::$_nullArray when nothing matched
 * @access public
 * @static
 */
static function regex($str, $regexRules) {
  // collect every fragment matched by any of the rules
  $fragments = array();
  if (!empty($regexRules) && isset($str)) {
    foreach ($regexRules as $pattern => $replacement) {
      $matches = array();
      preg_match_all($pattern, $str, $matches);
      if (!empty($matches[0])) {
        $fragments = array_merge($fragments, $matches[0]);
      }
    }
  }

  if (empty($fragments)) {
    return CRM_Core_DAO::$_nullArray;
  }

  // flipping de-duplicates the fragments and turns them into keys
  $match = array_flip($fragments);
  foreach ($match as $matchKey => &$redacted) {
    foreach ($regexRules as $pattern => $replacement) {
      if (preg_match($pattern, $matchKey)) {
        $redacted = $replacement . substr(md5($matchKey), 0, 5);
        break;
      }
    }
  }
  // drop the reference so later writes cannot touch the last element
  unset($redacted);

  return $match;
}
265
/**
 * Apply a set of case-insensitive string replacements to a string.
 *
 * @param string $str input string
 * @param array $stringRules map of text to find => text to put in its place
 *
 * @return string the redacted output
 * @access public
 * @static
 */
static function redaction($str, $stringRules) {
  // nothing to redact
  if (empty($stringRules)) {
    return $str;
  }

  foreach ($stringRules as $needle => $substitute) {
    $str = str_ireplace($needle, $substitute, $str);
  }
  return $str;
}
277
/**
 * Determine if a string is composed only of utf8 characters
 *
 * Uses mb_detect_encoding() in strict mode when the mbstring extension is
 * available, and a byte-sequence pattern on the white-space-stripped string
 * otherwise.
 *
 * @param string $str input string
 * @access public
 * @static
 *
 * @return boolean true if the string is valid utf-8
 */
static function isUtf8($str) {
  // the function name must be passed as a string: a bare identifier is an
  // undefined constant
  if (function_exists('mb_detect_encoding')) {
    return mb_detect_encoding($str, array('UTF-8'), TRUE) !== FALSE;
  }

  // eliminate all white space from the string
  $str = preg_replace('/\s+/', '', $str);

  /* pattern taken from the php.net function documentation for
   * utf8decode(); comment by JF Sebastian, 30-Mar-2005
   * (lead byte of the first four-byte alternative is \xf0)
   */
  return (bool) preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
}
306
/**
 * Determine if two href's are equivalent (fuzzy match).
 *
 * Both urls are lower-cased; they match when their paths are equal and the
 * values extracted from their query strings by extractURLVarValue() are
 * equal. A url without a path component is treated as having a NULL path.
 *
 * @param string $url1 the first url to be matched
 * @param string $url2 the second url to be matched against
 *
 * @return boolean true if the urls match, else false
 * @access public
 * @static
 */
static function match($url1, $url2) {
  $parts1 = parse_url(strtolower($url1));
  $parts2 = parse_url(strtolower($url2));

  // parse_url() omits 'path' when the url has none (and returns FALSE for
  // malformed urls), so read it defensively
  $path1 = isset($parts1['path']) ? $parts1['path'] : NULL;
  $path2 = isset($parts2['path']) ? $parts2['path'] : NULL;

  if ($path1 != $path2) {
    return FALSE;
  }

  return self::extractURLVarValue(CRM_Utils_Array::value('query', $parts1)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $parts2));
}
331
/**
 * Extract from a query string the value of the parameter whose name is
 * the configured userFrameworkURLVar.
 *
 * @param string $query the query string part of a url ('&'-separated key=value pairs)
 *
 * @return string|null the value of that parameter (eg: civicrm/contact/search/...),
 *   or NULL when the parameter is not present
 * @access public
 * @static
 */
static function extractURLVarValue($query) {
  $urlVar = CRM_Core_Config::singleton()->userFrameworkURLVar;

  foreach (explode('&', $query) as $pair) {
    // skip pairs without '=' (or with '=' as the very first character)
    if (!strpos($pair, '=')) {
      continue;
    }
    list($key, $value) = explode('=', $pair);
    if ($key == $urlVar) {
      return $value;
    }
  }
  return NULL;
}
356
/**
 * Translate a y/yes/t/true/1 string (case-insensitive) to TRUE; every other
 * value, including non-scalars, yields FALSE.
 *
 * @param string $str the string to be translated
 *
 * @return boolean
 * @access public
 * @static
 */
static function strtobool($str) {
  return is_scalar($str) && (bool) preg_match('/^(y(es)?|t(rue)?|1)$/i', $str);
}
376
/**
 * Return the string '1' for a y/yes/t/true/1 string and '0' for a
 * n/no/f/false/0 string (both case-insensitive); anything else, including
 * non-scalars, returns FALSE.
 *
 * @param string $str the string to be translated
 *
 * @return string|boolean '1', '0', or FALSE when the value is not recognised
 * @access public
 * @static
 */
static function strtoboolstr($str) {
  if (!is_scalar($str)) {
    return FALSE;
  }

  if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
    return '1';
  }

  return preg_match('/^(n(o)?|f(alse)?|0)$/i', $str) ? '0' : FALSE;
}
401
/**
 * Convert a HTML string into a text one using html2text
 *
 * Placeholders of the form {name} are prefixed with "token:" before the
 * conversion and the prefix is removed again afterwards (presumably so the
 * converter leaves them intact - verify against rcube_html2text).
 *
 * @param string $html the string to be converted
 *
 * @return string the converted string
 * @access public
 * @static
 */
static function htmlToText($html) {
  require_once 'packages/html2text/rcube_html2text.php';

  // mark the tokens, convert, then strip the marker again
  $marked = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
  $converter = new rcube_html2text($marked);
  return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $converter->get_text());
}
419
/**
 * Split a personal name into first / middle / last name entries of $params.
 *
 * Quotes are removed first. A name containing a comma is read as
 * "lname, fname [mname]"; otherwise it is read as "fname [mname] lname",
 * using single spaces as separators. Nothing is written for an empty name.
 *
 * @param string $string the name to split
 * @param array $params receives 'first_name', 'middle_name' and 'last_name' as available
 *
 * @return void
 * @access public
 * @static
 */
static function extractName($string, &$params) {
  $name = trim($string);
  if (empty($name)) {
    return;
  }

  // strip out double and single quotes
  $name = str_replace(array('"', '\''), '', $name);

  if (strpos($name, ',') !== FALSE) {
    // name has a comma - assume lname, fname [mname]
    $pieces = explode(',', $name);
    $params['last_name'] = trim($pieces[0]);

    // the part after the comma is space delimited
    $given = explode(' ', trim($pieces[1]));
    $params['first_name'] = trim($given[0]);
    if (count($given) > 1) {
      $params['middle_name'] = trim($given[1]);
    }
    return;
  }

  // name has no comma - assume fname [mname] lname
  $pieces = explode(' ', $name);
  $total = count($pieces);
  $params['first_name'] = $pieces[0];
  if ($total == 2) {
    $params['last_name'] = $pieces[1];
  }
  elseif ($total > 2) {
    $params['middle_name'] = $pieces[1];
    $params['last_name'] = $pieces[2];
  }
}
469
/**
 * Build an array from a multi-line string of "name=value" lines.
 *
 * Each line is split on the first '=' via CRM_Utils_System::explode();
 * lines whose value part is empty are skipped. Names and values are trimmed.
 *
 * @param string $string newline separated name=value pairs
 *
 * @return array name => value (returned by reference)
 * @access public
 * @static
 */
static function &makeArray($string) {
  $result = array();
  foreach (explode("\n", trim($string)) as $line) {
    list($key, $val) = CRM_Utils_System::explode('=', $line, 2);
    if (!empty($val)) {
      $result[trim($key)] = trim($val);
    }
  }
  return $result;
}
483
/**
 * Given an ezComponents-parsed representation of a text with
 * alternatives, return only the first one.
 *
 * The first alternative is used only when it still contains something
 * after its tags and surrounding white space are removed.
 *
 * @param string $full all alternatives as a long string (or some other text)
 *
 * @return string only the first alternative found (or the text unchanged)
 */
static function stripAlternatives($full) {
  $found = array();
  preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $found);

  // no alternatives, or an effectively empty first one: keep the input
  if (!isset($found[1]) || trim(strip_tags($found[1])) == '') {
    return $full;
  }
  return $found[1];
}
505
/**
 * Strip leading and trailing white space from a string and collapse every
 * run of two or more white space characters into a single space.
 * Used for postal/greeting/addressee.
 *
 * @param string $string input string to be cleaned
 *
 * @return string the cleaned string (empty input is returned untouched)
 * @access public
 * @static
 */
static function stripSpaces($string) {
  if (empty($string)) {
    return $string;
  }
  return preg_replace("/\s{2,}/", " ", trim($string));
}
519
/**
 * Clean the URL 'path' variable that we use to construct CiviCRM urls by
 * replacing a set of characters in it.
 *
 * @param string $string the input string to be sanitized
 * @param array $search the characters to be replaced; when not given, a
 *   built-in list of punctuation, quote, bracket and white space characters is used
 * @param string $replace the replacement; defaults to '_'
 *
 * @return string the sanitized string (empty input is returned untouched)
 * @access public
 * @static
 */
static function stripPathChars($string, $search = NULL, $replace = NULL) {
  if (empty($string)) {
    return $string;
  }

  if ($search == NULL) {
    // default set of characters that must not appear in a path
    $search = array(
      '&', ';', ',', '=', '$',
      '"', "'", '\\',
      '<', '>', '(', ')',
      ' ', "\r", "\r\n", "\n", "\t",
    );
  }

  if ($replace == NULL) {
    $replace = '_';
  }

  return str_replace($search, $replace, $string);
}
564
565
/**
 * Use HTMLPurifier to clean up a text string and remove any potential
 * xss attacks. This is primarily used in public facing pages which
 * accept html as the input string.
 *
 * The purifier is created on first use (UTF-8 encoding, definition cache
 * disabled) and reused by later calls.
 *
 * @param string $string the input string
 *
 * @return string the cleaned up string
 * @access public
 * @static
 */
static function purifyHTML($string) {
  static $purifier = NULL;

  if (!$purifier) {
    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Core.Encoding', 'UTF-8');
    // Disable the cache entirely
    $settings->set('Cache.DefinitionImpl', NULL);

    $purifier = new HTMLPurifier($settings);
  }

  return $purifier->purify($string);
}
591
/**
 * Truncate $string; if $string exceeds $maxLen, place "..." at the end.
 *
 * The result never exceeds $maxLen: when $maxLen leaves no room for any
 * text before the marker, only (part of) the marker itself is returned.
 * Lengths are counted in UTF-8 characters when the mbstring extension is
 * available, so multi-byte characters are not cut in half; otherwise they
 * are counted in bytes.
 *
 * @param string $string the text to shorten
 * @param int $maxLen maximum length of the result, including the "..."
 *
 * @return string the original string, or its shortened form ending in "..."
 * @access public
 * @static
 */
static function ellipsify($string, $maxLen) {
  $marker = '...';
  $useMb = function_exists('mb_strlen') && function_exists('mb_substr');

  $len = $useMb ? mb_strlen($string, 'UTF-8') : strlen($string);
  if ($len <= $maxLen) {
    return $string;
  }

  // too short to hold any text plus the marker: avoid a negative substr
  // length, which would cut from the end instead of limiting the size
  if ($maxLen <= strlen($marker)) {
    return $maxLen > 0 ? substr($marker, 0, $maxLen) : '';
  }

  $keep = $maxLen - strlen($marker);
  $head = $useMb ? mb_substr($string, 0, $keep, 'UTF-8') : substr($string, 0, $keep);
  return $head . $marker;
}
607
/**
 * Generate a random string by picking characters from an alphabet.
 *
 * Characters are chosen with rand(), so the result is not suitable where
 * cryptographic strength is required.
 *
 * @param int $len number of characters to generate
 * @param string $alphabet the characters to choose from (eg self::ALPHANUMERIC)
 *
 * @return string the generated string; empty when $alphabet is empty or $len < 1
 */
public static function createRandom($len, $alphabet) {
  $alphabetSize = strlen($alphabet);
  if ($alphabetSize < 1) {
    // nothing to pick from
    return '';
  }

  $result = '';
  for ($i = 0; $i < $len; $i++) {
    // square-bracket offsets: the curly-brace form is not valid in current PHP
    $result .= $alphabet[rand(0, $alphabetSize - 1)];
  }
  return $result;
}
623
/**
 * Split a "[prefix<delim>]name" string at the first occurrence of the
 * delimiter.
 *
 * Examples (with ":" as delimiter):
 * "admin foo" => array(NULL,"admin foo")
 * "cms:admin foo" => array("cms", "admin foo")
 *
 * @param string $delim the delimiter between prefix and value; may be longer than one character
 * @param string $string e.g. "view all contacts". Syntax: "[prefix:]name"
 * @param string|NULL $defaultPrefix the prefix to report when $string has none
 * @return array (0 => string|NULL $prefix, 1 => string $value)
 */
public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
  $pos = strpos($string, $delim);
  if ($pos === FALSE) {
    return array($defaultPrefix, $string);
  }

  // skip the whole delimiter, not just its first character
  return array(substr($string, 0, $pos), substr($string, $pos + strlen($delim)));
}
641
/**
 * Mask the middle of the user portion of an Email address (everything
 * before the @). The masked stretch is centred, leaving the characters on
 * both sides of it readable.
 *
 * @param string $email the email address to be masked
 * @param string $maskChar the character used for masking
 * @param integer $percent the percentage of the user portion to be masked
 *
 * @return string returns the masked Email address
 */
public static function maskEmail($email, $maskChar= '*', $percent=50) {
  list($user, $domain) = explode('@', $email);

  $userLen = strlen($user);
  // number of masked characters, and how many characters stay visible before them
  $hidden = floor($userLen * $percent / 100);
  $lead = floor(($userLen - $hidden) / 2);

  $pieces = array(
    substr($user, 0, $lead),
    str_repeat($maskChar, $hidden),
    substr($user, $hidden + $lead),
  );

  return implode('', $pieces) . '@' . $domain;
}
663
/**
 * Compare two strings after trimming surrounding white space.
 *
 * The comparison is a strict string comparison, so numeric-looking strings
 * such as "1e3" and "1000" are not considered identical.
 *
 * @param string $strOne string one
 * @param string $strTwo string two
 * @param boolean $case when truthy both strings are converted to lower case
 *   before comparing (i.e. the comparison ignores case); when falsy the
 *   comparison is case sensitive
 *
 * @return boolean TRUE (string are identical); FALSE (strings are not identical)
 */
public static function compareStr($strOne, $strTwo, $case) {
  $one = trim($strOne);
  $two = trim($strTwo);

  if ($case) {
    // fold both sides to lower case
    $one = strtolower($one);
    $two = strtolower($two);
  }

  // === avoids PHP's numeric-string juggling performed by ==
  return $one === $two;
}
697
698 }
699