Merge pull request #8980 from ergonlogic/dev/CRM-19308
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.7 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2017 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2017
32 */
33
34 require_once 'HTML/QuickForm/Rule/Email.php';
35
36 /**
37 * This class contains string functions.
38 */
class CRM_Utils_String {
  const COMMA = ",";
  const SEMICOLON = ";";
  const SPACE = " ";
  const TAB = "\t";
  const LINEFEED = "\n";
  const CARRIAGELINE = "\r\n";
  const LINECARRIAGE = "\n\r";
  const CARRIAGERETURN = "\r";

  /**
   * List of all letters and numbers
   */
  const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';

  /**
   * Convert a display name into a potential variable name.
   *
   * @param string $title
   *   The display title to convert.
   * @param int $maxLength
   *   Maximum length of the returned name.
   *
   * @return string
   *   The munged title when it passes CRM_Utils_Rule::title(), otherwise
   *   the first $maxLength characters of md5($title).
   */
  public static function titleToVar($title, $maxLength = 31) {
    $variable = self::munge($title, '_', $maxLength);

    if (CRM_Utils_Rule::title($variable, $maxLength)) {
      return $variable;
    }

    // The munged name was rejected by the title rule: fall back to a
    // truncated md5 of the original title to prevent errors downstream.
    return substr(md5($title), 0, $maxLength);
  }

  /**
   * Replace all non alpha numeric characters and spaces with the replacement character.
   *
   * The input is trimmed first; each run of characters outside [a-zA-Z0-9]
   * is collapsed into a single $char.
   *
   * @param string $name
   *   The name to be worked on.
   * @param string $char
   *   The character to use for non-valid chars.
   * @param int $len
   *   Maximum length of the result; a falsy value disables truncation.
   *
   * @return string
   *   returns the manipulated string
   */
  public static function munge($name, $char = '_', $len = 63) {
    // Replace all white space and non-alpha numeric with $char
    // we only use the ascii character set since mysql does not create table names / field names otherwise
    // CRM-11744
    $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // lets keep variable names short
    return $len ? substr($name, 0, $len) : $name;
  }

  /**
   * Convert possibly underscore separated words to camel case with special handling for 'UF'
   * e.g membership_payment returns MembershipPayment
   *
   * The exact inputs 'acl'/'ACL' and 'im'/'IM' map to 'Acl' and 'Im'.
   *
   * @param string $string
   *
   * @return string
   */
  public static function convertStringToCamel($string) {
    $map = array(
      'acl' => 'Acl',
      'ACL' => 'Acl',
      'im' => 'Im',
      'IM' => 'Im',
    );
    if (isset($map[$string])) {
      return $map[$string];
    }

    // array_map avoids the dangling by-reference loop variable.
    $fragments = array_map('ucfirst', explode('_', $string));

    // Special case: UFGroup, UFJoin, UFMatch, UFField
    if ($fragments[0] === 'Uf') {
      $fragments[0] = 'UF';
    }
    return implode('', $fragments);
  }

  /**
   * Takes a variable name and replaces its last $len characters with a
   * generated suffix.
   *
   * NOTE(review): the suffix is the *leading* $len characters of uniqid(),
   * which the PHP manual describes as time-based - it is not a strong
   * source of randomness. Behaviour is kept as-is for existing callers.
   *
   * @param string $name
   *   Initial Variable Name.
   * @param int $len
   *   Number of trailing characters to replace.
   *
   * @return string
   *   Randomized Variable Name
   */
  public static function rename($name, $len = 4) {
    $rand = substr(uniqid(), 0, $len);
    return substr_replace($name, $rand, -$len, $len);
  }

  /**
   * Takes a string and returns the last tuple of the string.
   *
   * Useful while converting file names to class names etc
   *
   * @param string $string
   *   The input string.
   * @param string $char
   *   Character used to demarcate the components
   *
   * @return string|null
   *   The last component; NULL when $string is an array.
   */
  public static function getClassName($string, $char = '_') {
    if (is_array($string)) {
      return NULL;
    }
    $names = explode($char, $string);
    return empty($names) ? NULL : array_pop($names);
  }

  /**
   * Appends a name to a string and separated by delimiter.
   *
   * Does the right thing for an empty string: the delimiter is only
   * inserted when $str already has content. Empty names are skipped.
   *
   * @param string $str
   *   The string to be appended to (modified in place).
   * @param string $delim
   *   The delimiter to use.
   * @param mixed $name
   *   The string (or array of strings) to append.
   */
  public static function append(&$str, $delim, $name) {
    if (empty($name)) {
      return;
    }

    $parts = is_array($name) ? $name : array($name);
    foreach ($parts as $part) {
      if (empty($part)) {
        continue;
      }
      $str = empty($str) ? $part : $str . $delim . $part;
    }
  }

  /**
   * Determine if the string is composed only of ascii characters.
   *
   * @param string $str
   *   Input string.
   * @param bool $utf8
   *   Attempt utf8 match on failure (default yes).
   *
   * @return bool
   *   true if string is ascii (or utf8, when $utf8 is set)
   */
  public static function isAscii($str, $utf8 = TRUE) {
    if (function_exists('mb_detect_encoding')) {
      $order = array('ASCII');
      if ($utf8) {
        $order[] = 'UTF-8';
      }
      $enc = mb_detect_encoding($str, $order, TRUE);
      return ($enc == 'ASCII' || $enc == 'UTF-8');
    }

    // Fallback when mbstring is unavailable.
    // eliminate all white space from the string
    $str = preg_replace('/\s+/', '', $str);
    // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.

    // match low- or high-ascii characters
    if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
      // if we did match, try for utf-8 when the caller allows it
      return $utf8 ? self::isUtf8($str) : FALSE;
    }
    return TRUE;
  }

  /**
   * Determine the string replacements for redaction.
   * on the basis of the regular expressions
   *
   * NOTE(review): the working variables are function-static, so matches
   * found by earlier calls are retained and included in later results.
   * This is preserved as-is because callers may rely on it - confirm.
   *
   * @param string $str
   *   Input string.
   * @param array $regexRules
   *   Regular expression to be matched w/ replacements
   *   (pattern => replacement prefix).
   *
   * @return array
   *   Matched text => replacement prefix followed by the first five
   *   characters of the md5 of the matched text; CRM_Core_DAO::$_nullArray
   *   when nothing has matched.
   */
  public static function regex($str, $regexRules) {
    static $matches, $totalMatches, $match = array();

    // redact the regular expressions
    if (!empty($regexRules) && isset($str)) {
      foreach ($regexRules as $pattern => $replacement) {
        preg_match_all($pattern, $str, $matches);
        if (!empty($matches[0])) {
          if (empty($totalMatches)) {
            $totalMatches = $matches[0];
          }
          else {
            $totalMatches = array_merge($totalMatches, $matches[0]);
          }
          $match = array_flip($totalMatches);
        }
      }
    }

    if (!empty($match)) {
      foreach ($match as $matchKey => & $dontCare) {
        foreach ($regexRules as $pattern => $replacement) {
          // The first rule that matches the captured text wins.
          if (preg_match($pattern, $matchKey)) {
            $dontCare = $replacement . substr(md5($matchKey), 0, 5);
            break;
          }
        }
      }
      // Break the reference so the returned copy does not share a slot
      // with the static array.
      unset($dontCare);
      return $match;
    }
    return CRM_Core_DAO::$_nullArray;
  }

  /**
   * Apply case-insensitive string replacements to a string.
   *
   * @param string $str
   *   Input string.
   * @param array $stringRules
   *   Map of search text => replacement text.
   *
   * @return mixed
   *   The redacted output.
   */
  public static function redaction($str, $stringRules) {
    // redact the strings
    if (!empty($stringRules)) {
      foreach ($stringRules as $match => $replace) {
        $str = str_ireplace($match, $replace, $str);
      }
    }

    // return the redacted output
    return $str;
  }

  /**
   * Determine if a string is composed only of utf8 characters
   *
   * @param string $str
   *   Input string.
   *
   * @return bool
   */
  public static function isUtf8($str) {
    // The function name must be passed as a string; a bare identifier is
    // an undefined constant.
    if (function_exists('mb_detect_encoding')) {
      $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
      return ($enc !== FALSE);
    }

    // Fallback when mbstring is unavailable.
    // eliminate all white space from the string
    $str = preg_replace('/\s+/', '', $str);

    // pattern stolen from the php.net function documentation for
    // utf8decode();
    // comment by JF Sebastian, 30-Mar-2005
    // (the \xf0 lead-byte alternative previously lacked its "\x" escape)
    return (bool) preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
  }

  /**
   * Determine if two hrefs are equivalent (fuzzy match)
   *
   * Both urls are lower-cased; they match when their paths are equal and
   * the value of the framework url variable in their query strings is equal.
   *
   * @param string $url1
   *   The first url to be matched.
   * @param string $url2
   *   The second url to be matched against.
   *
   * @return bool
   *   true if the urls match, else false
   */
  public static function match($url1, $url2) {
    $url1Str = parse_url(strtolower($url1));
    $url2Str = parse_url(strtolower($url2));

    // A url may have no path component (or fail to parse); treat that as
    // NULL instead of reading an undefined offset.
    $path1 = isset($url1Str['path']) ? $url1Str['path'] : NULL;
    $path2 = isset($url2Str['path']) ? $url2Str['path'] : NULL;

    if ($path1 == $path2 &&
      self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
    ) {
      return TRUE;
    }
    return FALSE;
  }

  /**
   * Extract the civicrm path from the url.
   *
   * Looks for the parameter named by the config's userFrameworkURLVar.
   *
   * @param string $query
   *   A url query string ("a=b&c=d").
   *
   * @return string|null
   *   civicrm url (eg: civicrm/contact/search), or NULL when not present.
   */
  public static function extractURLVarValue($query) {
    $config = CRM_Core_Config::singleton();
    $urlVar = $config->userFrameworkURLVar;

    $params = explode('&', $query);
    foreach ($params as $p) {
      // strpos() is used for truthiness on purpose: a missing "=" (FALSE)
      // and a leading "=" (position 0) are both skipped.
      if (strpos($p, '=')) {
        list($k, $v) = explode('=', $p);
        if ($k == $urlVar) {
          return $v;
        }
      }
    }
    return NULL;
  }

  /**
   * Translate a true/false/yes/no string to a boolean.
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return bool
   *   TRUE for y, yes, t, true or 1 (case-insensitive); FALSE for anything
   *   else, including non-scalar input.
   */
  public static function strtobool($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
      return TRUE;
    }
    return FALSE;
  }

  /**
   * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return string|bool
   *   '1', '0', or FALSE when the input is non-scalar or not recognised.
   */
  public static function strtoboolstr($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
      return '1';
    }
    if (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
      return '0';
    }
    return FALSE;
  }

  /**
   * Convert a HTML string into a text one using html2text
   *
   * Tokens of the form {name.part} are wrapped as "token:{...}" before the
   * conversion and unwrapped again afterwards.
   *
   * @param string $html
   *   The string to be converted.
   *
   * @return string
   *   the converted string
   */
  public static function htmlToText($html) {
    require_once 'packages/html2text/rcube_html2text.php';
    $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
    $converter = new rcube_html2text($token_html);
    $token_text = $converter->get_text();
    $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
    return $text;
  }

  /**
   * Split a personal name into first/middle/last name parts.
   *
   * Quotes are removed. "last, first [middle]" is assumed when the name
   * contains a comma, otherwise "first [middle] last".
   *
   * @param string $string
   *   The name to parse.
   * @param array $params
   *   Receives 'first_name', 'middle_name' and 'last_name' as found
   *   (modified in place; untouched when the name is empty).
   */
  public static function extractName($string, &$params) {
    $name = trim($string);
    if (empty($name)) {
      return;
    }

    // strip out quotes
    $name = str_replace('"', '', $name);
    $name = str_replace('\'', '', $name);

    // check for comma in name
    if (strpos($name, ',') !== FALSE) {

      // name has a comma - assume lname, fname [mname]
      $names = explode(',', $name);
      if (count($names) > 1) {
        $params['last_name'] = trim($names[0]);

        // check for space delim
        $fnames = explode(' ', trim($names[1]));
        $params['first_name'] = trim($fnames[0]);
        if (count($fnames) > 1) {
          $params['middle_name'] = trim($fnames[1]);
        }
      }
      else {
        $params['first_name'] = trim($names[0]);
      }
    }
    else {
      // name has no comma - assume fname [mname] lname
      $names = explode(' ', $name);
      $params['first_name'] = $names[0];
      if (count($names) == 2) {
        $params['last_name'] = $names[1];
      }
      elseif (count($names) > 2) {
        // Only the first three words are used.
        $params['middle_name'] = $names[1];
        $params['last_name'] = $names[2];
      }
    }
  }

  /**
   * Parse newline separated "name=value" lines into an array.
   *
   * Lines whose value is empty are skipped; names and values are trimmed.
   *
   * @param string $string
   *
   * @return array
   *   name => value
   */
  public static function &makeArray($string) {
    $string = trim($string);

    $values = explode("\n", $string);
    $result = array();
    foreach ($values as $value) {
      list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
      if (!empty($v)) {
        $result[trim($n)] = trim($v);
      }
    }
    return $result;
  }

  /**
   * Given an ezComponents-parsed representation of
   * a text with alternatives return only the first one
   *
   * @param string $full
   *   All alternatives as a long string (or some other text).
   *
   * @return string
   *   only the first alternative found (or the text without alternatives)
   */
  public static function stripAlternatives($full) {
    $matches = array();
    preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);

    // Only use the first alternative when it has visible (non-tag) content.
    if (isset($matches[1]) &&
      trim(strip_tags($matches[1])) != ''
    ) {
      return $matches[1];
    }
    return $full;
  }

  /**
   * Strip leading, trailing, double spaces from string
   * used for postal/greeting/addressee
   *
   * @param string $string
   *   Input string to be cleaned.
   *
   * @return string
   *   the cleaned string
   */
  public static function stripSpaces($string) {
    return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
  }

  /**
   * clean the URL 'path' variable that we use
   * to construct CiviCRM urls by removing characters from the path variable
   *
   * Note: in the default search list "\r" precedes "\r\n", so a CRLF pair
   * ends up as two replacement characters.
   *
   * @param string $string
   *   The input string to be sanitized.
   * @param array $search
   *   The characters to be sanitized (defaults to a built-in list).
   * @param string $replace
   *   The character to replace it with (defaults to '_').
   *
   * @return string
   *   the sanitized string
   */
  public static function stripPathChars(
    $string,
    $search = NULL,
    $replace = NULL
  ) {
    static $_searchChars = NULL;
    static $_replaceChar = NULL;

    if (empty($string)) {
      return $string;
    }

    // Build the defaults once per request.
    if ($_searchChars == NULL) {
      $_searchChars = array(
        '&',
        ';',
        ',',
        '=',
        '$',
        '"',
        "'",
        '\\',
        '<',
        '>',
        '(',
        ')',
        ' ',
        "\r",
        "\r\n",
        "\n",
        "\t",
      );
      $_replaceChar = '_';
    }

    if ($search == NULL) {
      $search = $_searchChars;
    }

    if ($replace == NULL) {
      $replace = $_replaceChar;
    }

    return str_replace($search, $replace, $string);
  }


  /**
   * Use HTMLPurifier to clean up a text string and remove any potential
   * xss attacks. This is primarily used in public facing pages which
   * accept html as the input string
   *
   * The purifier instance is created on first use and then reused.
   *
   * @param string $string
   *   The input string.
   *
   * @return string
   *   the cleaned up string
   */
  public static function purifyHTML($string) {
    static $_filter = NULL;
    if (!$_filter) {
      $config = HTMLPurifier_Config::createDefault();
      $config->set('Core.Encoding', 'UTF-8');

      // Disable the cache entirely
      $config->set('Cache.DefinitionImpl', NULL);

      $_filter = new HTMLPurifier($config);
    }

    return $_filter->purify($string);
  }

  /**
   * Truncate $string; if $string exceeds $maxLen, place "..." at the end
   *
   * Lengths are measured in bytes, but the cut is made on a UTF-8
   * character boundary.
   *
   * @param string $string
   * @param int $maxLen
   *   Maximum length, in bytes, of the returned string.
   *
   * @return string
   */
  public static function ellipsify($string, $maxLen) {
    if (strlen($string) <= $maxLen) {
      return $string;
    }

    // With fewer than three bytes available there is no room for any text
    // next to the ellipsis; without this guard the loop below would never
    // terminate because its length limit would be negative.
    if ($maxLen < 3) {
      return substr('...', 0, max(0, (int) $maxLen));
    }

    $limit = $maxLen - 3;
    $end = $limit;
    // Drop one character at a time until the byte length fits.
    while (strlen($string) > $limit) {
      $string = mb_substr($string, 0, $end, 'UTF-8');
      $end = $end - 1;
    }
    return $string . '...';
  }

  /**
   * Generate a random string.
   *
   * NOTE: uses rand(), which is not suitable for security-sensitive values.
   *
   * @param int $len
   *   Number of characters to generate.
   * @param string $alphabet
   *   Characters to pick from.
   * @return string
   */
  public static function createRandom($len, $alphabet) {
    $alphabetSize = strlen($alphabet);
    $result = '';
    if ($alphabetSize === 0) {
      // Nothing to pick from.
      return $result;
    }
    for ($i = 0; $i < $len; $i++) {
      // Square brackets: the curly-brace offset syntax no longer parses
      // on current PHP versions.
      $result .= $alphabet[rand(1, $alphabetSize) - 1];
    }
    return $result;
  }

  /**
   * Examples:
   * "admin foo" => array(NULL,"admin foo")
   * "cms:admin foo" => array("cms", "admin foo")
   *
   * @param string $delim
   *   Delimiter between prefix and name; may be longer than one character.
   * @param string $string
   *   E.g. "view all contacts". Syntax: "[prefix:]name".
   * @param null $defaultPrefix
   *   Prefix returned when $string contains no delimiter.
   *
   * @return array
   *   (0 => string|NULL $prefix, 1 => string $value)
   */
  public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
    $pos = strpos($string, $delim);
    if ($pos === FALSE) {
      return array($defaultPrefix, $string);
    }
    // Skip the whole delimiter, not just its first character.
    return array(substr($string, 0, $pos), substr($string, $pos + strlen($delim)));
  }

  /**
   * This function will mask part of the the user portion of an Email address (everything before the @)
   *
   * The masked run is placed in the middle of the user portion.
   *
   * @param string $email
   *   The email address to be masked.
   * @param string $maskChar
   *   The character used for masking.
   * @param int $percent
   *   The percentage of the user portion to be masked.
   *
   * @return string
   *   returns the masked Email address
   */
  public static function maskEmail($email, $maskChar = '*', $percent = 50) {
    list($user, $domain) = preg_split("/@/", $email);
    $len = strlen($user);
    // floor() returns a float; cast so the string functions get integers.
    $maskCount = (int) floor($len * $percent / 100);
    $offset = (int) floor(($len - $maskCount) / 2);

    $masked = substr($user, 0, $offset)
      . str_repeat($maskChar, $maskCount)
      . substr($user, $maskCount + $offset);

    return ($masked . '@' . $domain);
  }

  /**
   * This function compares two strings after trimming white space.
   *
   * @param string $strOne
   *   String one.
   * @param string $strTwo
   *   String two.
   * @param bool $case
   *   NOTE: despite the name, a truthy value makes the comparison
   *   case-INsensitive (both strings are lower-cased first); a falsy value
   *   compares them case-sensitively. Kept for existing callers.
   *
   * @return bool
   *   TRUE (string are identical); FALSE (strings are not identical)
   */
  public static function compareStr($strOne, $strTwo, $case) {
    $one = trim($strOne);
    $two = trim($strTwo);

    if ($case) {
      // Convert to lowercase
      $one = strtolower($one);
      $two = strtolower($two);
    }

    // Strict comparison: loose "==" treats numeric-looking strings such as
    // "1e3" and "1000" as equal.
    return $one === $two;
  }

  /**
   * Many parts of the codebase have a convention of internally passing around
   * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
   * (because most other odd characters are %-escaped in URLs; and %-escaped
   * strings don't need any extra escaping in HTML).
   *
   * @param string $htmlUrl
   *   URL with HTML entities.
   * @return string
   *   URL without HTML entities
   */
  public static function unstupifyUrl($htmlUrl) {
    return str_replace('&amp;', '&', $htmlUrl);
  }

  /**
   * Formats a string of attributes for insertion in an html tag.
   *
   * Array values are joined with spaces; values (not names) are passed
   * through htmlspecialchars().
   *
   * @param array $attributes
   *   Attribute name => value or array of values.
   *
   * @return string
   */
  public static function htmlAttributes($attributes) {
    $output = '';
    foreach ($attributes as $name => $vals) {
      $output .= " $name=\"" . htmlspecialchars(implode(' ', (array) $vals)) . '"';
    }
    return ltrim($output);
  }

  /**
   * Determine if $string starts with $fragment.
   *
   * @param string $string
   *   The long string.
   * @param string $fragment
   *   The fragment to look for.
   * @return bool
   *   Always TRUE for an empty fragment.
   */
  public static function startsWith($string, $fragment) {
    if ($fragment === '') {
      return TRUE;
    }
    return substr($string, 0, strlen($fragment)) === $fragment;
  }

  /**
   * Determine if $string ends with $fragment.
   *
   * @param string $string
   *   The long string.
   * @param string $fragment
   *   The fragment to look for.
   * @return bool
   *   Always TRUE for an empty fragment.
   */
  public static function endsWith($string, $fragment) {
    if ($fragment === '') {
      return TRUE;
    }
    return substr($string, -1 * strlen($fragment)) === $fragment;
  }

  /**
   * Select strings using exact names and trailing-"*" prefix patterns.
   *
   * @param string|array $patterns
   *   A pattern ending in "*" selects every entry of $allStrings starting
   *   with the text before the asterisk(s); any other pattern is an exact
   *   name.
   * @param array $allStrings
   *   The known strings.
   * @param bool $allowNew
   *   Whether to return new, unrecognized names.
   * @return array
   *   De-duplicated, sequentially indexed list of selected strings.
   */
  public static function filterByWildcards($patterns, $allStrings, $allowNew = FALSE) {
    $patterns = (array) $patterns;
    $result = array();
    foreach ($patterns as $pattern) {
      if (!self::endsWith($pattern, '*')) {
        if ($allowNew || in_array($pattern, $allStrings)) {
          $result[] = $pattern;
        }
        continue;
      }

      $prefix = rtrim($pattern, '*');
      foreach ($allStrings as $key) {
        if (self::startsWith($key, $prefix)) {
          $result[] = $key;
        }
      }
    }
    return array_values(array_unique($result));
  }

}