Merge pull request #10479 from tschuettler/CRM-20694
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.7 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2017 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2017
32 */
33
34 require_once 'HTML/QuickForm/Rule/Email.php';
35
36 /**
37 * This class contains string functions.
38 */
39 class CRM_Utils_String {
40 const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
41
42 /**
43 * List of all letters and numbers
44 */
45 const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
46
/**
 * Derive a variable-safe name from a human-readable title.
 *
 * The title is munged with '_' as the replacement character and cut to
 * $maxLength. If CRM_Utils_Rule::title() rejects the munged name, a prefix
 * of the md5 hash of the original title is returned instead.
 *
 * @param string $title
 *   Display title to convert.
 * @param int $maxLength
 *   Maximum length of the generated name.
 *
 * @return string
 *   The munged name, or the first $maxLength characters of md5($title).
 */
public static function titleToVar($title, $maxLength = 31) {
  $candidate = self::munge($title, '_', $maxLength);

  // Fall back to a hash prefix so that callers always receive a usable name.
  return CRM_Utils_Rule::title($candidate, $maxLength)
    ? $candidate
    : substr(md5($title), 0, $maxLength);
}
67
/**
 * Replace every run of non-alphanumeric characters with the replacement string.
 *
 * Only ASCII letters and digits are kept (CRM-11744: MySQL table / field
 * names are restricted to that set). The input is trimmed first. When the
 * whole input collapses to exactly one replacement (i.e. it contained no
 * usable characters at all) a random alphanumeric name is generated instead.
 *
 * @param string $name
 *   The name to be worked on.
 * @param string $char
 *   The replacement used for each run of invalid characters.
 * @param int $len
 *   Maximum length of the result; a falsy value disables truncation.
 *
 * @return string
 *   The manipulated string.
 */
public static function munge($name, $char = '_', $len = 63) {
  $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

  // Strict comparison on purpose: with '==' two numeric-looking strings such
  // as '000' and '0' compare equal and a perfectly valid name would be
  // thrown away in favour of a random one.
  if ($name === (string) $char) {
    $name = self::createRandom($len, self::ALPHANUMERIC);
  }

  // Keep variable names short when a limit was requested.
  return $len ? substr($name, 0, $len) : $name;
}
100
/**
 * Turn an underscore separated name into CamelCase.
 *
 * E.g. membership_payment becomes MembershipPayment. A few whole-string
 * inputs ('acl', 'ACL', 'im', 'IM') have fixed results, and a leading 'uf'
 * fragment is rendered as 'UF' (UFGroup, UFJoin, UFMatch, UFField).
 *
 * @param string $string
 *   Name whose fragments are separated by underscores.
 *
 * @return string
 *   The CamelCase form of the name.
 */
public static function convertStringToCamel($string) {
  $fixedNames = array('acl' => 'Acl', 'ACL' => 'Acl', 'im' => 'Im', 'IM' => 'Im');
  if (isset($fixedNames[$string])) {
    return $fixedNames[$string];
  }

  $words = array_map('ucfirst', explode('_', $string));

  // Special case: the UF* entities keep both letters upper case.
  if ($words[0] === 'Uf') {
    $words[0] = 'UF';
  }
  return implode('', $words);
}
130
/**
 * Replace the tail of a variable name with time-derived hex characters.
 *
 * The last $len characters of $name are overwritten with up to $len
 * characters taken from uniqid().
 *
 * @param string $name
 *   Initial variable name.
 * @param int $len
 *   Number of trailing characters to replace.
 *
 * @return string
 *   The name with its tail replaced.
 */
public static function rename($name, $len = 4) {
  // uniqid() is a hex rendering of the current time, most significant digits
  // first. Its leading characters therefore stay the same for many hours;
  // reversing the id puts the fast-changing (microsecond) digits in front so
  // that successive calls actually yield different suffixes.
  $rand = substr(strrev(uniqid()), 0, $len);
  return substr_replace($name, $rand, -$len, $len);
}
146
/**
 * Return the last component of a delimited string.
 *
 * Useful while converting file names to class names etc.
 *
 * @param string $string
 *   The input string. Arrays are not processed.
 * @param string $char
 *   Character used to demarcate the components.
 *
 * @return string|null
 *   The last component, or NULL when $string is an array.
 */
public static function getClassName($string, $char = '_') {
  if (is_array($string)) {
    return NULL;
  }

  $components = explode($char, $string);
  if (empty($components)) {
    return NULL;
  }
  return end($components);
}
169
/**
 * Append one or more names to a delimited string, in place.
 *
 * The delimiter is only inserted when the target string already has
 * content, so an empty target simply becomes the first name. Names that
 * are empty() are skipped.
 *
 * @param string $str
 *   The string to be appended to (modified by reference).
 * @param string $delim
 *   The delimiter to use.
 * @param mixed $name
 *   The string (or array of strings) to append.
 */
public static function append(&$str, $delim, $name) {
  if (empty($name)) {
    return;
  }

  // Treat a single name as a one-element list so both cases share one loop.
  $pieces = is_array($name) ? $name : array($name);
  foreach ($pieces as $piece) {
    if (empty($piece)) {
      continue;
    }
    $str = empty($str) ? $piece : $str . $delim . $piece;
  }
}
209
/**
 * Check whether a string is plain ASCII (optionally accepting UTF-8 too).
 *
 * With the mbstring extension the decision is made by strict encoding
 * detection. Without it, whitespace is removed and the string is scanned
 * for bytes in the ranges 0x00-0x20 and 0x7F-0xFF.
 *
 * @param string $str
 *   Input string.
 * @param bool $utf8
 *   Also accept valid UTF-8 when the string is not pure ASCII (default yes).
 *
 * @return bool
 *   TRUE if the string is ASCII (or UTF-8, when $utf8 is set).
 */
public static function isAscii($str, $utf8 = TRUE) {
  if (function_exists('mb_detect_encoding')) {
    $candidates = $utf8 ? array('ASCII', 'UTF-8') : array('ASCII');
    $detected = mb_detect_encoding($str, $candidates, TRUE);
    return ($detected == 'ASCII' || $detected == 'UTF-8');
  }

  // No mbstring available: strip all white space, then look for low- or
  // high-ascii bytes.
  // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
  $str = preg_replace('/\s+/', '', $str);
  if (!preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
    return TRUE;
  }

  // Suspicious bytes were found; they are only acceptable as part of UTF-8.
  return $utf8 ? self::isUtf8($str) : FALSE;
}
252
/**
 * Determine the string replacements for redaction on the basis of regular
 * expressions.
 *
 * Every distinct piece of $str matched by any rule becomes a key of the
 * result; its value is the rule's replacement prefix followed by the first
 * five characters of the md5 hash of the matched text.
 *
 * The result only ever describes the string passed to this call: no state
 * is kept between invocations.
 *
 * @param string $str
 *   Input string.
 * @param array $regexRules
 *   Map of regular expression => replacement prefix.
 *
 * @return array
 *   Map of matched text => redacted replacement, or
 *   CRM_Core_DAO::$_nullArray when there are no rules or nothing matched.
 */
public static function regex($str, $regexRules) {
  if (empty($regexRules) || !isset($str)) {
    return CRM_Core_DAO::$_nullArray;
  }

  // Matched text => replacement prefix of the rule that first found it.
  $foundBy = array();
  foreach ($regexRules as $pattern => $replacement) {
    $matches = array();
    preg_match_all($pattern, $str, $matches);
    if (empty($matches[0])) {
      continue;
    }
    foreach ($matches[0] as $text) {
      if (!array_key_exists($text, $foundBy)) {
        $foundBy[$text] = $replacement;
      }
    }
  }

  if (empty($foundBy)) {
    return CRM_Core_DAO::$_nullArray;
  }

  $redacted = array();
  foreach ($foundBy as $text => $fallbackPrefix) {
    // Numeric-looking matches come back from the array as integer keys.
    $text = (string) $text;

    // The first rule (in rule order) that matches the text on its own
    // decides the prefix. A match that depended on surrounding context may
    // not match in isolation; it then keeps the prefix of the rule that
    // found it.
    $prefix = $fallbackPrefix;
    foreach ($regexRules as $pattern => $replacement) {
      if (preg_match($pattern, $text)) {
        $prefix = $replacement;
        break;
      }
    }
    $redacted[$text] = $prefix . substr(md5($text), 0, 5);
  }
  return $redacted;
}
296
/**
 * Apply a set of case-insensitive string replacements to a string.
 *
 * The rules are applied one after another in array order, each on the
 * result of the previous one.
 *
 * @param string $str
 *   The text to redact.
 * @param array $stringRules
 *   Map of text to look for => replacement text.
 *
 * @return mixed
 *   The redacted text ($str unchanged when there are no rules).
 */
public static function redaction($str, $stringRules) {
  if (empty($stringRules)) {
    return $str;
  }

  $redacted = $str;
  foreach ($stringRules as $needle => $substitute) {
    $redacted = str_ireplace($needle, $substitute, $redacted);
  }
  return $redacted;
}
314
/**
 * Determine if a string is composed only of utf8 characters.
 *
 * Uses strict mbstring encoding detection when the extension is loaded and
 * a byte-level regular expression otherwise.
 *
 * @param string $str
 *   Input string.
 *
 * @return bool
 *   TRUE when the string is valid UTF-8.
 */
public static function isUtf8($str) {
  // The function name must be passed as a string; a bare identifier is an
  // undefined constant (a fatal error on PHP 8).
  if (function_exists('mb_detect_encoding')) {
    return mb_detect_encoding($str, array('UTF-8'), TRUE) !== FALSE;
  }

  // eliminate all white space from the string
  $str = preg_replace('/\s+/', '', $str);

  // pattern taken from the php.net function documentation for utf8decode();
  // comment by JF Sebastian, 30-Mar-2005. The four-byte alternative must
  // start with the byte \xf0 (the original read the literal text "f0").
  return (bool) preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
}
340
/**
 * Determine if two hrefs are equivalent (fuzzy match).
 *
 * Both urls are lower-cased and parsed; they match when their paths are
 * equal and extractURLVarValue() yields the same value for both query
 * strings. Scheme, host and all other query parameters are ignored.
 *
 * @param string $url1
 *   The first url to be matched.
 * @param string $url2
 *   The second url to be matched against.
 *
 * @return bool
 *   TRUE if the urls match, else FALSE.
 */
public static function match($url1, $url2) {
  $parts1 = parse_url(strtolower($url1));
  $parts2 = parse_url(strtolower($url2));

  // parse_url() omits components that are absent (and returns FALSE for a
  // seriously malformed url), so never index the result blindly.
  $path1 = isset($parts1['path']) ? $parts1['path'] : NULL;
  $path2 = isset($parts2['path']) ? $parts2['path'] : NULL;
  if ($path1 != $path2) {
    return FALSE;
  }

  $query1 = isset($parts1['query']) ? $parts1['query'] : NULL;
  $query2 = isset($parts2['query']) ? $parts2['query'] : NULL;
  return self::extractURLVarValue($query1) == self::extractURLVarValue($query2);
}
366
/**
 * Extract the civicrm path from the query part of a url.
 *
 * The query string is searched for the parameter whose name equals the
 * configured userFrameworkURLVar; the value of its first occurrence is
 * returned.
 *
 * @param string $query
 *   The query string of a url (the part after '?').
 *
 * @return string|null
 *   civicrm url (eg: civicrm/contact/search), or NULL when the parameter
 *   is not present.
 */
public static function extractURLVarValue($query) {
  // Callers pass NULL for urls without a query string; nothing to search.
  if ($query === NULL || $query === '') {
    return NULL;
  }

  $urlVar = CRM_Core_Config::singleton()->userFrameworkURLVar;

  foreach (explode('&', $query) as $pair) {
    // Skip fragments without a name ('=' missing or in first position).
    if (strpos($pair, '=')) {
      // Limit the split to two parts so that a value which itself
      // contains '=' is returned in full.
      list($key, $value) = explode('=', $pair, 2);
      if ($key == $urlVar) {
        return $value;
      }
    }
  }
  return NULL;
}
391
/**
 * Translate a true/false/yes/no style string to a boolean.
 *
 * 'y', 'yes', 't', 'true' (any letter case) and '1' are TRUE; every other
 * value, including anything that is not a scalar, is FALSE.
 *
 * @param string $str
 *   The string to be translated.
 *
 * @return bool
 */
public static function strtobool($str) {
  return is_scalar($str) && (bool) preg_match('/^(y(es)?|t(rue)?|1)$/i', $str);
}
410
/**
 * Translate a true/false/yes/no style string to '1' or '0'.
 *
 * 'y', 'yes', 't', 'true' and '1' give '1'; 'n', 'no', 'f', 'false' and '0'
 * give '0' (letters in any case). Anything else, including non-scalar
 * input, gives FALSE.
 *
 * @param string $str
 *   The string to be translated.
 *
 * @return string|bool
 *   '1', '0', or FALSE when the value is not recognised.
 */
public static function strtoboolstr($str) {
  if (is_scalar($str)) {
    if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
      return '1';
    }
    if (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
      return '0';
    }
  }
  return FALSE;
}
434
/**
 * Convert a HTML string into a text one using html2text.
 *
 * Placeholders of the form {name} (letters, underscores and dots) are
 * prefixed with "token:" before the conversion and the prefix is removed
 * again afterwards - presumably so the converter leaves them untouched.
 *
 * @param string $html
 *   The string to be converted.
 *
 * @return string
 *   The converted string.
 */
public static function htmlToText($html) {
  require_once 'packages/html2text/rcube_html2text.php';

  $marked = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
  $converter = new rcube_html2text($marked);

  return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $converter->get_text());
}
452
/**
 * Split a free-form personal name into first / middle / last name.
 *
 * Quotes are removed first. Two layouts are recognised:
 *  - "lname, fname [mname]" when the name contains a comma;
 *  - "fname [mname] lname" otherwise (with more than three words the third
 *    word is used as last name and the rest is ignored).
 * Words may be separated by any amount of white space.
 *
 * @param string $string
 *   The name to parse. Nothing is written when it is empty.
 * @param array $params
 *   Receives 'first_name' and, when present, 'middle_name' / 'last_name'.
 */
public static function extractName($string, &$params) {
  $name = trim($string);
  if (empty($name)) {
    return;
  }

  // strip out quotes
  $name = str_replace(array('"', '\''), '', $name);

  if (strpos($name, ',') !== FALSE) {
    // name has a comma - assume lname, fname [mname]
    $names = explode(',', $name);
    $params['last_name'] = trim($names[0]);

    // Split on runs of white space so that repeated spaces do not produce
    // empty first / middle names.
    $given = preg_split('/\s+/', $names[1], -1, PREG_SPLIT_NO_EMPTY);
    $params['first_name'] = isset($given[0]) ? $given[0] : '';
    if (isset($given[1])) {
      $params['middle_name'] = $given[1];
    }
    return;
  }

  // name has no comma - assume fname [mname] lname
  $names = preg_split('/\s+/', $name, -1, PREG_SPLIT_NO_EMPTY);
  $count = count($names);

  $params['first_name'] = $count ? $names[0] : '';
  if ($count == 2) {
    $params['last_name'] = $names[1];
  }
  elseif ($count > 2) {
    $params['middle_name'] = $names[1];
    $params['last_name'] = $names[2];
  }
}
506
/**
 * Parse a block of "name=value" lines into an associative array.
 *
 * The text is trimmed and split on "\n"; each line is split on the first
 * '=' via CRM_Utils_System::explode(). Names and values are trimmed, and
 * lines whose value is empty() are dropped.
 *
 * @param string $string
 *   One "name=value" pair per line.
 *
 * @return array
 *   Map of name => value (returned by reference).
 */
public static function &makeArray($string) {
  $result = array();

  foreach (explode("\n", trim($string)) as $line) {
    list($key, $val) = CRM_Utils_System::explode('=', $line, 2);
    if (empty($val)) {
      continue;
    }
    $result[trim($key)] = trim($val);
  }

  return $result;
}
525
/**
 * Reduce an ezComponents-parsed text with alternatives to its first one.
 *
 * The text between the "-ALTERNATIVE ITEM 0-" and "-ALTERNATIVE ITEM 1-"
 * markers is returned, provided it still has content once tags and
 * surrounding white space are removed. In every other case the input is
 * returned unchanged.
 *
 * @param string $full
 *   All alternatives as a long string (or some other text).
 *
 * @return string
 *   Only the first alternative found (or the text without alternatives).
 */
public static function stripAlternatives($full) {
  $found = array();
  preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $found);

  if (!isset($found[1])) {
    return $full;
  }
  if (trim(strip_tags($found[1])) == '') {
    return $full;
  }
  return $found[1];
}
549
/**
 * Strip leading and trailing white space and collapse repeated white space.
 *
 * Every run of two or more white space characters becomes a single space.
 * Used for postal/greeting/addressee strings.
 *
 * @param string $string
 *   Input string to be cleaned.
 *
 * @return string
 *   The cleaned string; empty() input is handed back untouched.
 */
public static function stripSpaces($string) {
  if (empty($string)) {
    return $string;
  }
  return preg_replace("/\s{2,}/", " ", trim($string));
}
563
/**
 * Clean the URL 'path' variable that we use to construct CiviCRM urls by
 * replacing unwanted characters.
 *
 * @param string $string
 *   The input string to be sanitized.
 * @param array $search
 *   The characters to be sanitized. A value loosely equal to NULL selects
 *   the built-in list.
 * @param string $replace
 *   The character to replace them with. A value loosely equal to NULL
 *   (which includes '') selects '_'.
 *
 * @return string
 *   The sanitized string; empty() input is handed back untouched.
 */
public static function stripPathChars(
  $string,
  $search = NULL,
  $replace = NULL
) {
  if (empty($string)) {
    return $string;
  }

  // Replacements are applied in list order, so "\r" is handled before
  // "\r\n" gets a chance to match.
  $unsafe = ($search == NULL)
    ? array('&', ';', ',', '=', '$', '"', "'", '\\', '<', '>', '(', ')', ' ', "\r", "\r\n", "\n", "\t")
    : $search;
  $filler = ($replace == NULL) ? '_' : $replace;

  return str_replace($unsafe, $filler, $string);
}
623
624
/**
 * Use HTMLPurifier to clean up a text string and remove any potential xss
 * attacks. This is primarily used in public facing pages which accept html
 * as the input string.
 *
 * The purifier is created on first use (UTF-8 encoding, definition cache
 * disabled) and reused by later calls.
 *
 * @param string $string
 *   The input string.
 *
 * @return string
 *   The cleaned up string.
 */
public static function purifyHTML($string) {
  static $purifier = NULL;

  if ($purifier === NULL) {
    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Core.Encoding', 'UTF-8');
    // Disable the cache entirely
    $settings->set('Cache.DefinitionImpl', NULL);

    $purifier = new HTMLPurifier($settings);
  }

  return $purifier->purify($string);
}
650
/**
 * Truncate $string; if $string exceeds $maxLen, place "..." at the end.
 *
 * Lengths are measured in bytes, but the cut is always made on a UTF-8
 * character boundary, so the result is never longer than $maxLen bytes and
 * never ends in half a character.
 *
 * @param string $string
 *   The text to shorten.
 * @param int $maxLen
 *   Maximum length of the result in bytes, ellipsis included.
 *
 * @return string
 *   $string itself when it fits, otherwise its beginning followed by "...".
 *   When $maxLen leaves no room for any text, only (part of) the ellipsis
 *   is returned.
 */
public static function ellipsify($string, $maxLen) {
  if (strlen($string) <= $maxLen) {
    return $string;
  }

  // Bytes available for text once the ellipsis is accounted for.
  $budget = $maxLen - 3;
  if ($budget <= 0) {
    // Without this guard the shortening loop below could never reach its
    // target and would spin forever.
    return substr('...', 0, max(0, $maxLen));
  }

  $head = mb_substr($string, 0, $budget, 'UTF-8');
  // Multi-byte characters may still push the byte length over the budget;
  // drop whole characters from the end until it fits.
  while (strlen($head) > $budget) {
    $head = mb_substr($head, 0, mb_strlen($head, 'UTF-8') - 1, 'UTF-8');
  }
  return $head . '...';
}
673
/**
 * Generate a random string.
 *
 * Each character is picked independently from $alphabet. The generator is
 * not cryptographically secure; do not use the result as a secret.
 *
 * @param int $len
 *   Number of characters to generate.
 * @param string $alphabet
 *   The characters to choose from (single-byte characters).
 *
 * @return string
 *   $len characters from $alphabet; '' when $alphabet is empty.
 */
public static function createRandom($len, $alphabet) {
  $alphabetSize = strlen($alphabet);
  if ($alphabetSize === 0) {
    // Nothing to pick from.
    return '';
  }

  $result = '';
  for ($i = 0; $i < $len; $i++) {
    // Square brackets: the curly-brace offset syntax no longer parses on
    // current PHP versions.
    $result .= $alphabet[mt_rand(0, $alphabetSize - 1)];
  }
  return $result;
}
689
/**
 * Split a "[prefix<delim>]name" expression into prefix and name.
 *
 * Examples (with ":" as delimiter):
 * "admin foo" => array(NULL,"admin foo")
 * "cms:admin foo" => array("cms", "admin foo")
 *
 * Only the first occurrence of the delimiter is significant.
 *
 * @param string $delim
 *   The delimiter between prefix and name; may be longer than one character.
 * @param string $string
 *   E.g. "view all contacts". Syntax: "[prefix:]name".
 * @param string|null $defaultPrefix
 *   Prefix to report when $string has none.
 *
 * @return array
 *   (0 => string|NULL $prefix, 1 => string $value)
 */
public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
  $pos = strpos($string, $delim);
  if ($pos === FALSE) {
    return array($defaultPrefix, $string);
  }

  // Skip the whole delimiter, not just one character, and make sure an
  // empty remainder is reported as '' on every PHP version.
  $value = (string) substr($string, $pos + strlen($delim));
  return array(substr($string, 0, $pos), $value);
}
712
/**
 * Mask part of the user portion of an email address (everything before
 * the last @).
 *
 * The masked stretch sits in the middle of the user portion; the domain is
 * left untouched. Input without an "@" is treated as a bare user portion.
 *
 * @param string $email
 *   The email address to be masked.
 * @param string $maskChar
 *   The character used for masking.
 * @param int $percent
 *   The percentage of the user portion to be masked (clamped to 0..100).
 *
 * @return string
 *   The masked email address.
 */
public static function maskEmail($email, $maskChar = '*', $percent = 50) {
  // The domain starts at the LAST "@"; keep the separator with the domain so
  // that nothing is lost or invented when putting the parts back together.
  $at = strrpos($email, '@');
  if ($at === FALSE) {
    $user = $email;
    $domainPart = '';
  }
  else {
    $user = (string) substr($email, 0, $at);
    $domainPart = substr($email, $at);
  }

  $len = strlen($user);
  $percent = max(0, min(100, $percent));
  $maskCount = (int) floor($len * $percent / 100);
  $offset = (int) floor(($len - $maskCount) / 2);

  return substr($user, 0, $offset)
    . str_repeat($maskChar, $maskCount)
    . substr($user, $offset + $maskCount)
    . $domainPart;
}
738
/**
 * Compare two strings, ignoring surrounding white space.
 *
 * NOTE: despite its name, a truthy $case makes the comparison case
 * INSENSITIVE (both strings are lower-cased first); a falsy $case compares
 * the trimmed strings as they are.
 *
 * @param string $strOne
 *   String one.
 * @param string $strTwo
 *   String two.
 * @param bool $case
 *   TRUE to lower-case both strings before comparing, FALSE to compare
 *   them unchanged.
 *
 * @return bool
 *   TRUE (strings are considered identical); FALSE (they are not).
 */
public static function compareStr($strOne, $strTwo, $case) {
  $left = trim($strOne);
  $right = trim($strTwo);

  if ($case) {
    $left = strtolower($left);
    $right = strtolower($right);
  }

  return $left == $right;
}
776
/**
 * Turn an HTML-encoded URL back into a plain URL.
 *
 * Many parts of the codebase have a convention of internally passing around
 * HTML-encoded URLs. In practice that means "&" has been written as
 * "&amp;" (other odd characters are %-escaped, and %-escaped strings need
 * no extra escaping in HTML), so only that entity is decoded here.
 *
 * @param string $htmlUrl
 *   URL with HTML entities.
 *
 * @return string
 *   URL without HTML entities.
 */
public static function unstupifyUrl($htmlUrl) {
  $plainUrl = str_replace('&amp;', '&', $htmlUrl);
  return $plainUrl;
}
791
/**
 * Format a set of attributes for insertion in an html tag.
 *
 * Each entry becomes name="value"; a list of values is joined with spaces
 * and the value is passed through htmlspecialchars(). Attribute names are
 * emitted as given.
 *
 * @param array $attributes
 *   Map of attribute name => value or list of values.
 *
 * @return string
 *   The attributes separated by single spaces ('' when there are none).
 */
public static function htmlAttributes($attributes) {
  $pairs = array();
  foreach ($attributes as $name => $vals) {
    $pairs[] = "$name=\"" . htmlspecialchars(implode(' ', (array) $vals)) . '"';
  }
  return ltrim(implode(' ', $pairs));
}
806
/**
 * Determine if $string starts with $fragment.
 *
 * An empty fragment matches every string.
 *
 * @param string $string
 *   The long string.
 * @param string $fragment
 *   The fragment to look for.
 *
 * @return bool
 */
public static function startsWith($string, $fragment) {
  return $fragment === '' || substr($string, 0, strlen($fragment)) === $fragment;
}
823
/**
 * Determine if $string ends with $fragment.
 *
 * An empty fragment matches every string.
 *
 * @param string $string
 *   The long string.
 * @param string $fragment
 *   The fragment to look for.
 *
 * @return bool
 */
public static function endsWith($string, $fragment) {
  return $fragment === '' || substr($string, -1 * strlen($fragment)) === $fragment;
}
840
/**
 * Select strings from a list using exact names and "prefix*" wildcards.
 *
 * A pattern ending in '*' selects every known string that starts with the
 * text before the trailing asterisk(s). Any other pattern selects itself,
 * provided it is a known string or $allowNew is set.
 *
 * @param string|array $patterns
 *   One pattern or a list of patterns.
 * @param array $allStrings
 *   The known strings to select from.
 * @param bool $allowNew
 *   Whether to return new, unrecognized names.
 *
 * @return array
 *   The selected strings in order of selection, without duplicates.
 */
public static function filterByWildcards($patterns, $allStrings, $allowNew = FALSE) {
  $selected = array();

  foreach ((array) $patterns as $pattern) {
    if (substr($pattern, -1) === '*') {
      // Wildcard: collect every known string sharing the prefix.
      $prefix = rtrim($pattern, '*');
      $prefixLen = strlen($prefix);
      foreach ($allStrings as $candidate) {
        if ($prefixLen === 0 || substr($candidate, 0, $prefixLen) === $prefix) {
          $selected[] = $candidate;
        }
      }
    }
    elseif ($allowNew || in_array($pattern, $allStrings)) {
      $selected[] = $pattern;
    }
  }

  return array_values(array_unique($selected));
}
868
869 }