3 +--------------------------------------------------------------------+
4 | Copyright CiviCRM LLC. All rights reserved. |
6 | This work is published under the GNU AGPLv3 license with some |
7 | permitted exceptions and without any warranty. For full license |
8 | and copyright information, see https://civicrm.org/licensing |
9 +--------------------------------------------------------------------+
15 * @copyright CiviCRM LLC https://civicrm.org/licensing
18 use function xKerman\Restricted\unserialize
;
19 use xKerman\Restricted\UnserializeFailedException
;
21 require_once 'HTML/QuickForm/Rule/Email.php';
24 * This class contains string functions.
26 class CRM_Utils_String
{
27 const COMMA
= ",", SEMICOLON
= ";", SPACE
= " ", TAB
= "\t", LINEFEED
= "\n", CARRIAGELINE
= "\r\n", LINECARRIAGE
= "\n\r", CARRIAGERETURN
= "\r";
30 * List of all letters and numbers
32 const ALPHANUMERIC
= 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
35 * Convert a display name into a potential variable name.
37 * @param string $title title of the string
38 * @param int $maxLength
41 * An equivalent variable name.
43 public static function titleToVar($title, $maxLength = 31) {
44 $variable = self
::munge($title, '_', $maxLength);
46 if (CRM_Utils_Rule
::title($variable, $maxLength)) {
50 // if longer than the maxLength lets just return a substr of the
51 // md5 to prevent errors downstream
52 return substr(md5($title), 0, $maxLength);
56 * Replace all non alpha numeric characters and spaces with the replacement character.
59 * The name to be worked on.
61 * The character to use for non-valid chars.
63 * Length of valid variables.
66 * returns the manipulated string
68 public static function munge($name, $char = '_', $len = 63) {
69 // Replace all white space and non-alpha numeric with $char
70 // we only use the ascii character set since mysql does not create table names / field names otherwise
72 $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));
74 //If there are no ascii characters present.
76 $name = self
::createRandom($len, self
::ALPHANUMERIC
);
80 // lets keep variable names short
81 return substr($name, 0, $len);
/**
 * Turn an underscore-separated string into camel case.
 *
 * @param string $str
 *   Text such as "foo_bar".
 * @param bool $ucFirst
 *   TRUE produces `CamelCase`; FALSE produces `camelCase`.
 *
 * @return string
 *   The input with underscores removed and each fragment capitalised.
 */
public static function convertStringToCamel($str, $ucFirst = TRUE) {
  $joined = '';
  foreach (explode('_', $str) as $word) {
    $joined .= ucfirst($word);
  }
  if ($ucFirst) {
    return $joined;
  }
  return lcfirst($joined);
}
/**
 * Overwrite the tail of a variable name with characters taken from uniqid().
 *
 * @param string $name
 *   Initial variable name.
 * @param int $len
 *   Number of trailing characters to overwrite.
 *
 * @return string
 *   The name with its last $len characters replaced.
 */
public static function rename($name, $len = 4) {
  $randomTail = substr(uniqid(), 0, $len);
  $tailStart = -$len;
  return substr_replace($name, $randomTail, $tailStart, $len);
}
119 * Takes a string and returns the last tuple of the string.
121 * Useful while converting file names to class names etc
123 * @param string $string
125 * @param string $char
126 * Character used to demarcate the components
131 public static function getClassName($string, $char = '_') {
133 if (!is_array($string)) {
134 $names = explode($char, $string);
136 if (!empty($names)) {
137 return array_pop($names);
142 * Appends a name to a string and separated by delimiter.
144 * Does the right thing for an empty string
147 * The string to be appended to.
148 * @param string $delim
149 * The delimiter to use.
151 * The string (or array of strings) to append.
153 public static function append(&$str, $delim, $name) {
158 if (is_array($name)) {
159 foreach ($name as $n) {
176 $str .= $delim . $name;
182 * Determine if the string is composed only of ascii characters.
187 * Attempt utf8 match on failure (default yes).
190 * true if string is ascii
192 public static function isAscii($str, $utf8 = TRUE) {
193 if (!function_exists('mb_detect_encoding')) {
194 // eliminate all white space from the string
195 $str = preg_replace('/\s+/', '', $str);
196 // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
198 // match low- or high-ascii characters
199 if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
200 // || // low ascii characters
201 // high ascii characters
202 // preg_match( '/[\x7F-\xFF]/', $str ) ) {
204 // if we did match, try for utf-8, or iso8859-1
206 return self
::isUtf8($str);
219 $enc = mb_detect_encoding($str, $order, TRUE);
220 return ($enc == 'ASCII' ||
$enc == 'UTF-8');
/**
 * Encode a string using the URL-safe Base64 alphabet, without padding.
 *
 * @param string $v
 *   Raw data to encode.
 *
 * @return string
 *   Base64 text where "+" and "/" are swapped for "-" and "_" and the
 *   trailing "=" padding is removed.
 * @see https://tools.ietf.org/html/rfc4648#section-5
 */
public static function base64UrlEncode($v) {
  $standard = base64_encode($v);
  $urlSafe = strtr($standard, '+/', '-_');
  return rtrim($urlSafe, '=');
}
/**
 * Decode a string that was encoded with the URL-safe Base64 alphabet.
 *
 * @param string $v
 *   URL-safe Base64 text; padding may be absent.
 *
 * @return false|string
 *   The decoded data, or whatever base64_decode() reports on failure.
 * @see https://tools.ietf.org/html/rfc4648#section-5
 */
public static function base64UrlDecode($v) {
  // base64_decode() tolerates missing "=" padding, so none is re-added.
  $standard = strtr($v, '-_', '+/');
  return base64_decode($standard);
}
250 * Determine the string replacements for redaction.
251 * on the basis of the regular expressions
255 * @param array $regexRules
256 * Regular expression to be matched w/ replacements.
259 * array of strings w/ corresponding redacted outputs
261 public static function regex($str, $regexRules) {
262 // redact the regular expressions
263 if (!empty($regexRules) && isset($str)) {
264 static $matches, $totalMatches, $match = [];
265 foreach ($regexRules as $pattern => $replacement) {
266 preg_match_all($pattern, $str, $matches);
267 if (!empty($matches[0])) {
268 if (empty($totalMatches)) {
269 $totalMatches = $matches[0];
272 $totalMatches = array_merge($totalMatches, $matches[0]);
274 $match = array_flip($totalMatches);
279 if (!empty($match)) {
280 foreach ($match as $matchKey => & $dontCare) {
281 foreach ($regexRules as $pattern => $replacement) {
282 if (preg_match($pattern, $matchKey)) {
283 $dontCare = $replacement . substr(md5($matchKey), 0, 5);
/**
 * Apply case-insensitive string replacements to redact a text.
 *
 * @param string $str
 *   Text to redact.
 * @param array $stringRules
 *   Map of (string to find => replacement). Rules are applied in order.
 *
 * @return string
 *   The redacted text; returned unchanged when there are no rules.
 */
public static function redaction($str, $stringRules) {
  if (empty($stringRules)) {
    return $str;
  }
  foreach ($stringRules as $needle => $substitute) {
    $str = str_ireplace($needle, $substitute, $str);
  }
  return $str;
}
/**
 * Determine if a string is composed only of valid UTF-8 sequences.
 *
 * @param string $str
 *   Input string.
 *
 * @return bool
 *   TRUE when the string is valid UTF-8.
 */
public static function isUtf8($str) {
  // The function name must be passed as a string; a bare identifier is read
  // as an undefined constant, which is a fatal Error on PHP 8.
  if (function_exists('mb_detect_encoding')) {
    return mb_detect_encoding($str, ['UTF-8'], TRUE) !== FALSE;
  }

  // Fallback for installs without mbstring.
  // Eliminate all white space from the string.
  $str = preg_replace('/\s+/', '', $str);

  // Byte-level pattern for well-formed UTF-8 (adapted from a php.net user
  // comment on utf8_decode() by JF Sebastian, 30-Mar-2005). The 4-byte branch
  // must begin with \xf0; a literal "f0" rejected every such character.
  $wellFormed = '/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/';
  // preg_match() yields 1/0/FALSE; normalise to the boolean the other branch returns.
  return preg_match($wellFormed, $str) === 1;
}
338 * Determine if two hrefs are equivalent (fuzzy match)
340 * @param string $url1
341 * The first url to be matched.
342 * @param string $url2
343 * The second url to be matched against.
346 * true if the urls match, else false
348 public static function match($url1, $url2) {
349 $url1 = strtolower($url1);
350 $url2 = strtolower($url2);
352 $url1Str = parse_url($url1);
353 $url2Str = parse_url($url2);
355 if ($url1Str['path'] == $url2Str['path'] &&
356 self
::extractURLVarValue(CRM_Utils_Array
::value('query', $url1Str)) == self
::extractURLVarValue(CRM_Utils_Array
::value('query', $url2Str))
364 * Extract the civicrm path from the url.
366 * @param string $query
369 * @return string|null
370 * civicrm url (eg: civicrm/contact/search)
372 public static function extractURLVarValue($query) {
373 $config = CRM_Core_Config
::singleton();
374 $urlVar = $config->userFrameworkURLVar
;
376 $params = explode('&', $query);
377 foreach ($params as $p) {
378 if (strpos($p, '=')) {
379 list($k, $v) = explode('=', $p);
389 * Translate a true/false/yes/no string to a 0 or 1 value
392 * The string to be translated.
396 public static function strtobool($str) {
397 if (!is_scalar($str)) {
401 if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
408 * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
411 * The string to be translated.
415 public static function strtoboolstr($str) {
416 if (!is_scalar($str)) {
420 if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
423 elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
432 * Convert a HTML string into a text one using html2text
434 * @param string $html
435 * The string to be converted.
438 * the converted string
440 public static function htmlToText($html) {
441 require_once 'html2text/rcube_html2text.php';
442 $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
443 $converter = new rcube_html2text($token_html);
444 $token_text = $converter->get_text();
445 $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
451 * @param array $params
453 public static function extractName($string, &$params) {
454 $name = trim($string);
460 $name = str_replace('"', '', $name);
461 $name = str_replace('\'', '', $name);
463 // check for comma in name
464 if (strpos($name, ',') !== FALSE) {
466 // name has a comma - assume lname, fname [mname]
467 $names = explode(',', $name);
468 if (count($names) > 1) {
469 $params['last_name'] = trim($names[0]);
471 // check for space delim
472 $fnames = explode(' ', trim($names[1]));
473 if (count($fnames) > 1) {
474 $params['first_name'] = trim($fnames[0]);
475 $params['middle_name'] = trim($fnames[1]);
478 $params['first_name'] = trim($fnames[0]);
482 $params['first_name'] = trim($names[0]);
486 // name has no comma - assume fname [mname] fname
487 $names = explode(' ', $name);
488 if (count($names) == 1) {
489 $params['first_name'] = $names[0];
491 elseif (count($names) == 2) {
492 $params['first_name'] = $names[0];
493 $params['last_name'] = $names[1];
496 $params['first_name'] = $names[0];
497 $params['middle_name'] = $names[1];
498 $params['last_name'] = $names[2];
508 public static function &makeArray($string) {
509 $string = trim($string);
511 $values = explode("\n", $string);
513 foreach ($values as $value) {
514 list($n, $v) = CRM_Utils_System
::explode('=', $value, 2);
516 $result[trim($n)] = trim($v);
523 * Given an ezComponents-parsed representation of
524 * a text with alternatives return only the first one
526 * @param string $full
527 * All alternatives as a long string (or some other text).
530 * only the first alternative found (or the text without alternatives)
532 public static function stripAlternatives($full) {
534 preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
536 if (isset($matches[1]) &&
537 trim(strip_tags($matches[1])) != ''
/**
 * Trim a string and collapse every run of two or more whitespace characters
 * into a single space. Used for postal/greeting/addressee strings.
 *
 * @param string $string
 *   Input string to be cleaned.
 *
 * @return string
 *   The cleaned string; empty() inputs are returned untouched.
 */
public static function stripSpaces($string) {
  if (empty($string)) {
    return $string;
  }
  $trimmed = trim($string);
  return preg_replace("/\s{2,}/", " ", $trimmed);
}
561 * clean the URL 'path' variable that we use
562 * to construct CiviCRM urls by removing characters from the path variable
564 * @param string $string
565 * The input string to be sanitized.
566 * @param array $search
567 * The characters to be sanitized.
568 * @param string $replace
569 * The character to replace it with.
572 * the sanitized string
574 public static function stripPathChars(
579 static $_searchChars = NULL;
580 static $_replaceChar = NULL;
582 if (empty($string)) {
586 if ($_searchChars == NULL) {
609 if ($search == NULL) {
610 $search = $_searchChars;
613 if ($replace == NULL) {
614 $replace = $_replaceChar;
617 return str_replace($search, $replace, $string);
621 * Use HTMLPurifier to clean up a text string and remove any potential
622 * xss attacks. This is primarily used in public facing pages which
623 * accept html as the input string
625 * @param string $string
629 * the cleaned up string
631 public static function purifyHTML($string) {
632 static $_filter = NULL;
634 $config = HTMLPurifier_Config
::createDefault();
635 $config->set('Core.Encoding', 'UTF-8');
636 $config->set('Attr.AllowedFrameTargets', ['_blank', '_self', '_parent', '_top']);
638 // Disable the cache entirely
639 $config->set('Cache.DefinitionImpl', NULL);
641 $_filter = new HTMLPurifier($config);
644 return $_filter->purify($string);
/**
 * Truncate $string; if $string exceeds $maxLen, place "..." at the end.
 *
 * Lengths are counted in UTF-8 characters, not bytes.
 *
 * @param string $string
 *   Text to shorten.
 * @param int $maxLen
 *   Maximum number of characters in the result.
 *
 * @return string
 *   The original string when it already fits, otherwise a string of at most
 *   $maxLen characters.
 */
public static function ellipsify($string, $maxLen) {
  if (mb_strlen($string, 'UTF-8') <= $maxLen) {
    return $string;
  }
  if ($maxLen < 3) {
    // There is no room for the ellipsis. Without this guard a negative length
    // reaches mb_substr(), which trims from the end instead and can return a
    // result longer than the input.
    return mb_substr($string, 0, max(0, $maxLen), 'UTF-8');
  }
  return mb_substr($string, 0, $maxLen - 3, 'UTF-8') . '...';
}
/**
 * Generate a random string.
 *
 * Characters are drawn with random_int(), so the output is suitable for
 * values that must be hard to guess. It no longer depends on the srand() /
 * mt_srand() seed.
 *
 * @param int $len
 *   Number of characters to generate.
 * @param string $alphabet
 *   The characters to draw from (treated as single bytes).
 *
 * @return string
 *   A string of $len characters, or '' when the alphabet is empty.
 */
public static function createRandom($len, $alphabet) {
  $maxIndex = strlen($alphabet) - 1;
  if ($maxIndex < 0) {
    // Nothing to draw from; random_int(0, -1) would throw.
    return '';
  }
  $result = '';
  for ($i = 0; $i < $len; $i++) {
    $result .= $alphabet[random_int(0, $maxIndex)];
  }
  return $result;
}
/**
 * Split a "[prefix:]name" string into its prefix and value.
 *
 * Examples (with ":" as the delimiter):
 *   "admin foo" => array(NULL, "admin foo")
 *   "cms:admin foo" => array("cms", "admin foo")
 *
 * @param string $delim
 *   The delimiter separating the prefix from the value.
 * @param string $string
 *   E.g. "view all contacts". Syntax: "[prefix:]name".
 * @param string|null $defaultPrefix
 *   Prefix to report when the string carries none.
 *
 * @return array
 *   (0 => string|NULL $prefix, 1 => string $value)
 */
public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
  $delimAt = strpos($string, $delim);
  if ($delimAt !== FALSE) {
    // Only the first delimiter splits; one character is skipped after it.
    $prefix = substr($string, 0, $delimAt);
    $value = substr($string, $delimAt + 1);
    return [$prefix, $value];
  }
  return [$defaultPrefix, $string];
}
/**
 * Mask the middle of the user portion of an email address (the part before
 * the "@").
 *
 * @param string $email
 *   The email address to be masked.
 * @param string $maskChar
 *   The character used for masking.
 * @param int $percent
 *   The percentage of the user portion to be masked.
 *
 * @return string
 *   The masked email address.
 */
public static function maskEmail($email, $maskChar = '*', $percent = 50) {
  $pieces = preg_split("/@/", $email);
  $user = $pieces[0];
  $domain = $pieces[1];

  $userLength = strlen($user);
  $maskCount = floor($userLength * $percent / 100);
  // Centre the mask inside the user portion.
  $offset = floor(($userLength - $maskCount) / 2);

  $head = substr($user, 0, $offset);
  $mask = str_repeat($maskChar, $maskCount);
  $tail = substr($user, $maskCount + $offset);

  return ($head . $mask . $tail . '@' . $domain);
}
728 * This function compares two strings.
730 * @param string $strOne
732 * @param string $strTwo
735 * Boolean indicating whether you want the comparison to be case sensitive or not.
738 * TRUE (string are identical); FALSE (strings are not identical)
740 public static function compareStr($strOne, $strTwo, $case) {
742 // Convert to lowercase and trim white spaces
743 if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
744 // yes - they are identical
752 if ($case == FALSE) {
754 if (trim($strOne) == trim($strTwo)) {
755 // yes - they are identical
/**
 * Convert an HTML-encoded URL back into a plain URL.
 *
 * Many parts of the codebase have a convention of internally passing around
 * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 * (because most other odd characters are %-escaped in URLs; and %-escaped
 * strings don't need any extra escaping in HTML).
 *
 * @param string $htmlUrl
 *   URL with HTML entities.
 *
 * @return string
 *   URL without HTML entities.
 */
public static function unstupifyUrl($htmlUrl) {
  // The search term must be the entity "&amp;"; replacing "&" with "&" did nothing.
  return str_replace('&amp;', '&', $htmlUrl);
}
781 * When a user supplies a URL (e.g. to an image), we'd like to:
782 * - Remove the protocol and domain name if the URL points to the current
784 * - Keep the domain name for remote URLs.
785 * - Optionally, force remote URLs to use https instead of http (which is
789 * The URL to simplify. Examples:
790 * "https://example.org/sites/default/files/coffee-mug.jpg"
791 * "sites/default/files/coffee-mug.jpg"
792 * "http://i.stack.imgur.com/9jb2ial01b.png"
793 * @param bool $forceHttps = FALSE
794 * If TRUE, ensure that remote URLs use https. If a URL with
795 * http is supplied, then we'll change it to https.
796 * This is useful for situations like showing a premium product on a
797 * contribution, because (as reported in CRM-14283) if the user gets a
798 * browser warning like "page contains insecure elements" on a contribution
799 * page, that's a very bad thing. Thus, even if changing http to https
800 * breaks the image, that's better than leaving http content in a
804 * The simplified URL. Examples:
805 * "/sites/default/files/coffee-mug.jpg"
806 * "https://i.stack.imgur.com/9jb2ial01b.png"
808 public static function simplifyURL($url, $forceHttps = FALSE) {
809 $config = CRM_Core_Config
::singleton();
810 $siteURLParts = self
::simpleParseUrl($config->userFrameworkBaseURL
);
811 $urlParts = self
::simpleParseUrl($url);
813 // If the image is locally hosted, then only give the path to the image
815 = ($urlParts['host+port'] == '')
816 |
($urlParts['host+port'] == $siteURLParts['host+port']);
818 // and make sure it begins with one forward slash
819 return preg_replace('_^/*(?=.)_', '/', $urlParts['path+query']);
822 // If the URL is external, then keep the full URL as supplied
824 return $forceHttps ?
preg_replace('_^http://_', 'https://', $url) : $url;
/**
 * A simplified version of PHP's parse_url() function.
 *
 * @param string $url
 *   e.g. "https://example.com:8000/foo/bar/?id=1#fragment"
 *
 * @return array
 *   Will always contain keys 'host+port' and 'path+query', even if they're
 *   empty strings. Example:
 *   [
 *     'host+port' => "example.com:8000",
 *     'path+query' => "/foo/bar/?id=1",
 *   ]
 */
public static function simpleParseUrl($url) {
  $parsed = parse_url($url);

  $hostAndPort = $parsed['host'] ?? '';
  if (isset($parsed['port'])) {
    $hostAndPort = $hostAndPort . ':' . $parsed['port'];
  }

  $pathAndQuery = $parsed['path'] ?? '';
  if (isset($parsed['query'])) {
    $pathAndQuery = $pathAndQuery . '?' . $parsed['query'];
  }

  // Scheme, credentials and fragment are intentionally dropped.
  return [
    'host+port' => "$hostAndPort",
    'path+query' => "$pathAndQuery",
  ];
}
/**
 * Formats a string of attributes for insertion in an html tag.
 *
 * @param array $attributes
 *   Map of (attribute name => value or list of values). List values are
 *   joined with spaces; values are passed through htmlspecialchars(),
 *   names are emitted as given.
 *
 * @return string
 *   e.g. 'class="a b" id="c"'.
 */
public static function htmlAttributes($attributes) {
  $pairs = [];
  foreach ($attributes as $name => $vals) {
    $escaped = htmlspecialchars(implode(' ', (array) $vals));
    $pairs[] = "$name=\"" . $escaped . '"';
  }
  return ltrim(implode(' ', $pairs));
}
870 * Determine if $string starts with $fragment.
872 * @param string $string
874 * @param string $fragment
875 * The fragment to look for.
878 public static function startsWith($string, $fragment) {
879 if ($fragment === '') {
882 $len = strlen($fragment);
883 return substr($string, 0, $len) === $fragment;
887 * Determine if $string ends with $fragment.
889 * @param string $string
891 * @param string $fragment
892 * The fragment to look for.
895 public static function endsWith($string, $fragment) {
896 if ($fragment === '') {
899 $len = strlen($fragment);
900 return substr($string, -1 * $len) === $fragment;
904 * @param string|array $patterns
905 * @param array $allStrings
906 * @param bool $allowNew
907 * Whether to return new, unrecognized names.
910 public static function filterByWildcards($patterns, $allStrings, $allowNew = FALSE) {
911 $patterns = (array) $patterns;
913 foreach ($patterns as $pattern) {
914 if (!\CRM_Utils_String
::endsWith($pattern, '*')) {
915 if ($allowNew ||
in_array($pattern, $allStrings)) {
916 $result[] = $pattern;
920 $prefix = rtrim($pattern, '*');
921 foreach ($allStrings as $key) {
922 if (\CRM_Utils_String
::startsWith($key, $prefix)) {
928 return array_values(array_unique($result));
932 * Safely unserialize a string of scalar or array values (but not objects!)
934 * Use `xkerman/restricted-unserialize` to unserialize strings using PHP's
935 * serialization format. `restricted-unserialize` works like PHP's built-in
936 * `unserialize` function except that it does not deserialize object instances,
937 * making it immune to PHP Object Injection {@see https://www.owasp.org/index.php/PHP_Object_Injection}
940 * Note: When dealing with user inputs, it is generally recommended to use
941 * safe, standard data interchange formats such as JSON rather than PHP's
942 * serialization format when dealing with user input.
944 * @param string|NULL $string
948 public static function unserialize($string) {
949 if (!is_string($string)) {
953 return unserialize($string);
955 catch (UnserializeFailedException
$e) {
961 * Returns the plural form of an English word.
966 public static function pluralize($str) {
967 $lastLetter = substr($str, -1);
968 $lastTwo = substr($str, -2);
969 if ($lastLetter == 's' ||
$lastLetter == 'x' ||
$lastTwo == 'ch') {
972 if ($lastLetter == 'y' && !in_array($lastTwo, ['ay', 'ey', 'iy', 'oy', 'uy'])) {
973 return substr($str, 0, -1) . 'ies';
/**
 * Generic check as to whether any tokens could be in the given string.
 *
 * A token may be a smarty token or a CiviCRM token; both need a '{', so a
 * string without one cannot contain a token.
 *
 * @param string $string
 *   Text to inspect.
 *
 * @return bool
 *   TRUE when the string contains at least one '{'.
 */
public static function stringContainsTokens(string $string) {
  $bracePosition = strpos($string, '{');
  return is_int($bracePosition);
}
/**
 * Parse a string through smarty without creating a smarty template file per string.
 *
 * This function is for swapping out any smarty tokens that appear in a string
 * and are not re-used much, if at all. For example, a parsed contact greeting
 * does not need to be cached, and there are some minor security / data privacy
 * benefits to not caching them per file. We also save disk space, reduce I/O
 * and disk clearing time.
 *
 * Doing this is cleaner in Smarty3, which we are alas not using:
 * https://www.smarty.net/docs/en/resources.string.tpl
 *
 * However, it highlights that smarty-eval is not evil-eval and still has the
 * security applied.
 *
 * In order to replicate that in Smarty2 this uses {eval} per
 * https://www.smarty.net/docsv2/en/language.function.eval.tpl#id2820446
 *
 * - Evaluated variables are treated the same as templates. They follow the
 *   same escapement and security features just as if they were templates.
 * - Evaluated variables are compiled on every invocation, the compiled
 *   versions are not saved! However if you have caching enabled, the output
 *   will be cached with the rest of the template.
 *
 * Our set up does not have caching enabled, and testing suggests this still
 * works fine with it enabled, so turning it off before running this is out of
 * caution based on the above.
 *
 * When this function is run only one template file is created (for the eval
 * tag) no matter how many times it is run. This compares to it otherwise
 * creating one file for every parsed string.
 *
 * @param string $templateString
 *   Text that may contain smarty tokens.
 *
 * @return string
 *   The rendered text; the input itself when it contains no '{'.
 */
public static function parseOneOffStringThroughSmarty($templateString) {
  if (!CRM_Utils_String::stringContainsTokens($templateString)) {
    // Skip expensive smarty processing.
    return $templateString;
  }
  $smarty = CRM_Core_Smarty::singleton();
  $cachingValue = $smarty->caching;
  $smarty->caching = 0;
  $smarty->assign('smartySingleUseString', $templateString);
  try {
    return $smarty->fetch('string:{eval var=$smartySingleUseString}');
  }
  finally {
    // Restore the shared singleton even when rendering throws, so a bad
    // template cannot leave caching disabled or the string assigned.
    $smarty->caching = $cachingValue;
    $smarty->assign('smartySingleUseString', NULL);
  }
}