ImTest, SyntaxConformanceTest, CRM_Utils_String - Fix for case-insensitive filesystems
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.7 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2015 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2015
32 * $Id$
33 */
34
35 require_once 'HTML/QuickForm/Rule/Email.php';
36
37 /**
38 * This class contains string functions.
39 *
40 */
41 class CRM_Utils_String {
42 const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
43
44 /**
45 * List of all letters and numbers
46 */
47 const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
48
/**
 * Derive a variable-style name from a display title.
 *
 * The title is first passed through munge() (with "_" as the replacement
 * character and $maxLength as the cut-off). If CRM_Utils_Rule::title()
 * accepts the munged value it is returned; otherwise a prefix of the md5
 * hash of the original title is returned to prevent errors downstream.
 *
 * @param string $title
 *   Title to convert.
 * @param int $maxLength
 *   Maximum length of the returned name.
 *
 * @return string
 *   An equivalent variable name.
 */
public static function titleToVar($title, $maxLength = 31) {
  $candidate = self::munge($title, '_', $maxLength);
  $isAcceptable = CRM_Utils_Rule::title($candidate, $maxLength);

  return $isAcceptable ? $candidate : substr(md5($title), 0, $maxLength);
}
69
/**
 * Collapse every run of characters outside [a-zA-Z0-9] into $char.
 *
 * Only the ASCII character set is kept since mysql does not create
 * table names / field names otherwise (CRM-11744). The input is trimmed
 * before the replacement is applied.
 *
 * @param string $name
 *   The name to be worked on.
 * @param string $char
 *   The replacement for each run of non-valid characters.
 * @param int $len
 *   Maximum length of the result; a falsy value disables truncation.
 *
 * @return string
 *   The manipulated string.
 */
public static function munge($name, $char = '_', $len = 63) {
  $munged = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

  if (!$len) {
    return $munged;
  }

  // Keep variable names short.
  return substr($munged, 0, $len);
}
97
/**
 * Convert a possibly underscore-separated string to camel case.
 *
 * e.g. membership_payment returns MembershipPayment. The exact inputs
 * 'acl', 'ACL', 'im' and 'IM' are mapped to 'Acl' / 'Im', and a leading
 * 'uf' fragment becomes 'UF' (UFGroup, UFJoin, UFMatch, UFField).
 *
 * @param string $string
 *
 * @return string
 */
public static function convertStringToCamel($string) {
  $fixedNames = array(
    'acl' => 'Acl',
    'ACL' => 'Acl',
    'im' => 'Im',
    'IM' => 'Im',
  );
  if (isset($fixedNames[$string])) {
    return $fixedNames[$string];
  }

  $words = array_map('ucfirst', explode('_', $string));

  // Special case: the "Uf" prefix is always written in capitals.
  if ($words[0] === 'Uf') {
    $words[0] = 'UF';
  }

  return implode('', $words);
}
127
/**
 * Takes a variable name and replaces its tail with varying characters.
 *
 * The last $len characters of $name are replaced by the last $len
 * characters of uniqid(). uniqid() is a hex encoding of the current time;
 * its leading characters are the high-order digits and stay the same for
 * many hours, so the trailing (fast-changing) characters are used here.
 *
 * @param string $name
 *   Initial variable name.
 * @param int $len
 *   Number of trailing characters to replace. Non-positive values leave
 *   the name unchanged.
 *
 * @return string
 *   The name with its tail replaced.
 */
public static function rename($name, $len = 4) {
  $len = (int) $len;
  if ($len <= 0) {
    return (string) $name;
  }

  $suffix = substr(uniqid(), -$len);
  return substr_replace($name, $suffix, -$len, $len);
}
143
/**
 * Return the last component of a delimited string.
 *
 * Useful while converting file names to class names etc. When an array
 * is passed instead of a string nothing is extracted and NULL is returned.
 *
 * @param string $string
 *   The input string.
 * @param string $char
 *   Character used to demarcate the components.
 *
 * @return string|null
 *   The last component, or NULL when there is none.
 */
public static function getClassName($string, $char = '_') {
  if (is_array($string)) {
    return NULL;
  }

  $components = explode($char, $string);
  return empty($components) ? NULL : end($components);
}
166
/**
 * Append one or more names to a string, separated by a delimiter.
 *
 * No delimiter is emitted while the target string is still empty, and
 * values that are empty() are skipped.
 *
 * @param string $str
 *   The string to be appended to (modified in place).
 * @param string $delim
 *   The delimiter to use.
 * @param mixed $name
 *   The string (or array of strings) to append.
 */
public static function append(&$str, $delim, $name) {
  if (empty($name)) {
    return;
  }

  // Treat a single value as a one-element list so both cases share a loop.
  $pieces = is_array($name) ? $name : array($name);

  foreach ($pieces as $piece) {
    if (empty($piece)) {
      continue;
    }
    $str = empty($str) ? $piece : $str . $delim . $piece;
  }
}
206
/**
 * Determine if the string is composed only of ascii characters.
 *
 * With mbstring available the decision is delegated to
 * mb_detect_encoding() in strict mode. Without it, whitespace is removed
 * and the string is scanned for low- or high-ascii bytes; when such bytes
 * are found the result is that of isUtf8() if $utf8 is set, else FALSE.
 *
 * @param string $str
 *   Input string.
 * @param bool $utf8
 *   Also accept utf8 when the string is not plain ascii (default yes).
 *
 * @return bool
 *   true if string is ascii (or utf8 when $utf8 is set)
 */
public static function isAscii($str, $utf8 = TRUE) {
  if (function_exists('mb_detect_encoding')) {
    $candidates = $utf8 ? array('ASCII', 'UTF-8') : array('ASCII');
    $detected = mb_detect_encoding($str, $candidates, TRUE);
    return ($detected == 'ASCII' || $detected == 'UTF-8');
  }

  // Fallback without mbstring: eliminate all white space first.
  // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
  $compact = preg_replace('/\s+/', '', $str);

  // No low- or high-ascii characters at all: plain ascii.
  if (!preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $compact)) {
    return TRUE;
  }

  return $utf8 ? self::isUtf8($compact) : FALSE;
}
249
/**
 * Determine the string replacements for redaction
 * on the basis of the regular expressions.
 *
 * Every fragment of $str matched by any rule becomes a key of the result.
 * Its value is the replacement of the first rule whose pattern matches the
 * fragment, followed by the first 5 characters of the fragment's md5 hash.
 *
 * The working arrays are plain locals: each call only reports matches
 * found in its own $str and never carries matches over from earlier calls.
 *
 * @param string $str
 *   Input string.
 * @param array $regexRules
 *   Map of regular expression => replacement prefix.
 *
 * @return array
 *   Map of matched text => redacted output (empty when nothing matched).
 */
public static function regex($str, $regexRules) {
  if (empty($regexRules) || !isset($str)) {
    return CRM_Core_DAO::$_nullArray;
  }

  // Phase 1: collect every fragment matched by any of the rules.
  $allMatches = array();
  foreach ($regexRules as $pattern => $replacement) {
    $found = array();
    preg_match_all($pattern, $str, $found);
    if (!empty($found[0])) {
      $allMatches = array_merge($allMatches, $found[0]);
    }
  }

  if (empty($allMatches)) {
    return CRM_Core_DAO::$_nullArray;
  }

  // Phase 2: de-duplicate (fragments become keys) and assign each fragment
  // the replacement of the first rule that matches it.
  $redactions = array_flip($allMatches);
  foreach (array_keys($redactions) as $fragment) {
    foreach ($regexRules as $pattern => $replacement) {
      if (preg_match($pattern, $fragment)) {
        $redactions[$fragment] = $replacement . substr(md5($fragment), 0, 5);
        break;
      }
    }
  }

  return $redactions;
}
293
/**
 * Apply a set of case-insensitive string replacements to a text.
 *
 * Rules are applied one after another in array order.
 *
 * @param string $str
 *   Text to redact.
 * @param array $stringRules
 *   Map of text to find => replacement text.
 *
 * @return mixed
 *   The redacted output.
 */
public static function redaction($str, $stringRules) {
  if (empty($stringRules)) {
    return $str;
  }

  $redacted = $str;
  foreach ($stringRules as $needle => $substitute) {
    $redacted = str_ireplace($needle, $substitute, $redacted);
  }

  return $redacted;
}
311
/**
 * Determine if a string is composed only of utf8 characters.
 *
 * Uses mb_detect_encoding() in strict mode when mbstring is available.
 * Otherwise whitespace is removed and the string is validated against a
 * byte-level pattern of well-formed UTF-8 sequences.
 *
 * @param string $str
 *   Input string.
 *
 * @return bool
 */
public static function isUtf8($str) {
  // The function name must be passed as a string.
  if (function_exists('mb_detect_encoding')) {
    return mb_detect_encoding($str, array('UTF-8'), TRUE) !== FALSE;
  }

  // Eliminate all white space from the string.
  $str = preg_replace('/\s+/', '', $str);

  /* pattern taken from the php.net function documentation for
   * utf8decode();
   * comment by JF Sebastian, 30-Mar-2005
   *
   * The alternatives are, in order: 1-byte, 2-byte, the 3-byte ranges
   * (e0, e1-ec, ed, ee-ef) and the 4-byte ranges (f0, f1-f3, f4).
   */
  $wellFormed = '/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/';

  return (bool) preg_match($wellFormed, $str);
}
339
/**
 * Determine if two href's are equivalent (fuzzy match).
 *
 * Both urls are lower-cased and parsed. They match when their paths are
 * equal and the value extracted from each query string by
 * extractURLVarValue() is equal as well.
 *
 * @param string $url1
 *   The first url to be matched.
 * @param string $url2
 *   The second url to be matched against.
 *
 * @return bool
 *   true if the urls match, else false
 */
public static function match($url1, $url2) {
  $first = parse_url(strtolower($url1));
  $second = parse_url(strtolower($url2));

  if ($first['path'] != $second['path']) {
    return FALSE;
  }

  $firstVar = self::extractURLVarValue(CRM_Utils_Array::value('query', $first));
  $secondVar = self::extractURLVarValue(CRM_Utils_Array::value('query', $second));

  return $firstVar == $secondVar;
}
365
/**
 * Extract the civicrm path from a url query string.
 *
 * The query is split on "&"; the value of the first key=value pair whose
 * key equals the configured userFrameworkURLVar is returned.
 *
 * @param string $query
 *   A url query string.
 *
 * @return string|null
 *   civicrm url (eg: civicrm/contact/search), or NULL when not present.
 */
public static function extractURLVarValue($query) {
  $urlVar = CRM_Core_Config::singleton()->userFrameworkURLVar;

  foreach (explode('&', $query) as $pair) {
    // Skip fragments without "=" (or with "=" as their first character).
    if (!strpos($pair, '=')) {
      continue;
    }
    list($key, $value) = explode('=', $pair);
    if ($key == $urlVar) {
      return $value;
    }
  }

  return NULL;
}
390
/**
 * Translate a true/false/yes/no string to a boolean.
 *
 * "y", "yes", "t", "true" and "1" (case-insensitive) are TRUE; every
 * other value, including any non-scalar, is FALSE.
 *
 * @param string $str
 *   The string to be translated.
 *
 * @return bool
 */
public static function strtobool($str) {
  return is_scalar($str) && (bool) preg_match('/^(y(es)?|t(rue)?|1)$/i', $str);
}
409
/**
 * Translate a true/false/yes/no string to the string '1' or '0'.
 *
 * Returns '1' for y/yes/t/true/1, '0' for n/no/f/false/0
 * (case-insensitive), and FALSE for anything else including non-scalars.
 *
 * @param string $str
 *   The string to be translated.
 *
 * @return string|bool
 *   '1', '0', or FALSE when the value is not recognised.
 */
public static function strtoboolstr($str) {
  if (!is_scalar($str)) {
    return FALSE;
  }

  // Checked in order: truthy spellings first, then falsy spellings.
  $spellings = array(
    '1' => '/^(y(es)?|t(rue)?|1)$/i',
    '0' => '/^(n(o)?|f(alse)?|0)$/i',
  );
  foreach ($spellings as $result => $pattern) {
    if (preg_match($pattern, $str)) {
      return (string) $result;
    }
  }

  return FALSE;
}
433
/**
 * Convert a HTML string into a text one using html2text.
 *
 * Tokens of the form {name} are rewritten to token:{name} before the
 * conversion and restored afterwards.
 *
 * @param string $html
 *   The string to be converted.
 *
 * @return string
 *   the converted string
 */
public static function htmlToText($html) {
  require_once 'packages/html2text/rcube_html2text.php';

  $shielded = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
  $converter = new rcube_html2text($shielded);
  $plain = $converter->get_text();

  return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $plain);
}
451
/**
 * Split a free-form personal name into first / middle / last name.
 *
 * Quotes are removed first. A name containing a comma is read as
 * "lname, fname [mname]"; otherwise it is read as "fname [mname] lname".
 * Words are separated on any run of whitespace, so repeated spaces or
 * tabs do not produce empty name parts. Words beyond the third (or beyond
 * the second given name in the comma form) are ignored.
 *
 * @param string $string
 *   The name to parse.
 * @param array $params
 *   Receives the 'first_name', 'middle_name' and 'last_name' keys that
 *   could be determined. Left untouched when the input is empty.
 */
public static function extractName($string, &$params) {
  $name = trim($string);
  if (empty($name)) {
    return;
  }

  // Strip out quotes, then any whitespace the quotes were protecting.
  $name = trim(str_replace(array('"', '\''), '', $name));

  if (strpos($name, ',') !== FALSE) {
    // Name has a comma - assume lname, fname [mname].
    $pieces = explode(',', $name);
    $params['last_name'] = trim($pieces[0]);

    $given = preg_split('/\s+/', trim($pieces[1]));
    $params['first_name'] = $given[0];
    if (count($given) > 1) {
      $params['middle_name'] = $given[1];
    }
    return;
  }

  // Name has no comma - assume fname [mname] lname.
  $words = preg_split('/\s+/', $name);
  $params['first_name'] = $words[0];
  if (count($words) == 2) {
    $params['last_name'] = $words[1];
  }
  elseif (count($words) > 2) {
    $params['middle_name'] = $words[1];
    $params['last_name'] = $words[2];
  }
}
505
/**
 * Parse newline separated "name=value" lines into an array.
 *
 * Each line is split on the first "=" via CRM_Utils_System::explode().
 * Lines whose value is empty() are dropped; names and values are trimmed.
 *
 * @param string $string
 *
 * @return array
 *   Map of name => value.
 */
public static function &makeArray($string) {
  $result = array();

  foreach (explode("\n", trim($string)) as $line) {
    list($key, $val) = CRM_Utils_System::explode('=', $line, 2);
    if (empty($val)) {
      continue;
    }
    $result[trim($key)] = trim($val);
  }

  return $result;
}
524
/**
 * Given an ezComponents-parsed representation of
 * a text with alternatives return only the first one.
 *
 * The first alternative is used only when it still contains text after
 * tags are stripped and it is trimmed; otherwise the input is returned.
 *
 * @param string $full
 *   All alternatives as a long string (or some other text).
 *
 * @return string
 *   only the first alternative found (or the text without alternatives)
 */
public static function stripAlternatives($full) {
  $found = array();
  $layout = '/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s';

  if (!preg_match($layout, $full, $found)) {
    return $full;
  }

  $firstAlternative = $found[1];
  return (trim(strip_tags($firstAlternative)) != '') ? $firstAlternative : $full;
}
548
/**
 * Strip leading and trailing whitespace and collapse every run of two or
 * more whitespace characters into a single space.
 *
 * Used for postal/greeting/addressee. Values that are empty() are
 * returned unchanged.
 *
 * @param string $string
 *   Input string to be cleaned.
 *
 * @return string
 *   the cleaned string
 */
public static function stripSpaces($string) {
  if (empty($string)) {
    return $string;
  }

  $trimmed = trim($string);
  return preg_replace("/\s{2,}/", " ", $trimmed);
}
562
/**
 * Clean the URL 'path' variable that we use to construct CiviCRM urls
 * by replacing characters in the path variable.
 *
 * When $search or $replace compares loosely equal to NULL the built-in
 * defaults are used: a fixed list of punctuation / whitespace characters
 * and "_" as the replacement.
 *
 * @param string $string
 *   The input string to be sanitized.
 * @param array $search
 *   The characters to be sanitized.
 * @param string $replace
 *   The character to replace it with.
 *
 * @return string
 *   the sanitized string
 */
public static function stripPathChars(
  $string,
  $search = NULL,
  $replace = NULL
) {
  if (empty($string)) {
    return $string;
  }

  $defaultSearch = array(
    '&',
    ';',
    ',',
    '=',
    '$',
    '"',
    "'",
    '\\',
    '<',
    '>',
    '(',
    ')',
    ' ',
    "\r",
    "\r\n",
    "\n",
    "\t",
  );

  $needles = ($search == NULL) ? $defaultSearch : $search;
  $substitute = ($replace == NULL) ? '_' : $replace;

  return str_replace($needles, $substitute, $string);
}
622
623
/**
 * Use HTMLPurifier to clean up a text string and remove any potential
 * xss attacks. This is primarily used in public facing pages which
 * accept html as the input string.
 *
 * The purifier instance is built on first use (UTF-8 encoding, definition
 * cache disabled) and reused by later calls.
 *
 * @param string $string
 *   The input string.
 *
 * @return string
 *   the cleaned up string
 */
public static function purifyHTML($string) {
  static $purifier = NULL;

  if ($purifier === NULL) {
    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Core.Encoding', 'UTF-8');
    // Disable the cache entirely.
    $settings->set('Cache.DefinitionImpl', NULL);

    $purifier = new HTMLPurifier($settings);
  }

  return $purifier->purify($string);
}
649
/**
 * Truncate $string; if $string exceeds $maxLen, place "..." at the end.
 *
 * Lengths are counted in UTF-8 characters when the mbstring extension is
 * available, so a multibyte character is never cut in half; without
 * mbstring the count falls back to bytes. The result is never longer than
 * $maxLen: when $maxLen leaves no room for text plus "...", only as much
 * of the ellipsis as fits is returned.
 *
 * @param string $string
 * @param int $maxLen
 *
 * @return string
 */
public static function ellipsify($string, $maxLen) {
  $multibyte = function_exists('mb_strlen') && function_exists('mb_substr');

  $length = $multibyte ? mb_strlen($string, 'UTF-8') : strlen($string);
  if ($length <= $maxLen) {
    return $string;
  }

  $ellipsis = '...';
  if ($maxLen <= 3) {
    // No room for any text; avoid a negative substr() length.
    return (string) substr($ellipsis, 0, max(0, (int) $maxLen));
  }

  $keep = $maxLen - 3;
  $head = $multibyte ? mb_substr($string, 0, $keep, 'UTF-8') : substr($string, 0, $keep);

  return $head . $ellipsis;
}
667
/**
 * Generate a random string.
 *
 * Each character is picked independently from $alphabet using rand().
 * NOTE(review): rand() is not a cryptographically secure source; confirm
 * that no caller relies on this for secrets.
 *
 * @param int $len
 *   Number of characters to generate.
 * @param string $alphabet
 *   Characters to pick from (e.g. self::ALPHANUMERIC).
 *
 * @return string
 *   The random string; empty when $alphabet is empty.
 */
public static function createRandom($len, $alphabet) {
  $alphabetSize = strlen($alphabet);
  if ($alphabetSize === 0) {
    // Nothing to pick from.
    return '';
  }

  $result = '';
  for ($i = 0; $i < $len; $i++) {
    // Square-bracket offsets: the curly-brace form is removed in PHP 8.
    $result .= $alphabet[rand(0, $alphabetSize - 1)];
  }
  return $result;
}
683
/**
 * Split "[prefix<delim>]name" into its prefix and its value.
 *
 * Examples (with ":" as the delimiter):
 * "admin foo" => array(NULL,"admin foo")
 * "cms:admin foo" => array("cms", "admin foo")
 *
 * The split happens at the first occurrence of $delim, which may be
 * longer than one character.
 *
 * @param string $delim
 *   The delimiter separating prefix and value.
 * @param string $string
 *   E.g. "view all contacts". Syntax: "[prefix:]name".
 * @param string|null $defaultPrefix
 *   Prefix to report when $string has no delimiter.
 *
 * @return array
 *   (0 => string|NULL $prefix, 1 => string $value)
 */
public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
  $pos = strpos($string, $delim);
  if ($pos === FALSE) {
    return array($defaultPrefix, $string);
  }

  $prefix = substr($string, 0, $pos);
  // Skip the whole delimiter, not just one character. The cast keeps the
  // value a string even when nothing follows the delimiter.
  $value = (string) substr($string, $pos + strlen($delim));

  return array($prefix, $value);
}
706
/**
 * Mask part of the user portion of an Email address (everything before
 * the @).
 *
 * The masked run is centred in the user portion: $percent of its
 * characters (rounded down) are replaced by $maskChar.
 *
 * @param string $email
 *   The email address to be masked.
 * @param string $maskChar
 *   The character used for masking.
 * @param int $percent
 *   The percentage of the user portion to be masked.
 *
 * @return string
 *   returns the masked Email address
 */
public static function maskEmail($email, $maskChar = '*', $percent = 50) {
  list($user, $domain) = preg_split("/@/", $email);

  $userLength = strlen($user);
  $hiddenCount = floor($userLength * $percent / 100);
  $leadCount = floor(($userLength - $hiddenCount) / 2);

  $head = substr($user, 0, $leadCount);
  $mask = str_repeat($maskChar, $hiddenCount);
  $tail = substr($user, $hiddenCount + $leadCount);

  return ($head . $mask . $tail . '@' . $domain);
}
732
/**
 * Compare two strings after trimming surrounding whitespace.
 *
 * Note the meaning of $case: when it is truthy both strings are
 * lower-cased first, i.e. the comparison IGNORES case; when it is falsy
 * the comparison is case sensitive.
 *
 * The comparison is a strict string comparison, so numeric-looking
 * strings such as '1e3' and '1000' are not treated as identical.
 *
 * @param string $strOne
 *   String one.
 * @param string $strTwo
 *   String two.
 * @param bool $case
 *   TRUE to ignore case, FALSE to compare case sensitively.
 *
 * @return bool
 *   TRUE (string are identical); FALSE (strings are not identical)
 */
public static function compareStr($strOne, $strTwo, $case) {
  $one = trim($strOne);
  $two = trim($strTwo);

  if ($case) {
    $one = strtolower($one);
    $two = strtolower($two);
  }

  return $one === $two;
}
770
/**
 * Decode the "&amp;" entities of an HTML-encoded URL.
 *
 * Many parts of the codebase have a convention of internally passing around
 * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 * (because most other odd characters are %-escaped in URLs; and %-escaped
 * strings don't need any extra escaping in HTML). This method reverses
 * that single substitution.
 *
 * @param string $htmlUrl
 *   URL with HTML entities.
 * @return string
 *   URL without HTML entities
 */
public static function unstupifyUrl($htmlUrl) {
  $encodedAmpersand = '&amp;';
  $plainAmpersand = '&';

  return str_replace($encodedAmpersand, $plainAmpersand, $htmlUrl);
}
785
/**
 * Format attributes as a string for insertion in an html tag.
 *
 * Each entry becomes name="value". Array values are joined with a single
 * space, and the value is passed through htmlspecialchars().
 *
 * @param array $attributes
 *   Map of attribute name => value (string or array of strings).
 *
 * @return string
 */
public static function htmlAttributes($attributes) {
  $rendered = array();

  foreach ($attributes as $name => $vals) {
    $value = htmlspecialchars(implode(' ', (array) $vals));
    $rendered[] = "$name=\"" . $value . '"';
  }

  return ltrim(implode(' ', $rendered));
}
800
801 }