8e8b1f96519087e9f8bb1fd4835a6aacecc8a3a5
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 require_once 'HTML/QuickForm/Rule/Email.php';
37
/**
 * Collection of static string helpers: munging display names into
 * identifiers, encoding checks, redaction, URL comparison, name parsing,
 * truncation and masking.
 */
class CRM_Utils_String {
  const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";

  /**
   * List of all letters and numbers
   */
  const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';

  /**
   * Convert a display name into a potential variable
   * name that we could use in forms/code
   *
   * @param string $title
   *   The display name to convert.
   * @param int $maxLength
   *   Maximum length of the returned name.
   *
   * @return string
   *   An equivalent variable name.
   */
  public static function titleToVar($title, $maxLength = 31) {
    $variable = self::munge($title, '_', $maxLength);

    if (CRM_Utils_Rule::title($variable, $maxLength)) {
      return $variable;
    }

    // The munged name was rejected by the title rule: fall back to a
    // substr of the md5 to prevent errors downstream.
    return substr(md5($title), 0, $maxLength);
  }

  /**
   * Given a string, replace every run of non alpha numeric characters
   * (including spaces) with the replacement character
   *
   * @param string $name
   *   The name to be worked on.
   * @param string $char
   *   The character to use for non-valid chars.
   * @param int $len
   *   Length of valid variables; a falsy value disables truncation.
   *
   * @return string
   *   The manipulated string.
   */
  public static function munge($name, $char = '_', $len = 63) {
    // replace all white space and non-alpha numeric with $char
    // we only use the ascii character set since mysql does not create table names / field names otherwise
    // CRM-11744
    $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // lets keep variable names short
    return $len ? substr($name, 0, $len) : $name;
  }

  /**
   * Convert possibly underscore separated words to camel case with special handling for 'UF'
   * e.g
   * membership_payment returns MembershipPayment
   *
   * @param string $string
   *   Underscore separated words.
   *
   * @return string
   *   The camel-cased string.
   */
  public static function convertStringToCamel($string) {
    // array_map avoids the dangling by-reference loop variable.
    $fragments = array_map('ucfirst', explode('_', $string));

    // Special case: UFGroup, UFJoin, UFMatch, UFField
    if ($fragments[0] === 'Uf') {
      $fragments[0] = 'UF';
    }
    return implode('', $fragments);
  }

  /**
   * Takes a variable name and munges it randomly into another variable name
   * by overwriting its last $len characters with a uniqid() fragment.
   *
   * @param string $name
   *   Initial Variable Name.
   * @param int $len
   *   Number of trailing characters to replace.
   *
   * @return string
   *   Randomized Variable Name
   */
  public static function rename($name, $len = 4) {
    $rand = substr(uniqid(), 0, $len);
    return substr_replace($name, $rand, -$len, $len);
  }

  /**
   * Takes a string and returns the last tuple of the string.
   * useful while converting file names to class names etc
   *
   * @param string $string
   *   The input string.
   * @param string $char
   *   The character used to demarcate the components.
   *
   * @return string|null
   *   The last component, or NULL when $string is an array.
   */
  public static function getClassName($string, $char = '_') {
    if (is_array($string)) {
      return NULL;
    }
    $names = explode($char, $string);
    if (empty($names)) {
      return NULL;
    }
    return array_pop($names);
  }

  /**
   * Appends a name to a string, separated by a delimiter.
   * Does the right thing for an empty string.
   *
   * @param string $str
   *   The string to be appended to (modified in place).
   * @param string $delim
   *   The delimiter to use.
   * @param mixed $name
   *   The string (or array of strings) to append. Values for which
   *   empty() is true are skipped.
   *
   * @return void
   */
  public static function append(&$str, $delim, $name) {
    if (empty($name)) {
      return;
    }

    $pieces = is_array($name) ? $name : array($name);
    foreach ($pieces as $piece) {
      if (empty($piece)) {
        continue;
      }
      $str = empty($str) ? $piece : $str . $delim . $piece;
    }
  }

  /**
   * Determine if the string is composed only of ascii characters
   *
   * @param string $str
   *   Input string.
   * @param bool $utf8
   *   Attempt utf8 match on failure (default yes).
   *
   * @return bool
   *   TRUE if the string is ascii (or utf8, when $utf8 is TRUE).
   */
  public static function isAscii($str, $utf8 = TRUE) {
    if (function_exists('mb_detect_encoding')) {
      $order = array('ASCII');
      if ($utf8) {
        $order[] = 'UTF-8';
      }
      $enc = mb_detect_encoding($str, $order, TRUE);
      return ($enc == 'ASCII' || $enc == 'UTF-8');
    }

    // eliminate all white space from the string
    $str = preg_replace('/\s+/', '', $str);
    // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.

    // match low- or high-ascii characters
    if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
      // if we did match, optionally accept the string when it is utf-8
      return $utf8 ? self::isUtf8($str) : FALSE;
    }
    return TRUE;
  }

  /**
   * Determine the string replacements for redaction
   * on the basis of the regular expressions
   *
   * NOTE(review): the match accumulators are function statics, so matches
   * found by earlier calls are carried into later calls. This looks
   * unintended but is preserved because callers may rely on it; confirm
   * before changing.
   *
   * @param string $str
   *   Input string.
   * @param array $regexRules
   *   Regular expression to be matched w/ replacements
   *   (pattern => replacement prefix).
   *
   * @return array
   *   Matched strings mapped to their redacted replacement (the rule's
   *   replacement followed by the first 5 characters of the match's md5).
   */
  public static function regex($str, $regexRules) {
    //redact the regular expressions
    if (!empty($regexRules) && isset($str)) {
      // Must stay inside this branch: when the branch is skipped $match is
      // an unset local and the method returns the null array.
      static $matches, $totalMatches, $match = array();
      foreach ($regexRules as $pattern => $replacement) {
        preg_match_all($pattern, $str, $matches);
        if (!empty($matches[0])) {
          if (empty($totalMatches)) {
            $totalMatches = $matches[0];
          }
          else {
            $totalMatches = array_merge($totalMatches, $matches[0]);
          }
          $match = array_flip($totalMatches);
        }
      }
    }

    if (!empty($match)) {
      foreach ($match as $matchKey => & $dontCare) {
        // the first rule whose pattern matches decides the replacement
        foreach ($regexRules as $pattern => $replacement) {
          if (preg_match($pattern, $matchKey)) {
            $dontCare = $replacement . substr(md5($matchKey), 0, 5);
            break;
          }
        }
      }
      // drop the by-reference binding before handing the array back
      unset($dontCare);
      return $match;
    }
    return CRM_Core_DAO::$_nullArray;
  }

  /**
   * Apply case-insensitive string replacements to redact a text.
   *
   * @param string $str
   *   The text to redact.
   * @param array $stringRules
   *   Map of search string => replacement.
   *
   * @return mixed
   *   The redacted text.
   */
  public static function redaction($str, $stringRules) {
    //redact the strings
    if (!empty($stringRules)) {
      foreach ($stringRules as $match => $replace) {
        $str = str_ireplace($match, $replace, $str);
      }
    }

    //return the redacted output
    return $str;
  }

  /**
   * Determine if a string is composed only of utf8 characters
   *
   * @param string $str
   *   Input string.
   *
   * @return bool
   */
  public static function isUtf8($str) {
    if (function_exists('mb_detect_encoding')) {
      $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
      return ($enc !== FALSE);
    }

    // eliminate all white space from the string
    $str = preg_replace('/\s+/', '', $str);

    /* pattern stolen from the php.net function documentation for
     * utf8decode();
     * comment by JF Sebastian, 30-Mar-2005
     * (the \xf0 lead byte alternative was previously written as a
     * literal "f0", which rejected valid 4-byte sequences)
     */
    return (bool) preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
  }

  /**
   * Determine if two href's are equivalent (fuzzy match): same path
   * (case-insensitive) and same value for the framework URL variable.
   *
   * @param string $url1
   *   The first url to be matched.
   * @param string $url2
   *   The second url to be matched against.
   *
   * @return bool
   *   true if the urls match, else false
   */
  public static function match($url1, $url2) {
    $url1Str = parse_url(strtolower($url1));
    $url2Str = parse_url(strtolower($url2));

    // parse_url() omits 'path' for path-less URLs and returns FALSE for
    // seriously malformed ones; treat both as "no path".
    $path1 = isset($url1Str['path']) ? $url1Str['path'] : NULL;
    $path2 = isset($url2Str['path']) ? $url2Str['path'] : NULL;
    if ($path1 != $path2) {
      return FALSE;
    }

    $var1 = self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str));
    $var2 = self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str));
    return $var1 == $var2;
  }

  /**
   * Extract the value of the user framework URL variable from a query string
   *
   * @param string $query
   *   The query part of a url (key=value pairs joined by "&").
   *
   * @return string|null
   *   The civicrm url (eg: civicrm/contact/search/...), or NULL when the
   *   variable is not present.
   */
  public static function extractURLVarValue($query) {
    $config = CRM_Core_Config::singleton();
    $urlVar = $config->userFrameworkURLVar;

    $params = explode('&', (string) $query);
    foreach ($params as $p) {
      // pairs without "=" (or starting with it) are ignored
      if (strpos($p, '=')) {
        list($k, $v) = explode('=', $p);
        if ($k == $urlVar) {
          return $v;
        }
      }
    }
    return NULL;
  }

  /**
   * Translate a true/false/yes/no string to a boolean
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return bool
   *   TRUE for y/yes/t/true/1 (case-insensitive), FALSE otherwise.
   */
  public static function strtobool($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    return (bool) preg_match('/^(y(es)?|t(rue)?|1)$/i', $str);
  }

  /**
   * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return string|bool
   *   '1', '0', or FALSE when the input is not recognised.
   */
  public static function strtoboolstr($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
      return '1';
    }
    if (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
      return '0';
    }
    return FALSE;
  }

  /**
   * Convert a HTML string into a text one using html2text
   *
   * @param string $html
   *   The string to be converted.
   *
   * @return string
   *   the converted string
   */
  public static function htmlToText($html) {
    require_once 'packages/html2text/rcube_html2text.php';
    // Prefix {tokens} before conversion and strip the prefix afterwards.
    $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
    $converter = new rcube_html2text($token_html);
    $token_text = $converter->get_text();
    $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
    return $text;
  }

  /**
   * Split a free-form personal name into first/middle/last name parts.
   *
   * @param string $string
   *   The name, either "lname, fname [mname]" or "fname [mname] lname".
   * @param array $params
   *   Receives the 'first_name', 'middle_name' and 'last_name' keys that
   *   could be determined; left untouched for an empty name.
   */
  public static function extractName($string, &$params) {
    $name = trim($string);
    if (empty($name)) {
      return;
    }

    // strip out quotes
    $name = str_replace('"', '', $name);
    $name = str_replace('\'', '', $name);

    // check for comma in name
    if (strpos($name, ',') !== FALSE) {

      // name has a comma - assume lname, fname [mname]
      $names = explode(',', $name);
      if (count($names) > 1) {
        $params['last_name'] = trim($names[0]);

        // check for space delim
        $fnames = explode(' ', trim($names[1]));
        $params['first_name'] = trim($fnames[0]);
        if (count($fnames) > 1) {
          $params['middle_name'] = trim($fnames[1]);
        }
      }
      else {
        $params['first_name'] = trim($names[0]);
      }
    }
    else {
      // name has no comma - assume fname [mname] lname
      $names = explode(' ', $name);
      if (count($names) == 1) {
        $params['first_name'] = $names[0];
      }
      elseif (count($names) == 2) {
        $params['first_name'] = $names[0];
        $params['last_name'] = $names[1];
      }
      else {
        // anything beyond the third word is ignored
        $params['first_name'] = $names[0];
        $params['middle_name'] = $names[1];
        $params['last_name'] = $names[2];
      }
    }
  }

  /**
   * Parse newline separated "name=value" lines into an array.
   *
   * @param string $string
   *   The text to parse.
   *
   * @return array
   *   Trimmed name => trimmed value; lines whose value is empty() are
   *   dropped.
   */
  public static function &makeArray($string) {
    $string = trim($string);

    $values = explode("\n", $string);
    $result = array();
    foreach ($values as $value) {
      list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
      if (!empty($v)) {
        $result[trim($n)] = trim($v);
      }
    }
    return $result;
  }

  /**
   * Given an ezComponents-parsed representation of
   * a text with alternatives return only the first one
   *
   * @param string $full
   *   All alternatives as a long string (or some other text).
   *
   * @return string
   *   only the first alternative found (or the text without alternatives)
   */
  public static function stripAlternatives($full) {
    $matches = array();
    preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);

    // only use the first alternative when it has visible content
    if (isset($matches[1]) && trim(strip_tags($matches[1])) != '') {
      return $matches[1];
    }
    return $full;
  }

  /**
   * Strip leading, trailing, double spaces from string
   * used for postal/greeting/addressee
   *
   * @param string $string
   *   Input string to be cleaned.
   *
   * @return string
   *   the cleaned string
   */
  public static function stripSpaces($string) {
    return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
  }

  /**
   * clean the URL 'path' variable that we use
   * to construct CiviCRM urls by removing characters from the path variable
   *
   * @param string $string
   *   The input string to be sanitized.
   * @param array $search
   *   The characters to be sanitized (a default list is used when empty).
   * @param string $replace
   *   The character to replace it with ('_' when not given).
   *
   * @return string
   *   the sanitized string
   */
  public static function stripPathChars(
    $string,
    $search = NULL,
    $replace = NULL
  ) {
    if (empty($string)) {
      return $string;
    }

    // loose comparisons are deliberate: an empty array / empty string also
    // selects the default
    if ($search == NULL) {
      $search = array(
        '&',
        ';',
        ',',
        '=',
        '$',
        '"',
        "'",
        '\\',
        '<',
        '>',
        '(',
        ')',
        ' ',
        "\r",
        "\r\n",
        "\n",
        "\t",
      );
    }

    if ($replace == NULL) {
      $replace = '_';
    }

    return str_replace($search, $replace, $string);
  }

  /**
   * Use HTMLPurifier to clean up a text string and remove any potential
   * xss attacks. This is primarily used in public facing pages which
   * accept html as the input string
   *
   * @param string $string
   *   The input string.
   *
   * @return string
   *   the cleaned up string
   */
  public static function purifyHTML($string) {
    // the purifier is built once and reused for later calls
    static $_filter = NULL;
    if (!$_filter) {
      $config = HTMLPurifier_Config::createDefault();
      $config->set('Core.Encoding', 'UTF-8');

      // Disable the cache entirely
      $config->set('Cache.DefinitionImpl', NULL);

      $_filter = new HTMLPurifier($config);
    }

    return $_filter->purify($string);
  }

  /**
   * Truncate $string; if $string exceeds $maxLen, place "..." at the end
   *
   * Lengths are counted in UTF-8 characters when the mbstring extension is
   * available, so a multi-byte character is never cut in half; otherwise
   * the count falls back to bytes.
   *
   * @param string $string
   * @param int $maxLen
   *   Maximum length of the result, including the "...".
   *
   * @return string
   */
  public static function ellipsify($string, $maxLen) {
    if (function_exists('mb_strlen') && function_exists('mb_substr')) {
      if (mb_strlen($string, 'UTF-8') <= $maxLen) {
        return $string;
      }
      return mb_substr($string, 0, $maxLen - 3, 'UTF-8') . '...';
    }

    if (strlen($string) <= $maxLen) {
      return $string;
    }
    return substr($string, 0, $maxLen - 3) . '...';
  }

  /**
   * Generate a random string
   *
   * Uses rand(), so the result is not suitable as a cryptographic secret.
   *
   * @param int $len
   *   Number of characters to generate.
   * @param string $alphabet
   *   Characters to pick from (e.g. self::ALPHANUMERIC).
   *
   * @return string
   *   The random string; empty when $alphabet is empty.
   */
  public static function createRandom($len, $alphabet) {
    $alphabetSize = strlen($alphabet);
    if ($alphabetSize === 0) {
      // nothing to pick from
      return '';
    }

    $result = '';
    for ($i = 0; $i < $len; $i++) {
      // square brackets: the {} string offset syntax was removed in PHP 8
      $result .= $alphabet[rand(1, $alphabetSize) - 1];
    }
    return $result;
  }

  /**
   * Examples:
   * "admin foo" => array(NULL,"admin foo")
   * "cms:admin foo" => array("cms", "admin foo")
   *
   * @param string $delim
   *   The delimiter separating prefix and value (may be longer than one
   *   character).
   * @param string $string
   *   E.g. "view all contacts". Syntax: "[prefix:]name".
   * @param string|null $defaultPrefix
   *   Prefix to report when $string has none.
   *
   * @return array
   *   (0 => string|NULL $prefix, 1 => string $value)
   */
  public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
    $pos = strpos($string, $delim);
    if ($pos === FALSE) {
      return array($defaultPrefix, $string);
    }
    // skip the whole delimiter, not just its first character
    return array(substr($string, 0, $pos), substr($string, $pos + strlen($delim)));
  }

  /**
   * This function will mask part of the the user portion of an Email address (everything before the @)
   *
   * @param string $email
   *   The email address to be masked.
   * @param string $maskChar
   *   The character used for masking.
   * @param int $percent
   *   The percentage of the user portion to be masked (clamped to 0..100).
   *
   * @return string
   *   returns the masked Email address
   */
  public static function maskEmail($email, $maskChar = '*', $percent = 50) {
    $parts = explode('@', $email);
    $user = $parts[0];
    // an address without "@" yields an empty domain instead of a notice
    $domain = isset($parts[1]) ? $parts[1] : '';

    $percent = max(0, min(100, $percent));
    $len = strlen($user);
    $maskCount = (int) floor($len * $percent / 100);
    // centre the masked run inside the user part
    $offset = (int) floor(($len - $maskCount) / 2);

    $masked = substr($user, 0, $offset)
      . str_repeat($maskChar, $maskCount)
      . substr($user, $maskCount + $offset);

    return ($masked . '@' . $domain);
  }

  /**
   * This function compares two strings after trimming white space
   *
   * @param string $strOne
   *   String one.
   * @param string $strTwo
   *   String two.
   * @param bool $case
   *   TRUE folds both strings to lower case first (case-insensitive
   *   comparison); FALSE compares them as they are (case-sensitive).
   *
   * @return bool
   *   TRUE (string are identical); FALSE (strings are not identical)
   */
  public static function compareStr($strOne, $strTwo, $case) {
    $one = trim($strOne);
    $two = trim($strTwo);

    if ($case) {
      $one = strtolower($one);
      $two = strtolower($two);
    }

    // strict comparison: "==" would treat numeric-looking strings such as
    // "1e3" and "1000" as identical
    return $one === $two;
  }

  /**
   * Many parts of the codebase have a convention of internally passing around
   * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
   * (because most other odd characters are %-escaped in URLs; and %-escaped
   * strings don't need any extra escaping in HTML).
   *
   * @param string $htmlUrl
   *   URL with HTML entities.
   *
   * @return string
   *   URL without HTML entities
   */
  public static function unstupifyUrl($htmlUrl) {
    return str_replace('&amp;', '&', $htmlUrl);
  }

}