Merge remote-tracking branch 'upstream/4.5' into 4.5-master-2015-03-04-18-48-05
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 */
34
35 require_once 'HTML/QuickForm/Rule/Email.php';
36
37 /**
38 * This class contains string functions.
39 *
40 */
class CRM_Utils_String {
  const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";

  /**
   * List of all letters and numbers
   */
  const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';

  /**
   * Convert a display name into a potential variable name.
   *
   * The title is munged first; if the munged value does not pass
   * CRM_Utils_Rule::title(), a prefix of the md5 of the title is used instead.
   *
   * @param string $title
   *   Title of the string.
   * @param int $maxLength
   *   Maximum length of the returned name.
   *
   * @return string
   *   An equivalent variable name.
   */
  public static function titleToVar($title, $maxLength = 31) {
    $variable = self::munge($title, '_', $maxLength);

    if (CRM_Utils_Rule::title($variable, $maxLength)) {
      return $variable;
    }

    // the munged value was rejected: fall back to a substr of the md5
    // to prevent errors downstream
    return substr(md5($title), 0, $maxLength);
  }

  /**
   * Replace all non alpha numeric characters and spaces with the replacement character.
   *
   * Each run of consecutive invalid characters is collapsed into a single
   * replacement; the input is trimmed first.
   *
   * @param string $name
   *   The name to be worked on.
   * @param string $char
   *   The character to use for non-valid chars.
   * @param int $len
   *   Length of valid variables. A falsy value disables truncation.
   *
   * @return string
   *   returns the manipulated string
   */
  public static function munge($name, $char = '_', $len = 63) {
    // Replace all white space and non-alpha numeric with $char
    // we only use the ascii character set since mysql does not create table names / field names otherwise
    // CRM-11744
    $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // lets keep variable names short
    return $len ? substr($name, 0, $len) : $name;
  }

  /**
   * Convert possibly underscore separated words to camel case with special handling for 'UF'
   * e.g membership_payment returns MembershipPayment
   *
   * @param string $string
   *
   * @return string
   */
  public static function convertStringToCamel($string) {
    // array_map avoids the dangling reference a by-reference foreach leaves behind
    $fragments = array_map('ucfirst', explode('_', $string));

    // Special case: UFGroup, UFJoin, UFMatch, UFField
    if ($fragments[0] === 'Uf') {
      $fragments[0] = 'UF';
    }
    return implode('', $fragments);
  }

  /**
   * Takes a variable name and munges it randomly into another variable name.
   *
   * The last $len characters of $name are replaced by the first $len
   * characters of uniqid().
   *
   * @param string $name
   *   Initial Variable Name.
   * @param int $len
   *   Length of valid variables.
   *
   * @return string
   *   Randomized Variable Name
   */
  public static function rename($name, $len = 4) {
    $rand = substr(uniqid(), 0, $len);
    return substr_replace($name, $rand, -$len, $len);
  }

  /**
   * Takes a string and returns the last tuple of the string.
   *
   * Useful while converting file names to class names etc
   *
   * @param string $string
   *   The input string.
   * @param string $char
   *   Character used to demarcate the components
   *
   * @return string|null
   *   The last component; NULL when $string is an array.
   */
  public static function getClassName($string, $char = '_') {
    if (is_array($string)) {
      return NULL;
    }

    $names = explode($char, $string);
    return empty($names) ? NULL : array_pop($names);
  }

  /**
   * Appends a name to a string and separated by delimiter.
   *
   * Does the right thing for an empty string: the delimiter is only inserted
   * when $str already holds a non-empty value. Empty names are skipped.
   *
   * @param string $str
   *   The string to be appended to (modified in place).
   * @param string $delim
   *   The delimiter to use.
   * @param mixed $name
   *   The string (or array of strings) to append.
   */
  public static function append(&$str, $delim, $name) {
    if (empty($name)) {
      return;
    }

    $names = is_array($name) ? $name : array($name);
    foreach ($names as $n) {
      if (empty($n)) {
        continue;
      }
      $str = empty($str) ? $n : $str . $delim . $n;
    }
  }

  /**
   * Determine if the string is composed only of ascii characters.
   *
   * @param string $str
   *   Input string.
   * @param bool $utf8
   *   Attempt utf8 match on failure (default yes).
   *
   * @return bool
   *   true if string is ascii (or utf8, when $utf8 is set)
   */
  public static function isAscii($str, $utf8 = TRUE) {
    if (function_exists('mb_detect_encoding')) {
      $order = array('ASCII');
      if ($utf8) {
        $order[] = 'UTF-8';
      }
      $enc = mb_detect_encoding($str, $order, TRUE);
      return ($enc === 'ASCII' || $enc === 'UTF-8');
    }

    // mbstring is not available: fall back to pattern matching.
    // eliminate all white space from the string
    $str = preg_replace('/\s+/', '', $str);
    // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.

    /* match low- or high-ascii characters */
    if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
      /* if we did match, try for utf-8 when allowed */
      return $utf8 ? self::isUtf8($str) : FALSE;
    }
    return TRUE;
  }

  /**
   * Determine the string replacements for redaction.
   * on the basis of the regular expressions
   *
   * NOTE(review): the accumulators below are declared static, so matches
   * found during earlier calls stay in the returned array on later calls.
   * Callers may rely on this; confirm before changing it.
   *
   * @param string $str
   *   Input string.
   * @param array $regexRules
   *   Regular expression to be matched w/ replacements (pattern => replacement prefix).
   *
   * @return array
   *   array of matched strings w/ corresponding redacted outputs
   *   (replacement prefix followed by the first 5 characters of the match's md5)
   */
  public static function regex($str, $regexRules) {
    //redact the regular expressions
    if (!empty($regexRules) && isset($str)) {
      static $matches, $totalMatches, $match = array();
      foreach ($regexRules as $pattern => $replacement) {
        preg_match_all($pattern, $str, $matches);
        if (!empty($matches[0])) {
          if (empty($totalMatches)) {
            $totalMatches = $matches[0];
          }
          else {
            $totalMatches = array_merge($totalMatches, $matches[0]);
          }
          // matched strings become the keys of the result
          $match = array_flip($totalMatches);
        }
      }
    }

    if (!empty($match)) {
      foreach ($match as $matchKey => & $dontCare) {
        // the first rule that matches the string decides its replacement
        foreach ($regexRules as $pattern => $replacement) {
          if (preg_match($pattern, $matchKey)) {
            $dontCare = $replacement . substr(md5($matchKey), 0, 5);
            break;
          }
        }
      }
      // drop the reference so later writes cannot corrupt the last element
      unset($dontCare);
      return $match;
    }
    return CRM_Core_DAO::$_nullArray;
  }

  /**
   * Apply a set of case-insensitive string replacements.
   *
   * @param string $str
   *   Input string.
   * @param array $stringRules
   *   Replacements to apply (search string => replacement).
   *
   * @return mixed
   *   The redacted string.
   */
  public static function redaction($str, $stringRules) {
    //redact the strings
    if (!empty($stringRules)) {
      foreach ($stringRules as $match => $replace) {
        $str = str_ireplace($match, $replace, $str);
      }
    }

    //return the redacted output
    return $str;
  }

  /**
   * Determine if a string is composed only of utf8 characters
   *
   * @param string $str
   *   Input string.
   *
   * @return bool
   */
  public static function isUtf8($str) {
    // the function name must be passed as a string, not as a bare constant
    if (function_exists('mb_detect_encoding')) {
      $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
      return ($enc !== FALSE);
    }

    // eliminate all white space from the string
    $str = preg_replace('/\s+/', '', $str);

    /* pattern stolen from the php.net function documentation for
     * utf8decode();
     * comment by JF Sebastian, 30-Mar-2005
     * (the \xf0 lead byte alternative is written with its \x escape here)
     */
    return (bool) preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
  }

  /**
   * Determine if two href's are equivalent (fuzzy match)
   *
   * Two urls match when, after lower-casing, their paths are equal and the
   * value of the framework url variable in their query strings is equal.
   *
   * @param string $url1
   *   The first url to be matched.
   * @param string $url2
   *   The second url to be matched against.
   *
   * @return bool
   *   true if the urls match, else false
   */
  public static function match($url1, $url2) {
    $url1Str = parse_url(strtolower($url1));
    $url2Str = parse_url(strtolower($url2));

    // isset() lookups: a url may have no path and/or no query component
    $path1 = isset($url1Str['path']) ? $url1Str['path'] : NULL;
    $path2 = isset($url2Str['path']) ? $url2Str['path'] : NULL;
    if ($path1 != $path2) {
      return FALSE;
    }

    $query1 = isset($url1Str['query']) ? $url1Str['query'] : NULL;
    $query2 = isset($url2Str['query']) ? $url2Str['query'] : NULL;
    return self::extractURLVarValue($query1) == self::extractURLVarValue($query2);
  }

  /**
   * Extract the civicrm path from the url.
   *
   * @param string $query
   *   The query-string part of a url.
   *
   * @return string|null
   *   civicrm url (eg: civicrm/contact/search), or NULL when the
   *   framework url variable is not present in the query.
   */
  public static function extractURLVarValue($query) {
    // nothing to search in (also keeps NULL away from explode())
    if ($query === NULL || $query === '') {
      return NULL;
    }

    $config = CRM_Core_Config::singleton();
    $urlVar = $config->userFrameworkURLVar;

    foreach (explode('&', $query) as $p) {
      if (strpos($p, '=')) {
        // limit of 2 so that a value containing '=' is kept whole
        list($k, $v) = explode('=', $p, 2);
        if ($k == $urlVar) {
          return $v;
        }
      }
    }
    return NULL;
  }

  /**
   * Translate a true/false/yes/no string to a boolean value
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return bool
   *   TRUE for y/yes/t/true/1 (case-insensitive), FALSE for anything else.
   */
  public static function strtobool($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    return (bool) preg_match('/^(y(es)?|t(rue)?|1)$/i', $str);
  }

  /**
   * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return string|bool
   *   '1', '0', or FALSE when the input is not recognised.
   */
  public static function strtoboolstr($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
      return '1';
    }
    if (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
      return '0';
    }
    return FALSE;
  }

  /**
   * Convert a HTML string into a text one using html2text
   *
   * Tokens of the form {name} are wrapped before the conversion and
   * unwrapped afterwards.
   *
   * @param string $html
   *   The string to be converted.
   *
   * @return string
   *   the converted string
   */
  public static function htmlToText($html) {
    require_once 'packages/html2text/rcube_html2text.php';
    $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
    $converter = new rcube_html2text($token_html);
    $token_text = $converter->get_text();
    return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
  }

  /**
   * Split a personal name into first / middle / last name.
   *
   * Quotes are removed. "lname, fname [mname]" is assumed when the name
   * contains a comma, "fname [mname] lname" otherwise.
   *
   * @param string $string
   *   The name to be parsed.
   * @param array $params
   *   Receives the first_name / middle_name / last_name keys that were found.
   */
  public static function extractName($string, &$params) {
    $name = trim($string);
    if (empty($name)) {
      return;
    }

    // strip out quotes
    $name = str_replace(array('"', '\''), '', $name);

    // check for comma in name
    if (strpos($name, ',') !== FALSE) {
      // name has a comma - assume lname, fname [mname]
      // (explode always yields at least two parts here)
      $names = explode(',', $name);
      $params['last_name'] = trim($names[0]);

      // check for space delim
      $fnames = explode(' ', trim($names[1]));
      $params['first_name'] = trim($fnames[0]);
      if (count($fnames) > 1) {
        $params['middle_name'] = trim($fnames[1]);
      }
      return;
    }

    // name has no comma - assume fname [mname] lname
    $names = explode(' ', $name);
    $params['first_name'] = $names[0];
    if (count($names) == 2) {
      $params['last_name'] = $names[1];
    }
    elseif (count($names) > 2) {
      $params['middle_name'] = $names[1];
      $params['last_name'] = $names[2];
    }
  }

  /**
   * Parse newline separated "name=value" lines into an array.
   *
   * Lines whose value is empty are skipped; names and values are trimmed.
   *
   * @param string $string
   *
   * @return array
   *   (name => value)
   */
  public static function &makeArray($string) {
    $string = trim($string);

    $values = explode("\n", $string);
    $result = array();
    foreach ($values as $value) {
      list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
      if (!empty($v)) {
        $result[trim($n)] = trim($v);
      }
    }
    return $result;
  }

  /**
   * Given an ezComponents-parsed representation of
   * a text with alternatives return only the first one
   *
   * @param string $full
   *   All alternatives as a long string (or some other text).
   *
   * @return string
   *   only the first alternative found (or the text without alternatives)
   */
  public static function stripAlternatives($full) {
    $matches = array();
    preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);

    // only use the first alternative when it has visible content
    if (isset($matches[1]) && trim(strip_tags($matches[1])) != '') {
      return $matches[1];
    }
    return $full;
  }

  /**
   * Strip leading, trailing, double spaces from string
   * used for postal/greeting/addressee
   *
   * @param string $string
   *   Input string to be cleaned.
   *
   * @return string
   *   the cleaned string
   */
  public static function stripSpaces($string) {
    if (empty($string)) {
      return $string;
    }
    return preg_replace("/\s{2,}/", " ", trim($string));
  }

  /**
   * clean the URL 'path' variable that we use
   * to construct CiviCRM urls by removing characters from the path variable
   *
   * @param string $string
   *   The input string to be sanitized.
   * @param array $search
   *   The characters to be sanitized (a built-in list is used when omitted).
   * @param string $replace
   *   The character to replace it with ('_' when omitted).
   *
   * @return string
   *   the sanitized string
   */
  public static function stripPathChars(
    $string,
    $search = NULL,
    $replace = NULL
  ) {
    static $_searchChars = NULL;
    static $_replaceChar = NULL;

    if (empty($string)) {
      return $string;
    }

    // build the defaults once per request
    if ($_searchChars == NULL) {
      $_searchChars = array(
        '&',
        ';',
        ',',
        '=',
        '$',
        '"',
        "'",
        '\\',
        '<',
        '>',
        '(',
        ')',
        ' ',
        "\r",
        "\r\n",
        "\n",
        "\t",
      );
      $_replaceChar = '_';
    }

    if ($search == NULL) {
      $search = $_searchChars;
    }

    if ($replace == NULL) {
      $replace = $_replaceChar;
    }

    return str_replace($search, $replace, $string);
  }


  /**
   * Use HTMLPurifier to clean up a text string and remove any potential
   * xss attacks. This is primarily used in public facing pages which
   * accept html as the input string
   *
   * @param string $string
   *   The input string.
   *
   * @return string
   *   the cleaned up string
   */
  public static function purifyHTML($string) {
    // the purifier instance is created on first use and then reused
    static $_filter = NULL;
    if (!$_filter) {
      $config = HTMLPurifier_Config::createDefault();
      $config->set('Core.Encoding', 'UTF-8');

      // Disable the cache entirely
      $config->set('Cache.DefinitionImpl', NULL);

      $_filter = new HTMLPurifier($config);
    }

    return $_filter->purify($string);
  }

  /**
   * Truncate $string; if $string exceeds $maxLen, place "..." at the end
   *
   * Lengths are counted in bytes.
   *
   * @param string $string
   * @param int $maxLen
   *   Maximum length of the result, including the "...".
   *
   * @return string
   */
  public static function ellipsify($string, $maxLen) {
    if (strlen($string) <= $maxLen) {
      return $string;
    }

    // no room for any text in front of the ellipsis: truncate hard so the
    // result never exceeds $maxLen
    if ($maxLen < 3) {
      return (string) substr($string, 0, max(0, $maxLen));
    }
    return substr($string, 0, $maxLen - 3) . '...';
  }

  /**
   * Generate a random string.
   *
   * Uses rand(), so the result is not suitable as a security token.
   *
   * @param int $len
   *   Number of characters to generate.
   * @param string $alphabet
   *   The characters to pick from (see self::ALPHANUMERIC).
   *
   * @return string
   */
  public static function createRandom($len, $alphabet) {
    $alphabetSize = strlen($alphabet);
    if ($alphabetSize === 0) {
      // nothing to pick from
      return '';
    }

    $result = '';
    for ($i = 0; $i < $len; $i++) {
      // square brackets: the curly-brace offset syntax is removed in PHP 8
      $result .= $alphabet[rand(0, $alphabetSize - 1)];
    }
    return $result;
  }

  /**
   * Examples:
   * "admin foo" => array(NULL,"admin foo")
   * "cms:admin foo" => array("cms", "admin foo")
   *
   * @param string $delim
   *   The delimiter between prefix and name, e.g. ":".
   * @param string $string
   *   E.g. "view all contacts". Syntax: "[prefix:]name".
   * @param string|null $defaultPrefix
   *   Prefix to report when $string has none.
   *
   * @return array
   *   (0 => string|NULL $prefix, 1 => string $value)
   */
  public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
    $pos = strpos($string, $delim);
    if ($pos === FALSE) {
      return array($defaultPrefix, $string);
    }

    // skip the whole delimiter, which may be longer than one character
    return array(substr($string, 0, $pos), substr($string, $pos + strlen($delim)));
  }

  /**
   * This function will mask part of the the user portion of an Email address (everything before the @)
   *
   * The masked run is centred within the user portion.
   *
   * @param string $email
   *   The email address to be masked.
   * @param string $maskChar
   *   The character used for masking.
   * @param int $percent
   *   The percentage of the user portion to be masked.
   *
   * @return string
   *   returns the masked Email address
   */
  public static function maskEmail($email, $maskChar = '*', $percent = 50) {
    // pad so that an input without "@" yields an empty domain, not a notice
    list($user, $domain) = array_pad(explode('@', $email, 2), 2, '');

    $len = strlen($user);
    $maskCount = (int) floor($len * $percent / 100);
    $offset = (int) floor(($len - $maskCount) / 2);

    $masked = substr($user, 0, $offset)
      . str_repeat($maskChar, $maskCount)
      . substr($user, $maskCount + $offset);

    return ($masked . '@' . $domain);
  }

  /**
   * This function compares two strings after trimming white space.
   *
   * @param string $strOne
   *   String one.
   * @param string $strTwo
   *   String two.
   * @param bool $case
   *   TRUE: both strings are lower-cased first, i.e. the comparison ignores case.
   *   FALSE: the strings are compared as they are, i.e. case matters.
   *
   * @return bool
   *   TRUE (string are identical); FALSE (strings are not identical)
   */
  public static function compareStr($strOne, $strTwo, $case) {
    $one = trim($strOne);
    $two = trim($strTwo);

    if ($case) {
      $one = strtolower($one);
      $two = strtolower($two);
    }

    // strict comparison: "==" would treat numeric strings such as
    // "1e3" and "1000" as identical
    return $one === $two;
  }

  /**
   * Many parts of the codebase have a convention of internally passing around
   * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
   * (because most other odd characters are %-escaped in URLs; and %-escaped
   * strings don't need any extra escaping in HTML).
   *
   * @param string $htmlUrl
   *   URL with HTML entities.
   * @return string
   *   URL without HTML entities
   */
  public static function unstupifyUrl($htmlUrl) {
    return str_replace('&amp;', '&', $htmlUrl);
  }

  /**
   * Formats a string of attributes for insertion in an html tag.
   *
   * Array values are joined with a space; values are passed through
   * htmlspecialchars(), attribute names are output as given.
   *
   * @param array $attributes
   *   (attribute name => string value or array of values)
   *
   * @return string
   */
  public static function htmlAttributes($attributes) {
    $output = '';
    foreach ($attributes as $name => $vals) {
      $output .= " $name=\"" . htmlspecialchars(implode(' ', (array) $vals)) . '"';
    }
    return ltrim($output);
  }

}