a3606c2558ff92f1eb0def141467594a39ccbe75
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 require_once 'HTML/QuickForm/Rule/Email.php';
37
/**
 * Static string helpers: name munging, encoding detection, redaction,
 * URL comparison, boolean parsing and simple text formatting.
 */
class CRM_Utils_String {
  const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";

  /**
   * List of all letters and numbers
   */
  const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';

  /**
   * Convert a display name into a potential variable
   * name that we could use in forms/code
   *
   * @param string $title
   *   Title of the string.
   * @param int $maxLength
   *   Maximum length of the returned name.
   *
   * @return string
   *   An equivalent variable name.
   */
  public static function titleToVar($title, $maxLength = 31) {
    $variable = self::munge($title, '_', $maxLength);

    if (CRM_Utils_Rule::title($variable, $maxLength)) {
      return $variable;
    }

    // The munged name did not pass the title rule; fall back to a prefix of
    // the md5 hash of the title to prevent errors downstream.
    return substr(md5($title), 0, $maxLength);
  }

  /**
   * Given a string, replace every run of characters other than ASCII
   * letters and digits with the replacement character.
   *
   * @param string $name
   *   The name to be worked on.
   * @param string $char
   *   The character to use for non-valid chars.
   * @param int $len
   *   Maximum length of the result; a falsy value disables truncation.
   *
   * @return string
   *   The manipulated string.
   */
  public static function munge($name, $char = '_', $len = 63) {
    // We only use the ascii character set since mysql does not create
    // table names / field names otherwise (CRM-11744).
    $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // Lets keep variable names short.
    return $len ? substr($name, 0, $len) : $name;
  }

  /**
   * Convert possibly underscore separated words to camel case with special handling for 'UF'
   * e.g membership_payment returns MembershipPayment
   *
   * @param string $string
   *
   * @return string
   */
  public static function convertStringToCamel($string) {
    // array_map avoids the dangling reference a by-reference foreach leaves.
    $fragments = array_map('ucfirst', explode('_', $string));

    // Special case: UFGroup, UFJoin, UFMatch, UFField
    if ($fragments[0] === 'Uf') {
      $fragments[0] = 'UF';
    }
    return implode('', $fragments);
  }

  /**
   * Takes a variable name and replaces its last $len characters with the
   * first $len characters of uniqid().
   *
   * Note that uniqid() is time-based, so its leading characters change
   * slowly; the result is not a source of strong randomness.
   *
   * @param string $name
   *   Initial Variable Name.
   * @param int $len
   *   Number of trailing characters to replace.
   *
   * @return string
   *   Modified Variable Name
   */
  public static function rename($name, $len = 4) {
    $rand = substr(uniqid(), 0, $len);
    return substr_replace($name, $rand, -$len, $len);
  }

  /**
   * Takes a string and returns the last tuple of the string.
   * useful while converting file names to class names etc
   *
   * @param string $string
   *   The input string.
   * @param string $char
   *   Character used to demarcate the components
   *
   * @return string|null
   *   The last component, or NULL when $string is an array.
   */
  public static function getClassName($string, $char = '_') {
    if (is_array($string)) {
      return NULL;
    }

    $names = explode($char, $string);
    return empty($names) ? NULL : array_pop($names);
  }

  /**
   * Appends a name to a string and separated by delimiter.
   * does the right thing for an empty string
   *
   * Values that are empty() (including the string '0') are skipped.
   *
   * @param string $str
   *   The string to be appended to (modified in place).
   * @param string $delim
   *   The delimiter to use.
   * @param mixed $name
   *   The string (or array of strings) to append.
   *
   * @return void
   */
  public static function append(&$str, $delim, $name) {
    if (empty($name)) {
      return;
    }

    $names = is_array($name) ? $name : array($name);
    foreach ($names as $n) {
      if (empty($n)) {
        continue;
      }
      // No delimiter in front of the very first piece.
      $str = empty($str) ? $n : $str . $delim . $n;
    }
  }

  /**
   * Determine if the string is composed only of ascii characters
   *
   * @param string $str
   *   Input string.
   * @param bool $utf8
   *   Also accept valid UTF-8 when the string is not plain ASCII (default yes).
   *
   * @return bool
   *   TRUE if the string is ASCII (or, when $utf8 is set, UTF-8).
   */
  public static function isAscii($str, $utf8 = TRUE) {
    if (function_exists('mb_detect_encoding')) {
      $order = array('ASCII');
      if ($utf8) {
        $order[] = 'UTF-8';
      }
      $enc = mb_detect_encoding($str, $order, TRUE);
      return ($enc == 'ASCII' || $enc == 'UTF-8');
    }

    // Fallback without mbstring: eliminate all white space from the string.
    // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
    $str = preg_replace('/\s+/', '', $str);

    // Match low- or high-ascii characters.
    if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
      // If we did match, optionally try for utf-8.
      return $utf8 ? self::isUtf8($str) : FALSE;
    }
    return TRUE;
  }

  /**
   * Determine the string replacements for redaction
   * on the basis of the regular expressions
   *
   * NOTE(review): the working arrays are declared static, so matches found
   * in earlier calls are still present in the array returned by later calls.
   * Confirm whether callers rely on this accumulation before changing it.
   *
   * @param string $str
   *   Input string.
   * @param array $regexRules
   *   Map of regular expression => replacement prefix.
   *
   * @return array
   *   Map of matched string => replacement prefix followed by the first five
   *   characters of the md5 of the matched string.
   */
  public static function regex($str, $regexRules) {
    //redact the regular expressions
    if (!empty($regexRules) && isset($str)) {
      static $matches, $totalMatches, $match = array();
      foreach ($regexRules as $pattern => $replacement) {
        preg_match_all($pattern, $str, $matches);
        if (!empty($matches[0])) {
          if (empty($totalMatches)) {
            $totalMatches = $matches[0];
          }
          else {
            $totalMatches = array_merge($totalMatches, $matches[0]);
          }
          // Use the matched strings as keys so duplicates collapse.
          $match = array_flip($totalMatches);
        }
      }
    }

    if (!empty($match)) {
      foreach ($match as $matchKey => & $dontCare) {
        // The first rule whose pattern matches decides the replacement.
        foreach ($regexRules as $pattern => $replacement) {
          if (preg_match($pattern, $matchKey)) {
            $dontCare = $replacement . substr(md5($matchKey), 0, 5);
            break;
          }
        }
      }
      // Drop the loop reference before handing the array out.
      unset($dontCare);
      return $match;
    }
    return CRM_Core_DAO::$_nullArray;
  }

  /**
   * Apply case-insensitive string replacements to redact a text.
   *
   * @param string $str
   *   Text to be redacted.
   * @param array $stringRules
   *   Map of string to find => replacement.
   *
   * @return mixed
   *   The redacted text.
   */
  public static function redaction($str, $stringRules) {
    //redact the strings
    if (!empty($stringRules)) {
      foreach ($stringRules as $match => $replace) {
        $str = str_ireplace($match, $replace, $str);
      }
    }

    //return the redacted output
    return $str;
  }

  /**
   * Determine if a string is composed only of utf8 characters
   *
   * @param string $str
   *   Input string.
   *
   * @return bool
   */
  public static function isUtf8($str) {
    // The function name must be passed as a string; a bare constant is an
    // Error on PHP 8.
    if (function_exists('mb_detect_encoding')) {
      $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
      return ($enc !== FALSE);
    }

    // eliminate all white space from the string
    $str = preg_replace('/\s+/', '', $str);

    /* pattern stolen from the php.net function documentation for
     * utf8decode();
     * comment by JF Sebastian, 30-Mar-2005
     * (the \xf0 alternative previously lacked its \x prefix)
     */
    return (bool) preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
  }

  /**
   * Determine if two href's are equivalent (fuzzy match)
   *
   * Two urls match when, compared case-insensitively, their paths are equal
   * and the value of the framework url variable in their query strings is
   * equal.
   *
   * @param string $url1
   *   The first url to be matched.
   * @param string $url2
   *   The second url to be matched against.
   *
   * @return bool
   *   true if the urls match, else false
   */
  public static function match($url1, $url2) {
    $url1Str = parse_url(strtolower($url1));
    $url2Str = parse_url(strtolower($url2));

    // parse_url() omits 'path' when the url has none and returns FALSE for
    // malformed urls; isset() covers both without raising a notice.
    $path1 = isset($url1Str['path']) ? $url1Str['path'] : '';
    $path2 = isset($url2Str['path']) ? $url2Str['path'] : '';
    if ((string) $path1 !== (string) $path2) {
      return FALSE;
    }

    $query1 = isset($url1Str['query']) ? $url1Str['query'] : NULL;
    $query2 = isset($url2Str['query']) ? $url2Str['query'] : NULL;

    // Compare as strings so numeric-looking values are not type-juggled.
    return (string) self::extractURLVarValue($query1) === (string) self::extractURLVarValue($query2);
  }

  /**
   * Extract the civicrm path from the url
   *
   * @param string $query
   *   The query-string part of a url.
   *
   * @return string|null
   *   civicrm url (eg: civicrm/contact/search), or NULL when the framework
   *   url variable is not present in the query.
   */
  public static function extractURLVarValue($query) {
    $config = CRM_Core_Config::singleton();
    $urlVar = $config->userFrameworkURLVar;

    foreach (explode('&', (string) $query) as $p) {
      // strpos() is deliberately tested for truthiness: parameters without
      // '=' or with an empty key (leading '=') are ignored.
      if (strpos($p, '=')) {
        // Limit to two pieces so a value that itself contains '=' is kept whole.
        list($k, $v) = explode('=', $p, 2);
        if ($k == $urlVar) {
          return $v;
        }
      }
    }
    return NULL;
  }

  /**
   * Translate a true/false/yes/no string to a boolean
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return bool
   *   TRUE for y/yes/t/true/1 (case-insensitive); FALSE for anything else,
   *   including non-scalar input.
   */
  public static function strtobool($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    return (bool) preg_match('/^(y(es)?|t(rue)?|1)$/i', $str);
  }

  /**
   * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
   *
   * @param string $str
   *   The string to be translated.
   *
   * @return string|bool
   *   '1', '0', or FALSE when the input is not recognised or not scalar.
   */
  public static function strtoboolstr($str) {
    if (!is_scalar($str)) {
      return FALSE;
    }

    if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
      return '1';
    }
    if (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
      return '0';
    }
    return FALSE;
  }

  /**
   * Convert a HTML string into a text one using html2text
   *
   * Placeholders of the form {name} are rewritten to token:{name} before
   * the conversion and restored afterwards.
   *
   * @param string $html
   *   The string to be converted.
   *
   * @return string
   *   the converted string
   */
  public static function htmlToText($html) {
    require_once 'packages/html2text/rcube_html2text.php';
    $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
    $converter = new rcube_html2text($token_html);
    $token_text = $converter->get_text();
    return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
  }

  /**
   * Split a display name into first / middle / last name parts.
   *
   * Nothing is written when the trimmed input is empty().
   *
   * @param string $string
   *   Either "lname, fname [mname]" or "fname [mname] lname".
   * @param array $params
   *   Receives 'first_name', 'middle_name' and 'last_name' as available.
   */
  public static function extractName($string, &$params) {
    $name = trim($string);
    if (empty($name)) {
      return;
    }

    // strip out quotes
    $name = str_replace(array('"', '\''), '', $name);

    if (strpos($name, ',') !== FALSE) {
      // name has a comma - assume lname, fname [mname]
      $names = explode(',', $name);
      $params['last_name'] = trim($names[0]);

      // Split on runs of whitespace so repeated spaces do not produce
      // empty name parts.
      $fnames = preg_split('/\s+/', trim($names[1]));
      $params['first_name'] = trim($fnames[0]);
      if (count($fnames) > 1) {
        $params['middle_name'] = trim($fnames[1]);
      }
      return;
    }

    // name has no comma - assume fname [mname] lname
    $names = preg_split('/\s+/', $name);
    $params['first_name'] = $names[0];
    if (count($names) == 2) {
      $params['last_name'] = $names[1];
    }
    elseif (count($names) > 2) {
      // Anything after the third word is ignored.
      $params['middle_name'] = $names[1];
      $params['last_name'] = $names[2];
    }
  }

  /**
   * Parse newline separated "name=value" lines into an array.
   *
   * Lines whose value is empty() (including '0') are dropped.
   *
   * @param string $string
   *
   * @return array
   *   Map of trimmed name => trimmed value.
   */
  public static function &makeArray($string) {
    $string = trim($string);

    $values = explode("\n", $string);
    $result = array();
    foreach ($values as $value) {
      list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
      if (!empty($v)) {
        $result[trim($n)] = trim($v);
      }
    }
    return $result;
  }

  /**
   * Given an ezComponents-parsed representation of
   * a text with alternatives return only the first one
   *
   * @param string $full
   *   All alternatives as a long string (or some other text).
   *
   * @return string
   *   only the first alternative found (or the text without alternatives)
   */
  public static function stripAlternatives($full) {
    $matches = array();
    preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);

    // Only use the first alternative when it has visible content.
    if (isset($matches[1]) && trim(strip_tags($matches[1])) != '') {
      return $matches[1];
    }
    return $full;
  }

  /**
   * Strip leading, trailing, double spaces from string
   * used for postal/greeting/addressee
   *
   * @param string $string
   *   Input string to be cleaned.
   *
   * @return string
   *   the cleaned string
   */
  public static function stripSpaces($string) {
    if (empty($string)) {
      return $string;
    }
    return preg_replace("/\s{2,}/", " ", trim($string));
  }

  /**
   * clean the URL 'path' variable that we use
   * to construct CiviCRM urls by removing characters from the path variable
   *
   * @param string $string
   *   The input string to be sanitized.
   * @param array $search
   *   The characters to be sanitized; a value loosely equal to NULL selects
   *   the built-in list.
   * @param string $replace
   *   The character to replace it with; a value loosely equal to NULL
   *   (including '') selects '_'.
   *
   * @return string
   *   the sanitized string
   */
  public static function stripPathChars(
    $string,
    $search = NULL,
    $replace = NULL
  ) {
    if (empty($string)) {
      return $string;
    }

    if ($search == NULL) {
      $search = array(
        '&',
        ';',
        ',',
        '=',
        '$',
        '"',
        "'",
        '\\',
        '<',
        '>',
        '(',
        ')',
        ' ',
        "\r",
        "\r\n",
        "\n",
        "\t",
      );
    }

    if ($replace == NULL) {
      $replace = '_';
    }

    return str_replace($search, $replace, $string);
  }

  /**
   * Use HTMLPurifier to clean up a text string and remove any potential
   * xss attacks. This is primarily used in public facing pages which
   * accept html as the input string
   *
   * @param string $string
   *   The input string.
   *
   * @return string
   *   the cleaned up string
   */
  public static function purifyHTML($string) {
    // The purifier is built once and reused by later calls.
    static $_filter = NULL;
    if (!$_filter) {
      $config = HTMLPurifier_Config::createDefault();
      $config->set('Core.Encoding', 'UTF-8');

      // Disable the cache entirely
      $config->set('Cache.DefinitionImpl', NULL);

      $_filter = new HTMLPurifier($config);
    }

    return $_filter->purify($string);
  }

  /**
   * Truncate $string; if $string exceeds $maxLen, place "..." at the end
   *
   * Lengths are counted in UTF-8 characters when the mbstring extension is
   * available, so multibyte characters are never cut in half; otherwise
   * they are counted in bytes.
   *
   * @param string $string
   * @param int $maxLen
   *   Maximum length of the result, including the "...".
   *
   * @return string
   */
  public static function ellipsify($string, $maxLen) {
    if (function_exists('mb_strlen') && function_exists('mb_substr')) {
      if (mb_strlen($string, 'UTF-8') <= $maxLen) {
        return $string;
      }
      return mb_substr($string, 0, $maxLen - 3, 'UTF-8') . '...';
    }

    if (strlen($string) <= $maxLen) {
      return $string;
    }
    return substr($string, 0, $maxLen - 3) . '...';
  }

  /**
   * Generate a random string
   *
   * Characters are picked with rand(), which is not cryptographically
   * secure.
   *
   * @param int $len
   *   Number of characters to generate.
   * @param string $alphabet
   *   Characters to pick from (e.g. self::ALPHANUMERIC).
   *
   * @return string
   *   The generated string; empty when $alphabet is empty.
   */
  public static function createRandom($len, $alphabet) {
    $alphabetSize = strlen($alphabet);
    if ($alphabetSize === 0) {
      // Nothing to pick from.
      return '';
    }

    $result = '';
    for ($i = 0; $i < $len; $i++) {
      // Square brackets: the curly-brace offset syntax is gone in PHP 8.
      $result .= $alphabet[rand(1, $alphabetSize) - 1];
    }
    return $result;
  }

  /**
   * Examples:
   * "admin foo" => array(NULL,"admin foo")
   * "cms:admin foo" => array("cms", "admin foo")
   *
   * @param string $delim
   *   Delimiter between prefix and value; may be longer than one character.
   * @param string $string
   *   E.g. "view all contacts". Syntax: "[prefix:]name".
   * @param null $defaultPrefix
   *   Prefix to report when $string contains no delimiter.
   *
   * @return array
   *   (0 => string|NULL $prefix, 1 => string $value)
   */
  public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
    $pos = strpos($string, $delim);
    if ($pos === FALSE) {
      return array($defaultPrefix, $string);
    }

    // Skip the whole delimiter, not just one character. The cast keeps the
    // value a string when nothing follows the delimiter (substr() returns
    // FALSE there on PHP < 8).
    return array(
      substr($string, 0, $pos),
      (string) substr($string, $pos + strlen($delim)),
    );
  }

  /**
   * This function will mask part of the the user portion of an Email address (everything before the @)
   *
   * @param string $email
   *   The email address to be masked.
   * @param string $maskChar
   *   The character used for masking.
   * @param int $percent
   *   The percentage of the user portion to be masked.
   *
   * @return string
   *   returns the masked Email address
   */
  public static function maskEmail($email, $maskChar = '*', $percent = 50) {
    $parts = explode('@', $email);
    $user = $parts[0];
    // An address without '@' has no domain part; avoid an undefined offset.
    $domain = isset($parts[1]) ? $parts[1] : '';

    $len = strlen($user);
    $maskCount = (int) floor($len * $percent / 100);
    // Centre the masked run inside the user portion.
    $offset = (int) floor(($len - $maskCount) / 2);

    $masked = substr($user, 0, $offset)
      . str_repeat($maskChar, $maskCount)
      . substr($user, $maskCount + $offset);

    return ($masked . '@' . $domain);
  }

  /**
   * This function compares two strings after trimming white space
   *
   * @param string $strOne
   *   String one.
   * @param string $strTwo
   *   String two.
   * @param bool $case
   *   When truthy both strings are lower-cased first, i.e. the comparison
   *   is case-INsensitive; when falsy the comparison is case-sensitive.
   *
   * @return bool
   *   TRUE (string are identical); FALSE (strings are not identical)
   */
  public static function compareStr($strOne, $strTwo, $case) {
    $one = trim($strOne);
    $two = trim($strTwo);

    if ($case) {
      $one = strtolower($one);
      $two = strtolower($two);
    }

    // Strict comparison: '==' would treat numeric strings such as '1e3' and
    // '1000' as equal.
    return $one === $two;
  }

  /**
   * Many parts of the codebase have a convention of internally passing around
   * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
   * (because most other odd characters are %-escaped in URLs; and %-escaped
   * strings don't need any extra escaping in HTML).
   *
   * @param string $htmlUrl
   *   URL with HTML entities.
   * @return string
   *   URL without HTML entities
   */
  public static function unstupifyUrl($htmlUrl) {
    return str_replace('&amp;', '&', $htmlUrl);
  }

}