9918d92215b24be661fa9f5515c1ad685355298c
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 require_once 'HTML/QuickForm/Rule/Email.php';
37
38 /**
39 * This class contains string functions
40 *
41 */
42 class CRM_Utils_String {
43 const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
44
45 /**
46 * List of all letters and numbers
47 */
48 const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
49
/**
 * Derive a variable name (usable in forms/code) from a display title.
 *
 * The title is first run through self::munge() with '_' as the replacement
 * character. If CRM_Utils_Rule::title() accepts the munged value it is
 * returned; otherwise the leading $maxLength characters of the md5 hash of
 * the original title are returned instead.
 *
 * @param string $title the display name to convert
 * @param int $maxLength maximum length of the generated name
 *
 * @return string the generated variable name
 * @access public
 * @static
 */
static function titleToVar($title, $maxLength = 31) {
  $candidate = self::munge($title, '_', $maxLength);

  // when the munged name is rejected, hand back a slice of the md5 hash
  // of the title to prevent errors downstream
  return CRM_Utils_Rule::title($candidate, $maxLength)
    ? $candidate
    : substr(md5($title), 0, $maxLength);
}
76
/**
 * Replace every run of characters outside [a-zA-Z0-9] with the replacement
 * character and optionally cap the length of the result.
 *
 * The input is trimmed first. Only the ascii character set is kept since
 * mysql does not create table names / field names otherwise (CRM-11744).
 *
 * @param string $name the name to be worked on
 * @param string $char the replacement for each run of non-valid chars
 * @param int $len maximum length of the result; a falsy value disables truncation
 *
 * @return string the manipulated string
 * @access public
 * @static
 */
static function munge($name, $char = '_', $len = 63) {
  $sanitized = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

  // lets keep variable names short when a length limit is given
  return $len ? substr($sanitized, 0, $len) : $sanitized;
}
104
/**
 * Convert an underscore separated string to CamelCase.
 *
 * Each underscore-delimited fragment gets its first character upper-cased
 * and the fragments are then joined without a separator, e.g.
 * membership_payment becomes MembershipPayment. A leading "Uf" fragment is
 * special-cased to "UF" (UFGroup, UFJoin, UFMatch, UFField).
 *
 * @param string $string
 *
 * @return string the camel-cased string
 */
static function convertStringToCamel($string) {
  $words = array_map('ucfirst', explode('_', $string));

  // Special case: UFGroup, UFJoin, UFMatch, UFField
  if ($words[0] === 'Uf') {
    $words[0] = 'UF';
  }

  return implode('', $words);
}
124
/**
 * Takes a variable name and munges it into another variable name by
 * replacing its last $len characters with characters taken from uniqid().
 *
 * The replacement is taken from the END of the uniqid() value. uniqid() is
 * derived from the current time, so its leading characters are the slowly
 * changing high-order part of the timestamp; using them (as this function
 * used to) yields the same "random" suffix for hours on end. The trailing
 * characters are the ones that differ from call to call.
 *
 * @param string $name Initial Variable Name
 * @param int $len number of trailing characters to replace; values
 *   below 1 leave the name untouched
 *
 * @return string Randomized Variable Name
 * @access public
 * @static
 */
static function rename($name, $len = 4) {
  if ($len <= 0) {
    // nothing to replace; also avoids substr($x, -0) returning the whole string
    return $name;
  }

  $rand = substr(uniqid(), -$len);
  return substr_replace($name, $rand, -$len, $len);
}
140
/**
 * Return the last component of a delimited string.
 * Useful while converting file names to class names etc.
 *
 * Arrays are not processed: when $string is an array nothing is returned
 * (i.e. the result is NULL).
 *
 * @param string $string the input string
 * @param string $char the character used to demarcate the components
 *
 * @access public
 *
 * @return string|null the last component
 * @static
 */
static function getClassName($string, $char = '_') {
  if (is_array($string)) {
    return NULL;
  }

  $components = explode($char, $string);
  if (empty($components)) {
    return NULL;
  }

  return array_pop($components);
}
162
/**
 * Append one or more names to a string, separated by a delimiter.
 *
 * The delimiter is only inserted when the target string is not empty, so
 * the first appended value never gets a leading delimiter. Empty names
 * (as judged by empty()) are skipped.
 *
 * @param string $str the string to be appended to (modified in place)
 * @param string $delim the delimiter to use
 * @param mixed $name the string (or array of strings) to append
 *
 * @return void
 * @access public
 * @static
 */
static function append(&$str, $delim, $name) {
  if (empty($name)) {
    return;
  }

  // treat a single value as a one-element list so both cases share one loop
  $pieces = is_array($name) ? $name : array($name);

  foreach ($pieces as $piece) {
    if (empty($piece)) {
      continue;
    }
    $str = empty($str) ? $piece : $str . $delim . $piece;
  }
}
202
/**
 * Determine if the string is composed only of ascii characters
 * (or, optionally, of valid utf8).
 *
 * When the mbstring function mb_detect_encoding() exists it is used in
 * strict mode with the candidate encodings ASCII and, if $utf8 is set,
 * UTF-8. Without it a regular expression fallback is used: whitespace is
 * removed and any remaining byte in \x00-\x20 or \x7F-\xFF makes the
 * string non-ascii, in which case self::isUtf8() decides when $utf8 is set.
 *
 * @param string $str input string
 * @param boolean $utf8 attempt utf8 match on failure (default yes)
 *
 * @return boolean true if string is ascii
 * @access public
 * @static
 */
static function isAscii($str, $utf8 = TRUE) {
  if (function_exists('mb_detect_encoding')) {
    $candidates = $utf8 ? array('ASCII', 'UTF-8') : array('ASCII');
    $detected = mb_detect_encoding($str, $candidates, TRUE);
    return ($detected == 'ASCII' || $detected == 'UTF-8');
  }

  // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
  // eliminate all white space from the string
  $compact = preg_replace('/\s+/', '', $str);

  /* no low- or high-ascii characters means plain ascii */
  if (!preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $compact)) {
    return TRUE;
  }

  /* if we did match, try for utf-8 when allowed */
  return $utf8 ? self::isUtf8($compact) : FALSE;
}
244
/**
 * Determine the string replacements for redaction
 * on the basis of the regular expressions.
 *
 * Every substring of $str matched by one of the patterns becomes a key of
 * the returned array; its value is the replacement of the first pattern
 * that matches the substring, followed by the first five characters of the
 * substring's md5 hash.
 *
 * NOTE(review): the working variables are declared static, so matches
 * collected by earlier calls are kept and merged into the result of later
 * calls. Confirm whether callers rely on this accumulation before
 * changing it.
 *
 * @param string $str input string
 * @param array $regexRules map of regular expression => replacement prefix
 *
 * @return array matched strings => corresponding redacted outputs;
 *   CRM_Core_DAO::$_nullArray when there is nothing to redact
 * @access public
 * @static
 */
static function regex($str, $regexRules) {
  //redact the regular expressions
  if (!empty($regexRules) && isset($str)) {
    // kept inside the condition on purpose: the statics are only bound
    // for calls that actually have rules and a string to work on
    static $matches, $totalMatches, $match = array();
    foreach ($regexRules as $pattern => $replacement) {
      preg_match_all($pattern, $str, $matches);
      if (empty($matches[0])) {
        continue;
      }
      $totalMatches = empty($totalMatches)
        ? $matches[0]
        : array_merge($totalMatches, $matches[0]);
      $match = array_flip($totalMatches);
    }
  }

  if (empty($match)) {
    return CRM_Core_DAO::$_nullArray;
  }

  foreach (array_keys($match) as $found) {
    foreach ($regexRules as $pattern => $replacement) {
      if (preg_match($pattern, $found)) {
        // first matching rule wins
        $match[$found] = $replacement . substr(md5($found), 0, 5);
        break;
      }
    }
  }
  return $match;
}
287
/**
 * Apply a set of case-insensitive string replacements to a string.
 *
 * The rules are applied in array order, each one operating on the result
 * of the previous replacement.
 *
 * @param string $str the string to redact
 * @param array $stringRules map of search string => replacement
 *
 * @return mixed the redacted output
 */
static function redaction($str, $stringRules) {
  if (empty($stringRules)) {
    // nothing to redact
    return $str;
  }

  // str_ireplace() with arrays walks the pairs in order, exactly like
  // replacing them one at a time
  return str_ireplace(array_keys($stringRules), array_values($stringRules), $str);
}
305
/**
 * Determine if a string is composed only of utf8 characters.
 *
 * mb_detect_encoding() (strict mode, UTF-8 only) is used when the mbstring
 * extension is available. Otherwise whitespace is removed and the string
 * is validated against a byte-level pattern for well-formed UTF-8
 * sequences.
 *
 * @param string $str input string
 * @access public
 * @static
 *
 * @return boolean
 */
static function isUtf8($str) {
  // the function name must be passed as a string; a bare identifier is an
  // undefined constant
  if (function_exists('mb_detect_encoding')) {
    $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
    return ($enc !== FALSE);
  }

  // eliminate all white space from the string
  $str = preg_replace('/\s+/', '', $str);

  /* pattern taken from the php.net function documentation for
   * utf8decode(); comment by JF Sebastian, 30-Mar-2005.
   * The \xf0 alternative covers the first plane of 4-byte sequences.
   */
  $pattern = '/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/';

  // normalise preg_match()'s 1/0/FALSE to the documented boolean
  return preg_match($pattern, $str) === 1;
}
334
/**
 * Determine if two href's are equivalent (fuzzy match).
 *
 * Both urls are lower-cased and parsed; they are considered equivalent
 * when their path components are equal and self::extractURLVarValue()
 * yields equal values for their query strings. Scheme, host and all other
 * query parameters are ignored.
 *
 * @param string $url1 the first url to be matched
 * @param string $url2 the second url to be matched against
 *
 * @return boolean true if the urls match, else false
 * @access public
 * @static
 */
static function match($url1, $url2) {
  $parts1 = parse_url(strtolower($url1));
  $parts2 = parse_url(strtolower($url2));

  // a url may have no path component (or fail to parse altogether);
  // treat that as a NULL path instead of reading an undefined index
  $path1 = isset($parts1['path']) ? $parts1['path'] : NULL;
  $path2 = isset($parts2['path']) ? $parts2['path'] : NULL;

  if ($path1 != $path2) {
    return FALSE;
  }

  $var1 = self::extractURLVarValue(CRM_Utils_Array::value('query', $parts1));
  $var2 = self::extractURLVarValue(CRM_Utils_Array::value('query', $parts2));

  return $var1 == $var2;
}
359
/**
 * Extract the value of the user framework's url variable from a query
 * string.
 *
 * The query string is split on '&'; the value of the first parameter whose
 * name equals the configured userFrameworkURLVar is returned. Parameters
 * without a name or without '=' are skipped.
 *
 * @param string $query the query string portion of a url
 *
 * @return string|null the value of the url variable
 *   (eg: civicrm/contact/search/...), or NULL when it is not present
 * @access public
 * @static
 */
static function extractURLVarValue($query) {
  $config = CRM_Core_Config::singleton();
  $urlVar = $config->userFrameworkURLVar;

  foreach (explode('&', $query) as $pair) {
    // strpos() is deliberately used for its truthiness: both a missing
    // '=' (FALSE) and an empty parameter name (position 0) are skipped
    if (strpos($pair, '=')) {
      // split on the first '=' only so a value that itself contains '='
      // is returned whole instead of being truncated
      list($key, $value) = explode('=', $pair, 2);
      if ($key == $urlVar) {
        return $value;
      }
    }
  }
  return NULL;
}
384
/**
 * Translate a true/false/yes/no string to a boolean.
 *
 * Only "y", "yes", "t", "true" (any letter case) and "1" count as TRUE;
 * every other value, including non-scalars, yields FALSE.
 *
 * @param string $str the string to be translated
 *
 * @return boolean
 * @access public
 * @static
 */
static function strtobool($str) {
  return is_scalar($str) && preg_match('/^(y(es)?|t(rue)?|1)$/i', $str) === 1;
}
404
/**
 * Translate a true/false/yes/no string to the string '1' or '0'.
 *
 * "y", "yes", "t", "true" and "1" give '1'; "n", "no", "f", "false" and
 * "0" give '0' (letter case is ignored). Anything else, including
 * non-scalar input, gives boolean FALSE.
 *
 * @param string $str the string to be translated
 *
 * @return string|boolean '1', '0', or FALSE when the value is not recognised
 * @access public
 * @static
 */
static function strtoboolstr($str) {
  if (!is_scalar($str)) {
    return FALSE;
  }

  if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
    return '1';
  }

  return preg_match('/^(n(o)?|f(alse)?|0)$/i', $str) ? '0' : FALSE;
}
429
/**
 * Convert a HTML string into a text one using html2text.
 *
 * Tokens of the form {some.token} are rewritten to "token:{some.token}"
 * before the conversion and restored afterwards - presumably so that the
 * converter leaves them intact (confirm against rcube_html2text).
 *
 * @param string $html the string to be converted
 *
 * @return string the converted string
 * @access public
 * @static
 */
static function htmlToText($html) {
  require_once 'packages/html2text/rcube_html2text.php';

  // mark the tokens before handing the markup to the converter
  $marked = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
  $html2text = new rcube_html2text($marked);

  // strip the markers again from the plain text result
  return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $html2text->get_text());
}
447
/**
 * Split a free-form name into first / middle / last name parts.
 *
 * Quotes are removed first. Two layouts are recognised:
 *  - "lname, fname [mname]" when the name contains a comma
 *  - "fname [mname] lname" otherwise
 * Runs of consecutive spaces count as a single separator, so extra spaces
 * do not produce empty name parts. When more than three space separated
 * words are given without a comma, the third word is used as the last
 * name and the remainder is ignored.
 *
 * @param string $string the name to split
 * @param array $params receives the 'first_name', 'middle_name' and
 *   'last_name' keys that could be determined (modified in place);
 *   left untouched when the name is empty
 *
 * @return void
 */
static function extractName($string, &$params) {
  $name = trim($string);
  if (empty($name)) {
    return;
  }

  // strip out quotes, then any whitespace that was hiding inside them
  $name = trim(str_replace(array('"', '\''), '', $name));

  // check for comma in name
  if (strpos($name, ',') !== FALSE) {
    // name has a comma - assume lname, fname [mname]
    // (explode() always yields at least two parts here)
    $names = explode(',', $name);
    $params['last_name'] = trim($names[0]);

    // check for space delim
    $fnames = preg_split('/ +/', trim($names[1]));
    $params['first_name'] = trim($fnames[0]);
    if (count($fnames) > 1) {
      $params['middle_name'] = trim($fnames[1]);
    }
    return;
  }

  // name has no comma - assume fname [mname] lname
  $names = preg_split('/ +/', $name);
  $params['first_name'] = $names[0];
  if (count($names) == 2) {
    $params['last_name'] = $names[1];
  }
  elseif (count($names) > 2) {
    $params['middle_name'] = $names[1];
    $params['last_name'] = $names[2];
  }
}
501
/**
 * Build an associative array from a multi-line "name=value" string.
 *
 * The input is trimmed and split on "\n"; each line is passed to
 * CRM_Utils_System::explode('=', $line, 2) and the trimmed name/value pair
 * is stored in the result. Lines whose value is empty are skipped.
 *
 * NOTE(review): the emptiness test uses empty(), so a value of "0" is
 * skipped as well - confirm whether that is intended.
 *
 * @param string $string one "name=value" pair per line
 *
 * @return array map of name => value (returned by reference)
 */
static function &makeArray($string) {
  $result = array();

  foreach (explode("\n", trim($string)) as $line) {
    list($key, $val) = CRM_Utils_System::explode('=', $line, 2);
    if (empty($val)) {
      continue;
    }
    $result[trim($key)] = trim($val);
  }

  return $result;
}
520
/**
 * Given an ezComponents-parsed representation of a text with
 * alternatives, return only the first alternative.
 *
 * The first alternative is the text between the "-ALTERNATIVE ITEM 0-"
 * and "-ALTERNATIVE ITEM 1-" markers. It is only used when it still
 * contains something after removing tags and surrounding whitespace;
 * otherwise the input is returned unchanged.
 *
 * @param string $full all alternatives as a long string (or some other text)
 *
 * @return string only the first alternative found (or the text without alternatives)
 */
static function stripAlternatives($full) {
  $found = array();
  preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $found);

  $hasUsableFirst = isset($found[1]) && trim(strip_tags($found[1])) != '';

  return $hasUsableFirst ? $found[1] : $full;
}
542
/**
 * Strip leading and trailing whitespace and collapse every run of two or
 * more whitespace characters into a single space.
 * Used for postal/greeting/addressee.
 *
 * Values considered empty by empty() are returned untouched.
 *
 * @param string $string input string to be cleaned
 *
 * @return string the cleaned string
 * @access public
 * @static
 */
static function stripSpaces($string) {
  if (empty($string)) {
    return $string;
  }

  return preg_replace("/\s{2,}/", " ", trim($string));
}
556
/**
 * Clean the URL 'path' variable that we use to construct CiviCRM urls by
 * replacing unwanted characters in it.
 *
 * When $search or $replace compares loosely equal to NULL (e.g. NULL, an
 * empty array or an empty string) the built-in defaults are used: a fixed
 * list of punctuation and whitespace characters, replaced by '_'.
 *
 * @param string $string the input string to be sanitized
 * @param array $search the characters to be sanitized
 * @param string $replace the character to replace it with
 *
 * @return string the sanitized string
 * @access public
 * @static
 */
static function stripPathChars($string,
  $search = NULL,
  $replace = NULL
) {
  if (empty($string)) {
    return $string;
  }

  if ($search == NULL) {
    // default set of characters that must not appear in a path
    $search = array(
      '&', ';', ',', '=', '$',
      '"', "'", '\\',
      '<', '>', '(', ')',
      ' ', "\r", "\r\n", "\n", "\t",
    );
  }

  if ($replace == NULL) {
    $replace = '_';
  }

  return str_replace($search, $replace, $string);
}
601
602
/**
 * Use HTMLPurifier to clean up a text string and remove any potential
 * xss attacks. This is primarily used in public facing pages which
 * accept html as the input string.
 *
 * The purifier instance is created on first use (UTF-8 encoding,
 * definition cache disabled) and kept in a function-static variable for
 * subsequent calls.
 *
 * @param string $string the input string
 *
 * @return string the cleaned up string
 * @public
 * @static
 */
static function purifyHTML($string) {
  static $purifier = NULL;

  if (!$purifier) {
    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Core.Encoding', 'UTF-8');
    // Disable the cache entirely
    $settings->set('Cache.DefinitionImpl', NULL);

    $purifier = new HTMLPurifier($settings);
  }

  return $purifier->purify($string);
}
628
/**
 * Truncate $string; if $string exceeds $maxLen, place "..." at the end.
 *
 * The result never exceeds $maxLen characters: when $maxLen is smaller
 * than the ellipsis itself, only as many dots as fit are returned.
 * Lengths are measured in characters (the string is assumed to be UTF-8)
 * when the mbstring extension is available, so a multibyte character is
 * never cut in half; without mbstring the function falls back to bytes.
 *
 * @param string $string
 * @param int $maxLen maximum length of the result
 *
 * @return string
 */
static function ellipsify($string, $maxLen) {
  $multibyte = function_exists('mb_strlen') && function_exists('mb_substr');

  $len = $multibyte ? mb_strlen($string, 'UTF-8') : strlen($string);
  if ($len <= $maxLen) {
    return $string;
  }

  if ($maxLen < 3) {
    // no room for any text; a negative substr() length would otherwise
    // return a result longer than $maxLen
    return (string) substr('...', 0, max(0, $maxLen));
  }

  $keep = $maxLen - 3;
  $head = $multibyte ? mb_substr($string, 0, $keep, 'UTF-8') : substr($string, 0, $keep);

  return $head . '...';
}
646
/**
 * Generate a random string.
 *
 * Each character is drawn independently and uniformly from $alphabet
 * (treated as a sequence of single-byte characters). random_int() is used
 * when the running PHP version provides it, mt_rand() otherwise.
 *
 * @param int $len number of characters to generate
 * @param string $alphabet the characters to choose from
 *   (e.g. self::ALPHANUMERIC)
 *
 * @return string the random string; empty when $len is not positive or
 *   the alphabet is empty
 */
public static function createRandom($len, $alphabet) {
  $alphabetSize = strlen($alphabet);
  if ($len <= 0 || $alphabetSize === 0) {
    // nothing to pick from / nothing to generate
    return '';
  }

  $lastIndex = $alphabetSize - 1;
  $secure = function_exists('random_int');

  $result = '';
  for ($i = 0; $i < $len; $i++) {
    $index = $secure ? random_int(0, $lastIndex) : mt_rand(0, $lastIndex);
    // square brackets: the curly brace offset syntax is no longer valid PHP
    $result .= $alphabet[$index];
  }
  return $result;
}
662
/**
 * Split a string of the form "[prefix<delim>]name" at the first
 * occurrence of the delimiter.
 *
 * Examples (with ":" as the delimiter):
 * "admin foo" => array(NULL,"admin foo")
 * "cms:admin foo" => array("cms", "admin foo")
 *
 * The delimiter may be longer than one character; all of it is removed
 * from the returned value.
 *
 * @param string $delim the delimiter separating prefix and value
 * @param string $string e.g. "view all contacts". Syntax: "[prefix:]name"
 * @param null $defaultPrefix prefix to report when the string has none
 *
 * @return array (0 => string|NULL $prefix, 1 => string $value)
 */
public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
  $pos = strpos($string, $delim);
  if ($pos === FALSE) {
    return array($defaultPrefix, $string);
  }

  // skip the whole delimiter, not just its first character; the casts keep
  // both parts strings even when substr() has nothing left to return
  return array(
    (string) substr($string, 0, $pos),
    (string) substr($string, $pos + strlen($delim)),
  );
}
683
/**
 * Mask part of the user portion of an Email address (everything before
 * the @).
 *
 * The masked stretch is centred in the user portion. The address is split
 * at its LAST "@", so an "@" inside the user portion does not swallow the
 * real domain. When the input contains no "@" at all, the whole string is
 * treated as the user portion and no "@" is appended.
 *
 * @param string $email the email address to be masked
 * @param string $maskChar the character used for masking
 * @param integer $percent the percentage of the user portion to be masked;
 *   values outside 0..100 are clamped to that range
 *
 * @return string returns the masked Email address
 */
public static function maskEmail($email, $maskChar= '*', $percent=50) {
  $at = strrpos($email, '@');
  if ($at === FALSE) {
    $user = $email;
    $domainPart = '';
  }
  else {
    $user = (string) substr($email, 0, $at);
    // keeps the "@" together with the domain
    $domainPart = substr($email, $at);
  }

  // a percentage outside 0..100 would yield negative offsets / repeat counts
  $percent = max(0, min(100, $percent));

  $len = strlen($user);
  $maskCount = (int) floor($len * $percent / 100);
  $offset = (int) floor(($len - $maskCount) / 2);

  $masked = substr($user, 0, $offset)
    . str_repeat($maskChar, $maskCount)
    . substr($user, $maskCount + $offset);

  return $masked . $domainPart;
}
705
/**
 * Compare two strings after trimming surrounding white space.
 *
 * The comparison is a strict string comparison, so numeric-looking strings
 * such as "1e3" and "1000" are NOT treated as identical.
 *
 * @param string $strOne string one
 * @param string $strTwo string two
 * @param boolean $case when TRUE both strings are converted to lower case
 *   before comparing (i.e. letter case is ignored); when FALSE they are
 *   compared as they are (letter case matters)
 *
 * @return boolean TRUE (string are identical); FALSE (strings are not identical)
 */
public static function compareStr($strOne, $strTwo, $case) {
  // Trim white spaces
  $one = trim($strOne);
  $two = trim($strTwo);

  if ($case == TRUE) {
    // Convert to lowercase so the comparison ignores letter case
    $one = strtolower($one);
    $two = strtolower($two);
  }

  return $one === $two;
}
739
/**
 * Many parts of the codebase have a convention of internally passing around
 * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 * (because most other odd characters are %-escaped in URLs; and %-escaped
 * strings don't need any extra escaping in HTML).
 *
 * This function undoes that encoding by turning every "&amp;" back into "&".
 *
 * @param string $htmlUrl URL with HTML entities
 * @return string URL without HTML entities
 */
public static function unstupifyUrl($htmlUrl) {
  $encodedAmpersand = '&amp;';
  $plainAmpersand = '&';

  return str_replace($encodedAmpersand, $plainAmpersand, $htmlUrl);
}
752 }
753