CRM/Utils add comments
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.5 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 require_once 'HTML/QuickForm/Rule/Email.php';
37
38 /**
39 * This class contains string functions
40 *
41 */
42 class CRM_Utils_String {
/**
 * Frequently used delimiter and line-ending characters
 */
const COMMA = ",";
const SEMICOLON = ";";
const SPACE = " ";
const TAB = "\t";
const LINEFEED = "\n";
const CARRIAGELINE = "\r\n";
const LINECARRIAGE = "\n\r";
const CARRIAGERETURN = "\r";

/**
 * Every ASCII letter (lower case, then upper case) followed by the digits
 */
const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
49
/**
 * Convert a display title into a name that can be used as a
 * variable name in forms/code
 *
 * The title is munged first; when the munged value is accepted by
 * CRM_Utils_Rule::title() it is returned, otherwise a slice of the md5
 * hash of the original title is returned instead.
 *
 * @param string $title the display title to convert
 * @param int $maxLength maximum length of the returned name
 *
 * @return string an equivalent variable name
 * @access public
 * @static
 */
static function titleToVar($title, $maxLength = 31) {
  $candidate = self::munge($title, '_', $maxLength);
  $isUsable = CRM_Utils_Rule::title($candidate, $maxLength);

  // when the munged name is rejected, hand back part of the md5 so that
  // callers still receive a name and errors downstream are prevented
  return $isUsable ? $candidate : substr(md5($title), 0, $maxLength);
}
76
/**
 * Trim a string and replace every run of characters that are not
 * ASCII letters or digits with the replacement character
 *
 * @param string $name the name to be worked on
 * @param string $char the replacement for each run of non-valid chars
 * @param int $len maximum length of the result; a falsy value disables truncation
 *
 * @access public
 *
 * @return string the manipulated string
 * @static
 */
static function munge($name, $char = '_', $len = 63) {
  // only the ascii character set is kept since mysql does not create
  // table names / field names otherwise (CRM-11744)
  $cleaned = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

  // keep variable names short when a length limit was supplied
  return $len ? substr($cleaned, 0, $len) : $cleaned;
}
104
/**
 * Takes a variable name and munges it randomly into another variable name
 * by overwriting its last $len characters with characters taken from uniqid()
 *
 * @param string $name Initial Variable Name
 * @param int $len number of trailing characters to replace
 *
 * @return string Randomized Variable Name
 * @access public
 * @static
 */
static function rename($name, $len = 4) {
  // uniqid() is a hexadecimal timestamp: its leading digits stay the same
  // for hours while its trailing (microsecond) digits change on every call.
  // Reverse it so that the slice taken below comes from the changing end.
  $rand = substr(strrev(uniqid()), 0, $len);
  return substr_replace($name, $rand, -$len, $len);
}
120
/**
 * Split a string on a separator and return its last component.
 * Useful while converting file names to class names etc
 *
 * @param string $string the input string
 * @param string $char the character used to demarcate the components
 *
 * @access public
 *
 * @return string|null the last component; NULL when $string is an array
 * @static
 */
static function getClassName($string, $char = '_') {
  // arrays cannot be split, nothing is returned for them
  if (is_array($string)) {
    return NULL;
  }

  $parts = explode($char, $string);
  return empty($parts) ? NULL : array_pop($parts);
}
142
/**
 * Append one or more names to a string, separated by a delimiter.
 * No delimiter is inserted while the target string is still empty, and
 * empty names are skipped.
 *
 * @param string $str the string to be appended to (modified in place)
 * @param string $delim the delimiter to use
 * @param mixed $name the string (or array of strings) to append
 *
 * @return void
 * @access public
 * @static
 */
static function append(&$str, $delim, $name) {
  if (empty($name)) {
    return;
  }

  // treat a single name as a one element list so both cases share a loop
  $pieces = is_array($name) ? $name : array($name);
  foreach ($pieces as $piece) {
    if (empty($piece)) {
      continue;
    }
    $str = empty($str) ? $piece : $str . $delim . $piece;
  }
}
182
/**
 * Determine if the string is composed only of ascii characters
 * (or, optionally, of valid utf8)
 *
 * With the mbstring extension the encoding is detected strictly against
 * ASCII (plus UTF-8 when $utf8 is set). Without it, white space is removed
 * and the string is scanned for low or high ascii bytes; when one is found
 * the result is that of isUtf8() if $utf8 is set, FALSE otherwise.
 *
 * @param string $str input string
 * @param boolean $utf8 attempt utf8 match on failure (default yes)
 *
 * @return boolean true if string is ascii
 * @access public
 * @static
 */
static function isAscii($str, $utf8 = TRUE) {
  if (function_exists('mb_detect_encoding')) {
    $candidates = $utf8 ? array('ASCII', 'UTF-8') : array('ASCII');
    $detected = mb_detect_encoding($str, $candidates, TRUE);
    return ($detected == 'ASCII' || $detected == 'UTF-8');
  }

  // no mbstring: eliminate all white space from the string, then
  // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
  $stripped = preg_replace('/\s+/', '', $str);

  // look for low- or high-ascii characters
  if (!preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $stripped)) {
    return TRUE;
  }

  // we did match, so try for utf-8 when the caller allows it
  return $utf8 ? self::isUtf8($stripped) : FALSE;
}
224
/**
 * Determine the string replacements for redaction
 * on the basis of the regular expressions
 *
 * Every substring of $str matched by one of the patterns becomes a key of
 * the result; its value is the replacement of the first rule whose pattern
 * matches that substring, followed by the first 5 characters of the
 * substring's md5 hash.
 *
 * The result is built from the current $str only. Earlier revisions kept
 * the matches in static variables, so matches from previous calls leaked
 * into every later result.
 *
 * @param string $str input string
 * @param array $regexRules regular expressions (keys) w/ replacement prefixes (values)
 *
 * @return array matched strings w/ corresponding redacted outputs;
 *   CRM_Core_DAO::$_nullArray when nothing matched
 * @access public
 * @static
 */
static function regex($str, $regexRules) {
  // collect everything in the string that any of the rules matches
  $allMatches = array();
  if (!empty($regexRules) && isset($str)) {
    foreach ($regexRules as $pattern => $replacement) {
      $found = array();
      preg_match_all($pattern, $str, $found);
      if (!empty($found[0])) {
        $allMatches = array_merge($allMatches, $found[0]);
      }
    }
  }

  if (empty($allMatches)) {
    return CRM_Core_DAO::$_nullArray;
  }

  // flipping de-duplicates the matches and turns them into keys
  $match = array_flip($allMatches);
  foreach (array_keys($match) as $matchKey) {
    foreach ($regexRules as $pattern => $replacement) {
      if (preg_match($pattern, $matchKey)) {
        $match[$matchKey] = $replacement . substr(md5($matchKey), 0, 5);
        break;
      }
    }
  }
  return $match;
}
267
/**
 * Redact a string by applying a set of case-insensitive replacements
 *
 * @param string $str the string to redact
 * @param array $stringRules text to look for (keys) w/ its replacement (values)
 *
 * @return mixed the redacted string
 */
static function redaction($str, $stringRules) {
  // without rules there is nothing to redact
  if (empty($stringRules)) {
    return $str;
  }

  // apply the rules one after the other, each on the previous result
  foreach ($stringRules as $needle => $substitute) {
    $str = str_ireplace($needle, $substitute, $str);
  }
  return $str;
}
285
/**
 * Determine if a string is composed only of utf8 characters
 *
 * Uses strict mb_detect_encoding() when the mbstring extension is loaded
 * and a byte pattern otherwise.
 *
 * @param string $str input string
 * @access public
 * @static
 *
 * @return boolean true if the string is valid utf8
 */
static function isUtf8($str) {
  // the function name has to be passed as a string; a bare word is an
  // undefined constant, which is a fatal error as of PHP 8
  if (function_exists('mb_detect_encoding')) {
    return mb_detect_encoding($str, array('UTF-8'), TRUE) !== FALSE;
  }

  // eliminate all white space from the string
  $str = preg_replace('/\s+/', '', $str);

  /* pattern taken from the php.net function documentation for
   * utf8decode(); comment by JF Sebastian, 30-Mar-2005.
   * The lead byte of the first 4-byte alternative is \xf0 (the escape
   * was missing, so those sequences were never accepted).
   */
  $pattern = '/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/';
  return preg_match($pattern, $str) === 1;
}
314
/**
 * Determine if two href's are equivalent (fuzzy match)
 *
 * Both urls are lower-cased; they match when their paths are equal and
 * extractURLVarValue() yields equal values for their query strings.
 *
 * @param string $url1 the first url to be matched
 * @param string $url2 the second url to be matched against
 *
 * @return boolean true if the urls match, else false
 * @access public
 * @static
 */
static function match($url1, $url2) {
  $parts1 = parse_url(strtolower($url1));
  $parts2 = parse_url(strtolower($url2));

  // a url is not required to have a path (e.g. "http://example.org"), so
  // read it the same safe way as the query instead of indexing directly
  $path1 = CRM_Utils_Array::value('path', $parts1);
  $path2 = CRM_Utils_Array::value('path', $parts2);
  if ($path1 != $path2) {
    return FALSE;
  }

  $var1 = self::extractURLVarValue(CRM_Utils_Array::value('query', $parts1));
  $var2 = self::extractURLVarValue(CRM_Utils_Array::value('query', $parts2));
  return $var1 == $var2;
}
339
/**
 * Extract the value of the user framework url variable
 * ($config->userFrameworkURLVar) from a query string
 *
 * @param string $query the query part of a url ("a=1&b=2")
 *
 * @return string|null the value of the variable
 *   (eg: civicrm/contact/search/...), NULL when it is not present
 * @access public
 * @static
 */
static function extractURLVarValue($query) {
  $config = CRM_Core_Config::singleton();
  $urlVar = $config->userFrameworkURLVar;

  // callers may hand over NULL when the url has no query at all
  foreach (explode('&', (string) $query) as $pair) {
    // pairs without '=' or with an empty name are ignored
    if (!strpos($pair, '=')) {
      continue;
    }
    // split on the first '=' only so a value containing '=' stays intact
    list($key, $value) = explode('=', $pair, 2);
    if ($key == $urlVar) {
      return $value;
    }
  }
  return NULL;
}
364
/**
 * Translate a true/false/yes/no string to a boolean
 *
 * Only "y", "yes", "t", "true" and "1" (in any letter case) are TRUE;
 * everything else, including non-scalar input, is FALSE.
 *
 * @param string $str the string to be translated
 *
 * @return boolean
 * @access public
 * @static
 */
static function strtobool($str) {
  return is_scalar($str) && preg_match('/^(y(es)?|t(rue)?|1)$/i', $str);
}
384
/**
 * Translate a true/false/yes/no string to the string '1' or '0'
 *
 * "y", "yes", "t", "true" and "1" give '1'; "n", "no", "f", "false" and
 * "0" give '0' (letter case is ignored).
 *
 * @param string $str the string to be translated
 *
 * @return string|boolean '1' or '0'; FALSE for anything else, including
 *   non-scalar input
 * @access public
 * @static
 */
static function strtoboolstr($str) {
  if (!is_scalar($str)) {
    return FALSE;
  }

  if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
    return '1';
  }

  if (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
    return '0';
  }

  // neither an affirmative nor a negative word
  return FALSE;
}
409
/**
 * Convert a HTML string into a text one using html2text
 *
 * @param string $html the string to be converted
 *
 * @return string the converted string
 * @access public
 * @static
 */
static function htmlToText($html) {
  require_once 'packages/html2text/rcube_html2text.php';

  // prefix every {token} with "token:" before the conversion
  // (presumably so that the converter leaves the tokens alone)
  $marked = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);

  $converter = new rcube_html2text($marked);

  // remove the prefix again from the converted text
  return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $converter->get_text());
}
427
/**
 * Split a full name into first / middle / last name
 *
 * Quotes are removed first. A name containing a comma is read as
 * "lname, fname [mname]", any other name as "fname [mname] lname".
 * Nothing is written for an empty name.
 *
 * @param string $string the full name
 * @param array $params receives the keys first_name, middle_name and
 *   last_name as far as they could be determined
 *
 * @return void
 */
static function extractName($string, &$params) {
  $fullName = trim($string);
  if (empty($fullName)) {
    return;
  }

  // strip out double and single quotes
  $fullName = str_replace(array('"', '\''), '', $fullName);

  if (strpos($fullName, ',') !== FALSE) {
    // name has a comma - assume lname, fname [mname]
    $sections = explode(',', $fullName);
    $params['last_name'] = trim($sections[0]);

    // what follows the comma is space delimited
    $given = explode(' ', trim($sections[1]));
    $params['first_name'] = trim($given[0]);
    if (count($given) > 1) {
      $params['middle_name'] = trim($given[1]);
    }
    return;
  }

  // name has no comma - assume fname [mname] lname
  $words = explode(' ', $fullName);
  $wordCount = count($words);
  $params['first_name'] = $words[0];
  if ($wordCount == 2) {
    $params['last_name'] = $words[1];
  }
  elseif ($wordCount > 2) {
    $params['middle_name'] = $words[1];
    $params['last_name'] = $words[2];
  }
}
481
/**
 * Turn a text of "name=value" lines into an array
 *
 * Each line is split with CRM_Utils_System::explode() on '='; lines whose
 * value is empty are left out, names and values are trimmed.
 *
 * @param string $string the text, one "name=value" pair per line
 *
 * @return array the values indexed by their names
 */
static function &makeArray($string) {
  $result = array();

  foreach (explode("\n", trim($string)) as $line) {
    list($key, $val) = CRM_Utils_System::explode('=', $line, 2);
    if (empty($val)) {
      continue;
    }
    $result[trim($key)] = trim($val);
  }

  return $result;
}
500
/**
 * Given an ezComponents-parsed representation of
 * a text with alternatives return only the first one
 *
 * The first alternative is used only when it still contains something
 * after its tags and surrounding white space are removed.
 *
 * @param string $full all alternatives as a long string (or some other text)
 *
 * @return string only the first alternative found (or the unchanged text)
 */
static function stripAlternatives($full) {
  $found = array();
  preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $found);

  $hasContent = isset($found[1]) && trim(strip_tags($found[1])) != '';
  return $hasContent ? $found[1] : $full;
}
522
/**
 * Strip leading and trailing white space from a string and collapse
 * every run of two or more white space characters into a single space.
 * Used for postal/greeting/addressee
 *
 * @param string $string input string to be cleaned
 *
 * @return string the cleaned string (empty input is returned as is)
 * @access public
 * @static
 */
static function stripSpaces($string) {
  if (empty($string)) {
    return $string;
  }

  return preg_replace("/\s{2,}/", " ", trim($string));
}
536
/**
 * This function is used to clean the URL 'path' variable that we use
 * to construct CiviCRM urls by replacing characters in the path variable
 *
 * @param string $string the input string to be sanitized
 * @param array $search the characters to be sanitized; the built-in
 *   list is used when this is NULL (or otherwise loosely equal to NULL)
 * @param string $replace the character to replace them with; '_' is
 *   used when this is NULL (or otherwise loosely equal to NULL)
 *
 * @return string the sanitized string
 * @access public
 * @static
 */
static function stripPathChars($string, $search = NULL, $replace = NULL) {
  if (empty($string)) {
    return $string;
  }

  if ($search == NULL) {
    // default set of characters that are not wanted in a path
    $search = array(
      '&', ';', ',', '=', '$',
      '"', "'", '\\',
      '<', '>', '(', ')',
      ' ', "\r", "\r\n", "\n", "\t",
    );
  }

  if ($replace == NULL) {
    $replace = '_';
  }

  return str_replace($search, $replace, $string);
}
581
582
/**
 * Use HTMLPurifier to clean up a text string and remove any potential
 * xss attacks. This is primarily used in public facing pages which
 * accept html as the input string
 *
 * The purifier is created on the first call and reused afterwards.
 *
 * @param string $string the input string
 *
 * @return string the cleaned up string
 * @public
 * @static
 */
static function purifyHTML($string) {
  static $_filter = null;

  if ($_filter === null) {
    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Core.Encoding', 'UTF-8');
    // Disable the cache entirely
    $settings->set('Cache.DefinitionImpl', null);

    $_filter = new HTMLPurifier($settings);
  }

  return $_filter->purify($string);
}
608
/**
 * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 *
 * Lengths are counted in UTF-8 characters when the mbstring extension is
 * available, so that a multi-byte character is never cut in half; without
 * mbstring the string is handled byte-wise.
 *
 * @param string $string
 * @param int $maxLen maximum length of the result, "..." included
 *
 * @return string the unchanged string when it fits, otherwise its first
 *   $maxLen - 3 characters (never less than none) followed by "..."
 */
static function ellipsify($string, $maxLen) {
  $multiByte = function_exists('mb_strlen') && function_exists('mb_substr');

  $len = $multiByte ? mb_strlen($string, 'UTF-8') : strlen($string);
  if ($len <= $maxLen) {
    return $string;
  }

  // a negative length would make substr() cut from the end of the string
  $keep = max(0, $maxLen - 3);
  $head = $multiByte ? mb_substr($string, 0, $keep, 'UTF-8') : substr($string, 0, $keep);
  return $head . '...';
}
626
/**
 * Generate a random string
 *
 * Each character is picked independently from $alphabet, using
 * random_int() where that function exists and rand() otherwise.
 *
 * @param int $len number of characters to generate
 * @param string $alphabet the characters to pick from
 *
 * @return string the random string ('' when $alphabet is empty)
 */
public static function createRandom($len, $alphabet) {
  $alphabetSize = strlen($alphabet);
  if ($alphabetSize === 0) {
    // there is nothing to pick from
    return '';
  }

  $secure = function_exists('random_int');
  $lastIndex = $alphabetSize - 1;

  $result = '';
  for ($i = 0; $i < $len; $i++) {
    $index = $secure ? random_int(0, $lastIndex) : rand(0, $lastIndex);
    // square brackets: the curly brace offset syntax is gone as of PHP 8
    $result .= $alphabet[$index];
  }
  return $result;
}
642
/**
 * Split a string of the form "[prefix<delim>]name" at the first
 * occurrence of the delimiter
 *
 * Examples:
 * "admin foo" => array(NULL,"admin foo")
 * "cms:admin foo" => array("cms", "admin foo")
 *
 * @param string $delim the delimiter between prefix and name (may be
 *   longer than one character)
 * @param string $string e.g. "view all contacts". Syntax: "[prefix:]name"
 * @param string|null $defaultPrefix the prefix to report when $string has none
 *
 * @return array (0 => string|NULL $prefix, 1 => string $value)
 */
public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
  $pos = strpos($string, $delim);
  if ($pos === FALSE) {
    return array($defaultPrefix, $string);
  }

  // skip the whole delimiter, not just its first character
  return array(substr($string, 0, $pos), substr($string, $pos + strlen($delim)));
}
663
/**
 * This function will mask part of the user portion of an Email address
 * (everything before the last @); the masked characters are taken from
 * the middle of the user portion
 *
 * @param string $email the email address to be masked
 * @param string $maskChar the character used for masking
 * @param integer $percent the percentage of the user portion to be masked;
 *   values outside of 0..100 are limited to that range
 *
 * @return string returns the masked Email address. An address without
 *   "@" is masked as a whole and gets a trailing "@", as it always did.
 */
public static function maskEmail($email, $maskChar= '*', $percent=50) {
  // split at the last "@": a domain cannot contain one, a user portion can
  $at = strrpos($email, '@');
  if ($at === FALSE) {
    $user = $email;
    $domain = '';
  }
  else {
    $user = (string) substr($email, 0, $at);
    $domain = (string) substr($email, $at + 1);
  }

  // more than 100% would produce a negative offset below
  $percent = max(0, min(100, $percent));

  $len = strlen($user);
  $maskCount = (int) floor($len * $percent / 100);
  $offset = (int) floor(($len - $maskCount) / 2);

  $masked = substr($user, 0, $offset)
    . str_repeat($maskChar, $maskCount)
    . substr($user, $maskCount + $offset);

  return $masked . '@' . $domain;
}
685
/**
 * This function compares two strings after trimming white space from both
 *
 * NOTE: despite its name, a true $case selects the case-INSENSITIVE
 * comparison (both strings are lower-cased first); a false $case compares
 * the strings exactly. This has always been the behaviour of the function
 * and is kept for existing callers.
 *
 * @param string $strOne string one
 * @param string $strTwo string two
 * @param boolean $case TRUE to ignore letter case, FALSE to respect it
 *
 * @return boolean TRUE (string are identical); FALSE (strings are not identical)
 */
public static function compareStr($strOne, $strTwo, $case) {
  $first = trim($strOne);
  $second = trim($strTwo);

  if ($case) {
    $first = strtolower($first);
    $second = strtolower($second);
  }

  // strict comparison: with "==" numeric strings are compared as numbers,
  // which made e.g. "1e3" and "1000" count as identical
  return $first === $second;
}
719
720 }
721