INFRA-132 - CRM/Utils - Convert single-line @param to multi-line
[civicrm-core.git] / CRM / Utils / String.php
1 <?php
2 /*
3 +--------------------------------------------------------------------+
4 | CiviCRM version 4.6 |
5 +--------------------------------------------------------------------+
6 | Copyright CiviCRM LLC (c) 2004-2014 |
7 +--------------------------------------------------------------------+
8 | This file is a part of CiviCRM. |
9 | |
10 | CiviCRM is free software; you can copy, modify, and distribute it |
11 | under the terms of the GNU Affero General Public License |
12 | Version 3, 19 November 2007 and the CiviCRM Licensing Exception. |
13 | |
14 | CiviCRM is distributed in the hope that it will be useful, but |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
17 | See the GNU Affero General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU Affero General Public |
20 | License and the CiviCRM Licensing Exception along |
21 | with this program; if not, contact CiviCRM LLC |
22 | at info[AT]civicrm[DOT]org. If you have questions about the |
23 | GNU Affero General Public License or the licensing of CiviCRM, |
24 | see the CiviCRM license FAQ at http://civicrm.org/licensing |
25 +--------------------------------------------------------------------+
26 */
27
28 /**
29 *
30 * @package CRM
31 * @copyright CiviCRM LLC (c) 2004-2014
32 * $Id$
33 *
34 */
35
36 require_once 'HTML/QuickForm/Rule/Email.php';
37
38 /**
39 * This class contains string functions
40 *
41 */
42 class CRM_Utils_String {
/**
 * Frequently used delimiter and line-ending strings.
 *
 * CARRIAGELINE is "\r\n" and LINECARRIAGE is the reversed pair "\n\r".
 */
const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";

/**
 * List of all letters and numbers
 * (lower-case a-z, upper-case A-Z, then the digits 1-9 and 0).
 */
const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
49
/**
 * Turn a display title into a string usable as a variable name
 * in forms/code.
 *
 * The title is first passed through self::munge(). If the munged value
 * is rejected by CRM_Utils_Rule::title(), a prefix of the md5 hash of
 * the original title is returned instead.
 *
 * @param string $title
 *   The display title to convert.
 * @param int $maxLength
 *   Maximum length of the returned name.
 *
 * @return string
 *   The munged title, or the first $maxLength characters of md5($title).
 */
public static function titleToVar($title, $maxLength = 31) {
  $candidate = self::munge($title, '_', $maxLength);

  if (!CRM_Utils_Rule::title($candidate, $maxLength)) {
    // the munged name did not validate: fall back to a slice of the
    // md5 so that callers downstream still get a usable value
    return substr(md5($title), 0, $maxLength);
  }

  return $candidate;
}
75
/**
 * Replace every run of non-alphanumeric characters (including spaces)
 * in a trimmed string with the replacement character.
 *
 * @param string $name
 *   The name to be worked on.
 * @param string $char
 *   The character substituted for each run of invalid characters.
 * @param int $len
 *   Maximum length of the result; a falsy value disables truncation.
 *
 * @return string
 *   The manipulated string.
 */
public static function munge($name, $char = '_', $len = 63) {
  // CRM-11744: only the ascii letters and digits are kept, since mysql
  // does not create table names / field names otherwise
  $munged = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

  // keep variable names short when a length limit was given
  return $len ? substr($munged, 0, $len) : $munged;
}
105
/**
 * Convert an underscore separated string to camel case, with special
 * handling for a leading 'UF' fragment.
 *
 * E.g. membership_payment returns MembershipPayment and
 * uf_group returns UFGroup.
 *
 * @param string $string
 *   The (possibly) underscore separated words.
 *
 * @return string
 *   The camel-cased string.
 */
public static function convertStringToCamel($string) {
  $words = array_map('ucfirst', explode('_', $string));

  // Special case: UFGroup, UFJoin, UFMatch, UFField
  if ($words[0] === 'Uf') {
    $words[0] = 'UF';
  }

  return implode('', $words);
}
125
/**
 * Take a variable name and replace its last $len characters with
 * characters taken from uniqid().
 *
 * The replacement is taken from the END of uniqid(): its leading digits
 * encode the high-order part of the current time and stay the same for
 * hours, so using them would yield the same "random" suffix on every
 * call.
 *
 * @param string $name
 *   Initial variable name.
 * @param int $len
 *   Number of trailing characters to replace. A value of zero or less
 *   leaves the name untouched.
 *
 * @return string
 *   Randomized variable name.
 */
public static function rename($name, $len = 4) {
  if ($len <= 0) {
    // nothing to replace
    return $name;
  }

  // use the fast-changing (microsecond) tail of uniqid()
  $rand = substr(uniqid(), -$len);
  return substr_replace($name, $rand, -$len, $len);
}
142
/**
 * Return the last component of a delimited string.
 * Useful while converting file names to class names etc.
 *
 * @param string $string
 *   The input string. When an array is passed nothing is extracted.
 * @param string $char
 *   The character used to demarcate the components.
 *
 * @return string|null
 *   The last component, or NULL when no component could be extracted.
 */
public static function getClassName($string, $char = '_') {
  if (is_array($string)) {
    return NULL;
  }

  $components = explode($char, $string);
  if (empty($components)) {
    return NULL;
  }

  return array_pop($components);
}
164
/**
 * Append one or more names to a string, separated by a delimiter.
 * Does the right thing for an empty string: no leading delimiter is
 * added when $str is still empty.
 *
 * @param string $str
 *   The string to be appended to (modified in place).
 * @param string $delim
 *   The delimiter to use.
 * @param mixed $name
 *   The string (or array of strings) to append. Empty values are skipped.
 *
 * @return void
 */
public static function append(&$str, $delim, $name) {
  if (empty($name)) {
    return;
  }

  // treat a single name as a one-element list
  $pieces = is_array($name) ? $name : array($name);

  foreach ($pieces as $piece) {
    if (empty($piece)) {
      continue;
    }
    $str = empty($str) ? $piece : $str . $delim . $piece;
  }
}
206
/**
 * Determine if the string is composed only of ascii characters.
 *
 * When the mbstring function mb_detect_encoding() exists it is used for
 * detection; otherwise a regular-expression fallback is applied.
 *
 * @param string $str
 *   Input string.
 * @param bool $utf8
 *   Also accept utf8 when the string is not plain ascii (default yes).
 *
 * @return boolean
 *   TRUE if the string is ascii (or utf8, when $utf8 is set).
 */
public static function isAscii($str, $utf8 = TRUE) {
  if (function_exists('mb_detect_encoding')) {
    $candidates = $utf8 ? array('ASCII', 'UTF-8') : array('ASCII');
    $detected = mb_detect_encoding($str, $candidates, TRUE);
    return ($detected == 'ASCII' || $detected == 'UTF-8');
  }

  // no mbstring: eliminate all white space from the string
  $squeezed = preg_replace('/\s+/', '', $str);

  // FIXME: This is a pretty brutal hack to make utf8 and 8859-1 work.
  // look for low- or high-ascii characters
  if (!preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $squeezed)) {
    return TRUE;
  }

  // a suspicious byte was found: optionally try for utf-8
  return $utf8 ? self::isUtf8($squeezed) : FALSE;
}
249
/**
 * Determine the string replacements for redaction
 * on the basis of the regular expressions.
 *
 * Every substring of $str matched by one of the patterns is mapped to
 * the replacement of the first pattern matching it, followed by the
 * first five characters of the md5 of the matched text.
 *
 * @param string $str
 *   Input string.
 * @param array $regexRules
 *   Array of (regular expression pattern => replacement prefix).
 *
 * @return array
 *   Array of (matched string => redacted output), or
 *   CRM_Core_DAO::$_nullArray when there is nothing to redact.
 */
public static function regex($str, $regexRules) {
  // collect every substring matched by any of the rules
  if (!empty($regexRules) && isset($str)) {
    // NOTE(review): these locals are declared static, so matches collected
    // by earlier calls appear to carry over into later calls - confirm
    // whether that accumulation is intended.
    static $matches, $totalMatches, $match = array();
    foreach ($regexRules as $pattern => $replacement) {
      preg_match_all($pattern, $str, $matches);
      if (!empty($matches[0])) {
        if (empty($totalMatches)) {
          $totalMatches = $matches[0];
        }
        else {
          $totalMatches = array_merge($totalMatches, $matches[0]);
        }
        // use the matched strings as keys (this also de-duplicates them)
        $match = array_flip($totalMatches);
      }
    }
  }

  if (!empty($match)) {
    // overwrite each value (by reference) with its redacted replacement
    foreach ($match as $matchKey => & $dontCare) {
      foreach ($regexRules as $pattern => $replacement) {
        if (preg_match($pattern, $matchKey)) {
          // first matching rule wins
          $dontCare = $replacement . substr(md5($matchKey), 0, 5);
          break;
        }
      }
    }
    return $match;
  }
  return CRM_Core_DAO::$_nullArray;
}
293
/**
 * Redact a string by applying a set of case-insensitive replacements.
 *
 * @param string $str
 *   The text to redact.
 * @param array $stringRules
 *   Array of (string to look for => replacement), applied in order.
 *
 * @return mixed
 *   The redacted output; $str is returned unchanged when no rules are given.
 */
public static function redaction($str, $stringRules) {
  if (empty($stringRules)) {
    // nothing to redact
    return $str;
  }

  foreach ($stringRules as $needle => $substitute) {
    $str = str_ireplace($needle, $substitute, $str);
  }

  return $str;
}
311
/**
 * Determine if a string is composed only of utf8 characters.
 *
 * Uses mb_detect_encoding() in strict mode when the mbstring function
 * exists, otherwise falls back to a regular expression describing
 * well-formed utf8 byte sequences.
 *
 * @param string $str
 *   Input string.
 *
 * @return boolean
 *   TRUE when the string is valid utf8.
 */
public static function isUtf8($str) {
  // the function name must be passed as a string: a bare word is an
  // undefined constant (a notice on old PHP, an Error on PHP 8)
  if (function_exists('mb_detect_encoding')) {
    $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
    return ($enc !== FALSE);
  }

  // eliminate all white space from the string
  $str = preg_replace('/\s+/', '', $str);

  /* pattern taken from the php.net function documentation for
   * utf8decode();
   * comment by JF Sebastian, 30-Mar-2005
   * (the \xf0 alternative previously lacked its "\x" prefix and so
   * matched the literal text "f0" instead of the lead byte)
   */
  return (bool) preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|\xf0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
}
340
/**
 * Determine if two href's are equivalent (fuzzy match).
 *
 * Both urls are lower-cased and parsed with parse_url(). They match when
 * their paths are equal and the values extracted from their query
 * strings by self::extractURLVarValue() are equal.
 *
 * @param string $url1
 *   The first url to be matched.
 * @param string $url2
 *   The second url to be matched against.
 *
 * @return boolean
 *   TRUE if the urls match, else FALSE.
 */
public static function match($url1, $url2) {
  $parts1 = parse_url(strtolower($url1));
  $parts2 = parse_url(strtolower($url2));

  // parse_url() omits 'path' when the url has none, so look it up the
  // same way as 'query' instead of indexing the array directly
  // (which raised an undefined index notice for path-less urls)
  $path1 = CRM_Utils_Array::value('path', $parts1);
  $path2 = CRM_Utils_Array::value('path', $parts2);
  if ($path1 != $path2) {
    return FALSE;
  }

  $var1 = self::extractURLVarValue(CRM_Utils_Array::value('query', $parts1));
  $var2 = self::extractURLVarValue(CRM_Utils_Array::value('query', $parts2));
  return ($var1 == $var2);
}
366
/**
 * Extract the value of the user framework's url variable from a
 * query string.
 *
 * The name of the variable is read from
 * CRM_Core_Config::singleton()->userFrameworkURLVar.
 *
 * @param string $query
 *   The query string part of a url ("a=1&b=2").
 *
 * @return string|null
 *   The value of the url variable (eg: civicrm/contact/search/...),
 *   or NULL when the query string does not contain it.
 */
public static function extractURLVarValue($query) {
  $config = CRM_Core_Config::singleton();
  $urlVar = $config->userFrameworkURLVar;

  // cast so that a missing (NULL) query is handled as an empty string
  $params = explode('&', (string) $query);
  foreach ($params as $p) {
    // only consider "key=value" pairs with a non-empty key
    if (strpos($p, '=')) {
      // split on the first "=" only, so that values which themselves
      // contain "=" are not truncated
      list($k, $v) = explode('=', $p, 2);
      if ($k == $urlVar) {
        return $v;
      }
    }
  }
  return NULL;
}
391
/**
 * Translate a true/false/yes/no string to a boolean.
 *
 * @param string $str
 *   The string to be translated. "y", "yes", "t", "true" and "1"
 *   (case-insensitive) count as true.
 *
 * @return boolean
 *   TRUE for a recognised true value; FALSE for everything else,
 *   including non-scalar input.
 */
public static function strtobool($str) {
  if (!is_scalar($str)) {
    return FALSE;
  }

  return preg_match('/^(y(es)?|t(rue)?|1)$/i', $str) ? TRUE : FALSE;
}
411
/**
 * Translate a true/false/yes/no string to the string '1' or '0'.
 *
 * @param string $str
 *   The string to be translated.
 *
 * @return string|boolean
 *   '1' for y/yes/t/true/1, '0' for n/no/f/false/0 (case-insensitive);
 *   FALSE for anything else, including non-scalar input.
 */
public static function strtoboolstr($str) {
  if (is_scalar($str)) {
    if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
      return '1';
    }
    if (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
      return '0';
    }
  }

  // not a recognised boolean string
  return FALSE;
}
436
/**
 * Convert a HTML string into a text one using html2text.
 *
 * Tokens of the form {some.token} are rewritten to token:{some.token}
 * before the conversion and restored afterwards.
 *
 * @param string $html
 *   The string to be converted.
 *
 * @return string
 *   The converted string.
 */
public static function htmlToText($html) {
  require_once 'packages/html2text/rcube_html2text.php';

  // mark the tokens (presumably so the converter leaves them intact)
  $marked = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);

  $html2text = new rcube_html2text($marked);
  $converted = $html2text->get_text();

  // remove the marker again
  return preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $converted);
}
454
/**
 * Split a full name into first / middle / last name parts.
 *
 * Quotes are removed first. A name containing a comma is read as
 * "lname, fname [mname]"; any other name as "fname [mname] lname".
 * Words are separated by runs of whitespace, so repeated spaces do not
 * produce empty name parts. Words beyond the third (or beyond the second
 * after a comma) are ignored.
 *
 * @param string $string
 *   The full name.
 * @param array $params
 *   Receives the 'first_name', 'middle_name' and 'last_name' keys that
 *   could be determined. Left untouched when $string is empty.
 *
 * @return void
 */
public static function extractName($string, &$params) {
  $name = trim($string);
  if (empty($name)) {
    return;
  }

  // strip out quotes, and any padding that was inside them
  $name = trim(str_replace(array('"', '\''), '', $name));

  if (strpos($name, ',') !== FALSE) {
    // name has a comma - assume lname, fname [mname]
    // (explode() always yields at least two parts here)
    $names = explode(',', $name);
    $params['last_name'] = trim($names[0]);

    $fnames = preg_split('/\s+/', trim($names[1]));
    $params['first_name'] = $fnames[0];
    if (count($fnames) > 1) {
      $params['middle_name'] = $fnames[1];
    }
    return;
  }

  // name has no comma - assume fname [mname] lname
  $names = preg_split('/\s+/', $name);
  $params['first_name'] = $names[0];
  if (count($names) == 2) {
    $params['last_name'] = $names[1];
  }
  elseif (count($names) > 2) {
    $params['middle_name'] = $names[1];
    $params['last_name'] = $names[2];
  }
}
508
/**
 * Build an array from a block of "name=value" lines.
 *
 * Each line is split into at most two parts on "=" using
 * CRM_Utils_System::explode(). Lines whose value is empty are skipped;
 * names and values are trimmed.
 *
 * @param string $string
 *   Newline separated "name=value" pairs.
 *
 * @return array
 *   Array of (name => value).
 */
public static function &makeArray($string) {
  $pairs = array();

  foreach (explode("\n", trim($string)) as $line) {
    list($key, $val) = CRM_Utils_System::explode('=', $line, 2);
    if (empty($val)) {
      continue;
    }
    $pairs[trim($key)] = trim($val);
  }

  return $pairs;
}
527
/**
 * Given an ezComponents-parsed representation of a text with
 * alternatives, return only the first alternative.
 *
 * @param string $full
 *   All alternatives as a long string (or some other text).
 *
 * @return string
 *   The first alternative, provided it contains something besides tags
 *   and white space; otherwise the text exactly as passed in.
 */
public static function stripAlternatives($full) {
  $found = array();
  preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $found);

  // no alternatives, or a first alternative without visible content
  if (!isset($found[1]) || trim(strip_tags($found[1])) == '') {
    return $full;
  }

  return $found[1];
}
550
/**
 * Strip leading and trailing white space from a string and collapse
 * every run of two or more white space characters into one space.
 * Used for postal/greeting/addressee.
 *
 * @param string $string
 *   Input string to be cleaned.
 *
 * @return string
 *   The cleaned string; an empty value is returned as is.
 */
public static function stripSpaces($string) {
  if (empty($string)) {
    return $string;
  }

  return preg_replace("/\s{2,}/", " ", trim($string));
}
564
/**
 * Clean the URL 'path' variable that is used to construct CiviCRM urls
 * by replacing unwanted characters in it.
 *
 * @param string $string
 *   The input string to be sanitized.
 * @param array $search
 *   The characters to be sanitized. When omitted (or loosely equal to
 *   NULL) a built-in list of punctuation and white space is used.
 * @param string $replace
 *   The character to replace them with. When omitted (or loosely equal
 *   to NULL) an underscore is used.
 *
 * @return string
 *   The sanitized string; an empty value is returned as is.
 */
static function stripPathChars($string,
  $search = NULL,
  $replace = NULL
) {
  if (empty($string)) {
    return $string;
  }

  // characters replaced when the caller does not supply a list
  $defaultSearch = array(
    '&', ';', ',', '=', '$',
    '"', "'", '\\',
    '<', '>', '(', ')',
    ' ', "\r", "\r\n", "\n", "\t",
  );

  $needles = ($search == NULL) ? $defaultSearch : $search;
  $substitute = ($replace == NULL) ? '_' : $replace;

  return str_replace($needles, $substitute, $string);
}
611
612
/**
 * Use HTMLPurifier to clean up a text string and remove any potential
 * xss attacks. This is primarily used in public facing pages which
 * accept html as the input string.
 *
 * The purifier is created on first use and kept in a static variable
 * for later calls.
 *
 * @param string $string
 *   The input string.
 *
 * @return string
 *   The cleaned up string.
 */
public static function purifyHTML($string) {
  static $purifier = NULL;

  if (!$purifier) {
    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Core.Encoding', 'UTF-8');
    // Disable the cache entirely
    $settings->set('Cache.DefinitionImpl', NULL);

    $purifier = new HTMLPurifier($settings);
  }

  return $purifier->purify($string);
}
638
/**
 * Truncate $string; if $string exceeds $maxLen, place "..." at the end.
 *
 * The result is never longer than $maxLen bytes. When $maxLen is too
 * small to hold the ellipsis (less than 3), the string is simply cut
 * at $maxLen without an ellipsis.
 *
 * @param string $string
 *   The text to shorten.
 * @param int $maxLen
 *   Maximum length (in bytes) of the returned string.
 *
 * @return string
 *   The original string when it fits, otherwise the shortened version.
 */
public static function ellipsify($string, $maxLen) {
  if (strlen($string) <= $maxLen) {
    return $string;
  }

  if ($maxLen < 3) {
    // no room for "...": a negative substr() length would otherwise
    // produce a result longer than $maxLen
    return (string) substr($string, 0, max(0, $maxLen));
  }

  return substr($string, 0, $maxLen - 3) . '...';
}
656
/**
 * Generate a random string.
 *
 * Characters are picked with rand(), which is not a cryptographically
 * secure source.
 * NOTE(review): if callers use the result as a secret (tokens, keys),
 * a stronger random source should be considered.
 *
 * @param int $len
 *   Number of characters to generate.
 * @param string $alphabet
 *   The characters to pick from (e.g. self::ALPHANUMERIC).
 *
 * @return string
 *   $len characters taken from $alphabet; an empty string when the
 *   alphabet is empty.
 */
public static function createRandom($len, $alphabet) {
  $alphabetSize = strlen($alphabet);
  if ($alphabetSize === 0) {
    // nothing to pick from
    return '';
  }

  $result = '';
  for ($i = 0; $i < $len; $i++) {
    // square brackets: the curly brace offset syntax was removed in PHP 8
    $result .= $alphabet[rand(0, $alphabetSize - 1)];
  }
  return $result;
}
672
/**
 * Split a string of the form "[prefix<delim>]name" at the first
 * occurrence of the delimiter.
 *
 * Examples (with ":" as delimiter):
 * "admin foo" => array(NULL,"admin foo")
 * "cms:admin foo" => array("cms", "admin foo")
 *
 * @param string $delim
 *   The delimiter between prefix and value; may be longer than one
 *   character.
 * @param string $string
 *   E.g. "view all contacts". Syntax: "[prefix:]name".
 * @param string|null $defaultPrefix
 *   The prefix reported when the string does not contain the delimiter.
 *
 * @return array
 *   (0 => string|NULL $prefix, 1 => string $value)
 */
public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
  $pos = strpos($string, $delim);
  if ($pos === FALSE) {
    return array($defaultPrefix, $string);
  }

  // skip the whole delimiter, not just its first character
  return array(substr($string, 0, $pos), substr($string, $pos + strlen($delim)));
}
694
/**
 * Mask part of the user portion of an Email address (everything
 * before the @). The masked characters sit in the middle of the user
 * portion.
 *
 * @param string $email
 *   The email address to be masked.
 * @param string $maskChar
 *   The character used for masking.
 * @param int $percent
 *   The percentage of the user portion to be masked.
 *
 * @return string
 *   The masked Email address.
 */
public static function maskEmail($email, $maskChar= '*', $percent=50) {
  list($localPart, $host) = preg_split("/@/", $email);

  $localLen = strlen($localPart);
  // number of characters to hide, and how many to keep in front of them
  $hidden = floor($localLen * $percent /100);
  $lead = floor(($localLen - $hidden) / 2);

  $head = substr($localPart, 0, $lead);
  $tail = substr($localPart, $hidden + $lead);

  return $head . str_repeat($maskChar, $hidden) . $tail . '@' . $host;
}
719
/**
 * Compare two strings after trimming surrounding white space.
 *
 * Note the meaning of the flag as implemented: a truthy $case
 * lower-cases both strings before comparing (i.e. the comparison is
 * case-INsensitive), a falsy $case compares them as they are.
 *
 * The comparison is strict, so numeric-looking strings such as "1e3"
 * and "1000" are not reported as identical.
 *
 * @param string $strOne
 *   String one.
 * @param string $strTwo
 *   String two.
 * @param bool $case
 *   Truthy: ignore differences in case. Falsy: compare case-sensitively.
 *
 * @return boolean
 *   TRUE (strings are identical); FALSE (strings are not identical)
 */
public static function compareStr($strOne, $strTwo, $case) {
  $one = trim($strOne);
  $two = trim($strTwo);

  if ($case) {
    // Convert to lowercase
    $one = strtolower($one);
    $two = strtolower($two);
  }

  return ($one === $two);
}
756
/**
 * Many parts of the codebase have a convention of internally passing around
 * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 * (because most other odd characters are %-escaped in URLs; and %-escaped
 * strings don't need any extra escaping in HTML).
 *
 * This function reverses that encoding of the ampersand.
 *
 * @param string $htmlUrl
 *   URL with HTML entities.
 *
 * @return string
 *   URL without HTML entities.
 */
public static function unstupifyUrl($htmlUrl) {
  $plainUrl = str_replace('&amp;', '&', $htmlUrl);
  return $plainUrl;
}
770 }