CRM/Utils/String.php

   1 <?php
   2 /*
   3  +--------------------------------------------------------------------+
   4  | CiviCRM version 4.6                                                |
   5  +--------------------------------------------------------------------+
   6  | Copyright CiviCRM LLC (c) 2004-2014                                |
   7  +--------------------------------------------------------------------+
   8  | This file is a part of CiviCRM.                                    |
   9  |                                                                    |
  10  | CiviCRM is free software; you can copy, modify, and distribute it  |
  11  | under the terms of the GNU Affero General Public License           |
  12  | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
  13  |                                                                    |
  14  | CiviCRM is distributed in the hope that it will be useful, but     |
  15  | WITHOUT ANY WARRANTY; without even the implied warranty of         |
  16  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
  17  | See the GNU Affero General Public License for more details.        |
  18  |                                                                    |
  19  | You should have received a copy of the GNU Affero General Public   |
  20  | License and the CiviCRM Licensing Exception along                  |
  21  | with this program; if not, contact CiviCRM LLC                     |
  22  | at info[AT]civicrm[DOT]org. If you have questions about the        |
  23  | GNU Affero General Public License or the licensing of CiviCRM,     |
  24  | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
  25  +--------------------------------------------------------------------+
  26 */
  27
  28 /**
  29  *
  30  * @package CRM
  31  * @copyright CiviCRM LLC (c) 2004-2014
  32  * $Id$
  33  *
  34  */
  35
  36 require_once 'HTML/QuickForm/Rule/Email.php';
  37
  38 /**
  39  * This class contains string functions
  40  *
  41  */
  42 class CRM_Utils_String {
  43   const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
  44
  45   /**
  46    * List of all letters and numbers
  47    */
  48   const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  49
  50   /**
  51    * Convert a display name into a potential variable
  52    * name that we could use in forms/code
  53    *
  54    * @param  name    Name of the string
  55    *
  56    * @param int $maxLength
  57    *
  58    * @return string  An equivalent variable name
  59    *
  60    * @access public
  61    *
  62    * @return string (or null)
  63    * @static
  64    */
  65   static function titleToVar($title, $maxLength = 31) {
  66     $variable = self::munge($title, '_', $maxLength);
  67
  68     if (CRM_Utils_Rule::title($variable, $maxLength)) {
  69       return $variable;
  70     }
  71
  72     // if longer than the maxLength lets just return a substr of the
  73     // md5 to prevent errors downstream
  74     return substr(md5($title), 0, $maxLength);
  75   }
  76
  77   /**
  78    * Given a string, replace all non alpha numeric characters and
  79    * spaces with the replacement character
  80    *
  81    * @param string $name the name to be worked on
  82    * @param string $char the character to use for non-valid chars
  83    * @param int    $len  length of valid variables
  84    *
  85    * @access public
  86    *
  87    * @return string returns the manipulated string
  88    * @static
  89    */
  90   static function munge($name, $char = '_', $len = 63) {
  91     // replace all white space and non-alpha numeric with $char
  92     // we only use the ascii character set since mysql does not create table names / field names otherwise
  93     // CRM-11744
  94     $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));
  95
  96     if ($len) {
  97       // lets keep variable names short
  98       return substr($name, 0, $len);
  99     }
 100     else {
 101       return $name;
 102     }
 103   }
 104
 105   /**
 106    * Convert possibly underscore separated words to camel case with special handling for 'UF'
 107    * e.g
 108    * membership_payment returns MembershipPayment
 109    * @param string $string
 110    *
 111    * @return string string
 112    */
 113   static function convertStringToCamel($string) {
 114     $fragments = explode('_', $string);
 115     foreach ($fragments as & $fragment) {
 116       $fragment = ucfirst($fragment);
 117     }
 118     // Special case: UFGroup, UFJoin, UFMatch, UFField
 119     if ($fragments[0] === 'Uf') {
 120       $fragments[0] = 'UF';
 121     }
 122     return implode('', $fragments);
 123   }
 124
 125   /**
 126    *
 127    * Takes a variable name and munges it randomly into another variable name
 128    *
 129    * @param  string $name    Initial Variable Name
 130    * @param int     $len  length of valid variables
 131    *
 132    * @return string  Randomized Variable Name
 133    * @access public
 134    * @static
 135    */
 136   static function rename($name, $len = 4) {
 137     $rand = substr(uniqid(), 0, $len);
 138     return substr_replace($name, $rand, -$len, $len);
 139   }
 140
 141   /**
 142    * Takes a string and returns the last tuple of the string.
 143    * useful while converting file names to class names etc
 144    *
 145    * @param string $string the input string
 146    * @param \char|string $char $char   the character used to demarcate the componets
 147    *
 148    * @access public
 149    *
 150    * @return string the last component
 151    * @static
 152    */
 153   static function getClassName($string, $char = '_') {
 154     $names = array();
 155     if (!is_array($string)) {
 156       $names = explode($char, $string);
 157     }
 158     if (!empty($names)) {
 159       return array_pop($names);
 160     }
 161   }
 162
 163   /**
 164    * Appends a name to a string and seperated by delimiter.
 165    * does the right thing for an empty string
 166    *
 167    * @param string $str   the string to be appended to
 168    * @param string $delim the delimiter to use
 169    * @param mixed  $name  the string (or array of strings) to append
 170    *
 171    * @return void
 172    * @access public
 173    * @static
 174    */
 175   static function append(&$str, $delim, $name) {
 176     if (empty($name)) {
 177       return;
 178     }
 179
 180     if (is_array($name)) {
 181       foreach ($name as $n) {
 182         if (empty($n)) {
 183           continue;
 184         }
 185         if (empty($str)) {
 186           $str = $n;
 187         }
 188         else {
 189           $str .= $delim . $n;
 190         }
 191       }
 192     }
 193     else {
 194       if (empty($str)) {
 195         $str = $name;
 196       }
 197       else {
 198         $str .= $delim . $name;
 199       }
 200     }
 201   }
 202
 203   /**
 204    * Determine if the string is composed only of ascii characters
 205    *
 206    * @param string  $str input string
 207    * @param boolean $utf8 attempt utf8 match on failure (default yes)
 208    *
 209    * @return boolean    true if string is ascii
 210    * @access public
 211    * @static
 212    */
 213   static function isAscii($str, $utf8 = TRUE) {
 214     if (!function_exists('mb_detect_encoding')) {
 215       // eliminate all white space from the string
 216       $str = preg_replace('/\s+/', '', $str);
 217       // FIXME:  This is a pretty brutal hack to make utf8 and 8859-1 work.
 218
 219       /* match low- or high-ascii characters */
 220       if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
 221         // || // low ascii characters
 222         // high ascii characters
 223         //  preg_match( '/[\x7F-\xFF]/', $str ) ) {
 224         if ($utf8) {
 225           /* if we did match, try for utf-8, or iso8859-1 */
 226
 227           return self::isUtf8($str);
 228         }
 229         else {
 230           return FALSE;
 231         }
 232       }
 233       return TRUE;
 234     }
 235     else {
 236       $order = array('ASCII');
 237       if ($utf8) {
 238         $order[] = 'UTF-8';
 239       }
 240       $enc = mb_detect_encoding($str, $order, TRUE);
 241       return ($enc == 'ASCII' || $enc == 'UTF-8');
 242     }
 243   }
 244
 245   /**
 246    * Determine the string replacements for redaction
 247    * on the basis of the regular expressions
 248    *
 249    * @param string $str        input string
 250    * @param array  $regexRules regular expression to be matched w/ replacements
 251    *
 252    * @return array $match      array of strings w/ corresponding redacted outputs
 253    * @access public
 254    * @static
 255    */
 256   static function regex($str, $regexRules) {
 257     //redact the regular expressions
 258     if (!empty($regexRules) && isset($str)) {
 259       static $matches, $totalMatches, $match = array();
 260       foreach ($regexRules as $pattern => $replacement) {
 261         preg_match_all($pattern, $str, $matches);
 262         if (!empty($matches[0])) {
 263           if (empty($totalMatches)) {
 264             $totalMatches = $matches[0];
 265           }
 266           else {
 267             $totalMatches = array_merge($totalMatches, $matches[0]);
 268           }
 269           $match = array_flip($totalMatches);
 270         }
 271       }
 272     }
 273
 274     if (!empty($match)) {
 275       foreach ($match as $matchKey => & $dontCare) {
 276         foreach ($regexRules as $pattern => $replacement) {
 277           if (preg_match($pattern, $matchKey)) {
 278             $dontCare = $replacement . substr(md5($matchKey), 0, 5);
 279             break;
 280           }
 281         }
 282       }
 283       return $match;
 284     }
 285     return CRM_Core_DAO::$_nullArray;
 286   }
 287
 288   /**
 289    * @param $str
 290    * @param $stringRules
 291    *
 292    * @return mixed
 293    */
 294   static function redaction($str, $stringRules) {
 295     //redact the strings
 296     if (!empty($stringRules)) {
 297       foreach ($stringRules as $match => $replace) {
 298         $str = str_ireplace($match, $replace, $str);
 299       }
 300     }
 301
 302     //return the redacted output
 303     return $str;
 304   }
 305
 306   /**
 307    * Determine if a string is composed only of utf8 characters
 308    *
 309    * @param string $str  input string
 310    * @access public
 311    * @static
 312    *
 313    * @return boolean
 314    */
 315   static function isUtf8($str) {
 316     if (!function_exists(mb_detect_encoding)) {
 317       // eliminate all white space from the string
 318       $str = preg_replace('/\s+/', '', $str);
 319
 320       /* pattern stolen from the php.net function documentation for
 321              * utf8decode();
 322              * comment by JF Sebastian, 30-Mar-2005
 323              */
 324
 325       return preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|f0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
 326       // ||
 327       // iconv('ISO-8859-1', 'UTF-8', $str);
 328     }
 329     else {
 330       $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
 331       return ($enc !== FALSE);
 332     }
 333   }
 334
 335   /**
 336    * Determine if two href's are equivalent (fuzzy match)
 337    *
 338    * @param string $url1 the first url to be matched
 339    * @param string $url2 the second url to be matched against
 340    *
 341    * @return boolean true if the urls match, else false
 342    * @access public
 343    * @static
 344    */
 345   static function match($url1, $url2) {
 346     $url1 = strtolower($url1);
 347     $url2 = strtolower($url2);
 348
 349     $url1Str = parse_url($url1);
 350     $url2Str = parse_url($url2);
 351
 352     if ($url1Str['path'] == $url2Str['path'] &&
 353       self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
 354     ) {
 355       return TRUE;
 356     }
 357     return FALSE;
 358   }
 359
 360   /**
 361    * Extract variable values
 362    *
 363    * @param  mix $query this is basically url
 364    *
 365    * @return mix $v  returns civicrm url (eg: civicrm/contact/search/...)
 366    * @access public
 367    * @static
 368    */
 369   static function extractURLVarValue($query) {
 370     $config = CRM_Core_Config::singleton();
 371     $urlVar = $config->userFrameworkURLVar;
 372
 373     $params = explode('&', $query);
 374     foreach ($params as $p) {
 375       if (strpos($p, '=')) {
 376         list($k, $v) = explode('=', $p);
 377         if ($k == $urlVar) {
 378           return $v;
 379         }
 380       }
 381     }
 382     return NULL;
 383   }
 384
 385   /**
 386    * Translate a true/false/yes/no string to a 0 or 1 value
 387    *
 388    * @param string $str  the string to be translated
 389    *
 390    * @return boolean
 391    * @access public
 392    * @static
 393    */
 394   static function strtobool($str) {
 395     if (!is_scalar($str)) {
 396       return FALSE;
 397     }
 398
 399     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 400       return TRUE;
 401     }
 402     return FALSE;
 403   }
 404
 405   /**
 406    * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
 407    *
 408    * @param string $str  the string to be translated
 409    *
 410    * @return boolean
 411    * @access public
 412    * @static
 413    */
 414   static function strtoboolstr($str) {
 415     if (!is_scalar($str)) {
 416       return FALSE;
 417     }
 418
 419     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 420       return '1';
 421     }
 422     elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
 423       return '0';
 424     }
 425     else {
 426       return FALSE;
 427     }
 428   }
 429
 430   /**
 431    * Convert a HTML string into a text one using html2text
 432    *
 433    * @param string $html  the string to be converted
 434    *
 435    * @return string       the converted string
 436    * @access public
 437    * @static
 438    */
 439   static function htmlToText($html) {
 440     require_once 'packages/html2text/rcube_html2text.php';
 441     $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
 442     $converter = new rcube_html2text($token_html);
 443     $token_text = $converter->get_text();
 444     $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
 445     return $text;
 446   }
 447
 448   /**
 449    * @param $string
 450    * @param array $params
 451    */
 452   static function extractName($string, &$params) {
 453     $name = trim($string);
 454     if (empty($name)) {
 455       return;
 456     }
 457
 458     // strip out quotes
 459     $name = str_replace('"', '', $name);
 460     $name = str_replace('\'', '', $name);
 461
 462     // check for comma in name
 463     if (strpos($name, ',') !== FALSE) {
 464
 465       // name has a comma - assume lname, fname [mname]
 466       $names = explode(',', $name);
 467       if (count($names) > 1) {
 468         $params['last_name'] = trim($names[0]);
 469
 470         // check for space delim
 471         $fnames = explode(' ', trim($names[1]));
 472         if (count($fnames) > 1) {
 473           $params['first_name'] = trim($fnames[0]);
 474           $params['middle_name'] = trim($fnames[1]);
 475         }
 476         else {
 477           $params['first_name'] = trim($fnames[0]);
 478         }
 479       }
 480       else {
 481         $params['first_name'] = trim($names[0]);
 482       }
 483     }
 484     else {
 485       // name has no comma - assume fname [mname] fname
 486       $names = explode(' ', $name);
 487       if (count($names) == 1) {
 488         $params['first_name'] = $names[0];
 489       }
 490       elseif (count($names) == 2) {
 491         $params['first_name'] = $names[0];
 492         $params['last_name'] = $names[1];
 493       }
 494       else {
 495         $params['first_name'] = $names[0];
 496         $params['middle_name'] = $names[1];
 497         $params['last_name'] = $names[2];
 498       }
 499     }
 500   }
 501
 502   /**
 503    * @param $string
 504    *
 505    * @return array
 506    */
 507   static function &makeArray($string) {
 508     $string = trim($string);
 509
 510     $values = explode("\n", $string);
 511     $result = array();
 512     foreach ($values as $value) {
 513       list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
 514       if (!empty($v)) {
 515         $result[trim($n)] = trim($v);
 516       }
 517     }
 518     return $result;
 519   }
 520
 521   /**
 522    * Given an ezComponents-parsed representation of
 523    * a text with alternatives return only the first one
 524    *
 525    * @param string $full  all alternatives as a long string (or some other text)
 526    *
 527    * @return string       only the first alternative found (or the text without alternatives)
 528    */
 529   static function stripAlternatives($full) {
 530     $matches = array();
 531     preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
 532
 533     if (isset($matches[1]) &&
 534       trim(strip_tags($matches[1])) != ''
 535     ) {
 536       return $matches[1];
 537     }
 538     else {
 539       return $full;
 540     }
 541   }
 542
 543   /**
 544    * Strip leading, trailing, double spaces from string
 545    * used for postal/greeting/addressee
 546    *
 547    * @param string  $string input string to be cleaned
 548    *
 549    * @return string the cleaned string
 550    * @access public
 551    * @static
 552    */
 553   static function stripSpaces($string) {
 554     return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
 555   }
 556
 557   /**
 558    * This function is used to clean the URL 'path' variable that we use
 559    * to construct CiviCRM urls by removing characters from the path variable
 560    *
 561    * @param string $string  the input string to be sanitized
 562    * @param array  $search  the characters to be sanitized
 563    * @param string $replace the character to replace it with
 564    *
 565    * @return string the sanitized string
 566    * @access public
 567    * @static
 568    */
 569   static function stripPathChars($string,
 570     $search = NULL,
 571     $replace = NULL
 572   ) {
 573     static $_searchChars = NULL;
 574     static $_replaceChar = NULL;
 575
 576     if (empty($string)) {
 577       return $string;
 578     }
 579
 580     if ($_searchChars == NULL) {
 581       $_searchChars = array(
 582         '&', ';', ',', '=', '$',
 583         '"', "'", '\\',
 584         '<', '>', '(', ')',
 585         ' ', "\r", "\r\n", "\n", "\t",
 586       );
 587       $_replaceChar = '_';
 588     }
 589
 590
 591     if ($search == NULL) {
 592       $search = $_searchChars;
 593     }
 594
 595     if ($replace == NULL) {
 596       $replace = $_replaceChar;
 597     }
 598
 599     return str_replace($search, $replace, $string);
 600   }
 601
 602
 603   /**
 604    * Use HTMLPurifier to clean up a text string and remove any potential
 605    * xss attacks. This is primarily used in public facing pages which
 606    * accept html as the input string
 607    *
 608    * @param string $string the input string
 609    *
 610    * @return string the cleaned up string
 611    * @public
 612    * @static
 613    */
 614   static function purifyHTML($string) {
 615     static $_filter = null;
 616     if (!$_filter) {
 617       $config = HTMLPurifier_Config::createDefault();
 618       $config->set('Core.Encoding', 'UTF-8');
 619
 620       // Disable the cache entirely
 621       $config->set('Cache.DefinitionImpl', null);
 622
 623       $_filter = new HTMLPurifier($config);
 624     }
 625
 626     return $_filter->purify($string);
 627   }
 628
 629   /**
 630    * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 631    *
 632    * @param string $string
 633    * @param int $maxLen
 634    *
 635    * @return string
 636    */
 637   static function ellipsify($string, $maxLen) {
 638     $len = strlen($string);
 639     if ($len <= $maxLen) {
 640       return $string;
 641     }
 642     else {
 643       return substr($string, 0, $maxLen-3) . '...';
 644     }
 645   }
 646
 647   /**
 648    * Generate a random string
 649    *
 650    * @param $len
 651    * @param $alphabet
 652    * @return string
 653    */
 654   public static function createRandom($len, $alphabet) {
 655     $alphabetSize = strlen($alphabet);
 656     $result = '';
 657     for ($i = 0; $i < $len; $i++) {
 658       $result .= $alphabet{rand(1, $alphabetSize) - 1};
 659     }
 660     return $result;
 661   }
 662
 663   /**
 664    * Examples:
 665    * "admin foo" => array(NULL,"admin foo")
 666    * "cms:admin foo" => array("cms", "admin foo")
 667    *
 668    * @param $delim
 669    * @param string $string e.g. "view all contacts". Syntax: "[prefix:]name"
 670    * @param null $defaultPrefix
 671    *
 672    * @return array (0 => string|NULL $prefix, 1 => string $value)
 673    */
 674   public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
 675     $pos = strpos($string, $delim);
 676     if ($pos === FALSE) {
 677       return array($defaultPrefix, $string);
 678     }
 679     else {
 680       return array(substr($string, 0, $pos), substr($string, 1+$pos));
 681     }
 682   }
 683
 684   /**
 685    * This function will mask part of the the user portion of an Email address (everything before the @)
 686    *
 687    * @param string $email the email address to be masked
 688    * @param string $maskChar the character used for masking
 689    * @param integer $percent the percentage of the user portion to be masked
 690    *
 691    * @return string returns the masked Email address
 692    */
 693   public static function maskEmail($email, $maskChar= '*', $percent=50) {
 694     list($user, $domain) = preg_split("/@/", $email);
 695     $len = strlen($user);
 696     $maskCount = floor($len * $percent /100);
 697     $offset = floor(($len - $maskCount) / 2);
 698
 699     $masked = substr($user, 0, $offset)
 700       .str_repeat($maskChar, $maskCount)
 701       .substr($user, $maskCount + $offset);
 702
 703     return($masked.'@'.$domain);
 704   }
 705
 706   /**
 707    * This function compares two strings
 708    *
 709    * @param string $strOne string one
 710    * @param string $strTwo string two
 711    * @param boolean $case boolean indicating whether you want the comparison to be case sensitive or not
 712    *
 713    * @return boolean TRUE (string are identical); FALSE (strings are not identical)
 714    */
 715   public static function compareStr($strOne, $strTwo, $case) {
 716     if ($case == TRUE) {
 717       // Convert to lowercase and trim white spaces
 718       if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
 719         // yes - they are identical
 720         return TRUE;
 721       }
 722       else {
 723         // not identical
 724         return FALSE;
 725       }
 726     }
 727     if ($case == FALSE) {
 728       // Trim white spaces
 729       if (trim($strOne) == trim($strTwo)) {
 730         // yes - they are identical
 731         return TRUE;
 732       }
 733       else {
 734         // not identical
 735         return FALSE;
 736       }
 737     }
 738   }
 739
 740   /**
 741    * Many parts of the codebase have a convention of internally passing around
 742    * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 743    * (because most other odd characters are %-escaped in URLs; and %-escaped
 744    * strings don't need any extra escaping in HTML).
 745    *
 746    * @param string $url URL with HTML entities
 747    * @return string URL without HTML entities
 748    */
 749   public static function unstupifyUrl($htmlUrl) {
 750     return str_replace('&amp;', '&', $htmlUrl);
 751   }
 752 }
 753