CRM/Utils/String.php

   1 <?php
   2 /*
   3  +--------------------------------------------------------------------+
   4  | CiviCRM version 4.6                                                |
   5  +--------------------------------------------------------------------+
   6  | Copyright CiviCRM LLC (c) 2004-2014                                |
   7  +--------------------------------------------------------------------+
   8  | This file is a part of CiviCRM.                                    |
   9  |                                                                    |
  10  | CiviCRM is free software; you can copy, modify, and distribute it  |
  11  | under the terms of the GNU Affero General Public License           |
  12  | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
  13  |                                                                    |
  14  | CiviCRM is distributed in the hope that it will be useful, but     |
  15  | WITHOUT ANY WARRANTY; without even the implied warranty of         |
  16  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
  17  | See the GNU Affero General Public License for more details.        |
  18  |                                                                    |
  19  | You should have received a copy of the GNU Affero General Public   |
  20  | License and the CiviCRM Licensing Exception along                  |
  21  | with this program; if not, contact CiviCRM LLC                     |
  22  | at info[AT]civicrm[DOT]org. If you have questions about the        |
  23  | GNU Affero General Public License or the licensing of CiviCRM,     |
  24  | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
  25  +--------------------------------------------------------------------+
  26 */
  27
  28 /**
  29  *
  30  * @package CRM
  31  * @copyright CiviCRM LLC (c) 2004-2014
  32  * $Id$
  33  *
  34  */
  35
  36 require_once 'HTML/QuickForm/Rule/Email.php';
  37
  38 /**
  39  * This class contains string functions
  40  *
  41  */
  42 class CRM_Utils_String {
  43   const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
  44
  45   /**
  46    * List of all letters and numbers
  47    */
  48   const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  49
  50   /**
  51    * Convert a display name into a potential variable
  52    * name that we could use in forms/code
  53    *
  54    * @param name Name of the string
  55    *
  56    * @param int $maxLength
  57    *
  58    * @return string
  59    *   An equivalent variable name.
  60    * @static
  61    */
  62   public static function titleToVar($title, $maxLength = 31) {
  63     $variable = self::munge($title, '_', $maxLength);
  64
  65     if (CRM_Utils_Rule::title($variable, $maxLength)) {
  66       return $variable;
  67     }
  68
  69     // if longer than the maxLength lets just return a substr of the
  70     // md5 to prevent errors downstream
  71     return substr(md5($title), 0, $maxLength);
  72   }
  73
  74   /**
  75    * Given a string, replace all non alpha numeric characters and
  76    * spaces with the replacement character
  77    *
  78    * @param string $name
  79    *   The name to be worked on.
  80    * @param string $char
  81    *   The character to use for non-valid chars.
  82    * @param int $len
  83    *   Length of valid variables.
  84    *
  85    *
  86    * @return string
  87    *   returns the manipulated string
  88    * @static
  89    */
  90   public static function munge($name, $char = '_', $len = 63) {
  91     // replace all white space and non-alpha numeric with $char
  92     // we only use the ascii character set since mysql does not create table names / field names otherwise
  93     // CRM-11744
  94     $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));
  95
  96     if ($len) {
  97       // lets keep variable names short
  98       return substr($name, 0, $len);
  99     }
 100     else {
 101       return $name;
 102     }
 103   }
 104
 105   /**
 106    * Convert possibly underscore separated words to camel case with special handling for 'UF'
 107    * e.g
 108    * membership_payment returns MembershipPayment
 109    * @param string $string
 110    *
 111    * @return string
 112    *   string
 113    */
 114   public static function convertStringToCamel($string) {
 115     $fragments = explode('_', $string);
 116     foreach ($fragments as & $fragment) {
 117       $fragment = ucfirst($fragment);
 118     }
 119     // Special case: UFGroup, UFJoin, UFMatch, UFField
 120     if ($fragments[0] === 'Uf') {
 121       $fragments[0] = 'UF';
 122     }
 123     return implode('', $fragments);
 124   }
 125
 126   /**
 127    *
 128    * Takes a variable name and munges it randomly into another variable name
 129    *
 130    * @param string $name
 131    *   Initial Variable Name.
 132    * @param int $len
 133    *   Length of valid variables.
 134    *
 135    * @return string
 136    *   Randomized Variable Name
 137    * @static
 138    */
 139   public static function rename($name, $len = 4) {
 140     $rand = substr(uniqid(), 0, $len);
 141     return substr_replace($name, $rand, -$len, $len);
 142   }
 143
 144   /**
 145    * Takes a string and returns the last tuple of the string.
 146    * useful while converting file names to class names etc
 147    *
 148    * @param string $string
 149    *   The input string.
 150    * @param \char|string $char $char the character used to demarcate the componets
 151    *
 152    *
 153    * @return string
 154    *   the last component
 155    * @static
 156    */
 157   public static function getClassName($string, $char = '_') {
 158     $names = array();
 159     if (!is_array($string)) {
 160       $names = explode($char, $string);
 161     }
 162     if (!empty($names)) {
 163       return array_pop($names);
 164     }
 165   }
 166
 167   /**
 168    * Appends a name to a string and seperated by delimiter.
 169    * does the right thing for an empty string
 170    *
 171    * @param string $str
 172    *   The string to be appended to.
 173    * @param string $delim
 174    *   The delimiter to use.
 175    * @param mixed $name
 176    *   The string (or array of strings) to append.
 177    *
 178    * @return void
 179    * @static
 180    */
 181   public static function append(&$str, $delim, $name) {
 182     if (empty($name)) {
 183       return;
 184     }
 185
 186     if (is_array($name)) {
 187       foreach ($name as $n) {
 188         if (empty($n)) {
 189           continue;
 190         }
 191         if (empty($str)) {
 192           $str = $n;
 193         }
 194         else {
 195           $str .= $delim . $n;
 196         }
 197       }
 198     }
 199     else {
 200       if (empty($str)) {
 201         $str = $name;
 202       }
 203       else {
 204         $str .= $delim . $name;
 205       }
 206     }
 207   }
 208
 209   /**
 210    * Determine if the string is composed only of ascii characters
 211    *
 212    * @param string $str
 213    *   Input string.
 214    * @param bool $utf8
 215    *   Attempt utf8 match on failure (default yes).
 216    *
 217    * @return boolean
 218    *   true if string is ascii
 219    * @static
 220    */
 221   public static function isAscii($str, $utf8 = TRUE) {
 222     if (!function_exists('mb_detect_encoding')) {
 223       // eliminate all white space from the string
 224       $str = preg_replace('/\s+/', '', $str);
 225       // FIXME:  This is a pretty brutal hack to make utf8 and 8859-1 work.
 226
 227       /* match low- or high-ascii characters */
 228       if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
 229         // || // low ascii characters
 230         // high ascii characters
 231         //  preg_match( '/[\x7F-\xFF]/', $str ) ) {
 232         if ($utf8) {
 233           /* if we did match, try for utf-8, or iso8859-1 */
 234
 235           return self::isUtf8($str);
 236         }
 237         else {
 238           return FALSE;
 239         }
 240       }
 241       return TRUE;
 242     }
 243     else {
 244       $order = array('ASCII');
 245       if ($utf8) {
 246         $order[] = 'UTF-8';
 247       }
 248       $enc = mb_detect_encoding($str, $order, TRUE);
 249       return ($enc == 'ASCII' || $enc == 'UTF-8');
 250     }
 251   }
 252
 253   /**
 254    * Determine the string replacements for redaction
 255    * on the basis of the regular expressions
 256    *
 257    * @param string $str
 258    *   Input string.
 259    * @param array $regexRules
 260    *   Regular expression to be matched w/ replacements.
 261    *
 262    * @return array
 263    *   array of strings w/ corresponding redacted outputs
 264    * @static
 265    */
 266   public static function regex($str, $regexRules) {
 267     //redact the regular expressions
 268     if (!empty($regexRules) && isset($str)) {
 269       static $matches, $totalMatches, $match = array();
 270       foreach ($regexRules as $pattern => $replacement) {
 271         preg_match_all($pattern, $str, $matches);
 272         if (!empty($matches[0])) {
 273           if (empty($totalMatches)) {
 274             $totalMatches = $matches[0];
 275           }
 276           else {
 277             $totalMatches = array_merge($totalMatches, $matches[0]);
 278           }
 279           $match = array_flip($totalMatches);
 280         }
 281       }
 282     }
 283
 284     if (!empty($match)) {
 285       foreach ($match as $matchKey => & $dontCare) {
 286         foreach ($regexRules as $pattern => $replacement) {
 287           if (preg_match($pattern, $matchKey)) {
 288             $dontCare = $replacement . substr(md5($matchKey), 0, 5);
 289             break;
 290           }
 291         }
 292       }
 293       return $match;
 294     }
 295     return CRM_Core_DAO::$_nullArray;
 296   }
 297
 298   /**
 299    * @param $str
 300    * @param $stringRules
 301    *
 302    * @return mixed
 303    */
 304   public static function redaction($str, $stringRules) {
 305     //redact the strings
 306     if (!empty($stringRules)) {
 307       foreach ($stringRules as $match => $replace) {
 308         $str = str_ireplace($match, $replace, $str);
 309       }
 310     }
 311
 312     //return the redacted output
 313     return $str;
 314   }
 315
 316   /**
 317    * Determine if a string is composed only of utf8 characters
 318    *
 319    * @param string $str
 320    *   Input string.
 321    * @static
 322    *
 323    * @return boolean
 324    */
 325   public static function isUtf8($str) {
 326     if (!function_exists(mb_detect_encoding)) {
 327       // eliminate all white space from the string
 328       $str = preg_replace('/\s+/', '', $str);
 329
 330       /* pattern stolen from the php.net function documentation for
 331              * utf8decode();
 332              * comment by JF Sebastian, 30-Mar-2005
 333              */
 334
 335       return preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|f0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
 336       // ||
 337       // iconv('ISO-8859-1', 'UTF-8', $str);
 338     }
 339     else {
 340       $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
 341       return ($enc !== FALSE);
 342     }
 343   }
 344
 345   /**
 346    * Determine if two href's are equivalent (fuzzy match)
 347    *
 348    * @param string $url1
 349    *   The first url to be matched.
 350    * @param string $url2
 351    *   The second url to be matched against.
 352    *
 353    * @return boolean
 354    *   true if the urls match, else false
 355    * @static
 356    */
 357   public static function match($url1, $url2) {
 358     $url1 = strtolower($url1);
 359     $url2 = strtolower($url2);
 360
 361     $url1Str = parse_url($url1);
 362     $url2Str = parse_url($url2);
 363
 364     if ($url1Str['path'] == $url2Str['path'] &&
 365       self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
 366     ) {
 367       return TRUE;
 368     }
 369     return FALSE;
 370   }
 371
 372   /**
 373    * Extract variable values
 374    *
 375    * @param mix $query
 376    *   This is basically url.
 377    *
 378    * @return mix
 379    *   $v  returns civicrm url (eg: civicrm/contact/search/...)
 380    * @static
 381    */
 382   public static function extractURLVarValue($query) {
 383     $config = CRM_Core_Config::singleton();
 384     $urlVar = $config->userFrameworkURLVar;
 385
 386     $params = explode('&', $query);
 387     foreach ($params as $p) {
 388       if (strpos($p, '=')) {
 389         list($k, $v) = explode('=', $p);
 390         if ($k == $urlVar) {
 391           return $v;
 392         }
 393       }
 394     }
 395     return NULL;
 396   }
 397
 398   /**
 399    * Translate a true/false/yes/no string to a 0 or 1 value
 400    *
 401    * @param string $str
 402    *   The string to be translated.
 403    *
 404    * @return boolean
 405    * @static
 406    */
 407   public static function strtobool($str) {
 408     if (!is_scalar($str)) {
 409       return FALSE;
 410     }
 411
 412     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 413       return TRUE;
 414     }
 415     return FALSE;
 416   }
 417
 418   /**
 419    * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
 420    *
 421    * @param string $str
 422    *   The string to be translated.
 423    *
 424    * @return boolean
 425    * @static
 426    */
 427   public static function strtoboolstr($str) {
 428     if (!is_scalar($str)) {
 429       return FALSE;
 430     }
 431
 432     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 433       return '1';
 434     }
 435     elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
 436       return '0';
 437     }
 438     else {
 439       return FALSE;
 440     }
 441   }
 442
 443   /**
 444    * Convert a HTML string into a text one using html2text
 445    *
 446    * @param string $html
 447    *   The string to be converted.
 448    *
 449    * @return string
 450    *   the converted string
 451    * @static
 452    */
 453   public static function htmlToText($html) {
 454     require_once 'packages/html2text/rcube_html2text.php';
 455     $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
 456     $converter = new rcube_html2text($token_html);
 457     $token_text = $converter->get_text();
 458     $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
 459     return $text;
 460   }
 461
 462   /**
 463    * @param $string
 464    * @param array $params
 465    */
 466   public static function extractName($string, &$params) {
 467     $name = trim($string);
 468     if (empty($name)) {
 469       return;
 470     }
 471
 472     // strip out quotes
 473     $name = str_replace('"', '', $name);
 474     $name = str_replace('\'', '', $name);
 475
 476     // check for comma in name
 477     if (strpos($name, ',') !== FALSE) {
 478
 479       // name has a comma - assume lname, fname [mname]
 480       $names = explode(',', $name);
 481       if (count($names) > 1) {
 482         $params['last_name'] = trim($names[0]);
 483
 484         // check for space delim
 485         $fnames = explode(' ', trim($names[1]));
 486         if (count($fnames) > 1) {
 487           $params['first_name'] = trim($fnames[0]);
 488           $params['middle_name'] = trim($fnames[1]);
 489         }
 490         else {
 491           $params['first_name'] = trim($fnames[0]);
 492         }
 493       }
 494       else {
 495         $params['first_name'] = trim($names[0]);
 496       }
 497     }
 498     else {
 499       // name has no comma - assume fname [mname] fname
 500       $names = explode(' ', $name);
 501       if (count($names) == 1) {
 502         $params['first_name'] = $names[0];
 503       }
 504       elseif (count($names) == 2) {
 505         $params['first_name'] = $names[0];
 506         $params['last_name'] = $names[1];
 507       }
 508       else {
 509         $params['first_name'] = $names[0];
 510         $params['middle_name'] = $names[1];
 511         $params['last_name'] = $names[2];
 512       }
 513     }
 514   }
 515
 516   /**
 517    * @param $string
 518    *
 519    * @return array
 520    */
 521   public static function &makeArray($string) {
 522     $string = trim($string);
 523
 524     $values = explode("\n", $string);
 525     $result = array();
 526     foreach ($values as $value) {
 527       list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
 528       if (!empty($v)) {
 529         $result[trim($n)] = trim($v);
 530       }
 531     }
 532     return $result;
 533   }
 534
 535   /**
 536    * Given an ezComponents-parsed representation of
 537    * a text with alternatives return only the first one
 538    *
 539    * @param string $full
 540    *   All alternatives as a long string (or some other text).
 541    *
 542    * @return string
 543    *   only the first alternative found (or the text without alternatives)
 544    */
 545   public static function stripAlternatives($full) {
 546     $matches = array();
 547     preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
 548
 549     if (isset($matches[1]) &&
 550       trim(strip_tags($matches[1])) != ''
 551     ) {
 552       return $matches[1];
 553     }
 554     else {
 555       return $full;
 556     }
 557   }
 558
 559   /**
 560    * Strip leading, trailing, double spaces from string
 561    * used for postal/greeting/addressee
 562    *
 563    * @param string $string
 564    *   Input string to be cleaned.
 565    *
 566    * @return string
 567    *   the cleaned string
 568    * @static
 569    */
 570   public static function stripSpaces($string) {
 571     return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
 572   }
 573
 574   /**
 575    * clean the URL 'path' variable that we use
 576    * to construct CiviCRM urls by removing characters from the path variable
 577    *
 578    * @param string $string
 579    *   The input string to be sanitized.
 580    * @param array $search
 581    *   The characters to be sanitized.
 582    * @param string $replace
 583    *   The character to replace it with.
 584    *
 585    * @return string
 586    *   the sanitized string
 587    * @static
 588    */
 589   static function stripPathChars(
 590     $string,
 591     $search = NULL,
 592     $replace = NULL
 593   ) {
 594     static $_searchChars = NULL;
 595     static $_replaceChar = NULL;
 596
 597     if (empty($string)) {
 598       return $string;
 599     }
 600
 601     if ($_searchChars == NULL) {
 602       $_searchChars = array(
 603         '&',
 604         ';',
 605         ',',
 606         '=',
 607         '$',
 608         '"',
 609         "'",
 610         '\\',
 611         '<',
 612         '>',
 613         '(',
 614         ')',
 615         ' ',
 616         "\r",
 617         "\r\n",
 618         "\n",
 619         "\t",
 620       );
 621       $_replaceChar = '_';
 622     }
 623
 624     if ($search == NULL) {
 625       $search = $_searchChars;
 626     }
 627
 628     if ($replace == NULL) {
 629       $replace = $_replaceChar;
 630     }
 631
 632     return str_replace($search, $replace, $string);
 633   }
 634
 635
 636   /**
 637    * Use HTMLPurifier to clean up a text string and remove any potential
 638    * xss attacks. This is primarily used in public facing pages which
 639    * accept html as the input string
 640    *
 641    * @param string $string
 642    *   The input string.
 643    *
 644    * @return string
 645    *   the cleaned up string
 646    * @static
 647    */
 648   public static function purifyHTML($string) {
 649     static $_filter = NULL;
 650     if (!$_filter) {
 651       $config = HTMLPurifier_Config::createDefault();
 652       $config->set('Core.Encoding', 'UTF-8');
 653
 654       // Disable the cache entirely
 655       $config->set('Cache.DefinitionImpl', NULL);
 656
 657       $_filter = new HTMLPurifier($config);
 658     }
 659
 660     return $_filter->purify($string);
 661   }
 662
 663   /**
 664    * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 665    *
 666    * @param string $string
 667    * @param int $maxLen
 668    *
 669    * @return string
 670    */
 671   public static function ellipsify($string, $maxLen) {
 672     $len = strlen($string);
 673     if ($len <= $maxLen) {
 674       return $string;
 675     }
 676     else {
 677       return substr($string, 0, $maxLen - 3) . '...';
 678     }
 679   }
 680
 681   /**
 682    * Generate a random string
 683    *
 684    * @param $len
 685    * @param $alphabet
 686    * @return string
 687    */
 688   public static function createRandom($len, $alphabet) {
 689     $alphabetSize = strlen($alphabet);
 690     $result = '';
 691     for ($i = 0; $i < $len; $i++) {
 692       $result .= $alphabet{rand(1, $alphabetSize) - 1};
 693     }
 694     return $result;
 695   }
 696
 697   /**
 698    * Examples:
 699    * "admin foo" => array(NULL,"admin foo")
 700    * "cms:admin foo" => array("cms", "admin foo")
 701    *
 702    * @param $delim
 703    * @param string $string
 704    *   E.g. "view all contacts". Syntax: "[prefix:]name".
 705    * @param null $defaultPrefix
 706    *
 707    * @return array
 708    *   (0 => string|NULL $prefix, 1 => string $value)
 709    */
 710   public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
 711     $pos = strpos($string, $delim);
 712     if ($pos === FALSE) {
 713       return array($defaultPrefix, $string);
 714     }
 715     else {
 716       return array(substr($string, 0, $pos), substr($string, 1 + $pos));
 717     }
 718   }
 719
 720   /**
 721    * This function will mask part of the the user portion of an Email address (everything before the @)
 722    *
 723    * @param string $email
 724    *   The email address to be masked.
 725    * @param string $maskChar
 726    *   The character used for masking.
 727    * @param int $percent
 728    *   The percentage of the user portion to be masked.
 729    *
 730    * @return string
 731    *   returns the masked Email address
 732    */
 733   public static function maskEmail($email, $maskChar = '*', $percent = 50) {
 734     list($user, $domain) = preg_split("/@/", $email);
 735     $len = strlen($user);
 736     $maskCount = floor($len * $percent / 100);
 737     $offset = floor(($len - $maskCount) / 2);
 738
 739     $masked = substr($user, 0, $offset)
 740       . str_repeat($maskChar, $maskCount)
 741       . substr($user, $maskCount + $offset);
 742
 743     return ($masked . '@' . $domain);
 744   }
 745
 746   /**
 747    * This function compares two strings
 748    *
 749    * @param string $strOne
 750    *   String one.
 751    * @param string $strTwo
 752    *   String two.
 753    * @param bool $case
 754    *   Boolean indicating whether you want the comparison to be case sensitive or not.
 755    *
 756    * @return boolean
 757    *   TRUE (string are identical); FALSE (strings are not identical)
 758    */
 759   public static function compareStr($strOne, $strTwo, $case) {
 760     if ($case == TRUE) {
 761       // Convert to lowercase and trim white spaces
 762       if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
 763         // yes - they are identical
 764         return TRUE;
 765       }
 766       else {
 767         // not identical
 768         return FALSE;
 769       }
 770     }
 771     if ($case == FALSE) {
 772       // Trim white spaces
 773       if (trim($strOne) == trim($strTwo)) {
 774         // yes - they are identical
 775         return TRUE;
 776       }
 777       else {
 778         // not identical
 779         return FALSE;
 780       }
 781     }
 782   }
 783
 784   /**
 785    * Many parts of the codebase have a convention of internally passing around
 786    * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 787    * (because most other odd characters are %-escaped in URLs; and %-escaped
 788    * strings don't need any extra escaping in HTML).
 789    *
 790    * @param string $url
 791    *   URL with HTML entities.
 792    * @return string
 793    *   URL without HTML entities
 794    */
 795   public static function unstupifyUrl($htmlUrl) {
 796     return str_replace('&amp;', '&', $htmlUrl);
 797   }
 798 }