CRM/Utils/String.php

   1 <?php
   2 /*
   3  +--------------------------------------------------------------------+
   4  | CiviCRM version 4.6                                                |
   5  +--------------------------------------------------------------------+
   6  | Copyright CiviCRM LLC (c) 2004-2014                                |
   7  +--------------------------------------------------------------------+
   8  | This file is a part of CiviCRM.                                    |
   9  |                                                                    |
  10  | CiviCRM is free software; you can copy, modify, and distribute it  |
  11  | under the terms of the GNU Affero General Public License           |
  12  | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
  13  |                                                                    |
  14  | CiviCRM is distributed in the hope that it will be useful, but     |
  15  | WITHOUT ANY WARRANTY; without even the implied warranty of         |
  16  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
  17  | See the GNU Affero General Public License for more details.        |
  18  |                                                                    |
  19  | You should have received a copy of the GNU Affero General Public   |
  20  | License and the CiviCRM Licensing Exception along                  |
  21  | with this program; if not, contact CiviCRM LLC                     |
  22  | at info[AT]civicrm[DOT]org. If you have questions about the        |
  23  | GNU Affero General Public License or the licensing of CiviCRM,     |
  24  | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
  25  +--------------------------------------------------------------------+
  26 */
  27
  28 /**
  29  *
  30  * @package CRM
  31  * @copyright CiviCRM LLC (c) 2004-2014
  32  * $Id$
  33  *
  34  */
  35
  36 require_once 'HTML/QuickForm/Rule/Email.php';
  37
  38 /**
  39  * This class contains string functions
  40  *
  41  */
  42 class CRM_Utils_String {
  /**
   * Common delimiter and line-ending strings.
   */
  const COMMA = ",";
  const SEMICOLON = ";";
  const SPACE = " ";
  const TAB = "\t";
  const LINEFEED = "\n";
  const CARRIAGELINE = "\r\n";
  const LINECARRIAGE = "\n\r";
  const CARRIAGERETURN = "\r";

  /**
   * All ASCII letters (lower case, then upper case) followed by the digits 1-9 and 0.
   */
  const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  49
  50   /**
  51    * Convert a display name into a potential variable
  52    * name that we could use in forms/code
  53    *
   * @param string $title
   *   The display name to convert.
  55    *
  56    * @param int $maxLength
  57    *
  58    * @return string
  59    *   An equivalent variable name.
  60    * @static
  61    */
  public static function titleToVar($title, $maxLength = 31) {
    // Reduce the title to ASCII letters/digits joined by '_', capped at $maxLength.
    $candidate = self::munge($title, '_', $maxLength);

    // When the munged name is rejected by the title rule, fall back to a
    // truncated md5 of the original title to prevent errors downstream.
    if (!CRM_Utils_Rule::title($candidate, $maxLength)) {
      return substr(md5($title), 0, $maxLength);
    }

    return $candidate;
  }
  73
  74   /**
  75    * Given a string, replace all non alpha numeric characters and
  76    * spaces with the replacement character
  77    *
  78    * @param string $name
  79    *   The name to be worked on.
  80    * @param string $char
  81    *   The character to use for non-valid chars.
  82    * @param int $len
  83    *   Length of valid variables.
  84    *
  85    *
  86    * @return string
  87    *   returns the manipulated string
  88    * @static
  89    */
  public static function munge($name, $char = '_', $len = 63) {
    // Collapse every run of characters outside [a-zA-Z0-9] (whitespace
    // included) into a single $char. Only the ascii character set is kept
    // since mysql does not create table names / field names otherwise
    // (CRM-11744).
    $munged = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // A falsy $len (0, NULL, ...) disables truncation; otherwise keep
    // variable names short.
    return $len ? substr($munged, 0, $len) : $munged;
  }
 104
 105   /**
 106    * Convert possibly underscore separated words to camel case with special handling for 'UF'
 107    * e.g
 108    * membership_payment returns MembershipPayment
 109    * @param string $string
 110    *
 111    * @return string
 112    *   string
 113    */
 114   public static function convertStringToCamel($string) {
 115     $fragments = explode('_', $string);
 116     foreach ($fragments as & $fragment) {
 117       $fragment = ucfirst($fragment);
 118     }
 119     // Special case: UFGroup, UFJoin, UFMatch, UFField
 120     if ($fragments[0] === 'Uf') {
 121       $fragments[0] = 'UF';
 122     }
 123     return implode('', $fragments);
 124   }
 125
 126   /**
 127    *
 128    * Takes a variable name and munges it randomly into another variable name
 129    *
 130    * @param string $name
 131    *   Initial Variable Name.
 132    * @param int $len
 133    *   Length of valid variables.
 134    *
 135    * @return string
 136    *   Randomized Variable Name
 137    * @static
 138    */
 139   public static function rename($name, $len = 4) {
 140     $rand = substr(uniqid(), 0, $len);
 141     return substr_replace($name, $rand, -$len, $len);
 142   }
 143
 144   /**
 145    * Takes a string and returns the last tuple of the string.
 146    * useful while converting file names to class names etc
 147    *
 148    * @param string $string
 149    *   The input string.
   * @param string $char
   *   The character used to demarcate the components.
 151    *
 152    *
 153    * @return string
 154    *   the last component
 155    * @static
 156    */
 157   public static function getClassName($string, $char = '_') {
 158     $names = array();
 159     if (!is_array($string)) {
 160       $names = explode($char, $string);
 161     }
 162     if (!empty($names)) {
 163       return array_pop($names);
 164     }
 165   }
 166
 167   /**
   * Appends a name to a string, separated by a delimiter.
   * Does the right thing for an empty string.
 170    *
 171    * @param string $str
 172    *   The string to be appended to.
 173    * @param string $delim
 174    *   The delimiter to use.
 175    * @param mixed $name
 176    *   The string (or array of strings) to append.
 177    *
 178    * @return void
 179    * @static
 180    */
 181   public static function append(&$str, $delim, $name) {
 182     if (empty($name)) {
 183       return;
 184     }
 185
 186     if (is_array($name)) {
 187       foreach ($name as $n) {
 188         if (empty($n)) {
 189           continue;
 190         }
 191         if (empty($str)) {
 192           $str = $n;
 193         }
 194         else {
 195           $str .= $delim . $n;
 196         }
 197       }
 198     }
 199     else {
 200       if (empty($str)) {
 201         $str = $name;
 202       }
 203       else {
 204         $str .= $delim . $name;
 205       }
 206     }
 207   }
 208
 209   /**
 210    * Determine if the string is composed only of ascii characters
 211    *
 212    * @param string $str
 213    *   Input string.
 214    * @param bool $utf8
 215    *   Attempt utf8 match on failure (default yes).
 216    *
 217    * @return boolean
 218    *   true if string is ascii
 219    * @static
 220    */
 221   public static function isAscii($str, $utf8 = TRUE) {
 222     if (!function_exists('mb_detect_encoding')) {
 223       // eliminate all white space from the string
 224       $str = preg_replace('/\s+/', '', $str);
 225       // FIXME:  This is a pretty brutal hack to make utf8 and 8859-1 work.
 226
 227       /* match low- or high-ascii characters */
 228       if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
 229         // || // low ascii characters
 230         // high ascii characters
 231         //  preg_match( '/[\x7F-\xFF]/', $str ) ) {
 232         if ($utf8) {
 233           /* if we did match, try for utf-8, or iso8859-1 */
 234
 235           return self::isUtf8($str);
 236         }
 237         else {
 238           return FALSE;
 239         }
 240       }
 241       return TRUE;
 242     }
 243     else {
 244       $order = array('ASCII');
 245       if ($utf8) {
 246         $order[] = 'UTF-8';
 247       }
 248       $enc = mb_detect_encoding($str, $order, TRUE);
 249       return ($enc == 'ASCII' || $enc == 'UTF-8');
 250     }
 251   }
 252
 253   /**
 254    * Determine the string replacements for redaction
 255    * on the basis of the regular expressions
 256    *
 257    * @param string $str
 258    *   Input string.
 259    * @param array $regexRules
 260    *   Regular expression to be matched w/ replacements.
 261    *
 262    * @return array
 263    *   array of strings w/ corresponding redacted outputs
 264    * @static
 265    */
 266   public static function regex($str, $regexRules) {
 267     //redact the regular expressions
 268     if (!empty($regexRules) && isset($str)) {
 269       static $matches, $totalMatches, $match = array();
 270       foreach ($regexRules as $pattern => $replacement) {
 271         preg_match_all($pattern, $str, $matches);
 272         if (!empty($matches[0])) {
 273           if (empty($totalMatches)) {
 274             $totalMatches = $matches[0];
 275           }
 276           else {
 277             $totalMatches = array_merge($totalMatches, $matches[0]);
 278           }
 279           $match = array_flip($totalMatches);
 280         }
 281       }
 282     }
 283
 284     if (!empty($match)) {
 285       foreach ($match as $matchKey => & $dontCare) {
 286         foreach ($regexRules as $pattern => $replacement) {
 287           if (preg_match($pattern, $matchKey)) {
 288             $dontCare = $replacement . substr(md5($matchKey), 0, 5);
 289             break;
 290           }
 291         }
 292       }
 293       return $match;
 294     }
 295     return CRM_Core_DAO::$_nullArray;
 296   }
 297
 298   /**
 299    * @param $str
 300    * @param $stringRules
 301    *
 302    * @return mixed
 303    */
 304   public static function redaction($str, $stringRules) {
 305     //redact the strings
 306     if (!empty($stringRules)) {
 307       foreach ($stringRules as $match => $replace) {
 308         $str = str_ireplace($match, $replace, $str);
 309       }
 310     }
 311
 312     //return the redacted output
 313     return $str;
 314   }
 315
 316   /**
 317    * Determine if a string is composed only of utf8 characters
 318    *
 319    * @param string $str
 320    *   Input string.
 321    * @static
 322    *
 323    * @return boolean
 324    */
 325   public static function isUtf8($str) {
 326     if (!function_exists(mb_detect_encoding)) {
 327       // eliminate all white space from the string
 328       $str = preg_replace('/\s+/', '', $str);
 329
 330       /* pattern stolen from the php.net function documentation for
 331              * utf8decode();
 332              * comment by JF Sebastian, 30-Mar-2005
 333              */
 334
 335       return preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|f0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
 336       // ||
 337       // iconv('ISO-8859-1', 'UTF-8', $str);
 338     }
 339     else {
 340       $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
 341       return ($enc !== FALSE);
 342     }
 343   }
 344
 345   /**
 346    * Determine if two href's are equivalent (fuzzy match)
 347    *
 348    * @param string $url1
 349    *   The first url to be matched.
 350    * @param string $url2
 351    *   The second url to be matched against.
 352    *
 353    * @return boolean
 354    *   true if the urls match, else false
 355    * @static
 356    */
 357   public static function match($url1, $url2) {
 358     $url1 = strtolower($url1);
 359     $url2 = strtolower($url2);
 360
 361     $url1Str = parse_url($url1);
 362     $url2Str = parse_url($url2);
 363
 364     if ($url1Str['path'] == $url2Str['path'] &&
 365       self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
 366     ) {
 367       return TRUE;
 368     }
 369     return FALSE;
 370   }
 371
 372   /**
 373    * Extract variable values
 374    *
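   * @param mixed $query
   *   The query-string portion of a url (e.g. "q=civicrm/contact/search&reset=1").
   *
   * @return mixed
   *   The value of the parameter named by the configured userFrameworkURLVar
   *   (eg: civicrm/contact/search/...), or NULL when it is not present.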
 379    * @static
 380    */
 381   public static function extractURLVarValue($query) {
 382     $config = CRM_Core_Config::singleton();
 383     $urlVar = $config->userFrameworkURLVar;
 384
 385     $params = explode('&', $query);
 386     foreach ($params as $p) {
 387       if (strpos($p, '=')) {
 388         list($k, $v) = explode('=', $p);
 389         if ($k == $urlVar) {
 390           return $v;
 391         }
 392       }
 393     }
 394     return NULL;
 395   }
 396
 397   /**
 398    * Translate a true/false/yes/no string to a 0 or 1 value
 399    *
 400    * @param string $str
 401    *   The string to be translated.
 402    *
 403    * @return boolean
 404    * @static
 405    */
 406   public static function strtobool($str) {
 407     if (!is_scalar($str)) {
 408       return FALSE;
 409     }
 410
 411     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 412       return TRUE;
 413     }
 414     return FALSE;
 415   }
 416
 417   /**
 418    * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
 419    *
 420    * @param string $str
 421    *   The string to be translated.
 422    *
   * @return string|bool
   *   '1' or '0' for a recognised value, otherwise FALSE.
 424    * @static
 425    */
 426   public static function strtoboolstr($str) {
 427     if (!is_scalar($str)) {
 428       return FALSE;
 429     }
 430
 431     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 432       return '1';
 433     }
 434     elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
 435       return '0';
 436     }
 437     else {
 438       return FALSE;
 439     }
 440   }
 441
 442   /**
 443    * Convert a HTML string into a text one using html2text
 444    *
 445    * @param string $html
 446    *   The string to be converted.
 447    *
 448    * @return string
 449    *   the converted string
 450    * @static
 451    */
 452   public static function htmlToText($html) {
 453     require_once 'packages/html2text/rcube_html2text.php';
 454     $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
 455     $converter = new rcube_html2text($token_html);
 456     $token_text = $converter->get_text();
 457     $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
 458     return $text;
 459   }
 460
 461   /**
 462    * @param $string
 463    * @param array $params
 464    */
 465   public static function extractName($string, &$params) {
 466     $name = trim($string);
 467     if (empty($name)) {
 468       return;
 469     }
 470
 471     // strip out quotes
 472     $name = str_replace('"', '', $name);
 473     $name = str_replace('\'', '', $name);
 474
 475     // check for comma in name
 476     if (strpos($name, ',') !== FALSE) {
 477
 478       // name has a comma - assume lname, fname [mname]
 479       $names = explode(',', $name);
 480       if (count($names) > 1) {
 481         $params['last_name'] = trim($names[0]);
 482
 483         // check for space delim
 484         $fnames = explode(' ', trim($names[1]));
 485         if (count($fnames) > 1) {
 486           $params['first_name'] = trim($fnames[0]);
 487           $params['middle_name'] = trim($fnames[1]);
 488         }
 489         else {
 490           $params['first_name'] = trim($fnames[0]);
 491         }
 492       }
 493       else {
 494         $params['first_name'] = trim($names[0]);
 495       }
 496     }
 497     else {
 498       // name has no comma - assume fname [mname] fname
 499       $names = explode(' ', $name);
 500       if (count($names) == 1) {
 501         $params['first_name'] = $names[0];
 502       }
 503       elseif (count($names) == 2) {
 504         $params['first_name'] = $names[0];
 505         $params['last_name'] = $names[1];
 506       }
 507       else {
 508         $params['first_name'] = $names[0];
 509         $params['middle_name'] = $names[1];
 510         $params['last_name'] = $names[2];
 511       }
 512     }
 513   }
 514
 515   /**
 516    * @param $string
 517    *
 518    * @return array
 519    */
 520   public static function &makeArray($string) {
 521     $string = trim($string);
 522
 523     $values = explode("\n", $string);
 524     $result = array();
 525     foreach ($values as $value) {
 526       list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
 527       if (!empty($v)) {
 528         $result[trim($n)] = trim($v);
 529       }
 530     }
 531     return $result;
 532   }
 533
 534   /**
 535    * Given an ezComponents-parsed representation of
 536    * a text with alternatives return only the first one
 537    *
 538    * @param string $full
 539    *   All alternatives as a long string (or some other text).
 540    *
 541    * @return string
 542    *   only the first alternative found (or the text without alternatives)
 543    */
 544   public static function stripAlternatives($full) {
 545     $matches = array();
 546     preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
 547
 548     if (isset($matches[1]) &&
 549       trim(strip_tags($matches[1])) != ''
 550     ) {
 551       return $matches[1];
 552     }
 553     else {
 554       return $full;
 555     }
 556   }
 557
 558   /**
 559    * Strip leading, trailing, double spaces from string
 560    * used for postal/greeting/addressee
 561    *
 562    * @param string $string
 563    *   Input string to be cleaned.
 564    *
 565    * @return string
 566    *   the cleaned string
 567    * @static
 568    */
 569   public static function stripSpaces($string) {
 570     return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
 571   }
 572
 573   /**
 574    * clean the URL 'path' variable that we use
 575    * to construct CiviCRM urls by removing characters from the path variable
 576    *
 577    * @param string $string
 578    *   The input string to be sanitized.
 579    * @param array $search
 580    *   The characters to be sanitized.
 581    * @param string $replace
 582    *   The character to replace it with.
 583    *
 584    * @return string
 585    *   the sanitized string
 586    * @static
 587    */
 588   static function stripPathChars(
 589     $string,
 590     $search = NULL,
 591     $replace = NULL
 592   ) {
 593     static $_searchChars = NULL;
 594     static $_replaceChar = NULL;
 595
 596     if (empty($string)) {
 597       return $string;
 598     }
 599
 600     if ($_searchChars == NULL) {
 601       $_searchChars = array(
 602         '&', ';', ',', '=', '$',
 603         '"', "'", '\\',
 604         '<', '>', '(', ')',
 605         ' ', "\r", "\r\n", "\n", "\t",
 606       );
 607       $_replaceChar = '_';
 608     }
 609
 610     if ($search == NULL) {
 611       $search = $_searchChars;
 612     }
 613
 614     if ($replace == NULL) {
 615       $replace = $_replaceChar;
 616     }
 617
 618     return str_replace($search, $replace, $string);
 619   }
 620
 621
 622   /**
 623    * Use HTMLPurifier to clean up a text string and remove any potential
 624    * xss attacks. This is primarily used in public facing pages which
 625    * accept html as the input string
 626    *
 627    * @param string $string
 628    *   The input string.
 629    *
 630    * @return string
 631    *   the cleaned up string
 632    * @static
 633    */
 634   public static function purifyHTML($string) {
 635     static $_filter = NULL;
 636     if (!$_filter) {
 637       $config = HTMLPurifier_Config::createDefault();
 638       $config->set('Core.Encoding', 'UTF-8');
 639
 640       // Disable the cache entirely
 641       $config->set('Cache.DefinitionImpl', NULL);
 642
 643       $_filter = new HTMLPurifier($config);
 644     }
 645
 646     return $_filter->purify($string);
 647   }
 648
 649   /**
 650    * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 651    *
 652    * @param string $string
 653    * @param int $maxLen
 654    *
 655    * @return string
 656    */
 657   public static function ellipsify($string, $maxLen) {
 658     $len = strlen($string);
 659     if ($len <= $maxLen) {
 660       return $string;
 661     }
 662     else {
 663       return substr($string, 0, $maxLen - 3) . '...';
 664     }
 665   }
 666
 667   /**
 668    * Generate a random string
 669    *
 670    * @param $len
 671    * @param $alphabet
 672    * @return string
 673    */
 674   public static function createRandom($len, $alphabet) {
 675     $alphabetSize = strlen($alphabet);
 676     $result = '';
 677     for ($i = 0; $i < $len; $i++) {
 678       $result .= $alphabet{rand(1, $alphabetSize) - 1};
 679     }
 680     return $result;
 681   }
 682
 683   /**
 684    * Examples:
 685    * "admin foo" => array(NULL,"admin foo")
 686    * "cms:admin foo" => array("cms", "admin foo")
 687    *
 688    * @param $delim
 689    * @param string $string
 690    *   E.g. "view all contacts". Syntax: "[prefix:]name".
 691    * @param null $defaultPrefix
 692    *
 693    * @return array
 694    *   (0 => string|NULL $prefix, 1 => string $value)
 695    */
 696   public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
 697     $pos = strpos($string, $delim);
 698     if ($pos === FALSE) {
 699       return array($defaultPrefix, $string);
 700     }
 701     else {
 702       return array(substr($string, 0, $pos), substr($string, 1 + $pos));
 703     }
 704   }
 705
 706   /**
   * This function will mask part of the user portion of an Email address (everything before the @)
 708    *
 709    * @param string $email
 710    *   The email address to be masked.
 711    * @param string $maskChar
 712    *   The character used for masking.
 713    * @param int $percent
 714    *   The percentage of the user portion to be masked.
 715    *
 716    * @return string
 717    *   returns the masked Email address
 718    */
 719   public static function maskEmail($email, $maskChar = '*', $percent = 50) {
 720     list($user, $domain) = preg_split("/@/", $email);
 721     $len = strlen($user);
 722     $maskCount = floor($len * $percent / 100);
 723     $offset = floor(($len - $maskCount) / 2);
 724
 725     $masked = substr($user, 0, $offset)
 726        . str_repeat($maskChar, $maskCount)
 727        . substr($user, $maskCount + $offset);
 728
 729     return ($masked . '@' . $domain);
 730   }
 731
 732   /**
 733    * This function compares two strings
 734    *
 735    * @param string $strOne
 736    *   String one.
 737    * @param string $strTwo
 738    *   String two.
   * @param bool $case
   *   TRUE compares case-insensitively (both strings are lowercased first);
   *   FALSE compares case-sensitively. Both strings are always trimmed.
 741    *
 742    * @return boolean
 743    *   TRUE (string are identical); FALSE (strings are not identical)
 744    */
 745   public static function compareStr($strOne, $strTwo, $case) {
 746     if ($case == TRUE) {
 747       // Convert to lowercase and trim white spaces
 748       if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
 749         // yes - they are identical
 750         return TRUE;
 751       }
 752       else {
 753         // not identical
 754         return FALSE;
 755       }
 756     }
 757     if ($case == FALSE) {
 758       // Trim white spaces
 759       if (trim($strOne) == trim($strTwo)) {
 760         // yes - they are identical
 761         return TRUE;
 762       }
 763       else {
 764         // not identical
 765         return FALSE;
 766       }
 767     }
 768   }
 769
 770   /**
 771    * Many parts of the codebase have a convention of internally passing around
 772    * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 773    * (because most other odd characters are %-escaped in URLs; and %-escaped
 774    * strings don't need any extra escaping in HTML).
 775    *
   * @param string $htmlUrl
   *   URL with HTML entities.
 778    * @return string
 779    *   URL without HTML entities
 780    */
 781   public static function unstupifyUrl($htmlUrl) {
 782     return str_replace('&amp;', '&', $htmlUrl);
 783   }
 784 }