CRM/Utils/String.php

   1 <?php
   2 /*
   3  +--------------------------------------------------------------------+
   4  | CiviCRM version 4.6                                                |
   5  +--------------------------------------------------------------------+
   6  | Copyright CiviCRM LLC (c) 2004-2014                                |
   7  +--------------------------------------------------------------------+
   8  | This file is a part of CiviCRM.                                    |
   9  |                                                                    |
  10  | CiviCRM is free software; you can copy, modify, and distribute it  |
  11  | under the terms of the GNU Affero General Public License           |
  12  | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
  13  |                                                                    |
  14  | CiviCRM is distributed in the hope that it will be useful, but     |
  15  | WITHOUT ANY WARRANTY; without even the implied warranty of         |
  16  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
  17  | See the GNU Affero General Public License for more details.        |
  18  |                                                                    |
  19  | You should have received a copy of the GNU Affero General Public   |
  20  | License and the CiviCRM Licensing Exception along                  |
  21  | with this program; if not, contact CiviCRM LLC                     |
  22  | at info[AT]civicrm[DOT]org. If you have questions about the        |
  23  | GNU Affero General Public License or the licensing of CiviCRM,     |
  24  | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
  25  +--------------------------------------------------------------------+
  26 */
  27
  28 /**
  29  *
  30  * @package CRM
  31  * @copyright CiviCRM LLC (c) 2004-2014
  32  * $Id$
  33  *
  34  */
  35
  36 require_once 'HTML/QuickForm/Rule/Email.php';
  37
  38 /**
  39  * This class contains string functions
  40  *
  41  */
  42 class CRM_Utils_String {
  43   const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
  44
  45   /**
  46    * List of all letters and numbers
  47    */
  48   const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  49
  50   /**
  51    * Convert a display name into a potential variable
  52    * name that we could use in forms/code
  53    *
  54    * @param name Name of the string
  55    *
  56    * @param int $maxLength
  57    *
  58    * @return string
  59    *   An equivalent variable name.
  60    */
  61   public static function titleToVar($title, $maxLength = 31) {
  62     $variable = self::munge($title, '_', $maxLength);
  63
  64     if (CRM_Utils_Rule::title($variable, $maxLength)) {
  65       return $variable;
  66     }
  67
  68     // if longer than the maxLength lets just return a substr of the
  69     // md5 to prevent errors downstream
  70     return substr(md5($title), 0, $maxLength);
  71   }
  72
  73   /**
  74    * Given a string, replace all non alpha numeric characters and
  75    * spaces with the replacement character
  76    *
  77    * @param string $name
  78    *   The name to be worked on.
  79    * @param string $char
  80    *   The character to use for non-valid chars.
  81    * @param int $len
  82    *   Length of valid variables.
  83    *
  84    *
  85    * @return string
  86    *   returns the manipulated string
  87    */
  88   public static function munge($name, $char = '_', $len = 63) {
  89     // replace all white space and non-alpha numeric with $char
  90     // we only use the ascii character set since mysql does not create table names / field names otherwise
  91     // CRM-11744
  92     $name = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));
  93
  94     if ($len) {
  95       // lets keep variable names short
  96       return substr($name, 0, $len);
  97     }
  98     else {
  99       return $name;
 100     }
 101   }
 102
 103   /**
 104    * Convert possibly underscore separated words to camel case with special handling for 'UF'
 105    * e.g
 106    * membership_payment returns MembershipPayment
 107    * @param string $string
 108    *
 109    * @return string
 110    *   string
 111    */
 112   public static function convertStringToCamel($string) {
 113     $fragments = explode('_', $string);
 114     foreach ($fragments as & $fragment) {
 115       $fragment = ucfirst($fragment);
 116     }
 117     // Special case: UFGroup, UFJoin, UFMatch, UFField
 118     if ($fragments[0] === 'Uf') {
 119       $fragments[0] = 'UF';
 120     }
 121     return implode('', $fragments);
 122   }
 123
 124   /**
 125    *
 126    * Takes a variable name and munges it randomly into another variable name
 127    *
 128    * @param string $name
 129    *   Initial Variable Name.
 130    * @param int $len
 131    *   Length of valid variables.
 132    *
 133    * @return string
 134    *   Randomized Variable Name
 135    */
 136   public static function rename($name, $len = 4) {
 137     $rand = substr(uniqid(), 0, $len);
 138     return substr_replace($name, $rand, -$len, $len);
 139   }
 140
 141   /**
 142    * Takes a string and returns the last tuple of the string.
 143    * useful while converting file names to class names etc
 144    *
 145    * @param string $string
 146    *   The input string.
 147    * @param \char|string $char $char the character used to demarcate the componets
 148    *
 149    *
 150    * @return string
 151    *   the last component
 152    */
 153   public static function getClassName($string, $char = '_') {
 154     $names = array();
 155     if (!is_array($string)) {
 156       $names = explode($char, $string);
 157     }
 158     if (!empty($names)) {
 159       return array_pop($names);
 160     }
 161   }
 162
 163   /**
 164    * Appends a name to a string and seperated by delimiter.
 165    * does the right thing for an empty string
 166    *
 167    * @param string $str
 168    *   The string to be appended to.
 169    * @param string $delim
 170    *   The delimiter to use.
 171    * @param mixed $name
 172    *   The string (or array of strings) to append.
 173    *
 174    * @return void
 175    */
 176   public static function append(&$str, $delim, $name) {
 177     if (empty($name)) {
 178       return;
 179     }
 180
 181     if (is_array($name)) {
 182       foreach ($name as $n) {
 183         if (empty($n)) {
 184           continue;
 185         }
 186         if (empty($str)) {
 187           $str = $n;
 188         }
 189         else {
 190           $str .= $delim . $n;
 191         }
 192       }
 193     }
 194     else {
 195       if (empty($str)) {
 196         $str = $name;
 197       }
 198       else {
 199         $str .= $delim . $name;
 200       }
 201     }
 202   }
 203
 204   /**
 205    * Determine if the string is composed only of ascii characters
 206    *
 207    * @param string $str
 208    *   Input string.
 209    * @param bool $utf8
 210    *   Attempt utf8 match on failure (default yes).
 211    *
 212    * @return boolean
 213    *   true if string is ascii
 214    */
 215   public static function isAscii($str, $utf8 = TRUE) {
 216     if (!function_exists('mb_detect_encoding')) {
 217       // eliminate all white space from the string
 218       $str = preg_replace('/\s+/', '', $str);
 219       // FIXME:  This is a pretty brutal hack to make utf8 and 8859-1 work.
 220
 221       /* match low- or high-ascii characters */
 222       if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
 223         // || // low ascii characters
 224         // high ascii characters
 225         //  preg_match( '/[\x7F-\xFF]/', $str ) ) {
 226         if ($utf8) {
 227           /* if we did match, try for utf-8, or iso8859-1 */
 228
 229           return self::isUtf8($str);
 230         }
 231         else {
 232           return FALSE;
 233         }
 234       }
 235       return TRUE;
 236     }
 237     else {
 238       $order = array('ASCII');
 239       if ($utf8) {
 240         $order[] = 'UTF-8';
 241       }
 242       $enc = mb_detect_encoding($str, $order, TRUE);
 243       return ($enc == 'ASCII' || $enc == 'UTF-8');
 244     }
 245   }
 246
 247   /**
 248    * Determine the string replacements for redaction
 249    * on the basis of the regular expressions
 250    *
 251    * @param string $str
 252    *   Input string.
 253    * @param array $regexRules
 254    *   Regular expression to be matched w/ replacements.
 255    *
 256    * @return array
 257    *   array of strings w/ corresponding redacted outputs
 258    */
 259   public static function regex($str, $regexRules) {
 260     //redact the regular expressions
 261     if (!empty($regexRules) && isset($str)) {
 262       static $matches, $totalMatches, $match = array();
 263       foreach ($regexRules as $pattern => $replacement) {
 264         preg_match_all($pattern, $str, $matches);
 265         if (!empty($matches[0])) {
 266           if (empty($totalMatches)) {
 267             $totalMatches = $matches[0];
 268           }
 269           else {
 270             $totalMatches = array_merge($totalMatches, $matches[0]);
 271           }
 272           $match = array_flip($totalMatches);
 273         }
 274       }
 275     }
 276
 277     if (!empty($match)) {
 278       foreach ($match as $matchKey => & $dontCare) {
 279         foreach ($regexRules as $pattern => $replacement) {
 280           if (preg_match($pattern, $matchKey)) {
 281             $dontCare = $replacement . substr(md5($matchKey), 0, 5);
 282             break;
 283           }
 284         }
 285       }
 286       return $match;
 287     }
 288     return CRM_Core_DAO::$_nullArray;
 289   }
 290
 291   /**
 292    * @param $str
 293    * @param $stringRules
 294    *
 295    * @return mixed
 296    */
 297   public static function redaction($str, $stringRules) {
 298     //redact the strings
 299     if (!empty($stringRules)) {
 300       foreach ($stringRules as $match => $replace) {
 301         $str = str_ireplace($match, $replace, $str);
 302       }
 303     }
 304
 305     //return the redacted output
 306     return $str;
 307   }
 308
 309   /**
 310    * Determine if a string is composed only of utf8 characters
 311    *
 312    * @param string $str
 313    *   Input string.
 314    *
 315    * @return boolean
 316    */
 317   public static function isUtf8($str) {
 318     if (!function_exists(mb_detect_encoding)) {
 319       // eliminate all white space from the string
 320       $str = preg_replace('/\s+/', '', $str);
 321
 322       /* pattern stolen from the php.net function documentation for
 323              * utf8decode();
 324              * comment by JF Sebastian, 30-Mar-2005
 325              */
 326
 327       return preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|f0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
 328       // ||
 329       // iconv('ISO-8859-1', 'UTF-8', $str);
 330     }
 331     else {
 332       $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
 333       return ($enc !== FALSE);
 334     }
 335   }
 336
 337   /**
 338    * Determine if two href's are equivalent (fuzzy match)
 339    *
 340    * @param string $url1
 341    *   The first url to be matched.
 342    * @param string $url2
 343    *   The second url to be matched against.
 344    *
 345    * @return boolean
 346    *   true if the urls match, else false
 347    */
 348   public static function match($url1, $url2) {
 349     $url1 = strtolower($url1);
 350     $url2 = strtolower($url2);
 351
 352     $url1Str = parse_url($url1);
 353     $url2Str = parse_url($url2);
 354
 355     if ($url1Str['path'] == $url2Str['path'] &&
 356       self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
 357     ) {
 358       return TRUE;
 359     }
 360     return FALSE;
 361   }
 362
 363   /**
 364    * Extract variable values
 365    *
 366    * @param mix $query
 367    *   This is basically url.
 368    *
 369    * @return mix
 370    *   $v  returns civicrm url (eg: civicrm/contact/search/...)
 371    */
 372   public static function extractURLVarValue($query) {
 373     $config = CRM_Core_Config::singleton();
 374     $urlVar = $config->userFrameworkURLVar;
 375
 376     $params = explode('&', $query);
 377     foreach ($params as $p) {
 378       if (strpos($p, '=')) {
 379         list($k, $v) = explode('=', $p);
 380         if ($k == $urlVar) {
 381           return $v;
 382         }
 383       }
 384     }
 385     return NULL;
 386   }
 387
 388   /**
 389    * Translate a true/false/yes/no string to a 0 or 1 value
 390    *
 391    * @param string $str
 392    *   The string to be translated.
 393    *
 394    * @return boolean
 395    */
 396   public static function strtobool($str) {
 397     if (!is_scalar($str)) {
 398       return FALSE;
 399     }
 400
 401     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 402       return TRUE;
 403     }
 404     return FALSE;
 405   }
 406
 407   /**
 408    * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
 409    *
 410    * @param string $str
 411    *   The string to be translated.
 412    *
 413    * @return boolean
 414    */
 415   public static function strtoboolstr($str) {
 416     if (!is_scalar($str)) {
 417       return FALSE;
 418     }
 419
 420     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 421       return '1';
 422     }
 423     elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
 424       return '0';
 425     }
 426     else {
 427       return FALSE;
 428     }
 429   }
 430
 431   /**
 432    * Convert a HTML string into a text one using html2text
 433    *
 434    * @param string $html
 435    *   The string to be converted.
 436    *
 437    * @return string
 438    *   the converted string
 439    */
 440   public static function htmlToText($html) {
 441     require_once 'packages/html2text/rcube_html2text.php';
 442     $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
 443     $converter = new rcube_html2text($token_html);
 444     $token_text = $converter->get_text();
 445     $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
 446     return $text;
 447   }
 448
 449   /**
 450    * @param $string
 451    * @param array $params
 452    */
 453   public static function extractName($string, &$params) {
 454     $name = trim($string);
 455     if (empty($name)) {
 456       return;
 457     }
 458
 459     // strip out quotes
 460     $name = str_replace('"', '', $name);
 461     $name = str_replace('\'', '', $name);
 462
 463     // check for comma in name
 464     if (strpos($name, ',') !== FALSE) {
 465
 466       // name has a comma - assume lname, fname [mname]
 467       $names = explode(',', $name);
 468       if (count($names) > 1) {
 469         $params['last_name'] = trim($names[0]);
 470
 471         // check for space delim
 472         $fnames = explode(' ', trim($names[1]));
 473         if (count($fnames) > 1) {
 474           $params['first_name'] = trim($fnames[0]);
 475           $params['middle_name'] = trim($fnames[1]);
 476         }
 477         else {
 478           $params['first_name'] = trim($fnames[0]);
 479         }
 480       }
 481       else {
 482         $params['first_name'] = trim($names[0]);
 483       }
 484     }
 485     else {
 486       // name has no comma - assume fname [mname] fname
 487       $names = explode(' ', $name);
 488       if (count($names) == 1) {
 489         $params['first_name'] = $names[0];
 490       }
 491       elseif (count($names) == 2) {
 492         $params['first_name'] = $names[0];
 493         $params['last_name'] = $names[1];
 494       }
 495       else {
 496         $params['first_name'] = $names[0];
 497         $params['middle_name'] = $names[1];
 498         $params['last_name'] = $names[2];
 499       }
 500     }
 501   }
 502
 503   /**
 504    * @param $string
 505    *
 506    * @return array
 507    */
 508   public static function &makeArray($string) {
 509     $string = trim($string);
 510
 511     $values = explode("\n", $string);
 512     $result = array();
 513     foreach ($values as $value) {
 514       list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
 515       if (!empty($v)) {
 516         $result[trim($n)] = trim($v);
 517       }
 518     }
 519     return $result;
 520   }
 521
 522   /**
 523    * Given an ezComponents-parsed representation of
 524    * a text with alternatives return only the first one
 525    *
 526    * @param string $full
 527    *   All alternatives as a long string (or some other text).
 528    *
 529    * @return string
 530    *   only the first alternative found (or the text without alternatives)
 531    */
 532   public static function stripAlternatives($full) {
 533     $matches = array();
 534     preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
 535
 536     if (isset($matches[1]) &&
 537       trim(strip_tags($matches[1])) != ''
 538     ) {
 539       return $matches[1];
 540     }
 541     else {
 542       return $full;
 543     }
 544   }
 545
 546   /**
 547    * Strip leading, trailing, double spaces from string
 548    * used for postal/greeting/addressee
 549    *
 550    * @param string $string
 551    *   Input string to be cleaned.
 552    *
 553    * @return string
 554    *   the cleaned string
 555    */
 556   public static function stripSpaces($string) {
 557     return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
 558   }
 559
 560   /**
 561    * clean the URL 'path' variable that we use
 562    * to construct CiviCRM urls by removing characters from the path variable
 563    *
 564    * @param string $string
 565    *   The input string to be sanitized.
 566    * @param array $search
 567    *   The characters to be sanitized.
 568    * @param string $replace
 569    *   The character to replace it with.
 570    *
 571    * @return string
 572    *   the sanitized string
 573    */
 574   static function stripPathChars(
 575     $string,
 576     $search = NULL,
 577     $replace = NULL
 578   ) {
 579     static $_searchChars = NULL;
 580     static $_replaceChar = NULL;
 581
 582     if (empty($string)) {
 583       return $string;
 584     }
 585
 586     if ($_searchChars == NULL) {
 587       $_searchChars = array(
 588         '&',
 589         ';',
 590         ',',
 591         '=',
 592         '$',
 593         '"',
 594         "'",
 595         '\\',
 596         '<',
 597         '>',
 598         '(',
 599         ')',
 600         ' ',
 601         "\r",
 602         "\r\n",
 603         "\n",
 604         "\t",
 605       );
 606       $_replaceChar = '_';
 607     }
 608
 609     if ($search == NULL) {
 610       $search = $_searchChars;
 611     }
 612
 613     if ($replace == NULL) {
 614       $replace = $_replaceChar;
 615     }
 616
 617     return str_replace($search, $replace, $string);
 618   }
 619
 620
 621   /**
 622    * Use HTMLPurifier to clean up a text string and remove any potential
 623    * xss attacks. This is primarily used in public facing pages which
 624    * accept html as the input string
 625    *
 626    * @param string $string
 627    *   The input string.
 628    *
 629    * @return string
 630    *   the cleaned up string
 631    */
 632   public static function purifyHTML($string) {
 633     static $_filter = NULL;
 634     if (!$_filter) {
 635       $config = HTMLPurifier_Config::createDefault();
 636       $config->set('Core.Encoding', 'UTF-8');
 637
 638       // Disable the cache entirely
 639       $config->set('Cache.DefinitionImpl', NULL);
 640
 641       $_filter = new HTMLPurifier($config);
 642     }
 643
 644     return $_filter->purify($string);
 645   }
 646
 647   /**
 648    * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 649    *
 650    * @param string $string
 651    * @param int $maxLen
 652    *
 653    * @return string
 654    */
 655   public static function ellipsify($string, $maxLen) {
 656     $len = strlen($string);
 657     if ($len <= $maxLen) {
 658       return $string;
 659     }
 660     else {
 661       return substr($string, 0, $maxLen - 3) . '...';
 662     }
 663   }
 664
 665   /**
 666    * Generate a random string
 667    *
 668    * @param $len
 669    * @param $alphabet
 670    * @return string
 671    */
 672   public static function createRandom($len, $alphabet) {
 673     $alphabetSize = strlen($alphabet);
 674     $result = '';
 675     for ($i = 0; $i < $len; $i++) {
 676       $result .= $alphabet{rand(1, $alphabetSize) - 1};
 677     }
 678     return $result;
 679   }
 680
 681   /**
 682    * Examples:
 683    * "admin foo" => array(NULL,"admin foo")
 684    * "cms:admin foo" => array("cms", "admin foo")
 685    *
 686    * @param $delim
 687    * @param string $string
 688    *   E.g. "view all contacts". Syntax: "[prefix:]name".
 689    * @param null $defaultPrefix
 690    *
 691    * @return array
 692    *   (0 => string|NULL $prefix, 1 => string $value)
 693    */
 694   public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
 695     $pos = strpos($string, $delim);
 696     if ($pos === FALSE) {
 697       return array($defaultPrefix, $string);
 698     }
 699     else {
 700       return array(substr($string, 0, $pos), substr($string, 1 + $pos));
 701     }
 702   }
 703
 704   /**
 705    * This function will mask part of the the user portion of an Email address (everything before the @)
 706    *
 707    * @param string $email
 708    *   The email address to be masked.
 709    * @param string $maskChar
 710    *   The character used for masking.
 711    * @param int $percent
 712    *   The percentage of the user portion to be masked.
 713    *
 714    * @return string
 715    *   returns the masked Email address
 716    */
 717   public static function maskEmail($email, $maskChar = '*', $percent = 50) {
 718     list($user, $domain) = preg_split("/@/", $email);
 719     $len = strlen($user);
 720     $maskCount = floor($len * $percent / 100);
 721     $offset = floor(($len - $maskCount) / 2);
 722
 723     $masked = substr($user, 0, $offset)
 724       . str_repeat($maskChar, $maskCount)
 725       . substr($user, $maskCount + $offset);
 726
 727     return ($masked . '@' . $domain);
 728   }
 729
 730   /**
 731    * This function compares two strings
 732    *
 733    * @param string $strOne
 734    *   String one.
 735    * @param string $strTwo
 736    *   String two.
 737    * @param bool $case
 738    *   Boolean indicating whether you want the comparison to be case sensitive or not.
 739    *
 740    * @return boolean
 741    *   TRUE (string are identical); FALSE (strings are not identical)
 742    */
 743   public static function compareStr($strOne, $strTwo, $case) {
 744     if ($case == TRUE) {
 745       // Convert to lowercase and trim white spaces
 746       if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
 747         // yes - they are identical
 748         return TRUE;
 749       }
 750       else {
 751         // not identical
 752         return FALSE;
 753       }
 754     }
 755     if ($case == FALSE) {
 756       // Trim white spaces
 757       if (trim($strOne) == trim($strTwo)) {
 758         // yes - they are identical
 759         return TRUE;
 760       }
 761       else {
 762         // not identical
 763         return FALSE;
 764       }
 765     }
 766   }
 767
 768   /**
 769    * Many parts of the codebase have a convention of internally passing around
 770    * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 771    * (because most other odd characters are %-escaped in URLs; and %-escaped
 772    * strings don't need any extra escaping in HTML).
 773    *
 774    * @param string $url
 775    *   URL with HTML entities.
 776    * @return string
 777    *   URL without HTML entities
 778    */
 779   public static function unstupifyUrl($htmlUrl) {
 780     return str_replace('&amp;', '&', $htmlUrl);
 781   }
 782 }