CRM/Utils/String.php

   1 <?php
   2 /*
   3  +--------------------------------------------------------------------+
   4  | CiviCRM version 4.6                                                |
   5  +--------------------------------------------------------------------+
   6  | Copyright CiviCRM LLC (c) 2004-2014                                |
   7  +--------------------------------------------------------------------+
   8  | This file is a part of CiviCRM.                                    |
   9  |                                                                    |
  10  | CiviCRM is free software; you can copy, modify, and distribute it  |
  11  | under the terms of the GNU Affero General Public License           |
  12  | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
  13  |                                                                    |
  14  | CiviCRM is distributed in the hope that it will be useful, but     |
  15  | WITHOUT ANY WARRANTY; without even the implied warranty of         |
  16  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
  17  | See the GNU Affero General Public License for more details.        |
  18  |                                                                    |
  19  | You should have received a copy of the GNU Affero General Public   |
  20  | License and the CiviCRM Licensing Exception along                  |
  21  | with this program; if not, contact CiviCRM LLC                     |
  22  | at info[AT]civicrm[DOT]org. If you have questions about the        |
  23  | GNU Affero General Public License or the licensing of CiviCRM,     |
  24  | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
  25  +--------------------------------------------------------------------+
  26 */
  27
  28 /**
  29  *
  30  * @package CRM
  31  * @copyright CiviCRM LLC (c) 2004-2014
  32  * $Id$
  33  *
  34  */
  35
  36 require_once 'HTML/QuickForm/Rule/Email.php';
  37
  38 /**
  39  * This class contains string functions
  40  *
  41  */
  42 class CRM_Utils_String {
  43   const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
  44
  45   /**
  46    * List of all letters and numbers
  47    */
  48   const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  49
  /**
   * Derive a form/code-safe variable name from a human readable title.
   *
   * The title is munged with '_' as the replacement character and limited to
   * $maxLength characters. When the munged result does not pass
   * CRM_Utils_Rule::title(), a prefix of the md5 hash of the original title
   * is returned instead.
   *
   * @param string $title
   *   Display title to convert.
   * @param int $maxLength
   *   Maximum length of the returned name.
   *
   * @return string
   *   The munged title, or the md5-based fallback.
   */
  public static function titleToVar($title, $maxLength = 31) {
    $candidate = self::munge($title, '_', $maxLength);
    $isValid = CRM_Utils_Rule::title($candidate, $maxLength);

    // Fall back to a truncated md5 so callers always receive a usable
    // identifier and errors are avoided downstream.
    return $isValid ? $candidate : substr(md5($title), 0, $maxLength);
  }
  71
  /**
   * Collapse every run of characters outside [a-zA-Z0-9] into a single
   * replacement character, after trimming the input.
   *
   * Only the ASCII character set is kept since mysql does not create
   * table names / field names otherwise (CRM-11744).
   *
   * @param string $name
   *   The name to be worked on.
   * @param string $char
   *   Replacement for each run of invalid characters.
   * @param int $len
   *   Maximum length of the result; a falsy value disables truncation.
   *
   * @return string
   *   The sanitized (and possibly truncated) name.
   */
  public static function munge($name, $char = '_', $len = 63) {
    $sanitized = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // Keep variable names short unless the caller opted out with a falsy $len.
    return $len ? substr($sanitized, 0, $len) : $sanitized;
  }
 101
 102   /**
 103    * Convert possibly underscore separated words to camel case with special handling for 'UF'
 104    * e.g membership_payment returns MembershipPayment
 105    *
 106    * @param string $string
 107    *
 108    * @return string
 109    */
 110   public static function convertStringToCamel($string) {
 111     $fragments = explode('_', $string);
 112     foreach ($fragments as & $fragment) {
 113       $fragment = ucfirst($fragment);
 114     }
 115     // Special case: UFGroup, UFJoin, UFMatch, UFField
 116     if ($fragments[0] === 'Uf') {
 117       $fragments[0] = 'UF';
 118     }
 119     return implode('', $fragments);
 120   }
 121
 122   /**
 123    * Takes a variable name and munges it randomly into another variable name
 124    *
 125    * @param string $name
 126    *   Initial Variable Name.
 127    * @param int $len
 128    *   Length of valid variables.
 129    *
 130    * @return string
 131    *   Randomized Variable Name
 132    */
 133   public static function rename($name, $len = 4) {
 134     $rand = substr(uniqid(), 0, $len);
 135     return substr_replace($name, $rand, -$len, $len);
 136   }
 137
 138   /**
 139    * Takes a string and returns the last tuple of the string.
 140    * useful while converting file names to class names etc
 141    *
 142    * @param string $string
 143    *   The input string.
 144    * @param string $char
 145    *   Character used to demarcate the components
 146    *
 147    * @return string
 148    *   The last component
 149    */
 150   public static function getClassName($string, $char = '_') {
 151     $names = array();
 152     if (!is_array($string)) {
 153       $names = explode($char, $string);
 154     }
 155     if (!empty($names)) {
 156       return array_pop($names);
 157     }
 158   }
 159
 160   /**
 161    * Appends a name to a string and separated by delimiter.
 162    * does the right thing for an empty string
 163    *
 164    * @param string $str
 165    *   The string to be appended to.
 166    * @param string $delim
 167    *   The delimiter to use.
 168    * @param mixed $name
 169    *   The string (or array of strings) to append.
 170    *
 171    * @return void
 172    */
 173   public static function append(&$str, $delim, $name) {
 174     if (empty($name)) {
 175       return;
 176     }
 177
 178     if (is_array($name)) {
 179       foreach ($name as $n) {
 180         if (empty($n)) {
 181           continue;
 182         }
 183         if (empty($str)) {
 184           $str = $n;
 185         }
 186         else {
 187           $str .= $delim . $n;
 188         }
 189       }
 190     }
 191     else {
 192       if (empty($str)) {
 193         $str = $name;
 194       }
 195       else {
 196         $str .= $delim . $name;
 197       }
 198     }
 199   }
 200
 201   /**
 202    * Determine if the string is composed only of ascii characters
 203    *
 204    * @param string $str
 205    *   Input string.
 206    * @param bool $utf8
 207    *   Attempt utf8 match on failure (default yes).
 208    *
 209    * @return bool
 210    *   true if string is ascii
 211    */
 212   public static function isAscii($str, $utf8 = TRUE) {
 213     if (!function_exists('mb_detect_encoding')) {
 214       // eliminate all white space from the string
 215       $str = preg_replace('/\s+/', '', $str);
 216       // FIXME:  This is a pretty brutal hack to make utf8 and 8859-1 work.
 217
 218       /* match low- or high-ascii characters */
 219       if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
 220         // || // low ascii characters
 221         // high ascii characters
 222         //  preg_match( '/[\x7F-\xFF]/', $str ) ) {
 223         if ($utf8) {
 224           /* if we did match, try for utf-8, or iso8859-1 */
 225
 226           return self::isUtf8($str);
 227         }
 228         else {
 229           return FALSE;
 230         }
 231       }
 232       return TRUE;
 233     }
 234     else {
 235       $order = array('ASCII');
 236       if ($utf8) {
 237         $order[] = 'UTF-8';
 238       }
 239       $enc = mb_detect_encoding($str, $order, TRUE);
 240       return ($enc == 'ASCII' || $enc == 'UTF-8');
 241     }
 242   }
 243
 244   /**
 245    * Determine the string replacements for redaction
 246    * on the basis of the regular expressions
 247    *
 248    * @param string $str
 249    *   Input string.
 250    * @param array $regexRules
 251    *   Regular expression to be matched w/ replacements.
 252    *
 253    * @return array
 254    *   array of strings w/ corresponding redacted outputs
 255    */
 256   public static function regex($str, $regexRules) {
 257     //redact the regular expressions
 258     if (!empty($regexRules) && isset($str)) {
 259       static $matches, $totalMatches, $match = array();
 260       foreach ($regexRules as $pattern => $replacement) {
 261         preg_match_all($pattern, $str, $matches);
 262         if (!empty($matches[0])) {
 263           if (empty($totalMatches)) {
 264             $totalMatches = $matches[0];
 265           }
 266           else {
 267             $totalMatches = array_merge($totalMatches, $matches[0]);
 268           }
 269           $match = array_flip($totalMatches);
 270         }
 271       }
 272     }
 273
 274     if (!empty($match)) {
 275       foreach ($match as $matchKey => & $dontCare) {
 276         foreach ($regexRules as $pattern => $replacement) {
 277           if (preg_match($pattern, $matchKey)) {
 278             $dontCare = $replacement . substr(md5($matchKey), 0, 5);
 279             break;
 280           }
 281         }
 282       }
 283       return $match;
 284     }
 285     return CRM_Core_DAO::$_nullArray;
 286   }
 287
 288   /**
 289    * @param $str
 290    * @param $stringRules
 291    *
 292    * @return mixed
 293    */
 294   public static function redaction($str, $stringRules) {
 295     //redact the strings
 296     if (!empty($stringRules)) {
 297       foreach ($stringRules as $match => $replace) {
 298         $str = str_ireplace($match, $replace, $str);
 299       }
 300     }
 301
 302     //return the redacted output
 303     return $str;
 304   }
 305
 306   /**
 307    * Determine if a string is composed only of utf8 characters
 308    *
 309    * @param string $str
 310    *   Input string.
 311    *
 312    * @return bool
 313    */
 314   public static function isUtf8($str) {
 315     if (!function_exists(mb_detect_encoding)) {
 316       // eliminate all white space from the string
 317       $str = preg_replace('/\s+/', '', $str);
 318
 319       /* pattern stolen from the php.net function documentation for
 320        * utf8decode();
 321        * comment by JF Sebastian, 30-Mar-2005
 322        */
 323
 324       return preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|f0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
 325       // ||
 326       // iconv('ISO-8859-1', 'UTF-8', $str);
 327     }
 328     else {
 329       $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
 330       return ($enc !== FALSE);
 331     }
 332   }
 333
 334   /**
 335    * Determine if two href's are equivalent (fuzzy match)
 336    *
 337    * @param string $url1
 338    *   The first url to be matched.
 339    * @param string $url2
 340    *   The second url to be matched against.
 341    *
 342    * @return bool
 343    *   true if the urls match, else false
 344    */
 345   public static function match($url1, $url2) {
 346     $url1 = strtolower($url1);
 347     $url2 = strtolower($url2);
 348
 349     $url1Str = parse_url($url1);
 350     $url2Str = parse_url($url2);
 351
 352     if ($url1Str['path'] == $url2Str['path'] &&
 353       self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
 354     ) {
 355       return TRUE;
 356     }
 357     return FALSE;
 358   }
 359
 360   /**
 361    * Extract the civicrm path from the url
 362    *
 363    * @param string $query
 364    *   A url string.
 365    *
 366    * @return string|null
 367    *   civicrm url (eg: civicrm/contact/search)
 368    */
 369   public static function extractURLVarValue($query) {
 370     $config = CRM_Core_Config::singleton();
 371     $urlVar = $config->userFrameworkURLVar;
 372
 373     $params = explode('&', $query);
 374     foreach ($params as $p) {
 375       if (strpos($p, '=')) {
 376         list($k, $v) = explode('=', $p);
 377         if ($k == $urlVar) {
 378           return $v;
 379         }
 380       }
 381     }
 382     return NULL;
 383   }
 384
 385   /**
 386    * Translate a true/false/yes/no string to a 0 or 1 value
 387    *
 388    * @param string $str
 389    *   The string to be translated.
 390    *
 391    * @return bool
 392    */
 393   public static function strtobool($str) {
 394     if (!is_scalar($str)) {
 395       return FALSE;
 396     }
 397
 398     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 399       return TRUE;
 400     }
 401     return FALSE;
 402   }
 403
 404   /**
 405    * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
 406    *
 407    * @param string $str
 408    *   The string to be translated.
 409    *
 410    * @return bool
 411    */
 412   public static function strtoboolstr($str) {
 413     if (!is_scalar($str)) {
 414       return FALSE;
 415     }
 416
 417     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 418       return '1';
 419     }
 420     elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
 421       return '0';
 422     }
 423     else {
 424       return FALSE;
 425     }
 426   }
 427
 428   /**
 429    * Convert a HTML string into a text one using html2text
 430    *
 431    * @param string $html
 432    *   The string to be converted.
 433    *
 434    * @return string
 435    *   the converted string
 436    */
 437   public static function htmlToText($html) {
 438     require_once 'packages/html2text/rcube_html2text.php';
 439     $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
 440     $converter = new rcube_html2text($token_html);
 441     $token_text = $converter->get_text();
 442     $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
 443     return $text;
 444   }
 445
 446   /**
 447    * @param $string
 448    * @param array $params
 449    */
 450   public static function extractName($string, &$params) {
 451     $name = trim($string);
 452     if (empty($name)) {
 453       return;
 454     }
 455
 456     // strip out quotes
 457     $name = str_replace('"', '', $name);
 458     $name = str_replace('\'', '', $name);
 459
 460     // check for comma in name
 461     if (strpos($name, ',') !== FALSE) {
 462
 463       // name has a comma - assume lname, fname [mname]
 464       $names = explode(',', $name);
 465       if (count($names) > 1) {
 466         $params['last_name'] = trim($names[0]);
 467
 468         // check for space delim
 469         $fnames = explode(' ', trim($names[1]));
 470         if (count($fnames) > 1) {
 471           $params['first_name'] = trim($fnames[0]);
 472           $params['middle_name'] = trim($fnames[1]);
 473         }
 474         else {
 475           $params['first_name'] = trim($fnames[0]);
 476         }
 477       }
 478       else {
 479         $params['first_name'] = trim($names[0]);
 480       }
 481     }
 482     else {
 483       // name has no comma - assume fname [mname] fname
 484       $names = explode(' ', $name);
 485       if (count($names) == 1) {
 486         $params['first_name'] = $names[0];
 487       }
 488       elseif (count($names) == 2) {
 489         $params['first_name'] = $names[0];
 490         $params['last_name'] = $names[1];
 491       }
 492       else {
 493         $params['first_name'] = $names[0];
 494         $params['middle_name'] = $names[1];
 495         $params['last_name'] = $names[2];
 496       }
 497     }
 498   }
 499
 500   /**
 501    * @param $string
 502    *
 503    * @return array
 504    */
 505   public static function &makeArray($string) {
 506     $string = trim($string);
 507
 508     $values = explode("\n", $string);
 509     $result = array();
 510     foreach ($values as $value) {
 511       list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
 512       if (!empty($v)) {
 513         $result[trim($n)] = trim($v);
 514       }
 515     }
 516     return $result;
 517   }
 518
 519   /**
 520    * Given an ezComponents-parsed representation of
 521    * a text with alternatives return only the first one
 522    *
 523    * @param string $full
 524    *   All alternatives as a long string (or some other text).
 525    *
 526    * @return string
 527    *   only the first alternative found (or the text without alternatives)
 528    */
 529   public static function stripAlternatives($full) {
 530     $matches = array();
 531     preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
 532
 533     if (isset($matches[1]) &&
 534       trim(strip_tags($matches[1])) != ''
 535     ) {
 536       return $matches[1];
 537     }
 538     else {
 539       return $full;
 540     }
 541   }
 542
 543   /**
 544    * Strip leading, trailing, double spaces from string
 545    * used for postal/greeting/addressee
 546    *
 547    * @param string $string
 548    *   Input string to be cleaned.
 549    *
 550    * @return string
 551    *   the cleaned string
 552    */
 553   public static function stripSpaces($string) {
 554     return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
 555   }
 556
 557   /**
 558    * clean the URL 'path' variable that we use
 559    * to construct CiviCRM urls by removing characters from the path variable
 560    *
 561    * @param string $string
 562    *   The input string to be sanitized.
 563    * @param array $search
 564    *   The characters to be sanitized.
 565    * @param string $replace
 566    *   The character to replace it with.
 567    *
 568    * @return string
 569    *   the sanitized string
 570    */
 571   public static function stripPathChars(
 572     $string,
 573     $search = NULL,
 574     $replace = NULL
 575   ) {
 576     static $_searchChars = NULL;
 577     static $_replaceChar = NULL;
 578
 579     if (empty($string)) {
 580       return $string;
 581     }
 582
 583     if ($_searchChars == NULL) {
 584       $_searchChars = array(
 585         '&',
 586         ';',
 587         ',',
 588         '=',
 589         '$',
 590         '"',
 591         "'",
 592         '\\',
 593         '<',
 594         '>',
 595         '(',
 596         ')',
 597         ' ',
 598         "\r",
 599         "\r\n",
 600         "\n",
 601         "\t",
 602       );
 603       $_replaceChar = '_';
 604     }
 605
 606     if ($search == NULL) {
 607       $search = $_searchChars;
 608     }
 609
 610     if ($replace == NULL) {
 611       $replace = $_replaceChar;
 612     }
 613
 614     return str_replace($search, $replace, $string);
 615   }
 616
 617
 618   /**
 619    * Use HTMLPurifier to clean up a text string and remove any potential
 620    * xss attacks. This is primarily used in public facing pages which
 621    * accept html as the input string
 622    *
 623    * @param string $string
 624    *   The input string.
 625    *
 626    * @return string
 627    *   the cleaned up string
 628    */
 629   public static function purifyHTML($string) {
 630     static $_filter = NULL;
 631     if (!$_filter) {
 632       $config = HTMLPurifier_Config::createDefault();
 633       $config->set('Core.Encoding', 'UTF-8');
 634
 635       // Disable the cache entirely
 636       $config->set('Cache.DefinitionImpl', NULL);
 637
 638       $_filter = new HTMLPurifier($config);
 639     }
 640
 641     return $_filter->purify($string);
 642   }
 643
 644   /**
 645    * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 646    *
 647    * @param string $string
 648    * @param int $maxLen
 649    *
 650    * @return string
 651    */
 652   public static function ellipsify($string, $maxLen) {
 653     $len = strlen($string);
 654     if ($len <= $maxLen) {
 655       return $string;
 656     }
 657     else {
 658       return substr($string, 0, $maxLen - 3) . '...';
 659     }
 660   }
 661
 662   /**
 663    * Generate a random string
 664    *
 665    * @param $len
 666    * @param $alphabet
 667    * @return string
 668    */
 669   public static function createRandom($len, $alphabet) {
 670     $alphabetSize = strlen($alphabet);
 671     $result = '';
 672     for ($i = 0; $i < $len; $i++) {
 673       $result .= $alphabet{rand(1, $alphabetSize) - 1};
 674     }
 675     return $result;
 676   }
 677
 678   /**
 679    * Examples:
 680    * "admin foo" => array(NULL,"admin foo")
 681    * "cms:admin foo" => array("cms", "admin foo")
 682    *
 683    * @param $delim
 684    * @param string $string
 685    *   E.g. "view all contacts". Syntax: "[prefix:]name".
 686    * @param null $defaultPrefix
 687    *
 688    * @return array
 689    *   (0 => string|NULL $prefix, 1 => string $value)
 690    */
 691   public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
 692     $pos = strpos($string, $delim);
 693     if ($pos === FALSE) {
 694       return array($defaultPrefix, $string);
 695     }
 696     else {
 697       return array(substr($string, 0, $pos), substr($string, 1 + $pos));
 698     }
 699   }
 700
 701   /**
 702    * This function will mask part of the the user portion of an Email address (everything before the @)
 703    *
 704    * @param string $email
 705    *   The email address to be masked.
 706    * @param string $maskChar
 707    *   The character used for masking.
 708    * @param int $percent
 709    *   The percentage of the user portion to be masked.
 710    *
 711    * @return string
 712    *   returns the masked Email address
 713    */
 714   public static function maskEmail($email, $maskChar = '*', $percent = 50) {
 715     list($user, $domain) = preg_split("/@/", $email);
 716     $len = strlen($user);
 717     $maskCount = floor($len * $percent / 100);
 718     $offset = floor(($len - $maskCount) / 2);
 719
 720     $masked = substr($user, 0, $offset)
 721       . str_repeat($maskChar, $maskCount)
 722       . substr($user, $maskCount + $offset);
 723
 724     return ($masked . '@' . $domain);
 725   }
 726
 727   /**
 728    * This function compares two strings
 729    *
 730    * @param string $strOne
 731    *   String one.
 732    * @param string $strTwo
 733    *   String two.
 734    * @param bool $case
 735    *   Boolean indicating whether you want the comparison to be case sensitive or not.
 736    *
 737    * @return bool
 738    *   TRUE (string are identical); FALSE (strings are not identical)
 739    */
 740   public static function compareStr($strOne, $strTwo, $case) {
 741     if ($case == TRUE) {
 742       // Convert to lowercase and trim white spaces
 743       if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
 744         // yes - they are identical
 745         return TRUE;
 746       }
 747       else {
 748         // not identical
 749         return FALSE;
 750       }
 751     }
 752     if ($case == FALSE) {
 753       // Trim white spaces
 754       if (trim($strOne) == trim($strTwo)) {
 755         // yes - they are identical
 756         return TRUE;
 757       }
 758       else {
 759         // not identical
 760         return FALSE;
 761       }
 762     }
 763   }
 764
 765   /**
 766    * Many parts of the codebase have a convention of internally passing around
 767    * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 768    * (because most other odd characters are %-escaped in URLs; and %-escaped
 769    * strings don't need any extra escaping in HTML).
 770    *
 771    * @param string $htmlUrl
 772    *   URL with HTML entities.
 773    * @return string
 774    *   URL without HTML entities
 775    */
 776   public static function unstupifyUrl($htmlUrl) {
 777     return str_replace('&amp;', '&', $htmlUrl);
 778   }
 779
 780 }