CRM/Utils/String.php

   1 <?php
   2 /*
   3  +--------------------------------------------------------------------+
   4  | CiviCRM version 4.6                                                |
   5  +--------------------------------------------------------------------+
   6  | Copyright CiviCRM LLC (c) 2004-2014                                |
   7  +--------------------------------------------------------------------+
   8  | This file is a part of CiviCRM.                                    |
   9  |                                                                    |
  10  | CiviCRM is free software; you can copy, modify, and distribute it  |
  11  | under the terms of the GNU Affero General Public License           |
  12  | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
  13  |                                                                    |
  14  | CiviCRM is distributed in the hope that it will be useful, but     |
  15  | WITHOUT ANY WARRANTY; without even the implied warranty of         |
  16  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
  17  | See the GNU Affero General Public License for more details.        |
  18  |                                                                    |
  19  | You should have received a copy of the GNU Affero General Public   |
  20  | License and the CiviCRM Licensing Exception along                  |
  21  | with this program; if not, contact CiviCRM LLC                     |
  22  | at info[AT]civicrm[DOT]org. If you have questions about the        |
  23  | GNU Affero General Public License or the licensing of CiviCRM,     |
  24  | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
  25  +--------------------------------------------------------------------+
  26  */
  27
  28 /**
  29  *
  30  * @package CRM
  31  * @copyright CiviCRM LLC (c) 2004-2014
  32  * $Id$
  33  */
  34
  35 require_once 'HTML/QuickForm/Rule/Email.php';
  36
  37 /**
  38  * This class contains string functions.
  39  *
  40  */
  41 class CRM_Utils_String {
  42   const COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
  43
  44   /**
  45    * List of all letters and numbers
  46    */
  47   const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  48
  /**
   * Derive a candidate variable name from a human-readable title.
   *
   * The title is passed through self::munge() with '_' as the replacement
   * character and cut to $maxLength. If CRM_Utils_Rule::title() rejects the
   * munged value, a prefix of md5($title) is returned instead.
   *
   * @param string $title
   *   The display title to convert.
   * @param int $maxLength
   *   Maximum length of the returned name.
   *
   * @return string
   *   The munged title, or the first $maxLength characters of md5($title).
   */
  public static function titleToVar($title, $maxLength = 31) {
    $candidate = self::munge($title, '_', $maxLength);

    // Fall back to a hash prefix when the munged name is not accepted, to
    // prevent errors downstream.
    if (!CRM_Utils_Rule::title($candidate, $maxLength)) {
      return substr(md5($title), 0, $maxLength);
    }

    return $candidate;
  }
  69
  /**
   * Collapse every run of characters outside [a-zA-Z0-9] into $char.
   *
   * The input is trimmed first, so leading/trailing white space does not
   * produce replacement characters.
   *
   * @param string $name
   *   The name to be worked on.
   * @param string $char
   *   Replacement inserted for each run of non-alphanumeric characters.
   * @param int $len
   *   Maximum length of the result; a falsy value (e.g. 0) disables truncation.
   *
   * @return string
   *   The munged (and possibly truncated) string.
   */
  public static function munge($name, $char = '_', $len = 63) {
    // Only the ascii character set is kept since mysql does not create
    // table names / field names otherwise (CRM-11744).
    $munged = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // Keep variable names short unless the caller opted out with a falsy $len.
    return $len ? substr($munged, 0, $len) : $munged;
  }
  97
  /**
   * Turn an underscore separated name into CamelCase.
   *
   * Each underscore-delimited fragment gets its first letter upper-cased and
   * the fragments are joined without a separator, e.g. membership_payment
   * becomes MembershipPayment. A leading 'Uf' fragment is rewritten to 'UF'
   * (UFGroup, UFJoin, UFMatch, UFField).
   *
   * @param string $string
   *   Name that may contain underscores.
   *
   * @return string
   *   The CamelCase form.
   */
  public static function convertStringToCamel($string) {
    $words = array_map('ucfirst', explode('_', $string));

    // Special case: the UF* entities keep both letters upper-case.
    if ($words[0] === 'Uf') {
      $words[0] = 'UF';
    }

    return implode('', $words);
  }
 117
 118   /**
 119    * Takes a variable name and munges it randomly into another variable name.
 120    *
 121    * @param string $name
 122    *   Initial Variable Name.
 123    * @param int $len
 124    *   Length of valid variables.
 125    *
 126    * @return string
 127    *   Randomized Variable Name
 128    */
 129   public static function rename($name, $len = 4) {
 130     $rand = substr(uniqid(), 0, $len);
 131     return substr_replace($name, $rand, -$len, $len);
 132   }
 133
 134   /**
 135    * Takes a string and returns the last tuple of the string.
 136    *
 137    * Useful while converting file names to class names etc
 138    *
 139    * @param string $string
 140    *   The input string.
 141    * @param string $char
 142    *   Character used to demarcate the components
 143    *
 144    * @return string
 145    *   The last component
 146    */
 147   public static function getClassName($string, $char = '_') {
 148     $names = array();
 149     if (!is_array($string)) {
 150       $names = explode($char, $string);
 151     }
 152     if (!empty($names)) {
 153       return array_pop($names);
 154     }
 155   }
 156
 157   /**
 158    * Appends a name to a string and separated by delimiter.
 159    *
 160    * Does the right thing for an empty string
 161    *
 162    * @param string $str
 163    *   The string to be appended to.
 164    * @param string $delim
 165    *   The delimiter to use.
 166    * @param mixed $name
 167    *   The string (or array of strings) to append.
 168    */
 169   public static function append(&$str, $delim, $name) {
 170     if (empty($name)) {
 171       return;
 172     }
 173
 174     if (is_array($name)) {
 175       foreach ($name as $n) {
 176         if (empty($n)) {
 177           continue;
 178         }
 179         if (empty($str)) {
 180           $str = $n;
 181         }
 182         else {
 183           $str .= $delim . $n;
 184         }
 185       }
 186     }
 187     else {
 188       if (empty($str)) {
 189         $str = $name;
 190       }
 191       else {
 192         $str .= $delim . $name;
 193       }
 194     }
 195   }
 196
 197   /**
 198    * Determine if the string is composed only of ascii characters.
 199    *
 200    * @param string $str
 201    *   Input string.
 202    * @param bool $utf8
 203    *   Attempt utf8 match on failure (default yes).
 204    *
 205    * @return bool
 206    *   true if string is ascii
 207    */
 208   public static function isAscii($str, $utf8 = TRUE) {
 209     if (!function_exists('mb_detect_encoding')) {
 210       // eliminate all white space from the string
 211       $str = preg_replace('/\s+/', '', $str);
 212       // FIXME:  This is a pretty brutal hack to make utf8 and 8859-1 work.
 213
 214       /* match low- or high-ascii characters */
 215       if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
 216         // || // low ascii characters
 217         // high ascii characters
 218         //  preg_match( '/[\x7F-\xFF]/', $str ) ) {
 219         if ($utf8) {
 220           /* if we did match, try for utf-8, or iso8859-1 */
 221
 222           return self::isUtf8($str);
 223         }
 224         else {
 225           return FALSE;
 226         }
 227       }
 228       return TRUE;
 229     }
 230     else {
 231       $order = array('ASCII');
 232       if ($utf8) {
 233         $order[] = 'UTF-8';
 234       }
 235       $enc = mb_detect_encoding($str, $order, TRUE);
 236       return ($enc == 'ASCII' || $enc == 'UTF-8');
 237     }
 238   }
 239
 240   /**
 241    * Determine the string replacements for redaction.
 242    * on the basis of the regular expressions
 243    *
 244    * @param string $str
 245    *   Input string.
 246    * @param array $regexRules
 247    *   Regular expression to be matched w/ replacements.
 248    *
 249    * @return array
 250    *   array of strings w/ corresponding redacted outputs
 251    */
 252   public static function regex($str, $regexRules) {
 253     //redact the regular expressions
 254     if (!empty($regexRules) && isset($str)) {
 255       static $matches, $totalMatches, $match = array();
 256       foreach ($regexRules as $pattern => $replacement) {
 257         preg_match_all($pattern, $str, $matches);
 258         if (!empty($matches[0])) {
 259           if (empty($totalMatches)) {
 260             $totalMatches = $matches[0];
 261           }
 262           else {
 263             $totalMatches = array_merge($totalMatches, $matches[0]);
 264           }
 265           $match = array_flip($totalMatches);
 266         }
 267       }
 268     }
 269
 270     if (!empty($match)) {
 271       foreach ($match as $matchKey => & $dontCare) {
 272         foreach ($regexRules as $pattern => $replacement) {
 273           if (preg_match($pattern, $matchKey)) {
 274             $dontCare = $replacement . substr(md5($matchKey), 0, 5);
 275             break;
 276           }
 277         }
 278       }
 279       return $match;
 280     }
 281     return CRM_Core_DAO::$_nullArray;
 282   }
 283
 284   /**
 285    * @param $str
 286    * @param $stringRules
 287    *
 288    * @return mixed
 289    */
 290   public static function redaction($str, $stringRules) {
 291     //redact the strings
 292     if (!empty($stringRules)) {
 293       foreach ($stringRules as $match => $replace) {
 294         $str = str_ireplace($match, $replace, $str);
 295       }
 296     }
 297
 298     //return the redacted output
 299     return $str;
 300   }
 301
 302   /**
 303    * Determine if a string is composed only of utf8 characters
 304    *
 305    * @param string $str
 306    *   Input string.
 307    *
 308    * @return bool
 309    */
 310   public static function isUtf8($str) {
 311     if (!function_exists(mb_detect_encoding)) {
 312       // eliminate all white space from the string
 313       $str = preg_replace('/\s+/', '', $str);
 314
 315       /* pattern stolen from the php.net function documentation for
 316        * utf8decode();
 317        * comment by JF Sebastian, 30-Mar-2005
 318        */
 319
 320       return preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|f0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
 321       // ||
 322       // iconv('ISO-8859-1', 'UTF-8', $str);
 323     }
 324     else {
 325       $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
 326       return ($enc !== FALSE);
 327     }
 328   }
 329
 330   /**
 331    * Determine if two href's are equivalent (fuzzy match)
 332    *
 333    * @param string $url1
 334    *   The first url to be matched.
 335    * @param string $url2
 336    *   The second url to be matched against.
 337    *
 338    * @return bool
 339    *   true if the urls match, else false
 340    */
 341   public static function match($url1, $url2) {
 342     $url1 = strtolower($url1);
 343     $url2 = strtolower($url2);
 344
 345     $url1Str = parse_url($url1);
 346     $url2Str = parse_url($url2);
 347
 348     if ($url1Str['path'] == $url2Str['path'] &&
 349       self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
 350     ) {
 351       return TRUE;
 352     }
 353     return FALSE;
 354   }
 355
 356   /**
 357    * Extract the civicrm path from the url.
 358    *
 359    * @param string $query
 360    *   A url string.
 361    *
 362    * @return string|null
 363    *   civicrm url (eg: civicrm/contact/search)
 364    */
 365   public static function extractURLVarValue($query) {
 366     $config = CRM_Core_Config::singleton();
 367     $urlVar = $config->userFrameworkURLVar;
 368
 369     $params = explode('&', $query);
 370     foreach ($params as $p) {
 371       if (strpos($p, '=')) {
 372         list($k, $v) = explode('=', $p);
 373         if ($k == $urlVar) {
 374           return $v;
 375         }
 376       }
 377     }
 378     return NULL;
 379   }
 380
 381   /**
 382    * Translate a true/false/yes/no string to a 0 or 1 value
 383    *
 384    * @param string $str
 385    *   The string to be translated.
 386    *
 387    * @return bool
 388    */
 389   public static function strtobool($str) {
 390     if (!is_scalar($str)) {
 391       return FALSE;
 392     }
 393
 394     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 395       return TRUE;
 396     }
 397     return FALSE;
 398   }
 399
 400   /**
 401    * Returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
 402    *
 403    * @param string $str
 404    *   The string to be translated.
 405    *
 406    * @return bool
 407    */
 408   public static function strtoboolstr($str) {
 409     if (!is_scalar($str)) {
 410       return FALSE;
 411     }
 412
 413     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 414       return '1';
 415     }
 416     elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
 417       return '0';
 418     }
 419     else {
 420       return FALSE;
 421     }
 422   }
 423
 424   /**
 425    * Convert a HTML string into a text one using html2text
 426    *
 427    * @param string $html
 428    *   The string to be converted.
 429    *
 430    * @return string
 431    *   the converted string
 432    */
 433   public static function htmlToText($html) {
 434     require_once 'packages/html2text/rcube_html2text.php';
 435     $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
 436     $converter = new rcube_html2text($token_html);
 437     $token_text = $converter->get_text();
 438     $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
 439     return $text;
 440   }
 441
 442   /**
 443    * @param $string
 444    * @param array $params
 445    */
 446   public static function extractName($string, &$params) {
 447     $name = trim($string);
 448     if (empty($name)) {
 449       return;
 450     }
 451
 452     // strip out quotes
 453     $name = str_replace('"', '', $name);
 454     $name = str_replace('\'', '', $name);
 455
 456     // check for comma in name
 457     if (strpos($name, ',') !== FALSE) {
 458
 459       // name has a comma - assume lname, fname [mname]
 460       $names = explode(',', $name);
 461       if (count($names) > 1) {
 462         $params['last_name'] = trim($names[0]);
 463
 464         // check for space delim
 465         $fnames = explode(' ', trim($names[1]));
 466         if (count($fnames) > 1) {
 467           $params['first_name'] = trim($fnames[0]);
 468           $params['middle_name'] = trim($fnames[1]);
 469         }
 470         else {
 471           $params['first_name'] = trim($fnames[0]);
 472         }
 473       }
 474       else {
 475         $params['first_name'] = trim($names[0]);
 476       }
 477     }
 478     else {
 479       // name has no comma - assume fname [mname] fname
 480       $names = explode(' ', $name);
 481       if (count($names) == 1) {
 482         $params['first_name'] = $names[0];
 483       }
 484       elseif (count($names) == 2) {
 485         $params['first_name'] = $names[0];
 486         $params['last_name'] = $names[1];
 487       }
 488       else {
 489         $params['first_name'] = $names[0];
 490         $params['middle_name'] = $names[1];
 491         $params['last_name'] = $names[2];
 492       }
 493     }
 494   }
 495
 496   /**
 497    * @param $string
 498    *
 499    * @return array
 500    */
 501   public static function &makeArray($string) {
 502     $string = trim($string);
 503
 504     $values = explode("\n", $string);
 505     $result = array();
 506     foreach ($values as $value) {
 507       list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
 508       if (!empty($v)) {
 509         $result[trim($n)] = trim($v);
 510       }
 511     }
 512     return $result;
 513   }
 514
 515   /**
 516    * Given an ezComponents-parsed representation of
 517    * a text with alternatives return only the first one
 518    *
 519    * @param string $full
 520    *   All alternatives as a long string (or some other text).
 521    *
 522    * @return string
 523    *   only the first alternative found (or the text without alternatives)
 524    */
 525   public static function stripAlternatives($full) {
 526     $matches = array();
 527     preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
 528
 529     if (isset($matches[1]) &&
 530       trim(strip_tags($matches[1])) != ''
 531     ) {
 532       return $matches[1];
 533     }
 534     else {
 535       return $full;
 536     }
 537   }
 538
 539   /**
 540    * Strip leading, trailing, double spaces from string
 541    * used for postal/greeting/addressee
 542    *
 543    * @param string $string
 544    *   Input string to be cleaned.
 545    *
 546    * @return string
 547    *   the cleaned string
 548    */
 549   public static function stripSpaces($string) {
 550     return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
 551   }
 552
 553   /**
 554    * clean the URL 'path' variable that we use
 555    * to construct CiviCRM urls by removing characters from the path variable
 556    *
 557    * @param string $string
 558    *   The input string to be sanitized.
 559    * @param array $search
 560    *   The characters to be sanitized.
 561    * @param string $replace
 562    *   The character to replace it with.
 563    *
 564    * @return string
 565    *   the sanitized string
 566    */
 567   public static function stripPathChars(
 568     $string,
 569     $search = NULL,
 570     $replace = NULL
 571   ) {
 572     static $_searchChars = NULL;
 573     static $_replaceChar = NULL;
 574
 575     if (empty($string)) {
 576       return $string;
 577     }
 578
 579     if ($_searchChars == NULL) {
 580       $_searchChars = array(
 581         '&',
 582         ';',
 583         ',',
 584         '=',
 585         '$',
 586         '"',
 587         "'",
 588         '\\',
 589         '<',
 590         '>',
 591         '(',
 592         ')',
 593         ' ',
 594         "\r",
 595         "\r\n",
 596         "\n",
 597         "\t",
 598       );
 599       $_replaceChar = '_';
 600     }
 601
 602     if ($search == NULL) {
 603       $search = $_searchChars;
 604     }
 605
 606     if ($replace == NULL) {
 607       $replace = $_replaceChar;
 608     }
 609
 610     return str_replace($search, $replace, $string);
 611   }
 612
 613
 614   /**
 615    * Use HTMLPurifier to clean up a text string and remove any potential
 616    * xss attacks. This is primarily used in public facing pages which
 617    * accept html as the input string
 618    *
 619    * @param string $string
 620    *   The input string.
 621    *
 622    * @return string
 623    *   the cleaned up string
 624    */
 625   public static function purifyHTML($string) {
 626     static $_filter = NULL;
 627     if (!$_filter) {
 628       $config = HTMLPurifier_Config::createDefault();
 629       $config->set('Core.Encoding', 'UTF-8');
 630
 631       // Disable the cache entirely
 632       $config->set('Cache.DefinitionImpl', NULL);
 633
 634       $_filter = new HTMLPurifier($config);
 635     }
 636
 637     return $_filter->purify($string);
 638   }
 639
 640   /**
 641    * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 642    *
 643    * @param string $string
 644    * @param int $maxLen
 645    *
 646    * @return string
 647    */
 648   public static function ellipsify($string, $maxLen) {
 649     $len = strlen($string);
 650     if ($len <= $maxLen) {
 651       return $string;
 652     }
 653     else {
 654       return substr($string, 0, $maxLen - 3) . '...';
 655     }
 656   }
 657
 658   /**
 659    * Generate a random string.
 660    *
 661    * @param $len
 662    * @param $alphabet
 663    * @return string
 664    */
 665   public static function createRandom($len, $alphabet) {
 666     $alphabetSize = strlen($alphabet);
 667     $result = '';
 668     for ($i = 0; $i < $len; $i++) {
 669       $result .= $alphabet{rand(1, $alphabetSize) - 1};
 670     }
 671     return $result;
 672   }
 673
 674   /**
 675    * Examples:
 676    * "admin foo" => array(NULL,"admin foo")
 677    * "cms:admin foo" => array("cms", "admin foo")
 678    *
 679    * @param $delim
 680    * @param string $string
 681    *   E.g. "view all contacts". Syntax: "[prefix:]name".
 682    * @param null $defaultPrefix
 683    *
 684    * @return array
 685    *   (0 => string|NULL $prefix, 1 => string $value)
 686    */
 687   public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
 688     $pos = strpos($string, $delim);
 689     if ($pos === FALSE) {
 690       return array($defaultPrefix, $string);
 691     }
 692     else {
 693       return array(substr($string, 0, $pos), substr($string, 1 + $pos));
 694     }
 695   }
 696
 697   /**
 698    * This function will mask part of the the user portion of an Email address (everything before the @)
 699    *
 700    * @param string $email
 701    *   The email address to be masked.
 702    * @param string $maskChar
 703    *   The character used for masking.
 704    * @param int $percent
 705    *   The percentage of the user portion to be masked.
 706    *
 707    * @return string
 708    *   returns the masked Email address
 709    */
 710   public static function maskEmail($email, $maskChar = '*', $percent = 50) {
 711     list($user, $domain) = preg_split("/@/", $email);
 712     $len = strlen($user);
 713     $maskCount = floor($len * $percent / 100);
 714     $offset = floor(($len - $maskCount) / 2);
 715
 716     $masked = substr($user, 0, $offset)
 717       . str_repeat($maskChar, $maskCount)
 718       . substr($user, $maskCount + $offset);
 719
 720     return ($masked . '@' . $domain);
 721   }
 722
 723   /**
 724    * This function compares two strings.
 725    *
 726    * @param string $strOne
 727    *   String one.
 728    * @param string $strTwo
 729    *   String two.
 730    * @param bool $case
 731    *   Boolean indicating whether you want the comparison to be case sensitive or not.
 732    *
 733    * @return bool
 734    *   TRUE (string are identical); FALSE (strings are not identical)
 735    */
 736   public static function compareStr($strOne, $strTwo, $case) {
 737     if ($case == TRUE) {
 738       // Convert to lowercase and trim white spaces
 739       if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
 740         // yes - they are identical
 741         return TRUE;
 742       }
 743       else {
 744         // not identical
 745         return FALSE;
 746       }
 747     }
 748     if ($case == FALSE) {
 749       // Trim white spaces
 750       if (trim($strOne) == trim($strTwo)) {
 751         // yes - they are identical
 752         return TRUE;
 753       }
 754       else {
 755         // not identical
 756         return FALSE;
 757       }
 758     }
 759   }
 760
 761   /**
 762    * Many parts of the codebase have a convention of internally passing around
 763    * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 764    * (because most other odd characters are %-escaped in URLs; and %-escaped
 765    * strings don't need any extra escaping in HTML).
 766    *
 767    * @param string $htmlUrl
 768    *   URL with HTML entities.
 769    * @return string
 770    *   URL without HTML entities
 771    */
 772   public static function unstupifyUrl($htmlUrl) {
 773     return str_replace('&amp;', '&', $htmlUrl);
 774   }
 775
 776   /**
 777    * Formats a string of attributes for insertion in an html tag.
 778    *
 779    * @param array $attributes
 780    *
 781    * @return string
 782    */
 783   public static function htmlAttributes($attributes) {
 784     $output = '';
 785     foreach ($attributes as $name => $vals) {
 786       $output .= " $name=\"" . htmlspecialchars(implode(' ', (array) $vals)) . '"';
 787     }
 788     return ltrim($output);
 789   }
 790
 791 }