CRM/Utils/String.php

   1 <?php
   2 /*
   3  +--------------------------------------------------------------------+
   4  | CiviCRM version 4.5                                                |
   5  +--------------------------------------------------------------------+
   6  | Copyright CiviCRM LLC (c) 2004-2014                                |
   7  +--------------------------------------------------------------------+
   8  | This file is a part of CiviCRM.                                    |
   9  |                                                                    |
  10  | CiviCRM is free software; you can copy, modify, and distribute it  |
  11  | under the terms of the GNU Affero General Public License           |
  12  | Version 3, 19 November 2007 and the CiviCRM Licensing Exception.   |
  13  |                                                                    |
  14  | CiviCRM is distributed in the hope that it will be useful, but     |
  15  | WITHOUT ANY WARRANTY; without even the implied warranty of         |
  16  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.               |
  17  | See the GNU Affero General Public License for more details.        |
  18  |                                                                    |
  19  | You should have received a copy of the GNU Affero General Public   |
  20  | License and the CiviCRM Licensing Exception along                  |
  21  | with this program; if not, contact CiviCRM LLC                     |
  22  | at info[AT]civicrm[DOT]org. If you have questions about the        |
  23  | GNU Affero General Public License or the licensing of CiviCRM,     |
  24  | see the CiviCRM license FAQ at http://civicrm.org/licensing        |
  25  +--------------------------------------------------------------------+
  26 */
  27
  28 /**
  29  *
  30  * @package CRM
  31  * @copyright CiviCRM LLC (c) 2004-2014
  32  * $Id$
  33  *
  34  */
  35
  36 require_once 'HTML/QuickForm/Rule/Email.php';
  37
  38 /**
  39  * This class contains string functions
  40  *
  41  */
  42 class CRM_Utils_String {
  43   CONST COMMA = ",", SEMICOLON = ";", SPACE = " ", TAB = "\t", LINEFEED = "\n", CARRIAGELINE = "\r\n", LINECARRIAGE = "\n\r", CARRIAGERETURN = "\r";
  44
  45   /**
  46    * List of all letters and numbers
  47    */
  48   const ALPHANUMERIC = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  49
  /**
   * Derive an identifier usable in forms/code from a display name.
   *
   * The title is passed through self::munge() with '_' as the replacement
   * character and $maxLength as the length limit. When the munged result is
   * not accepted by CRM_Utils_Rule::title(), a prefix of the md5 hash of the
   * original title is returned instead.
   *
   * @param string $title      display name to convert
   * @param int    $maxLength  maximum length of the returned name
   *
   * @return string  the derived variable name
   * @access public
   * @static
   */
  static function titleToVar($title, $maxLength = 31) {
    $candidate = self::munge($title, '_', $maxLength);

    if (!CRM_Utils_Rule::title($candidate, $maxLength)) {
      // the munged name was rejected: return a substr of the md5 instead
      // to prevent errors downstream
      return substr(md5($title), 0, $maxLength);
    }

    return $candidate;
  }
  76
  /**
   * Replace every run of characters other than ASCII letters and digits
   * with the replacement character, then optionally truncate.
   *
   * The input is trimmed first. Only the ASCII character set is kept since
   * mysql does not create table names / field names otherwise (CRM-11744).
   *
   * @param string $name the name to be worked on
   * @param string $char the replacement for each run of non-valid chars
   * @param int    $len  maximum length of the result; a falsy value
   *                     (0, NULL) disables truncation
   *
   * @return string the manipulated string
   * @access public
   * @static
   */
  static function munge($name, $char = '_', $len = 63) {
    $munged = preg_replace('/[^a-zA-Z0-9]+/', $char, trim($name));

    // lets keep variable names short when a limit is given
    return $len ? substr($munged, 0, $len) : $munged;
  }
 104
 105   /**
 106    *
 107    * Takes a variable name and munges it randomly into another variable name
 108    *
 109    * @param  string $name    Initial Variable Name
 110    * @param int     $len  length of valid variables
 111    *
 112    * @return string  Randomized Variable Name
 113    * @access public
 114    * @static
 115    */
 116   static function rename($name, $len = 4) {
 117     $rand = substr(uniqid(), 0, $len);
 118     return substr_replace($name, $rand, -$len, $len);
 119   }
 120
 121   /**
 122    * takes a string and returns the last tuple of the string.
 123    * useful while converting file names to class names etc
 124    *
 125    * @param string $string the input string
 126    * @param \char|string $char $char   the character used to demarcate the componets
 127    *
 128    * @access public
 129    *
 130    * @return string the last component
 131    * @static
 132    */
 133   static function getClassName($string, $char = '_') {
 134     $names = array();
 135     if (!is_array($string)) {
 136       $names = explode($char, $string);
 137     }
 138     if (!empty($names)) {
 139       return array_pop($names);
 140     }
 141   }
 142
 143   /**
 144    * appends a name to a string and seperated by delimiter.
 145    * does the right thing for an empty string
 146    *
 147    * @param string $str   the string to be appended to
 148    * @param string $delim the delimiter to use
 149    * @param mixed  $name  the string (or array of strings) to append
 150    *
 151    * @return void
 152    * @access public
 153    * @static
 154    */
 155   static function append(&$str, $delim, $name) {
 156     if (empty($name)) {
 157       return;
 158     }
 159
 160     if (is_array($name)) {
 161       foreach ($name as $n) {
 162         if (empty($n)) {
 163           continue;
 164         }
 165         if (empty($str)) {
 166           $str = $n;
 167         }
 168         else {
 169           $str .= $delim . $n;
 170         }
 171       }
 172     }
 173     else {
 174       if (empty($str)) {
 175         $str = $name;
 176       }
 177       else {
 178         $str .= $delim . $name;
 179       }
 180     }
 181   }
 182
 183   /**
 184    * determine if the string is composed only of ascii characters
 185    *
 186    * @param string  $str input string
 187    * @param boolean $utf8 attempt utf8 match on failure (default yes)
 188    *
 189    * @return boolean    true if string is ascii
 190    * @access public
 191    * @static
 192    */
 193   static function isAscii($str, $utf8 = TRUE) {
 194     if (!function_exists('mb_detect_encoding')) {
 195       // eliminate all white space from the string
 196       $str = preg_replace('/\s+/', '', $str);
 197       // FIXME:  This is a pretty brutal hack to make utf8 and 8859-1 work.
 198
 199       /* match low- or high-ascii characters */
 200       if (preg_match('/[\x00-\x20]|[\x7F-\xFF]/', $str)) {
 201         // || // low ascii characters
 202         // high ascii characters
 203         //  preg_match( '/[\x7F-\xFF]/', $str ) ) {
 204         if ($utf8) {
 205           /* if we did match, try for utf-8, or iso8859-1 */
 206
 207           return self::isUtf8($str);
 208         }
 209         else {
 210           return FALSE;
 211         }
 212       }
 213       return TRUE;
 214     }
 215     else {
 216       $order = array('ASCII');
 217       if ($utf8) {
 218         $order[] = 'UTF-8';
 219       }
 220       $enc = mb_detect_encoding($str, $order, TRUE);
 221       return ($enc == 'ASCII' || $enc == 'UTF-8');
 222     }
 223   }
 224
  /**
   * Determine the string replacements for redaction on the basis of
   * regular expressions.
   *
   * Every substring of $str matched by one of the patterns becomes a key of
   * the result; its value is the replacement configured for the first
   * pattern that matches it, followed by the first 5 characters of the md5
   * of the matched text.
   *
   * NOTE(review): the working variables are declared static, so matches
   * collected by earlier calls are still present on later calls and are
   * merged into the result. Confirm with callers whether this accumulation
   * is relied upon before changing it.
   *
   * @param string $str        input string
   * @param array  $regexRules regular expressions to be matched, as
   *                           pattern => replacement
   *
   * @return array  matched strings w/ corresponding redacted outputs, or
   *                CRM_Core_DAO::$_nullArray when there is nothing to report
   * @access public
   * @static
   */
  static function regex($str, $regexRules) {
    //redact the regular expressions
    if (!empty($regexRules) && isset($str)) {
      // static: these keep their contents between calls (see note above)
      static $matches, $totalMatches, $match = array();
      foreach ($regexRules as $pattern => $replacement) {
        preg_match_all($pattern, $str, $matches);
        if (!empty($matches[0])) {
          if (empty($totalMatches)) {
            $totalMatches = $matches[0];
          }
          else {
            $totalMatches = array_merge($totalMatches, $matches[0]);
          }
          // matched strings become the keys; values are filled in below
          $match = array_flip($totalMatches);
        }
      }
    }

    if (!empty($match)) {
      // $dontCare is bound by reference so the assignment below writes the
      // redacted value straight into $match
      foreach ($match as $matchKey => & $dontCare) {
        foreach ($regexRules as $pattern => $replacement) {
          if (preg_match($pattern, $matchKey)) {
            // first matching rule wins
            $dontCare = $replacement . substr(md5($matchKey), 0, 5);
            break;
          }
        }
      }
      return $match;
    }
    return CRM_Core_DAO::$_nullArray;
  }
 267
 268   /**
 269    * @param $str
 270    * @param $stringRules
 271    *
 272    * @return mixed
 273    */
 274   static function redaction($str, $stringRules) {
 275     //redact the strings
 276     if (!empty($stringRules)) {
 277       foreach ($stringRules as $match => $replace) {
 278         $str = str_ireplace($match, $replace, $str);
 279       }
 280     }
 281
 282     //return the redacted output
 283     return $str;
 284   }
 285
 286   /**
 287    * Determine if a string is composed only of utf8 characters
 288    *
 289    * @param string $str  input string
 290    * @access public
 291    * @static
 292    *
 293    * @return boolean
 294    */
 295   static function isUtf8($str) {
 296     if (!function_exists(mb_detect_encoding)) {
 297       // eliminate all white space from the string
 298       $str = preg_replace('/\s+/', '', $str);
 299
 300       /* pattern stolen from the php.net function documentation for
 301              * utf8decode();
 302              * comment by JF Sebastian, 30-Mar-2005
 303              */
 304
 305       return preg_match('/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xec][\x80-\xbf]{2}|\xed[\x80-\x9f][\x80-\xbf]|[\xee-\xef][\x80-\xbf]{2}|f0[\x90-\xbf][\x80-\xbf]{2}|[\xf1-\xf3][\x80-\xbf]{3}|\xf4[\x80-\x8f][\x80-\xbf]{2})*$/', $str);
 306       // ||
 307       // iconv('ISO-8859-1', 'UTF-8', $str);
 308     }
 309     else {
 310       $enc = mb_detect_encoding($str, array('UTF-8'), TRUE);
 311       return ($enc !== FALSE);
 312     }
 313   }
 314
 315   /**
 316    * determine if two href's are equivalent (fuzzy match)
 317    *
 318    * @param string $url1 the first url to be matched
 319    * @param string $url2 the second url to be matched against
 320    *
 321    * @return boolean true if the urls match, else false
 322    * @access public
 323    * @static
 324    */
 325   static function match($url1, $url2) {
 326     $url1 = strtolower($url1);
 327     $url2 = strtolower($url2);
 328
 329     $url1Str = parse_url($url1);
 330     $url2Str = parse_url($url2);
 331
 332     if ($url1Str['path'] == $url2Str['path'] &&
 333       self::extractURLVarValue(CRM_Utils_Array::value('query', $url1Str)) == self::extractURLVarValue(CRM_Utils_Array::value('query', $url2Str))
 334     ) {
 335       return TRUE;
 336     }
 337     return FALSE;
 338   }
 339
 340   /**
 341    * Function to extract variable values
 342    *
 343    * @param  mix $query this is basically url
 344    *
 345    * @return mix $v  returns civicrm url (eg: civicrm/contact/search/...)
 346    * @access public
 347    * @static
 348    */
 349   static function extractURLVarValue($query) {
 350     $config = CRM_Core_Config::singleton();
 351     $urlVar = $config->userFrameworkURLVar;
 352
 353     $params = explode('&', $query);
 354     foreach ($params as $p) {
 355       if (strpos($p, '=')) {
 356         list($k, $v) = explode('=', $p);
 357         if ($k == $urlVar) {
 358           return $v;
 359         }
 360       }
 361     }
 362     return NULL;
 363   }
 364
 365   /**
 366    * translate a true/false/yes/no string to a 0 or 1 value
 367    *
 368    * @param string $str  the string to be translated
 369    *
 370    * @return boolean
 371    * @access public
 372    * @static
 373    */
 374   static function strtobool($str) {
 375     if (!is_scalar($str)) {
 376       return FALSE;
 377     }
 378
 379     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 380       return TRUE;
 381     }
 382     return FALSE;
 383   }
 384
 385   /**
 386    * returns string '1' for a true/yes/1 string, and '0' for no/false/0 else returns false
 387    *
 388    * @param string $str  the string to be translated
 389    *
 390    * @return boolean
 391    * @access public
 392    * @static
 393    */
 394   static function strtoboolstr($str) {
 395     if (!is_scalar($str)) {
 396       return FALSE;
 397     }
 398
 399     if (preg_match('/^(y(es)?|t(rue)?|1)$/i', $str)) {
 400       return '1';
 401     }
 402     elseif (preg_match('/^(n(o)?|f(alse)?|0)$/i', $str)) {
 403       return '0';
 404     }
 405     else {
 406       return FALSE;
 407     }
 408   }
 409
 410   /**
 411    * Convert a HTML string into a text one using html2text
 412    *
 413    * @param string $html  the string to be converted
 414    *
 415    * @return string       the converted string
 416    * @access public
 417    * @static
 418    */
 419   static function htmlToText($html) {
 420     require_once 'packages/html2text/rcube_html2text.php';
 421     $token_html = preg_replace('!\{([a-z_.]+)\}!i', 'token:{$1}', $html);
 422     $converter = new rcube_html2text($token_html);
 423     $token_text = $converter->get_text();
 424     $text = preg_replace('!token\:\{([a-z_.]+)\}!i', '{$1}', $token_text);
 425     return $text;
 426   }
 427
 428   /**
 429    * @param $string
 430    * @param $params
 431    */
 432   static function extractName($string, &$params) {
 433     $name = trim($string);
 434     if (empty($name)) {
 435       return;
 436     }
 437
 438     // strip out quotes
 439     $name = str_replace('"', '', $name);
 440     $name = str_replace('\'', '', $name);
 441
 442     // check for comma in name
 443     if (strpos($name, ',') !== FALSE) {
 444
 445       // name has a comma - assume lname, fname [mname]
 446       $names = explode(',', $name);
 447       if (count($names) > 1) {
 448         $params['last_name'] = trim($names[0]);
 449
 450         // check for space delim
 451         $fnames = explode(' ', trim($names[1]));
 452         if (count($fnames) > 1) {
 453           $params['first_name'] = trim($fnames[0]);
 454           $params['middle_name'] = trim($fnames[1]);
 455         }
 456         else {
 457           $params['first_name'] = trim($fnames[0]);
 458         }
 459       }
 460       else {
 461         $params['first_name'] = trim($names[0]);
 462       }
 463     }
 464     else {
 465       // name has no comma - assume fname [mname] fname
 466       $names = explode(' ', $name);
 467       if (count($names) == 1) {
 468         $params['first_name'] = $names[0];
 469       }
 470       elseif (count($names) == 2) {
 471         $params['first_name'] = $names[0];
 472         $params['last_name'] = $names[1];
 473       }
 474       else {
 475         $params['first_name'] = $names[0];
 476         $params['middle_name'] = $names[1];
 477         $params['last_name'] = $names[2];
 478       }
 479     }
 480   }
 481
 482   /**
 483    * @param $string
 484    *
 485    * @return array
 486    */
 487   static function &makeArray($string) {
 488     $string = trim($string);
 489
 490     $values = explode("\n", $string);
 491     $result = array();
 492     foreach ($values as $value) {
 493       list($n, $v) = CRM_Utils_System::explode('=', $value, 2);
 494       if (!empty($v)) {
 495         $result[trim($n)] = trim($v);
 496       }
 497     }
 498     return $result;
 499   }
 500
 501   /**
 502    * Given an ezComponents-parsed representation of
 503    * a text with alternatives return only the first one
 504    *
 505    * @param string $full  all alternatives as a long string (or some other text)
 506    *
 507    * @return string       only the first alternative found (or the text without alternatives)
 508    */
 509   static function stripAlternatives($full) {
 510     $matches = array();
 511     preg_match('/-ALTERNATIVE ITEM 0-(.*?)-ALTERNATIVE ITEM 1-.*-ALTERNATIVE END-/s', $full, $matches);
 512
 513     if (isset($matches[1]) &&
 514       trim(strip_tags($matches[1])) != ''
 515     ) {
 516       return $matches[1];
 517     }
 518     else {
 519       return $full;
 520     }
 521   }
 522
 523   /**
 524    * strip leading, trailing, double spaces from string
 525    * used for postal/greeting/addressee
 526    *
 527    * @param string  $string input string to be cleaned
 528    *
 529    * @return string the cleaned string
 530    * @access public
 531    * @static
 532    */
 533   static function stripSpaces($string) {
 534     return (empty($string)) ? $string : preg_replace("/\s{2,}/", " ", trim($string));
 535   }
 536
 537   /**
 538    * This function is used to clean the URL 'path' variable that we use
 539    * to construct CiviCRM urls by removing characters from the path variable
 540    *
 541    * @param string $string  the input string to be sanitized
 542    * @param array  $search  the characters to be sanitized
 543    * @param string $replace the character to replace it with
 544    *
 545    * @return string the sanitized string
 546    * @access public
 547    * @static
 548    */
 549   static function stripPathChars($string,
 550     $search = NULL,
 551     $replace = NULL
 552   ) {
 553     static $_searchChars = NULL;
 554     static $_replaceChar = NULL;
 555
 556     if (empty($string)) {
 557       return $string;
 558     }
 559
 560     if ($_searchChars == NULL) {
 561       $_searchChars = array(
 562         '&', ';', ',', '=', '$',
 563         '"', "'", '\\',
 564         '<', '>', '(', ')',
 565         ' ', "\r", "\r\n", "\n", "\t",
 566       );
 567       $_replaceChar = '_';
 568     }
 569
 570
 571     if ($search == NULL) {
 572       $search = $_searchChars;
 573     }
 574
 575     if ($replace == NULL) {
 576       $replace = $_replaceChar;
 577     }
 578
 579     return str_replace($search, $replace, $string);
 580   }
 581
 582
 583   /**
 584    * Use HTMLPurifier to clean up a text string and remove any potential
 585    * xss attacks. This is primarily used in public facing pages which
 586    * accept html as the input string
 587    *
 588    * @param string $string the input string
 589    *
 590    * @return string the cleaned up string
 591    * @public
 592    * @static
 593    */
 594   static function purifyHTML($string) {
 595     static $_filter = null;
 596     if (!$_filter) {
 597       $config = HTMLPurifier_Config::createDefault();
 598       $config->set('Core.Encoding', 'UTF-8');
 599
 600       // Disable the cache entirely
 601       $config->set('Cache.DefinitionImpl', null);
 602
 603       $_filter = new HTMLPurifier($config);
 604     }
 605
 606     return $_filter->purify($string);
 607   }
 608
 609   /**
 610    * Truncate $string; if $string exceeds $maxLen, place "..." at the end
 611    *
 612    * @param string $string
 613    * @param int $maxLen
 614    *
 615    * @return string
 616    */
 617   static function ellipsify($string, $maxLen) {
 618     $len = strlen($string);
 619     if ($len <= $maxLen) {
 620       return $string;
 621     }
 622     else {
 623       return substr($string, 0, $maxLen-3) . '...';
 624     }
 625   }
 626
 627   /**
 628    * Generate a random string
 629    *
 630    * @param $len
 631    * @param $alphabet
 632    * @return string
 633    */
 634   public static function createRandom($len, $alphabet) {
 635     $alphabetSize = strlen($alphabet);
 636     $result = '';
 637     for ($i = 0; $i < $len; $i++) {
 638       $result .= $alphabet{rand(1, $alphabetSize) - 1};
 639     }
 640     return $result;
 641   }
 642
 643   /**
 644    * Examples:
 645    * "admin foo" => array(NULL,"admin foo")
 646    * "cms:admin foo" => array("cms", "admin foo")
 647    *
 648    * @param $delim
 649    * @param string $string e.g. "view all contacts". Syntax: "[prefix:]name"
 650    * @param null $defaultPrefix
 651    *
 652    * @return array (0 => string|NULL $prefix, 1 => string $value)
 653    */
 654   public static function parsePrefix($delim, $string, $defaultPrefix = NULL) {
 655     $pos = strpos($string, $delim);
 656     if ($pos === FALSE) {
 657       return array($defaultPrefix, $string);
 658     }
 659     else {
 660       return array(substr($string, 0, $pos), substr($string, 1+$pos));
 661     }
 662   }
 663
 664   /**
 665    * this function will mask part of the the user portion of an Email address (everything before the @)
 666    *
 667    * @param string $email the email address to be masked
 668    * @param string $maskChar the character used for masking
 669    * @param integer $percent the percentage of the user portion to be masked
 670    *
 671    * @return string returns the masked Email address
 672    */
 673   public static function maskEmail($email, $maskChar= '*', $percent=50) {
 674     list($user, $domain) = preg_split("/@/", $email);
 675     $len = strlen($user);
 676     $maskCount = floor($len * $percent /100);
 677     $offset = floor(($len - $maskCount) / 2);
 678
 679     $masked = substr($user, 0, $offset)
 680       .str_repeat($maskChar, $maskCount)
 681       .substr($user, $maskCount + $offset);
 682
 683     return($masked.'@'.$domain);
 684   }
 685
 686   /**
 687    * this function compares two strings
 688    *
 689    * @param string $strOne string one
 690    * @param string $strTwo string two
 691    * @param boolean $case boolean indicating whether you want the comparison to be case sensitive or not
 692    *
 693    * @return boolean TRUE (string are identical); FALSE (strings are not identical)
 694    */
 695   public static function compareStr($strOne, $strTwo, $case) {
 696     if ($case == TRUE) {
 697       // Convert to lowercase and trim white spaces
 698       if (strtolower(trim($strOne)) == strtolower(trim($strTwo))) {
 699         // yes - they are identical
 700         return TRUE;
 701       }
 702       else {
 703         // not identical
 704         return FALSE;
 705       }
 706     }
 707     if ($case == FALSE) {
 708       // Trim white spaces
 709       if (trim($strOne) == trim($strTwo)) {
 710         // yes - they are identical
 711         return TRUE;
 712       }
 713       else {
 714         // not identical
 715         return FALSE;
 716       }
 717     }
 718   }
 719
 720   /**
 721    * Many parts of the codebase have a convention of internally passing around
 722    * HTML-encoded URLs. This effectively means that "&" is replaced by "&amp;"
 723    * (because most other odd characters are %-escaped in URLs; and %-escaped
 724    * strings don't need any extra escaping in HTML).
 725    *
 726    * @param string $url URL with HTML entities
 727    * @return string URL without HTML entities
 728    */
 729   public static function unstupifyUrl($htmlUrl) {
 730     return str_replace('&amp;', '&', $htmlUrl);
 731   }
 732 }
 733