using mbstring functions for body wrapping, if they are available.

[squirrelmail.git] / functions / strings.php
diff --git a/functions/strings.php b/functions/strings.php

index 792c0d0fb46f6af3a2f230fcd4216107a358c62a..84a70af4025bb2b7bed8385b016d52f2d9de62ca 100644 (file)
--- a/functions/strings.php
+++ b/functions/strings.php
@@ -3,11 +3,11 @@
  /**
   * strings.php
   *
- * Copyright (c) 1999-2004 The SquirrelMail Project Team
+ * Copyright (c) 1999-2005 The SquirrelMail Project Team
   * Licensed under the GNU GPL. For full terms see the file COPYING.
   *
   * This code provides various string manipulation functions that are
- * used by the rest of the Squirrelmail code.
+ * used by the rest of the SquirrelMail code.
   *
   * @version $Id$
   * @package squirrelmail
@@ -77,8 +77,6 @@ function sqMakeNewLine (&$str, $citeLevel, &$column) {
  /**
   * Checks for spaces in strings - only used if PHP doesn't have native ctype support
   *
- * @author Tomas Kuliavas
- *
   * You might be able to rewrite the function by adding short evaluation form.
   *
   * possible problems:
@@ -96,11 +94,11 @@ function sqMakeNewLine (&$str, $citeLevel, &$column) {
   * @return bool true when only whitespace symbols are present in test string
   */
  function sm_ctype_space($string) {
-  if ( preg_match('/^[\x09-\x0D]|^\x20/', $string) || $string=='') {
-    return true;
-  } else {
-    return false;
-  }
+    if ( preg_match('/^[\x09-\x0D]|^\x20/', $string) || $string=='') {
+        return true;
+    } else {
+        return false;
+    }
  }
  
  /**
@@ -127,7 +125,7 @@ function &sqBodyWrap (&$body, $wrap) {
      $outString = '';
      // current column since the last newline in the outstring
      $outStringCol = 0;
-    $length = strlen($body);
+    $length = sq_strlen($body);
      // where we are in the original string
      $pos = 0;
      // the number of >>> citation markers we are currently at
@@ -139,12 +137,12 @@ function &sqBodyWrap (&$body, $wrap) {
         // we're at the beginning of a line, get the new cite level
         $newCiteLevel = 0;
  
-       while (($pos < $length) && ($body{$pos} == '>')) {
+       while (($pos < $length) && (sq_substr($body,$pos,1) == '>')) {
             $newCiteLevel++;
             $pos++;
  
             // skip over any spaces interleaved among the cite markers
-           while (($pos < $length) && ($body{$pos} == ' ')) {
+           while (($pos < $length) && (sq_substr($body,$pos,1) == ' ')) {
  
                 $pos++;
  
@@ -157,8 +155,8 @@ function &sqBodyWrap (&$body, $wrap) {
         // special case: if this is a blank line then maintain it
         // (i.e. try to preserve original paragraph breaks)
         // unless they occur at the very beginning of the text
-       if (($body{$pos} == "\n" ) && (strlen($outString) != 0)) {
-           $outStringLast = $outString{strlen($outString) - 1};
+       if ((sq_substr($body,$pos,1) == "\n" ) && (sq_strlen($outString) != 0)) {
+           $outStringLast = $outString{sq_strlen($outString) - 1};
             if ($outStringLast != "\n") {
                 $outString .= "\n";
             }
@@ -192,7 +190,7 @@ function &sqBodyWrap (&$body, $wrap) {
         }
  
         // find the next newline -- we don't want to go further than that
-       $nextNewline = strpos ($body, "\n", $pos);
+       $nextNewline = sq_strpos ($body, "\n", $pos);
         if ($nextNewline === FALSE) {
             $nextNewline = $length;
         }
@@ -201,7 +199,7 @@ function &sqBodyWrap (&$body, $wrap) {
         // will work fine for this.  Maybe revisit this later though
         // (for completeness more than anything else, I think)
         if ($citeLevel == 0) {
-           $outString .= substr ($body, $pos, ($nextNewline - $pos));
+           $outString .= sq_substr ($body, $pos, ($nextNewline - $pos));
             $outStringCol = $nextNewline - $pos;
             if ($nextNewline != $length) {
                 sqMakeNewLine ($outString, 0, $outStringCol);
@@ -217,7 +215,7 @@ function &sqBodyWrap (&$body, $wrap) {
         // the next newline
         while ($pos < $nextNewline) {
             // skip over initial spaces
-           while (($pos < $nextNewline) && (ctype_space ($body{$pos}))) {
+           while (($pos < $nextNewline) && (ctype_space (sq_substr($body,$pos,1)))) {
                 $pos++;
             }
             // if this is a short line then just append it and continue outer loop
@@ -225,25 +223,24 @@ function &sqBodyWrap (&$body, $wrap) {
                 // if this is the final line in the input string then include
                 // any trailing newlines
                 //      echo substr($body,$pos,$wrap). "<br />";
-               if (($nextNewline + 1 == $length) && ($body{$nextNewline} == "\n")) {
+               if (($nextNewline + 1 == $length) && (sq_substr($body,$nextNewline,1) == "\n")) {
                     $nextNewline++;
                 }
  
                 // trim trailing spaces
                 $lastRealChar = $nextNewline;
-               while (($lastRealChar > $pos && $lastRealChar < $length) && (ctype_space ($body{$lastRealChar}))) {
+               while (($lastRealChar > $pos && $lastRealChar < $length) && (ctype_space (sq_substr($body,$lastRealChar,1)))) {
                     $lastRealChar--;
                 }
                 // decide if appending the short string is what we want
-               if (($nextNewline < $length && $body{$nextNewline} == "\n") &&
+               if (($nextNewline < $length && sq_substr($body,$nextNewline,1) == "\n") &&
                       isset($lastRealChar)) {
                     $mypos = $pos;
                     //check the first word:
-                   while (($mypos < $length) && ($body{$mypos} == '>')) {
+                   while (($mypos < $length) && (sq_substr($body,$mypos,1) == '>')) {
                         $mypos++;
                         // skip over any spaces interleaved among the cite markers
-                       $oldpos = $mypos;
-                       while (($mypos < $length) && ($body{$mypos} == ' ')) {
+                       while (($mypos < $length) && (sq_substr($body,$mypos,1) == ' ')) {
                             $mypos++;
                         }
                     }
@@ -256,15 +253,15 @@ function &sqBodyWrap (&$body, $wrap) {
                       }
  */
  
-                   $firstword = substr($body,$mypos,strpos($body,' ',$mypos) - $mypos);
+                   $firstword = sq_substr($body,$mypos,sq_strpos($body,' ',$mypos) - $mypos);
                     //if ($dowrap || $ldnspacecnt > 1 || ($firstword && (
                     if (!$smartwrap || $firstword && (
                                          $firstword{0} == '-' ||
                                          $firstword{0} == '+' ||
                                          $firstword{0} == '*' ||
-                                        $firstword{0} == strtoupper($firstword{0}) ||
+                                        sq_substr($firstword,0,1) == sq_strtoupper(sq_substr($firstword,0,1)) ||
                                          strpos($firstword,':'))) {
-                        $outString .= substr($body,$pos,($lastRealChar - $pos+1));
+                        $outString .= sq_substr($body,$pos,($lastRealChar - $pos+1));
                          $outStringCol += ($lastRealChar - $pos);
                          sqMakeNewLine($outString,$citeLevel,$outStringCol);
                          $nextNewline++;
@@ -275,7 +272,7 @@ function &sqBodyWrap (&$body, $wrap) {
  
                 }
  
-               $outString .= substr ($body, $pos, ($lastRealChar - $pos + 1));
+               $outString .= sq_substr ($body, $pos, ($lastRealChar - $pos + 1));
                 $outStringCol += ($lastRealChar - $pos);
                 $pos = $nextNewline + 1;
                 continue;
@@ -294,7 +291,7 @@ function &sqBodyWrap (&$body, $wrap) {
  
             // start looking backwards for whitespace to break at.
             $breakPoint = $eol;
-           while (($breakPoint > $pos) && (! ctype_space ($body{$breakPoint}))) {
+           while (($breakPoint > $pos) && (! ctype_space (sq_substr($body,$breakPoint,1)))) {
                 $breakPoint--;
             }
  
@@ -327,13 +324,13 @@ function &sqBodyWrap (&$body, $wrap) {
             }
  
             // skip newlines or whitespace at the beginning of the string
-           $substring = substr ($body, $pos, ($breakPoint - $pos));
+           $substring = sq_substr ($body, $pos, ($breakPoint - $pos));
             $substring = rtrim ($substring); // do rtrim and ctype_space have the same ideas about whitespace?
             $outString .= $substring;
-           $outStringCol += strlen ($substring);
+           $outStringCol += sq_strlen ($substring);
             // advance past the whitespace which caused the wrap
             $pos = $breakPoint;
-           while (($pos < $length) && (ctype_space ($body{$pos}))) {
+           while (($pos < $length) && (ctype_space (sq_substr($body,$pos,1)))) {
                 $pos++;
             }
             if ($pos < $length) {
@@ -351,16 +348,18 @@ function &sqBodyWrap (&$body, $wrap) {
   * Has a problem with special HTML characters, so call this before
   * you do character translation.
   *
- * Specifically, &#039 comes up as 5 characters instead of 1.
+ * Specifically, &amp;#039; comes up as 5 characters instead of 1.
   * This should not add newlines to the end of lines.
   *
   * @param string line the line of text to wrap, by ref
   * @param int wrap the maximum line lenth
+ * @param string charset name of charset used in $line string. Available since v.1.5.1.
   * @return void
   */
-function sqWordWrap(&$line, $wrap) {
+function sqWordWrap(&$line, $wrap, $charset='') {
      global $languages, $squirrelmail_language;
  
+    // Use custom wrapping function, if translation provides it
      if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
          function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_wordwrap')) {
          if (mb_detect_encoding($line) != 'ASCII') {
@@ -383,9 +382,9 @@ function sqWordWrap(&$line, $wrap) {
      while ($i < count($words)) {
          /* Force one word to be on a line (minimum) */
          $line .= $words[$i];
-        $line_len = strlen($beginning_spaces) + strlen($words[$i]) + 2;
+        $line_len = strlen($beginning_spaces) + sq_strlen($words[$i],$charset) + 2;
          if (isset($words[$i + 1]))
-            $line_len += strlen($words[$i + 1]);
+            $line_len += sq_strlen($words[$i + 1],$charset);
          $i ++;
  
          /* Add more words (as long as they fit) */
@@ -393,7 +392,7 @@ function sqWordWrap(&$line, $wrap) {
              $line .= ' ' . $words[$i];
              $i++;
              if (isset($words[$i]))
-                $line_len += strlen($words[$i]) + 1;
+                $line_len += sq_strlen($words[$i],$charset) + 1;
              else
                  $line_len += 1;
          }
@@ -740,7 +739,7 @@ function show_readable_size($bytes) {
  }
  
  /**
- * Generates a random string from the caracter set you pass in
+ * Generates a random string from the character set you pass in
   *
   * @param int size the size of the string to generate
   * @param string chars a string containing the characters to use
@@ -837,7 +836,7 @@ function makeComposeLink($url, $text = null, $target='')
      // build the compose in new window link...
  
  
-    // if javascript is on, use onClick event to handle it
+    // if javascript is on, use onclick event to handle it
      if($javascript_on) {
          sqgetGlobalVar('base_uri', $base_uri, SQ_SESSION);
          return '<a href="javascript:void(0)" onclick="comp_in_new(\''.$base_uri.$url.'\')">'. $text.'</a>';
@@ -866,6 +865,19 @@ function sm_print_r() {
      foreach(func_get_args() as $var) {
          print_r($var);
          echo "\n";
+        // php has get_class_methods function that can print class methods
+        if (is_object($var)) {
+            // get class methods if $var is object
+            $aMethods=get_class_methods(get_class($var));
+            // make sure that $aMethods is array and array is not empty
+            if (is_array($aMethods) && $aMethods!=array()) {
+                echo "Object methods:\n";
+                foreach($aMethods as $method) {
+                    echo '* ' . $method . "\n";
+                }
+            }
+            echo "\n";
+        }
      }
      $buffer = ob_get_contents(); // Grab the print_r output
      ob_end_clean();  // Silently discard the output & stop buffering
@@ -955,7 +967,10 @@ function sq_get_html_translation_table($table,$quote_style=ENT_COMPAT,$charset='
   * sq_htmlentities
   *
   * Convert all applicable characters to HTML entities.
- * Minimal php requirement - v.4.0.5
+ * Minimal php requirement - v.4.0.5.
+ *
+ * Function is designed for people that want to use full power of htmlentities() in
+ * i18n environment.
   *
   * @param string $string string that has to be sanitized
   * @param integer $quote_style quote encoding style. Possible values (without quotes):
@@ -978,12 +993,12 @@ function sq_htmlentities($string,$quote_style=ENT_COMPAT,$charset='us-ascii') {
   * Tests if string contains 8bit symbols.
   *
   * If charset is not set, function defaults to default_charset.
- * $default_charset global must be set correctly if $charset is 
+ * $default_charset global must be set correctly if $charset is
   * not used.
   * @param string $string tested string
   * @param string $charset charset used in a string
   * @return bool true if 8bit symbols are detected
- * @since 1.5.1
+ * @since 1.5.1 and 1.4.4
   */
  function sq_is8bit($string,$charset='') {
      global $default_charset;
@@ -992,7 +1007,7 @@ function sq_is8bit($string,$charset='') {
  
      /**
       * Don't use \240 in ranges. Sometimes RH 7.2 doesn't like it.
-     * Don't use \200-\237 for iso-8859-x charsets. This ranges 
+     * Don't use \200-\237 for iso-8859-x charsets. This range
       * stores control symbols in those charsets.
       * Use preg_match instead of ereg in order to avoid problems
       * with mbstring overloading
@@ -1012,12 +1027,12 @@ function sq_is8bit($string,$charset='') {
   * in php 5.x. Function does not test all mbstring encodings. Only the ones
   * that might be used in SM translations.
   *
- * Supported arrays are stored in session in order to reduce number of 
+ * Supported strings are stored in session in order to reduce number of
   * mb_internal_encoding function calls.
   *
- * If you want to test all mbstring encodings - fill $list_of_encodings 
+ * If you want to test all mbstring encodings - fill $list_of_encodings
   * array.
- * @return array list of encodings supported by mbstring
+ * @return array list of encodings supported by php mbstring extension
   * @since 1.5.1
   */
  function sq_mb_list_encodings() {
@@ -1048,6 +1063,7 @@ function sq_mb_list_encodings() {
          'koi8-u',
          'big5',
          'gb2312',
+        'gb18030',
          'windows-1251',
          'windows-1255',
          'windows-1256',
@@ -1073,5 +1089,165 @@ function sq_mb_list_encodings() {
      return $supported_encodings;
  }
  
+/**
+ * Counts the characters in a string, with optional multibyte awareness.
+ *
+ * With an empty $charset (the default) the plain byte length reported by
+ * strlen() is returned. With $charset set to 'auto', set_my_charset() is
+ * called and the global $default_charset is used as the charset.
+ * mb_strlen() is used only when the lowercased charset is one of the
+ * multibyte charsets listed in the function body and is also present in
+ * the list returned by sq_mb_list_encodings(). In every other case the
+ * byte length is returned.
+ *
+ * @param string $str string
+ * @param string $charset charset name, 'auto' or empty string
+ * @since 1.5.1
+ * @return integer number of characters in string
+ */
+function sq_strlen($str, $charset=''){
+    // no charset requested - byte length is the answer
+    if ($charset=='') {
+        return strlen($str);
+    }
+
+    // 'auto' means "use the charset stored in $default_charset"
+    if ($charset=='auto') {
+        global $default_charset;
+        set_my_charset();
+        $charset=$default_charset;
+    }
+
+    // charset names are compared in lowercase
+    $charset=strtolower($charset);
+
+    // mbstring is used only with these charsets
+    $aMultibyteCharsets=array('utf-8','big5','gb2312','gb18030','euc-jp','euc-cn','euc-tw','euc-kr');
+
+    // Anything that is not a listed multibyte charset, or that mbstring
+    // does not support, is measured in bytes. Own strlen detection code is
+    // removed because missing strpos, strtoupper and substr
+    // implementations break string wrapping.
+    if (!in_array($charset,$aMultibyteCharsets) || !in_array($charset,sq_mb_list_encodings())) {
+        return strlen($str);
+    }
+
+    return mb_strlen($str,$charset);
+}
+
+/**
+ * string padding with multibyte support
+ *
+ * str_pad() measures width in bytes. For the multibyte charsets listed in
+ * the switch statement the requested width is enlarged by the difference
+ * between the byte length and the character length of $string, so that
+ * the padded result has $width characters. The charset list matches the
+ * multibyte charsets handled by sq_strlen(); keep both lists in sync.
+ * For any other charset value the call is a plain str_pad().
+ *
+ * @link http://www.php.net/str_pad
+ * @param string $string original string
+ * @param integer $width padded string width
+ * @param string $pad padding symbols
+ * @param integer $padtype padding type
+ *  (internal php defines, see str_pad() description)
+ * @param string $charset charset used in original string
+ * @return string padded string
+ */
+function sq_str_pad($string, $width, $pad, $padtype, $charset='') {
+
+    $charset = strtolower($charset);
+
+    switch ($charset) {
+    case 'utf-8':
+    case 'big5':
+    case 'gb2312':
+    case 'gb18030':
+    case 'euc-jp':
+    case 'euc-cn':
+    case 'euc-tw':
+    case 'euc-kr':
+        /*
+         * all multibyte charsets try to increase width value by
+         * adding difference between number of bytes and real length
+         */
+        $width = $width - sq_strlen($string,$charset) + strlen($string);
+        // no break: the adjusted width is consumed by the padding call below
+    default:
+        $padded_string=str_pad($string,$width,$pad,$padtype);
+    }
+    return $padded_string;
+}
+
+/**
+ * Returns part of a string, using mb_substr() when it can be used and
+ * substr() otherwise.
+ *
+ * With $charset set to 'auto' (the default), set_my_charset() is called
+ * and the global $default_charset is used as the charset. mb_substr() is
+ * chosen when the mb_internal_encoding() function exists and the
+ * lowercased charset is present in the list returned by
+ * sq_mb_list_encodings(). In that case $start and $length are passed to
+ * mb_substr() together with the charset; otherwise they are passed to
+ * substr().
+ *
+ * @param string $string
+ * @param integer $start
+ * @param integer $length
+ * @param string $charset
+ * @return string
+ * @since 1.5.1
+ * @link http://www.php.net/substr
+ * @link http://www.php.net/mb_substr
+ */
+function sq_substr($string,$start,$length,$charset='auto') {
+    // resolve 'auto' to the charset stored in $default_charset
+    if ($charset=='auto') {
+        global $default_charset;
+        set_my_charset();
+        $charset=$default_charset;
+    }
+    $charset = strtolower($charset);
+
+    // mbstring must be loaded and must know the charset
+    $bUseMbstring = (function_exists('mb_internal_encoding') &&
+                     in_array($charset,sq_mb_list_encodings()));
+
+    // TODO: add mbstring independent code
+
+    // vanilla string function is the last option
+    return ($bUseMbstring ? mb_substr($string,$start,$length,$charset)
+                          : substr($string,$start,$length));
+}
+
+/**
+ * Wrapper that is used to switch between vanilla and multibyte strpos
+ * functions.
+ *
+ * With $charset set to 'auto' (the default), set_my_charset() is called
+ * and the global $default_charset is used as the charset. mb_strpos() is
+ * used when the mb_internal_encoding() function exists and the lowercased
+ * charset is present in the list returned by sq_mb_list_encodings();
+ * otherwise strpos() is used.
+ *
+ * The result can be 0 when the needle is found at the very beginning of
+ * the haystack, so compare it against FALSE with the === operator.
+ *
+ * @param string $haystack
+ * @param mixed $needle
+ * @param integer $offset search start position, defaults to 0 like in
+ *  strpos() and mb_strpos()
+ * @param string $charset
+ * @return mixed integer position of the first occurrence of $needle, or
+ *  boolean FALSE when $needle is not found
+ * @since 1.5.1
+ * @link http://www.php.net/strpos
+ * @link http://www.php.net/mb_strpos
+ */
+function sq_strpos($haystack,$needle,$offset=0,$charset='auto') {
+    // use automatic charset detection, if function call asks for it
+    if ($charset=='auto') {
+        global $default_charset;
+        set_my_charset();
+        $charset=$default_charset;
+    }
+    $charset = strtolower($charset);
+    if (function_exists('mb_internal_encoding') &&
+        in_array($charset,sq_mb_list_encodings())) {
+        return mb_strpos($haystack,$needle,$offset,$charset);
+    }
+    // TODO: add mbstring independent code
+
+    // use vanilla string functions as last option
+    return strpos($haystack,$needle,$offset);
+}
+
+/**
+ * Converts a string to uppercase, using mb_strtoupper() when it can be
+ * used and strtoupper() otherwise.
+ *
+ * With $charset set to 'auto' (the default), set_my_charset() is called
+ * and the global $default_charset is used as the charset. The vanilla
+ * strtoupper() is used when the mb_internal_encoding() function does not
+ * exist or when the lowercased charset is not present in the list
+ * returned by sq_mb_list_encodings().
+ *
+ * @param string $string
+ * @param string $charset
+ * @return string
+ * @since 1.5.1
+ * @link http://www.php.net/strtoupper
+ * @link http://www.php.net/mb_strtoupper
+ */
+function sq_strtoupper($string,$charset='auto') {
+    // resolve 'auto' to the charset stored in $default_charset
+    if ($charset=='auto') {
+        global $default_charset;
+        set_my_charset();
+        $charset=$default_charset;
+    }
+    $charset = strtolower($charset);
+
+    // TODO: add mbstring independent code
+
+    // without mbstring, or with a charset it does not know, the vanilla
+    // string function is the last option
+    if (!function_exists('mb_internal_encoding') ||
+        !in_array($charset,sq_mb_list_encodings())) {
+        return strtoupper($string);
+    }
+
+    return mb_strtoupper($string,$charset);
+}
  $PHP_SELF = php_self();
  ?>
 \ No newline at end of file