documentation update

[squirrelmail.git] / functions / strings.php
diff --git a/functions/strings.php b/functions/strings.php

index 924f2f1bb9f5a10cc001b116f3d628dbf5baba5b..49addede63768e15b53622b685a6e9bb7a716551 100644 (file)
--- a/functions/strings.php
+++ b/functions/strings.php
@@ -3,11 +3,11 @@
  /**
   * strings.php
   *
- * Copyright (c) 1999-2004 The SquirrelMail Project Team
+ * Copyright (c) 1999-2005 The SquirrelMail Project Team
   * Licensed under the GNU GPL. For full terms see the file COPYING.
   *
   * This code provides various string manipulation functions that are
- * used by the rest of the Squirrelmail code.
+ * used by the rest of the SquirrelMail code.
   *
   * @version $Id$
   * @package squirrelmail
@@ -96,11 +96,11 @@ function sqMakeNewLine (&$str, $citeLevel, &$column) {
   * @return bool true when only whitespace symbols are present in test string
   */
  function sm_ctype_space($string) {
-  if ( preg_match('/^[\x09-\x0D]|^\x20/', $string) || $string=='') {
-    return true;
-  } else {
-    return false;
-  }
+    if ( preg_match('/^[\x09-\x0D]|^\x20/', $string) || $string=='') {
+        return true;
+    } else {
+        return false;
+    }
  }
  
  /**
@@ -213,7 +213,6 @@ function &sqBodyWrap (&$body, $wrap) {
          * Set this to false to stop appending short strings to previous lines
          */
         $smartwrap = true;
-
         // inner loop, (obviously) handles wrapping up to
         // the next newline
         while ($pos < $nextNewline) {
@@ -221,8 +220,6 @@ function &sqBodyWrap (&$body, $wrap) {
             while (($pos < $nextNewline) && (ctype_space ($body{$pos}))) {
                 $pos++;
             }
-
-
             // if this is a short line then just append it and continue outer loop
             if (($outStringCol + $nextNewline - $pos) <= ($wrap - $citeLevel - 1) ) {
                 // if this is the final line in the input string then include
@@ -237,13 +234,11 @@ function &sqBodyWrap (&$body, $wrap) {
                 while (($lastRealChar > $pos && $lastRealChar < $length) && (ctype_space ($body{$lastRealChar}))) {
                     $lastRealChar--;
                 }
-
                 // decide if appending the short string is what we want
                 if (($nextNewline < $length && $body{$nextNewline} == "\n") &&
                       isset($lastRealChar)) {
-
-                     //check the first word:
-                   $mypos = $nextNewline+1;
+                   $mypos = $pos;
+                   //check the first word:
                     while (($mypos < $length) && ($body{$mypos} == '>')) {
                         $mypos++;
                         // skip over any spaces interleaved among the cite markers
@@ -261,7 +256,6 @@ function &sqBodyWrap (&$body, $wrap) {
  */
  
                     $firstword = substr($body,$mypos,strpos($body,' ',$mypos) - $mypos);
-
                     //if ($dowrap || $ldnspacecnt > 1 || ($firstword && (
                     if (!$smartwrap || $firstword && (
                                          $firstword{0} == '-' ||
@@ -356,20 +350,22 @@ function &sqBodyWrap (&$body, $wrap) {
   * Has a problem with special HTML characters, so call this before
   * you do character translation.
   *
- * Specifically, &#039 comes up as 5 characters instead of 1.
+ * Specifically, &amp;#039; comes up as 5 characters instead of 1.
   * This should not add newlines to the end of lines.
   *
   * @param string line the line of text to wrap, by ref
   * @param int wrap the maximum line length
+ * @param string charset name of charset used in $line string. Available since v.1.5.1.
   * @return void
   */
-function sqWordWrap(&$line, $wrap) {
+function sqWordWrap(&$line, $wrap, $charset='') {
      global $languages, $squirrelmail_language;
  
+    // Use custom wrapping function, if translation provides it
      if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
-        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
+        function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_wordwrap')) {
          if (mb_detect_encoding($line) != 'ASCII') {
-            $line = $languages[$squirrelmail_language]['XTRA_CODE']('wordwrap', $line, $wrap);
+            $line = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_wordwrap', $line, $wrap);
              return;
          }
      }
@@ -388,9 +384,9 @@ function sqWordWrap(&$line, $wrap) {
      while ($i < count($words)) {
          /* Force one word to be on a line (minimum) */
          $line .= $words[$i];
-        $line_len = strlen($beginning_spaces) + strlen($words[$i]) + 2;
+        $line_len = strlen($beginning_spaces) + sq_strlen($words[$i],$charset) + 2;
          if (isset($words[$i + 1]))
-            $line_len += strlen($words[$i + 1]);
+            $line_len += sq_strlen($words[$i + 1],$charset);
          $i ++;
  
          /* Add more words (as long as they fit) */
@@ -398,7 +394,7 @@ function sqWordWrap(&$line, $wrap) {
              $line .= ' ' . $words[$i];
              $i++;
              if (isset($words[$i]))
-                $line_len += strlen($words[$i]) + 1;
+                $line_len += sq_strlen($words[$i],$charset) + 1;
              else
                  $line_len += 1;
          }
@@ -647,7 +643,7 @@ function sq_mt_seed($Val) {
          $Val *= -1;
      }
  
-    if ($Val = 0) {
+    if ($Val == 0) {
          return;
      }
  
@@ -745,7 +741,7 @@ function show_readable_size($bytes) {
  }
  
  /**
- * Generates a random string from the caracter set you pass in
+ * Generates a random string from the character set you pass in
   *
   * @param int size the size of the string to generate
   * @param string chars a string containing the characters to use
@@ -842,7 +838,7 @@ function makeComposeLink($url, $text = null, $target='')
      // build the compose in new window link...
  
  
-    // if javascript is on, use onClick event to handle it
+    // if javascript is on, use onclick event to handle it
      if($javascript_on) {
          sqgetGlobalVar('base_uri', $base_uri, SQ_SESSION);
          return '<a href="javascript:void(0)" onclick="comp_in_new(\''.$base_uri.$url.'\')">'. $text.'</a>';
@@ -871,6 +867,19 @@ function sm_print_r() {
      foreach(func_get_args() as $var) {
          print_r($var);
          echo "\n";
+        // php has get_class_methods function that can print class methods
+        if (is_object($var)) {
+            // get class methods if $var is object
+            $aMethods=get_class_methods(get_class($var));
+            // make sure that $aMethods is array and array is not empty
+            if (is_array($aMethods) && $aMethods!=array()) {
+                echo "Object methods:\n";
+                foreach($aMethods as $method) {
+                    echo '* ' . $method . "\n";
+                }
+            }
+            echo "\n";
+        }
      }
      $buffer = ob_get_contents(); // Grab the print_r output
      ob_end_clean();  // Silently discard the output & stop buffering
@@ -960,7 +969,10 @@ function sq_get_html_translation_table($table,$quote_style=ENT_COMPAT,$charset='
   * sq_htmlentities
   *
   * Convert all applicable characters to HTML entities.
- * Minimal php requirement - v.4.0.5
+ * Minimal php requirement - v.4.0.5.
+ *
+ * Function is designed for people that want to use full power of htmlentities() in
+ * i18n environment.
   *
   * @param string $string string that has to be sanitized
   * @param integer $quote_style quote encoding style. Possible values (without quotes):
@@ -979,5 +991,243 @@ function sq_htmlentities($string,$quote_style=ENT_COMPAT,$charset='us-ascii') {
    return str_replace(array_keys($sq_html_ent_table),array_values($sq_html_ent_table),$string);
  }
  
+/**
+ * Tests if string contains 8bit symbols.
+ *
+ * If $charset is empty, the function falls back to the $default_charset
+ * global, which must be set correctly in that case.
+ *
+ * For charsets whose name starts with "iso-8859" only bytes \240-\377 are
+ * treated as 8bit symbols, because \200-\237 store control symbols in
+ * those charsets. For every other charset any byte in \200-\377 counts.
+ *
+ * @param string $string tested string
+ * @param string $charset charset used in a string
+ * @return bool true if 8bit symbols are detected
+ * @since 1.5.1 and 1.4.4
+ */
+function sq_is8bit($string,$charset='') {
+    global $default_charset;
+
+    if ($charset=='') $charset=$default_charset;
+
+    /**
+     * Don't use \240 in ranges. Sometimes RH 7.2 doesn't like it.
+     * Don't use \200-\237 for iso-8859-x charsets. This range
+     * stores control symbols in those charsets.
+     * Use preg_match instead of ereg in order to avoid problems
+     * with mbstring overloading
+     */
+    if (preg_match("/^iso-8859/i",$charset)) {
+        $needle='/\240|[\241-\377]/';
+    } else {
+        $needle='/[\200-\237]|\240|[\241-\377]/';
+    }
+    // preg_match() returns an integer match count (or false on error);
+    // cast it so callers really get the boolean the docblock promises.
+    return (bool) preg_match($needle,$string);
+}
+
+/**
+ * Replacement of mb_list_encodings function
+ *
+ * This function provides replacement for function that is available only
+ * in php 5.x. Function does not test all mbstring encodings. Only the ones
+ * that might be used in SM translations.
+ *
+ * Each candidate encoding is probed by trying to select it with
+ * mb_internal_encoding(); the original internal encoding is restored
+ * afterwards.
+ *
+ * Supported strings are stored in session in order to reduce number of
+ * mb_internal_encoding function calls.
+ *
+ * If you want to test all mbstring encodings - fill $list_of_encoding
+ * array.
+ * @return array list of encodings supported by php mbstring extension;
+ *  empty array when mb_internal_encoding() is not available
+ * @since 1.5.1
+ */
+function sq_mb_list_encodings() {
+    // without mbstring there is nothing to probe
+    if (! function_exists('mb_internal_encoding'))
+        return array();
+
+    // don't try to test encodings, if they are already stored in session
+    if (sqgetGlobalVar('mb_supported_encodings',$mb_supported_encodings,SQ_SESSION))
+        return $mb_supported_encodings;
+
+    // save original encoding
+    $orig_encoding=mb_internal_encoding();
+
+    // candidate encodings that are probed below
+    $list_of_encoding=array(
+        'pass',
+        'auto',
+        'ascii',
+        'jis',
+        'utf-8',
+        'sjis',
+        'euc-jp',
+        'iso-8859-1',
+        'iso-8859-2',
+        'iso-8859-7',
+        'iso-8859-9',
+        'iso-8859-15',
+        'koi8-r',
+        'koi8-u',
+        'big5',
+        'gb2312',
+        'windows-1251',
+        'windows-1255',
+        'windows-1256',
+        'tis-620',
+        'iso-2022-jp',
+        'euc-kr',
+        'utf7-imap');
+
+    $supported_encodings=array();
+
+    foreach ($list_of_encoding as $encoding) {
+        // try setting encodings. suppress warning messages
+        // (an encoding is kept only when the call reports success)
+        if (@mb_internal_encoding($encoding))
+            $supported_encodings[]=$encoding;
+    }
+
+    // restore original encoding
+    mb_internal_encoding($orig_encoding);
+
+    // register list in session
+    // NOTE(review): presumably this is what the sqgetGlobalVar() lookup above
+    // reads back on later calls - confirm against sqsession_register()
+    sqsession_register($supported_encodings,'mb_supported_encodings');
+
+    return $supported_encodings;
+}
+
+/**
+ * Function returns number of characters in string.
+ *
+ * Returned number might be different from number of bytes in string,
+ * if $charset is multibyte charset. Currently only utf-8 charset is
+ * supported; every other charset falls back to the byte length.
+ *
+ * For utf-8, mb_strlen() is used when it is available. Otherwise the
+ * string is scanned byte by byte: a lead byte followed by the expected
+ * number of continuation bytes (\x80-\xBF) counts as one character, and
+ * any byte that does not start such a sequence counts as one character
+ * on its own.
+ *
+ * @param string $str string
+ * @param string $charset charset. Empty string means "count bytes",
+ *  'auto' means "use $default_charset after calling set_my_charset()".
+ * @since 1.5.1
+ * @return integer number of characters in string
+ */
+function sq_strlen($str, $charset=''){
+    // default option
+    if ($charset=='') return strlen($str);
+
+    // use automatic charset detection, if function call asks for it
+    if ($charset=='auto') {
+        global $default_charset;
+        set_my_charset();
+        $charset=$default_charset;
+    }
+
+    // lowercase charset name
+    $charset=strtolower($charset);
+
+    // set initial returned length number
+    $real_length=0;
+
+    // calculate string length according to charset
+    // function can be modulized same way we modulize decode/encode/htmlentities
+    if ($charset=='utf-8') {
+        if (function_exists('mb_strlen')) {
+            $real_length = mb_strlen($str,'utf-8');
+        } else {
+            // function needs length of string in bytes.
+            // mbstring overloading might break it
+            $str_length=strlen($str);
+            $str_index=0;
+            while ($str_index < $str_length) {
+                // start of internal utf-8 multibyte character detection
+                // isset() on the last expected byte guarantees that every
+                // index read in the same condition exists
+                if (preg_match("/[\xC0-\xDF]/",$str[$str_index]) &&
+                    isset($str[$str_index+1]) &&
+                    preg_match("/[\x80-\xBF]/",$str[$str_index+1])) {
+                    // two byte utf-8
+                    $str_index=$str_index+2;
+                    $real_length++;
+                } elseif (preg_match("/[\xE0-\xEF]/",$str[$str_index]) &&
+                    isset($str[$str_index+2]) &&
+                    preg_match("/[\x80-\xBF][\x80-\xBF]/",$str[$str_index+1].$str[$str_index+2])) {
+                    // three byte utf-8
+                    $str_index=$str_index+3;
+                    $real_length++;
+                } elseif (preg_match("/[\xF0-\xF7]/",$str[$str_index]) &&
+                    isset($str[$str_index+3]) &&
+                    preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF]/",$str[$str_index+1].$str[$str_index+2].$str[$str_index+3])) {
+                    // four byte utf-8
+                    $str_index=$str_index+4;
+                    $real_length++;
+                } elseif (preg_match("/[\xF8-\xFB]/",$str[$str_index]) &&
+                    isset($str[$str_index+4]) &&
+                    preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF]/",
+                               $str[$str_index+1].$str[$str_index+2].$str[$str_index+3].$str[$str_index+4])) {
+                    // five byte utf-8
+                    $str_index=$str_index+5;
+                    $real_length++;
+                } elseif (preg_match("/[\xFC-\xFD]/",$str[$str_index]) &&
+                    isset($str[$str_index+5]) &&
+                    preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF]/",
+                               $str[$str_index+1].$str[$str_index+2].$str[$str_index+3].$str[$str_index+4].$str[$str_index+5])) {
+                    // six byte utf-8: the lead byte must be followed by FIVE
+                    // continuation bytes. A four-byte pattern here would let
+                    // one of the five trailing bytes go unchecked.
+                    $str_index=$str_index+6;
+                    $real_length++;
+                } else {
+                    // ascii byte, or a byte that does not start a valid
+                    // sequence: count it as a single character
+                    $str_index++;
+                    $real_length++;
+                }
+                // end of internal utf-8 multibyte character detection
+            }
+        }
+        // end of utf-8 length detection
+    } elseif ($charset=='big5') {
+        // TODO: add big5 string length detection
+        $real_length=strlen($str);
+    } elseif ($charset=='gb2312') {
+        // TODO: add gb2312 string length detection
+        $real_length=strlen($str);
+    } elseif ($charset=='gb18030') {
+        // TODO: add gb18030 string length detection
+        $real_length=strlen($str);
+    } elseif ($charset=='euc-jp') {
+        // TODO: add euc-jp string length detection
+        $real_length=strlen($str);
+    } elseif ($charset=='euc-cn') {
+        // TODO: add euc-cn string length detection
+        $real_length=strlen($str);
+    } elseif ($charset=='euc-tw') {
+        // TODO: add euc-tw string length detection
+        $real_length=strlen($str);
+    } elseif ($charset=='euc-kr') {
+        // TODO: add euc-kr string length detection
+        $real_length=strlen($str);
+    } else {
+        // single byte charset (or unknown one): one byte is one character
+        $real_length=strlen($str);
+    }
+    return $real_length;
+}
+
+/**
+ * Multibyte-aware wrapper around str_pad().
+ *
+ * str_pad() measures the target width in bytes. For the multibyte
+ * charsets listed below the requested width is therefore enlarged by the
+ * difference between the byte length and the character length (as
+ * reported by sq_strlen()) of the original string before padding.
+ * Any other charset is padded with the width exactly as given.
+ *
+ * @link http://www.php.net/str_pad
+ * @param string $string original string
+ * @param integer $width padded string width
+ * @param string $pad padding symbols
+ * @param integer $padtype padding type
+ *  (internal php defines, see str_pad() description)
+ * @param string $charset charset used in original string
+ * @return string padded string
+ */
+function sq_str_pad($string, $width, $pad, $padtype, $charset='') {
+    // charsets for which the byte count and character count may differ
+    $multibyte_charsets = array('utf-8', 'big5', 'gb2312', 'euc-kr');
+
+    $lc_charset = strtolower($charset);
+    if (in_array($lc_charset, $multibyte_charsets)) {
+        // widen the target by (bytes - characters) of the input string
+        $width = $width - sq_strlen($string,$lc_charset) + strlen($string);
+    }
+
+    return str_pad($string,$width,$pad,$padtype);
+}
  $PHP_SELF = php_self();
  ?>
 \ No newline at end of file