adding charset conversion functions. utf_8.php is taken from www.php.net samples.

author tokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>

Sun, 16 May 2004 08:43:50 +0000 (08:43 +0000)

committer tokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>

Sun, 16 May 2004 08:43:50 +0000 (08:43 +0000)
author tokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>
Sun, 16 May 2004 08:43:50 +0000 (08:43 +0000)
committer tokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>
Sun, 16 May 2004 08:43:50 +0000 (08:43 +0000)
diff --git a/functions/encode/utf_8.php b/functions/encode/utf_8.php

new file mode 100644 (file)

index 0000000..7a87e92
--- /dev/null
+++ b/functions/encode/utf_8.php
@@ -0,0 +1,71 @@
+<?php
+/**
+* Converts decimal numeric character references in a string to UTF-8.
+*
+* Every reference of the form &#nnn; (n = 0..9) is replaced by the UTF-8
+* byte sequence of that code point. All other text is copied to the output
+* unchanged, including plain text that contains a bare ';' and malformed
+* references such as "&#abc;" or "&#12" without the closing ';'.
+* References that cannot be represented in UTF-8 (surrogates 0xD800-0xDFFF
+* and values above 0x10FFFF) are also left as they are.
+*
+* @param string $source string with decimal numeric entities
+* @return string utf-8 encoded string
+* @access public
+*/
+function charset_encode_utf_8 ($source) {
+   $chunks = explode ('&#', $source);
+   $size = count ($chunks);
+   // The first chunk precedes the first "&#", so it can never be an entity.
+   $utf8Str = $chunks[0];
+   for ($i = 1; $i < $size; $i++) {
+       $chunk = $chunks[$i];
+       // A valid reference is one or more digits directly followed by ';'.
+       $digits = strspn ($chunk, '0123456789');
+       $encoded = false;
+       if ($digits > 0 && substr ($chunk, $digits, 1) === ';') {
+           // intval() saturates on overlong digit strings, which then fall
+           // through every range check below and stay unconverted.
+           $unicode = intval (substr ($chunk, 0, $digits));
+           if ($unicode < 0x80) {
+               // 1 byte: 0xxxxxxx
+               $encoded = chr ($unicode);
+           }
+           else if ($unicode < 0x800) {
+               // 2 bytes: 110xxxxx 10xxxxxx
+               $encoded = chr (0xC0 | ($unicode >> 6))
+                        . chr (0x80 | ($unicode & 0x3F));
+           }
+           else if ($unicode < 0x10000) {
+               // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+               // UTF-16 surrogates are not valid code points in UTF-8.
+               if ($unicode < 0xD800 || $unicode > 0xDFFF) {
+                   $encoded = chr (0xE0 | ($unicode >> 12))
+                            . chr (0x80 | (($unicode >> 6) & 0x3F))
+                            . chr (0x80 | ($unicode & 0x3F));
+               }
+           }
+           else if ($unicode <= 0x10FFFF) {
+               // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+               $encoded = chr (0xF0 | ($unicode >> 18))
+                        . chr (0x80 | (($unicode >> 12) & 0x3F))
+                        . chr (0x80 | (($unicode >> 6) & 0x3F))
+                        . chr (0x80 | ($unicode & 0x3F));
+           }
+       }
+
+       if ($encoded === false) {
+           // Not convertible: put back the "&#" that explode() removed.
+           $utf8Str .= '&#' . $chunk;
+       }
+       else {
+           // Text after the terminating ';' is ordinary content. The length
+           // check avoids substr() returning false at the end of the chunk.
+           if (strlen ($chunk) > $digits + 1)
+               $utf8Str .= $encoded . substr ($chunk, $digits + 1);
+           else
+               $utf8Str .= $encoded;
+       }
+   }
+
+   return $utf8Str;
+}
+?>
+\ No newline at end of file
diff --git a/functions/i18n.php b/functions/i18n.php

index cbe85a378f7a7f1dd7d0e3c19b70d507a567aac6..d2dfa852e447e9ab0107db943ddedcd7d8dcf783 100644 (file)
--- a/functions/i18n.php
+++ b/functions/i18n.php
@@ -1,7 +1,7 @@
  <?php
  
  /**
- * functions/i18n.php
+ * SquirrelMail internationalization functions
   *
   * Copyright (c) 1999-2004 The SquirrelMail Project Team
   * Licensed under the GNU GPL. For full terms see the file COPYING.
@@ -107,6 +107,42 @@ function charset_decode ($charset, $string) {
      return( $ret );
  }
  
+/**
+ * Converts html string to given charset
+ *
+ * Looks for functions/encode/<charset>.php under SM_PATH and, when that file
+ * provides a charset_encode_<charset>() function, passes the string through
+ * it. When no encoder is available the string is returned unchanged.
+ * @param string $string html string that has to be converted
+ * @param string $charset name of the target charset
+ * @return string converted string, or $string itself when no encoder exists
+ */
+function charset_encode($string,$charset) {
+  // NOTE(review): fixcharset() is defined elsewhere; it is assumed to turn
+  // the charset name into something safe for file and function names,
+  // because the result is used in an include path below - confirm.
+  $encode=fixcharset($charset);
+  $encodefile=SM_PATH . 'functions/encode/' . $encode . '.php';
+  $encodefunc='charset_encode_' . $encode;
+  $ret = $string;
+  if (file_exists($encodefile)) {
+    include_once($encodefile);
+    // The file may exist without defining the expected encoder; only call
+    // the function when it is really there.
+    if (function_exists($encodefunc)) {
+      $ret = call_user_func($encodefunc, $string);
+    }
+  }
+  return( $ret );
+}
+
+/**
+ * Converts a string from one charset to another
+ *
+ * The string is first passed through charset_decode() with the initial
+ * charset and the result is then passed through charset_encode() with the
+ * final charset.
+ *
+ * If the final charset is something other than utf-8, symbols that it does
+ * not support are expected to be replaced with question marks (this depends
+ * on the encoder used for that charset).
+ * @param string $in_charset initial charset
+ * @param string $string string that has to be converted
+ * @param string $out_charset final charset
+ * @return string converted string
+ */
+function charset_convert($in_charset,$string,$out_charset) {
+  return charset_encode(charset_decode($in_charset,$string),$out_charset);
+}
+
  /**
   * Makes charset name suitable for decoding cycles
   *
author	tokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>
	Sun, 16 May 2004 08:43:50 +0000 (08:43 +0000)
committer	tokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>
	Sun, 16 May 2004 08:43:50 +0000 (08:43 +0000)
functions/encode/utf_8.php	[new file with mode: 0644]	patch \| blob
functions/i18n.php		patch \| blob \| blame \| history