--- /dev/null
+<?php
+/**
+ * Converts decimal numeric character references to UTF-8.
+ *
+ * Every well-formed reference of the form &#nnn; (one or more decimal
+ * digits) that names a Unicode scalar value is replaced by its UTF-8 byte
+ * sequence. Everything else is copied through unchanged. That includes:
+ *  - text in front of the first reference, even when it contains ';'
+ *  - a "&#" that is not followed by decimal digits and a ';'
+ *  - references to surrogates (55296..57343) or to values above 1114111,
+ *    because those have no valid UTF-8 encoding
+ *
+ * @param string $source text containing decimal numeric character references
+ * @return string UTF-8 encoded string
+ * @access public
+ */
+function charset_encode_utf_8 ($source) {
+    $pieces = explode('&#', $source);
+
+    // Whatever precedes the first "&#" is plain text, never a reference.
+    $utf8Str = $pieces[0];
+
+    $size = count($pieces);
+    for ($i = 1; $i < $size; $i++) {
+        $piece = $pieces[$i];
+        $end = strpos($piece, ';');
+        $digits = ($end === false) ? '' : (string) substr($piece, 0, $end);
+
+        // A reference needs one or more decimal digits directly before ';'.
+        $unicode = -1;
+        if (strlen($digits) > 0 && strspn($digits, '0123456789') === strlen($digits)) {
+            $unicode = intval($digits);
+        }
+
+        if ($unicode < 0 || $unicode > 0x10FFFF
+            || ($unicode >= 0xD800 && $unicode <= 0xDFFF)) {
+            // Not a usable reference: put back the delimiter explode() removed
+            // and keep the text exactly as it was written.
+            $utf8Str .= '&#' . $piece;
+            continue;
+        }
+
+        // Pick the sequence length from the code point's magnitude, then
+        // spread its bits over a lead byte and 6-bit continuation bytes.
+        if ($unicode < 0x80) {
+            $utf8Substring = chr($unicode);
+        } elseif ($unicode < 0x800) {
+            $utf8Substring = chr(0xC0 | ($unicode >> 6))
+                . chr(0x80 | ($unicode & 0x3F));
+        } elseif ($unicode < 0x10000) {
+            $utf8Substring = chr(0xE0 | ($unicode >> 12))
+                . chr(0x80 | (($unicode >> 6) & 0x3F))
+                . chr(0x80 | ($unicode & 0x3F));
+        } else {
+            $utf8Substring = chr(0xF0 | ($unicode >> 18))
+                . chr(0x80 | (($unicode >> 12) & 0x3F))
+                . chr(0x80 | (($unicode >> 6) & 0x3F))
+                . chr(0x80 | ($unicode & 0x3F));
+        }
+
+        // Append the encoded character plus the text that followed the ';'.
+        $utf8Str .= $utf8Substring . (string) substr($piece, $end + 1);
+    }
+
+    return $utf8Str;
+}
+?>
\ No newline at end of file
<?php
/**
- * functions/i18n.php
+ * SquirrelMail internationalization functions
*
* Copyright (c) 1999-2004 The SquirrelMail Project Team
* Licensed under the GNU GPL. For full terms see the file COPYING.
return( $ret );
}
+/**
+ * Converts html string to given charset
+ *
+ * Looks for functions/encode/<name>.php under SM_PATH, where <name> is the
+ * value fixcharset() returns for $charset, includes it and hands the string
+ * to the charset_encode_<name>() function. The string is returned unchanged
+ * when the name is not a plain identifier, the file does not exist, or the
+ * file does not define that function.
+ *
+ * @param string $string string to convert
+ * @param string $charset name of the target charset
+ * @return string converted string, or the input when no encoder is available
+ */
+function charset_encode($string,$charset) {
+    $encode = fixcharset($charset);
+
+    // The name is used both as a file-name component and as a function-name
+    // suffix, so anything other than identifier characters cannot name a
+    // usable encoder; rejecting it also keeps the include path inside the
+    // encode directory.
+    if (!preg_match('/^[A-Za-z0-9_]+\z/', $encode)) {
+        return( $string );
+    }
+
+    $encodefile = SM_PATH . 'functions/encode/' . $encode . '.php';
+    $encodefunc = 'charset_encode_' . $encode;
+    $ret = $string;
+
+    if (file_exists($encodefile)) {
+        include_once($encodefile);
+        // Only call the encoder when the included file really defined it.
+        if (function_exists($encodefunc)) {
+            $ret = call_user_func($encodefunc, $string);
+        }
+    }
+    return( $ret );
+}
+
+/**
+ * Re-encodes a string from one charset into another
+ *
+ * The string is first passed through charset_decode() with the initial
+ * charset, and that result is then passed through charset_encode() with the
+ * final charset.
+ *
+ * If the final charset is something other than utf-8, the encoders are
+ * expected to put question marks in place of symbols they do not support.
+ * @param string $in_charset initial charset
+ * @param string $string string that has to be converted
+ * @param string $out_charset final charset
+ * @return string converted string
+ */
+function charset_convert($in_charset,$string,$out_charset) {
+    return charset_encode(charset_decode($in_charset, $string), $out_charset);
+}
+
/**
* Makes charset name suitable for decoding cycles
*