From 78086b294b663875a6192edbe8f337da96ccdb2b Mon Sep 17 00:00:00 2001 From: tokul Date: Thu, 26 Aug 2004 18:39:20 +0000 Subject: [PATCH] rewriting utf8 encoding function. preg_replace is safer than splitting into arrays. now function will support all symbols until somebody comes up with symbol No 2^31+1 :) git-svn-id: https://svn.code.sf.net/p/squirrelmail/code/trunk/squirrelmail@7962 7612ce4b-ef26-0410-bec9-ea0150e637f0 --- functions/encode/utf_8.php | 164 ++++++++++++++++++++++--------------- 1 file changed, 98 insertions(+), 66 deletions(-) diff --git a/functions/encode/utf_8.php b/functions/encode/utf_8.php index d455e109..c1c45b52 100644 --- a/functions/encode/utf_8.php +++ b/functions/encode/utf_8.php @@ -1,87 +1,119 @@ = 128 && $unicode < 2048) { - $binVal = str_pad (decbin ($unicode), 11, "0", STR_PAD_LEFT); - $binPart1 = substr ($binVal, 0, 5); - $binPart2 = substr ($binVal, 5); - - $char1 = chr (192 + bindec ($binPart1)); - $char2 = chr (128 + bindec ($binPart2)); - $utf8Substring = $char1 . $char2; - } - else if ($unicode >= 2048 && $unicode < 65536) { - $binVal = str_pad (decbin ($unicode), 16, "0", STR_PAD_LEFT); - $binPart1 = substr ($binVal, 0, 4); - $binPart2 = substr ($binVal, 4, 6); - $binPart3 = substr ($binVal, 10); - - $char1 = chr (224 + bindec ($binPart1)); - $char2 = chr (128 + bindec ($binPart2)); - $char3 = chr (128 + bindec ($binPart3)); - $utf8Substring = $char1 . $char2 . $char3; - } - else { - $binVal = str_pad (decbin ($unicode), 21, "0", STR_PAD_LEFT); - $binPart1 = substr ($binVal, 0, 3); - $binPart2 = substr ($binVal, 3, 6); - $binPart3 = substr ($binVal, 9, 6); - $binPart4 = substr ($binVal, 15); - - $char1 = chr (240 + bindec ($binPart1)); - $char2 = chr (128 + bindec ($binPart2)); - $char3 = chr (128 + bindec ($binPart3)); - $char4 = chr (128 + bindec ($binPart4)); - $utf8Substring = $char1 . $char2 . $char3 . 
$char4; - } - - if (strlen ($nonEntity) > 1) - $nonEntity = substr ($nonEntity, 1); // chop the first char (';') - else - $nonEntity = ''; + $char1 = chr (240 + bindec ($binPart1)); + $char2 = chr (128 + bindec ($binPart2)); + $char3 = chr (128 + bindec ($binPart3)); + $char4 = chr (128 + bindec ($binPart4)); + $ret = $char1 . $char2 . $char3 . $char4; + } else if ($var < 67108864) { + // Five byte utf-8 + $binVal = str_pad (decbin ($var), 26, "0", STR_PAD_LEFT); + $binPart1 = substr ($binVal, 0, 2); + $binPart2 = substr ($binVal, 2, 6); + $binPart3 = substr ($binVal, 8, 6); + $binPart4 = substr ($binVal, 14,6); + $binPart5 = substr ($binVal, 20); - $utf8Str .= $utf8Substring . $nonEntity; - } - else { - $utf8Str .= $subStr; - } - } + $char1 = chr (248 + bindec ($binPart1)); + $char2 = chr (128 + bindec ($binPart2)); + $char3 = chr (128 + bindec ($binPart3)); + $char4 = chr (128 + bindec ($binPart4)); + $char5 = chr (128 + bindec ($binPart5)); + $ret = $char1 . $char2 . $char3 . $char4 . $char5; + } else if ($var < 2147483648) { + // Six byte utf-8 + $binVal = str_pad (decbin ($var), 31, "0", STR_PAD_LEFT); + $binPart1 = substr ($binVal, 0, 1); + $binPart2 = substr ($binVal, 1, 6); + $binPart3 = substr ($binVal, 7, 6); + $binPart4 = substr ($binVal, 13,6); + $binPart5 = substr ($binVal, 19,6); + $binPart6 = substr ($binVal, 25); - return $utf8Str; + $char1 = chr (252 + bindec ($binPart1)); + $char2 = chr (128 + bindec ($binPart2)); + $char3 = chr (128 + bindec ($binPart3)); + $char4 = chr (128 + bindec ($binPart4)); + $char5 = chr (128 + bindec ($binPart5)); + $char6 = chr (128 + bindec ($binPart6)); + $ret = $char1 . $char2 . $char3 . $char4 . $char5 . $char6; + } else { + // there is no such symbol in utf-8 + $ret='?'; + } + return $ret; } ?> \ No newline at end of file -- 2.25.1