d3bab52e |
1 | <?php |
4b4abf93 |
2 | |
d3bab52e |
3 | /** |
78086b29 |
4 | * utf-8 encoding functions |
12975d38 |
5 | * |
6 | * takes a string of unicode entities and converts it to a utf-8 encoded string |
7 | * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by utf-8 supporting |
78086b29 |
8 | * browsers. ASCII will not be modified. |
12975d38 |
9 | * |
78086b29 |
10 | * Original code is taken from www.php.net manual comments |
11 | * Original author: ronen at greyzone dot com |
12975d38 |
12 | * |
47ccfad4 |
13 | * @copyright © 2004-2006 The SquirrelMail Project Team |
4b4abf93 |
14 | * @license http://opensource.org/licenses/gpl-license.php GNU Public License |
31841a9e |
15 | * @version $Id$ |
12975d38 |
16 | * @package squirrelmail |
17 | * @subpackage encode |
42396076 |
18 | */ |
19 | |
/**
 * Converts decimal numeric entities in a string to utf-8
 *
 * Every "&#NNN;" sequence (decimal digits only) is replaced with the
 * value returned by unicodetoutf8() for that number. Strings without
 * such sequences are returned unchanged. Hexadecimal entities
 * ("&#xHH;") are not converted.
 *
 * @param string $string text with numeric unicode entities
 * @return string utf-8 encoded text
 */
function charset_encode_utf_8 ($string) {
    // don't run encoding function, if there are no encoded characters
    if (! preg_match('/&#[0-9]+;/', $string)) {
        return $string;
    }

    // The /e (eval) pattern modifier was deprecated in PHP 5.5 and removed
    // in PHP 7.0, where preg_replace() emits a warning and returns NULL.
    // A callback performs the same substitution without evaluating code.
    $encoded = preg_replace_callback(
        '/&#([0-9]+);/',
        function ($match) {
            // hand over the captured digit string as-is, like the /e form did
            return unicodetoutf8($match[1]);
        },
        $string
    );

    // preg_replace_callback() returns NULL on a PCRE error;
    // keep the unmodified input instead of losing the text
    return ($encoded === null) ? $string : $encoded;
}
34 | |
/**
 * Return utf8 symbol when unicode character number is provided
 *
 * This function is used internally by charset_encode_utf_8
 * function. It might be unavailable to other SquirrelMail functions.
 * Don't use it or make sure, that functions/encode/utf_8.php is
 * included.
 *
 * Only values that RFC 3629 allows in utf-8 are encoded: 0 .. 0x10FFFF
 * without the UTF-16 surrogate range 0xD800 .. 0xDFFF. Any other value
 * (negative, non-numeric, surrogate or above 0x10FFFF) yields '?'.
 *
 * @param int $var decimal unicode value (numeric strings are accepted)
 * @return string utf8 character (one to four bytes) or '?'
 */
function unicodetoutf8($var) {
    // Range checks run on the raw value, before the integer cast, so that
    // digit strings beyond the platform integer range are rejected
    // instead of being clamped or wrapped into a valid-looking number.
    if (! is_numeric($var) || $var < 0 || $var > 0x10FFFF) {
        // there is no such symbol in utf-8
        return '?';
    }

    $code = (int) $var;

    if ($code >= 0xD800 && $code <= 0xDFFF) {
        // UTF-16 surrogates are not characters and must not be encoded
        return '?';
    }

    if ($code < 0x80) {
        // Plain ascii, single byte
        return chr($code);
    }

    if ($code < 0x800) {
        // Two byte utf-8: 110xxxxx 10xxxxxx
        return chr(0xC0 | ($code >> 6))
             . chr(0x80 | ($code & 0x3F));
    }

    if ($code < 0x10000) {
        // Three byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($code >> 12))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }

    // Four byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code >> 18))
         . chr(0x80 | (($code >> 12) & 0x3F))
         . chr(0x80 | (($code >> 6) & 0x3F))
         . chr(0x80 | ($code & 0x3F));
}