clean up code commented out since 8 y
[squirrelmail.git] / functions / encode / utf_8.php
1 <?php
2
3 /**
4 * utf-8 encoding functions
5 *
6 * takes a string of unicode entities and converts it to a utf-8 encoded string
7 * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by utf-8 supporting
8 * browsers. Ascii will not be modified.
9 *
10 * Original code is taken from www.php.net manual comments
11 * Original author: ronen at greyzone dot com
12 *
13 * @copyright 2004-2012 The SquirrelMail Project Team
14 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
15 * @version $Id$
16 * @package squirrelmail
17 * @subpackage encode
18 */
19
/**
 * Converts string to utf-8
 *
 * Every decimal numeric entity of the form &#nnn; found in the string is
 * replaced with the utf-8 byte sequence produced by unicodetoutf8().
 * All other text is returned unchanged.
 *
 * @param string $string text with numeric unicode entities
 * @return string utf-8 encoded text
 */
function charset_encode_utf_8 ($string) {
    // don't run encoding function, if there are no encoded characters
    if (! preg_match("'&#[0-9]+;'", $string)) {
        return $string;
    }

    // preg_replace() with the /e (eval) modifier is deprecated since PHP 5.5
    // and removed in PHP 7.0; a named callback does the same job without
    // evaluating generated code and still works on old PHP versions.
    return preg_replace_callback('/&#([0-9]+);/', 'charset_encode_utf_8_callback', $string);
}

/**
 * preg_replace_callback() helper for charset_encode_utf_8()
 *
 * @param array $matches regexp matches; $matches[1] holds the decimal digits
 *                       of the entity
 * @return string utf-8 character returned by unicodetoutf8()
 * @access private
 */
function charset_encode_utf_8_callback($matches) {
    return unicodetoutf8($matches[1]);
}
33
/**
 * Return utf8 symbol when unicode character number is provided
 *
 * This function is used internally by charset_encode_utf_8
 * function. It might be unavailable to other SquirrelMail functions.
 * Don't use it or make sure, that functions/encode/utf_8.php is
 * included.
 *
 * Only values that RFC 3629 allows in utf-8 are encoded (U+0000..U+10FFFF
 * without the UTF-16 surrogates U+D800..U+DFFF). Anything else is returned
 * as '?', because five and six byte sequences and encoded surrogates are
 * not valid utf-8.
 *
 * @param int $var decimal unicode value (numeric strings are accepted)
 * @return string utf8 character, or '?' when the value can't be encoded
 */
function unicodetoutf8($var) {
    // Range checks are done on the raw value, before the integer cast, so
    // that oversized numeric strings are rejected instead of being wrapped
    // or clamped by the cast.
    if ($var < 0 || $var > 0x10FFFF || ($var >= 0xD800 && $var <= 0xDFFF)) {
        // there is no such symbol in utf-8
        return '?';
    }

    $code = (int) $var;

    if ($code < 0x80) {
        // Ascii, single byte
        return chr($code);
    }

    if ($code < 0x800) {
        // Two byte utf-8: 110xxxxx 10xxxxxx
        return chr(0xC0 | ($code >> 6))
             . chr(0x80 | ($code & 0x3F));
    }

    if ($code < 0x10000) {
        // Three byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($code >> 12))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }

    // Four byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code >> 18))
         . chr(0x80 | (($code >> 12) & 0x3F))
         . chr(0x80 | (($code >> 6) & 0x3F))
         . chr(0x80 | ($code & 0x3F));
}