7786e03aa7a222a656d956e9b27c712e4243de7d
[squirrelmail.git] / functions / encode / utf_8.php
1 <?php
2
3 /**
4 * utf-8 encoding functions
5 *
6 * takes a string of unicode entities and converts it to a utf-8 encoded string
7 * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by utf-8 supporting
8 * browsers. Ascii will not be modified.
9 *
10 * Original code is taken from www.php.net manual comments
11 * Original author: ronen at greyzone dot com
12 *
13 * @copyright 2004-2019 The SquirrelMail Project Team
14 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
15 * @version $Id$
16 * @package squirrelmail
17 * @subpackage encode
18 */
19
/**
 * Encodes decimal numeric entities in a string as utf-8 byte sequences
 *
 * Every "&#NNN;" sequence (NNN being decimal digits) is handed to
 * unicodetoutf8() and replaced with whatever that callback returns.
 * All other text is passed through untouched.
 *
 * @param string $string text with numeric unicode entities
 * @return string utf-8 encoded text
 */
function charset_encode_utf_8 ($string) {
    // cheap probe first: only run the callback based replacement when
    // the text really contains at least one decimal entity
    $has_entities = preg_match("'&#[0-9]+;'", $string);

    if ($has_entities) {
        return preg_replace_callback("/&#([0-9]+);/", 'unicodetoutf8', $string);
    }

    // nothing to convert
    return $string;
}
33
/**
 * Return utf8 symbol when unicode character number is provided
 *
 * This function is used internally by charset_encode_utf_8
 * function. It might be unavailable to other SquirrelMail functions.
 * Don't use it or make sure, that functions/encode/utf_8.php is
 * included.
 *
 * Only Unicode scalar values are encoded. Numbers above 1114111
 * (U+10FFFF) and the UTF-16 surrogate range 55296-57343
 * (U+D800-U+DFFF) have no valid utf-8 representation (RFC 3629)
 * and are replaced with '?'.
 *
 * @param array $matches preg match array; element with index 1 holds
 *                       the decimal unicode value as a string of digits
 * @return string utf8 character, or '?' if the value can't be encoded
 */
function unicodetoutf8($matches) {
    $var = $matches[1];

    // Range check is done before the integer cast: an overlong digit
    // string is compared as a float here, while casting it first could
    // overflow the integer type.
    if ($var > 0x10FFFF) {
        // there is no such symbol in utf-8
        return '?';
    }

    $code = (int) $var;

    if ($code < 0x80) {
        // ascii, single byte
        return chr($code);
    }

    if ($code < 0x800) {
        // Two byte utf-8: 110xxxxx 10xxxxxx
        return chr(0xC0 | ($code >> 6))
             . chr(0x80 | ($code & 0x3F));
    }

    if ($code < 0x10000) {
        if ($code >= 0xD800 && $code <= 0xDFFF) {
            // surrogate halves are not characters and must not be
            // written out as utf-8
            return '?';
        }
        // Three byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($code >> 12))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }

    // Four byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code >> 18))
         . chr(0x80 | (($code >> 12) & 0x3F))
         . chr(0x80 | (($code >> 6) & 0x3F))
         . chr(0x80 | ($code & 0x3F));
}