Happy New Year!
[squirrelmail.git] / functions / encode / utf_8.php
CommitLineData
d3bab52e 1<?php
4b4abf93 2
d3bab52e 3/**
78086b29 4 * utf-8 encoding functions
12975d38 5 *
6 * takes a string of unicode entities and converts it to a utf-8 encoded string
7 * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by utf-8 supporting
78086b29 8 * browsers. Ascii will not be modified.
12975d38 9 *
78086b29 10 * Original code is taken from www.php.net manual comments
11 * Original author: ronen at greyzone dot com
12975d38 12 *
ae5dddc0 13 * @copyright 2004-2011 The SquirrelMail Project Team
4b4abf93 14 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
31841a9e 15 * @version $Id$
12975d38 16 * @package squirrelmail
17 * @subpackage encode
42396076 18 */
19
/**
 * Converts string to utf-8
 *
 * Every decimal numeric entity of the form &#nnn; found in the string is
 * replaced by the utf-8 encoding of that code point. All other text,
 * including hexadecimal entities (&#xHH;), is returned unchanged.
 *
 * @param string $string text with numeric unicode entities
 * @return string utf-8 encoded text
 */
function charset_encode_utf_8 ($string) {
    // don't run encoding function, if there are no encoded characters
    if (! preg_match("'&#[0-9]+;'", $string)) {
        return $string;
    }

    // The /e (eval) pattern modifier was removed in PHP 7.0, so the
    // replacement is done through a callback. A named callback is used
    // instead of a closure to stay usable on old PHP versions as well.
    $string = preg_replace_callback(
        "/&#([0-9]+);/",
        '_charset_encode_utf_8_entity',
        $string
    );

    return $string;
}

/**
 * preg_replace_callback() helper for charset_encode_utf_8
 *
 * @param array $matches regexp matches; index 1 holds the decimal code point
 * @return string utf-8 encoded character
 * @access private
 */
function _charset_encode_utf_8_entity($matches) {
    return unicodetoutf8($matches[1]);
}
34
/**
 * Return utf8 symbol when unicode character number is provided
 *
 * This function is used internally by charset_encode_utf_8
 * function. It might be unavailable to other SquirrelMail functions.
 * Don't use it or make sure, that functions/encode/utf_8.php is
 * included.
 *
 * Only code points that are valid in utf-8 are encoded (0 .. 0x10FFFF,
 * excluding the UTF-16 surrogate range 0xD800 .. 0xDFFF). Any other
 * value yields a question mark.
 *
 * @param int $var decimal unicode value (int or numeric string)
 * @return string utf8 character, or '?' if the value can't be encoded
 */
function unicodetoutf8($var) {
    // Check the range before casting, so that oversized numeric strings
    // are rejected instead of being truncated by the integer cast.
    if ($var < 0 || $var > 0x10FFFF) {
        // there is no such symbol in utf-8
        return '?';
    }

    $code = (int) $var;

    if ($code < 0x80) {
        // One byte (ascii)
        return chr($code);
    }

    if ($code < 0x800) {
        // Two byte utf-8: 110xxxxx 10xxxxxx
        return chr(0xC0 | ($code >> 6))
             . chr(0x80 | ($code & 0x3F));
    }

    if ($code < 0x10000) {
        // Surrogates are reserved for UTF-16 and must not be encoded
        if ($code >= 0xD800 && $code <= 0xDFFF) {
            return '?';
        }
        // Three byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($code >> 12))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }

    // Four byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code >> 18))
         . chr(0x80 | (($code >> 12) & 0x3F))
         . chr(0x80 | (($code >> 6) & 0x3F))
         . chr(0x80 | ($code & 0x3F));
}