removed local directory name used for testing.
[squirrelmail.git] / functions / encode / utf_8.php
CommitLineData
d3bab52e 1<?php
/**
 * utf-8 encoding functions
 *
 * Takes a string of unicode entities and converts it to a utf-8 encoded string.
 * Each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by
 * utf-8 supporting browsers. Ascii will not be modified.
 *
 * Original code is taken from www.php.net manual comments
 * Original author: ronen at greyzone dot com
 *
 * @version $Id$
 * @copyright Copyright &copy; 2004-2005 The SquirrelMail Project Team
 * @package squirrelmail
 * @subpackage encode
 */
17
/**
 * Converts string to utf-8
 *
 * Every decimal numeric entity (&#nnn;) found in the text is replaced with
 * the byte sequence returned by unicodetoutf8() for that number. All other
 * text, including hexadecimal entities (&#xHH;), is returned untouched.
 *
 * @param string $string text with numeric unicode entities
 * @return string utf-8 encoded text
 */
function charset_encode_utf_8 ($string) {
    // The "e" (eval) pattern modifier that used to drive this replacement
    // was deprecated in PHP 5.5 and removed in PHP 7.0, so the conversion
    // is done through a callback instead. A string that contains no decimal
    // entities simply comes back unchanged.
    $converted = preg_replace_callback(
        "/&#([0-9]+);/",
        'charset_encode_utf_8_callback',
        $string
    );

    // preg_replace_callback() returns null when PCRE fails (for example on
    // a backtrack limit); hand back the original text rather than null.
    if ($converted === null) {
        return $string;
    }
    return $converted;
}

/**
 * preg_replace_callback() helper used by charset_encode_utf_8().
 *
 * @param array $matches regular expression match; index 1 holds the decimal
 *                       digits found between "&#" and ";"
 * @return string replacement text for the matched entity
 */
function charset_encode_utf_8_callback($matches) {
    // The digits are passed on as captured (a numeric string), exactly as
    // the former eval-based replacement did.
    return unicodetoutf8($matches[1]);
}
32
/**
 * Return utf8 symbol when unicode character number is provided
 *
 * This function is used internally by charset_encode_utf_8
 * function. It might be unavailable to other SquirrelMail functions.
 * Don't use it or make sure, that functions/encode/utf_8.php is
 * included.
 *
 * Only Unicode scalar values are encoded: 0..0x10FFFF with the surrogate
 * range 0xD800..0xDFFF excluded. RFC 3629 limits UTF-8 to that range, so
 * five and six byte sequences are never produced. Any other value, as well
 * as negative or non-numeric input, yields a question mark.
 *
 * @param int $var decimal unicode value (a numeric string is accepted too)
 * @return string utf8 character, or '?' when the value can't be encoded
 */
function unicodetoutf8($var) {
    // Range checks are made on the raw value, before the integer cast, so
    // that an over-long digit string can't wrap around into a valid number.
    if (! is_numeric($var)
        || $var < 0
        || $var > 0x10FFFF
        || ($var >= 0xD800 && $var <= 0xDFFF)) {
        // there is no such symbol in utf-8
        return '?';
    }

    $code = (int) $var;

    if ($code < 0x80) {
        // One byte (ascii): 0xxxxxxx
        return chr($code);
    }

    if ($code < 0x800) {
        // Two byte utf-8: 110xxxxx 10xxxxxx
        return chr(0xC0 | ($code >> 6))
             . chr(0x80 | ($code & 0x3F));
    }

    if ($code < 0x10000) {
        // Three byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($code >> 12))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }

    // Four byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code >> 18))
         . chr(0x80 | (($code >> 12) & 0x3F))
         . chr(0x80 | (($code >> 6) & 0x3F))
         . chr(0x80 | ($code & 0x3F));
}
119?>