a3dc34009cb2c5b220338645827e99776db1124b
[squirrelmail.git] / functions / encode / utf_8.php
1 <?php
/**
 * utf-8 encoding function
 *
 * Takes a string containing decimal numeric character references and
 * converts it to a utf-8 encoded string. Each reference has the form
 * &#nnn(nn); with n={0..9} and is replaced by the utf-8 byte sequence of
 * that code point, so it can be displayed by utf-8 supporting browsers.
 *
 * Everything else is copied through unchanged: plain ascii text, text in
 * front of the first "&#", a "&#" that is not followed by decimal digits
 * and a terminating ';' (this includes hexadecimal &#x...; references),
 * and references that do not name a valid Unicode scalar value
 * (surrogates 0xD800-0xDFFF and anything above 0x10FFFF).
 *
 * code is taken from www.php.net manual comments
 * Author: ronen at greyzone dot com
 *
 * @version $Id$
 * @package squirrelmail
 * @subpackage encode
 * @param $source string of unicode entities [STRING]
 * @return a utf-8 encoded string [STRING]
 * @access public
 */
function charset_encode_utf_8 ($source) {
    $chunks = explode ('&#', $source);

    // The first chunk is whatever precedes the first "&#". It is always
    // literal text, even when it happens to contain a ';'.
    $utf8Str = $chunks[0];

    $size = count ($chunks);
    for ($i = 1; $i < $size; $i++) {
        $chunk = $chunks[$i];

        // A real reference is one or more decimal digits directly
        // followed by ';'. strspn() gives the length of the digit run.
        $digits = strspn ($chunk, '0123456789');
        $isEntity = ($digits > 0 && substr ($chunk, $digits, 1) === ';');

        // intval() saturates on overlong digit runs, so such values are
        // rejected by the range check below instead of wrapping around.
        $unicode = $isEntity ? intval (substr ($chunk, 0, $digits)) : -1;

        if (!$isEntity
            || $unicode > 0x10FFFF
            || ($unicode >= 0xD800 && $unicode <= 0xDFFF)) {
            // Not a convertible reference: restore the "&#" that
            // explode() removed and keep the text as it was.
            $utf8Str .= '&#' . $chunk;
            continue;
        }

        // determine how many bytes are needed to represent this code point
        if ($unicode < 0x80) {
            // 0xxxxxxx
            $utf8Substring = chr ($unicode);
        }
        else if ($unicode < 0x800) {
            // 110xxxxx 10xxxxxx
            $utf8Substring = chr (0xC0 | ($unicode >> 6))
                           . chr (0x80 | ($unicode & 0x3F));
        }
        else if ($unicode < 0x10000) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            $utf8Substring = chr (0xE0 | ($unicode >> 12))
                           . chr (0x80 | (($unicode >> 6) & 0x3F))
                           . chr (0x80 | ($unicode & 0x3F));
        }
        else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $utf8Substring = chr (0xF0 | ($unicode >> 18))
                           . chr (0x80 | (($unicode >> 12) & 0x3F))
                           . chr (0x80 | (($unicode >> 6) & 0x3F))
                           . chr (0x80 | ($unicode & 0x3F));
        }

        // Append the encoded character plus whatever followed the ';'.
        // The cast covers old PHP versions where substr() returns false
        // when there is nothing left after the ';'.
        $utf8Str .= $utf8Substring . (string) substr ($chunk, $digits + 1);
    }

    return $utf8Str;
}
80 ?>