some plugins might use own hooks
[squirrelmail.git] / functions / encode / utf_8.php
1 <?php
2 /**
3 * utf-8 encoding function
4 *
5 * Takes a string of unicode entities and converts it to a utf-8 encoded string.
6 * Each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by browsers
7 * that support utf-8. ASCII text will not be modified.
8 *
9 * code is taken from www.php.net manual comments
10 * Author: ronen at greyzone dot com
11 *
12 * @version $Id$
13 * @package squirrelmail
14 * @subpackage encode
15 */
16
/**
 * Converts string to utf-8
 *
 * Every decimal entity of the form &#nnn; (one or more digits closed by a
 * semicolon) whose value is at most 1114111 (U+10FFFF) is replaced with the
 * UTF-8 byte sequence for that code point. All other text is copied through
 * unchanged, including plain semicolons, a bare "&#", hexadecimal entities
 * such as &#x41; and entities whose value is above U+10FFFF.
 *
 * @param string $source string of unicode entities
 * @return string a utf-8 encoded string
 */
function charset_encode_utf_8 ($source) {
    // Splitting on "&#" leaves the text before the first marker in chunk 0
    // and the text that follows each marker in the later chunks.
    $chunks = explode ('&#', $source);

    // Chunk 0 was never preceded by "&#", so it can not start an entity.
    $utf8Str = $chunks[0];

    $size = count ($chunks);
    for ($i = 1; $i < $size; $i++) {
        $chunk = $chunks[$i];

        // A real entity is a run of decimal digits immediately followed by ';'.
        $digits = strspn ($chunk, '0123456789');
        $isEntity = ($digits > 0 && substr ($chunk, $digits, 1) === ';');
        $unicode = $isEntity ? intval (substr ($chunk, 0, $digits)) : -1;

        if (!$isEntity || $unicode > 1114111) {
            // Not an encodable entity: put back the "&#" that explode()
            // removed and keep the text exactly as it was.
            $utf8Str .= '&#' . $chunk;
            continue;
        }

        // determine how many bytes are needed to represent this unicode char
        if ($unicode < 128) {
            // 0xxxxxxx
            $utf8Substring = chr ($unicode);
        }
        else if ($unicode < 2048) {
            // 110xxxxx 10xxxxxx
            $utf8Substring = chr (192 | ($unicode >> 6))
                           . chr (128 | ($unicode & 63));
        }
        else if ($unicode < 65536) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            $utf8Substring = chr (224 | ($unicode >> 12))
                           . chr (128 | (($unicode >> 6) & 63))
                           . chr (128 | ($unicode & 63));
        }
        else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $utf8Substring = chr (240 | ($unicode >> 18))
                           . chr (128 | (($unicode >> 12) & 63))
                           . chr (128 | (($unicode >> 6) & 63))
                           . chr (128 | ($unicode & 63));
        }

        // Append the encoded character and whatever follows the closing ';'.
        $utf8Str .= $utf8Substring . substr ($chunk, $digits + 1);
    }

    return $utf8Str;
}
?>
83 ?>