aStutus -> aStatus
[squirrelmail.git] / functions / encode / utf_8.php
... / ...
CommitLineData
1<?php
/**
 * Converts decimal numeric character references to UTF-8.
 *
 * Every well-formed reference of the form &#nnn; (one or more decimal
 * digits terminated by a semicolon) that names a Unicode scalar value is
 * replaced by the UTF-8 byte sequence of that code point. All other text
 * is copied through unchanged. That includes plain text which merely
 * contains a ';', a bare "&#", references whose body is not purely decimal
 * (for example the hexadecimal form &#x41;), surrogate code points
 * (55296-57343) and values above 1114111, none of which can be turned
 * into valid UTF-8.
 *
 * @param string $source text that may contain decimal numeric character references
 * @return string the text with those references replaced by UTF-8 bytes
 * @access public
 */
function charset_encode_utf_8 ($source) {
    $chunks = explode('&#', (string) $source);

    // Everything in front of the first "&#" is literal text, never an
    // entity, even when it happens to contain a semicolon.
    $utf8Str = $chunks[0];
    $size = count($chunks);

    for ($i = 1; $i < $size; $i++) {
        $chunk = $chunks[$i];
        $semicolon = strpos($chunk, ';');
        $digits = ($semicolon === false) ? '' : substr($chunk, 0, $semicolon);

        // Only an all-digit body followed by ';' is a decimal reference.
        // Anything else is ordinary text, so put back the "&#" that
        // explode() stripped and keep the chunk as it was.
        if ($digits === '' || strspn($digits, '0123456789') !== strlen($digits)) {
            $utf8Str .= '&#' . $chunk;
            continue;
        }

        // Overlong digit strings saturate at the platform's maximum integer,
        // which the range check below rejects.
        $unicode = intval($digits);

        // Surrogates and values beyond U+10FFFF have no valid UTF-8 form;
        // leave such references untouched instead of emitting broken bytes.
        if ($unicode > 0x10FFFF || ($unicode >= 0xD800 && $unicode <= 0xDFFF)) {
            $utf8Str .= '&#' . $chunk;
            continue;
        }

        // Pick the sequence length from the code point and pack its bits:
        // a lead byte carrying the high bits, then 6 payload bits per
        // continuation byte (10xxxxxx).
        if ($unicode < 0x80) {
            $utf8Substring = chr($unicode);
        } elseif ($unicode < 0x800) {
            $utf8Substring = chr(0xC0 | ($unicode >> 6))
                           . chr(0x80 | ($unicode & 0x3F));
        } elseif ($unicode < 0x10000) {
            $utf8Substring = chr(0xE0 | ($unicode >> 12))
                           . chr(0x80 | (($unicode >> 6) & 0x3F))
                           . chr(0x80 | ($unicode & 0x3F));
        } else {
            $utf8Substring = chr(0xF0 | ($unicode >> 18))
                           . chr(0x80 | (($unicode >> 12) & 0x3F))
                           . chr(0x80 | (($unicode >> 6) & 0x3F))
                           . chr(0x80 | ($unicode & 0x3F));
        }

        // Text following the terminating ';' belongs to the output verbatim.
        // The cast covers older PHP versions where substr() returns false
        // when the ';' is the last character of the chunk.
        $utf8Str .= $utf8Substring . (string) substr($chunk, $semicolon + 1);
    }

    return $utf8Str;
}
71?>