Minor cleanups
[squirrelmail.git] / functions / encode / utf_8.php
CommitLineData
d3bab52e 1<?php
2/**
12975d38 3 * utf-8 encoding function
4 *
5 * takes a string of unicode entities and converts it to a utf-8 encoded string
6 * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by utf-8 supporting
7 * browsers. Ascii will not be modified.
8 *
9 * code is taken from www.php.net manual comments
10 * Author: ronen at greyzone dot com
11 *
31841a9e 12 * @version $Id$
12975d38 13 * @package squirrelmail
14 * @subpackage encode
42396076 15 */
16
/**
 * Converts decimal numeric character references to utf-8
 *
 * Every well-formed reference of the form &#nnn; (decimal digits only,
 * value 0..1114111) is replaced by the utf-8 byte sequence of that code
 * point. Everything else is copied to the output unchanged, including
 * plain text that happens to contain ';', hexadecimal references such as
 * &#x41;, and a "&#" that is not followed by digits and ';'.
 *
 * @param string $source string that may contain &#nnn; entities
 * @return string utf-8 encoded string
 */
function charset_encode_utf_8 ($source) {

    // don't run through the encoding function if there are no encoded characters
    if (strpos($source, '&#') === false) return $source;

    $entityArray = explode('&#', $source);
    $size = count($entityArray);

    // The first piece is the text in front of the first "&#". It can never
    // be an entity, so it is copied verbatim even when it contains a ';'.
    $utf8Str = $entityArray[0];

    for ($i = 1; $i < $size; $i++) {
        $subStr = $entityArray[$i];
        $semicolon = strpos($subStr, ';');

        // candidate digits are everything between "&#" and the first ';'
        $digits = '';
        if ($semicolon !== false) {
            $digits = (string) substr($subStr, 0, $semicolon);
        }

        // only pure decimal digit runs are numeric entities
        $isDecimal = ($digits !== '' && strspn($digits, '0123456789') == strlen($digits));
        $unicode = $isDecimal ? intval($digits) : -1;

        if ($unicode < 0 || $unicode > 0x10FFFF) {
            // not something we can convert: put back the "&#" that
            // explode() removed and keep the text as it was
            $utf8Str .= '&#' . $subStr;
            continue;
        }

        // determine how many bytes are needed to represent this unicode char
        if ($unicode < 0x80) {
            $utf8Substring = chr($unicode);
        }
        else if ($unicode < 0x800) {
            // 110xxxxx 10xxxxxx
            $utf8Substring = chr(0xC0 | ($unicode >> 6))
                           . chr(0x80 | ($unicode & 0x3F));
        }
        else if ($unicode < 0x10000) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            $utf8Substring = chr(0xE0 | ($unicode >> 12))
                           . chr(0x80 | (($unicode >> 6) & 0x3F))
                           . chr(0x80 | ($unicode & 0x3F));
        }
        else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $utf8Substring = chr(0xF0 | ($unicode >> 18))
                           . chr(0x80 | (($unicode >> 12) & 0x3F))
                           . chr(0x80 | (($unicode >> 6) & 0x3F))
                           . chr(0x80 | ($unicode & 0x3F));
        }

        // text after the terminating ';' is plain text; the cast guards
        // against substr() returning false when nothing follows the ';'
        $nonEntity = (string) substr($subStr, $semicolon + 1);

        $utf8Str .= $utf8Substring . $nonEntity;
    }

    return $utf8Str;
}
87?>