ee37ee9b |
1 | <?php |
2 | /* |
3 | * decode/utf-8.php |
4 | * $Id$ |
5 | * |
0a708025 |
6 | * Copyright (c) 2003 The SquirrelMail Project Team |
ee37ee9b |
7 | * Licensed under the GNU GPL. For full terms see the file COPYING. |
8 | * |
9 | * This file contains the utf-8 decoding function that is needed to read |
10 | * utf-8 encoded mails in a non-utf-8 locale. |
11 | * |
12 | * Every decoded character consists of n bytes. First byte is octal |
13 | * 300-375, other bytes - always octals 200-277. |
14 | * |
15 | * \a\b characters are decoded to html code octdec(a-300)*64 + octdec(b-200) |
16 | * \a\b\c characters are decoded to html code octdec(a-340)*64*64 + octdec(b-200)*64 + octdec(c-200) |
17 | * |
18 | * decoding cycle is unfinished. please test and report problems to tokul@users.sourceforge.net |
19 | * |
20 | */ |
/**
 * Converts utf-8 multi-byte sequences into html numeric character
 * references (&#NNN;) so that utf-8 encoded text can be displayed on a
 * page that uses a different charset.
 *
 * The string is returned unchanged when the global $default_charset is
 * utf-8 (case-insensitive) or when it contains no 8-bit bytes at all.
 * Bytes that do not form a complete two, three or four byte sequence are
 * left as they are.
 *
 * @param string $string text that may contain utf-8 sequences
 * @return string text with utf-8 sequences replaced by &#NNN; references
 */
function charset_decode_utf8 ($string) {
    global $default_charset;

    /* The page is rendered as utf-8 already, nothing has to be converted. */
    if (isset($default_charset) && strtolower($default_charset) === 'utf-8')
        return $string;

    /* Only do the slow convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string))
        return $string;

    /*
     * One pass over all two, three and four byte sequences. Lead bytes
     * (\xC0-\xF4) and continuation bytes (\x80-\xBF) never share a value,
     * so matches can not overlap and a single pass gives the same result
     * as decoding each sequence length in a separate pass.
     *
     * preg_replace_callback() is used instead of the /e modifier, and
     * preg_match() instead of ereg(); both old constructs were removed
     * in PHP 7.
     */
    $decoded = preg_replace_callback(
        '/[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF4][\x80-\xBF]{3}/',
        'charset_decode_utf8_entity',
        $string);

    /* NULL signals a PCRE error; hand back the input rather than lose it. */
    if ($decoded === null)
        return $string;

    return $decoded;
}

/**
 * preg_replace_callback() helper for charset_decode_utf8().
 *
 * @param array $match match array; $match[0] holds one complete utf-8
 *                     sequence of two, three or four bytes
 * @return string html numeric character reference for the sequence, or
 *                the original bytes when the value is above U+10FFFF
 */
function charset_decode_utf8_entity ($match) {
    $bytes  = $match[0];
    $length = strlen($bytes);

    /*
     * The lead byte starts with $length one-bits and a zero bit; the
     * remaining low bits are payload (5 bits for two byte sequences,
     * 4 for three, 3 for four).
     */
    $code = ord($bytes[0]) & (0xFF >> ($length + 1));

    /* Every continuation byte adds its low six bits. */
    for ($i = 1; $i < $length; $i++) {
        $code = ($code << 6) | (ord($bytes[$i]) & 0x3F);
    }

    /* Values past the end of the Unicode range are not valid references. */
    if ($code > 0x10FFFF)
        return $bytes;

    return '&#' . $code . ';';
}
44 | |
45 | ?> |