| 1 | <?php |
| 2 | /** |
| 3 | * decode/utf-8.php |
| 4 | * |
| 5 | * Copyright (c) 2003-2004 The SquirrelMail Project Team |
| 6 | * Licensed under the GNU GPL. For full terms see the file COPYING. |
| 7 | * |
| 8 | * This file contains the utf-8 decoding function that is needed to read |
| 9 | * utf-8 encoded mail in a non-utf-8 locale. |
| 10 | * |
| 11 | * Every decoded character consists of n bytes. First byte is octal |
| 12 | * 300-375, other bytes - always octals 200-277. |
| 13 | * |
| 14 | * \a\b characters are decoded to html code octdec(a-300)*64 + octdec(b-200) |
| 15 | * \a\b\c characters are decoded to html code octdec(a-340)*64*64 + octdec(b-200)*64 + octdec(c-200) |
| 16 | * |
| 17 | * decoding cycle is unfinished. please test and report problems to tokul@users.sourceforge.net |
| 18 | * |
| 19 | * @version $Id$ |
| 20 | * @package squirrelmail |
| 21 | * @subpackage decode |
| 22 | */ |
| 23 | |
/**
 * Decode a utf-8 string into numeric HTML character references.
 *
 * Every two-, three- or four-byte utf-8 sequence is replaced with the
 * decimal entity "&#N;" of its code point. ASCII bytes, and 8bit bytes that
 * do not form one of these sequences, are passed through unchanged. Overlong
 * or out-of-range sequences are not validated.
 *
 * The string is returned untouched when $squirrelmail_language is 'ja_JP'
 * or when sq_is8bit() reports that it contains no 8bit symbols.
 *
 * @param string $string Encoded string
 * @return string Decoded string
 */
function charset_decode_utf_8 ($string) {
    global $squirrelmail_language;

    if ($squirrelmail_language === 'ja_JP')
        return $string;

    // don't do decoding when there are no 8bit symbols
    if (! sq_is8bit($string,'utf-8'))
        return $string;

    // The /e (eval) modifier of preg_replace() was deprecated in PHP 5.5 and
    // removed in PHP 7.0, where it makes preg_replace() return NULL. A
    // callback does the same arithmetic without evaluating generated code.
    //
    // Lead bytes (\300-\367) and continuation bytes (\200-\277) never overlap,
    // so matches of different lengths cannot collide and one pass over the
    // string is equivalent to separate passes per sequence length.
    return preg_replace_callback(
        "/[\360-\367][\200-\277]{3}|[\340-\357][\200-\277]{2}|[\300-\337][\200-\277]/",
        'charset_decode_utf_8_entity',
        $string);
}

/**
 * preg_replace_callback() helper for charset_decode_utf_8().
 *
 * Converts one matched multi-byte utf-8 sequence into its decimal HTML
 * character reference.
 *
 * @param array $matches Match array; $matches[0] is the 2-4 byte sequence
 * @return string Numeric entity, e.g. "&#233;"
 * @access private
 */
function charset_decode_utf_8_entity ($matches) {
    $bytes  = $matches[0];
    $length = strlen($bytes);

    // Value to subtract from the lead byte to drop its length marker bits:
    // 110xxxxx (2 bytes), 1110xxxx (3 bytes), 11110xxx (4 bytes).
    $lead_markers = array(2 => 192, 3 => 224, 4 => 240);

    $code = ord($bytes[0]) - $lead_markers[$length];

    // Each continuation byte (10xxxxxx) contributes six more bits.
    for ($i = 1; $i < $length; $i++) {
        $code = $code * 64 + (ord($bytes[$i]) - 128);
    }

    return '&#' . $code . ';';
}
| 51 | ?> |