Adding last two functions from i18n.php
[squirrelmail.git] / functions / decode / utf_8.php
1 <?php
2 /**
3 * decode/utf-8.php
4 * $Id$
5 *
6 * Copyright (c) 2003 The SquirrelMail Project Team
7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains the UTF-8 decoding function that is needed to read
10 * UTF-8 encoded mail in non-UTF-8 locales.
11 *
12 * Every decoded character consists of n bytes. First byte is octal
13 * 300-375, other bytes - always octals 200-277.
14 *
15 * \a\b characters are decoded to html code octdec(a-300)*64 + octdec(b-200)
16 * \a\b\c characters are decoded to html code octdec(a-340)*64*64 + octdec(b-200)*64 + octdec(c-200)
17 *
18 * decoding cycle is unfinished. please test and report problems to tokul@users.sourceforge.net
19 *
20 * @package squirrelmail
21 * @subpackage decode
22 */
23
/**
 * Convert one matched multi-byte UTF-8 sequence into an HTML numeric
 * character reference.
 *
 * Used as the preg_replace_callback() callback of charset_decode_utf_8().
 * The length of the matched sequence (2, 3 or 4 bytes) selects the formula.
 *
 * @param array $matches PCRE match array; $matches[0] is the raw byte sequence
 * @return string '&#NNN;' reference, or the raw bytes when the value is
 *                above U+10FFFF (not a valid code point)
 * @access private
 */
function _charset_decode_utf_8_sequence ($matches) {
    $bytes = $matches[0];

    switch (strlen($bytes)) {
    case 4:
        // lead byte 0xF0-0xF4 carries 3 payload bits, each trailing byte 6
        $code = (ord($bytes[0]) - 240) * 262144
              + (ord($bytes[1]) - 128) * 4096
              + (ord($bytes[2]) - 128) * 64
              + (ord($bytes[3]) - 128);
        if ($code > 0x10FFFF) {
            // outside the Unicode range; leave the bytes untouched
            return $bytes;
        }
        break;
    case 3:
        // lead byte 0xE0-0xEF carries 4 payload bits
        $code = (ord($bytes[0]) - 224) * 4096
              + (ord($bytes[1]) - 128) * 64
              + (ord($bytes[2]) - 128);
        break;
    default:
        // two bytes: lead byte 0xC0-0xDF carries 5 payload bits
        $code = (ord($bytes[0]) - 192) * 64
              + (ord($bytes[1]) - 128);
        break;
    }

    return '&#' . $code . ';';
}

/**
 * Decode utf-8 strings
 *
 * Replaces every two-, three- and four-byte UTF-8 sequence in $string with
 * the matching HTML numeric character reference (&#NNN;). Bytes that do not
 * form such a sequence are passed through unchanged. When the global
 * $squirrelmail_language is 'ja_JP' the string is returned untouched.
 *
 * Uses preg_match()/preg_replace_callback() instead of ereg() and the
 * preg_replace() /e modifier, both of which were removed in PHP 7.0.
 *
 * @param string $string Encoded string
 * @return string Decoded string
 */
function charset_decode_utf_8 ($string) {
    global $squirrelmail_language;

    if ($squirrelmail_language === 'ja_JP') {
        return $string;
    }

    /* Only do the slow convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    // Lead bytes (0xC0 and up) and continuation bytes (0x80-0xBF) are
    // disjoint ranges, so matches of different lengths can never overlap
    // and a single pass is equivalent to one pass per sequence length.
    $decoded = preg_replace_callback(
        '/[\xF0-\xF4][\x80-\xBF]{3}|[\xE0-\xEF][\x80-\xBF]{2}|[\xC0-\xDF][\x80-\xBF]/',
        '_charset_decode_utf_8_sequence',
        $string
    );

    // preg_replace_callback() returns NULL on a PCRE error; fall back to the
    // undecoded input rather than handing NULL to the caller.
    if ($decoded === null) {
        return $string;
    }

    return $decoded;
}
52
53 ?>