4 * functions/decode/utf-8.php - utf-8 decoding functions
6 * This file contains a utf-8 decoding function that is needed to read
7 * utf-8 encoded mails in a non-utf-8 locale.
9 * Every multibyte character decoded here consists of n bytes. The first
10 * byte is octal 300-375; the other bytes are always octal 200-277.
12 * Ranges of the first byte (octal, decimal, hexadecimal):
14 * Two byte - 300-337 192-223 C0-DF
15 * Three byte - 340-357 224-239 E0-EF
16 * Four byte - 360-367 240-247 F0-F7
17 * Five byte - 370-373 248-251 F8-FB
18 * Six byte - 374-375 252-253 FC-FD
20 * \a\b characters are decoded to html code calculated with formula:
21 * octdec(a-300)*64 + octdec(b-200)
23 * \a\b\c characters are decoded to html code calculated with formula:
24 * octdec(a-340)*64^2 + octdec(b-200)*64 + octdec(c-200)
26 * \a\b\c\d characters are decoded to html code calculated with formula:
27 * octdec(a-360)*64^3 + octdec(b-200)*64^2 +
28 * + octdec(c-200)*64 + octdec(d-200)
30 * \a\b\c\d\e characters are decoded to html code calculated with formula:
31 * octdec(a-370)*64^4 + octdec(b-200)*64^3 +
32 * + octdec(c-200)*64^2 + octdec(d-200)*64 + octdec(e-200)
34 * \a\b\c\d\e\f characters are decoded to html code calculated with formula:
35 * octdec(a-374)*64^5 + octdec(b-200)*64^4 + octdec(c-200)*64^3 +
36 * + octdec(d-200)*64^2 + octdec(e-200)*64 + octdec(f-200)
38 * @copyright 2003-2018 The SquirrelMail Project Team
39 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
41 * @package squirrelmail
46 * Decode utf-8 strings
47 * @param string $string Encoded string
48 * @return string Decoded string
50 function charset_decode_utf_8 ($string) {
51 global $squirrelmail_language;
53 // Japanese translation uses mbstring function to read utf-8
54 if ($squirrelmail_language == 'ja_JP')
57 // don't do decoding when there are no 8bit symbols
58 if (! sq_is8bit($string,'utf-8'))
61 // decode six byte unicode characters
62 /* (i think currently there is no such symbol)
63 $string = preg_replace_callback("/([\374-\375])([\200-\277])([\200-\277])([\200-\277])([\200-\277])([\200-\277])/",
64 create_function ('$matches', 'return \'&#\'.((ord($matches[1])-252)*1073741824+(ord($matches[2])-200)*16777216+(ord($matches[3])-200)*262144+(ord($matches[4])-128)*4096+(ord($matches[5])-128)*64+(ord($matches[6])-128)).\';\';'),
68 // decode five byte unicode characters
69 /* (i think currently there is no such symbol)
70 $string = preg_replace_callback("/([\370-\373])([\200-\277])([\200-\277])([\200-\277])([\200-\277])/",
71 create_function ('$matches', 'return \'&#\'.((ord($matches[1])-248)*16777216+(ord($matches[2])-200)*262144+(ord($matches[3])-128)*4096+(ord($matches[4])-128)*64+(ord($matches[5])-128)).\';\';'),
75 // decode four byte unicode characters
76 $string = preg_replace_callback("/([\360-\367])([\200-\277])([\200-\277])([\200-\277])/",
77 create_function ('$matches', 'return \'&#\'.((ord($matches[1])-240)*262144+(ord($matches[2])-128)*4096+(ord($matches[3])-128)*64+(ord($matches[4])-128)).\';\';'),
80 // decode three byte unicode characters
81 $string = preg_replace_callback("/([\340-\357])([\200-\277])([\200-\277])/",
82 create_function ('$matches', 'return \'&#\'.((ord($matches[1])-224)*4096+(ord($matches[2])-128)*64+(ord($matches[3])-128)).\';\';'),
85 // decode two byte unicode characters
86 $string = preg_replace_callback("/([\300-\337])([\200-\277])/",
87 create_function ('$matches', 'return \'&#\'.((ord($matches[1])-192)*64+(ord($matches[2])-128)).\';\';'),
90 // remove broken unicode
91 $string = preg_replace("/[\200-\237]|\240|[\241-\377]/",'?',$string);