59177427 |
1 | <?php |
1fd97780 |
2 | |
35586184 |
3 | /** |
4 | * i18n.php |
5 | * |
76911253 |
6 | * Copyright (c) 1999-2003 The SquirrelMail Project Team |
35586184 |
7 | * Licensed under the GNU GPL. For full terms see the file COPYING. |
8 | * |
9 | * This file contains various functions that are needed to do |
10 | * internationalization of SquirrelMail. |
11 | * |
12 | * Internally the output character set is used. Other characters are |
13 | * encoded using Unicode entities according to HTML 4.0. |
14 | * |
15 | * $Id$ |
d6c32258 |
16 | * @package squirrelmail |
35586184 |
17 | */ |
18 | |
d6c32258 |
19 | /** Everything uses global.php... */ |
961ca3d8 |
20 | require_once(SM_PATH . 'functions/global.php'); |
21 | |
d6c32258 |
/**
 * Converts a string from the given charset into text that can be displayed
 * in the charset of the user's translation ($default_charset).
 *
 * Processing order:
 *  1. If the active language defines an XTRA_CODE callback and that function
 *     exists, the string is first passed through it in 'decode' mode.
 *  2. If $charset equals $default_charset (compared case-insensitively) the
 *     string is only passed through htmlspecialchars().
 *  3. If $use_php_recode is enabled, recode_string() converts to utf-8 when
 *     the default charset is utf-8, otherwise to recode's 'html' target.
 *  4. If $use_php_iconv is enabled and the default charset is utf-8, iconv()
 *     converts to utf-8.
 *  5. Otherwise the string is html-escaped and handed to the function
 *     charset_decode_<name>() loaded from functions/decode/<name>.php, where
 *     <name> is fixcharset($charset). When no such decoder is available the
 *     escaped but unconverted string is returned.
 *
 * Sanitizing of html special characters is done on every path.
 *
 * @param string $charset Name of the charset $string is encoded in
 * @param string $string  Text to be decoded
 * @return string decoded string
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language, $default_charset;
    global $use_php_recode, $use_php_iconv, $agresive_decoding;

    // Language specific pre-processing hook.
    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    $charset = strtolower($charset);

    // NOTE(review): defined outside this block; presumably it refreshes
    // $default_charset for the active translation - confirm.
    set_my_charset();

    // Variables that allow to use functions without function_exist() calls
    if (! isset($use_php_recode) || $use_php_recode=="" ) {
        $use_php_recode=false; }
    if (! isset($use_php_iconv) || $use_php_iconv=="" ) {
        $use_php_iconv=false; }

    // Every comparison below uses the lower-cased name, so a configured
    // 'UTF-8' is treated exactly like 'utf-8'.
    $target_charset = strtolower($default_charset);

    // Don't do conversion if charset is the same.
    if ( $charset == $target_charset )
        return htmlspecialchars($string);

    // catch iso-8859-8-i thing
    if ( $charset == "iso-8859-8-i" )
        $charset = "iso-8859-8";

    /*
     * Recode converts html special characters automatically if you use
     * 'charset..html' decoding. There is no documented way to put -d option
     * into php recode function call.
     */
    if ( $use_php_recode ) {
        if ( $target_charset == "utf-8" ) {
            // other charsets can be converted to utf-8 without loss.
            // and output string is smaller
            $string = recode_string($charset . "..utf-8",$string);
            return htmlspecialchars($string);
        } else {
            $string = recode_string($charset . "..html",$string);
            // recode does not convert single quote, so replace it here with
            // its numeric entity (character code 39), the same entity
            // htmlspecialchars produces for a single quote.
            $string = str_replace("'", '&#039;', $string);
            return $string;
        }
    }

    // iconv functions does not have html target and can be used only with utf-8
    if ( $use_php_iconv && $target_charset == 'utf-8' ) {
        $string = iconv($charset,$default_charset,$string);
        return htmlspecialchars($string);
    }

    // If we don't use recode and iconv, we'll do it old way.

    /* All HTML special characters are 7 bit and can be replaced first */
    $string = htmlspecialchars ($string);

    /* controls cpu and memory intensive decoding cycles */
    if (! isset($agresive_decoding) || $agresive_decoding=="" ) {
        $agresive_decoding=false; }

    $decode = fixcharset($charset);

    // The charset name originates from message data and becomes part of an
    // include path. Accept only plain identifier characters so that it can
    // never contain a directory separator or a dot.
    if (! preg_match('/^[a-z0-9_]+$/iD', $decode)) {
        return $string;
    }

    $decodefile = SM_PATH . 'functions/decode/' . $decode . '.php';
    $decoder    = 'charset_decode_' . $decode;

    $ret = $string;
    if (file_exists($decodefile)) {
        include_once($decodefile);
        // Only call the decoder when the included file really defined it.
        if (function_exists($decoder)) {
            $ret = call_user_func($decoder, $string);
        }
    }
    return( $ret );
}
b142de74 |
108 | |
/**
 * Turns a charset name into the form used in decoder file and function names.
 *
 * The rewrites are applied one after another, so each rule works on the
 * result of the previous one:
 *   '-'            becomes '_'   (minus is not allowed in function names)
 *   'windows_'     becomes 'cp'  (windows-125x and cp125x share one decoder)
 *   'ibm'          becomes 'cp'
 *   'iso_8859_8_i' becomes 'iso_8859_8'
 *
 * @param string $charset Name of charset
 * @return string Adjusted name of charset
 */
function fixcharset($charset) {
    // Order matters: the later rules match names that already use '_'.
    $rewrites = array(
        array('-',            '_'),
        array('windows_',     'cp'),
        array('ibm',          'cp'),
        // iso-8859-8-i uses the iso-8859-8 decoder
        array('iso_8859_8_i', 'iso_8859_8')
    );

    $fixed = $charset;
    foreach ($rewrites as $rule) {
        $fixed = str_replace($rule[0], $rule[1], $fixed);
    }

    return $fixed;
}
a2a7852b |
131 | |
51468260 |
/**
 * 8bit cleanup function.
 *
 * Replaces every byte in the range 0xA0-0xFF (octal \240-\377) with '?'.
 * All other bytes, including 0x80-0x9F, are left untouched.
 * Legacy function used for unsupported ISO-8859 charsets.
 *
 * @param string $string string that has to be cleaned
 * @return string cleaned string
 */
function charset_decode_iso_8859_default ($string) {
    // Collect the 96 bytes that have to be masked.
    $high_bytes = '';
    for ($code = 0xA0; $code <= 0xFF; $code++) {
        $high_bytes .= chr($code);
    }

    // One '?' per masked byte.
    $mask = str_repeat('?', strlen($high_bytes));

    return strtr($string, $high_bytes, $mask);
}
160 | |
51468260 |
161 | /** |
162 | * ns_4551_1 decoding function |
163 | * |
a2a7852b |
164 | * This is the same as ISO-646-NO and is used by some |
165 | * Microsoft programs when sending Norwegian characters |
51468260 |
166 | * |
167 | * @param string $string |
168 | * @return string |
a2a7852b |
169 | */ |
170 | function charset_decode_ns_4551_1 ($string) { |
171 | /* |
172 | * These characters are: |
173 | * Latin capital letter AE |
174 | * Latin capital letter O with stroke |
175 | * Latin capital letter A with ring above |
176 | * and the same as small letters |
177 | */ |
178 |