Japanese without mbstring handling
[squirrelmail.git] / functions / i18n.php
CommitLineData
59177427 1<?php
1fd97780 2
35586184 3/**
4 * i18n.php
5 *
76911253 6 * Copyright (c) 1999-2003 The SquirrelMail Project Team
35586184 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains various functions that are needed to do
10 * internationalization of SquirrelMail.
11 *
12 * Internally the output character set is used. Other characters are
13 * encoded using Unicode entities according to HTML 4.0.
14 *
15 * $Id$
d6c32258 16 * @package squirrelmail
35586184 17 */
18
d6c32258 19/** Everything uses global.php... */
961ca3d8 20require_once(SM_PATH . 'functions/global.php');
21
/**
 * Converts a string from the given charset into text that can be displayed
 * in the charset used by the user's translation.
 *
 * Processing order:
 *  - If the active translation defines an XTRA_CODE callback and that function
 *    exists, the string is first passed through it in 'decode' mode (the
 *    original notes state the Japanese translation uses this to get euc-jp).
 *  - If $charset equals the output charset, the string is only html-escaped.
 *  - If recode is enabled, the string is recoded to utf-8 (when utf-8 is the
 *    output charset) or to html entities.
 *  - If iconv is enabled and the output charset is utf-8, iconv is used.
 *  - Otherwise the string is html-escaped and handed to a per-charset decoding
 *    function from functions/decode/.
 *  - If $charset is not supported, the html-escaped string is returned
 *    without further conversion.
 *
 * Sanitizing of html special characters is also done by this function.
 *
 * Side effects: the globals $use_php_recode, $use_php_iconv and
 * $agresive_decoding are normalized to false when unset or empty.
 *
 * @param string $charset name of the charset $string is encoded in
 * @param string $string Text to be decoded
 * @return string decoded string
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language, $default_charset;
    global $use_php_recode, $use_php_iconv, $agresive_decoding;

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    $charset = strtolower($charset);

    // Defined elsewhere; presumably selects $default_charset for the current
    // user/translation, so it must run before $default_charset is read.
    set_my_charset();

    // Variables that allow to use functions without function_exists() calls
    if (! isset($use_php_recode) || $use_php_recode == "") {
        $use_php_recode = false;
    }
    if (! isset($use_php_iconv) || $use_php_iconv == "") {
        $use_php_iconv = false;
    }

    // Compare charset names case-insensitively everywhere, not only in the
    // "same charset" shortcut, so that a configured 'UTF-8' takes the same
    // code paths as 'utf-8'.
    $output_charset = strtolower($default_charset);

    // htmlspecialchars() is always called with an explicit single-byte
    // encoding and ENT_COMPAT. Newer PHP versions default to UTF-8 and return
    // an empty (or substituted) string for 8-bit input that is not valid
    // UTF-8, which would destroy the data before it can be decoded.

    // Don't do conversion if charset is the same.
    if ($charset == $output_charset) {
        return htmlspecialchars($string, ENT_COMPAT, 'ISO-8859-1');
    }

    // catch iso-8859-8-i thing
    if ($charset == "iso-8859-8-i") {
        $charset = "iso-8859-8";
    }

    /*
     * Recode converts html special characters automatically if you use
     * 'charset..html' decoding. There is no documented way to put -d option
     * into php recode function call.
     */
    if ($use_php_recode) {
        if ($output_charset == "utf-8") {
            // other charsets can be converted to utf-8 without loss.
            // and output string is smaller
            $string = recode_string($charset . "..utf-8", $string);
            return htmlspecialchars($string, ENT_COMPAT, 'ISO-8859-1');
        }
        $string = recode_string($charset . "..html", $string);
        // recode does not convert single quote, htmlspecialchars does.
        $string = str_replace("'", '&#039;', $string);
        return $string;
    }

    // iconv functions do not have html target and can be used only with utf-8
    if ($use_php_iconv && $output_charset == 'utf-8') {
        $string = iconv($charset, $default_charset, $string);
        return htmlspecialchars($string, ENT_COMPAT, 'ISO-8859-1');
    }

    // If we don't use recode and iconv, we'll do it old way.

    /* All HTML special characters are 7 bit and can be replaced first */
    $string = htmlspecialchars($string, ENT_COMPAT, 'ISO-8859-1');

    /* controls cpu and memory intensive decoding cycles */
    if (! isset($agresive_decoding) || $agresive_decoding == "") {
        $agresive_decoding = false;
    }

    // ISO-8859 parts that have a dedicated decoder in functions/decode/.
    $iso_parts = array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16);

    // charset name => array(decode file basename, decoding function)
    $decoders = array(
        'koi8-r'                => array('koi8-r',     'charset_decode_koi8r'),
        'koi8-u'                => array('koi8-u',     'charset_decode_koi8u'),
        'windows-1250'          => array('cp1250',     'charset_decode_cp1250'),
        'windows-1251'          => array('cp1251',     'charset_decode_cp1251'),
        'windows-1252'          => array('cp1252',     'charset_decode_cp1252'),
        'windows-1253'          => array('cp1253',     'charset_decode_cp1253'),
        'windows-1254'          => array('cp1254',     'charset_decode_cp1254'),
        'windows-1255'          => array('cp1255',     'charset_decode_cp1255'),
        'windows-1256'          => array('cp1256',     'charset_decode_cp1256'),
        'windows-1257'          => array('cp1257',     'charset_decode_cp1257'),
        'windows-1258'          => array('cp1258',     'charset_decode_cp1258'),
        'x-mac-roman'           => array('cp10000',    'charset_decode_cp10000'),
        'x-mac-greek'           => array('cp10006',    'charset_decode_cp10006'),
        'x-mac-cyrillic'        => array('cp10007',    'charset_decode_cp10007'),
        'x-mac-ukrainian'       => array('cp10017',    'charset_decode_cp10017'),
        'x-mac-centraleurroman' => array('cp10029',    'charset_decode_cp10029'),
        'x-mac-icelandic'       => array('cp10079',    'charset_decode_cp10079'),
        'x-mac-turkish'         => array('cp10081',    'charset_decode_cp10081'),
        'ibm855'                => array('cp855',      'charset_decode_cp855'),
        'ibm866'                => array('cp866',      'charset_decode_cp866'),
        'iso-ir-111'            => array('iso-ir-111', 'charset_decode_iso_ir_111'),
        'tis-620'               => array('tis620',     'charset_decode_tis620'),
    );

    // Decoders that are only used when aggressive decoding is switched on.
    $aggressive_decoders = array(
        'big5'   => array('big5',   'charset_decode_big5'),
        'gb2312' => array('gb2312', 'charset_decode_gb2312'),
    );

    $decoder = false;

    // ereg() was removed in PHP 7; preg_match() with an unanchored pattern
    // matches the same charset names.
    if (preg_match('/iso-8859-([0-9]+)/', $charset, $res)) {
        // Integer cast keeps the numeric comparison of the old chain
        // (e.g. 'iso-8859-01' is treated as part 1).
        $part = (int) $res[1];
        if (in_array($part, $iso_parts, true)) {
            $decoder = array('iso8859-' . $part, 'charset_decode_iso8859_' . $part);
        } else {
            // Unsupported part: replace 8-bit characters with '?'.
            return charset_decode_iso_8859_default($string);
        }
    } else if ($charset == 'ns_4551-1') {
        // Decoder lives in this file, nothing to include.
        return charset_decode_ns_4551_1($string);
    } else if (isset($decoders[$charset])) {
        $decoder = $decoders[$charset];
    } else if ($agresive_decoding && isset($aggressive_decoders[$charset])) {
        $decoder = $aggressive_decoders[$charset];
    } else if ($charset == 'utf-8' && $squirrelmail_language != 'ja_JP') {
        $decoder = array('utf-8', 'charset_decode_utf8');
    }

    if ($decoder === false) {
        // Unsupported charset: return the html-escaped string as is.
        return $string;
    }

    include_once(SM_PATH . 'functions/decode/' . $decoder[0] . '.php');
    $decode_function = $decoder[1];
    return $decode_function($string);
}
229
a2a7852b 230
/**
 * 8bit cleanup function.
 *
 * Replaces every 8 bit character (bytes 0x80-0xFF) with '?'.
 * Legacy function used by charset_decode() for ISO-8859 charsets that have
 * no dedicated decoding table.
 *
 * The previous implementation only listed bytes 0xA0-0xFF as characters to
 * replace, so bytes 0x80-0x9F were passed through unchanged although the
 * function is documented to clean all 8 bit characters.
 *
 * @param string $string string that has to be cleaned
 * @return string cleaned string
 */
function charset_decode_iso_8859_default ($string) {
    // Build the list of all bytes with the high bit set.
    $high_bytes = '';
    for ($byte = 0x80; $byte <= 0xFF; $byte++) {
        $high_bytes .= chr($byte);
    }

    // strtr() maps byte N of the first list to byte N of the second one,
    // so the replacement list must have the same length.
    return strtr($string, $high_bytes, str_repeat('?', strlen($high_bytes)));
}
259
51468260 260/**
261 * ns_4551_1 decoding function
262 *
a2a7852b 263 * This is the same as ISO-646-NO and is used by some
264 * Microsoft programs when sending Norwegian characters
51468260 265 *
266 * @param string $string
267 * @return string
a2a7852b 268 */
269function charset_decode_ns_4551_1 ($string) {
270 /*
271 * These characters are:
272 * Latin capital letter AE
273 * Latin capital letter O with stroke
274 * Latin capital letter A with ring above
275 * and the same as small letters
276 */
277