Fixes a few issues
[squirrelmail.git] / functions / i18n.php
CommitLineData
59177427 1<?php
1fd97780 2
35586184 3/**
4 * i18n.php
5 *
76911253 6 * Copyright (c) 1999-2003 The SquirrelMail Project Team
35586184 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains various functions that are needed to do
10 * internationalization of SquirrelMail.
11 *
12 * Internally the output character set is used. Other characters are
13 * encoded using Unicode entities according to HTML 4.0.
14 *
15 * $Id$
d6c32258 16 * @package squirrelmail
35586184 17 */
18
d6c32258 19/** Everything uses global.php... */
961ca3d8 20require_once(SM_PATH . 'functions/global.php');
21
/**
 * Decodes a string to the internal encoding from the given charset
 *
 * The returned text is HTML-escaped. Conversion is attempted in this order:
 * the XTRA_CODE function of the current language (if one is defined),
 * php recode, php iconv (only when the output charset is utf-8) and finally
 * the decoding functions shipped in functions/decode/. Charsets that no
 * decoder handles are returned HTML-escaped but otherwise unchanged.
 *
 * @param string $charset Name of the charset $string is encoded in
 * @param string $string Text to be decoded
 * @return string Decoded text
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language, $default_charset;
    global $use_php_recode, $use_php_iconv, $agresive_decoding;

    // Give the current language's XTRA_CODE function first go at the text.
    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    $charset = strtolower($charset);

    set_my_charset();

    // Treat unset or empty settings as "off" so the checks below need no
    // function_exists() calls.
    if (! isset($use_php_recode) || $use_php_recode == "") {
        $use_php_recode = false;
    }
    if (! isset($use_php_iconv) || $use_php_iconv == "") {
        $use_php_iconv = false;
    }

    // Charset names are case-insensitive; normalise the output charset once
    // so that every comparison below agrees (e.g. "UTF-8" vs "utf-8").
    $output_charset = strtolower($default_charset);

    // Don't do conversion if charset is the same.
    if ($charset == $output_charset) {
        return htmlspecialchars($string);
    }

    // iso-8859-8-i is decoded with the iso-8859-8 decoder.
    if ($charset == "iso-8859-8-i") {
        $charset = "iso-8859-8";
    }

    /*
     * Recode converts html special characters automatically if you use
     * 'charset..html' decoding. There is no documented way to put -d option
     * into php recode function call.
     *
     * recode_string() returns false when it cannot convert; in that case
     * fall through to the other decoders instead of returning an empty
     * string.
     */
    if ($use_php_recode) {
        if ($output_charset == 'utf-8') {
            // other charsets can be converted to utf-8 without loss.
            // and output string is smaller
            $converted = recode_string($charset . "..utf-8", $string);
            if ($converted !== false) {
                return htmlspecialchars($converted);
            }
        } else {
            $converted = recode_string($charset . "..html", $string);
            if ($converted !== false) {
                // recode leaves the single quote alone; escape it here.
                return str_replace("'", '&#039;', $converted);
            }
        }
    }

    // iconv has no html target, so it is only usable when the output
    // charset is utf-8. A false return (failed conversion) falls through.
    if ($use_php_iconv && $output_charset == 'utf-8') {
        $converted = iconv($charset, $default_charset, $string);
        if ($converted !== false) {
            return htmlspecialchars($converted);
        }
    }

    // If we don't use recode and iconv, we'll do it old way.

    /*
     * All HTML special characters are 7 bit and can be replaced first.
     * NOTE(review): htmlspecialchars() runs with its default flags and
     * encoding here; confirm that 8-bit input survives on the PHP version
     * in use.
     */
    $string = htmlspecialchars($string);

    /* controls cpu and memory intensive decoding cycles */
    if (! isset($agresive_decoding) || $agresive_decoding == "") {
        $agresive_decoding = false;
    }

    // preg_match() replaces ereg(), which no longer exists in PHP 7+.
    if (preg_match('/iso-8859-([[:digit:]]+)/', $charset, $res)) {
        // The cast keeps the numeric comparison of the matched digits
        // ("01" counts as part 1).
        $part = (int) $res[1];
        $iso_parts = array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16);
        if (in_array($part, $iso_parts)) {
            // $part is a whitelisted integer, so the include path cannot be
            // influenced by the charset name of a message.
            include_once(SM_PATH . 'functions/decode/iso8859-' . $part . '.php');
            $decoder = 'charset_decode_iso8859_' . $part;
            $ret = $decoder($string);
        } else {
            $ret = charset_decode_iso_8859_default($string);
        }
        return $ret;
    }

    // Big5 and gb2312 decoding is only done when aggressive decoding is on.
    if (($charset == 'big5' || $charset == 'gb2312') && ! $agresive_decoding) {
        return $string;
    }

    // utf-8 is left untouched for the ja_JP translation.
    if ($charset == 'utf-8' && $squirrelmail_language == 'ja_JP') {
        return $string;
    }

    // charset name => array(file in functions/decode/ without ".php",
    //                       decoding function)
    // An empty file name means the function is defined in this file.
    $decoders = array(
        'ns_4551-1'             => array('',           'charset_decode_ns_4551_1'),
        'koi8-r'                => array('koi8-r',     'charset_decode_koi8r'),
        'koi8-u'                => array('koi8-u',     'charset_decode_koi8u'),
        'windows-1250'          => array('cp1250',     'charset_decode_cp1250'),
        'windows-1251'          => array('cp1251',     'charset_decode_cp1251'),
        'windows-1252'          => array('cp1252',     'charset_decode_cp1252'),
        'windows-1253'          => array('cp1253',     'charset_decode_cp1253'),
        'windows-1254'          => array('cp1254',     'charset_decode_cp1254'),
        'windows-1255'          => array('cp1255',     'charset_decode_cp1255'),
        'windows-1256'          => array('cp1256',     'charset_decode_cp1256'),
        'windows-1257'          => array('cp1257',     'charset_decode_cp1257'),
        'windows-1258'          => array('cp1258',     'charset_decode_cp1258'),
        'x-mac-roman'           => array('cp10000',    'charset_decode_cp10000'),
        'x-mac-greek'           => array('cp10006',    'charset_decode_cp10006'),
        'x-mac-cyrillic'        => array('cp10007',    'charset_decode_cp10007'),
        'x-mac-ukrainian'       => array('cp10017',    'charset_decode_cp10017'),
        'x-mac-centraleurroman' => array('cp10029',    'charset_decode_cp10029'),
        'x-mac-icelandic'       => array('cp10079',    'charset_decode_cp10079'),
        'x-mac-turkish'         => array('cp10081',    'charset_decode_cp10081'),
        'ibm855'                => array('cp855',      'charset_decode_cp855'),
        'ibm866'                => array('cp866',      'charset_decode_cp866'),
        'iso-ir-111'            => array('iso-ir-111', 'charset_decode_iso_ir_111'),
        'tis-620'               => array('tis620',     'charset_decode_tis620'),
        'big5'                  => array('big5',       'charset_decode_big5'),
        'gb2312'                => array('gb2312',     'charset_decode_gb2312'),
        'utf-8'                 => array('utf-8',      'charset_decode_utf8')
    );

    if (! isset($decoders[$charset])) {
        // Unknown charset: return the escaped text as it is.
        return $string;
    }

    $decode_file = $decoders[$charset][0];
    $decoder     = $decoders[$charset][1];
    if ($decode_file != '') {
        include_once(SM_PATH . 'functions/decode/' . $decode_file . '.php');
    }
    $ret = $decoder($string);

    return( $ret );
}
222
a2a7852b 223
/**
 * Masks the upper half of an ISO-8859 encoded string.
 *
 * Every byte in the range 0xA0-0xFF is replaced with a question mark; all
 * other bytes are returned unchanged.
 *
 * @param string $string Text to be decoded
 * @return string Text with the bytes 0xA0-0xFF replaced by '?'
 */
function charset_decode_iso_8859_default ($string) {
    // Collect the bytes 0xA0..0xFF (octal \240..\377) that have to be masked.
    $high_bytes = '';
    for ($code = 0xA0; $code <= 0xFF; $code++) {
        $high_bytes .= chr($code);
    }

    // strtr() maps each listed byte onto the '?' at the same position.
    return strtr($string, $high_bytes, str_repeat('?', strlen($high_bytes)));
}
244
245/*
246 * This is the same as ISO-646-NO and is used by some
247 * Microsoft programs when sending Norwegian characters
248 */
249function charset_decode_ns_4551_1 ($string) {
250 /*
251 * These characters are:
252 * Latin capital letter AE
253 * Latin capital letter O with stroke
254 * Latin capital letter A with ring above
255 * and the same as small letters
256 */
257