Added limited support for Cyrillic (only KOI8-R yet) and made it possible to
[squirrelmail.git] / functions / i18n.php
1 <?
2
3 /**
4 ** i18n.php
5 **
6 ** This file contains various functions that are needed to do
7 ** internationalization of SquirrelMail.
8 **
9 ** Internally iso-8859-1 is used as character set. Other characters
10 ** are encoded using Unicode entities according to HTML 4.0.
11 **
12 **/
13
// Flag set once this file has been evaluated.
// NOTE(review): presumably checked by other scripts as an include guard --
// confirm against the callers.
$i18n_php = true;

// This array specifies the available languages. Each entry holds the
// display name ("NAME") and the language code ("CODE").
$languages[0]["NAME"] = "English";
$languages[0]["CODE"] = "en";
$languages[1]["NAME"] = "Norsk";
$languages[1]["CODE"] = "no";
$languages[2]["NAME"] = "Deutsch";
$languages[2]["CODE"] = "de";
23
// Decodes a string to the internal encoding from the given charset.
//
//   $charset  name of the charset $string is encoded in (case-insensitive)
//   $string   the raw text to decode
//
// Returns the text with HTML special characters escaped and, where a
// charset_decode_* helper exists for the charset, with its 8 bit
// characters converted by that helper. Text in any other charset is only
// HTML-escaped.
function charset_decode ($charset, $string) {
    // All HTML special characters are 7 bit and can be replaced first.
    // The flags and the encoding are given explicitly: with the default
    // (UTF-8) encoding of newer PHP versions, 8 bit input that is not valid
    // UTF-8 would be rejected or altered before it ever reaches a decoder.
    $string = htmlspecialchars($string, ENT_COMPAT, "ISO-8859-1");

    $charset = strtolower($charset);

    // $res[1] receives whatever follows "iso-8859-" in the charset name.
    if (preg_match("/iso-8859-(.*)/s", $charset, $res)) {
        if ($res[1] === "1") {
            return charset_decode_iso_8859_1($string);
        }
        if ($res[1] === "7") {
            return charset_decode_iso_8859_7($string);
        }
        if ($res[1] === "15") {
            return charset_decode_iso_8859_15($string);
        }
        // Every other ISO-8859 part has no dedicated decoder.
        return charset_decode_iso_8859_default($string);
    }

    if ($charset === "ns_4551-1") {
        return charset_decode_ns_4551_1($string);
    }

    if ($charset === "koi8-r") {
        return charset_decode_koi8r($string);
    }

    // Unknown charset: hand back the HTML-escaped text untouched.
    return $string;
}
47
// iso-8859-1 is the same as Latin 1 and is normally used
// in western europe.
//
//   $string  text encoded in iso-8859-1
//
// Returns the text with every 0xF8 byte (Latin small letter o with
// stroke) replaced by the entity &#248;. All other bytes are unchanged.
function charset_decode_iso_8859_1 ($string) {
    // This is only debug code as long as the internal
    // character set is iso-8859-1

    // str_replace() already replaces every occurrence in one call, so no
    // loop (and no regular expression) is needed for a fixed byte.
    return str_replace("\370", "&#248;", $string);
}
60
// iso-8859-7 is Greek.
//
//   $string  text encoded in iso-8859-7
//
// Returns the text with the Greek-specific 8 bit characters replaced by
// numeric Unicode entities. Characters that are identical in iso-8859-1
// are left as they are, and code points that should not be used are
// replaced by "?".
function charset_decode_iso_8859_7 ($string) {
    $map = array(
        // 0xA1 and 0xA2 are the left and right single quotation marks
        "\241" => "&#8216;",
        "\242" => "&#8217;",
        // 0xA4, 0xA5, 0xAA, 0xAE, 0xD2 and 0xFF should not be used
        "\244" => "?",
        "\245" => "?",
        "\252" => "?",
        "\256" => "?",
        "\322" => "?",
        "\377" => "?",
        // Horizontal bar (parentheki pavla)
        "\257" => "&#8213;"
    );

    // From 11/04 (0xB4) up to 0xFE the Unicode code point is the byte
    // value plus 720 (0xB4 -> 900, 0xB8 -> 904, 0xFE -> 974), except for:
    //   0xB7  middle dot                    - same as in iso-8859-1
    //   0xBB  right angle quotation mark    - same as in iso-8859-1
    //   0xBD  vulgar fraction one half      - same as in iso-8859-1
    //   0xD2  should not be used            - already mapped to "?"
    for ($code = 0xB4; $code <= 0xFE; $code++) {
        if ($code == 0xB7 || $code == 0xBB || $code == 0xBD || $code == 0xD2) {
            continue;
        }
        $map[chr($code)] = "&#" . ($code + 720) . ";";
    }

    // A single pass over the input; the generated entities are pure ASCII
    // and therefore can never be translated a second time.
    return strtr($string, $map);
}
99
// iso-8859-15 is Latin 15 and has very much the same use as Latin 1
// but has the Euro symbol and some characters needed for French.
//
//   $string  text encoded in iso-8859-15
//
// Returns the text with the eight characters listed below replaced by
// numeric Unicode entities; every other byte is left unchanged.
function charset_decode_iso_8859_15 ($string) {
    // One translation table applied in a single pass. Each source byte is
    // paired directly with its own entity, so a byte can neither be
    // skipped nor replaced by the entity of a different character.
    return strtr($string, array(
        "\244" => "&#8364;",  // Euro sign
        "\246" => "&#352;",   // Latin capital letter S with caron
        "\250" => "&#353;",   // Latin small letter s with caron
        "\264" => "&#381;",   // Latin capital letter Z with caron
        "\270" => "&#382;",   // Latin small letter z with caron
        "\274" => "&#338;",   // Latin capital ligature OE
        "\275" => "&#339;",   // Latin small ligature oe
        "\276" => "&#376;"    // Latin capital letter Y with diaeresis
    ));
}
130
// ISO-8859-5 is Cyrillic.
//
//   $string  text encoded in iso-8859-5
//
// Returns the text with the Cyrillic 8 bit characters replaced by numeric
// Unicode entities. 0xA0 (no-break space) and 0xAD (soft hyphen) are the
// same as in iso-8859-1 and are left unchanged.
function charset_decode_iso_8859_5 ($string) {
    $map = array(
        "\360" => "&#8470;",  // Numero sign
        "\375" => "&#167;"    // Section sign
    );

    // Every remaining character from 0xA1 to 0xFF lies in the Unicode
    // Cyrillic block at the byte value plus 864 (0xA1 -> 1025,
    // 0xB0 -> 1040, 0xFF -> 1119).
    for ($code = 0xA1; $code <= 0xFF; $code++) {
        if ($code == 0xAD || $code == 0xF0 || $code == 0xFD) {
            continue;
        }
        $map[chr($code)] = "&#" . ($code + 864) . ";";
    }

    return strtr($string, $map);
}
135
// Fallback for the ISO-8859 character sets that have no decoder of their
// own: every byte from 0xA0 to 0xFF is replaced by a question mark.
//
//   $string  text in some ISO-8859 character set
//
// Returns the text with those bytes replaced; bytes below 0xA0 are
// returned unchanged.
function charset_decode_iso_8859_default ($string) {
    // Collect the bytes 0xA0..0xFF into the "from" list ...
    $high_bytes = "";
    for ($code = 0xA0; $code <= 0xFF; $code++) {
        $high_bytes .= chr($code);
    }

    // ... and pair each of them with a "?".
    $placeholders = str_repeat("?", strlen($high_bytes));

    return strtr($string, $high_bytes, $placeholders);
}
156
// This is the same as ISO-646-NO and is used by some
// Microsoft programs when sending Norwegian characters
//
//   $string  text encoded in ns_4551-1
//
// Returns the text with the six national characters translated to their
// iso-8859-1 byte values; everything else is unchanged.
function charset_decode_ns_4551_1 ($string) {
    // The replacement bytes are written as octal escapes so that the
    // result does not depend on the encoding this source file is saved
    // in (strtr() works on bytes, not on characters):
    //   [  ->  0xC6  Latin capital letter AE
    //   \  ->  0xD8  Latin capital letter O with stroke
    //   ]  ->  0xC5  Latin capital letter A with ring above
    //   {  ->  0xE6  Latin small letter ae
    //   |  ->  0xF8  Latin small letter o with stroke
    //   }  ->  0xE5  Latin small letter a with ring above
    return strtr($string, "[\\]{|}", "\306\330\305\346\370\345");
}
167
// KOI8-R is used to encode Russian mail (Cyrillic). Defined in RFC
// 1489.
//
//   $string  text encoded in KOI8-R
//
// Reads the global $default_charset: when it is "koi8-r" the text is
// returned unchanged, otherwise every byte from 0x80 to 0xFF is replaced
// by the numeric Unicode entity of the character it stands for.
function charset_decode_koi8r ($string) {
    global $default_charset;

    if ($default_charset === "koi8-r") {
        return $string;
    }

    // Unicode code points of the KOI8-R bytes 0x80..0xFF, eight per row.
    $codepoints = array(
        /* 0x80 */ 9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508,
        /* 0x88 */ 9516, 9524, 9532, 9600, 9604, 9608, 9612, 9616,
        /* 0x90 */ 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776,
        /* 0x98 */ 8804, 8805,  160, 8993,  176,  178,  183,  247,
        /* 0xA0 */ 9552, 9553, 9554, 1105, 9555, 9556, 9557, 9558,
        /* 0xA8 */ 9559, 9560, 9561, 9562, 9563, 9564, 9565, 9566,
        /* 0xB0 */ 9567, 9568, 9569, 1025, 9570, 9571, 9572, 9573,
        /* 0xB8 */ 9574, 9575, 9576, 9577, 9578, 9579, 9580,  169,
        /* 0xC0 */ 1102, 1072, 1073, 1094, 1076, 1077, 1092, 1075,
        /* 0xC8 */ 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086,
        /* 0xD0 */ 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
        /* 0xD8 */ 1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098,
        /* 0xE0 */ 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043,
        /* 0xE8 */ 1061, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
        /* 0xF0 */ 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042,
        /* 0xF8 */ 1068, 1067, 1047, 1064, 1069, 1065, 1063, 1066
    );

    // Convert to Unicode HTML entities. The table is turned into a
    // byte => entity map and applied in one pass; the entities consist of
    // ASCII characters only, so nothing is translated twice.
    $map = array();
    foreach ($codepoints as $offset => $codepoint) {
        $map[chr(0x80 + $offset)] = "&#" . $codepoint . ";";
    }

    return strtr($string, $map);
}
310
311 ?>