Added koi8-u decoding support. Changed windows-1251 and iso-8859-5 decoding
[squirrelmail.git] / functions / i18n.php
... / ...
CommitLineData
1<?php
2
3/**
4 * i18n.php
5 *
6 * Copyright (c) 1999-2003 The SquirrelMail Project Team
7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains various functions that are needed to do
10 * internationalization of SquirrelMail.
11 *
12 * Internally the output character set is used. Other characters are
13 * encoded using Unicode entities according to HTML 4.0.
14 *
15 * $Id$
16 */
17
18require_once(SM_PATH . 'functions/global.php');
19
/**
 * Decodes a string to the internal encoding from the given charset.
 *
 * The string is first passed through the active language's optional
 * XTRA_CODE hook, then HTML-escaped, and finally handed to the decoder
 * that matches $charset. Charsets without a decoder are returned
 * HTML-escaped but otherwise untouched.
 *
 * @param string $charset name of the charset $string is encoded in
 *                        (matched case-insensitively)
 * @param string $string  text to decode
 * @return string HTML-safe text with foreign characters converted by the
 *                charset-specific decoder
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language;

    /* Let the language-specific hook pre-process the string, if one is set */
    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    /*
     * All HTML special characters are 7 bit and can be replaced first.
     * Flags and charset are pinned explicitly: newer PHP versions default
     * to UTF-8 and return an empty string when the input contains bytes
     * that are not valid UTF-8, which is the normal case for 8-bit mail.
     */
    $string = htmlspecialchars($string, ENT_COMPAT, 'ISO-8859-1');

    $charset = strtolower($charset);

    /* NOTE(review): defined elsewhere; presumably refreshes $default_charset - confirm */
    set_my_charset();

    /* preg_match() replaces ereg(), which no longer exists in PHP 7+ */
    if (preg_match('/iso-8859-([0-9]+)/', $charset, $res)) {
        switch ((int) $res[1]) {
        case 1:
            return charset_decode_iso_8859_1($string);
        case 2:
            return charset_decode_iso_8859_2($string);
        case 4:
            return charset_decode_iso_8859_4($string);
        case 5:
            return charset_decode_iso_8859_5($string);
        case 7:
            return charset_decode_iso_8859_7($string);
        case 9:
            return charset_decode_iso_8859_9($string);
        case 13:
            return charset_decode_iso_8859_13($string);
        case 15:
            return charset_decode_iso_8859_15($string);
        default:
            return charset_decode_iso_8859_default($string);
        }
    }

    /* Decoders for the non ISO-8859 charsets, keyed by exact charset name */
    $decoders = array(
        'ns_4551-1'    => 'charset_decode_ns_4551_1',
        'koi8-r'       => 'charset_decode_koi8r',
        'koi8-u'       => 'charset_decode_koi8u',
        'windows-1251' => 'charset_decode_windows_1251',
        'windows-1253' => 'charset_decode_windows_1253',
        'windows-1254' => 'charset_decode_windows_1254',
        'windows-1257' => 'charset_decode_windows_1257',
        'utf-8'        => 'charset_decode_utf8',
    );

    if (isset($decoders[$charset])) {
        $decoder = $decoders[$charset];
        return $decoder($string);
    }

    /* Unknown charset: nothing more than the HTML escaping can be done */
    return $string;
}
78
/**
 * iso-8859-1 is the same as Latin 1 and is normally used
 * in western europe.
 *
 * Every 8-bit byte is replaced with the numeric HTML entity of the same
 * value, because ISO-8859-1 byte values equal their Unicode code points.
 * Nothing is converted when $default_charset is already iso-8859-1.
 *
 * @param string $string text encoded in ISO-8859-1
 * @return string text with bytes 128-255 written as &#NNN; entities
 */
function charset_decode_iso_8859_1 ($string) {
    global $default_charset;

    if (strtolower($default_charset) == 'iso-8859-1') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    /*
     * Byte 128 is included as well: the 8-bit check above always covered
     * it, but the old replacement list started at 129 and let it through.
     */
    $map = array();
    for ($code = 128; $code <= 255; $code++) {
        $map[chr($code)] = '&#' . $code . ';';
    }

    /* One pass over the string instead of one str_replace() per byte */
    return strtr($string, $map);
}
221
/**
 * iso-8859-2 is used for some eastern European languages.
 *
 * Bytes 0xA0-0xFF are replaced with the numeric HTML entity of the
 * matching Unicode character. Bytes 0x80-0x9F are left untouched.
 * Nothing is converted when $default_charset is already iso-8859-2.
 *
 * @param string $string text encoded in ISO-8859-2
 * @return string text with bytes 0xA0-0xFF written as &#NNN; entities
 */
function charset_decode_iso_8859_2 ($string) {
    global $default_charset;

    if (strtolower($default_charset) == 'iso-8859-2') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    /* Unicode code points for bytes 0xA0..0xFF, eight bytes per row */
    $codepoints = array(
        160, 260, 728, 321, 164, 317, 346, 167, /* 0xA0-0xA7 */
        168, 352, 350, 356, 377, 173, 381, 379, /* 0xA8-0xAF */
        176, 261, 731, 322, 180, 318, 347, 711, /* 0xB0-0xB7 */
        184, 353, 351, 357, 378, 733, 382, 380, /* 0xB8-0xBF */
        340, 193, 194, 258, 196, 313, 262, 199, /* 0xC0-0xC7 */
        268, 201, 280, 203, 282, 205, 206, 270, /* 0xC8-0xCF */
        272, 323, 327, 211, 212, 336, 214, 215, /* 0xD0-0xD7 */
        344, 366, 218, 368, 220, 221, 354, 223, /* 0xD8-0xDF */
        341, 225, 226, 259, 228, 314, 263, 231, /* 0xE0-0xE7 */
        269, 233, 281, 235, 283, 237, 238, 271, /* 0xE8-0xEF */
        273, 324, 328, 243, 244, 337, 246, 247, /* 0xF0-0xF7 */
        345, 367, 250, 369, 252, 253, 355, 729  /* 0xF8-0xFF */
    );

    $map = array();
    foreach ($codepoints as $offset => $codepoint) {
        $map[chr(0xA0 + $offset)] = '&#' . $codepoint . ';';
    }

    return strtr($string, $map);
}
428
/**
 * ISO/IEC 8859-4:1998 Latin Alphabet No. 4
 *
 * Replaces the bytes that differ from ISO-8859-1 with numeric HTML
 * entities and passes the result to charset_decode_iso_8859_1() for the
 * remaining 8-bit bytes. Nothing is converted when $default_charset is
 * already iso-8859-4.
 *
 * @param string $string text encoded in ISO-8859-4
 * @return string decoded text
 */
function charset_decode_iso_8859_4 ($string) {
    global $default_charset;

    if (strtolower($default_charset) == 'iso-8859-4') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    /* Bytes whose meaning differs from ISO-8859-1 */
    $map = array(
        "\241" => '&#260;', "\242" => '&#312;', "\243" => '&#342;',
        "\245" => '&#296;', "\246" => '&#315;', "\251" => '&#352;',
        "\252" => '&#274;', "\253" => '&#290;', "\254" => '&#358;',
        "\256" => '&#381;', "\261" => '&#261;', "\262" => '&#731;',
        "\263" => '&#343;', "\265" => '&#297;', "\266" => '&#316;',
        "\267" => '&#711;', "\271" => '&#353;', "\272" => '&#275;',
        "\273" => '&#291;', "\274" => '&#359;', "\275" => '&#330;',
        "\276" => '&#382;', "\277" => '&#331;', "\300" => '&#256;',
        "\307" => '&#302;', "\310" => '&#268;', "\312" => '&#280;',
        "\314" => '&#278;', "\317" => '&#298;', "\320" => '&#272;',
        "\321" => '&#325;', "\322" => '&#332;', "\323" => '&#310;',
        "\331" => '&#370;', "\335" => '&#360;', "\336" => '&#362;',
        "\340" => '&#257;', "\347" => '&#303;', "\350" => '&#269;',
        "\352" => '&#281;', "\354" => '&#279;', "\357" => '&#299;',
        "\360" => '&#273;', "\361" => '&#326;', "\362" => '&#333;',
        "\363" => '&#311;', "\371" => '&#371;', "\375" => '&#361;',
        "\376" => '&#363;', "\377" => '&#729;'
    );

    // rest of charset is the same as ISO-8859-1
    return charset_decode_iso_8859_1(strtr($string, $map));
}
497
/**
 * ISO-8859-5 is Cyrillic.
 *
 * Bytes 0xA0-0xFF are replaced with numeric HTML entities. Most of them
 * sit at a fixed distance of 864 from their Unicode code point; the four
 * exceptions are listed explicitly. Bytes 0x80-0x9F are left untouched.
 * Nothing is converted when $default_charset is already iso-8859-5.
 *
 * @param string $string text encoded in ISO-8859-5
 * @return string text with bytes 0xA0-0xFF written as &#NNN; entities
 */
function charset_decode_iso_8859_5 ($string) {
    global $default_charset;

    if (strtolower($default_charset) == 'iso-8859-5') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    /* Bytes that do not follow the +864 rule */
    $map = array(
        "\240" => '&#160;',  // NO-BREAK SPACE
        "\255" => '&#173;',  // SOFT HYPHEN
        "\360" => '&#8470;', // NUMERO SIGN
        "\375" => '&#167;'   // SECTION SIGN
    );

    /*
     * Everything else: 161-172 -> 1025-1036, 174-239 -> 1038-1103,
     * 241-252 -> 1105-1116, 254 -> 1118, 255 -> 1119 (all +864).
     * A lookup table replaces preg_replace() with the /e modifier,
     * which PHP 7 no longer supports.
     */
    for ($byte = 0xA1; $byte <= 0xFF; $byte++) {
        $char = chr($byte);
        if (! isset($map[$char])) {
            $map[$char] = '&#' . ($byte + 864) . ';';
        }
    }

    return strtr($string, $map);
}
531
/**
 * iso-8859-7 is Greek.
 *
 * Replaces 8-bit bytes with numeric HTML entities. Punctuation and
 * symbols are listed explicitly; the Greek letters sit at a fixed
 * distance of 720 from their Unicode code point. Bytes 0x80-0x9F, 0xA4,
 * 0xA5, 0xAA, 0xAE and 0xFF have no mapping here and are left untouched.
 * Nothing is converted when $default_charset is already iso-8859-7.
 *
 * @param string $string text encoded in ISO-8859-7
 * @return string text with the mapped bytes written as &#NNN; entities
 */
function charset_decode_iso_8859_7 ($string) {
    global $default_charset;

    if (strtolower($default_charset) == 'iso-8859-7') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    /* Some diverse characters that do not follow the +720 rule */
    $map = array(
        "\240" => '&#160;',
        "\241" => '&#8216;',
        "\242" => '&#8217;',
        "\243" => '&#163;',
        "\246" => '&#166;',
        "\247" => '&#167;',
        "\250" => '&#168;',
        "\251" => '&#169;',
        "\253" => '&#171;',
        "\254" => '&#172;',
        "\255" => '&#173;',
        "\257" => '&#8213;', /* Horizontal bar (parentheki pavla) */
        "\260" => '&#176;',
        "\261" => '&#177;',
        "\262" => '&#178;',
        "\263" => '&#179;',
        "\267" => '&#183;',  /* Middle dot is the same in iso-8859-1 */
        "\273" => '&#187;'   /* Right angle quotation mark, as in iso-8859-1 */
    );

    /*
     * 0xB4-0xB6 are Unicode 900-902, 0xB8-0xBA are Unicode 904-906 and
     * 0xBC-0xFE are Unicode 908-974 (all +720). The entities are now
     * terminated with ';', which the old /e replacement left out.
     */
    for ($byte = 0xB4; $byte <= 0xFE; $byte++) {
        $char = chr($byte);
        if (! isset($map[$char])) {
            $map[$char] = '&#' . ($byte + 720) . ';';
        }
    }

    return strtr($string, $map);
}
592
/**
 * ISO/IEC 8859-9:1999 Latin Alphabet No. 5
 *
 * Replaces the six bytes that differ from ISO-8859-1 with numeric HTML
 * entities and passes the result to charset_decode_iso_8859_1() for the
 * remaining 8-bit bytes. Nothing is converted when $default_charset is
 * already iso-8859-9.
 *
 * @param string $string text encoded in ISO-8859-9
 * @return string decoded text
 */
function charset_decode_iso_8859_9 ($string) {
    global $default_charset;

    if (strtolower($default_charset) == 'iso-8859-9') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    $map = array(
        "\320" => '&#286;', // latin capital letter g with breve 208->286
        "\335" => '&#304;', // latin capital letter i with dot above 221->304
        "\336" => '&#350;', // latin capital letter s with cedilla 222->350
        "\360" => '&#287;', // latin small letter g with breve 240->287
        "\375" => '&#305;', // latin small letter dotless i 253->305
        "\376" => '&#351;'  // latin small letter s with cedilla 254->351
    );

    // rest of charset is the same as ISO-8859-1
    return charset_decode_iso_8859_1(strtr($string, $map));
}
623
624
/**
 * ISO/IEC 8859-13:1998 Latin Alphabet No. 7 (Baltic Rim)
 *
 * Replaces the bytes that differ from ISO-8859-1 with numeric HTML
 * entities and passes the result to charset_decode_iso_8859_1() for the
 * remaining 8-bit bytes. Nothing is converted when $default_charset is
 * already iso-8859-13.
 *
 * @param string $string text encoded in ISO-8859-13
 * @return string decoded text
 */
function charset_decode_iso_8859_13 ($string) {
    global $default_charset;

    if (strtolower($default_charset) == 'iso-8859-13') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    /* Bytes whose meaning differs from ISO-8859-1 */
    $map = array(
        "\241" => '&#8221;', "\245" => '&#8222;', "\250" => '&#216;',
        "\252" => '&#342;',  "\257" => '&#198;',  "\264" => '&#8220;',
        "\270" => '&#248;',  "\272" => '&#343;',  "\277" => '&#230;',
        "\300" => '&#260;',  "\301" => '&#302;',  "\302" => '&#256;',
        "\303" => '&#262;',  "\306" => '&#280;',  "\307" => '&#274;',
        "\310" => '&#268;',  "\312" => '&#377;',  "\313" => '&#278;',
        "\314" => '&#290;',  "\315" => '&#310;',  "\316" => '&#298;',
        "\317" => '&#315;',  "\320" => '&#352;',  "\321" => '&#323;',
        "\322" => '&#325;',  "\324" => '&#332;',  "\330" => '&#370;',
        "\331" => '&#321;',  "\332" => '&#346;',  "\333" => '&#362;',
        "\335" => '&#379;',  "\336" => '&#381;',  "\340" => '&#261;',
        "\341" => '&#303;',  "\342" => '&#257;',  "\343" => '&#263;',
        "\346" => '&#281;',  "\347" => '&#275;',  "\350" => '&#269;',
        "\352" => '&#378;',  "\353" => '&#279;',  "\354" => '&#291;',
        "\355" => '&#311;',  "\356" => '&#299;',  "\357" => '&#316;',
        "\360" => '&#353;',  "\361" => '&#324;',  "\362" => '&#326;',
        "\364" => '&#333;',  "\370" => '&#371;',  "\371" => '&#322;',
        "\372" => '&#347;',  "\373" => '&#363;',  "\375" => '&#380;',
        "\376" => '&#382;',  "\377" => '&#8217;'
    );

    // rest of charset is the same as ISO-8859-1
    return charset_decode_iso_8859_1(strtr($string, $map));
}
698
/**
 * iso-8859-15 is Latin 9 and has very much the same use as Latin 1
 * but has the Euro symbol and some characters needed for French.
 *
 * The eight bytes that differ from ISO-8859-1 are replaced with numeric
 * HTML entities; everything else is left to charset_decode_iso_8859_1().
 *
 * @param string $string text encoded in ISO-8859-15
 * @return string decoded text
 */
function charset_decode_iso_8859_15 ($string) {
    $latin9_overrides = array(
        "\244" => '&#8364;', // Euro sign
        "\246" => '&#352;',  // Latin capital letter S with caron
        "\250" => '&#353;',  // Latin small letter s with caron
        "\264" => '&#381;',  // Latin capital letter Z with caron
        "\270" => '&#382;',  // Latin small letter z with caron
        "\274" => '&#338;',  // Latin capital ligature OE
        "\275" => '&#339;',  // Latin small ligature oe
        "\276" => '&#376;'   // Latin capital letter Y with diaeresis
    );

    $converted = strtr($string, $latin9_overrides);

    return charset_decode_iso_8859_1($converted);
}
723
724
/**
 * Remove all 8 bit characters from all other ISO-8859 character sets.
 *
 * Used for ISO-8859 variants that have no dedicated decoder: every byte
 * in the range 0x80-0xFF is replaced with a question mark. This now
 * includes 0x80-0x9F, which the previous translation table skipped
 * although the function is meant to remove all 8-bit characters.
 *
 * @param string $string text in an ISO-8859 charset without a decoder
 * @return string the text with every 8-bit byte replaced by '?'
 */
function charset_decode_iso_8859_default ($string) {
    /* Build the list of 8-bit bytes instead of spelling each one out */
    $eight_bit = '';
    for ($byte = 0x80; $byte <= 0xFF; $byte++) {
        $eight_bit .= chr($byte);
    }

    /* Same length on both sides, so no byte depends on strtr() truncation */
    return strtr($string, $eight_bit, str_repeat('?', strlen($eight_bit)));
}
745
746/*
747 * This is the same as ISO-646-NO and is used by some
748 * Microsoft programs when sending Norwegian characters
749 */
750function charset_decode_ns_4551_1 ($string) {
751 /*
752 * These characters are:
753 * Latin capital letter AE
754 * Latin capital letter O with stroke
755 * Latin capital letter A with ring above
756 * and the same as small letters
757 */
758