Added koi8-u decoding support. Changed windows-1251 and iso-8859-5 decoding
[squirrelmail.git] / functions / i18n.php
CommitLineData
59177427 1<?php
1fd97780 2
35586184 3/**
4 * i18n.php
5 *
76911253 6 * Copyright (c) 1999-2003 The SquirrelMail Project Team
35586184 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains various functions that are needed to do
10 * internationalization of SquirrelMail.
11 *
12 * Internally the output character set is used. Other characters are
13 * encoded using Unicode entities according to HTML 4.0.
14 *
15 * $Id$
16 */
17
961ca3d8 18require_once(SM_PATH . 'functions/global.php');
19
/**
 * Decodes a string to the internal encoding from the given charset.
 *
 * The string is first passed through the optional per-language XTRA_CODE
 * hook, then HTML-escaped, then handed to the decoder that matches
 * $charset. Charsets without a decoder get the HTML-escaped string back
 * unchanged.
 *
 * @param string $charset charset name; compared case-insensitively
 * @param string $string  text to decode
 * @return string decoded text
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language;

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    /*
     * All HTML special characters are 7 bit and can be replaced first.
     * The flags and encoding are given explicitly: with the UTF-8 default
     * of newer PHP versions a string holding raw 8-bit bytes is rejected
     * (or rewritten) before the charset decoders below ever see it.
     * ISO-8859-1 mode escapes & " < > only and leaves every other byte alone.
     */
    $string = htmlspecialchars ($string, ENT_COMPAT, 'ISO-8859-1');

    $charset = strtolower($charset);

    /* Defined elsewhere. NOTE(review): presumably sets up $default_charset
       for the decoders - confirm. */
    set_my_charset() ;

    /* preg_match() replaces ereg(), which no longer exists in PHP 7+ */
    if (preg_match('/iso-8859-([0-9]+)/', $charset, $res)) {
        /* switch compares loosely, exactly like the == chain it replaces */
        switch ($res[1]) {
            case '1':
                $ret = charset_decode_iso_8859_1 ($string);
                break;
            case '2':
                $ret = charset_decode_iso_8859_2 ($string);
                break;
            case '4':
                $ret = charset_decode_iso_8859_4 ($string);
                break;
            case '5':
                $ret = charset_decode_iso_8859_5 ($string);
                break;
            case '7':
                $ret = charset_decode_iso_8859_7 ($string);
                break;
            case '9':
                $ret = charset_decode_iso_8859_9 ($string);
                break;
            case '13':
                $ret = charset_decode_iso_8859_13 ($string);
                break;
            case '15':
                $ret = charset_decode_iso_8859_15 ($string);
                break;
            default:
                $ret = charset_decode_iso_8859_default ($string);
                break;
        }
        return( $ret );
    }

    switch ($charset) {
        case 'ns_4551-1':
            $ret = charset_decode_ns_4551_1 ($string);
            break;
        case 'koi8-r':
            $ret = charset_decode_koi8r ($string);
            break;
        case 'koi8-u':
            $ret = charset_decode_koi8u ($string);
            break;
        case 'windows-1251':
            $ret = charset_decode_windows_1251 ($string);
            break;
        case 'windows-1253':
            $ret = charset_decode_windows_1253 ($string);
            break;
        case 'windows-1254':
            $ret = charset_decode_windows_1254 ($string);
            break;
        case 'windows-1257':
            $ret = charset_decode_windows_1257 ($string);
            break;
        case 'utf-8':
            $ret = charset_decode_utf8 ($string);
            break;
        default:
            /* No decoder available: only the HTML escaping is applied */
            $ret = $string;
            break;
    }
    return( $ret );
}
78
/**
 * iso-8859-1 is the same as Latin 1 and is normally used
 * in western europe.
 *
 * Each byte 0x81-0xFF is rewritten as the decimal numeric entity of the
 * same value (&#129; ... &#255;). Nothing is changed when $default_charset
 * is iso-8859-1 or when the string has no 8-bit bytes.
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_1 ($string) {
    global $default_charset;
    static $map = null;

    if (strtolower($default_charset) == 'iso-8859-1') {
        return ($string);
    }

    /* Only do the convert if there are 8-bit characters
       (preg_match() replaces ereg(), which is gone in PHP 7+) */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return ($string);
    }

    if ($map === null) {
        $map = array();
        /* The table starts at 0x81, as the original replacement list did.
           NOTE(review): byte 0x80 is therefore left as is - confirm that
           this is intended. */
        for ($byte = 0x81; $byte <= 0xFF; $byte++) {
            $map[chr($byte)] = '&#' . $byte . ';';
        }
    }

    /* One pass over the string instead of one str_replace() per byte */
    return (strtr($string, $map));
}
221
/**
 * iso-8859-2 is used for some eastern European languages.
 *
 * Bytes 0xA0-0xFF are rewritten as decimal numeric entities of the
 * matching Unicode character. Nothing is changed when $default_charset is
 * iso-8859-2 or when the string has no 8-bit bytes.
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_2 ($string) {
    global $default_charset;
    static $map = null;

    if (strtolower($default_charset) == 'iso-8859-2')
        return $string;

    /* Only do the convert if there are 8-bit characters
       (preg_match() replaces ereg(), which is gone in PHP 7+) */
    if (! preg_match('/[\x80-\xFF]/', $string))
        return $string;

    if ($map === null) {
        /* Unicode code points of bytes 0xA0 ... 0xFF, in byte order */
        $codepoints = array(
            /* 0xA0-0xAF: NBSP, A ogonek, breve, L stroke, currency, L caron, ... */
            160, 260, 728, 321, 164, 317, 346, 167,
            168, 352, 350, 356, 377, 173, 381, 379,
            /* 0xB0-0xBF: degree, a ogonek, ogonek, l stroke, acute, l caron, ... */
            176, 261, 731, 322, 180, 318, 347, 711,
            184, 353, 351, 357, 378, 733, 382, 380,
            /* 0xC0-0xCF: capital letters R acute ... D caron */
            340, 193, 194, 258, 196, 313, 262, 199,
            268, 201, 280, 203, 282, 205, 206, 270,
            /* 0xD0-0xDF: capital letters D stroke ... T cedilla, sharp s */
            272, 323, 327, 211, 212, 336, 214, 215,
            344, 366, 218, 368, 220, 221, 354, 223,
            /* 0xE0-0xEF: small letters r acute ... d caron */
            341, 225, 226, 259, 228, 314, 263, 231,
            269, 233, 281, 235, 283, 237, 238, 271,
            /* 0xF0-0xFF: small letters d stroke ... t cedilla, dot above */
            273, 324, 328, 243, 244, 337, 246, 247,
            345, 367, 250, 369, 252, 253, 355, 729
        );

        $map = array();
        foreach ($codepoints as $offset => $codepoint) {
            $map[chr(0xA0 + $offset)] = '&#' . $codepoint . ';';
        }
    }

    /* One pass over the string instead of one str_replace() per byte */
    return strtr($string, $map);
}
428
/**
 * ISO/IEC 8859-4:1998 Latin Alphabet No. 4
 *
 * The characters that differ from ISO-8859-1 are rewritten as decimal
 * numeric entities here; everything else is left to
 * charset_decode_iso_8859_1(). Nothing is changed when $default_charset is
 * iso-8859-4 or when the string has no 8-bit bytes.
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_4 ($string) {
    global $default_charset;
    static $map = array(
        "\241" => '&#260;', "\242" => '&#312;', "\243" => '&#342;',
        "\245" => '&#296;', "\246" => '&#315;', "\251" => '&#352;',
        "\252" => '&#274;', "\253" => '&#290;', "\254" => '&#358;',
        "\256" => '&#381;', "\261" => '&#261;', "\262" => '&#731;',
        "\263" => '&#343;', "\265" => '&#297;', "\266" => '&#316;',
        "\267" => '&#711;', "\271" => '&#353;', "\272" => '&#275;',
        "\273" => '&#291;', "\274" => '&#359;', "\275" => '&#330;',
        "\276" => '&#382;', "\277" => '&#331;', "\300" => '&#256;',
        "\307" => '&#302;', "\310" => '&#268;', "\312" => '&#280;',
        "\314" => '&#278;', "\317" => '&#298;', "\320" => '&#272;',
        "\321" => '&#325;', "\322" => '&#332;', "\323" => '&#310;',
        "\331" => '&#370;', "\335" => '&#360;', "\336" => '&#362;',
        "\340" => '&#257;', "\347" => '&#303;', "\350" => '&#269;',
        "\352" => '&#281;', "\354" => '&#279;', "\357" => '&#299;',
        "\360" => '&#273;', "\361" => '&#326;', "\362" => '&#333;',
        "\363" => '&#311;', "\371" => '&#371;', "\375" => '&#361;',
        "\376" => '&#363;', "\377" => '&#729;'
    );

    if (strtolower($default_charset) == 'iso-8859-4')
        return $string;

    /* Only do the convert if there are 8-bit characters
       (preg_match() replaces ereg(), which is gone in PHP 7+) */
    if (! preg_match('/[\x80-\xFF]/', $string))
        return $string;

    /* One pass over the string instead of one str_replace() per byte */
    $string = strtr($string, $map);

    // rest of charset is the same as ISO-8859-1
    return (charset_decode_iso_8859_1($string));
}
497
/**
 * ISO-8859-5 is Cyrillic.
 *
 * Bytes 0xA0-0xFF are rewritten as decimal numeric entities. Nothing is
 * changed when $default_charset is iso-8859-5 or when the string has no
 * 8-bit bytes.
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_5 ($string) {
    global $default_charset;
    static $map = null;

    if (strtolower($default_charset) == 'iso-8859-5') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters
       (preg_match() replaces ereg(), which is gone in PHP 7+) */
    if (! preg_match('/[\x80-\xFF]/', $string))
        return $string;

    if ($map === null) {
        $map = array();
        /*
         * A lookup table replaces preg_replace() with the /e modifier,
         * which no longer exists in PHP 7+.
         * 161-255 -> 1025-1119 (+864); the exceptions are set below.
         */
        for ($byte = 0xA1; $byte <= 0xFF; $byte++) {
            $map[chr($byte)] = '&#' . ($byte + 864) . ';';
        }
        // NO-BREAK SPACE
        $map["\xA0"] = '&#160;';
        // SOFT HYPHEN
        $map["\xAD"] = '&#173;';
        // NUMERO SIGN
        $map["\xF0"] = '&#8470;';
        // SECTION SIGN
        $map["\xFD"] = '&#167;';
    }

    return strtr($string, $map);
}
531
/**
 * iso-8859-7 is Greek.
 *
 * 8-bit bytes are rewritten as decimal numeric entities. Nothing is
 * changed when $default_charset is iso-8859-7 or when the string has no
 * 8-bit bytes. Bytes 0xA4, 0xA5, 0xAA, 0xAE and 0xFF have no entry in the
 * table and are passed through unchanged.
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_7 ($string) {
    global $default_charset;
    static $map = null;

    if (strtolower($default_charset) == 'iso-8859-7') {
        return $string;
    }

    /* Only do the convert if there are 8-bit characters
       (preg_match() replaces ereg(), which is gone in PHP 7+) */
    if (! preg_match('/[\x80-\xFF]/', $string)) {
        return $string;
    }

    if ($map === null) {
        /* Some diverse characters in the beginning */
        $map = array(
            "\xA0" => '&#160;',
            "\xA1" => '&#8216;',
            "\xA2" => '&#8217;',
            "\xA3" => '&#163;',
            "\xA6" => '&#166;',
            "\xA7" => '&#167;',
            "\xA8" => '&#168;',
            "\xA9" => '&#169;',
            "\xAB" => '&#171;',
            "\xAC" => '&#172;',
            "\xAD" => '&#173;',
            /* Horizontal bar (parentheki pavla) */
            "\xAF" => '&#8213;',
            "\xB0" => '&#176;',
            "\xB1" => '&#177;',
            "\xB2" => '&#178;',
            "\xB3" => '&#179;'
        );

        /*
         * 11/04 (0xB4) to 15/14 (0xFE): Greek block, byte + 720
         * (0xB4-0xB6 are Unicode 900-902, 0xB8-0xBA are 904-906).
         * A lookup table replaces preg_replace() with the /e modifier,
         * which no longer exists in PHP 7+; the old replacement code also
         * left out the ';' that terminates the entity.
         */
        for ($byte = 0xB4; $byte <= 0xFE; $byte++) {
            $map[chr($byte)] = '&#' . ($byte + 720) . ';';
        }

        /* 11/07 (0xB7) Middle dot is the same in iso-8859-1 */
        $map["\xB7"] = '&#183;';
        /* 11/11 (0xBB) Right angle quotation mark is the same as in
           iso-8859-1 */
        $map["\xBB"] = '&#187;';
        /* 11/13 (0xBD) Vulgar fraction one half is the same as in
           iso-8859-1; byte + 720 would give the unassigned U+038D */
        $map["\xBD"] = '&#189;';
    }

    return strtr($string, $map);
}
592
/**
 * ISO/IEC 8859-9:1999 Latin Alphabet No. 5
 *
 * The six characters that differ from ISO-8859-1 are rewritten as decimal
 * numeric entities here; everything else is left to
 * charset_decode_iso_8859_1(). Nothing is changed when $default_charset is
 * iso-8859-9 or when the string has no 8-bit bytes.
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_9 ($string) {
    global $default_charset;
    static $map = array(
        // latin capital letter g with breve 208->286
        "\320" => '&#286;',
        // latin capital letter i with dot above 221->304
        "\335" => '&#304;',
        // latin capital letter s with cedilla 222->350
        "\336" => '&#350;',
        // latin small letter g with breve 240->287
        "\360" => '&#287;',
        // latin small letter dotless i 253->305
        "\375" => '&#305;',
        // latin small letter s with cedilla 254->351
        "\376" => '&#351;'
    );

    if (strtolower($default_charset) == 'iso-8859-9')
        return $string;

    /* Only do the convert if there are 8-bit characters
       (preg_match() replaces ereg(), which is gone in PHP 7+) */
    if (! preg_match('/[\x80-\xFF]/', $string))
        return $string;

    $string = strtr($string, $map);

    // rest of charset is the same as ISO-8859-1
    return (charset_decode_iso_8859_1($string));
}
623
624
/**
 * ISO/IEC 8859-13:1998 Latin Alphabet No. 7 (Baltic Rim)
 *
 * The characters that differ from ISO-8859-1 are rewritten as decimal
 * numeric entities here; everything else is left to
 * charset_decode_iso_8859_1(). Nothing is changed when $default_charset is
 * iso-8859-13 or when the string has no 8-bit bytes.
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_13 ($string) {
    global $default_charset;
    static $map = array(
        "\241" => '&#8221;', "\245" => '&#8222;', "\250" => '&#216;',
        "\252" => '&#342;',  "\257" => '&#198;',  "\264" => '&#8220;',
        "\270" => '&#248;',  "\272" => '&#343;',  "\277" => '&#230;',
        "\300" => '&#260;',  "\301" => '&#302;',  "\302" => '&#256;',
        "\303" => '&#262;',  "\306" => '&#280;',  "\307" => '&#274;',
        "\310" => '&#268;',  "\312" => '&#377;',  "\313" => '&#278;',
        "\314" => '&#290;',  "\315" => '&#310;',  "\316" => '&#298;',
        "\317" => '&#315;',  "\320" => '&#352;',  "\321" => '&#323;',
        "\322" => '&#325;',  "\324" => '&#332;',  "\330" => '&#370;',
        "\331" => '&#321;',  "\332" => '&#346;',  "\333" => '&#362;',
        "\335" => '&#379;',  "\336" => '&#381;',  "\340" => '&#261;',
        "\341" => '&#303;',  "\342" => '&#257;',  "\343" => '&#263;',
        "\346" => '&#281;',  "\347" => '&#275;',  "\350" => '&#269;',
        "\352" => '&#378;',  "\353" => '&#279;',  "\354" => '&#291;',
        "\355" => '&#311;',  "\356" => '&#299;',  "\357" => '&#316;',
        "\360" => '&#353;',  "\361" => '&#324;',  "\362" => '&#326;',
        "\364" => '&#333;',  "\370" => '&#371;',  "\371" => '&#322;',
        "\372" => '&#347;',  "\373" => '&#363;',  "\375" => '&#380;',
        "\376" => '&#382;',  "\377" => '&#8217;'
    );

    if (strtolower($default_charset) == 'iso-8859-13')
        return $string;

    /* Only do the convert if there are 8-bit characters
       (preg_match() replaces ereg(), which is gone in PHP 7+) */
    if (! preg_match('/[\x80-\xFF]/', $string))
        return $string;

    /* One pass over the string instead of one str_replace() per byte */
    $string = strtr($string, $map);

    // rest of charset is the same as ISO-8859-1
    return (charset_decode_iso_8859_1($string));
}
698
/**
 * iso-8859-15 is Latin 9 and has very much the same use as Latin 1
 * but has the Euro symbol and some characters needed for French.
 *
 * The eight characters listed below are rewritten as decimal numeric
 * entities; the result is then handed to charset_decode_iso_8859_1().
 *
 * @param string $string text to decode
 * @return string decoded text
 */
function charset_decode_iso_8859_15 ($string) {
    $latin9_only = array(
        "\244" => '&#8364;',  // Euro sign
        "\246" => '&#352;',   // Latin capital letter S with caron
        "\250" => '&#353;',   // Latin small letter s with caron
        "\264" => '&#381;',   // Latin capital letter Z with caron
        "\270" => '&#382;',   // Latin small letter z with caron
        "\274" => '&#338;',   // Latin capital ligature OE
        "\275" => '&#339;',   // Latin small ligature oe
        "\276" => '&#376;'    // Latin capital letter Y with diaeresis
    );

    $converted = strtr($string, $latin9_only);

    return (charset_decode_iso_8859_1($converted));
}
723
a2a7852b 724
/**
 * Remove all 8 bit characters from all other ISO-8859 character sets.
 *
 * Every byte from 0xA0 to 0xFF becomes a question mark; all other bytes
 * are kept as they are.
 *
 * @param string $string text to decode
 * @return string text with bytes 0xA0-0xFF replaced by '?'
 */
function charset_decode_iso_8859_default ($string) {
    /* Build the list of bytes 0xA0 ... 0xFF instead of spelling it out */
    $high_bytes = '';
    for ($byte = 0xA0; $byte <= 0xFF; $byte++) {
        $high_bytes .= chr($byte);
    }

    $placeholders = str_repeat('?', strlen($high_bytes));

    return (strtr($string, $high_bytes, $placeholders));
}
745
746/*
747 * This is the same as ISO-646-NO and is used by some
748 * Microsoft programs when sending Norwegian characters
749 */
750function charset_decode_ns_4551_1 ($string) {
751 /*
752 * These characters are:
753 * Latin capital letter AE
754 * Latin capital letter O with stroke
755 * Latin capital letter A with ring above
756 * and the same as small letters
757 */
758