6 ** This file contains various functions that are needed to do
7 ** internationalization of SquirrelMail.
9 ** Internally iso-8859-1 is used as character set. Other characters
10 ** are encoded using Unicode entities according to HTML 4.0.
// Decodes a string to the internal encoding from the given charset.
//
// The internal encoding is iso-8859-1; characters outside it are
// written as HTML numeric entities.
//
//   $charset  Charset name of $string (compared case-insensitively).
//   $string   Raw text in that charset.
//
// Returns the text with HTML special characters escaped and, for the
// charsets handled below, passed through the matching charset_decode_*
// function. Text in any other charset is returned with only the HTML
// escaping applied.
function charset_decode ($charset, $string) {
    // All HTML special characters are 7 bit and can be replaced first.
    // Flags and charset are spelled out because the defaults differ
    // between PHP versions: with a UTF-8 default, 8 bit input is
    // rejected or substituted, and newer defaults also escape single
    // quotes. ISO-8859-1 makes the call work byte by byte.
    $string = htmlspecialchars($string, ENT_COMPAT, 'ISO-8859-1');

    $charset = strtolower($charset);

    // preg_match replaces ereg, which no longer exists in PHP 7+.
    // The "s" modifier keeps "." matching any byte, as it did in ereg.
    if (preg_match('/iso-8859-(.*)/s', $charset, $res)) {
        // switch compares loosely, like the "==" chain it replaces.
        switch ($res[1]) {
            case "1":
                return charset_decode_iso_8859_1($string);
            case "7":
                return charset_decode_iso_8859_7($string);
            case "15":
                return charset_decode_iso_8859_15($string);
            default:
                return charset_decode_iso_8859_default($string);
        }
    }

    if ($charset == "ns_4551-1") {
        return charset_decode_ns_4551_1($string);
    }

    // Unknown charset: hand back the escaped text rather than nothing.
    return $string;
}
// iso-8859-1 is the same as Latin 1 and is normally used for
// Western European languages.
//
//   $string  Text in iso-8859-1.
//
// Returns the text with every 0xF8 byte written as the numeric
// entity &#248;; all other bytes are left as they are.
function charset_decode_iso_8859_1 ($string) {
    // This is only debug code as long as the internal
    // character set is iso-8859-1

    // Latin small letter o with stroke. The entity is spelled in
    // ASCII so the result does not depend on this file's encoding,
    // and str_replace already replaces every occurrence in one call.
    return str_replace("\370", "&#248;", $string);
}
// iso-8859-7 is Greek.
//
//   $string  Text in iso-8859-7.
//
// Returns the text with the Greek positions written as HTML numeric
// entities, the unused positions replaced by "?", and the positions
// shared with iso-8859-1 left as they are.
function charset_decode_iso_8859_7 ($string) {
    $map = array();

    // The Greek positions from 11/04 (0xB4) to 15/14 (0xFE) all sit
    // 720 below their Unicode code point: 0xB4-0xB6 are Unicode
    // 900-902, 0xB8-0xBA are 904-906, 0xBC is 908 and 0xBE-0xFE are
    // 910-974. Each byte gets its own entity.
    for ($byte = 0xB4; $byte <= 0xFE; $byte++) {
        $map[chr($byte)] = "&#" . ($byte + 720) . ";";
    }

    // 11/07 (0xB7) middle dot, 11/11 (0xBB) right angle quotation
    // mark and 11/13 (0xBD) vulgar fraction one half are the same as
    // in iso-8859-1 and must stay untouched.
    unset($map["\267"], $map["\273"], $map["\275"]);

    // Horizontal bar (parentheki pavla)
    $map["\257"] = "&#8213;";

    // Could not find Unicode equivalent of 0xA1 and 0xA2
    // 0xA4, 0xA5, 0xAA, 0xAE, 0xD2 and 0xFF should not be used
    // They become "?", the same placeholder that
    // charset_decode_iso_8859_default uses.
    $unusable = array("\241", "\242", "\244", "\245",
                      "\252", "\256", "\322", "\377");
    foreach ($unusable as $byte) {
        $map[$byte] = "?";
    }

    // One pass over the input; replaced text is never scanned again.
    return strtr($string, $map);
}
// iso-8859-15 is Latin 9 and has very much the same use as Latin 1
// but has the Euro symbol and some characters needed for French.
//
//   $string  Text in iso-8859-15.
//
// Returns the text with the eight positions that differ from
// iso-8859-1 written as HTML numeric entities; every other byte is
// left as it is.
function charset_decode_iso_8859_15 ($string) {
    // Entities are spelled in ASCII so the result does not depend on
    // this file's encoding.
    $map = array(
        "\244" => "&#8364;", // Euro sign
        "\246" => "&#352;",  // Latin capital letter S with caron
        "\250" => "&#353;",  // Latin small letter s with caron
        "\264" => "&#381;",  // Latin capital letter Z with caron
        "\270" => "&#382;",  // Latin small letter z with caron
        "\274" => "&#338;",  // Latin capital ligature OE
        "\275" => "&#339;",  // Latin small ligature oe
        "\276" => "&#376;"   // Latin capital letter Y with diaeresis
    );

    // One pass over the input; replaced text is never scanned again.
    return strtr($string, $map);
}
// Fallback for all other ISO-8859 character sets: every byte from
// 0xA0 to 0xFF is replaced by "?". Bytes below 0xA0 are returned
// unchanged.
//
//   $string  Text in some iso-8859-* charset.
//
// Returns the text with those bytes replaced.
function charset_decode_iso_8859_default ($string) {
    // Collect the bytes 0xA0..0xFF in one string ...
    $high = '';
    for ($byte = 0xA0; $byte <= 0xFF; $byte++) {
        $high .= chr($byte);
    }

    // ... and translate each of them to a question mark.
    $marks = str_repeat('?', strlen($high));

    return strtr($string, $high, $marks);
}
// This is the same as ISO-646-NO and is used by some
// Microsoft programs when sending Norwegian characters
//
//   $string  Text in ns_4551-1.
//
// Returns the text with the six national positions translated to
// their iso-8859-1 bytes.
function charset_decode_ns_4551_1 ($string) {
    // These characters are:
    // Latin capital letter AE            [  -> 0xC6
    // Latin capital letter O with stroke \  -> 0xD8
    // Latin capital letter A with ring above ] -> 0xC5
    // and the same as small letters      {|} -> 0xE6 0xF8 0xE5
    //
    // strtr works byte by byte, so the targets are written as octal
    // escapes: each is exactly one iso-8859-1 byte whatever encoding
    // this file is saved in.
    return strtr($string, "[\\]{|}", "\306\330\305\346\370\345");
}