6d8b555b10263364a6c7353ce7596b8437ccc8e6
[squirrelmail.git] / functions / i18n.php
1 <?
2
3 /**
4 ** i18n.php
5 **
6 ** This file contains various functions that are needed to do
7 ** internationalization of SquirrelMail.
8 **
9 ** Internally iso-8859-1 is used as character set. Other characters
10 ** are encoded using Unicode entities according to HTML 4.0.
11 **
12 **/
13
// Flag set when this file has been loaded.
// NOTE(review): presumably checked by other files before including
// i18n.php again -- confirm against the callers.
$i18n_php = true;

// This array specifies the available languages. Each entry has a
// human readable "NAME" and a language "CODE".
$languages[0]["NAME"] = "English";
$languages[0]["CODE"] = "en";
$languages[1]["NAME"] = "Norsk";
$languages[1]["CODE"] = "no";
$languages[2]["NAME"] = "Deutsch";
$languages[2]["CODE"] = "de";
23
// Decodes a string to the internal encoding from the given charset.
//
// $charset  Name of the character set $string is encoded in. The name
//           is compared case-insensitively.
// $string   The raw text in that character set.
//
// Returns the text with HTML special characters escaped and, for the
// character sets handled below, 8 bit characters rewritten by the
// matching charset_decode_* function. Any other charset gets the HTML
// escaping only.
function charset_decode ($charset, $string) {
    // All HTML special characters are 7 bit and can be replaced first.
    // Flags and charset are spelled out so the escaping is byte-wise:
    // with a UTF-8 default charset, 8 bit input is not valid UTF-8 and
    // htmlspecialchars() would return an empty string (or substitute
    // characters), and newer PHP versions also escape single quotes
    // by default.
    $string = htmlspecialchars ($string, ENT_COMPAT, "ISO-8859-1");

    $charset = strtolower($charset);

    // preg_match() replaces ereg(), which was removed in PHP 7. The
    // pattern is deliberately not anchored, as before, and the "s"
    // modifier keeps "." matching any character.
    if (preg_match("/iso-8859-(.*)/s", $charset, $res)) {
        if ($res[1] == "1")
            return charset_decode_iso_8859_1 ($string);
        else if ($res[1] == "7")
            return charset_decode_iso_8859_7 ($string);
        else if ($res[1] == "15")
            return charset_decode_iso_8859_15 ($string);
        else
            return charset_decode_iso_8859_default ($string);
    } else if ($charset == "ns_4551-1") {
        return charset_decode_ns_4551_1 ($string);
    } else
        return $string;
}
45
// iso-8859-1 is the same as Latin 1 and is normally used
// in western europe.
//
// $string  Text encoded in iso-8859-1.
//
// Returns $string with every latin small letter o with stroke (0xF8)
// written as the entity &#248;. All other bytes are left untouched.
function charset_decode_iso_8859_1 ($string) {
    // This is only debug code as long as the internal
    // character set is iso-8859-1

    // Latin small letter o with stroke. A plain byte substitution is
    // all that is needed: str_replace() handles every occurrence in one
    // call, and the ereg functions used before were removed in PHP 7.
    return str_replace ("\370", "&#248;", $string);
}
58
// iso-8859-7 is Greek.
//
// $string  Text encoded in iso-8859-7.
//
// Returns $string with the Greek characters written as numeric Unicode
// entities and the characters without a mapping replaced by "?".
// Characters that are the same as in iso-8859-1 are left untouched.
function charset_decode_iso_8859_7 ($string) {
    $map = array();

    // ISO-8859-7 characters from 11/04 (0xB4) to 15/14 (0xFE) sit at a
    // fixed distance of 720 from their Unicode code points, e.g.
    // 0xB4 -> 900 and 0xFE -> 974.
    for ($code = 0xB4; $code <= 0xFE; $code++)
        $map[chr($code)] = "&#" . ($code + 720) . ";";

    // 11/07 (0xB7) Middle dot, 11/11 (0xBB) Right angle quotation mark
    // and 11/13 (0xBD) Vulgar fraction one half are the same as in
    // iso-8859-1 and must be left alone.
    unset($map["\267"], $map["\273"], $map["\275"]);

    // Horizontal bar (parentheki pavla)
    $map["\257"] = "&#8213;";

    // Could not find Unicode equivalent of 0xA1 and 0xA2
    // 0xA4, 0xA5, 0xAA, 0xAE, 0xD2 and 0xFF should not be used
    $unmapped = "\241\242\244\245\252\256\322\377";
    for ($i = 0; $i < strlen($unmapped); $i++)
        $map[substr($unmapped, $i, 1)] = "?";

    // One pass over the string; every key is a single byte, so the
    // generated entities are never looked at again.
    return strtr($string, $map);
}
97
// iso-8859-15 is Latin 9 and has very much the same use as Latin 1
// but has the Euro symbol and some characters needed for French.
//
// $string  Text encoded in iso-8859-15.
//
// Returns $string with the eight characters that differ from
// iso-8859-1 written as numeric Unicode entities. All other bytes are
// left untouched.
function charset_decode_iso_8859_15 ($string) {
    // A single strtr() pass replaces every occurrence of each byte.
    // The earlier loops tested an undefined variable for the Euro sign
    // and replaced the wrong byte for S with caron, which never ended.
    return strtr ($string, array(
        "\244" => "&#8364;",  // Euro sign
        "\246" => "&#352;",   // Latin capital letter S with caron
        "\250" => "&#353;",   // Latin small letter s with caron
        "\264" => "&#381;",   // Latin capital letter Z with caron
        "\270" => "&#382;",   // Latin small letter z with caron
        "\274" => "&#338;",   // Latin capital ligature OE
        "\275" => "&#339;",   // Latin small ligature oe
        "\276" => "&#376;"    // Latin capital letter Y with diaeresis
    ));
}
128
// Fallback for every ISO-8859 character set without a decoder of its
// own: each byte from 0xA0 to 0xFF is replaced by a question mark.
//
// $string  Text encoded in some ISO-8859 character set.
//
// Returns $string with the bytes 0xA0-0xFF turned into "?". Bytes
// below 0xA0 are returned as they are.
function charset_decode_iso_8859_default ($string) {
    // Collect the 96 bytes that have to go ...
    $high_bytes = "";
    for ($byte = 0xA0; $byte <= 0xFF; $byte++)
        $high_bytes .= chr($byte);

    // ... and pair each of them with a question mark.
    $placeholders = str_repeat("?", strlen($high_bytes));

    return strtr($string, $high_bytes, $placeholders);
}
149
// This is the same as ISO-646-NO and is used by some
// Microsoft programs when sending Norwegian characters
//
// $string  Text encoded in NS 4551-1.
//
// Returns $string with the characters [ \ ] { | } replaced by the
// iso-8859-1 bytes of the Norwegian letters they stand for.
function charset_decode_ns_4551_1 ($string) {
    // These characters are:
    // Latin capital letter AE         (0xC6)
    // Latin capital letter O with stroke (0xD8)
    // Latin capital letter A with ring above (0xC5)
    // and the same as small letters   (0xE6, 0xF8, 0xE5)
    //
    // The replacements are written as octal escapes so that they are
    // single iso-8859-1 bytes no matter which encoding this file is
    // saved in. Literal letters become two bytes each in a UTF-8 file,
    // and strtr() would then map to broken half characters.
    return strtr ($string, "[\\]{|}", "\306\330\305\346\370\345");
}
160
161 ?>