Bugfix of 810047
[squirrelmail.git] / functions / i18n.php
... / ...
CommitLineData
1<?php
2
3/**
4 * i18n.php
5 *
6 * Copyright (c) 1999-2003 The SquirrelMail Project Team
7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains various functions that are needed to do
10 * internationalization of SquirrelMail.
11 *
12 * Internally the output character set is used. Other characters are
13 * encoded using Unicode entities according to HTML 4.0.
14 *
15 * $Id$
16 */
17
18require_once(SM_PATH . 'functions/global.php');
19
/**
 * Decodes a string to the internal encoding from the given charset.
 *
 * The string is first handed to the XTRA_CODE hook of the current
 * translation (when one is registered), then HTML special characters are
 * escaped with htmlspecialchars() and the result is passed to the decoding
 * function that belongs to $charset. Decoding functions that live in
 * functions/decode/ are loaded on demand.
 *
 * When $charset equals $default_charset (compared case-insensitively) or no
 * decoding function is known for it, only htmlspecialchars() is applied.
 *
 * @param string $charset name of the charset $string is encoded in
 * @param string $string  text to decode
 * @return string decoded text with HTML special characters escaped
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language, $default_charset;

    // Let the translation specific hook pre-process the string, if there is one.
    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    $charset = strtolower($charset);

    set_my_charset();

    // Switches for the php recode and php iconv based conversions.
    // Both are turned off here, so the two branches guarded by them
    // below are never taken.
    $use_php_recode = false;
    $use_php_iconv = false;

    // Don't do conversion if charset is the same.
    if ($charset == strtolower($default_charset)) {
        return htmlspecialchars($string);
    }

    // catch iso-8859-8-i thing
    if ($charset == "iso-8859-8-i") {
        $charset = "iso-8859-8";
    }

    /*
     * Recode converts html special characters automatically if you use
     * 'charset..html' decoding. There is no documented way to put -d option
     * into php recode function call.
     */
    if ($use_php_recode) {
        if ($default_charset == "utf-8") {
            // other charsets can be converted to utf-8 without loss.
            // and output string is smaller
            $string = recode_string($charset . "..utf-8", $string);
            return htmlspecialchars($string);
        } else {
            $string = recode_string($charset . "..html", $string);
            // recode does not convert single quote, htmlspecialchars does.
            $string = str_replace("'", '&#039;', $string);
            return $string;
        }
    }

    // iconv functions do not have html target and can be used only with utf-8
    if ($use_php_iconv && $default_charset == 'utf-8') {
        $string = iconv($charset, $default_charset, $string);
        return htmlspecialchars($string);
    }

    // If we don't use recode and iconv, we'll do it old way.

    /* All HTML special characters are 7 bit and can be replaced first */
    $string = htmlspecialchars($string);

    /* controls cpu and memory intensive decoding cycles */
    $agresive_decoding = false;

    // ISO-8859 family. preg_match() is used because ereg() is no longer
    // available in PHP 7 and newer; the pattern itself is unchanged and is
    // deliberately not anchored.
    if (preg_match('/iso-8859-([[:digit:]]+)/', $charset, $res)) {
        // Parts that have their own decoding file (there is no part 12).
        $iso_parts = array('1', '2', '3', '4', '5', '6', '7', '8', '9',
                           '10', '11', '13', '14', '15', '16');
        foreach ($iso_parts as $part) {
            // Loose comparison on purpose: both sides are digit strings and
            // are compared as numbers, so a zero padded part number such as
            // "01" still selects part 1.
            if ($res[1] == $part) {
                include_once(SM_PATH . 'functions/decode/iso8859-' . $part . '.php');
                $decoder = 'charset_decode_iso8859_' . $part;
                return $decoder($string);
            }
        }
        // Any other part number: replace the upper half with question marks.
        return charset_decode_iso_8859_default($string);
    }

    // This decoder is not loaded from functions/decode/.
    if ($charset == 'ns_4551-1') {
        return charset_decode_ns_4551_1($string);
    }

    // charset name => array(file in functions/decode/, decoding function)
    $decoders = array(
        'koi8-r'       => array('koi8-r.php', 'charset_decode_koi8r'),
        'koi8-u'       => array('koi8-u.php', 'charset_decode_koi8u'),
        'windows-1250' => array('cp1250.php', 'charset_decode_cp1250'),
        'windows-1251' => array('cp1251.php', 'charset_decode_cp1251'),
        'windows-1252' => array('cp1252.php', 'charset_decode_cp1252'),
        'windows-1253' => array('cp1253.php', 'charset_decode_cp1253'),
        'windows-1254' => array('cp1254.php', 'charset_decode_cp1254'),
        'windows-1255' => array('cp1255.php', 'charset_decode_cp1255'),
        'windows-1256' => array('cp1256.php', 'charset_decode_cp1256'),
        'windows-1257' => array('cp1257.php', 'charset_decode_cp1257'),
        'windows-1258' => array('cp1258.php', 'charset_decode_cp1258'),
        'tis-620'      => array('tis620.php', 'charset_decode_tis620'),
        'utf-8'        => array('utf-8.php', 'charset_decode_utf8')
    );

    // These two are only decoded when aggressive decoding is switched on.
    if ($agresive_decoding) {
        $decoders['big5']   = array('big5.php', 'charset_decode_big5');
        $decoders['gb2312'] = array('gb2312.php', 'charset_decode_gb2312');
    }

    if (isset($decoders[$charset])) {
        list($file, $decoder) = $decoders[$charset];
        include_once(SM_PATH . 'functions/decode/' . $file);
        return $decoder($string);
    }

    // Unknown charset: only the HTML special characters have been escaped.
    return $string;
}
180
181
/**
 * Fallback decoder for ISO-8859 character sets.
 *
 * Every byte in the range 0xA0-0xFF (octal \240-\377) is replaced with a
 * question mark; all other bytes are returned unchanged.
 *
 * @param string $string text to clean
 * @return string text with the bytes 0xA0-0xFF replaced by '?'
 */
function charset_decode_iso_8859_default ($string) {
    // Build the list of bytes to replace instead of spelling each one out.
    $high_bytes = '';
    for ($code = 0xA0; $code <= 0xFF; $code++) {
        $high_bytes .= chr($code);
    }

    // One question mark for every byte in the list.
    $question_marks = str_repeat('?', strlen($high_bytes));

    return strtr($string, $high_bytes, $question_marks);
}
202
203/*
204 * This is the same as ISO-646-NO and is used by some
205 * Microsoft programs when sending Norwegian characters
206 */
207function charset_decode_ns_4551_1 ($string) {
208 /*
209 * These characters are:
210 * Latin capital letter AE
211 * Latin capital letter O with stroke
212 * Latin capital letter A with ring above
213 * and the same as small letters
214 */
215