managing agresive decoding and ja_JP + utf-8.
[squirrelmail.git] / functions / i18n.php
... / ...
CommitLineData
1<?php
2
3/**
4 * i18n.php
5 *
6 * Copyright (c) 1999-2003 The SquirrelMail Project Team
7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains various functions that are needed to do
10 * internationalization of SquirrelMail.
11 *
12 * Internally the output character set is used. Other characters are
13 * encoded using Unicode entities according to HTML 4.0.
14 *
15 * $Id$
16 * @package squirrelmail
17 */
18
19/** Everything uses global.php... */
20require_once(SM_PATH . 'functions/global.php');
21
/**
 * Converts string from given charset to charset, that can be displayed by user translation.
 *
 * Processing order:
 *  1. If the active translation defines an XTRA_CODE function, it is called
 *     with 'decode' first (the Japanese translation is documented to use
 *     this hook to convert the string to euc-jp).
 *  2. If $charset equals the output charset, the string is only html encoded.
 *  3. If recode is enabled, the string is recoded to utf-8 (when the output
 *     charset is utf-8) or to html entities (any other output charset).
 *  4. If iconv is enabled and the output charset is utf-8, iconv is used.
 *  5. Otherwise the string is html encoded and passed to the matching
 *     charset_decode_* function from functions/decode/.
 *
 * If $charset is not supported, or recode/iconv fail to convert it, the
 * function returns the html encoded, otherwise unconverted string.
 *
 * Sanitizing of html tags is also done by this function.
 *
 * @param string $charset Name of the charset $string is encoded in
 * @param string $string Text to be decoded
 * @return string decoded string
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language, $default_charset;
    global $use_php_recode, $use_php_iconv, $agresive_decoding;

    // Translation specific pre-processing hook.
    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    $charset = strtolower($charset);

    set_my_charset();

    // Read after set_my_charset(), exactly where the original code first
    // looked at $default_charset. Lower-cased once so that every comparison
    // below is case-insensitive (previously only the first one was).
    $output_charset = strtolower($default_charset);

    // Treat unset or empty configuration switches as "disabled".
    if (! isset($use_php_recode) || $use_php_recode=="" ) {
        $use_php_recode=false; }
    if (! isset($use_php_iconv) || $use_php_iconv=="" ) {
        $use_php_iconv=false; }

    // Don't do conversion if charset is the same.
    if ( $charset == $output_charset )
        return htmlspecialchars($string);

    // catch iso-8859-8-i thing
    if ( $charset == "iso-8859-8-i" )
        $charset = "iso-8859-8";

    /*
     * Recode converts html special characters automatically if you use
     * 'charset..html' decoding. There is no documented way to put -d option
     * into php recode function call.
     *
     * function_exists() guards against the switch being enabled on a PHP
     * build without the recode extension. A false result means the
     * conversion failed; fall through to the next method instead of
     * returning an empty string.
     */
    if ( $use_php_recode && function_exists('recode_string') ) {
        if ( $output_charset == "utf-8" ) {
            // other charsets can be converted to utf-8 without loss.
            // and output string is smaller
            $converted = recode_string($charset . "..utf-8",$string);
            if ( $converted !== false ) {
                return htmlspecialchars($converted);
            }
        } else {
            $converted = recode_string($charset . "..html",$string);
            if ( $converted !== false ) {
                // recode leaves single quotes alone, escape them here.
                return str_replace("'", '&#039;', $converted);
            }
        }
    }

    // iconv functions does not have html target and can be used only with utf-8
    if ( $use_php_iconv && $output_charset == 'utf-8' && function_exists('iconv') ) {
        $converted = iconv($charset,$output_charset,$string);
        // iconv() returns false when the conversion is not possible.
        if ( $converted !== false ) {
            return htmlspecialchars($converted);
        }
    }

    // If we don't use recode and iconv, we'll do it old way.

    /* All HTML special characters are 7 bit and can be replaced first */

    $string = htmlspecialchars ($string);

    /* controls cpu and memory intensive decoding cycles */
    if (! isset($agresive_decoding) || $agresive_decoding=="" ) {
        $agresive_decoding=false; }

    $decode=fixcharset($charset);

    /*
     * $charset originates from message data, so never let it choose an
     * arbitrary file: only plain [a-z0-9_] names may be turned into an
     * include path and a function name.
     */
    if ( ! preg_match('/^[a-z0-9_]+$/', $decode) ) {
        return( $string );
    }

    $decodefile=SM_PATH . 'functions/decode/' . $decode . '.php';
    $decodefunction='charset_decode_' . $decode;

    if (file_exists($decodefile)) {
        include_once($decodefile);
    }

    // The decoder may come from the file just included or may already be
    // defined elsewhere; unsupported charsets are returned unconverted.
    if (function_exists($decodefunction)) {
        $ret = call_user_func($decodefunction, $string);
    } else {
        $ret = $string;
    }
    return( $ret );
}
108
/**
 * Makes charset name suitable for decoding cycles
 *
 * The returned name is used to build both a file name and a function name,
 * so it is reduced to lower case letters, digits and underscores. Minus
 * signs, path separators, dots and any other character are replaced
 * with '_'. After that the known aliases are folded:
 *   windows_125x -> cp125x, ibmNNN -> cpNNN, iso_8859_8_i -> iso_8859_8
 *
 * @param string $charset Name of charset
 * @return string $charset Adjusted name of charset
 */
function fixcharset($charset) {
    // Lower case the name and replace everything that is not safe in a
    // function name or an include path (minus, '/', '\', '.', ':', ...).
    $charset=preg_replace('/[^a-z0-9_]/','_',strtolower($charset));

    // windows-125x and cp125x charsets
    $charset=str_replace('windows_','cp',$charset);

    // ibm > cp
    $charset=str_replace('ibm','cp',$charset);

    // iso-8859-8-i -> iso-8859-8
    // both names share one decoding cycle
    $charset=str_replace('iso_8859_8_i','iso_8859_8',$charset);

    return $charset;
}
131
/**
 * 8bit cleanup function.
 *
 * Replaces every byte in the range 0xA0 - 0xFF with '?'. All other bytes,
 * including 0x80 - 0x9F, are returned untouched.
 * Legacy function used for unsupported ISO-8859 charsets
 *
 * @param string $string string that has to be cleaned
 * @return string cleaned string
 */
function charset_decode_iso_8859_default ($string) {
    // All bytes from 0xA0 up to and including 0xFF, as one string.
    $high_bytes = implode('', array_map('chr', range(0xA0, 0xFF)));

    // One question mark for each of those bytes.
    $placeholders = str_repeat('?', strlen($high_bytes));

    return strtr($string, $high_bytes, $placeholders);
}
160
161/**
162 * ns_4551_1 decoding function
163 *
164 * This is the same as ISO-646-NO and is used by some
165 * Microsoft programs when sending Norwegian characters
166 *
167 * @param string $string
168 * @return string
169 */
170function charset_decode_ns_4551_1 ($string) {
171 /*
172 * These characters are:
173 * Latin capital letter AE
174 * Latin capital letter O with stroke
175 * Latin capital letter A with ring above
176 * and the same as small letters
177 */
178