convert message array to string and display it sanitized.
[squirrelmail.git] / functions / i18n.php
CommitLineData
59177427 1<?php
1fd97780 2
35586184 3/**
4 * i18n.php
5 *
76911253 6 * Copyright (c) 1999-2003 The SquirrelMail Project Team
35586184 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This file contains various functions that are needed to do
10 * internationalization of SquirrelMail.
11 *
12 * Internally the output character set is used. Other characters are
13 * encoded using Unicode entities according to HTML 4.0.
14 *
15 * $Id$
d6c32258 16 * @package squirrelmail
35586184 17 */
18
d6c32258 19/** Everything uses global.php... */
961ca3d8 20require_once(SM_PATH . 'functions/global.php');
21
/**
 * Converts string from given charset to charset, that can be displayed by user translation.
 *
 * Processing order:
 *  1. If the active translation registers an XTRA_CODE function, the string
 *     is first passed through it with the 'decode' action (per the original
 *     notes this is how the Japanese translation gets euc-jp output).
 *  2. If $charset equals the output charset, the string is only html-escaped.
 *  3. If php recode is enabled, the string is recoded to utf-8 (when utf-8 is
 *     the output charset) or to html entities.
 *  4. If php iconv is enabled and the output charset is utf-8, iconv is used.
 *  5. Otherwise the string is html-escaped and handed to the matching
 *     charset_decode_* function from functions/decode/, when one exists.
 *
 * If $charset is not supported, or a recode/iconv conversion fails, the
 * function falls back to step 5 and in the worst case returns the html-escaped
 * but otherwise unconverted string.
 *
 * Charset names are compared case-insensitively.
 *
 * sanitizing of html tags is also done by this function.
 *
 * @param string $charset Charset the text is currently encoded in
 * @param string $string Text to be decoded
 * @return string decoded and html-sanitized string
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language, $default_charset;
    global $use_php_recode, $use_php_iconv, $agresive_decoding;

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    $charset = strtolower($charset);

    set_my_charset();

    // Read the output charset only after set_my_charset(), same as the
    // original ordering. Lowercase it once so every comparison below is
    // case-insensitive (a configured "UTF-8" must behave like "utf-8").
    $output_charset = strtolower($default_charset);

    // Variables that allow to use functions without function_exist() calls
    if (! isset($use_php_recode) || $use_php_recode=="" ) {
        $use_php_recode=false; }
    if (! isset($use_php_iconv) || $use_php_iconv=="" ) {
        $use_php_iconv=false; }

    // Don't do conversion if charset is the same.
    if ( $charset == $output_charset )
        return htmlspecialchars($string);

    // catch iso-8859-8-i thing
    if ( $charset == "iso-8859-8-i" )
        $charset = "iso-8859-8";

    /*
     * Recode converts html special characters automatically if you use
     * 'charset..html' decoding. There is no documented way to put -d option
     * into php recode function call.
     */
    if ( $use_php_recode ) {
        if ( $output_charset == "utf-8" ) {
            // other charsets can be converted to utf-8 without loss.
            // and output string is smaller
            $converted = recode_string($charset . "..utf-8",$string);
            if ( $converted !== false ) {
                return htmlspecialchars($converted);
            }
        } else {
            $converted = recode_string($charset . "..html",$string);
            if ( $converted !== false ) {
                // recode does not convert single quote, htmlspecialchars does.
                return str_replace("'", '&#039;', $converted);
            }
        }
        // recode failed (e.g. unknown charset): fall through to the
        // remaining strategies instead of returning an empty string.
    }

    // iconv functions does not have html target and can be used only with utf-8
    if ( $use_php_iconv && $output_charset == 'utf-8' ) {
        $converted = iconv($charset,$default_charset,$string);
        if ( $converted !== false ) {
            return htmlspecialchars($converted);
        }
        // iconv failed: fall through to the built-in decoding below.
    }

    // If we don't use recode and iconv, we'll do it old way.

    /* All HTML special characters are 7 bit and can be replaced first */

    $string = htmlspecialchars ($string);

    /* controls cpu and memory intensive decoding cycles */
    if (! isset($agresive_decoding) || $agresive_decoding=="" ) {
        $agresive_decoding=false; }

    $decode=fixcharset($charset);
    $decodefunc='charset_decode_' . $decode;
    $ret = $string;

    // NOTE(review): callers are not visible here, but $charset presumably
    // comes from message data. The name is used in an include path and a
    // function name, so only accept plain identifiers; anything else could
    // never name a valid decode function anyway.
    if (preg_match('/^[a-z0-9_]+\z/', $decode)) {
        $decodefile=SM_PATH . 'functions/decode/' . $decode . '.php';
        if (file_exists($decodefile)) {
            include_once($decodefile);
            // A decode file that does not define the expected function must
            // not blank the message; keep the escaped string instead.
            if (function_exists($decodefunc)) {
                $ret = call_user_func($decodefunc, $string);
            }
        }
    }
    return( $ret );
}
b142de74 108
/**
 * Makes charset name suitable for decoding cycles
 *
 * The returned name is used by charset_decode() to build both the decode
 * file name (functions/decode/<name>.php) and the decode function name
 * (charset_decode_<name>). For that reason the name is lowercased and every
 * character other than a-z, 0-9 and underscore is replaced with an
 * underscore. This covers the minus sign as well as characters that have a
 * meaning in file paths (slash, backslash, dot, colon, NUL).
 *
 * Additional adjustments:
 *  - windows-125x charsets are mapped to cp125x
 *  - ibm charsets are mapped to cp
 *  - iso-8859-8-i is mapped to iso-8859-8
 *
 * @param string $charset Name of charset
 * @return string $charset Adjusted name of charset
 */
function fixcharset($charset) {
    // Lowercase, then allow only characters that are valid in a function
    // name; minus (and anything path related) becomes an underscore.
    $charset = preg_replace('/[^a-z0-9_]/', '_', strtolower($charset));

    // windows-125x and cp125x charsets
    $charset = str_replace('windows_', 'cp', $charset);

    // ibm > cp
    $charset = str_replace('ibm', 'cp', $charset);

    // iso-8859-8-i -> iso-8859-8
    // use same cycle until I'll find differences
    $charset = str_replace('iso_8859_8_i', 'iso_8859_8', $charset);

    return $charset;
}
a2a7852b 131
/**
 * 8bit cleanup function.
 *
 * Replaces every byte in the range 0xA0-0xFF (octal 240-377) with '?'.
 * Bytes below 0xA0, including 0x80-0x9F, are left untouched.
 * Legacy function used for unsupported ISO-8859 charsets.
 *
 * @param string $string string that has to be cleaned
 * @return string cleaned string
 */
function charset_decode_iso_8859_default ($string) {
    // Build the translation table once per call: all bytes 0xA0..0xFF
    // on the "from" side, the same number of question marks on the "to" side.
    $high_bytes = '';
    for ($code = 0xA0; $code <= 0xFF; $code++) {
        $high_bytes .= chr($code);
    }
    $placeholders = str_repeat('?', strlen($high_bytes));

    return strtr($string, $high_bytes, $placeholders);
}
160
51468260 161/**
162 * ns_4551_1 decoding function
163 *
a2a7852b 164 * This is the same as ISO-646-NO and is used by some
165 * Microsoft programs when sending Norwegian characters
51468260 166 *
167 * @param string $string
168 * @return string
a2a7852b 169 */
170function charset_decode_ns_4551_1 ($string) {
171 /*
172 * These characters are:
173 * Latin capital letter AE
174 * Latin capital letter O with stroke
175 * Latin capital letter A with ring above
176 * and the same as small letters
177 */
178