| 1 | <?php |
| 2 | |
| 3 | /** |
| 4 | * i18n.php |
| 5 | * |
| 6 | * Copyright (c) 1999-2003 The SquirrelMail Project Team |
| 7 | * Licensed under the GNU GPL. For full terms see the file COPYING. |
| 8 | * |
| 9 | * This file contains various functions that are needed to do |
| 10 | * internationalization of SquirrelMail. |
| 11 | * |
| 12 | * Internally the output character set is used. Other characters are |
| 13 | * encoded using Unicode entities according to HTML 4.0. |
| 14 | * |
| 15 | * $Id$ |
| 16 | * @package squirrelmail |
| 17 | */ |
| 18 | |
| 19 | /** Everything uses global.php... */ |
| 20 | require_once(SM_PATH . 'functions/global.php'); |
| 21 | |
/**
 * Converts a string from the given charset into a form that can be shown
 * with the charset of the user's translation ($default_charset).
 *
 * Processing order:
 *  1. If the active language defines an XTRA_CODE callback, it is invoked
 *     with 'decode' first.
 *  2. If $charset equals $default_charset (case-insensitive), the string is
 *     only passed through htmlspecialchars().
 *  3. If $use_php_recode is set, recode_string() converts to utf-8 (when the
 *     default charset is utf-8) or to html entities.
 *  4. If $use_php_iconv is set and the default charset is utf-8, iconv()
 *     converts to utf-8.
 *  5. Otherwise the string is html-escaped and handed to the
 *     charset_decode_<name>() function found in functions/decode/<name>.php,
 *     if such a file exists.
 *
 * If a conversion step fails or the charset is not supported, the function
 * falls through to the next step and finally returns the html-escaped but
 * otherwise unconverted string.
 *
 * Sanitizing of html special characters is always done by this function.
 *
 * @param string $charset Name of the charset $string is encoded in
 * @param string $string  Text to be decoded
 * @return string decoded and html-sanitized string
 */
function charset_decode ($charset, $string) {
    global $languages, $squirrelmail_language, $default_charset;
    global $use_php_recode, $use_php_iconv, $agresive_decoding;

    // Language specific pre-processing hook.
    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $string = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $string);
    }

    $charset = strtolower($charset);

    // Defined elsewhere; presumably refreshes $default_charset for the
    // active translation - confirm against its definition.
    set_my_charset();

    // Normalize the switches so they can be tested without
    // function_exists() calls.
    if (! isset($use_php_recode) || $use_php_recode == "") {
        $use_php_recode = false;
    }
    if (! isset($use_php_iconv) || $use_php_iconv == "") {
        $use_php_iconv = false;
    }

    // Compare charset names case-insensitively everywhere, so that a
    // configured 'UTF-8' behaves the same as 'utf-8'.
    $target_charset = strtolower($default_charset);

    // Don't do conversion if charset is the same.
    if ($charset == $target_charset) {
        return htmlspecialchars($string);
    }

    // iso-8859-8-i is handled exactly like iso-8859-8.
    if ($charset == 'iso-8859-8-i') {
        $charset = 'iso-8859-8';
    }

    /*
     * Recode converts html special characters automatically if you use
     * 'charset..html' decoding. There is no documented way to put -d option
     * into php recode function call.
     */
    if ($use_php_recode) {
        if ($target_charset == 'utf-8') {
            // Other charsets can be converted to utf-8 without loss
            // and the output string is smaller.
            $converted = recode_string($charset . '..utf-8', $string);
            if ($converted !== false) {
                return htmlspecialchars($converted);
            }
        } else {
            $converted = recode_string($charset . '..html', $string);
            if ($converted !== false) {
                // recode does not convert single quote, htmlspecialchars does.
                return str_replace("'", '&#039;', $converted);
            }
        }
        // recode_string() failed: fall through instead of returning an
        // empty string.
    }

    // iconv has no html target and can be used only with utf-8.
    if ($use_php_iconv && $target_charset == 'utf-8') {
        $converted = iconv($charset, $target_charset, $string);
        if ($converted !== false) {
            return htmlspecialchars($converted);
        }
        // iconv() failed: fall through to the built-in decoding.
    }

    // If we don't use recode and iconv, we'll do it the old way.

    /* All HTML special characters are 7 bit and can be replaced first */
    $string = htmlspecialchars($string);

    /* controls cpu and memory intensive decoding cycles */
    if (! isset($agresive_decoding) || $agresive_decoding == "") {
        $agresive_decoding = false;
    }

    $decode = fixcharset($charset);

    // $charset is caller supplied (presumably taken from message headers)
    // and ends up in an include path. Only plain identifiers may pass, so
    // that '/', '.' and similar characters cannot escape functions/decode/.
    if (! preg_match('/^[a-z0-9_]+$/', $decode)) {
        return $string;
    }

    $decodefile = SM_PATH . 'functions/decode/' . $decode . '.php';
    $decodefunc = 'charset_decode_' . $decode;

    if (file_exists($decodefile)) {
        include_once($decodefile);
        // Guard against a decode file that does not define the expected
        // function.
        if (function_exists($decodefunc)) {
            return call_user_func($decodefunc, $string);
        }
    }

    return $string;
}
| 108 | |
/**
 * Turns a charset name into the form used for decode file and
 * decode function names.
 *
 * The rewrites are applied in order, each one working on the result of
 * the previous one:
 *   '-'            becomes '_'           (minus is not allowed in function names)
 *   'windows_'     becomes 'cp'          (windows-125x equals cp125x)
 *   'ibm'          becomes 'cp'
 *   'iso_8859_8_i' becomes 'iso_8859_8'  (both share one decoding cycle)
 *
 * @param string $charset Name of charset
 * @return string Adjusted name of charset
 */
function fixcharset($charset) {
    $search  = array('-', 'windows_', 'ibm', 'iso_8859_8_i');
    $replace = array('_', 'cp',       'cp',  'iso_8859_8');

    // With array arguments str_replace() runs the pairs one after another,
    // so later pairs see the output of earlier ones.
    return str_replace($search, $replace, $charset);
}
| 131 | |
/**
 * 8bit cleanup function.
 *
 * Replaces every byte in the range 0xA0-0xFF (octal \240-\377) with '?'.
 * All other bytes, including 0x80-0x9F, are left untouched.
 * Legacy function used for unsupported ISO-8859 charsets.
 *
 * @param string $string string that has to be cleaned
 * @return string cleaned string
 */
function charset_decode_iso_8859_default ($string) {
    // Build the list of bytes that have to be masked.
    $high_bytes = '';
    for ($code = 0xA0; $code <= 0xFF; $code++) {
        $high_bytes .= chr($code);
    }

    // One '?' for each masked byte.
    $placeholders = str_repeat('?', strlen($high_bytes));

    return strtr($string, $high_bytes, $placeholders);
}
| 160 | |
| 161 | /** |
| 162 | * ns_4551_1 decoding function |
| 163 | * |
| 164 | * This is the same as ISO-646-NO and is used by some |
| 165 | * Microsoft programs when sending Norwegian characters |
| 166 | * |
| 167 | * @param string $string |
| 168 | * @return string |
| 169 | */ |
| 170 | function charset_decode_ns_4551_1 ($string) { |
| 171 | /* |
| 172 | * These characters are: |
| 173 | * Latin capital letter AE |
| 174 | * Latin capital letter O with stroke |
| 175 | * Latin capital letter A with ring above |
| 176 | * and the same as small letters |
| 177 | */ |
| 178 |