From c37a12f89d97de0689dfd643e1b774f42fa3c03c Mon Sep 17 00:00:00 2001 From: philippe_mingo Date: Mon, 4 Nov 2002 13:37:46 +0000 Subject: [PATCH] Tomas Kuliavas Updates iso-8859-4 and iso-8859-13 decode cycles to full charset support. Adds full windows-1257 charset decode support. Previous decode_lt_devel.diff patch is not needed. git-svn-id: https://svn.code.sf.net/p/squirrelmail/code/trunk/squirrelmail@4088 7612ce4b-ef26-0410-bec9-ea0150e637f0 --- functions/i18n.php | 488 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 410 insertions(+), 78 deletions(-) diff --git a/functions/i18n.php b/functions/i18n.php index 31363c16..1e7ddf05 100644 --- a/functions/i18n.php +++ b/functions/i18n.php @@ -51,6 +51,8 @@ function charset_decode ($charset, $string) { $ret = charset_decode_koi8r ($string); } else if ($charset == 'windows-1251') { $ret = charset_decode_koi8r ($string); + } else if ($charset == 'windows-1257') { + $ret = charset_decode_windows_1257 ($string); } else { $ret = $string; } @@ -408,49 +410,123 @@ function charset_decode_iso_8859_2 ($string) { } /* - iso-8859-4 is Baltic codeset used in some email clients - instead of iso-8859-13 in Lithuania - only Lithuanian charactes are added. + ISO/IEC 8859-4:1998 Latin Alphabet No. 
4 */ function charset_decode_iso_8859_4 ($string) { - // latin capital a with ogonek - $string = str_replace ("\241", 'Ą', $string); - // latin capital c with caron - $string = str_replace ("\310", 'Č', $string); - // latin capital e with ogonek - $string = str_replace ("\312", 'Ę', $string); - // latin capital e with dot above - $string = str_replace ("\314", 'Ė', $string); - // latin capital i with ogonek - $string = str_replace ("\307", 'Į', $string); - // latin capital s with caron - $string = str_replace ("\251", 'Š', $string); - // latin capital u with ogonek - $string = str_replace ("\331", 'Ų', $string); - // latin capital u with macron - $string = str_replace ("\336", 'Ū', $string); - // latin capital z with caron - $string = str_replace ("\256", 'Ž', $string); - // latin small a with ogonek - $string = str_replace ("\261", 'ą', $string); - // latin small c with caron - $string = str_replace ("\350", 'č', $string); - // latin small e with ogonek - $string = str_replace ("\352", 'ę', $string); - // latin small e with dot above - $string = str_replace ("\354", 'ė', $string); - // latin small i with ogonek - $string = str_replace ("\347", 'į', $string); - // latin small s with caron - $string = str_replace ("\271", 'š', $string); - // latin small u with ogonek - $string = str_replace ("\371", 'ų', $string); - // latin small u with macron - $string = str_replace ("\376", 'ū', $string); - // latin small z with caron - $string = str_replace ("\276", 'ž', $string); + global $default_charset, $languages, $sm_notAlias; + if (strtolower($default_charset) == 'iso-8859-4') + return $string; + if (strtolower($languages[$sm_notAlias]['CHARSET']) == 'iso-8859-4') + return $string; + + /* Only do the slow convert if there are 8-bit characters */ + if (! 
ereg("[\200-\377]", $string)) + return $string; + + // latin capital letter a with ogonek 161 -> 260 + $string = str_replace ("\241", 'Ą', $string); + // latin small letter kra 162 -> 312 + $string = str_replace ("\242", 'ĸ', $string); + // latin capital letter r with cedilla 163 -> 342 + $string = str_replace ("\243", 'Ŗ', $string); + // latin capital letter i with tilde 165 -> 296 + $string = str_replace ("\245", 'Ĩ', $string); + // latin capital letter l with cedilla 166 -> 315 + $string = str_replace ("\246", 'Ļ', $string); + // latin capital letter s with caron 169 -> 352 + $string = str_replace ("\251", 'Š', $string); + // latin capital letter e with macron 170 -> 274 + $string = str_replace ("\252", 'Ē', $string); + // latin capital letter g with cedilla 171 -> 290 + $string = str_replace ("\253", 'Ģ', $string); + // latin capital letter t with stroke 172 -> 358 + $string = str_replace ("\254", 'Ŧ', $string); + // latin capital letter z with caron 174 -> 381 + $string = str_replace ("\256", 'Ž', $string); + // latin small letter a with ogonek 177 -> 261 + $string = str_replace ("\261", 'ą', $string); + // ogonek 178 -> 731 + $string = str_replace ("\262", '˛', $string); + // latin small letter r with cedilla 179 -> 343 + $string = str_replace ("\263", 'ŗ', $string); + // latin small letter i with tilde 181 -> 297 + $string = str_replace ("\265", 'ĩ', $string); + // latin small letter l with cedilla 182 -> 316 + $string = str_replace ("\266", 'ļ', $string); + // caron 183 -> 711 + $string = str_replace ("\267", 'ˇ', $string); + // latin small letter s with caron 185 -> 353 + $string = str_replace ("\271", 'š', $string); + // latin small letter e with macron 186 -> 275 + $string = str_replace ("\272", 'ē', $string); + // latin small letter g with cedilla 187 -> 291 + $string = str_replace ("\273", 'ģ', $string); + // latin small letter t with stroke 188 -> 359 + $string = str_replace ("\274", 'ŧ', $string); + // latin capital letter eng 189 -> 330 + $string = 
str_replace ("\275", 'Ŋ', $string); + // latin small letter z with caron 190 -> 382 + $string = str_replace ("\276", 'ž', $string); + // latin small letter eng 191 -> 331 + $string = str_replace ("\277", 'ŋ', $string); + // latin capital letter a with macron 192 -> 256 + $string = str_replace ("\300", 'Ā', $string); + // latin capital letter i with ogonek 199 -> 302 + $string = str_replace ("\307", 'Į', $string); + // latin capital letter c with caron 200 -> 268 + $string = str_replace ("\310", 'Č', $string); + // latin capital letter e with ogonek 202 -> 280 + $string = str_replace ("\312", 'Ę', $string); + // latin capital letter e with dot above 204 -> 278 + $string = str_replace ("\314", 'Ė', $string); + // latin capital letter i with macron 207 -> 298 + $string = str_replace ("\317", 'Ī', $string); + // latin capital letter d with stroke 208 -> 272 + $string = str_replace ("\320", 'Đ', $string); + // latin capital letter n with cedilla 209 -> 325 + $string = str_replace ("\321", 'Ņ', $string); + // latin capital letter o with macron 210 -> 332 + $string = str_replace ("\322", 'Ō', $string); + // latin capital letter k with cedilla 211 -> 310 + $string = str_replace ("\323", 'Ķ', $string); + // latin capital letter u with ogonek 217 -> 370 + $string = str_replace ("\331", 'Ų', $string); + // latin capital letter u with tilde 221 -> 360 + $string = str_replace ("\335", 'Ũ', $string); + // latin capital letter u with macron 222 -> 362 + $string = str_replace ("\336", 'Ū', $string); + // latin small letter a with macron 224 -> 257 + $string = str_replace ("\340", 'ā', $string); + // latin small letter i with ogonek 231 -> 303 + $string = str_replace ("\347", 'į', $string); + // latin small letter c with caron 232 -> 269 + $string = str_replace ("\350", 'č', $string); + // latin small letter e with ogonek 234 -> 281 + $string = str_replace ("\352", 'ę', $string); + // latin small letter e with dot above 236 -> 279 + $string = str_replace ("\354", 'ė', $string); + 
// latin small letter i with macron 239 -> 299 + $string = str_replace ("\357", 'ī', $string); + // latin small letter d with stroke 240 -> 273 + $string = str_replace ("\360", 'đ', $string); + // latin small letter n with cedilla 241 -> 326 + $string = str_replace ("\361", 'ņ', $string); + // latin small letter o with macron 242 -> 333 + $string = str_replace ("\362", 'ō', $string); + // latin small letter k with cedilla 243 -> 311 + $string = str_replace ("\363", 'ķ', $string); + // latin small letter u with ogonek 249 -> 371 + $string = str_replace ("\371", 'ų', $string); + // latin small letter u with tilde 253 -> 361 + $string = str_replace ("\375", 'ũ', $string); + // latin small letter u with macron 254 -> 363 + $string = str_replace ("\376", 'ū', $string); + // dot above 255 -> 729 + $string = str_replace ("\377", '˙', $string); + + // rest of charset is the same as ISO-8859-1 return (charset_decode_iso_8859_1($string)); } @@ -516,48 +592,134 @@ function charset_decode_iso_8859_7 ($string) { } /* - iso-8859-13 codeset used in Lithuania - only Lithuanian charactes are added. + ISO/IEC 8859-13:1998 Latin Alphabet No. 
7 (Baltic Rim) */ - function charset_decode_iso_8859_13 ($string) { - // latin capital a with ogonek - $string = str_replace ("\300", 'Ą', $string); - // latin capital c with caron - $string = str_replace ("\310", 'Č', $string); - // latin capital e with ogonek - $string = str_replace ("\306", 'Ę', $string); - // latin capital e with dot above - $string = str_replace ("\313", 'Ė', $string); - // latin capital i with ogonek - $string = str_replace ("\301", 'Į', $string); - // latin capital s with caron - $string = str_replace ("\320", 'Š', $string); - // latin capital u with ogonek - $string = str_replace ("\330", 'Ų', $string); - // latin capital u with macron - $string = str_replace ("\333", 'Ū', $string); - // latin capital z with caron - $string = str_replace ("\336", 'Ž', $string); - // latin small a with ogonek - $string = str_replace ("\340", 'ą', $string); - // latin small c with caron - $string = str_replace ("\350", 'č', $string); - // latin small e with ogonek - $string = str_replace ("\346", 'ę', $string); - // latin small e with dot above - $string = str_replace ("\353", 'ė', $string); - // latin small i with ogonek - $string = str_replace ("\341", 'į', $string); - // latin small s with caron - $string = str_replace ("\360", 'š', $string); - // latin small u with ogonek - $string = str_replace ("\370", 'ų', $string); - // latin small u with macron - $string = str_replace ("\373", 'ū', $string); - // latin small z with caron - $string = str_replace ("\376", 'ž', $string); + global $default_charset, $languages, $sm_notAlias; + + if (strtolower($default_charset) == 'iso-8859-13') + return $string; + if (strtolower($languages[$sm_notAlias]['CHARSET']) == 'iso-8859-13') + return $string; + + /* Only do the slow convert if there are 8-bit characters */ + if (! 
ereg("[\200-\377]", $string)) + return $string; + // right double quotation mark 161 -> 8221 + $string = str_replace ("\241", '”', $string); + // double low-9 quotation mark 165 -> 8222 + $string = str_replace ("\245", '„', $string); + // latin capital letter o with stroke 168 -> 216 + $string = str_replace ("\250", 'Ø', $string); + // latin capital letter r with cedilla 170 -> 342 + $string = str_replace ("\252", 'Ŗ', $string); + // latin capital letter ae 175 -> 198 + $string = str_replace ("\257", 'Æ', $string); + // left double quotation mark 180 -> 8220 + $string = str_replace ("\264", '“', $string); + // latin small letter o with stroke 184 -> 248 + $string = str_replace ("\270", 'ø', $string); + // latin small letter r with cedilla 186 -> 343 + $string = str_replace ("\272", 'ŗ', $string); + // latin small letter ae 191 -> 230 + $string = str_replace ("\277", 'æ', $string); + // latin capital letter a with ogonek 192 -> 260 + $string = str_replace ("\300", 'Ą', $string); + // latin capital letter i with ogonek 193 -> 302 + $string = str_replace ("\301", 'Į', $string); + // latin capital letter a with macron 194 -> 256 + $string = str_replace ("\302", 'Ā', $string); + // latin capital letter c with acute 195 -> 262 + $string = str_replace ("\303", 'Ć', $string); + // latin capital letter e with ogonek 198 -> 280 + $string = str_replace ("\306", 'Ę', $string); + // latin capital letter e with macron 199 -> 274 + $string = str_replace ("\307", 'Ē', $string); + // latin capital letter c with caron 200 -> 268 + $string = str_replace ("\310", 'Č', $string); + // latin capital letter z with acute 202 -> 377 + $string = str_replace ("\312", 'Ź', $string); + // latin capital letter e with dot above 203 -> 278 + $string = str_replace ("\313", 'Ė', $string); + // latin capital letter g with cedilla 204 -> 290 + $string = str_replace ("\314", 'Ģ', $string); + // latin capital letter k with cedilla 205 -> 310 + $string = str_replace ("\315", 'Ķ', $string); + // latin 
capital letter i with macron 206 -> 298 + $string = str_replace ("\316", 'Ī', $string); + // latin capital letter l with cedilla 207 -> 315 + $string = str_replace ("\317", 'Ļ', $string); + // latin capital letter s with caron 208 -> 352 + $string = str_replace ("\320", 'Š', $string); + // latin capital letter n with acute 209 -> 323 + $string = str_replace ("\321", 'Ń', $string); + // latin capital letter n with cedilla 210 -> 325 + $string = str_replace ("\322", 'Ņ', $string); + // latin capital letter o with macron 212 -> 332 + $string = str_replace ("\324", 'Ō', $string); + // latin capital letter u with ogonek 216 -> 370 + $string = str_replace ("\330", 'Ų', $string); + // latin capital letter l with stroke 217 -> 321 + $string = str_replace ("\331", 'Ł', $string); + // latin capital letter s with acute 218 -> 346 + $string = str_replace ("\332", 'Ś', $string); + // latin capital letter u with macron 219 -> 362 + $string = str_replace ("\333", 'Ū', $string); + // latin capital letter z with dot above 221 -> 379 + $string = str_replace ("\335", 'Ż', $string); + // latin capital letter z with caron 222 -> 381 + $string = str_replace ("\336", 'Ž', $string); + // latin small letter a with ogonek 224 -> 261 + $string = str_replace ("\340", 'ą', $string); + // latin small letter i with ogonek 225 -> 303 + $string = str_replace ("\341", 'į', $string); + // latin small letter a with macron 226 -> 257 + $string = str_replace ("\342", 'ā', $string); + // latin small letter c with acute 227 -> 263 + $string = str_replace ("\343", 'ć', $string); + // latin small letter e with ogonek 230 -> 281 + $string = str_replace ("\346", 'ę', $string); + // latin small letter e with macron 231 -> 275 + $string = str_replace ("\347", 'ē', $string); + // latin small letter c with caron 232 -> 269 + $string = str_replace ("\350", 'č', $string); + // latin small letter z with acute 234 -> 378 + $string = str_replace ("\352", 'ź', $string); + // latin small letter e with dot above 235 -> 
279 + $string = str_replace ("\353", 'ė', $string); + // latin small letter g with cedilla 236 -> 291 + $string = str_replace ("\354", 'ģ', $string); + // latin small letter k with cedilla 237 -> 311 + $string = str_replace ("\355", 'ķ', $string); + // latin small letter i with macron 238 -> 299 + $string = str_replace ("\356", 'ī', $string); + // latin small letter l with cedilla 239 -> 316 + $string = str_replace ("\357", 'ļ', $string); + // latin small letter s with caron 240 -> 353 + $string = str_replace ("\360", 'š', $string); + // latin small letter n with acute 241 -> 324 + $string = str_replace ("\361", 'ń', $string); + // latin small letter n with cedilla 242 -> 326 + $string = str_replace ("\362", 'ņ', $string); + // latin small letter o with macron 244 -> 333 + $string = str_replace ("\364", 'ō', $string); + // latin small letter u with ogonek 248 -> 371 + $string = str_replace ("\370", 'ų', $string); + // latin small letter l with stroke 249 -> 322 + $string = str_replace ("\371", 'ł', $string); + // latin small letter s with acute 250 -> 347 + $string = str_replace ("\372", 'ś', $string); + // latin small letter u with macron 251 -> 363 + $string = str_replace ("\373", 'ū', $string); + // latin small letter z with dot above 253 -> 380 + $string = str_replace ("\375", 'ż', $string); + // latin small letter z with caron 254 -> 382 + $string = str_replace ("\376", 'ž', $string); + // right single quotation mark 255 -> 8217 + $string = str_replace ("\377", '’', $string); + + // rest of charset is the same as ISO-8859-1 return (charset_decode_iso_8859_1($string)); } @@ -776,6 +938,176 @@ function charset_decode_koi8r ($string) { return $string; } +/* + windows-1257 (BaltRim) + */ +function charset_decode_windows_1257 ($string) { + global $default_charset, $languages, $sm_notAlias; + + if (strtolower($default_charset) == 'windows-1257') + return $string; + if (strtolower($languages[$sm_notAlias]['CHARSET']) == 'windows-1257') + return $string; + + /* Only 
do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + // Euro sign 128 -> 8364 + $string = str_replace("\200", '€', $string); + // Single low-9 quotation mark 130 -> 8218 + $string = str_replace("\202", '‚', $string); + // Double low-9 quotation mark 132 -> 8222 + $string = str_replace("\204", '„', $string); + // horizontal ellipsis 133 -> 8230 + $string = str_replace("\205", '…', $string); + // dagger 134 -> 8224 + $string = str_replace("\206", '†', $string); + // double dagger 135 -> 8225 + $string = str_replace("\207", '‡', $string); + // per mille sign 137 -> 8240 + $string = str_replace("\211", '‰', $string); + // single left-pointing angle quotation mark 139 -> 8249 + $string = str_replace("\213", '‹', $string); + // diaeresis 141 -> 168 + $string = str_replace("\215", '¨', $string); + // caron 142 -> 711 + $string = str_replace("\216", 'ˇ', $string); + // cedilla 143 -> 184 + $string = str_replace("\217", '¸', $string); + // left single quotation mark 145 -> 8216 + $string = str_replace("\221", '‘', $string); + // right single quotation mark 146 -> 8217 + $string = str_replace("\222", '’', $string); + // left double quotation mark 147 -> 8220 + $string = str_replace("\223", '“', $string); + // right double quotation mark 148 -> 8221 + $string = str_replace("\224", '”', $string); + // bullet 149 -> 8226 + $string = str_replace("\225", '•', $string); + // en dash 150 -> 8211 + $string = str_replace("\226", '–', $string); + // em dash 151 -> 8212 + $string = str_replace("\227", '—', $string); + // trade mark sign 153 -> 8482 + $string = str_replace("\231", '™', $string); + // single right-pointing angle quotation mark 155 -> 8250 + $string = str_replace("\233", '›', $string); + // macron 157 -> 175 + $string = str_replace("\235", '¯', $string); + // ogonek 158 -> 731 + $string = str_replace("\236", '˛', $string); + // latin capital letter o with stroke 168 -> 216 + $string = str_replace ("\250", 'Ø', 
$string); + // latin capital letter r with cedilla 170 -> 342 + $string = str_replace ("\252", 'Ŗ', $string); + // latin capital letter ae 175 -> 198 + $string = str_replace ("\257", 'Æ', $string); + // latin small letter o with stroke 184 -> 248 + $string = str_replace ("\270", 'ø', $string); + // latin small letter r with cedilla 186 -> 343 + $string = str_replace ("\272", 'ŗ', $string); + // latin small letter ae 191 -> 230 + $string = str_replace ("\277", 'æ', $string); + // latin capital letter a with ogonek 192 -> 260 + $string = str_replace ("\300", 'Ą', $string); + // latin capital letter i with ogonek 193 -> 302 + $string = str_replace ("\301", 'Į', $string); + // latin capital letter a with macron 194 -> 256 + $string = str_replace ("\302", 'Ā', $string); + // latin capital letter c with acute 195 -> 262 + $string = str_replace ("\303", 'Ć', $string); + // latin capital letter e with ogonek 198 -> 280 + $string = str_replace ("\306", 'Ę', $string); + // latin capital letter e with macron 199 -> 274 + $string = str_replace ("\307", 'Ē', $string); + // latin capital letter c with caron 200 -> 268 + $string = str_replace ("\310", 'Č', $string); + // latin capital letter z with acute 202 -> 377 + $string = str_replace ("\312", 'Ź', $string); + // latin capital letter e with dot above 203 -> 278 + $string = str_replace ("\313", 'Ė', $string); + // latin capital letter g with cedilla 204 -> 290 + $string = str_replace ("\314", 'Ģ', $string); + // latin capital letter k with cedilla 205 -> 310 + $string = str_replace ("\315", 'Ķ', $string); + // latin capital letter i with macron 206 -> 298 + $string = str_replace ("\316", 'Ī', $string); + // latin capital letter l with cedilla 207 -> 315 + $string = str_replace ("\317", 'Ļ', $string); + // latin capital letter s with caron 208 -> 352 + $string = str_replace ("\320", 'Š', $string); + // latin capital letter n with acute 209 -> 323 + $string = str_replace ("\321", 'Ń', $string); + // latin capital letter n with 
cedilla 210 -> 325 + $string = str_replace ("\322", 'Ņ', $string); + // latin capital letter o with macron 212 -> 332 + $string = str_replace ("\324", 'Ō', $string); + // latin capital letter u with ogonek 216 -> 370 + $string = str_replace ("\330", 'Ų', $string); + // latin capital letter l with stroke 217 -> 321 + $string = str_replace ("\331", 'Ł', $string); + // latin capital letter s with acute 218 -> 346 + $string = str_replace ("\332", 'Ś', $string); + // latin capital letter u with macron 219 -> 362 + $string = str_replace ("\333", 'Ū', $string); + // latin capital letter z with dot above 221 -> 379 + $string = str_replace ("\335", 'Ż', $string); + // latin capital letter z with caron 222 -> 381 + $string = str_replace ("\336", 'Ž', $string); + // latin small letter a with ogonek 224 -> 261 + $string = str_replace ("\340", 'ą', $string); + // latin small letter i with ogonek 225 -> 303 + $string = str_replace ("\341", 'į', $string); + // latin small letter a with macron 226 -> 257 + $string = str_replace ("\342", 'ā', $string); + // latin small letter c with acute 227 -> 263 + $string = str_replace ("\343", 'ć', $string); + // latin small letter e with ogonek 230 -> 281 + $string = str_replace ("\346", 'ę', $string); + // latin small letter e with macron 231 -> 275 + $string = str_replace ("\347", 'ē', $string); + // latin small letter c with caron 232 -> 269 + $string = str_replace ("\350", 'č', $string); + // latin small letter z with acute 234 -> 378 + $string = str_replace ("\352", 'ź', $string); + // latin small letter e with dot above 235 -> 279 + $string = str_replace ("\353", 'ė', $string); + // latin small letter g with cedilla 236 -> 291 + $string = str_replace ("\354", 'ģ', $string); + // latin small letter k with cedilla 237 -> 311 + $string = str_replace ("\355", 'ķ', $string); + // latin small letter i with macron 238 -> 299 + $string = str_replace ("\356", 'ī', $string); + // latin small letter l with cedilla 239 -> 316 + $string = 
str_replace ("\357", 'ļ', $string); + // latin small letter s with caron 240 -> 353 + $string = str_replace ("\360", 'š', $string); + // latin small letter n with acute 241 -> 324 + $string = str_replace ("\361", 'ń', $string); + // latin small letter n with cedilla 242 -> 326 + $string = str_replace ("\362", 'ņ', $string); + // latin small letter o with macron 244 -> 333 + $string = str_replace ("\364", 'ō', $string); + // latin small letter u with ogonek 248 -> 371 + $string = str_replace ("\370", 'ų', $string); + // latin small letter l with stroke 249 -> 322 + $string = str_replace ("\371", 'ł', $string); + // latin small letter s with acute 250 -> 347 + $string = str_replace ("\372", 'ś', $string); + // latin small letter u with macron 251 -> 363 + $string = str_replace ("\373", 'ū', $string); + // latin small letter z with dot above 253 -> 380 + $string = str_replace ("\375", 'ż', $string); + // latin small letter z with caron 254 -> 382 + $string = str_replace ("\376", 'ž', $string); + // dot above 255 -> 729 + $string = str_replace ("\377", '˙', $string); + + // Rest of charset is like iso-8859-1 + return (charset_decode_iso_8859_1($string)); +} + /* * Set up the language to be output -- 2.25.1