X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=functions%2Fi18n.php;h=764188e7353f66b8fe367ff156737accc527b775;hb=969a0af6a44fbaea7d56f542f067277109546516;hp=c7b5618428955a26a6f485890e4a806687af45f9;hpb=e7ab8c9da136be7565e4b686841d95370b286896;p=squirrelmail.git diff --git a/functions/i18n.php b/functions/i18n.php index c7b56184..764188e7 100644 --- a/functions/i18n.php +++ b/functions/i18n.php @@ -3,7 +3,7 @@ /** * i18n.php * - * Copyright (c) 1999-2002 The SquirrelMail Project Team + * Copyright (c) 1999-2003 The SquirrelMail Project Team * Licensed under the GNU GPL. For full terms see the file COPYING. * * This file contains variuos functions that are needed to do @@ -15,6 +15,8 @@ * $Id$ */ +require_once(SM_PATH . 'functions/global.php'); + /* Decodes a string to the internal encoding from the given charset */ function charset_decode ($charset, $string) { global $languages, $squirrelmail_language; @@ -25,10 +27,13 @@ function charset_decode ($charset, $string) { } /* All HTML special characters are 7 bit and can be replaced first */ + $string = htmlspecialchars ($string); $charset = strtolower($charset); + set_my_charset() ; + if (ereg('iso-8859-([[:digit:]]+)', $charset, $res)) { if ($res[1] == '1') { $ret = charset_decode_iso_8859_1 ($string); @@ -36,8 +41,12 @@ function charset_decode ($charset, $string) { $ret = charset_decode_iso_8859_2 ($string); } else if ($res[1] == '4') { $ret = charset_decode_iso_8859_4 ($string); + } else if ($res[1] == '5') { + $ret = charset_decode_iso_8859_5 ($string); } else if ($res[1] == '7') { $ret = charset_decode_iso_8859_7 ($string); + } else if ($res[1] == '9') { + $ret = charset_decode_iso_8859_9 ($string); } else if ($res[1] == '13') { $ret = charset_decode_iso_8859_13 ($string); } else if ($res[1] == '15') { @@ -50,7 +59,15 @@ function charset_decode ($charset, $string) { } else if ($charset == 'koi8-r') { $ret = charset_decode_koi8r ($string); } else if ($charset == 'windows-1251') { - $ret = charset_decode_koi8r ($string); + $ret = charset_decode_windows_1251 ($string); + } else if ($charset == 'windows-1253') { + $ret = charset_decode_windows_1253 ($string); + } else if ($charset == 'windows-1254') { + $ret = charset_decode_windows_1254 ($string); + } else if ($charset == 'windows-1257') { + $ret = charset_decode_windows_1257 ($string); + } else if ($charset == 'utf-8') { + $ret = charset_decode_utf8 ($string); } else { $ret = $string; } @@ -408,49 +425,71 @@ function charset_decode_iso_8859_2 ($string) { } /* - iso-8859-4 is Baltic codeset used in some email clients - instead of iso-8859-13 in Lithuania - only Lithuanian charactes are added. + ISO/IEC 8859-4:1998 Latin Alphabet No. 4 */ function charset_decode_iso_8859_4 ($string) { - // latin capital a with ogonek - $string = str_replace ("\241", 'Ą', $string); - // latin capital c with caron - $string = str_replace ("\310", 'Č', $string); - // latin capital e with ogonek - $string = str_replace ("\312", 'Ę', $string); - // latin capital e with dot above - $string = str_replace ("\314", 'Ė', $string); - // latin capital i with ogonek - $string = str_replace ("\307", 'Į', $string); - // latin capital s with caron - $string = str_replace ("\251", 'Š', $string); - // latin capital u with ogonek - $string = str_replace ("\331", 'Ų', $string); - // latin capital u with macron - $string = str_replace ("\336", 'Ū', $string); - // latin capital z with caron - $string = str_replace ("\256", 'Ž', $string); - // latin small a with ogonek - $string = str_replace ("\261", 'ą', $string); - // latin small c with caron - $string = str_replace ("\350", 'č', $string); - // latin small e with ogonek - $string = str_replace ("\352", 'ę', $string); - // latin small e with dot above - $string = str_replace ("\354", 'ė', $string); - // latin small i with ogonek - $string = str_replace ("\347", 'į', $string); - // latin small s with caron - $string = str_replace ("\271", 'š', $string); - // latin small u with ogonek - $string = str_replace ("\371", 'ų', $string); - // latin small u with macron - $string = str_replace ("\376", 'ū', $string); - // latin small z with caron - $string = str_replace ("\276", 'ž', $string); + global $default_charset; + + if (strtolower($default_charset) == 'iso-8859-4') + return $string; + /* Only do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + $string = str_replace ("\241", 'Ą', $string); + $string = str_replace ("\242", 'ĸ', $string); + $string = str_replace ("\243", 'Ŗ', $string); + $string = str_replace ("\245", 'Ĩ', $string); + $string = str_replace ("\246", 'Ļ', $string); + $string = str_replace ("\251", 'Š', $string); + $string = str_replace ("\252", 'Ē', $string); + $string = str_replace ("\253", 'Ģ', $string); + $string = str_replace ("\254", 'Ŧ', $string); + $string = str_replace ("\256", 'Ž', $string); + $string = str_replace ("\261", 'ą', $string); + $string = str_replace ("\262", '˛', $string); + $string = str_replace ("\263", 'ŗ', $string); + $string = str_replace ("\265", 'ĩ', $string); + $string = str_replace ("\266", 'ļ', $string); + $string = str_replace ("\267", 'ˇ', $string); + $string = str_replace ("\271", 'š', $string); + $string = str_replace ("\272", 'ē', $string); + $string = str_replace ("\273", 'ģ', $string); + $string = str_replace ("\274", 'ŧ', $string); + $string = str_replace ("\275", 'Ŋ', $string); + $string = str_replace ("\276", 'ž', $string); + $string = str_replace ("\277", 'ŋ', $string); + $string = str_replace ("\300", 'Ā', $string); + $string = str_replace ("\307", 'Į', $string); + $string = str_replace ("\310", 'Č', $string); + $string = str_replace ("\312", 'Ę', $string); + $string = str_replace ("\314", 'Ė', $string); + $string = str_replace ("\317", 'Ī', $string); + $string = str_replace ("\320", 'Đ', $string); + $string = str_replace ("\321", 'Ņ', $string); + $string = str_replace ("\322", 'Ō', $string); + $string = str_replace ("\323", 'Ķ', $string); + $string = str_replace ("\331", 'Ų', $string); + $string = str_replace ("\335", 'Ũ', $string); + $string = str_replace ("\336", 'Ū', $string); + $string = str_replace ("\340", 'ā', $string); + $string = str_replace ("\347", 'į', $string); + $string = str_replace ("\350", 'č', $string); + $string = str_replace ("\352", 'ę', $string); + $string = str_replace ("\354", 'ė', $string); + $string = str_replace ("\357", 'ī', $string); + $string = str_replace ("\360", 'đ', $string); + $string = str_replace ("\361", 'ņ', $string); + $string = str_replace ("\362", 'ō', $string); + $string = str_replace ("\363", 'ķ', $string); + $string = str_replace ("\371", 'ų', $string); + $string = str_replace ("\375", 'ũ', $string); + $string = str_replace ("\376", 'ū', $string); + $string = str_replace ("\377", '˙', $string); + + // rest of charset is the same as ISO-8859-1 return (charset_decode_iso_8859_1($string)); } @@ -492,7 +531,7 @@ function charset_decode_iso_8859_7 ($string) { * ISO-8859-7 characters from 11/04 (0xB4) to 11/06 (0xB6) * These are Unicode 900-902 */ - $string = preg_replace("/([\264-\266])/","'&#' . (ord(\\1)+720)",$string); + $string = preg_replace("/([\264-\266])/e","'&#' . (ord('\\1')+720);",$string); /* 11/07 (0xB7) Middle dot is the same in iso-8859-1 */ $string = str_replace("\267", '·', $string); @@ -501,7 +540,7 @@ function charset_decode_iso_8859_7 ($string) { * ISO-8859-7 characters from 11/08 (0xB8) to 11/10 (0xBA) * These are Unicode 900-902 */ - $string = preg_replace("/([\270-\272])/","'&#' . (ord(\\1)+720)",$string); + $string = preg_replace("/([\270-\272])/e","'&#' . (ord('\\1')+720);",$string); /* * 11/11 (0xBB) Right angle quotation mark is the same as in @@ -510,54 +549,114 @@ function charset_decode_iso_8859_7 ($string) { $string = str_replace("\273", '»', $string); /* And now the rest of the charset */ - $string = preg_replace("/([\274-\376])/","'&#' . (ord(\\1)+720)",$string); + $string = preg_replace("/([\274-\376])/e","'&#'.(ord('\\1')+720);",$string); return $string; } /* - iso-8859-13 codeset used in Lithuania - only Lithuanian charactes are added. + ISOIEC 8859-9:1999 Latin Alphabet No. 5 + */ +function charset_decode_iso_8859_9 ($string) { + global $default_charset; + + if (strtolower($default_charset) == 'iso-8859-9') + return $string; + + /* Only do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + // latin capital letter g with breve 208->286 + $string = str_replace("\320", 'Ğ', $string); + // latin capital letter i with dot above 221->304 + $string = str_replace("\335", 'İ', $string); + // latin capital letter s with cedilla 222->350 + $string = str_replace("\336", 'Ş', $string); + // latin small letter g with breve 240->287 + $string = str_replace("\360", 'ğ', $string); + // latin small letter dotless i 253->305 + $string = str_replace("\375", 'ı', $string); + // latin small letter s with cedilla 254->351 + $string = str_replace("\376", 'ş', $string); + + // rest of charset is the same as ISO-8859-1 + return (charset_decode_iso_8859_1($string)); +} + +/* + ISO/IEC 8859-13:1998 Latin Alphabet No. 7 (Baltic Rim) +*/ function charset_decode_iso_8859_13 ($string) { - // latin capital a with ogonek - $string = str_replace ("\300", 'Ą', $string); - // latin capital c with caron - $string = str_replace ("\310", 'Č', $string); - // latin capital e with ogonek - $string = str_replace ("\306", 'Ę', $string); - // latin capital e with dot above - $string = str_replace ("\313", 'Ė', $string); - // latin capital i with ogonek - $string = str_replace ("\301", 'Į', $string); - // latin capital s with caron - $string = str_replace ("\320", 'Š', $string); - // latin capital u with ogonek - $string = str_replace ("\330", 'Ų', $string); - // latin capital u with macron - $string = str_replace ("\333", 'Ū', $string); - // latin capital z with caron - $string = str_replace ("\336", 'Ž', $string); - // latin small a with ogonek - $string = str_replace ("\340", 'ą', $string); - // latin small c with caron - $string = str_replace ("\350", 'č', $string); - // latin small e with ogonek - $string = str_replace ("\346", 'ę', $string); - // latin small e with dot above - $string = str_replace ("\353", 'ė', $string); - // latin small i with ogonek - $string = str_replace ("\341", 'į', $string); - // latin small s with caron - $string = str_replace ("\360", 'š', $string); - // latin small u with ogonek - $string = str_replace ("\370", 'ų', $string); - // latin small u with macron - $string = str_replace ("\373", 'ū', $string); - // latin small z with caron - $string = str_replace ("\376", 'ž', $string); + global $default_charset; + + if (strtolower($default_charset) == 'iso-8859-13') + return $string; + /* Only do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + $string = str_replace ("\241", '”', $string); + $string = str_replace ("\245", '„', $string); + $string = str_replace ("\250", 'Ø', $string); + $string = str_replace ("\252", 'Ŗ', $string); + $string = str_replace ("\257", 'Æ', $string); + $string = str_replace ("\264", '“', $string); + $string = str_replace ("\270", 'ø', $string); + $string = str_replace ("\272", 'ŗ', $string); + $string = str_replace ("\277", 'æ', $string); + $string = str_replace ("\300", 'Ą', $string); + $string = str_replace ("\301", 'Į', $string); + $string = str_replace ("\302", 'Ā', $string); + $string = str_replace ("\303", 'Ć', $string); + $string = str_replace ("\306", 'Ę', $string); + $string = str_replace ("\307", 'Ē', $string); + $string = str_replace ("\310", 'Č', $string); + $string = str_replace ("\312", 'Ź', $string); + $string = str_replace ("\313", 'Ė', $string); + $string = str_replace ("\314", 'Ģ', $string); + $string = str_replace ("\315", 'Ķ', $string); + $string = str_replace ("\316", 'Ī', $string); + $string = str_replace ("\317", 'Ļ', $string); + $string = str_replace ("\320", 'Š', $string); + $string = str_replace ("\321", 'Ń', $string); + $string = str_replace ("\322", 'Ņ', $string); + $string = str_replace ("\324", 'Ō', $string); + $string = str_replace ("\330", 'Ų', $string); + $string = str_replace ("\331", 'Ł', $string); + $string = str_replace ("\332", 'Ś', $string); + $string = str_replace ("\333", 'Ū', $string); + $string = str_replace ("\335", 'Ż', $string); + $string = str_replace ("\336", 'Ž', $string); + $string = str_replace ("\340", 'ą', $string); + $string = str_replace ("\341", 'į', $string); + $string = str_replace ("\342", 'ā', $string); + $string = str_replace ("\343", 'ć', $string); + $string = str_replace ("\346", 'ę', $string); + $string = str_replace ("\347", 'ē', $string); + $string = str_replace ("\350", 'č', $string); + $string = str_replace ("\352", 'ź', $string); + $string = str_replace ("\353", 'ė', $string); + $string = str_replace ("\354", 'ģ', $string); + $string = str_replace ("\355", 'ķ', $string); + $string = str_replace ("\356", 'ī', $string); + $string = str_replace ("\357", 'ļ', $string); + $string = str_replace ("\360", 'š', $string); + $string = str_replace ("\361", 'ń', $string); + $string = str_replace ("\362", 'ņ', $string); + $string = str_replace ("\364", 'ō', $string); + $string = str_replace ("\370", 'ų', $string); + $string = str_replace ("\371", 'ł', $string); + $string = str_replace ("\372", 'ś', $string); + $string = str_replace ("\373", 'ū', $string); + $string = str_replace ("\375", 'ż', $string); + $string = str_replace ("\376", 'ž', $string); + $string = str_replace ("\377", '’', $string); + + // rest of charset is the same as ISO-8859-1 return (charset_decode_iso_8859_1($string)); } @@ -776,6 +875,234 @@ function charset_decode_koi8r ($string) { return $string; } +/* + windows-1254 (Turks) + */ +function charset_decode_windows_1254 ($string) { + global $default_charset; + + if (strtolower($default_charset) == 'windows-1254') + return $string; + + /* Only do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + // Euro sign 128 -> 8364 + $string = str_replace("\200", '€', $string); + // Single low-9 quotation mark 130 -> 8218 + $string = str_replace("\202", '‚', $string); + // latin small letter f with hook 131 -> 402 + $string = str_replace("\203", 'ƒ', $string); + // Double low-9 quotation mark 132 -> 8222 + $string = str_replace("\204", '„', $string); + // horizontal ellipsis 133 -> 8230 + $string = str_replace("\205", '…', $string); + // dagger 134 -> 8224 + $string = str_replace("\206", '†', $string); + // double dagger 135 -> 8225 + $string = str_replace("\207", '‡', $string); + // modifier letter circumflex accent 136->710 + $string = str_replace("\210", 'ˆ', $string); + // per mille sign 137 -> 8240 + $string = str_replace("\211", '‰', $string); + // latin capital letter s with caron 138 -> 352 + $string = str_replace("\212", 'Š', $string); + // single left-pointing angle quotation mark 139 -> 8249 + $string = str_replace("\213", '‹', $string); + // latin capital ligature oe 140 -> 338 + $string = str_replace("\214", 'Œ', $string); + // left single quotation mark 145 -> 8216 + $string = str_replace("\221", '‘', $string); + // right single quotation mark 146 -> 8217 + $string = str_replace("\222", '’', $string); + // left double quotation mark 147 -> 8220 + $string = str_replace("\223", '“', $string); + // right double quotation mark 148 -> 8221 + $string = str_replace("\224", '”', $string); + // bullet 149 -> 8226 + $string = str_replace("\225", '•', $string); + // en dash 150 -> 8211 + $string = str_replace("\226", '–', $string); + // em dash 151 -> 8212 + $string = str_replace("\227", '—', $string); + // small tilde 152 -> 732 + $string = str_replace("\230", '˜', $string); + // trade mark sign 153 -> 8482 + $string = str_replace("\231", '™', $string); + // latin small letter s with caron 154 -> 353 + $string = str_replace("\232", 'š', $string); + // single right-pointing angle quotation mark 155 -> 8250 + $string = str_replace("\233", '›', $string); + // latin small ligature oe 156 -> 339 + $string = str_replace("\234", 'œ', $string); + // latin capital letter y with diaresis 159->376 + $string = str_replace("\237", 'Ÿ', $string); + // latin capital letter g with breve 208->286 + $string = str_replace("\320", 'Ğ', $string); + // latin capital letter i with dot above 221->304 + $string = str_replace("\335", 'İ', $string); + // latin capital letter s with cedilla 222->350 + $string = str_replace("\336", 'Ş', $string); + // latin small letter g with breve 240->287 + $string = str_replace("\360", 'ğ', $string); + // latin small letter dotless i 253->305 + $string = str_replace("\375", 'ı', $string); + // latin small letter s with cedilla 254->351 + $string = str_replace("\376", 'ş', $string); + + // Rest of charset is like iso-8859-1 + return (charset_decode_iso_8859_1($string)); +} + +/* + windows-1253 (Greek) + */ +function charset_decode_windows_1253 ($string) { + global $default_charset; + + if (strtolower($default_charset) == 'windows-1253') + return $string; + + /* Only do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + $string = str_replace("\200", '€', $string); + $string = str_replace("\202", '‚', $string); + $string = str_replace("\203", 'ƒ', $string); + $string = str_replace("\204", '„', $string); + $string = str_replace("\205", '…', $string); + $string = str_replace("\206", '†', $string); + $string = str_replace("\207", '‡', $string); + $string = str_replace("\211", '‰', $string); + $string = str_replace("\213", '‹', $string); + $string = str_replace("\221", '‘', $string); + $string = str_replace("\222", '’', $string); + $string = str_replace("\223", '“', $string); + $string = str_replace("\224", '”', $string); + $string = str_replace("\225", '•', $string); + $string = str_replace("\226", '–', $string); + $string = str_replace("\227", '—', $string); + $string = str_replace("\231", '™', $string); + $string = str_replace("\233", '›', $string); + $string = str_replace("\241", '΅', $string); + $string = str_replace("\242", 'Ά', $string); + $string = str_replace ("\257", '―', $string); + $string = str_replace("\264", '΄', $string); + $string = str_replace("\270", 'Έ', $string); + $string = str_replace ("\271", 'Ή', $string); + $string = str_replace ("\272", 'Ί', $string); + $string = str_replace ("\274", 'Ό', $string); + // cycle for 190-254 symbols + $string = preg_replace("/([\274-\376])/e","'&#' . (ord('\\1')+720);",$string); + + // Rest of charset is like iso-8859-1 + return (charset_decode_iso_8859_1($string)); +} + + +/* + windows-1257 (BaltRim) + */ +function charset_decode_windows_1257 ($string) { + global $default_charset; + + if (strtolower($default_charset) == 'windows-1257') + return $string; + + /* Only do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + $string = str_replace("\200", '€', $string); + $string = str_replace("\202", '‚', $string); + $string = str_replace("\204", '„', $string); + $string = str_replace("\205", '…', $string); + $string = str_replace("\206", '†', $string); + $string = str_replace("\207", '‡', $string); + $string = str_replace("\211", '‰', $string); + $string = str_replace("\213", '‹', $string); + $string = str_replace("\215", '¨', $string); + $string = str_replace("\216", 'ˇ', $string); + $string = str_replace("\217", '¸', $string); + $string = str_replace("\221", '‘', $string); + $string = str_replace("\222", '’', $string); + $string = str_replace("\223", '“', $string); + $string = str_replace("\224", '”', $string); + $string = str_replace("\225", '•', $string); + $string = str_replace("\226", '–', $string); + $string = str_replace("\227", '—', $string); + $string = str_replace("\231", '™', $string); + $string = str_replace("\233", '›', $string); + $string = str_replace("\235", '¯', $string); + $string = str_replace("\236", '˛', $string); + $string = str_replace ("\250", 'Ø', $string); + $string = str_replace ("\252", 'Ŗ', $string); + $string = str_replace ("\257", 'Æ', $string); + $string = str_replace ("\270", 'ø', $string); + $string = str_replace ("\272", 'ŗ', $string); + $string = str_replace ("\277", 'æ', $string); + $string = str_replace ("\300", 'Ą', $string); + $string = str_replace ("\301", 'Į', $string); + $string = str_replace ("\302", 'Ā', $string); + $string = str_replace ("\303", 'Ć', $string); + $string = str_replace ("\306", 'Ę', $string); + $string = str_replace ("\307", 'Ē', $string); + $string = str_replace ("\310", 'Č', $string); + $string = str_replace ("\312", 'Ź', $string); + $string = str_replace ("\313", 'Ė', $string); + $string = str_replace ("\314", 'Ģ', $string); + $string = str_replace ("\315", 'Ķ', $string); + $string = str_replace ("\316", 'Ī', $string); + $string = str_replace ("\317", 'Ļ', $string); + $string = str_replace ("\320", 'Š', $string); + $string = str_replace ("\321", 'Ń', $string); + $string = str_replace ("\322", 'Ņ', $string); + $string = str_replace ("\324", 'Ō', $string); + $string = str_replace ("\330", 'Ų', $string); + $string = str_replace ("\331", 'Ł', $string); + $string = str_replace ("\332", 'Ŕ', $string); + $string = str_replace ("\333", 'Ū', $string); + $string = str_replace ("\335", 'Ż', $string); + $string = str_replace ("\336", 'Ž', $string); + $string = str_replace ("\340", 'ą', $string); + $string = str_replace ("\341", 'į', $string); + $string = str_replace ("\342", 'ā', $string); + $string = str_replace ("\343", 'ć', $string); + $string = str_replace ("\346", 'ę', $string); + $string = str_replace ("\347", 'ē', $string); + $string = str_replace ("\350", 'č', $string); + $string = str_replace ("\352", 'ź', $string); + $string = str_replace ("\353", 'ė', $string); + $string = str_replace ("\354", 'ģ', $string); + $string = str_replace ("\355", 'ķ', $string); + $string = str_replace ("\356", 'ī', $string); + $string = str_replace ("\357", 'ļ', $string); + $string = str_replace ("\360", 'š', $string); + $string = str_replace ("\361", 'ń', $string); + $string = str_replace ("\362", 'ņ', $string); + $string = str_replace ("\364", 'ō', $string); + $string = str_replace ("\370", 'ų', $string); + $string = str_replace ("\371", 'ł', $string); + $string = str_replace ("\372", 'ś', $string); + $string = str_replace ("\373", 'ū', $string); + $string = str_replace ("\375", 'ż', $string); + $string = str_replace ("\376", 'ž', $string); + $string = str_replace ("\377", '˙', $string); + + // Rest of charset is like iso-8859-1 + return (charset_decode_iso_8859_1($string)); +} + +/* windows-1251 is Microsoft Cyrillic encoding */ +function charset_decode_windows_1251 ($string) { + // Convert to KOI8-R, then return this decoded. + $string = convert_cyr_string($string, 'w', 'k'); + return charset_decode_koi8r($string); +} + + /* * Set up the language to be output @@ -785,17 +1112,19 @@ function charset_decode_koi8r ($string) { function set_up_language($sm_language, $do_search = false) { static $SetupAlready = 0; - global $HTTP_ACCEPT_LANGUAGE, $use_gettext, $languages, + global $use_gettext, $languages, $squirrelmail_language, $squirrelmail_default_language, $sm_notAlias; if ($SetupAlready) { return; } + $SetupAlready = TRUE; + sqgetGlobalVar('HTTP_ACCEPT_LANGUAGE', $accept_lang, SQ_SERVER); - if ($do_search && ! $sm_language && isset($HTTP_ACCEPT_LANGUAGE)) { - $sm_language = substr($HTTP_ACCEPT_LANGUAGE, 0, 2); + if ($do_search && ! $sm_language && isset($accept_lang)) { + $sm_language = substr($accept_lang, 0, 2); } if (!$sm_language && isset($squirrelmail_default_language)) { @@ -811,8 +1140,11 @@ function set_up_language($sm_language, $do_search = false) { $use_gettext && $sm_language != '' && isset($languages[$sm_notAlias]['CHARSET']) ) { - bindtextdomain( 'squirrelmail', '../locale/' ); + bindtextdomain( 'squirrelmail', SM_PATH . 'locale/' ); textdomain( 'squirrelmail' ); + if (function_exists('bind_textdomain_codeset')) { + bind_textdomain_codeset ("squirrelmail", $languages[$sm_notAlias]['CHARSET'] ); + } if ( !ini_get('safe_mode') && getenv( 'LC_ALL' ) != $sm_notAlias ) { putenv( "LC_ALL=$sm_notAlias" ); @@ -824,7 +1156,10 @@ function set_up_language($sm_language, $do_search = false) { if ($squirrelmail_language == 'ja_JP' && function_exists('mb_detect_encoding') ) { header ('Content-Type: text/html; charset=EUC-JP'); if (!function_exists('mb_internal_encoding')) { - echo _("You need to have php4 installed with the multibyte string function enabled (using configure option --with-mbstring)."); + echo _("You need to have php4 installed with the multibyte string function enabled (using configure option --enable-mbstring)."); + } + if (function_exists('mb_language')) { + mb_language('Japanese'); } mb_internal_encoding('EUC-JP'); mb_http_output('pass'); @@ -845,11 +1180,11 @@ function set_my_charset(){ * selection. This is "more right" (tm), than just stamping the * message blindly with the system-wide $default_charset. */ - global $data_dir, $username, $default_charset, $languages; + global $data_dir, $username, $default_charset, $languages, $squirrelmail_default_language; $my_language = getPref($data_dir, $username, 'language'); if (!$my_language) { - return; + $my_language = $squirrelmail_default_language ; } while (isset($languages[$my_language]['ALIAS'])) { $my_language = $languages[$my_language]['ALIAS']; @@ -893,6 +1228,10 @@ $languages['de']['ALIAS'] = 'de_DE'; // There is no en_EN! There is en_US, en_BR, en_AU, and so forth, // but who cares about !US, right? Right? :) +$languages['el_GR']['NAME'] = 'Greek'; +$languages['el_GR']['CHARSET'] = 'iso-8859-7'; +$languages['el']['ALIAS'] = 'el_GR'; + $languages['en_US']['NAME'] = 'English'; $languages['en_US']['CHARSET'] = 'iso-8859-1'; $languages['en']['ALIAS'] = 'en_US'; @@ -979,13 +1318,13 @@ $languages['tr_TR']['NAME'] = 'Turkish'; $languages['tr_TR']['CHARSET'] = 'iso-8859-9'; $languages['tr']['ALIAS'] = 'tr_TR'; -$languages['zh_TW']['NAME'] = 'Taiwan'; +$languages['zh_TW']['NAME'] = 'Chinese Trad'; $languages['zh_TW']['CHARSET'] = 'big5'; $languages['tw']['ALIAS'] = 'zh_TW'; -$languages['zh_TW']['NAME'] = 'Chinese'; -$languages['zh_TW']['CHARSET'] = 'gb2312'; -$languages['tw']['ALIAS'] = 'zh_CN'; +$languages['zh_CN']['NAME'] = 'Chinese Simp'; +$languages['zh_CN']['CHARSET'] = 'gb2312'; +$languages['cn']['ALIAS'] = 'zh_CN'; $languages['sk_SK']['NAME'] = 'Slovak'; $languages['sk_SK']['CHARSET'] = 'iso-8859-2'; @@ -1000,7 +1339,7 @@ $languages['th_TH']['CHARSET'] = 'tis-620'; $languages['th']['ALIAS'] = 'th_TH'; $languages['lt_LT']['NAME'] = 'Lithuanian'; -$languages['lt_LT']['CHARSET'] = 'iso-8859-13'; +$languages['lt_LT']['CHARSET'] = 'windows-1257'; $languages['lt']['ALIAS'] = 'lt_LT'; $languages['sl_SI']['NAME'] = 'Slovenian'; @@ -1011,13 +1350,29 @@ $languages['bg_BG']['NAME'] = 'Bulgarian'; $languages['bg_BG']['CHARSET'] = 'windows-1251'; $languages['bg']['ALIAS'] = 'bg_BG'; +$languages['uk_UA']['NAME'] = 'Ukrainian'; +$languages['uk_UA']['CHARSET'] = 'koi8-u'; +$languages['uk']['ALIAS'] = 'uk_UA'; + +$languages['cy_GB']['NAME'] = 'Welsh'; +$languages['cy_GB']['CHARSET'] = 'iso-8859-1'; +$languages['cy']['ALIAS'] = 'cy_GB'; + + // Right to left languages -$languages['he_HE']['NAME'] = 'Hebrew'; -$languages['he_HE']['CHARSET'] = 'windows-1255'; -$languages['he_HE']['DIR'] = 'rtl'; -$languages['he']['ALIAS'] = 'he_HE'; +$languages['ar']['NAME'] = 'Arabic'; +$languages['ar']['CHARSET'] = 'windows-1256'; +$languages['ar']['DIR'] = 'rtl'; + +$languages['he_IL']['NAME'] = 'Hebrew'; +$languages['he_IL']['CHARSET'] = 'windows-1255'; +$languages['he_IL']['DIR'] = 'rtl'; +$languages['he']['ALIAS'] = 'he_IL'; +$languages['vi_VN']['NAME'] = 'Vietnamese'; +$languages['vi_VN']['CHARSET'] = 'utf-8'; +$languages['vi']['ALIAS'] = 'vi_VN'; /* Detect whether gettext is installed. */ $gettext_flags = 0; @@ -1058,6 +1413,39 @@ elseif ($gettext_flags == 0) { } } +function charset_decode_utf8 ($string) { +/* + Every decoded character consists of n bytes. First byte is octal + 300-375, other bytes - always octals 200-277. + + \a\b characters are decoded to html code octdec(a-300)*64 + octdec(b-200) + \a\b\c characters are decoded to html code octdec(a-340)*64*64 + octdec(b-200)*64 + octdec(c-200) + + decoding cycle is unfinished. please test and report problems to tokul@users.sourceforge.net +*/ + global $default_charset, $languages, $sm_notAlias; + + if (strtolower($default_charset) == 'utf-8') + return $string; + if (strtolower($languages[$sm_notAlias]['CHARSET']) == 'utf-8') + return $string; + + /* Only do the slow convert if there are 8-bit characters */ + if (! ereg("[\200-\377]", $string)) + return $string; + + // decode three byte unicode characters + $string = preg_replace("/([\340-\357])([\200-\277])([\200-\277])/e", + "'&#'.((ord('\\1')-224)*4096+(ord('\\2')-128)*64+(ord('\\3')-128)).';'", + $string); + + // decode two byte unicode characters + $string = preg_replace("/([\300-\337])([\200-\277])/e", + "'&#'.((ord('\\1')-192)*64+(ord('\\2')-128)).';'", + $string); + + return $string; +} /* * Japanese charset extra function @@ -1068,21 +1456,23 @@ function japanese_charset_xtra() { if (function_exists('mb_detect_encoding')) { switch (func_get_arg(0)) { /* action */ case 'decode': - $detect_encoding = mb_detect_encoding($ret); + $detect_encoding = @mb_detect_encoding($ret); if ($detect_encoding == 'JIS' || $detect_encoding == 'EUC-JP' || - $detect_encoding == 'SJIS') { + $detect_encoding == 'SJIS' || + $detect_encoding == 'UTF-8') { - $ret = mb_convert_encoding($ret, 'EUC-JP', 'AUTO'); + $ret = mb_convert_kana(mb_convert_encoding($ret, 'EUC-JP', 'AUTO'), "KV"); } break; case 'encode': - $detect_encoding = mb_detect_encoding($ret); + $detect_encoding = @mb_detect_encoding($ret); if ($detect_encoding == 'JIS' || $detect_encoding == 'EUC-JP' || - $detect_encoding == 'SJIS') { + $detect_encoding == 'SJIS' || + $detect_encoding == 'UTF-8') { - $ret = mb_convert_encoding($ret, 'JIS', 'AUTO'); + $ret = mb_convert_encoding(mb_convert_kana($ret, "KV"), 'JIS', 'AUTO'); } break; case 'strimwidth': @@ -1102,7 +1492,8 @@ function japanese_charset_xtra() { if ($prevcsize == 1) { $result .= $tmpstr; } else { - $result .= mb_encode_mimeheader($tmpstr); + $result .= str_replace(' ', '', + mb_encode_mimeheader($tmpstr,'iso-2022-jp','B','')); } $tmpstr = $tmp; $prevcsize = strlen($tmp); @@ -1112,17 +1503,17 @@ function japanese_charset_xtra() { if (strlen(mb_substr($tmpstr, 0, 1)) == 1) $result .= $tmpstr; else - $result .= mb_encode_mimeheader($tmpstr); + $result .= str_replace(' ', '', + mb_encode_mimeheader($tmpstr,'iso-2022-jp','B','')); } } $ret = $result; - //$ret = mb_encode_mimeheader($ret); break; case 'decodeheader': $ret = str_replace("\t", "", $ret); if (eregi('=\\?([^?]+)\\?(q|b)\\?([^?]+)\\?=', $ret)) - $ret = mb_decode_mimeheader($ret); - $ret = mb_convert_encoding($ret, 'EUC-JP', 'AUTO'); + $ret = @mb_decode_mimeheader($ret); + $ret = @mb_convert_encoding($ret, 'EUC-JP', 'AUTO'); break; case 'downloadfilename': $useragent = func_get_arg(2); @@ -1133,6 +1524,54 @@ function japanese_charset_xtra() { $ret = mb_convert_encoding($ret, 'EUC-JP', 'AUTO'); } break; + case 'wordwrap': + $no_begin = "\x21\x25\x29\x2c\x2e\x3a\x3b\x3f\x5d\x7d\xa1\xf1\xa1\xeb\xa1" . + "\xc7\xa1\xc9\xa2\xf3\xa1\xec\xa1\xed\xa1\xee\xa1\xa2\xa1\xa3\xa1\xb9" . + "\xa1\xd3\xa1\xd5\xa1\xd7\xa1\xd9\xa1\xdb\xa1\xcd\xa4\xa1\xa4\xa3\xa4" . + "\xa5\xa4\xa7\xa4\xa9\xa4\xc3\xa4\xe3\xa4\xe5\xa4\xe7\xa4\xee\xa1\xab" . + "\xa1\xac\xa1\xb5\xa1\xb6\xa5\xa1\xa5\xa3\xa5\xa5\xa5\xa7\xa5\xa9\xa5" . + "\xc3\xa5\xe3\xa5\xe5\xa5\xe7\xa5\xee\xa5\xf5\xa5\xf6\xa1\xa6\xa1\xbc" . + "\xa1\xb3\xa1\xb4\xa1\xaa\xa1\xf3\xa1\xcb\xa1\xa4\xa1\xa5\xa1\xa7\xa1" . + "\xa8\xa1\xa9\xa1\xcf\xa1\xd1"; + $no_end = "\x5c\x24\x28\x5b\x7b\xa1\xf2\x5c\xa1\xc6\xa1\xc8\xa1\xd2\xa1" . + "\xd4\xa1\xd6\xa1\xd8\xa1\xda\xa1\xcc\xa1\xf0\xa1\xca\xa1\xce\xa1\xd0\xa1\xef"; + $wrap = func_get_arg(2); + + if (strlen($ret) >= $wrap && + substr($ret, 0, 1) != '>' && + strpos($ret, 'http://') === FALSE && + strpos($ret, 'https://') === FALSE && + strpos($ret, 'ftp://') === FALSE) { + + $ret = mb_convert_kana($ret, "KV"); + + $line_new = ''; + $ptr = 0; + + while ($ptr < strlen($ret) - 1) { + $l = mb_strcut($ret, $ptr, $wrap); + $ptr += strlen($l); + $tmp = $l; + + $l = mb_strcut($ret, $ptr, 2); + while (strlen($l) != 0 && mb_strpos($no_begin, $l) !== FALSE ) { + $tmp .= $l; + $ptr += strlen($l); + $l = mb_strcut($ret, $ptr, 1); + } + $line_new .= $tmp; + if ($ptr < strlen($ret) - 1) + $line_new .= "\n"; + } + $ret = $line_new; + } + break; + case 'utf7-imap_encode': + $ret = mb_convert_encoding($ret, 'UTF7-IMAP', 'EUC-JP'); + break; + case 'utf7-imap_decode': + $ret = mb_convert_encoding($ret, 'EUC-JP', 'UTF7-IMAP'); + break; } } return $ret; @@ -1166,4 +1605,4 @@ function korean_charset_xtra() { return $ret; } -?> +?> \ No newline at end of file