} else if ($charset == "koi8-r") {
return charset_decode_koi8r ($string);
} else
- return $string;
+ return "xx$charset:$string";
}
// iso-8859-1 is the same as Latin 1 and is normally used
// in western europe.
function charset_decode_iso_8859_1 ($string) {
- // This is only debug code as long as the internal
- // character set is iso-8859-1
+    // Make an iso-8859-1 string safe to show in a page that uses
+    // $default_charset. Returns $string untouched when the page
+    // itself is iso-8859-1 or when the string is plain 7-bit;
+    // otherwise every byte 0x80-0xFF becomes the decimal numeric
+    // character reference "&#N;", N being the byte value.
+    global $default_charset;
- // Latin small letter o with stroke
- $string = str_replace ("\370", "ø", $string);
+
+    if (strtolower($default_charset) == "iso-8859-1") {
+        return $string;
+    }
+
+    // Only build the table if there are 8-bit characters.
+    // preg_match() is used because ereg() no longer exists in PHP 7.
+    if (preg_match('/[\x80-\xFF]/', $string)) {
+        // References are generated from the byte value so that this
+        // file stays pure ASCII and no byte is mapped to an empty
+        // string. 0x80 is included: the guard above tests for it too.
+        $entities = array();
+        for ($code = 0x80; $code <= 0xFF; $code++) {
+            $entities[chr($code)] = "&#" . $code . ";";
+        }
+        // strtr() with an array converts everything in a single pass.
+        $string = strtr($string, $entities);
+    }
return ($string);
}
// iso-8859-7 is Greek.
function charset_decode_iso_8859_7 ($string) {
- // Could not find Unicode equivalent of 0xA1 and 0xA2
- // 0xA4, 0xA5, 0xAA, 0xAE, 0xD2 and 0xFF should not be used
- $string = strtr($string, "\241\242\244\245\252\256\322\377",
- "????????");
-
- // Horizontal bar (parentheki pavla)
- while (ereg("\257", $string))
- $string = str_replace ("\257", "―", $string);
-
- // ISO-8859-7 characters from 11/04 (0xB4) to 11/06 (0xB6)
- // These are Unicode 900-902
- while (ereg("([\264-\266])", $string, $res)) {
- $replace = "&#" . (ord($res[1])+720) . ";";
- $string = str_replace($res[1], $replace, $string);
- }
-
- // 11/07 (0xB7) Middle dot is the same in iso-8859-1
-
- // ISO-8859-7 characters from 11/08 (0xB8) to 11/10 (0xBA)
- // These are Unicode 900-902
- while (ereg("([\270-\272])", $string, $res)) {
- $replace = "&#" . (ord($res[1])+720) . ";";
- $string = str_replace($res[1], $replace, $string);
- }
-
- // 11/11 (0xBB) Right angle quotation mark is the same as in
- // iso-8859-1
+    // Make an iso-8859-7 (Greek) string safe to show in a page that
+    // uses $default_charset. The string is returned untouched when
+    // the page itself is iso-8859-7 or when it is plain 7-bit;
+    // otherwise 8-bit bytes become decimal numeric character
+    // references.
+    global $default_charset;
- // And now the rest of the charset
- while (ereg("([\273-\376])", $string, $res)) {
- $replace = "&#" . (ord($res[1])+720) . ";";
- $string = str_replace($res[1], $replace, $string);
+
+    // Only do the convert if the page is not Greek and there are
+    // 8-bit characters. preg_match() is used because ereg() no
+    // longer exists in PHP 7.
+    if (strtolower($default_charset) != "iso-8859-7"
+        && preg_match('/[\x80-\xFF]/', $string)) {
+        // Characters that need an explicit Unicode code point
+        $entities = array(
+            "\241" => "&#8216;",  // left single quotation mark
+            "\242" => "&#8217;",  // right single quotation mark
+            "\257" => "&#8213;"   // horizontal bar (parentheki pavla)
+        );
+
+        // Characters that sit at the same code point as in
+        // iso-8859-1. 0xBD (vulgar fraction one half) belongs here;
+        // it is not a Greek letter and must not take the offset below.
+        $same_as_latin1 = array(0xA0, 0xA3, 0xA6, 0xA7, 0xA8, 0xA9,
+                                0xAB, 0xAC, 0xAD, 0xB0, 0xB1, 0xB2,
+                                0xB3, 0xB7, 0xBB, 0xBD);
+        foreach ($same_as_latin1 as $code) {
+            $entities[chr($code)] = "&#" . $code . ";";
+        }
+
+        // Everything else from 11/04 (0xB4) to 15/14 (0xFE) is the
+        // Greek block: Unicode code point = byte value + 720
+        // (0xB4 -> 900, ...).
+        for ($code = 0xB4; $code <= 0xFE; $code++) {
+            $byte = chr($code);
+            if (!isset($entities[$byte])) {
+                $entities[$byte] = "&#" . ($code + 720) . ";";
+            }
+        }
+
+        // NOTE(review): 0xA4, 0xA5, 0xAA, 0xAE and 0xFF are still
+        // passed through untouched and 0xD2 still takes the +720
+        // offset; confirm whether they should map to "?" instead.
+
+        // strtr() with an array converts everything in a single pass.
+        $string = strtr($string, $entities);
}
return $string;
// Latin capital letter Y with diaeresis
$string = str_replace ("\276", "Ÿ", $string);
- return ($string);
+ return (charset_decode_iso_8859_1($string));
}
// ISO-8859-15 is Cyrillic