From: tokul Date: Thu, 2 Mar 2006 20:31:31 +0000 (+0000) Subject: if PHP recode functions are used for decoding, they might use html character X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=564990610d25dffb34b73f18b7564c1fd027d0c7;p=squirrelmail.git if PHP recode functions are used for decoding, they might use html character entities. Encoding functions can't handle them. It is easier to convert all entities in one place instead of finding supported entities in every encoding function. git-svn-id: https://svn.code.sf.net/p/squirrelmail/code/trunk/squirrelmail@10848 7612ce4b-ef26-0410-bec9-ea0150e637f0 --- diff --git a/functions/i18n.php b/functions/i18n.php index a56f4f42..0d47e14c 100644 --- a/functions/i18n.php +++ b/functions/i18n.php @@ -244,6 +244,7 @@ function charset_encode($string,$charset,$htmlencode=true) { */ function charset_convert($in_charset,$string,$out_charset,$htmlencode=true) { $string=charset_decode($in_charset,$string,true); + $string=sqi18n_convert_entities($string); $string=charset_encode($string,$out_charset,$htmlencode); return $string; } @@ -588,79 +589,360 @@ endswitch; * @return bool is it possible to convert to user's charset */ function is_conversion_safe($input_charset) { - global $languages, $sm_notAlias, $default_charset, $lossy_encoding; + global $languages, $sm_notAlias, $default_charset, $lossy_encoding; if (isset($lossy_encoding) && $lossy_encoding ) return true; - // convert to lower case - $input_charset = strtolower($input_charset); + // convert to lower case + $input_charset = strtolower($input_charset); - // Is user's locale Unicode based ? - if ( $default_charset == "utf-8" ) { - return true; - } - - // Charsets that are similar -switch ($default_charset): -case "windows-1251": - if ( $input_charset == "iso-8859-5" || - $input_charset == "koi8-r" || - $input_charset == "koi8-u" ) { + // Is user's locale Unicode based ? 
+ if ( $default_charset == "utf-8" ) { return true; - } else { + } + + // Charsets that are similar + switch ($default_charset) { + case "windows-1251": + if ( $input_charset == "iso-8859-5" || + $input_charset == "koi8-r" || + $input_charset == "koi8-u" ) { + return true; + } else { + return false; + } + case "windows-1257": + if ( $input_charset == "iso-8859-13" || + $input_charset == "iso-8859-4" ) { + return true; + } else { + return false; + } + case "iso-8859-4": + if ( $input_charset == "iso-8859-13" || + $input_charset == "windows-1257" ) { + return true; + } else { + return false; + } + case "iso-8859-5": + if ( $input_charset == "windows-1251" || + $input_charset == "koi8-r" || + $input_charset == "koi8-u" ) { + return true; + } else { + return false; + } + case "iso-8859-13": + if ( $input_charset == "iso-8859-4" || + $input_charset == "windows-1257" ) { + return true; + } else { + return false; + } + case "koi8-r": + if ( $input_charset == "windows-1251" || + $input_charset == "iso-8859-5" || + $input_charset == "koi8-u" ) { + return true; + } else { + return false; + } + case "koi8-u": + if ( $input_charset == "windows-1251" || + $input_charset == "iso-8859-5" || + $input_charset == "koi8-r" ) { + return true; + } else { + return false; + } + default: return false; - } -case "windows-1257": - if ( $input_charset == "iso-8859-13" || - $input_charset == "iso-8859-4" ) { - return true; - } else { - return false; - } -case "iso-8859-4": - if ( $input_charset == "iso-8859-13" || - $input_charset == "windows-1257" ) { - return true; - } else { - return false; - } -case "iso-8859-5": - if ( $input_charset == "windows-1251" || - $input_charset == "koi8-r" || - $input_charset == "koi8-u" ) { - return true; - } else { - return false; - } -case "iso-8859-13": - if ( $input_charset == "iso-8859-4" || - $input_charset == "windows-1257" ) { - return true; - } else { - return false; - } -case "koi8-r": - if ( $input_charset == "windows-1251" || - $input_charset == 
"iso-8859-5" || - $input_charset == "koi8-u" ) { - return true; - } else { - return false; - } -case "koi8-u": - if ( $input_charset == "windows-1251" || - $input_charset == "iso-8859-5" || - $input_charset == "koi8-r" ) { - return true; - } else { - return false; - } -default: - return false; -endswitch; + } } +/** + * Converts html character entities to numeric entities + * + * SquirrelMail encoding functions work only with numeric entities. + * This function fixes issues with decoding functions that might convert + * some symbols to character entities. Issue is specific to PHP recode + * extension decoding. Function is used internally in charset_convert() + * function. + * @param string $str string that might contain html character entities + * @return string string with character entities converted to decimals. + * @since 1.5.2 + */ +function sqi18n_convert_entities($str) { + + $entities = array( + // Latin 1 + '&nbsp;' => '&#160;', + '&iexcl;' => '&#161;', + '&cent;' => '&#162;', + '&pound;' => '&#163;', + '&curren;' => '&#164;', + '&yen;' => '&#165;', + '&brvbar;' => '&#166;', + '&sect;' => '&#167;', + '&uml;' => '&#168;', + '&copy;' => '&#169;', + '&ordf;' => '&#170;', + '&laquo;' => '&#171;', + '&not;' => '&#172;', + '&shy;' => '&#173;', + '&reg;' => '&#174;', + '&macr;' => '&#175;', + '&deg;' => '&#176;', + '&plusmn;' => '&#177;', + '&sup2;' => '&#178;', + '&sup3;' => '&#179;', + '&acute;' => '&#180;', + '&micro;' => '&#181;', + '&para;' => '&#182;', + '&middot;' => '&#183;', + '&cedil;' => '&#184;', + '&sup1;' => '&#185;', + '&ordm;' => '&#186;', + '&raquo;' => '&#187;', + '&frac14;' => '&#188;', + '&frac12;' => '&#189;', + '&frac34;' => '&#190;', + '&iquest;' => '&#191;', + '&Agrave;' => '&#192;', + '&Aacute;' => '&#193;', + '&Acirc;' => '&#194;', + '&Atilde;' => '&#195;', + '&Auml;' => '&#196;', + '&Aring;' => '&#197;', + '&AElig;' => '&#198;', + '&Ccedil;' => '&#199;', + '&Egrave;' => '&#200;', + '&Eacute;' => '&#201;', + '&Ecirc;' => '&#202;', + '&Euml;' => '&#203;', + '&Igrave;' => '&#204;', + '&Iacute;' => '&#205;', + '&Icirc;' => '&#206;', + '&Iuml;' => '&#207;', + '&ETH;' => '&#208;', + '&Ntilde;' => '&#209;', + '&Ograve;' => '&#210;', + '&Oacute;' => '&#211;', + '&Ocirc;' => '&#212;', + '&Otilde;' => '&#213;', + '&Ouml;' => '&#214;', + '&times;' => '&#215;', + '&Oslash;' => '&#216;', + '&Ugrave;' => '&#217;', + '&Uacute;' => '&#218;', + '&Ucirc;' => '&#219;', + '&Uuml;' => '&#220;', + '&Yacute;' => '&#221;', + '&THORN;' => '&#222;', + '&szlig;' => '&#223;', + '&agrave;' => '&#224;', + '&aacute;' => '&#225;', + '&acirc;' => '&#226;', + '&atilde;' => '&#227;', + '&auml;' => '&#228;', + '&aring;' => '&#229;', + '&aelig;' => '&#230;', + '&ccedil;' => '&#231;', + '&egrave;' => '&#232;', + '&eacute;' => '&#233;', + '&ecirc;' => '&#234;', + '&euml;' => '&#235;', + '&igrave;' => 
'&#236;', + '&iacute;' => '&#237;', + '&icirc;' => '&#238;', + '&iuml;' => '&#239;', + '&eth;' => '&#240;', + '&ntilde;' => '&#241;', + '&ograve;' => '&#242;', + '&oacute;' => '&#243;', + '&ocirc;' => '&#244;', + '&otilde;' => '&#245;', + '&ouml;' => '&#246;', + '&divide;' => '&#247;', + '&oslash;' => '&#248;', + '&ugrave;' => '&#249;', + '&uacute;' => '&#250;', + '&ucirc;' => '&#251;', + '&uuml;' => '&#252;', + '&yacute;' => '&#253;', + '&thorn;' => '&#254;', + '&yuml;' => '&#255;', + // Latin Extended-A + '&OElig;' => '&#338;', + '&oelig;' => '&#339;', + '&Scaron;' => '&#352;', + '&scaron;' => '&#353;', + '&Yuml;' => '&#376;', + // Spacing Modifier Letters + '&circ;' => '&#710;', + '&tilde;' => '&#732;', + // General Punctuation + '&ensp;' => '&#8194;', + '&emsp;' => '&#8195;', + '&thinsp;' => '&#8201;', + '&zwnj;' => '&#8204;', + '&zwj;' => '&#8205;', + '&lrm;' => '&#8206;', + '&rlm;' => '&#8207;', + '&ndash;' => '&#8211;', + '&mdash;' => '&#8212;', + '&lsquo;' => '&#8216;', + '&rsquo;' => '&#8217;', + '&sbquo;' => '&#8218;', + '&ldquo;' => '&#8220;', + '&rdquo;' => '&#8221;', + '&bdquo;' => '&#8222;', + '&dagger;' => '&#8224;', + '&Dagger;' => '&#8225;', + '&permil;' => '&#8240;', + '&lsaquo;' => '&#8249;', + '&rsaquo;' => '&#8250;', + '&euro;' => '&#8364;', + // Latin Extended-B + '&fnof;' => '&#402;', + // Greek + '&Alpha;' => '&#913;', + '&Beta;' => '&#914;', + '&Gamma;' => '&#915;', + '&Delta;' => '&#916;', + '&Epsilon;' => '&#917;', + '&Zeta;' => '&#918;', + '&Eta;' => '&#919;', + '&Theta;' => '&#920;', + '&Iota;' => '&#921;', + '&Kappa;' => '&#922;', + '&Lambda;' => '&#923;', + '&Mu;' => '&#924;', + '&Nu;' => '&#925;', + '&Xi;' => '&#926;', + '&Omicron;' => '&#927;', + '&Pi;' => '&#928;', + '&Rho;' => '&#929;', + '&Sigma;' => '&#931;', + '&Tau;' => '&#932;', + '&Upsilon;' => '&#933;', + '&Phi;' => '&#934;', + '&Chi;' => '&#935;', + '&Psi;' => '&#936;', + '&Omega;' => '&#937;', + '&alpha;' => '&#945;', + '&beta;' => '&#946;', + '&gamma;' => '&#947;', + '&delta;' => '&#948;', + '&epsilon;' => '&#949;', + '&zeta;' => '&#950;', + '&eta;' => '&#951;', + '&theta;' => '&#952;', + '&iota;' => '&#953;', + '&kappa;' => '&#954;', + '&lambda;' => '&#955;', + '&mu;' => '&#956;', + '&nu;' => '&#957;', + '&xi;' => '&#958;', + '&omicron;' => '&#959;', + '&pi;' => '&#960;', + '&rho;' => '&#961;', + '&sigmaf;' => '&#962;', + '&sigma;' => '&#963;', + '&tau;' => '&#964;', + '&upsilon;' => '&#965;', + '&phi;' => '&#966;', + '&chi;' => '&#967;', + '&psi;' => '&#968;', + '&omega;' => '&#969;', + '&thetasym;' => '&#977;', + '&upsih;' => '&#978;', + '&piv;' => '&#982;', + // General Punctuation + '&bull;' => '&#8226;', + '&hellip;' => '&#8230;', + '&prime;' => '&#8242;', + '&Prime;' => '&#8243;', + '&oline;' => '&#8254;', + '&frasl;' => '&#8260;', + // Letterlike Symbols + '&weierp;' => '&#8472;', + '&image;' => '&#8465;', + '&real;' => '&#8476;', + '&trade;' => '&#8482;', + '&alefsym;' => '&#8501;', + // Arrows + '&larr;' => '&#8592;', + '&uarr;' => '&#8593;', + '&rarr;' => '&#8594;', + '&darr;' => '&#8595;', + '&harr;' => '&#8596;', + '&crarr;' => '&#8629;', + '&lArr;' => '&#8656;', + '&uArr;' => '&#8657;', + '&rArr;' => '&#8658;', + '&dArr;' => '&#8659;', + '&hArr;' => '&#8660;', + // Mathematical Operators + '&forall;' => '&#8704;', + '&part;' => '&#8706;', + '&exist;' => '&#8707;', + '&empty;' => '&#8709;', + '&nabla;' => '&#8711;', + '&isin;' => '&#8712;', + 
'&notin;' => '&#8713;', + '&ni;' => '&#8715;', + '&prod;' => '&#8719;', + '&sum;' => '&#8721;', + '&minus;' => '&#8722;', + '&lowast;' => '&#8727;', + '&radic;' => '&#8730;', + '&prop;' => '&#8733;', + '&infin;' => '&#8734;', + '&ang;' => '&#8736;', + '&and;' => '&#8743;', + '&or;' => '&#8744;', + '&cap;' => '&#8745;', + '&cup;' => '&#8746;', + '&int;' => '&#8747;', + '&there4;' => '&#8756;', + '&sim;' => '&#8764;', + '&cong;' => '&#8773;', + '&asymp;' => '&#8776;', + '&ne;' => '&#8800;', + '&equiv;' => '&#8801;', + '&le;' => '&#8804;', + '&ge;' => '&#8805;', + '&sub;' => '&#8834;', + '&sup;' => '&#8835;', + '&nsub;' => '&#8836;', + '&sube;' => '&#8838;', + '&supe;' => '&#8839;', + '&oplus;' => '&#8853;', + '&otimes;' => '&#8855;', + '&perp;' => '&#8869;', + '&sdot;' => '&#8901;', + // Miscellaneous Technical + '&lceil;' => '&#8968;', + '&rceil;' => '&#8969;', + '&lfloor;' => '&#8970;', + '&rfloor;' => '&#8971;', + '&lang;' => '&#9001;', + '&rang;' => '&#9002;', + // Geometric Shapes + '&loz;' => '&#9674;', + // Miscellaneous Symbols + '&spades;' => '&#9824;', + '&clubs;' => '&#9827;', + '&hearts;' => '&#9829;', + '&diams;' => '&#9830;'); + + $str = str_replace(array_keys($entities), array_values($entities), $str); + + return $str; +} /* ------------------------------ main --------------------------- */