*/
function charset_convert($in_charset,$string,$out_charset,$htmlencode=true) {
$string=charset_decode($in_charset,$string,true);
+ $string=sqi18n_convert_entities($string); // decoders (notably PHP recode) may emit named entities; the encoders only understand numeric ones, so normalize between decode and encode
$string=charset_encode($string,$out_charset,$htmlencode);
return $string;
}
* @return bool is it possible to convert to user's charset
*/
function is_conversion_safe($input_charset) {
- global $languages, $sm_notAlias, $default_charset, $lossy_encoding;
+ global $languages, $sm_notAlias, $default_charset, $lossy_encoding; // NOTE(review): $languages and $sm_notAlias are not referenced in the visible body - confirm before removing
if (isset($lossy_encoding) && $lossy_encoding )
return true;
- // convert to lower case
- $input_charset = strtolower($input_charset);
+ // Lower-case the input charset; every charset name it is compared with below is written in lower case
+ $input_charset = strtolower($input_charset);
- // Is user's locale Unicode based ?
- if ( $default_charset == "utf-8" ) {
- return true;
- }
-
- // Charsets that are similar
-switch ($default_charset):
-case "windows-1251":
- if ( $input_charset == "iso-8859-5" ||
- $input_charset == "koi8-r" ||
- $input_charset == "koi8-u" ) {
+ // A utf-8 default charset is reported as safe for every input charset
+ if ( $default_charset == "utf-8" ) {
return true;
- } else {
+ }
+
+ // Otherwise safe only inside a group of similar charsets: {windows-1251, iso-8859-5, koi8-r, koi8-u} or {windows-1257, iso-8859-4, iso-8859-13}
+ switch ($default_charset) { // NOTE(review): an input charset equal to $default_charset is not listed in its own case, so it yields false - presumably callers skip conversion then; confirm
+ case "windows-1251":
+ if ( $input_charset == "iso-8859-5" ||
+ $input_charset == "koi8-r" ||
+ $input_charset == "koi8-u" ) {
+ return true;
+ } else {
+ return false;
+ }
+ case "windows-1257":
+ if ( $input_charset == "iso-8859-13" ||
+ $input_charset == "iso-8859-4" ) {
+ return true;
+ } else {
+ return false;
+ }
+ case "iso-8859-4":
+ if ( $input_charset == "iso-8859-13" ||
+ $input_charset == "windows-1257" ) {
+ return true;
+ } else {
+ return false;
+ }
+ case "iso-8859-5":
+ if ( $input_charset == "windows-1251" ||
+ $input_charset == "koi8-r" ||
+ $input_charset == "koi8-u" ) {
+ return true;
+ } else {
+ return false;
+ }
+ case "iso-8859-13":
+ if ( $input_charset == "iso-8859-4" ||
+ $input_charset == "windows-1257" ) {
+ return true;
+ } else {
+ return false;
+ }
+ case "koi8-r":
+ if ( $input_charset == "windows-1251" ||
+ $input_charset == "iso-8859-5" ||
+ $input_charset == "koi8-u" ) {
+ return true;
+ } else {
+ return false;
+ }
+ case "koi8-u":
+ if ( $input_charset == "windows-1251" ||
+ $input_charset == "iso-8859-5" ||
+ $input_charset == "koi8-r" ) {
+ return true;
+ } else {
+ return false;
+ }
+ default: // any default charset without a group above: report the conversion as unsafe
return false;
- }
-case "windows-1257":
- if ( $input_charset == "iso-8859-13" ||
- $input_charset == "iso-8859-4" ) {
- return true;
- } else {
- return false;
- }
-case "iso-8859-4":
- if ( $input_charset == "iso-8859-13" ||
- $input_charset == "windows-1257" ) {
- return true;
- } else {
- return false;
- }
-case "iso-8859-5":
- if ( $input_charset == "windows-1251" ||
- $input_charset == "koi8-r" ||
- $input_charset == "koi8-u" ) {
- return true;
- } else {
- return false;
- }
-case "iso-8859-13":
- if ( $input_charset == "iso-8859-4" ||
- $input_charset == "windows-1257" ) {
- return true;
- } else {
- return false;
- }
-case "koi8-r":
- if ( $input_charset == "windows-1251" ||
- $input_charset == "iso-8859-5" ||
- $input_charset == "koi8-u" ) {
- return true;
- } else {
- return false;
- }
-case "koi8-u":
- if ( $input_charset == "windows-1251" ||
- $input_charset == "iso-8859-5" ||
- $input_charset == "koi8-r" ) {
- return true;
- } else {
- return false;
- }
-default:
- return false;
-endswitch;
+ }
}
+/**
+ * Converts named html character entities to decimal numeric entities
+ *
+ * SquirrelMail encoding functions work only with numeric entities.
+ * This function fixes issues with decoding functions that might convert
+ * some symbols to character entities (for example &nbsp; or &copy;
+ * instead of &#160; or &#169;). Issue is specific to PHP recode
+ * extension decoding. Function is used internally in charset_convert()
+ * function.
+ *
+ * The table holds the HTML 4 named entities except the markup-significant
+ * ones (&lt; &gt; &amp; &quot;), which are left untouched. Keys must stay
+ * written as entity NAMES and values as decimal references: if both sides
+ * are stored as the literal character, the replacement becomes a no-op.
+ * Matching is case sensitive (&Agrave; and &agrave; are different entities).
+ *
+ * @param string $str string that might contain html character entities
+ * @return string string with character entities converted to decimals.
+ * @since 1.5.2
+ */
+function sqi18n_convert_entities($str) {
+
+    $entities = array(
+        // Latin 1
+        '&nbsp;' => '&#160;',
+        '&iexcl;' => '&#161;',
+        '&cent;' => '&#162;',
+        '&pound;' => '&#163;',
+        '&curren;' => '&#164;',
+        '&yen;' => '&#165;',
+        '&brvbar;' => '&#166;',
+        '&sect;' => '&#167;',
+        '&uml;' => '&#168;',
+        '&copy;' => '&#169;',
+        '&ordf;' => '&#170;',
+        '&laquo;' => '&#171;',
+        '&not;' => '&#172;',
+        '&shy;' => '&#173;',
+        '&reg;' => '&#174;',
+        '&macr;' => '&#175;',
+        '&deg;' => '&#176;',
+        '&plusmn;' => '&#177;',
+        '&sup2;' => '&#178;',
+        '&sup3;' => '&#179;',
+        '&acute;' => '&#180;',
+        '&micro;' => '&#181;',
+        '&para;' => '&#182;',
+        '&middot;' => '&#183;',
+        '&cedil;' => '&#184;',
+        '&sup1;' => '&#185;',
+        '&ordm;' => '&#186;',
+        '&raquo;' => '&#187;',
+        '&frac14;' => '&#188;',
+        '&frac12;' => '&#189;',
+        '&frac34;' => '&#190;',
+        '&iquest;' => '&#191;',
+        '&Agrave;' => '&#192;',
+        '&Aacute;' => '&#193;',
+        '&Acirc;' => '&#194;',
+        '&Atilde;' => '&#195;',
+        '&Auml;' => '&#196;',
+        '&Aring;' => '&#197;',
+        '&AElig;' => '&#198;',
+        '&Ccedil;' => '&#199;',
+        '&Egrave;' => '&#200;',
+        '&Eacute;' => '&#201;',
+        '&Ecirc;' => '&#202;',
+        '&Euml;' => '&#203;',
+        '&Igrave;' => '&#204;',
+        '&Iacute;' => '&#205;',
+        '&Icirc;' => '&#206;',
+        '&Iuml;' => '&#207;',
+        '&ETH;' => '&#208;',
+        '&Ntilde;' => '&#209;',
+        '&Ograve;' => '&#210;',
+        '&Oacute;' => '&#211;',
+        '&Ocirc;' => '&#212;',
+        '&Otilde;' => '&#213;',
+        '&Ouml;' => '&#214;',
+        '&times;' => '&#215;',
+        '&Oslash;' => '&#216;',
+        '&Ugrave;' => '&#217;',
+        '&Uacute;' => '&#218;',
+        '&Ucirc;' => '&#219;',
+        '&Uuml;' => '&#220;',
+        '&Yacute;' => '&#221;',
+        '&THORN;' => '&#222;',
+        '&szlig;' => '&#223;',
+        '&agrave;' => '&#224;',
+        '&aacute;' => '&#225;',
+        '&acirc;' => '&#226;',
+        '&atilde;' => '&#227;',
+        '&auml;' => '&#228;',
+        '&aring;' => '&#229;',
+        '&aelig;' => '&#230;',
+        '&ccedil;' => '&#231;',
+        '&egrave;' => '&#232;',
+        '&eacute;' => '&#233;',
+        '&ecirc;' => '&#234;',
+        '&euml;' => '&#235;',
+        '&igrave;' => '&#236;',
+        '&iacute;' => '&#237;',
+        '&icirc;' => '&#238;',
+        '&iuml;' => '&#239;',
+        '&eth;' => '&#240;',
+        '&ntilde;' => '&#241;',
+        '&ograve;' => '&#242;',
+        '&oacute;' => '&#243;',
+        '&ocirc;' => '&#244;',
+        '&otilde;' => '&#245;',
+        '&ouml;' => '&#246;',
+        '&divide;' => '&#247;',
+        '&oslash;' => '&#248;',
+        '&ugrave;' => '&#249;',
+        '&uacute;' => '&#250;',
+        '&ucirc;' => '&#251;',
+        '&uuml;' => '&#252;',
+        '&yacute;' => '&#253;',
+        '&thorn;' => '&#254;',
+        '&yuml;' => '&#255;',
+        // Latin Extended-A
+        '&OElig;' => '&#338;',
+        '&oelig;' => '&#339;',
+        '&Scaron;' => '&#352;',
+        '&scaron;' => '&#353;',
+        '&Yuml;' => '&#376;',
+        // Spacing Modifier Letters
+        '&circ;' => '&#710;',
+        '&tilde;' => '&#732;',
+        // General Punctuation
+        '&ensp;' => '&#8194;',
+        '&emsp;' => '&#8195;',
+        '&thinsp;' => '&#8201;',
+        '&zwnj;' => '&#8204;',
+        '&zwj;' => '&#8205;',
+        '&lrm;' => '&#8206;',
+        '&rlm;' => '&#8207;',
+        '&ndash;' => '&#8211;',
+        '&mdash;' => '&#8212;',
+        '&lsquo;' => '&#8216;',
+        '&rsquo;' => '&#8217;',
+        '&sbquo;' => '&#8218;',
+        '&ldquo;' => '&#8220;',
+        '&rdquo;' => '&#8221;',
+        '&bdquo;' => '&#8222;',
+        '&dagger;' => '&#8224;',
+        '&Dagger;' => '&#8225;',
+        '&permil;' => '&#8240;',
+        '&lsaquo;' => '&#8249;',
+        '&rsaquo;' => '&#8250;',
+        '&euro;' => '&#8364;',
+        // Latin Extended-B
+        '&fnof;' => '&#402;',
+        // Greek
+        '&Alpha;' => '&#913;',
+        '&Beta;' => '&#914;',
+        '&Gamma;' => '&#915;',
+        '&Delta;' => '&#916;',
+        '&Epsilon;' => '&#917;',
+        '&Zeta;' => '&#918;',
+        '&Eta;' => '&#919;',
+        '&Theta;' => '&#920;',
+        '&Iota;' => '&#921;',
+        '&Kappa;' => '&#922;',
+        '&Lambda;' => '&#923;',
+        '&Mu;' => '&#924;',
+        '&Nu;' => '&#925;',
+        '&Xi;' => '&#926;',
+        '&Omicron;' => '&#927;',
+        '&Pi;' => '&#928;',
+        '&Rho;' => '&#929;',
+        '&Sigma;' => '&#931;',
+        '&Tau;' => '&#932;',
+        '&Upsilon;' => '&#933;',
+        '&Phi;' => '&#934;',
+        '&Chi;' => '&#935;',
+        '&Psi;' => '&#936;',
+        '&Omega;' => '&#937;',
+        '&alpha;' => '&#945;',
+        '&beta;' => '&#946;',
+        '&gamma;' => '&#947;',
+        '&delta;' => '&#948;',
+        '&epsilon;' => '&#949;',
+        '&zeta;' => '&#950;',
+        '&eta;' => '&#951;',
+        '&theta;' => '&#952;',
+        '&iota;' => '&#953;',
+        '&kappa;' => '&#954;',
+        '&lambda;' => '&#955;',
+        '&mu;' => '&#956;',
+        '&nu;' => '&#957;',
+        '&xi;' => '&#958;',
+        '&omicron;' => '&#959;',
+        '&pi;' => '&#960;',
+        '&rho;' => '&#961;',
+        '&sigmaf;' => '&#962;',
+        '&sigma;' => '&#963;',
+        '&tau;' => '&#964;',
+        '&upsilon;' => '&#965;',
+        '&phi;' => '&#966;',
+        '&chi;' => '&#967;',
+        '&psi;' => '&#968;',
+        '&omega;' => '&#969;',
+        '&thetasym;' => '&#977;',
+        '&upsih;' => '&#978;',
+        '&piv;' => '&#982;',
+        // General Punctuation
+        '&bull;' => '&#8226;',
+        '&hellip;' => '&#8230;',
+        '&prime;' => '&#8242;',
+        '&Prime;' => '&#8243;',
+        '&oline;' => '&#8254;',
+        '&frasl;' => '&#8260;',
+        // Letterlike Symbols
+        '&weierp;' => '&#8472;',
+        '&image;' => '&#8465;',
+        '&real;' => '&#8476;',
+        '&trade;' => '&#8482;',
+        '&alefsym;' => '&#8501;',
+        // Arrows
+        '&larr;' => '&#8592;',
+        '&uarr;' => '&#8593;',
+        '&rarr;' => '&#8594;',
+        '&darr;' => '&#8595;',
+        '&harr;' => '&#8596;',
+        '&crarr;' => '&#8629;',
+        '&lArr;' => '&#8656;',
+        '&uArr;' => '&#8657;',
+        '&rArr;' => '&#8658;',
+        '&dArr;' => '&#8659;',
+        '&hArr;' => '&#8660;',
+        // Mathematical Operators
+        '&forall;' => '&#8704;',
+        '&part;' => '&#8706;',
+        '&exist;' => '&#8707;',
+        '&empty;' => '&#8709;',
+        '&nabla;' => '&#8711;',
+        '&isin;' => '&#8712;',
+        '&notin;' => '&#8713;',
+        '&ni;' => '&#8715;',
+        '&prod;' => '&#8719;',
+        '&sum;' => '&#8721;',
+        '&minus;' => '&#8722;',
+        '&lowast;' => '&#8727;',
+        '&radic;' => '&#8730;',
+        '&prop;' => '&#8733;',
+        '&infin;' => '&#8734;',
+        '&ang;' => '&#8736;',
+        '&and;' => '&#8743;',
+        '&or;' => '&#8744;',
+        '&cap;' => '&#8745;',
+        '&cup;' => '&#8746;',
+        '&int;' => '&#8747;',
+        '&there4;' => '&#8756;',
+        '&sim;' => '&#8764;',
+        '&cong;' => '&#8773;',
+        '&asymp;' => '&#8776;',
+        '&ne;' => '&#8800;',
+        '&equiv;' => '&#8801;',
+        '&le;' => '&#8804;',
+        '&ge;' => '&#8805;',
+        '&sub;' => '&#8834;',
+        '&sup;' => '&#8835;',
+        '&nsub;' => '&#8836;',
+        '&sube;' => '&#8838;',
+        '&supe;' => '&#8839;',
+        '&oplus;' => '&#8853;',
+        '&otimes;' => '&#8855;',
+        '&perp;' => '&#8869;',
+        '&sdot;' => '&#8901;',
+        // Miscellaneous Technical
+        '&lceil;' => '&#8968;',
+        '&rceil;' => '&#8969;',
+        '&lfloor;' => '&#8970;',
+        '&rfloor;' => '&#8971;',
+        '&lang;' => '&#9001;',
+        '&rang;' => '&#9002;',
+        // Geometric Shapes
+        '&loz;' => '&#9674;',
+        // Miscellaneous Symbols
+        '&spades;' => '&#9824;',
+        '&clubs;' => '&#9827;',
+        '&hearts;' => '&#9829;',
+        '&diams;' => '&#9830;');
+
+    // Every key ends in ';' and no replacement value contains a key, so the
+    // sequential array form of str_replace() cannot re-match its own output.
+    $str = str_replace(array_keys($entities), array_values($entities), $str);
+
+    return $str;
+}
/* ------------------------------ main --------------------------- */