include/languages.php

   1 <?php
   2
   3 /**
   4  * SquirrelMail internationalization functions
   5  *
   6  * This file contains various functions that are needed to do
   7  * internationalization of SquirrelMail.
   8  *
   9  * Internally the output character set is used. Other characters are
  10  * encoded using Unicode entities according to HTML 4.0.
  11  *
  12  * Before 1.5.2 functions were stored in functions/i18n.php. Script is moved
  13  * because it executes some code in order to detect functions supported by
  14  * existing PHP installation and implements fallback functions when required
  15  * functions are not available. Scripts in functions/ directory should not
  16  * setup anything when they are loaded.
  17  * @copyright &copy; 1999-2007 The SquirrelMail Project Team
  18  * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  19  * @version $Id$
  20  * @package squirrelmail
  21  * @subpackage i18n
  22  */
  23
  24
  25 /**
  26  * Wrapper for textdomain(), bindtextdomain() and
  27  * bind_textdomain_codeset() primarily intended for
  28  * plugins when changing into their own text domain
  29  * and back again.
  30  *
  31  * Note that if plugins using this function have
  32  * their translation files located in the SquirrelMail
  33  * locale directory, the second argument is optional.
  34  *
  35  * @param string $domain_name The name of the text domain
  36  *                            (usually the plugin name, or
  37  *                            "squirrelmail") being switched to.
  38  * @param string $directory   The directory that contains
  39  *                            all translations for the domain
  40  *                            (OPTIONAL; default is SquirrelMail
  41  *                            locale directory).
  42  *
  43  * @return string The name of the text domain that was set
  44  *                *BEFORE* it is changed herein - NOTE that
  45  *                this differs from PHP's textdomain()
  46  *
  47  * @since 1.5.2 and 1.4.10
  48  */
  49 function sq_change_text_domain($domain_name, $directory='') {
  50
  51     static $domains_already_seen = array();
  52     global $gettext_domain;
  53     $return_value = $gettext_domain;
  54
  55     // empty domain defaults to "squirrelmail"
  56     //
  57     if (empty($domain_name)) $domain_name = 'squirrelmail';
  58
  59     // only need to call bindtextdomain() once
  60     //
  61     if (in_array($domain_name, $domains_already_seen)) {
  62         sq_textdomain($domain_name);
  63         return $return_value;
  64     }
  65
  66     $domains_already_seen[] = $domain_name;
  67
  68     if (empty($directory)) $directory = SM_PATH . 'locale/';
  69
  70     sq_bindtextdomain($domain_name, $directory);
  71     sq_textdomain($domain_name);
  72
  73     return $return_value;
  74 }
  75
  76 /**
  77  * Gettext bindtextdomain wrapper.
  78  *
  79  * Wrapper solves differences between php versions in order to provide
  80  * ngettext support. Should be used if translation uses ngettext
  81  * functions.
  82  *
  83  * This also provides a bind_textdomain_codeset call to make sure the
  84  * domain's encoding will not be overridden.
  85  *
  86  * @since 1.4.10 and 1.5.1
  87  * @param string $domain gettext domain name
  88  * @param string $dir directory that contains all translations (OPTIONAL;
  89  *                    if not specified, defaults to SquirrelMail locale
  90  *                    directory)
  91  * @return string path to translation directory
  92  */
  93 function sq_bindtextdomain($domain,$dir='') {
  94     global $l10n, $gettext_flags, $sm_notAlias;
  95
  96     if (empty($dir)) $dir = SM_PATH . 'locale/';
  97
  98     if ($gettext_flags==7) {
  99         // gettext extension without ngettext
 100         if (substr($dir, -1) != '/') $dir .= '/';
 101         $mofile=$dir . $sm_notAlias . '/LC_MESSAGES/' . $domain . '.mo';
 102         $input = new FileReader($mofile);
 103         $l10n[$domain] = new gettext_reader($input);
 104     }
 105
 106     $dir=bindtextdomain($domain,$dir);
 107
 108     // set codeset in order to avoid gettext charset conversions
 109     if (function_exists('bind_textdomain_codeset')
 110      && isset($languages[$sm_notAlias]['CHARSET'])) {
 111
 112         // Japanese translation uses different internal charset
 113         if ($sm_notAlias == 'ja_JP') {
 114             bind_textdomain_codeset ($domain_name, 'EUC-JP');
 115         } else {
 116             bind_textdomain_codeset ($domain_name, $languages[$sm_notAlias]['CHARSET']);
 117         }
 118
 119     }
 120
 121     return $dir;
 122 }
 123
 124 /**
 125  * Gettext textdomain wrapper.
 126  * Makes sure that gettext_domain global is modified.
 127  * @since 1.5.1
 128  * @param string $name gettext domain name
 129  * @return string gettext domain name
 130  */
 131 function sq_textdomain($domain) {
 132     global $gettext_domain;
 133     $gettext_domain=textdomain($domain);
 134     return $gettext_domain;
 135 }
 136
 137 /**
 138  * php setlocale function wrapper
 139  *
 140  * From php 4.3.0 it is possible to use arrays in order to set locale.
 141  * php gettext extension works only when locale is set. This wrapper
 142  * function allows to use more than one locale name.
 143  *
 144  * @param int $category locale category name. Use php named constants
 145  *     (LC_ALL, LC_COLLATE, LC_CTYPE, LC_MONETARY, LC_NUMERIC, LC_TIME)
 146  * @param mixed $locale option contains array with possible locales or string with one locale
 147  * @return string name of set locale or false, if all locales fail.
 148  * @since 1.5.1 and 1.4.5
 149  * @see http://www.php.net/setlocale
 150  */
 151 function sq_setlocale($category,$locale) {
 152     if (is_string($locale)) {
 153         // string with only one locale
 154         $ret = setlocale($category,$locale);
 155     } elseif (! check_php_version(4,3)) {
 156         // older php version (second setlocale argument must be string)
 157         $ret=false;
 158         $index=0;
 159         while ( ! $ret && $index<count($locale)) {
 160             $ret=setlocale($category,$locale[$index]);
 161             $index++;
 162         }
 163     } else {
 164         // php 4.3.0 or better, use entire array
 165         $ret=setlocale($category,$locale);
 166     }
 167
 168     /* safety checks */
 169     if (preg_match("/^.*\/.*\/.*\/.*\/.*\/.*$/",$ret)) {
 170         /**
 171          * Welcome to We-Don't-Follow-Own-Fine-Manual department
 172          * OpenBSD 3.8, 3.9-current and maybe later versions
 173          * return invalid response to setlocale command.
 174          * SM bug report #1427512.
 175          */
 176         $ret = false;
 177     }
 178     return $ret;
 179 }
 180
 181 /**
 182  * Converts string from given charset to charset, that can be displayed by user translation.
 183  *
 184  * Function by default returns html encoded strings, if translation uses different encoding.
 185  * If Japanese translation is used - function returns string converted to euc-jp
 186  * If iconv or recode functions are enabled and translation uses utf-8 - function returns utf-8 encoded string.
 187  * If $charset is not supported - function returns unconverted string.
 188  *
 189  * sanitizing of html tags is also done by this function.
 190  *
 191  * @param string $charset
 192  * @param string $string Text to be decoded
 193  * @param boolean $force_decode converts string to html without $charset!=$default_charset check.
 194  * Argument is available since 1.5.1 and 1.4.5.
 195  * @param boolean $save_html disables htmlspecialchars() in order to preserve
 196  *  html formating. Use with care. Available since 1.5.1
 197  * @return string decoded string
 198  */
 199 function charset_decode ($charset, $string, $force_decode=false, $save_html=false) {
 200     global $languages, $squirrelmail_language, $default_charset;
 201     global $use_php_recode, $use_php_iconv, $aggressive_decoding;
 202
 203     if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
 204         function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode')) {
 205         $string = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode', $string);
 206     }
 207
 208     $charset = strtolower($charset);
 209
 210     set_my_charset();
 211
 212     // Variables that allow to use functions without function_exist() calls
 213     if (! isset($use_php_recode) || $use_php_recode=="" ) {
 214         $use_php_recode=false; }
 215     if (! isset($use_php_iconv) || $use_php_iconv=="" ) {
 216         $use_php_iconv=false; }
 217
 218     // Don't do conversion if charset is the same.
 219     if ( ! $force_decode && $charset == strtolower($default_charset) )
 220         return ($save_html ? $string : htmlspecialchars($string));
 221
 222     // catch iso-8859-8-i thing
 223     if ( $charset == "iso-8859-8-i" )
 224         $charset = "iso-8859-8";
 225
 226     /*
 227      * Recode converts html special characters automatically if you use
 228      * 'charset..html' decoding. There is no documented way to put -d option
 229      * into php recode function call.
 230      */
 231     if ( $use_php_recode ) {
 232         if ( $default_charset == "utf-8" ) {
 233             // other charsets can be converted to utf-8 without loss.
 234             // and output string is smaller
 235             $string = recode_string($charset . "..utf-8",$string);
 236             return ($save_html ? $string : htmlspecialchars($string));
 237         } else {
 238             $string = recode_string($charset . "..html",$string);
 239             // recode does not convert single quote, htmlspecialchars does.
 240             $string = str_replace("'", '&#039;', $string);
 241             // undo html specialchars
 242             if ($save_html)
 243                 $string=str_replace(array('&amp;','&quot;','&lt;','&gt;'),
 244                                     array('&','"','<','>'),$string);
 245             return $string;
 246         }
 247     }
 248
 249     // iconv functions does not have html target and can be used only with utf-8
 250     if ( $use_php_iconv && $default_charset=='utf-8') {
 251         $string = iconv($charset,$default_charset,$string);
 252         return ($save_html ? $string : htmlspecialchars($string));
 253     }
 254
 255     // If we don't use recode and iconv, we'll do it old way.
 256
 257     /* All HTML special characters are 7 bit and can be replaced first */
 258     if (! $save_html) $string = htmlspecialchars ($string);
 259
 260     /* controls cpu and memory intensive decoding cycles */
 261     if (! isset($aggressive_decoding) || $aggressive_decoding=="" ) {
 262         $aggressive_decoding=false; }
 263
 264     $decode=fixcharset($charset);
 265     $decodefile=SM_PATH . 'functions/decode/' . $decode . '.php';
 266     if ($decode != 'index' && file_exists($decodefile)) {
 267         include_once($decodefile);
 268         // send $save_html argument to decoding function. needed for iso-2022-xx decoding.
 269         $ret = call_user_func('charset_decode_'.$decode, $string, $save_html);
 270     } else {
 271         $ret = $string;
 272     }
 273     return( $ret );
 274 }
 275
 276 /**
 277  * Converts html string to given charset
 278  * @since 1.5.1 and 1.4.4
 279  * @param string $string
 280  * @param string $charset
 281  * @param boolean $htmlencode keep htmlspecialchars encoding
 282  * @return string
 283  */
 284 function charset_encode($string,$charset,$htmlencode=true) {
 285     global $default_charset;
 286
 287     $encode=fixcharset($charset);
 288     $encodefile=SM_PATH . 'functions/encode/' . $encode . '.php';
 289     if ($encode != 'index' && file_exists($encodefile)) {
 290         include_once($encodefile);
 291         $ret = call_user_func('charset_encode_'.$encode, $string);
 292     } elseif(file_exists(SM_PATH . 'functions/encode/us_ascii.php')) {
 293         // function replaces all 8bit html entities with question marks.
 294         // it is used when other encoding functions are unavailable
 295         include_once(SM_PATH . 'functions/encode/us_ascii.php');
 296         $ret = charset_encode_us_ascii($string);
 297     } else {
 298         /**
 299          * fix for yahoo users that remove all us-ascii related things
 300          */
 301         $ret = $string;
 302     }
 303
 304     /**
 305      * Undo html special chars, some places (like compose form) have
 306      * own sanitizing functions and don't need html symbols.
 307      * Undo chars only after encoding in order to prevent conversion of
 308      * html entities in plain text emails.
 309      */
 310     if (! $htmlencode ) {
 311         $ret = str_replace(array('&amp;','&gt;','&lt;','&quot;'),array('&','>','<','"'),$ret);
 312     }
 313     return( $ret );
 314 }
 315
 316 /**
 317  * Combined decoding and encoding functions
 318  *
 319  * If conversion is done to charset different that utf-8, unsupported symbols
 320  * will be replaced with question marks.
 321  * @since 1.5.1 and 1.4.4
 322  * @param string $in_charset initial charset
 323  * @param string $string string that has to be converted
 324  * @param string $out_charset final charset
 325  * @param boolean $htmlencode keep htmlspecialchars encoding
 326  * @return string converted string
 327  */
 328 function charset_convert($in_charset,$string,$out_charset,$htmlencode=true) {
 329     $string=charset_decode($in_charset,$string,true);
 330     $string=sqi18n_convert_entities($string);
 331     $string=charset_encode($string,$out_charset,$htmlencode);
 332     return $string;
 333 }
 334
 335 /**
 336  * Makes charset name suitable for decoding cycles
 337  *
 338  * @since 1.5.0 and 1.4.4
 339  * @param string $charset Name of charset
 340  * @return string $charset Adjusted name of charset
 341  */
 342 function fixcharset($charset) {
 343     /* remove minus and characters that might be used in paths from charset
 344      * name in order to be able to use it in function names and include calls.
 345      */
 346     $charset=preg_replace("/[-:.\/\\\]/",'_',$charset);
 347
 348     // OE ks_c_5601_1987 > cp949
 349     $charset=str_replace('ks_c_5601_1987','cp949',$charset);
 350     // Moz x-euc-tw > euc-tw
 351     $charset=str_replace('x_euc','euc',$charset);
 352     // Moz x-windows-949 > cp949
 353     $charset=str_replace('x_windows_','cp',$charset);
 354
 355     // windows-125x and cp125x charsets
 356     $charset=str_replace('windows_','cp',$charset);
 357
 358     // ibm > cp
 359     $charset=str_replace('ibm','cp',$charset);
 360
 361     // iso-8859-8-i -> iso-8859-8
 362     // use same cycle until I'll find differences
 363     $charset=str_replace('iso_8859_8_i','iso_8859_8',$charset);
 364
 365     return $charset;
 366 }
 367
 368 /**
 369  * Set up the language to be output
 370  * if $do_search is true, then scan the browser information
 371  * for a possible language that we know
 372  *
 373  * Function sets system locale environment (LC_ALL, LANG, LANGUAGE),
 374  * gettext translation bindings and html header information.
 375  *
 376  * Function returns error codes, if there is some fatal error.
 377  *  0 = no error,
 378  *  1 = mbstring support is not present,
 379  *  2 = mbstring support is not present, user's translation reverted to en_US.
 380  *
 381  * @param string $sm_language  Translation used by user's interface
 382  * @param bool   $do_search    Use browser's preferred language detection functions.
 383  *                             Defaults to false.
 384  * @param bool   $default      Set $sm_language to $squirrelmail_default_language if
 385  *                             language detection fails or language is not set.
 386  *                             Defaults to false.
 387  * @param string $content_type The content type being served currently (OPTIONAL;
 388  *                             if not specified, defaults to whatever the template
 389  *                             set that is in use has defined).
 390  *
 391  * @return int function execution error codes.
 392  *
 393  */
 394 function set_up_language($sm_language, $do_search=false,
 395                          $default=false, $content_type='') {
 396
 397     static $SetupAlready = 0;
 398     global $use_gettext, $languages, $oTemplate,
 399            $squirrelmail_language, $squirrelmail_default_language, $default_charset,
 400            $sm_notAlias, $username, $data_dir;
 401
 402     if ($SetupAlready) {
 403         return;
 404     }
 405
 406     $SetupAlready = TRUE;
 407     sqgetGlobalVar('HTTP_ACCEPT_LANGUAGE',  $accept_lang, SQ_SERVER);
 408
 409     // grab content type if needed
 410     //
 411     if (empty($content_type)) $content_type = $oTemplate->get_content_type();
 412
 413     /**
 414      * If function is asked to detect preferred language
 415      *  OR squirrelmail default language is set to empty string
 416      *    AND
 417      * squirrelmail language ($sm_language) is empty string
 418      * (not set in user's prefs and no cookie with language info)
 419      *    AND
 420      * browser provides list of preferred languages
 421      *  THEN
 422      * get preferred language from HTTP_ACCEPT_LANGUAGE header
 423      */
 424     if (($do_search || empty($squirrelmail_default_language)) &&
 425         ! $sm_language &&
 426         isset($accept_lang)) {
 427         // TODO: use more than one language, if first language is not available
 428         // FIXME: function assumes that string contains two or more characters.
 429         // FIXME: some languages use 5 chars
 430         $sm_language = substr($accept_lang, 0, 2);
 431     }
 432
 433     /**
 434      * If language preference is not set OR script asks to use default language
 435      *  AND
 436      * default squirrelmail language is not set to empty string
 437      *  THEN
 438      * use default squirrelmail language value from configuration.
 439      */
 440     if ((!$sm_language||$default) &&
 441         ! empty($squirrelmail_default_language)) {
 442         $squirrelmail_language = $squirrelmail_default_language;
 443         $sm_language = $squirrelmail_default_language;
 444     }
 445
 446     /** provide failsafe language when detection fails */
 447     if (! $sm_language) $sm_language='en_US';
 448
 449     $sm_notAlias = $sm_language;
 450
 451     // Catching removed translation
 452     // System reverts to English translation if user prefs contain translation
 453     // that is not available in $languages array
 454     if (!isset($languages[$sm_notAlias])) {
 455         $sm_notAlias="en_US";
 456     }
 457
 458     while (isset($languages[$sm_notAlias]['ALIAS'])) {
 459         $sm_notAlias = $languages[$sm_notAlias]['ALIAS'];
 460     }
 461
 462     if ( isset($sm_language) &&
 463          $use_gettext &&
 464          $sm_language != '' &&
 465          isset($languages[$sm_notAlias]['CHARSET']) ) {
 466         sq_bindtextdomain( 'squirrelmail', SM_PATH . 'locale/' );
 467         sq_textdomain( 'squirrelmail' );
 468
 469         // Use LOCALE key, if it is set.
 470         if (isset($languages[$sm_notAlias]['LOCALE'])){
 471             $longlocale=$languages[$sm_notAlias]['LOCALE'];
 472         } else {
 473             $longlocale=$sm_notAlias;
 474         }
 475
 476         // try setting locale
 477         $retlocale=sq_setlocale(LC_ALL, $longlocale);
 478
 479         // check if locale is set and assign that locale to $longlocale
 480         // in order to use it in putenv calls.
 481         if (! is_bool($retlocale)) {
 482             $longlocale=$retlocale;
 483         } elseif (is_array($longlocale)) {
 484             // setting of all locales failed.
 485             // we need string instead of array used in LOCALE key.
 486             $longlocale=$sm_notAlias;
 487         }
 488
 489         if ( !((bool)ini_get('safe_mode')) &&
 490              getenv( 'LC_ALL' ) != $longlocale ) {
 491             putenv( "LC_ALL=$longlocale" );
 492             putenv( "LANG=$longlocale" );
 493             putenv( "LANGUAGE=$longlocale" );
 494             putenv( "LC_NUMERIC=C" );
 495             if ($sm_notAlias=='tr_TR') putenv( "LC_CTYPE=C" );
 496         }
 497         // Workaround for plugins that use numbers with floating point
 498         // It might be removed if plugins use correct decimal delimiters
 499         // according to locale settings.
 500         setlocale(LC_NUMERIC, 'C');
 501         // Workaround for specific Turkish strtolower/strtoupper rules.
 502         // Many functions expect English conversion rules.
 503         if ($sm_notAlias=='tr_TR') setlocale(LC_CTYPE,'C');
 504
 505         /**
 506          * Set text direction/alignment variables
 507          * When language environment is setup, scripts can use these globals
 508          * without accessing $languages directly and making checks for optional
 509          * array key.
 510          */
 511         global $text_direction, $left_align, $right_align;
 512         if (isset($languages[$sm_notAlias]['DIR']) &&
 513             $languages[$sm_notAlias]['DIR'] == 'rtl') {
 514             /**
 515              * Text direction
 516              * @global string $text_direction
 517              */
 518             $text_direction='rtl';
 519             /**
 520              * Left alignment
 521              * @global string $left_align
 522              */
 523             $left_align='right';
 524             /**
 525              * Right alignment
 526              * @global string $right_align
 527              */
 528             $right_align='left';
 529         } else {
 530             $text_direction='ltr';
 531             $left_align='left';
 532             $right_align='right';
 533         }
 534
 535         $squirrelmail_language = $sm_notAlias;
 536         if ($squirrelmail_language == 'ja_JP') {
 537             $oTemplate->header ('Content-Type: ' . $content_type . '; charset=EUC-JP');
 538             if (!function_exists('mb_internal_encoding')) {
 539                 // Error messages can't be displayed here
 540                 $error = 1;
 541                 // Revert to English if possible.
 542                 if (function_exists('setPref')  && $username!='' && $data_dir!="") {
 543                     setPref($data_dir, $username, 'language', "en_US");
 544                     $error = 2;
 545                 }
 546                 // stop further execution in order not to get php errors on mb_internal_encoding().
 547                 return $error;
 548             }
 549             if (function_exists('mb_language')) {
 550                 mb_language('Japanese');
 551             }
 552             mb_internal_encoding('EUC-JP');
 553             mb_http_output('pass');
 554         } elseif ($squirrelmail_language == 'en_US') {
 555             $oTemplate->header( 'Content-Type: ' . $content_type . '; charset=' . $default_charset );
 556         } else {
 557             $oTemplate->header( 'Content-Type: ' . $content_type . '; charset=' . $languages[$sm_notAlias]['CHARSET'] );
 558         }
 559         /**
 560          * mbstring.func_overload fix (#929644).
 561          *
 562          * php mbstring extension can replace standard string functions with their multibyte
 563          * equivalents. See http://www.php.net/ref.mbstring#mbstring.overload. This feature
 564          * was added in php v.4.2.0
 565          *
 566          * Some SquirrelMail functions work with 8bit strings in bytes. If interface is forced
 567          * to use mbstring functions and mbstring internal encoding is set to multibyte charset,
 568          * interface can't trust regular string functions. Due to mbstring overloading design
 569          * limits php scripts can't control this setting.
 570          *
 571          * This hack should fix some issues related to 8bit strings in passwords. Correct fix is
 572          * to disable mbstring overloading. Japanese translation uses different internal encoding.
 573          */
 574         if ($squirrelmail_language != 'ja_JP' &&
 575             function_exists('mb_internal_encoding') &&
 576             check_php_version(4,2,0) &&
 577             (int)ini_get('mbstring.func_overload')!=0) {
 578             mb_internal_encoding('pass');
 579         }
 580     }
 581     return 0;
 582 }
 583
 584 /**
 585  * Sets default_charset variable according to the one that is used by user's translations.
 586  *
 587  * Function changes global $default_charset variable in order to be sure, that it
 588  * contains charset used by user's translation. Sanity of $squirrelmail_language
 589  * and $default_charset combination is also tested.
 590  *
 591  * There can be a $default_charset setting in the
 592  * config.php file, but the user may have a different language
 593  * selected for a user interface. This function checks the
 594  * language selected by the user and tags the outgoing messages
 595  * with the appropriate charset corresponding to the language
 596  * selection. This is "more right" (tm), than just stamping the
 597  * message blindly with the system-wide $default_charset.
 598  */
 599 function set_my_charset(){
 600     global $data_dir, $username, $default_charset, $languages, $squirrelmail_language;
 601
 602     $my_language = getPref($data_dir, $username, 'language');
 603     if (!$my_language) {
 604         $my_language = $squirrelmail_language ;
 605     }
 606     // Catch removed translation
 607     if (!isset($languages[$my_language])) {
 608         $my_language="en_US";
 609     }
 610     while (isset($languages[$my_language]['ALIAS'])) {
 611         $my_language = $languages[$my_language]['ALIAS'];
 612     }
 613     $my_charset = $languages[$my_language]['CHARSET'];
 614     if ($my_language!='en_US') {
 615         $default_charset = $my_charset;
 616     }
 617 }
 618
 619 /**
 620  * Replaces non-braking spaces inserted by some browsers with regular space
 621  *
 622  * This function can be used to replace non-braking space symbols
 623  * that are inserted in forms by some browsers instead of normal
 624  * space symbol.
 625  *
 626  * @param string $string Text that needs to be cleaned
 627  * @param string $charset Charset used in text
 628  * @return string Cleaned text
 629  */
 630 function cleanup_nbsp($string,$charset) {
 631
 632   // reduce number of case statements
 633   if (stristr('iso-8859-',substr($charset,0,9))){
 634     $output_charset="iso-8859-x";
 635   }
 636   if (stristr('windows-125',substr($charset,0,11))){
 637     $output_charset="cp125x";
 638   }
 639   if (stristr('koi8',substr($charset,0,4))){
 640     $output_charset="koi8-x";
 641   }
 642   if (! isset($output_charset)){
 643     $output_charset=strtolower($charset);
 644   }
 645
 646 // where is non-braking space symbol
 647 switch($output_charset):
 648  case "iso-8859-x":
 649  case "cp125x":
 650  case "iso-2022-jp":
 651   $nbsp="\xA0";
 652   break;
 653  case "koi8-x":
 654    $nbsp="\x9A";
 655    break;
 656  case "utf-8":
 657    $nbsp="\xC2\xA0";
 658    break;
 659  default:
 660    // don't change string if charset is unmatched
 661    return $string;
 662 endswitch;
 663
 664 // return space instead of non-braking space.
 665  return str_replace($nbsp,' ',$string);
 666 }
 667
 668 /**
 669  * Function informs if it is safe to convert given charset to the one that is used by user.
 670  *
 671  * It is safe to use conversion only if user uses utf-8 encoding and when
 672  * converted charset is similar to the one that is used by user.
 673  *
 674  * @param string $input_charset Charset of text that needs to be converted
 675  * @return bool is it possible to convert to user's charset
 676  */
 677 function is_conversion_safe($input_charset) {
 678     global $languages, $sm_notAlias, $default_charset, $lossy_encoding;
 679
 680     if (isset($lossy_encoding) && $lossy_encoding )
 681         return true;
 682
 683     // convert to lower case
 684     $input_charset = strtolower($input_charset);
 685
 686     // Is user's locale Unicode based ?
 687     if ( $default_charset == "utf-8" ) {
 688         return true;
 689     }
 690
 691     // Charsets that are similar
 692     switch ($default_charset) {
 693     case "windows-1251":
 694         if ( $input_charset == "iso-8859-5" ||
 695              $input_charset == "koi8-r" ||
 696              $input_charset == "koi8-u" ) {
 697             return true;
 698         } else {
 699             return false;
 700         }
 701     case "windows-1257":
 702         if ( $input_charset == "iso-8859-13" ||
 703              $input_charset == "iso-8859-4" ) {
 704             return true;
 705         } else {
 706             return false;
 707         }
 708     case "iso-8859-4":
 709         if ( $input_charset == "iso-8859-13" ||
 710              $input_charset == "windows-1257" ) {
 711             return true;
 712         } else {
 713             return false;
 714         }
 715     case "iso-8859-5":
 716         if ( $input_charset == "windows-1251" ||
 717              $input_charset == "koi8-r" ||
 718              $input_charset == "koi8-u" ) {
 719             return true;
 720         } else {
 721             return false;
 722         }
 723     case "iso-8859-13":
 724         if ( $input_charset == "iso-8859-4" ||
 725              $input_charset == "windows-1257" ) {
 726             return true;
 727         } else {
 728             return false;
 729         }
 730     case "koi8-r":
 731         if ( $input_charset == "windows-1251" ||
 732              $input_charset == "iso-8859-5" ||
 733              $input_charset == "koi8-u" ) {
 734             return true;
 735         } else {
 736             return false;
 737         }
 738     case "koi8-u":
 739         if ( $input_charset == "windows-1251" ||
 740              $input_charset == "iso-8859-5" ||
 741              $input_charset == "koi8-r" ) {
 742             return true;
 743         } else {
 744             return false;
 745         }
 746     default:
 747         return false;
 748     }
 749 }
 750
 751 /**
 752  * Converts html character entities to numeric entities
 753  *
 754  * SquirrelMail encoding functions work only with numeric entities.
 755  * This function fixes issues with decoding functions that might convert
 756  * some symbols to character entities. Issue is specific to PHP recode
 757  * extension decoding. Function is used internally in charset_convert()
 758  * function.
 759  * @param string $str string that might contain html character entities
 760  * @return string string with character entities converted to decimals.
 761  * @since 1.5.2
 762  */
 763 function sqi18n_convert_entities($str) {
 764
 765     $entities = array(
 766         // Latin 1
 767         '&nbsp;'   => '&#160;',
 768         '&iexcl;'  => '&#161;',
 769         '&cent;'   => '&#162;',
 770         '&pound;'  => '&#163;',
 771         '&curren;' => '&#164;',
 772         '&yen;'    => '&#165;',
 773         '&brvbar;' => '&#166;',
 774         '&sect;'   => '&#167;',
 775         '&uml;'    => '&#168;',
 776         '&copy;'   => '&#169;',
 777         '&ordf;'   => '&#170;',
 778         '&laquo;'  => '&#171;',
 779         '&not;'    => '&#172;',
 780         '&shy;'    => '&#173;',
 781         '&reg;'    => '&#174;',
 782         '&macr;'   => '&#175;',
 783         '&deg;'    => '&#176;',
 784         '&plusmn;' => '&#177;',
 785         '&sup2;'   => '&#178;',
 786         '&sup3;'   => '&#179;',
 787         '&acute;'  => '&#180;',
 788         '&micro;'  => '&#181;',
 789         '&para;'   => '&#182;',
 790         '&middot;' => '&#183;',
 791         '&cedil;'  => '&#184;',
 792         '&sup1;'   => '&#185;',
 793         '&ordm;'   => '&#186;',
 794         '&raquo;'  => '&#187;',
 795         '&frac14;' => '&#188;',
 796         '&frac12;' => '&#189;',
 797         '&frac34;' => '&#190;',
 798         '&iquest;' => '&#191;',
 799         '&Agrave;' => '&#192;',
 800         '&Aacute;' => '&#193;',
 801         '&Acirc;'  => '&#194;',
 802         '&Atilde;' => '&#195;',
 803         '&Auml;'   => '&#196;',
 804         '&Aring;'  => '&#197;',
 805         '&AElig;'  => '&#198;',
 806         '&Ccedil;' => '&#199;',
 807         '&Egrave;' => '&#200;',
 808         '&Eacute;' => '&#201;',
 809         '&Ecirc;'  => '&#202;',
 810         '&Euml;'   => '&#203;',
 811         '&Igrave;' => '&#204;',
 812         '&Iacute;' => '&#205;',
 813         '&Icirc;'  => '&#206;',
 814         '&Iuml;'   => '&#207;',
 815         '&ETH;'    => '&#208;',
 816         '&Ntilde;' => '&#209;',
 817         '&Ograve;' => '&#210;',
 818         '&Oacute;' => '&#211;',
 819         '&Ocirc;'  => '&#212;',
 820         '&Otilde;' => '&#213;',
 821         '&Ouml;'   => '&#214;',
 822         '&times;'  => '&#215;',
 823         '&Oslash;' => '&#216;',
 824         '&Ugrave;' => '&#217;',
 825         '&Uacute;' => '&#218;',
 826         '&Ucirc;'  => '&#219;',
 827         '&Uuml;'   => '&#220;',
 828         '&Yacute;' => '&#221;',
 829         '&THORN;'  => '&#222;',
 830         '&szlig;'  => '&#223;',
 831         '&agrave;' => '&#224;',
 832         '&aacute;' => '&#225;',
 833         '&acirc;'  => '&#226;',
 834         '&atilde;' => '&#227;',
 835         '&auml;'   => '&#228;',
 836         '&aring;'  => '&#229;',
 837         '&aelig;'  => '&#230;',
 838         '&ccedil;' => '&#231;',
 839         '&egrave;' => '&#232;',
 840         '&eacute;' => '&#233;',
 841         '&ecirc;'  => '&#234;',
 842         '&euml;'   => '&#235;',
 843         '&igrave;' => '&#236;',
 844         '&iacute;' => '&#237;',
 845         '&icirc;'  => '&#238;',
 846         '&iuml;'   => '&#239;',
 847         '&eth;'    => '&#240;',
 848         '&ntilde;' => '&#241;',
 849         '&ograve;' => '&#242;',
 850         '&oacute;' => '&#243;',
 851         '&ocirc;'  => '&#244;',
 852         '&otilde;' => '&#245;',
 853         '&ouml;'   => '&#246;',
 854         '&divide;' => '&#247;',
 855         '&oslash;' => '&#248;',
 856         '&ugrave;' => '&#249;',
 857         '&uacute;' => '&#250;',
 858         '&ucirc;'  => '&#251;',
 859         '&uuml;'   => '&#252;',
 860         '&yacute;' => '&#253;',
 861         '&thorn;'  => '&#254;',
 862         '&yuml;'   => '&#255;',
 863         // Latin Extended-A
 864         '&OElig;'  => '&#338;',
 865         '&oelig;'  => '&#339;',
 866         '&Scaron;' => '&#352;',
 867         '&scaron;' => '&#353;',
 868         '&Yuml;'   => '&#376;',
 869         // Spacing Modifier Letters
 870         '&circ;'   => '&#710;',
 871         '&tilde;'  => '&#732;',
 872         // General Punctuation
 873         '&ensp;'   => '&#8194;',
 874         '&emsp;'   => '&#8195;',
 875         '&thinsp;' => '&#8201;',
 876         '&zwnj;'   => '&#8204;',
 877         '&zwj;'    => '&#8205;',
 878         '&lrm;'    => '&#8206;',
 879         '&rlm;'    => '&#8207;',
 880         '&ndash;'  => '&#8211;',
 881         '&mdash;'  => '&#8212;',
 882         '&lsquo;'  => '&#8216;',
 883         '&rsquo;'  => '&#8217;',
 884         '&sbquo;'  => '&#8218;',
 885         '&ldquo;'  => '&#8220;',
 886         '&rdquo;'  => '&#8221;',
 887         '&bdquo;'  => '&#8222;',
 888         '&dagger;' => '&#8224;',
 889         '&Dagger;' => '&#8225;',
 890         '&permil;' => '&#8240;',
 891         '&lsaquo;' => '&#8249;',
 892         '&rsaquo;' => '&#8250;',
 893         '&euro;'   => '&#8364;',
 894         // Latin Extended-B
 895         '&fnof;' => '&#402;',
 896         // Greek
 897         '&Alpha;'  => '&#913;',
 898         '&Beta;'   => '&#914;',
 899         '&Gamma;'  => '&#915;',
 900         '&Delta;'  => '&#916;',
 901         '&Epsilon;' => '&#917;',
 902         '&Zeta;'   => '&#918;',
 903         '&Eta;'    => '&#919;',
 904         '&Theta;'  => '&#920;',
 905         '&Iota;'   => '&#921;',
 906         '&Kappa;'  => '&#922;',
 907         '&Lambda;' => '&#923;',
 908         '&Mu;'     => '&#924;',
 909         '&Nu;'     => '&#925;',
 910         '&Xi;'     => '&#926;',
 911         '&Omicron;' => '&#927;',
 912         '&Pi;'     => '&#928;',
 913         '&Rho;'    => '&#929;',
 914         '&Sigma;'  => '&#931;',
 915         '&Tau;'    => '&#932;',
 916         '&Upsilon;' => '&#933;',
 917         '&Phi;'    => '&#934;',
 918         '&Chi;'    => '&#935;',
 919         '&Psi;'    => '&#936;',
 920         '&Omega;'  => '&#937;',
 921         '&alpha;'  => '&#945;',
 922         '&beta;'   => '&#946;',
 923         '&gamma;'  => '&#947;',
 924         '&delta;'  => '&#948;',
 925         '&epsilon;' => '&#949;',
 926         '&zeta;'   => '&#950;',
 927         '&eta;'    => '&#951;',
 928         '&theta;'  => '&#952;',
 929         '&iota;'   => '&#953;',
 930         '&kappa;'  => '&#954;',
 931         '&lambda;' => '&#955;',
 932         '&mu;'     => '&#956;',
 933         '&nu;'     => '&#957;',
 934         '&xi;'     => '&#958;',
 935         '&omicron;' => '&#959;',
 936         '&pi;'     => '&#960;',
 937         '&rho;'    => '&#961;',
 938         '&sigmaf;' => '&#962;',
 939         '&sigma;'  => '&#963;',
 940         '&tau;'    => '&#964;',
 941         '&upsilon;' => '&#965;',
 942         '&phi;'    => '&#966;',
 943         '&chi;'    => '&#967;',
 944         '&psi;'    => '&#968;',
 945         '&omega;'  => '&#969;',
 946         '&thetasym;' => '&#977;',
 947         '&upsih;'  => '&#978;',
 948         '&piv;'    => '&#982;',
 949         // General Punctuation
 950         '&bull;'   => '&#8226;',
 951         '&hellip;' => '&#8230;',
 952         '&prime;'  => '&#8242;',
 953         '&Prime;'  => '&#8243;',
 954         '&oline;'  => '&#8254;',
 955         '&frasl;'  => '&#8260;',
 956         // Letterlike Symbols
 957         '&weierp;' => '&#8472;',
 958         '&image;'  => '&#8465;',
 959         '&real;'   => '&#8476;',
 960         '&trade;'  => '&#8482;',
 961         '&alefsym;' => '&#8501;',
 962         // Arrows
 963         '&larr;'   => '&#8592;',
 964         '&uarr;'   => '&#8593;',
 965         '&rarr;'   => '&#8594;',
 966         '&darr;'   => '&#8595;',
 967         '&harr;'   => '&#8596;',
 968         '&crarr;'  => '&#8629;',
 969         '&lArr;'   => '&#8656;',
 970         '&uArr;'   => '&#8657;',
 971         '&rArr;'   => '&#8658;',
 972         '&dArr;'   => '&#8659;',
 973         '&hArr;'   => '&#8660;',
 974         // Mathematical Operators
 975         '&forall;' => '&#8704;',
 976         '&part;'   => '&#8706;',
 977         '&exist;'  => '&#8707;',
 978         '&empty;'  => '&#8709;',
 979         '&nabla;'  => '&#8711;',
 980         '&isin;'   => '&#8712;',
 981         '&notin;'  => '&#8713;',
 982         '&ni;'     => '&#8715;',
 983         '&prod;'   => '&#8719;',
 984         '&sum;'    => '&#8721;',
 985         '&minus;'  => '&#8722;',
 986         '&lowast;' => '&#8727;',
 987         '&radic;'  => '&#8730;',
 988         '&prop;'   => '&#8733;',
 989         '&infin;'  => '&#8734;',
 990         '&ang;'    => '&#8736;',
 991         '&and;'    => '&#8743;',
 992         '&or;'     => '&#8744;',
 993         '&cap;'    => '&#8745;',
 994         '&cup;'    => '&#8746;',
 995         '&int;'    => '&#8747;',
 996         '&there4;' => '&#8756;',
 997         '&sim;'    => '&#8764;',
 998         '&cong;'   => '&#8773;',
 999         '&asymp;'  => '&#8776;',
1000         '&ne;'     => '&#8800;',
1001         '&equiv;'  => '&#8801;',
1002         '&le;'     => '&#8804;',
1003         '&ge;'     => '&#8805;',
1004         '&sub;'    => '&#8834;',
1005         '&sup;'    => '&#8835;',
1006         '&nsub;'   => '&#8836;',
1007         '&sube;'   => '&#8838;',
1008         '&supe;'   => '&#8839;',
1009         '&oplus;'  => '&#8853;',
1010         '&otimes;' => '&#8855;',
1011         '&perp;'   => '&#8869;',
1012         '&sdot;'   => '&#8901;',
1013         // Miscellaneous Technical
1014         '&lceil;'  => '&#8968;',
1015         '&rceil;'  => '&#8969;',
1016         '&lfloor;' => '&#8970;',
1017         '&rfloor;' => '&#8971;',
1018         '&lang;'   => '&#9001;',
1019         '&rang;'   => '&#9002;',
1020         // Geometric Shapes
1021         '&loz;'    => '&#9674;',
1022         // Miscellaneous Symbols
1023         '&spades;' => '&#9824;',
1024         '&clubs;'  => '&#9827;',
1025         '&hearts;' => '&#9829;',
1026         '&diams;'  => '&#9830;');
1027
1028     $str = str_replace(array_keys($entities), array_values($entities), $str);
1029
1030     return $str;
1031 }
1032
1033 /* ------------------------------ main --------------------------- */
1034
global $squirrelmail_language,
       $languages,
       $use_gettext;

// sqgetGlobalVar() is asked for the 'squirrelmail_language' cookie value;
// when it reports that nothing was found, start from an empty language code.
if (!sqgetGlobalVar('squirrelmail_language', $squirrelmail_language, SQ_COOKIE)) {
    $squirrelmail_language = '';
}
1040
/**
 * List of translations available to SquirrelMail.
 *
 * Every entry has the form
 * $languages['language']['variable'] = 'value'
 *
 * Recognized 'variable' names:
 *  NAME      - English name of the translation
 *  CHARSET   - character set the translation is encoded in
 *  ALIAS     - 'language' is only a short name; 'value' holds the long
 *              language name it stands for
 *  ALTNAME   - native name of the translation. Any 8bit symbols must be
 *              html encoded.
 *  LOCALE    - full locale name (xx_XX.charset format). Since 1.4.5 and
 *              1.5.1 it can be an array with more than one locale name
 *  DIR       - text direction, 'rtl' or 'ltr'; used to define
 *              Right-to-Left languages. Defaults to 'ltr' when undefined
 *  XTRA_CODE - translation uses special functions. See doc/i18n.txt
 *
 * A 'language' definition must provide either NAME and CHARSET, or ALIAS.
 *
 * @name $languages
 * @global array $languages
 */
// Built-in default translation. Keys are assigned one by one so that
// anything already stored under the same language code is kept.
$languages['en_US']['NAME'] = 'English';
$languages['en_US']['CHARSET'] = 'iso-8859-1';
$languages['en_US']['LOCALE'] = 'en_US.ISO8859-1';
// Short code 'en' resolves to the full en_US definition.
$languages['en']['ALIAS'] = 'en_US';
1065
/**
 * Automatic translation loading from setup.php files.
 * Solution for bug. 1240889.
 * setup.php file can contain $languages array entries and XTRA_CODE functions.
 *
 * Every subdirectory of SM_PATH . 'locale' (except '.', '..', 'CVS' and
 * '.svn') that contains a setup.php file gets that file included once.
 */
if (is_dir(SM_PATH . 'locale') &&
    is_readable(SM_PATH . 'locale')) {
    $localedir = dir(SM_PATH . 'locale');
    // dir() returns false on failure; skip the scan instead of calling
    // read() on a non-object.
    if ($localedir) {
        // Compare against false explicitly: read() returns the entry name,
        // and an entry called "0" is falsy and would otherwise end the
        // scan before the remaining directories are visited.
        while (false !== ($lang_dir = $localedir->read())) {
            // remove trailing slash, if present
            if (substr($lang_dir,-1)=='/') {
                $lang_dir = substr($lang_dir,0,-1);
            }
            if ($lang_dir != '..' && $lang_dir != '.' && $lang_dir != 'CVS' &&
                $lang_dir != '.svn' && is_dir(SM_PATH.'locale/'.$lang_dir) &&
                file_exists(SM_PATH.'locale/'.$lang_dir.'/setup.php')) {
                include_once(SM_PATH.'locale/'.$lang_dir.'/setup.php');
            }
        }
        $localedir->close();
    }
}
1087
/*
 * Detect which gettext functions this PHP installation provides.
 * One bit per function:
 *   1 - _()
 *   2 - bindtextdomain()
 *   4 - textdomain()
 *   8 - ngettext()
 * so 15 means all four exist and 0 means none of them does.
 */
$gettext_flags = (function_exists('_') ? 1 : 0)
               | (function_exists('bindtextdomain') ? 2 : 0)
               | (function_exists('textdomain') ? 4 : 0)
               | (function_exists('ngettext') ? 8 : 0);
1102
/*
 * Choose the translation backend from $gettext_flags
 * (bit 1: _(), bit 2: bindtextdomain(), bit 4: textdomain(),
 * bit 8: ngettext()).
 */

/* If gettext is fully loaded, cool */
if ($gettext_flags == 15) {
    $use_gettext = true;
}

/* If only ngettext support is missing, load it */
elseif ($gettext_flags == 7) {
    $use_gettext = true;
    // load internal ngettext functions
    include_once(SM_PATH . 'class/l10n.class.php');
    include_once(SM_PATH . 'functions/ngettext.php');
}

/* If we can fake gettext, try that */
elseif ($gettext_flags == 0) {
    $use_gettext = true;
    include_once(SM_PATH . 'functions/gettext.php');
} else {
    /*
     * Uh-oh.  A weird install: only some of the gettext functions exist.
     * Define pass-through replacements for the missing ones. $use_gettext
     * is not assigned in this branch.
     *
     * The bit tests need their parentheses: "!" binds tighter than "&",
     * so "! $gettext_flags & 1" evaluates as "(! $gettext_flags) & 1",
     * which is always 0 here because $gettext_flags is non-zero.
     */
    if (! ($gettext_flags & 1)) {
        /**
         * Function is used as replacement in broken installs
         * @ignore
         */
        function _($str) {
            return $str;
        }
    }
    if (! ($gettext_flags & 2)) {
        /**
         * Function is used as replacement in broken installs
         * @ignore
         */
        function bindtextdomain() {
            return;
        }
    }
    if (! ($gettext_flags & 4)) {
        /**
         * Function is used as replacement in broken installs
         * @ignore
         */
        function textdomain() {
            return;
        }
    }
    if (! ($gettext_flags & 8)) {
        /**
         * Function is used as replacement in broken installs.
         * Returns the singular form only when $number is 1, the same rule
         * the dngettext() replacement below applies.
         * @ignore
         */
        function ngettext($str,$str2,$number) {
            return ($number==1 ? $str : $str2);
        }
    }
    if (! function_exists('dgettext')) {
        /**
         * Replacement for broken setups.
         * @ignore
         */
        function dgettext($domain,$str) {
            return $str;
        }
    }
    if (! function_exists('dngettext')) {
        /**
         * Replacement for broken setups
         * @ignore
         */
        function dngettext($domain,$str1,$strn,$number) {
            return ($number==1 ? $str1 : $strn);
        }
    }
}