$ret = charset_decode_iso_8859_4 ($string);
} else if ($res[1] == '7') {
$ret = charset_decode_iso_8859_7 ($string);
+ } else if ($res[1] == '9') {
+ $ret = charset_decode_iso_8859_9 ($string);
} else if ($res[1] == '13') {
$ret = charset_decode_iso_8859_13 ($string);
} else if ($res[1] == '15') {
$ret = charset_decode_koi8r ($string);
} else if ($charset == 'windows-1251') {
$ret = charset_decode_windows_1251 ($string);
+ } else if ($charset == 'windows-1253') {
+ $ret = charset_decode_windows_1253 ($string);
+ } else if ($charset == 'windows-1254') {
+ $ret = charset_decode_windows_1254 ($string);
} else if ($charset == 'windows-1257') {
$ret = charset_decode_windows_1257 ($string);
+ } else if ($charset == 'utf-8') {
+ $ret = charset_decode_utf8 ($string);
} else {
$ret = $string;
}
if (! ereg("[\200-\377]", $string))
return $string;
- // latin capital letter a with ogonek 161 -> 260
$string = str_replace ("\241", 'Ą', $string);
- // latin small letter kra 162 -> 312
$string = str_replace ("\242", 'ĸ', $string);
- // latin capital letter r with cedilla 163 -> 342
$string = str_replace ("\243", 'Ŗ', $string);
- // latin capital letter i with tilde 165 -> 296
$string = str_replace ("\245", 'Ĩ', $string);
- // latin capital letter l with cedilla 166 -> 315
$string = str_replace ("\246", 'Ļ', $string);
- // latin capital letter s with caron 169 -> 352
$string = str_replace ("\251", 'Š', $string);
- // latin capital letter e with macron 170 -> 274
$string = str_replace ("\252", 'Ē', $string);
- // latin capital letter g with cedilla 171 -> 290
$string = str_replace ("\253", 'Ģ', $string);
- // latin capital letter t with stroke 172 -> 358
$string = str_replace ("\254", 'Ŧ', $string);
- // latin capital letter z with caron 174 -> 381
$string = str_replace ("\256", 'Ž', $string);
- // latin small letter a with ogonek 177 -> 261
$string = str_replace ("\261", 'ą', $string);
- // ogonek 178 -> 731
$string = str_replace ("\262", '˛', $string);
- // latin small letter r with cedilla 179 -> 343
$string = str_replace ("\263", 'ŗ', $string);
- // latin small letter i with tilde 181 -> 297
$string = str_replace ("\265", 'ĩ', $string);
- // latin small letter l with cedilla 182 -> 316
$string = str_replace ("\266", 'ļ', $string);
- // caron 183 -> 711
$string = str_replace ("\267", 'ˇ', $string);
- // latin small letter s with caron 185 -> 353
$string = str_replace ("\271", 'š', $string);
- // latin small letter e with macron 186 -> 275
$string = str_replace ("\272", 'ē', $string);
- // latin small letter g with cedilla 187 -> 291
$string = str_replace ("\273", 'ģ', $string);
- // latin small letter t with stroke 188 -> 359
$string = str_replace ("\274", 'ŧ', $string);
- // latin capital letter eng 189 -> 330
$string = str_replace ("\275", 'Ŋ', $string);
- // latin small letter z with caron 190 -> 382
$string = str_replace ("\276", 'ž', $string);
- // latin small letter eng 191 -> 331
$string = str_replace ("\277", 'ŋ', $string);
- // latin capital letter a with macron 192 -> 256
$string = str_replace ("\300", 'Ā', $string);
- // latin capital letter i with ogonek 199 -> 302
$string = str_replace ("\307", 'Į', $string);
- // latin capital letter c with caron 200 -> 268
$string = str_replace ("\310", 'Č', $string);
- // latin capital letter e with ogonek 202 -> 280
$string = str_replace ("\312", 'Ę', $string);
- // latin capital letter e with dot above 204 -> 278
$string = str_replace ("\314", 'Ė', $string);
- // latin capital letter i with macron 207 -> 298
$string = str_replace ("\317", 'Ī', $string);
- // latin capital letter d with stroke 208 -> 272
$string = str_replace ("\320", 'Đ', $string);
- // latin capital letter n with cedilla 209 -> 325
$string = str_replace ("\321", 'Ņ', $string);
- // latin capital letter o with macron 210 -> 332
$string = str_replace ("\322", 'Ō', $string);
- // latin capital letter k with cedilla 211 -> 310
$string = str_replace ("\323", 'Ķ', $string);
- // latin capital letter u with ogonek 217 -> 370
$string = str_replace ("\331", 'Ų', $string);
- // latin capital letter u with tilde 221 -> 360
$string = str_replace ("\335", 'Ũ', $string);
- // latin capital letter u with macron 222 -> 362
$string = str_replace ("\336", 'Ū', $string);
- // latin small letter a with macron 224 -> 257
$string = str_replace ("\340", 'ā', $string);
- // latin small letter i with ogonek 231 -> 303
$string = str_replace ("\347", 'į', $string);
- // latin small letter c with caron 232 -> 269
$string = str_replace ("\350", 'č', $string);
- // latin small letter e with ogonek 234 -> 281
$string = str_replace ("\352", 'ę', $string);
- // latin small letter e with dot above 236 -> 279
$string = str_replace ("\354", 'ė', $string);
- // latin small letter i with macron 239 -> 299
$string = str_replace ("\357", 'ī', $string);
- // latin small letter d with stroke 240 -> 273
$string = str_replace ("\360", 'đ', $string);
- // latin small letter n with cedilla 241 -> 326
$string = str_replace ("\361", 'ņ', $string);
- // latin small letter o with macron 242 -> 333
$string = str_replace ("\362", 'ō', $string);
- // latin small letter k with cedilla 243 -> 311
$string = str_replace ("\363", 'ķ', $string);
- // latin small letter u with ogonek 249 -> 371
$string = str_replace ("\371", 'ų', $string);
- // latin small letter u with tilde 253 -> 361
$string = str_replace ("\375", 'ũ', $string);
- // latin small letter u with macron 254 -> 363
$string = str_replace ("\376", 'ū', $string);
- // dot above 255 -> 729
$string = str_replace ("\377", '˙', $string);
// rest of charset is the same as ISO-8859-1
return $string;
}
+/*
+ ISO/IEC 8859-9:1999 Latin Alphabet No. 5
+
+ Converts an iso-8859-9 encoded string for display: the six code
+ positions that differ from iso-8859-1 are replaced with HTML numeric
+ character references, everything else is handed to the iso-8859-1
+ decoder.
+
+ $string - raw 8-bit text in iso-8859-9
+ returns - the converted text, or $string untouched when the page is
+           already rendered in iso-8859-9 or contains no 8-bit bytes
+*/
+function charset_decode_iso_8859_9 ($string) {
+    global $default_charset, $languages, $sm_notAlias;
+
+    /* No conversion needed when the output charset is iso-8859-9 itself */
+    if (strtolower($default_charset) == 'iso-8859-9')
+        return $string;
+    if (isset($languages[$sm_notAlias]['CHARSET']) &&
+        strtolower($languages[$sm_notAlias]['CHARSET']) == 'iso-8859-9')
+        return $string;
+
+    /* Only do the slow convert if there are 8-bit characters */
+    /* (preg_match is used because ereg() is gone from current PHP) */
+    if (! preg_match("/[\200-\377]/", $string))
+        return $string;
+
+    /* byte => numeric character reference; pure ASCII output, so the
+       iso-8859-1 pass below cannot touch what was already converted */
+    $latin5 = array(
+        // latin capital letter g with breve 208->286
+        "\320" => '&#286;',
+        // latin capital letter i with dot above 221->304
+        "\335" => '&#304;',
+        // latin capital letter s with cedilla 222->350
+        "\336" => '&#350;',
+        // latin small letter g with breve 240->287
+        "\360" => '&#287;',
+        // latin small letter dotless i 253->305
+        "\375" => '&#305;',
+        // latin small letter s with cedilla 254->351
+        "\376" => '&#351;'
+    );
+    $string = strtr($string, $latin5);
+
+    // rest of charset is the same as ISO-8859-1
+    return (charset_decode_iso_8859_1($string));
+}
+
+
/*
ISO/IEC 8859-13:1998 Latin Alphabet No. 7 (Baltic Rim)
*/
if (! ereg("[\200-\377]", $string))
return $string;
- // right double quotation mark 161 -> 8221
$string = str_replace ("\241", '”', $string);
- // double low-9 quotation mark 165 -> 8222
$string = str_replace ("\245", '„', $string);
- // latin capital letter o with stroke 168 -> 216
$string = str_replace ("\250", 'Ø', $string);
- // latin capital letter r with cedilla 170 -> 342
$string = str_replace ("\252", 'Ŗ', $string);
- // latin capital letter ae 175 -> 198
$string = str_replace ("\257", 'Æ', $string);
- // left double quotation mark 180 -> 8220
$string = str_replace ("\264", '“', $string);
- // latin small letter o with stroke 184 -> 248
$string = str_replace ("\270", 'ø', $string);
- // latin small letter r with cedilla 186 -> 343
$string = str_replace ("\272", 'ŗ', $string);
- // latin small letter ae 191 -> 230
$string = str_replace ("\277", 'æ', $string);
- // latin capital letter a with ogonek 192 -> 260
$string = str_replace ("\300", 'Ą', $string);
- // latin capital letter i with ogonek 193 -> 302
$string = str_replace ("\301", 'Į', $string);
- // latin capital letter a with macron 194 -> 256
$string = str_replace ("\302", 'Ā', $string);
- // latin capital letter c with acute 195 -> 262
$string = str_replace ("\303", 'Ć', $string);
- // latin capital letter e with ogonek 198 -> 280
$string = str_replace ("\306", 'Ę', $string);
- // latin capital letter e with macron 199 -> 274
$string = str_replace ("\307", 'Ē', $string);
- // latin capital letter c with caron 200 -> 268
$string = str_replace ("\310", 'Č', $string);
- // latin capital letter z with acute 202 -> 377
$string = str_replace ("\312", 'Ź', $string);
- // latin capital letter e with dot above 203 -> 278
$string = str_replace ("\313", 'Ė', $string);
- // latin capital letter g with cedilla 204 -> 290
$string = str_replace ("\314", 'Ģ', $string);
- // latin capital letter k with cedilla 205 -> 310
$string = str_replace ("\315", 'Ķ', $string);
- // latin capital letter i with macron 206 -> 298
$string = str_replace ("\316", 'Ī', $string);
- // latin capital letter l with cedilla 207 -> 315
$string = str_replace ("\317", 'Ļ', $string);
- // latin capital letter s with caron 208 -> 352
$string = str_replace ("\320", 'Š', $string);
- // latin capital letter n with acute 209 -> 323
$string = str_replace ("\321", 'Ń', $string);
- // latin capital letter n with cedilla 210 -> 325
$string = str_replace ("\322", 'Ņ', $string);
- // latin capital letter o with macron 212 -> 332
$string = str_replace ("\324", 'Ō', $string);
- // latin capital letter u with ogonek 216 -> 370
$string = str_replace ("\330", 'Ų', $string);
- // latin capital letter l with stroke 217 -> 321
$string = str_replace ("\331", 'Ł', $string);
- // latin capital letter s with acute 218 -> 346
$string = str_replace ("\332", 'Ś', $string);
- // latin capital letter u with macron 219 -> 362
$string = str_replace ("\333", 'Ū', $string);
- // latin capital letter z with dot above 221 -> 379
$string = str_replace ("\335", 'Ż', $string);
- // latin capital letter z with caron 222 -> 381
$string = str_replace ("\336", 'Ž', $string);
- // latin small letter a with ogonek 224 -> 261
$string = str_replace ("\340", 'ą', $string);
- // latin small letter i with ogonek 225 -> 303
$string = str_replace ("\341", 'į', $string);
- // latin small letter a with macron 226 -> 257
$string = str_replace ("\342", 'ā', $string);
- // latin small letter c with acute 227 -> 263
$string = str_replace ("\343", 'ć', $string);
- // latin small letter e with ogonek 230 -> 281
$string = str_replace ("\346", 'ę', $string);
- // latin small letter e with macron 231 -> 275
$string = str_replace ("\347", 'ē', $string);
- // latin small letter c with caron 232 -> 269
$string = str_replace ("\350", 'č', $string);
- // latin small letter z with acute 234 -> 378
$string = str_replace ("\352", 'ź', $string);
- // latin small letter e with dot above 235 -> 279
$string = str_replace ("\353", 'ė', $string);
- // latin small letter g with cedilla 236 -> 291
$string = str_replace ("\354", 'ģ', $string);
- // latin small letter k with cedilla 237 -> 311
$string = str_replace ("\355", 'ķ', $string);
- // latin small letter i with macron 238 -> 299
$string = str_replace ("\356", 'ī', $string);
- // latin small letter l with cedilla 239 -> 316
$string = str_replace ("\357", 'ļ', $string);
- // latin small letter s with caron 240 -> 253
$string = str_replace ("\360", 'š', $string);
- // latin small letter n with acute 241 -> 324
$string = str_replace ("\361", 'ń', $string);
- // latin small letter n with cedilla 242 -> 326
$string = str_replace ("\362", 'ņ', $string);
- // latin small letter o with macron 244 -> 333
$string = str_replace ("\364", 'ō', $string);
- // latin small letter u with ogonek 248 -> 371
$string = str_replace ("\370", 'ų', $string);
- // latin small letter l with stroke 249 -> 322
$string = str_replace ("\371", 'ł', $string);
- // latin small letter s with acute 250 -> 347
$string = str_replace ("\372", 'ś', $string);
- // latin small letter u with macron 251 -> 363
$string = str_replace ("\373", 'ū', $string);
- // latin small letter z with dot above 253 -> 380
$string = str_replace ("\375", 'ż', $string);
- // latin small letter z with caron 254 -> 382
$string = str_replace ("\376", 'ž', $string);
- // right single quotation mark 255 -> 8217
$string = str_replace ("\377", '’', $string);
// rest of charset is the same as ISO-8859-1
}
/*
- windows-1257 (BaltRim)
+ windows-1254 (Turkish)
*/
-function charset_decode_windows_1257 ($string) {
+function charset_decode_windows_1254 ($string) {
global $default_charset, $languages, $sm_notAlias;
- if (strtolower($default_charset) == 'windows-1257')
+ if (strtolower($default_charset) == 'windows-1254')
return $string;
- if (strtolower($languages[$sm_notAlias]['CHARSET']) == 'windows-1257')
+ if (strtolower($languages[$sm_notAlias]['CHARSET']) == 'windows-1254')
return $string;
/* Only do the slow convert if there are 8-bit characters */
$string = str_replace("\200", '€', $string);
// Single low-9 quotation mark 130 -> 8218
$string = str_replace("\202", '‚', $string);
+ // latin small letter f with hook 131 -> 402
+ $string = str_replace("\203", 'ƒ', $string);
// Double low-9 quotation mark 132 -> 8222
$string = str_replace("\204", '„', $string);
// horizontal ellipsis 133 -> 8230
$string = str_replace("\206", '†', $string);
// double dagger 135 -> 8225
$string = str_replace("\207", '‡', $string);
+ // modifier letter circumflex accent 136->710
+ $string = str_replace("\210", 'ˆ', $string);
// per mille sign 137 -> 8240
$string = str_replace("\211", '‰', $string);
+ // latin capital letter s with caron 138 -> 352
+ $string = str_replace("\212", 'Š', $string);
// single left-pointing angle quotation mark 139 -> 8249
$string = str_replace("\213", '‹', $string);
- // diaeresis 141 -> 168
- $string = str_replace("\215", '¨', $string);
- // caron 142 -> 711
- $string = str_replace("\216", 'ˇ', $string);
- // cedilla 143 -> 184
- $string = str_replace("\217", '¸', $string);
+ // latin capital ligature oe 140 -> 338
+ $string = str_replace("\214", 'Œ', $string);
// left single quotation mark 145 -> 8216
$string = str_replace("\221", '‘', $string);
// right single quotation mark 146 -> 8217
$string = str_replace("\226", '–', $string);
// em dash 151 -> 8212
$string = str_replace("\227", '—', $string);
+ // small tilde 152 -> 732
+ $string = str_replace("\230", '˜', $string);
// trade mark sign 153 -> 8482
$string = str_replace("\231", '™', $string);
+ // latin small letter s with caron 154 -> 353
+ $string = str_replace("\232", 'š', $string);
// single right-pointing angle quotation mark 155 -> 8250
$string = str_replace("\233", '›', $string);
- // macron 157 -> 175
+ // latin small ligature oe 156 -> 339
+ $string = str_replace("\234", 'œ', $string);
+ // latin capital letter y with diaeresis 159->376
+ $string = str_replace("\237", 'Ÿ', $string);
+ // latin capital letter g with breve 208->286
+ $string = str_replace("\320", 'Ğ', $string);
+ // latin capital letter i with dot above 221->304
+ $string = str_replace("\335", 'İ', $string);
+ // latin capital letter s with cedilla 222->350
+ $string = str_replace("\336", 'Ş', $string);
+ // latin small letter g with breve 240->287
+ $string = str_replace("\360", 'ğ', $string);
+ // latin small letter dotless i 253->305
+ $string = str_replace("\375", 'ı', $string);
+ // latin small letter s with cedilla 254->351
+ $string = str_replace("\376", 'ş', $string);
+
+ // Rest of charset is like iso-8859-1
+ return (charset_decode_iso_8859_1($string));
+}
+
+/*
+ windows-1253 (Greek)
+
+ Converts a windows-1253 encoded string for display: every code
+ position that differs from iso-8859-1 is replaced with an HTML numeric
+ character reference, everything else is handed to the iso-8859-1
+ decoder.
+
+ $string - raw 8-bit text in windows-1253
+ returns - the converted text, or $string untouched when the page is
+           already rendered in windows-1253 or contains no 8-bit bytes
+ */
+function charset_decode_windows_1253 ($string) {
+    global $default_charset, $languages, $sm_notAlias;
+
+    /* No conversion needed when the output charset is windows-1253
+       itself (same short-circuit the other decoders in this file use) */
+    if (strtolower($default_charset) == 'windows-1253')
+        return $string;
+    if (isset($languages[$sm_notAlias]['CHARSET']) &&
+        strtolower($languages[$sm_notAlias]['CHARSET']) == 'windows-1253')
+        return $string;
+
+    /* Only do the slow convert if there are 8-bit characters */
+    /* (preg_match is used because ereg() is gone from current PHP) */
+    if (! preg_match("/[\200-\377]/", $string))
+        return $string;
+
+    /* byte => numeric character reference */
+    $greek = array(
+        "\200" => '&#8364;',  // euro sign
+        "\202" => '&#8218;',  // single low-9 quotation mark
+        "\203" => '&#402;',   // latin small letter f with hook
+        "\204" => '&#8222;',  // double low-9 quotation mark
+        "\205" => '&#8230;',  // horizontal ellipsis
+        "\206" => '&#8224;',  // dagger
+        "\207" => '&#8225;',  // double dagger
+        "\211" => '&#8240;',  // per mille sign
+        "\213" => '&#8249;',  // single left-pointing angle quotation mark
+        "\221" => '&#8216;',  // left single quotation mark
+        "\222" => '&#8217;',  // right single quotation mark
+        "\223" => '&#8220;',  // left double quotation mark
+        "\224" => '&#8221;',  // right double quotation mark
+        "\225" => '&#8226;',  // bullet
+        "\226" => '&#8211;',  // en dash
+        "\227" => '&#8212;',  // em dash
+        "\231" => '&#8482;',  // trade mark sign
+        "\233" => '&#8250;',  // single right-pointing angle quotation mark
+        "\241" => '&#901;',   // greek dialytika tonos
+        "\242" => '&#902;',   // greek capital letter alpha with tonos
+        "\257" => '&#8213;',  // horizontal bar
+        "\264" => '&#900;',   // greek tonos
+        "\270" => '&#904;',   // greek capital letter epsilon with tonos
+        "\271" => '&#905;',   // greek capital letter eta with tonos
+        "\272" => '&#906;',   // greek capital letter iota with tonos
+        "\274" => '&#908;'    // greek capital letter omicron with tonos
+    );
+
+    /* Bytes 190-254 sit at a fixed offset of 720 from their code points.
+       Byte 189 (one half) is the same as in iso-8859-1 and must NOT be
+       shifted, so the loop starts at 190. */
+    for ($byte = 190; $byte <= 254; $byte++)
+        $greek[chr($byte)] = '&#' . ($byte + 720) . ';';
+
+    /* strtr() walks the input once and never rescans its own output,
+       so an already inserted reference cannot be converted a second time */
+    $string = strtr($string, $greek);
+
+    // Rest of charset is like iso-8859-1
+    return (charset_decode_iso_8859_1($string));
+}
+
+
+/*
+ windows-1257 (BaltRim)
+ */
+function charset_decode_windows_1257 ($string) {
+ global $default_charset, $languages, $sm_notAlias;
+
+ if (strtolower($default_charset) == 'windows-1257')
+ return $string;
+ if (strtolower($languages[$sm_notAlias]['CHARSET']) == 'windows-1257')
+ return $string;
+
+ /* Only do the slow convert if there are 8-bit characters */
+ if (! ereg("[\200-\377]", $string))
+ return $string;
+
+ $string = str_replace("\200", '€', $string);
+ $string = str_replace("\202", '‚', $string);
+ $string = str_replace("\204", '„', $string);
+ $string = str_replace("\205", '…', $string);
+ $string = str_replace("\206", '†', $string);
+ $string = str_replace("\207", '‡', $string);
+ $string = str_replace("\211", '‰', $string);
+ $string = str_replace("\213", '‹', $string);
+ $string = str_replace("\215", '¨', $string);
+ $string = str_replace("\216", 'ˇ', $string);
+ $string = str_replace("\217", '¸', $string);
+ $string = str_replace("\221", '‘', $string);
+ $string = str_replace("\222", '’', $string);
+ $string = str_replace("\223", '“', $string);
+ $string = str_replace("\224", '”', $string);
+ $string = str_replace("\225", '•', $string);
+ $string = str_replace("\226", '–', $string);
+ $string = str_replace("\227", '—', $string);
+ $string = str_replace("\231", '™', $string);
+ $string = str_replace("\233", '›', $string);
$string = str_replace("\235", '¯', $string);
- // ogonek 158 -> 731
$string = str_replace("\236", '˛', $string);
- // latin capital letter o with stroke 168 -> 216
$string = str_replace ("\250", 'Ø', $string);
- // latin capital letter r with cedilla 170 -> 342
$string = str_replace ("\252", 'Ŗ', $string);
- // latin capital letter ae 175 -> 198
$string = str_replace ("\257", 'Æ', $string);
- // latin small letter o with stroke 184 -> 248
$string = str_replace ("\270", 'ø', $string);
- // latin small letter r with cedilla 186 -> 343
$string = str_replace ("\272", 'ŗ', $string);
- // latin small letter ae 191 -> 230
$string = str_replace ("\277", 'æ', $string);
- // latin capital letter a with ogonek 192 -> 260
$string = str_replace ("\300", 'Ą', $string);
- // latin capital letter i with ogonek 193 -> 302
$string = str_replace ("\301", 'Į', $string);
- // latin capital letter a with macron 194 -> 256
$string = str_replace ("\302", 'Ā', $string);
- // latin capital letter c with acute 195 -> 262
$string = str_replace ("\303", 'Ć', $string);
- // latin capital letter e with ogonek 198 -> 280
$string = str_replace ("\306", 'Ę', $string);
- // latin capital letter e with macron 199 -> 274
$string = str_replace ("\307", 'Ē', $string);
- // latin capital letter c with caron 200 -> 268
$string = str_replace ("\310", 'Č', $string);
- // latin capital letter z with acute 202 -> 377
$string = str_replace ("\312", 'Ź', $string);
- // latin capital letter e with dot above 203 -> 278
$string = str_replace ("\313", 'Ė', $string);
- // latin capital letter g with cedilla 204 -> 290
$string = str_replace ("\314", 'Ģ', $string);
- // latin capital letter k with cedilla 205 -> 310
$string = str_replace ("\315", 'Ķ', $string);
- // latin capital letter i with macron 206 -> 298
$string = str_replace ("\316", 'Ī', $string);
- // latin capital letter l with cedilla 207 -> 315
$string = str_replace ("\317", 'Ļ', $string);
- // latin capital letter s with caron 208 -> 352
$string = str_replace ("\320", 'Š', $string);
- // latin capital letter n with acute 209 -> 323
$string = str_replace ("\321", 'Ń', $string);
- // latin capital letter n with cedilla 210 -> 325
$string = str_replace ("\322", 'Ņ', $string);
- // latin capital letter o with macron 212 -> 332
$string = str_replace ("\324", 'Ō', $string);
- // latin capital letter u with ogonek 216 -> 370
$string = str_replace ("\330", 'Ų', $string);
- // latin capital letter l with stroke 217 -> 321
$string = str_replace ("\331", 'Ł', $string);
- // latin capital letter r with acute 218 -> 340
$string = str_replace ("\332", 'Ŕ', $string);
- // latin capital letter u with macron 219 -> 362
$string = str_replace ("\333", 'Ū', $string);
- // latin capital letter z with dot above 221 -> 379
$string = str_replace ("\335", 'Ż', $string);
- // latin capital letter z with caron 222 -> 381
$string = str_replace ("\336", 'Ž', $string);
- // latin small letter a with ogonek 224 -> 261
$string = str_replace ("\340", 'ą', $string);
- // latin small letter i with ogonek 225 -> 303
$string = str_replace ("\341", 'į', $string);
- // latin small letter a with macron 226 -> 257
$string = str_replace ("\342", 'ā', $string);
- // latin small letter c with acute 227 -> 263
$string = str_replace ("\343", 'ć', $string);
- // latin small letter e with ogonek 230 -> 281
$string = str_replace ("\346", 'ę', $string);
- // latin small letter e with macron 231 -> 275
$string = str_replace ("\347", 'ē', $string);
- // latin small letter c with caron 232 -> 269
$string = str_replace ("\350", 'č', $string);
- // latin small letter z with acute 234 -> 378
$string = str_replace ("\352", 'ź', $string);
- // latin small letter e with dot above 235 -> 279
$string = str_replace ("\353", 'ė', $string);
- // latin small letter g with cedilla 236 -> 291
$string = str_replace ("\354", 'ģ', $string);
- // latin small letter k with cedilla 237 -> 311
$string = str_replace ("\355", 'ķ', $string);
- // latin small letter i with macron 238 -> 299
$string = str_replace ("\356", 'ī', $string);
- // latin small letter l with cedilla 239 -> 316
$string = str_replace ("\357", 'ļ', $string);
- // latin small letter s with caron 240 -> 253
$string = str_replace ("\360", 'š', $string);
- // latin small letter n with acute 241 -> 324
$string = str_replace ("\361", 'ń', $string);
- // latin small letter n with cedilla 242 -> 326
$string = str_replace ("\362", 'ņ', $string);
- // latin small letter o with macron 244 -> 333
$string = str_replace ("\364", 'ō', $string);
- // latin small letter u with ogonek 248 -> 371
$string = str_replace ("\370", 'ų', $string);
- // latin small letter l with stroke 249 -> 322
$string = str_replace ("\371", 'ł', $string);
- // latin small letter s with acute 250 -> 347
$string = str_replace ("\372", 'ś', $string);
- // latin small letter u with macron 251 -> 363
$string = str_replace ("\373", 'ū', $string);
- // latin small letter z with dot above 253 -> 380
$string = str_replace ("\375", 'ż', $string);
- // latin small letter z with caron 254 -> 382
$string = str_replace ("\376", 'ž', $string);
- // dot above 255 -> 729
$string = str_replace ("\377", '˙', $string);
// Rest of charset is like iso-8859-1
// There is no en_EN! There is en_US, en_BR, en_AU, and so forth,
// but who cares about !US, right? Right? :)
+$languages['el_GR']['NAME'] = 'Greek';
+$languages['el_GR']['CHARSET'] = 'iso-8859-7';
+$languages['el']['ALIAS'] = 'el_GR';
+
$languages['en_US']['NAME'] = 'English';
$languages['en_US']['CHARSET'] = 'iso-8859-1';
$languages['en']['ALIAS'] = 'en_US';
}
}
+/*
+ utf-8
+
+ Converts a utf-8 encoded string for display by replacing every
+ multi-byte sequence with an HTML numeric character reference.
+
+ A sequence is one lead byte followed by continuation bytes
+ (octal 200-277, six payload bits each):
+   lead 300-337 + 1 continuation:  (a-192)*64 + (b-128)
+   lead 340-357 + 2 continuations: (a-224)*4096 + (b-128)*64 + (c-128)
+   lead 360-367 + 3 continuations: (a-240)*262144 + (b-128)*4096
+                                   + (c-128)*64 + (d-128)
+ Bytes that do not form one of these sequences are left as they are.
+
+ $string - raw utf-8 text
+ returns - the converted text, or $string untouched when the page is
+           already rendered in utf-8 or contains no 8-bit bytes
+
+ please test and report problems to tokul@users.sourceforge.net
+*/
+function charset_decode_utf8 ($string) {
+    global $default_charset, $languages, $sm_notAlias;
+
+    /* No conversion needed when the output charset is utf-8 itself */
+    if (strtolower($default_charset) == 'utf-8')
+        return $string;
+    if (isset($languages[$sm_notAlias]['CHARSET']) &&
+        strtolower($languages[$sm_notAlias]['CHARSET']) == 'utf-8')
+        return $string;
+
+    /* Only do the slow convert if there are 8-bit characters */
+    /* (preg_match is used because ereg() is gone from current PHP) */
+    if (! preg_match("/[\200-\377]/", $string))
+        return $string;
+
+    /* One pass over four, three and two byte sequences. A callback is
+       used instead of the /e modifier, which current PHP no longer
+       supports. */
+    return preg_replace_callback(
+        "/[\360-\367][\200-\277]{3}|[\340-\357][\200-\277]{2}|[\300-\337][\200-\277]/",
+        'charset_decode_utf8_sequence',
+        $string);
+}
+
+/*
+ Helper for charset_decode_utf8(): turns one matched utf-8 sequence
+ ($match[0], two to four bytes) into its numeric character reference.
+*/
+function charset_decode_utf8_sequence ($match) {
+    $bytes  = $match[0];
+    $length = strlen($bytes);
+
+    /* strip the length marker bits from the lead byte */
+    if ($length == 4) {
+        $code = ord($bytes[0]) - 240;
+    } else if ($length == 3) {
+        $code = ord($bytes[0]) - 224;
+    } else {
+        $code = ord($bytes[0]) - 192;
+    }
+
+    /* every continuation byte contributes six more bits */
+    for ($i = 1; $i < $length; $i++)
+        $code = $code * 64 + (ord($bytes[$i]) - 128);
+
+    return '&#' . $code . ';';
+}
/*
* Japanese charset extra function