X-Git-Url: https://vcs.fsf.org/?p=squirrelmail.git;a=blobdiff_plain;f=functions%2Fstrings.php;h=84a70af4025bb2b7bed8385b016d52f2d9de62ca;hp=49addede63768e15b53622b685a6e9bb7a716551;hb=98abf40863b317860ae2cabba04f97b69103f556;hpb=1d7f7b27de6d84f9f9da1c4fae3487bcb65fdd1c diff --git a/functions/strings.php b/functions/strings.php index 49addede..84a70af4 100644 --- a/functions/strings.php +++ b/functions/strings.php @@ -77,8 +77,6 @@ function sqMakeNewLine (&$str, $citeLevel, &$column) { /** * Checks for spaces in strings - only used if PHP doesn't have native ctype support * - * @author Tomas Kuliavas - * * You might be able to rewrite the function by adding short evaluation form. * * possible problems: @@ -127,7 +125,7 @@ function &sqBodyWrap (&$body, $wrap) { $outString = ''; // current column since the last newline in the outstring $outStringCol = 0; - $length = strlen($body); + $length = sq_strlen($body); // where we are in the original string $pos = 0; // the number of >>> citation markers we are currently at @@ -139,12 +137,12 @@ function &sqBodyWrap (&$body, $wrap) { // we're at the beginning of a line, get the new cite level $newCiteLevel = 0; - while (($pos < $length) && ($body{$pos} == '>')) { + while (($pos < $length) && (sq_substr($body,$pos,1) == '>')) { $newCiteLevel++; $pos++; // skip over any spaces interleaved among the cite markers - while (($pos < $length) && ($body{$pos} == ' ')) { + while (($pos < $length) && (sq_substr($body,$pos,1) == ' ')) { $pos++; @@ -157,8 +155,8 @@ function &sqBodyWrap (&$body, $wrap) { // special case: if this is a blank line then maintain it // (i.e. try to preserve original paragraph breaks) // unless they occur at the very beginning of the text - if (($body{$pos} == "\n" ) && (strlen($outString) != 0)) { - $outStringLast = $outString{strlen($outString) - 1}; + if ((sq_substr($body,$pos,1) == "\n" ) && (sq_strlen($outString) != 0)) { + $outStringLast = $outString{sq_strlen($outString) - 1}; if ($outStringLast != "\n") { $outString .= "\n"; } @@ -192,7 +190,7 @@ function &sqBodyWrap (&$body, $wrap) { } // find the next newline -- we don't want to go further than that - $nextNewline = strpos ($body, "\n", $pos); + $nextNewline = sq_strpos ($body, "\n", $pos); if ($nextNewline === FALSE) { $nextNewline = $length; } @@ -201,7 +199,7 @@ function &sqBodyWrap (&$body, $wrap) { // will work fine for this. Maybe revisit this later though // (for completeness more than anything else, I think) if ($citeLevel == 0) { - $outString .= substr ($body, $pos, ($nextNewline - $pos)); + $outString .= sq_substr ($body, $pos, ($nextNewline - $pos)); $outStringCol = $nextNewline - $pos; if ($nextNewline != $length) { sqMakeNewLine ($outString, 0, $outStringCol); @@ -217,7 +215,7 @@ function &sqBodyWrap (&$body, $wrap) { // the next newline while ($pos < $nextNewline) { // skip over initial spaces - while (($pos < $nextNewline) && (ctype_space ($body{$pos}))) { + while (($pos < $nextNewline) && (ctype_space (sq_substr($body,$pos,1)))) { $pos++; } // if this is a short line then just append it and continue outer loop @@ -225,24 +223,24 @@ function &sqBodyWrap (&$body, $wrap) { // if this is the final line in the input string then include // any trailing newlines // echo substr($body,$pos,$wrap). "
"; - if (($nextNewline + 1 == $length) && ($body{$nextNewline} == "\n")) { + if (($nextNewline + 1 == $length) && (sq_substr($body,$nextNewline,1) == "\n")) { $nextNewline++; } // trim trailing spaces $lastRealChar = $nextNewline; - while (($lastRealChar > $pos && $lastRealChar < $length) && (ctype_space ($body{$lastRealChar}))) { + while (($lastRealChar > $pos && $lastRealChar < $length) && (ctype_space (sq_substr($body,$lastRealChar,1)))) { $lastRealChar--; } // decide if appending the short string is what we want - if (($nextNewline < $length && $body{$nextNewline} == "\n") && + if (($nextNewline < $length && sq_substr($body,$nextNewline,1) == "\n") && isset($lastRealChar)) { $mypos = $pos; //check the first word: - while (($mypos < $length) && ($body{$mypos} == '>')) { + while (($mypos < $length) && (sq_substr($body,$mypos,1) == '>')) { $mypos++; // skip over any spaces interleaved among the cite markers - while (($mypos < $length) && ($body{$mypos} == ' ')) { + while (($mypos < $length) && (sq_substr($body,$mypos,1) == ' ')) { $mypos++; } } @@ -255,15 +253,15 @@ function &sqBodyWrap (&$body, $wrap) { } */ - $firstword = substr($body,$mypos,strpos($body,' ',$mypos) - $mypos); + $firstword = sq_substr($body,$mypos,sq_strpos($body,' ',$mypos) - $mypos); //if ($dowrap || $ldnspacecnt > 1 || ($firstword && ( if (!$smartwrap || $firstword && ( $firstword{0} == '-' || $firstword{0} == '+' || $firstword{0} == '*' || - $firstword{0} == strtoupper($firstword{0}) || + sq_substr($firstword,0,1) == sq_strtoupper(sq_substr($firstword,0,1)) || strpos($firstword,':'))) { - $outString .= substr($body,$pos,($lastRealChar - $pos+1)); + $outString .= sq_substr($body,$pos,($lastRealChar - $pos+1)); $outStringCol += ($lastRealChar - $pos); sqMakeNewLine($outString,$citeLevel,$outStringCol); $nextNewline++; @@ -274,7 +272,7 @@ function &sqBodyWrap (&$body, $wrap) { } - $outString .= substr ($body, $pos, ($lastRealChar - $pos + 1)); + $outString .= sq_substr ($body, $pos, ($lastRealChar - $pos + 1)); $outStringCol += ($lastRealChar - $pos); $pos = $nextNewline + 1; continue; @@ -293,7 +291,7 @@ function &sqBodyWrap (&$body, $wrap) { // start looking backwards for whitespace to break at. $breakPoint = $eol; - while (($breakPoint > $pos) && (! ctype_space ($body{$breakPoint}))) { + while (($breakPoint > $pos) && (! ctype_space (sq_substr($body,$breakPoint,1)))) { $breakPoint--; } @@ -326,13 +324,13 @@ function &sqBodyWrap (&$body, $wrap) { } // skip newlines or whitespace at the beginning of the string - $substring = substr ($body, $pos, ($breakPoint - $pos)); + $substring = sq_substr ($body, $pos, ($breakPoint - $pos)); $substring = rtrim ($substring); // do rtrim and ctype_space have the same ideas about whitespace? $outString .= $substring; - $outStringCol += strlen ($substring); + $outStringCol += sq_strlen ($substring); // advance past the whitespace which caused the wrap $pos = $breakPoint; - while (($pos < $length) && (ctype_space ($body{$pos}))) { + while (($pos < $length) && (ctype_space (sq_substr($body,$pos,1)))) { $pos++; } if ($pos < $length) { @@ -1065,6 +1063,7 @@ function sq_mb_list_encodings() { 'koi8-u', 'big5', 'gb2312', + 'gb18030', 'windows-1251', 'windows-1255', 'windows-1256', @@ -1094,8 +1093,9 @@ function sq_mb_list_encodings() { * Function returns number of characters in string. * * Returned number might be different from number of bytes in string, - * if $charset is multibyte charset. Currently only utf-8 charset is - * supported. + * if $charset is multibyte charset. Detection depends on mbstring + * functions. If mbstring does not support tested multibyte charset, + * vanilla string length function is used. * @param string $str string * @param string $charset charset * @since 1.5.1 @@ -1115,83 +1115,15 @@ function sq_strlen($str, $charset=''){ // lowercase charset name $charset=strtolower($charset); - // set initial returned length number - $real_length=0; + // Use mbstring only with listed charsets + $aList_of_mb_charsets=array('utf-8','big5','gb2312','gb18030','euc-jp','euc-cn','euc-tw','euc-kr'); // calculate string length according to charset - // function can be modulized same way we modulize decode/encode/htmlentities - if ($charset=='utf-8') { - if (function_exists('mb_strlen')) { - $real_length = mb_strlen($str,'utf-8'); - } else { - // function needs length of string in bytes. - // mbstring overloading might break it - $str_length=strlen($str); - $str_index=0; - while ($str_index < $str_length) { - // start of internal utf-8 multibyte character detection - if (preg_match("/[\xC0-\xDF]/",$str[$str_index]) && - isset($str[$str_index+1]) && - preg_match("/[\x80-\xBF]/",$str[$str_index+1])) { - // two byte utf-8 - $str_index=$str_index+2; - $real_length++; - } elseif (preg_match("/[\xE0-\xEF]/",$str[$str_index]) && - isset($str[$str_index+2]) && - preg_match("/[\x80-\xBF][\x80-\xBF]/",$str[$str_index+1].$str[$str_index+2])) { - // three byte utf-8 - $str_index=$str_index+3; - $real_length++; - } elseif (preg_match("/[\xF0-\xF7]/",$str[$str_index]) && - isset($str[$str_index+3]) && - preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF]/",$str[$str_index+1].$str[$str_index+2].$str[$str_index+3])) { - // four byte utf-8 - $str_index=$str_index+4; - $real_length++; - } elseif (preg_match("/[\xF8-\xFB]/",$str[$str_index]) && - isset($str[$str_index+4]) && - preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF]/", - $str[$str_index+1].$str[$str_index+2].$str[$str_index+3].$str[$str_index+4])) { - // five byte utf-8 - $str_index=$str_index+5; - $real_length++; - } elseif (preg_match("/[\xFC-\xFD]/",$str[$str_index]) && - isset($str[$str_index+5]) && - preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF]/", - $str[$str_index+1].$str[$str_index+2].$str[$str_index+3].$str[$str_index+4].$str[$str_index+5])) { - // six byte utf-8 - $str_index=$str_index+6; - $real_length++; - } else { - $str_index++; - $real_length++; - } - // end of internal utf-8 multibyte character detection - } - } - // end of utf-8 length detection - } elseif ($charset=='big5') { - // TODO: add big5 string length detection - $real_length=strlen($str); - } elseif ($charset=='gb2312') { - // TODO: add gb2312 string length detection - $real_length=strlen($str); - } elseif ($charset=='gb18030') { - // TODO: add gb18030 string length detection - $real_length=strlen($str); - } elseif ($charset=='euc-jp') { - // TODO: add euc-jp string length detection - $real_length=strlen($str); - } elseif ($charset=='euc-cn') { - // TODO: add euc-cn string length detection - $real_length=strlen($str); - } elseif ($charset=='euc-tw') { - // TODO: add euc-tw string length detection - $real_length=strlen($str); - } elseif ($charset=='euc-kr') { - // TODO: add euc-kr string length detection - $real_length=strlen($str); + if (in_array($charset,$aList_of_mb_charsets) && in_array($charset,sq_mb_list_encodings())) { + $real_length = mb_strlen($str,$charset); } else { + // own strlen detection code is removed because missing strpos, + // strtoupper and substr implementations break string wrapping. $real_length=strlen($str); } return $real_length; @@ -1229,5 +1161,93 @@ function sq_str_pad($string, $width, $pad, $padtype, $charset='') { } return $padded_string; } + +/** + * Wrapper that is used to switch between vanilla and multibyte substr + * functions. + * @param string $string + * @param integer $start + * @param integer $length + * @param string $charset + * @return string + * @since 1.5.1 + * @link http://www.php.net/substr + * @link http://www.php.net/mb_substr + */ +function sq_substr($string,$start,$length,$charset='auto') { + // use automatic charset detection, if function call asks for it + if ($charset=='auto') { + global $default_charset; + set_my_charset(); + $charset=$default_charset; + } + $charset = strtolower($charset); + if (function_exists('mb_internal_encoding') && + in_array($charset,sq_mb_list_encodings())) { + return mb_substr($string,$start,$length,$charset); + } + // TODO: add mbstring independent code + + // use vanilla string functions as last option + return substr($string,$start,$length); +} + +/** + * Wrapper that is used to switch between vanilla and multibyte strpos + * functions. + * @param string $haystack + * @param mixed $needle + * @param integer $offset + * @param string $charset + * @return string + * @since 1.5.1 + * @link http://www.php.net/strpos + * @link http://www.php.net/mb_strpos + */ +function sq_strpos($haystack,$needle,$offset,$charset='auto') { + // use automatic charset detection, if function call asks for it + if ($charset=='auto') { + global $default_charset; + set_my_charset(); + $charset=$default_charset; + } + $charset = strtolower($charset); + if (function_exists('mb_internal_encoding') && + in_array($charset,sq_mb_list_encodings())) { + return mb_strpos($haystack,$needle,$offset,$charset); + } + // TODO: add mbstring independent code + + // use vanilla string functions as last option + return strpos($haystack,$needle,$offset); +} + +/** + * Wrapper that is used to switch between vanilla and multibyte strtoupper + * functions. + * @param string $string + * @param string $charset + * @return string + * @since 1.5.1 + * @link http://www.php.net/strtoupper + * @link http://www.php.net/mb_strtoupper + */ +function sq_strtoupper($string,$charset='auto') { + // use automatic charset detection, if function call asks for it + if ($charset=='auto') { + global $default_charset; + set_my_charset(); + $charset=$default_charset; + } + $charset = strtolower($charset); + if (function_exists('mb_internal_encoding') && + in_array($charset,sq_mb_list_encodings())) { + return mb_strtoupper($string,$charset); + } + // TODO: add mbstring independent code + + // use vanilla string functions as last option + return strtoupper($string); +} $PHP_SELF = php_self(); ?> \ No newline at end of file