X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=functions%2Fstrings.php;h=49addede63768e15b53622b685a6e9bb7a716551;hb=480feeacdef8ef7790e1b940901135520f4ea39d;hp=27dfa8a1f96cecbe28cfc061369296c20ab1bf5a;hpb=c9d61baf815700ed32427bf46fed3a5cc093216a;p=squirrelmail.git diff --git a/functions/strings.php b/functions/strings.php index 27dfa8a1..49addede 100644 --- a/functions/strings.php +++ b/functions/strings.php @@ -3,11 +3,11 @@ /** * strings.php * - * Copyright (c) 1999-2004 The SquirrelMail Project Team + * Copyright (c) 1999-2005 The SquirrelMail Project Team * Licensed under the GNU GPL. For full terms see the file COPYING. * * This code provides various string manipulation functions that are - * used by the rest of the Squirrelmail code. + * used by the rest of the SquirrelMail code. * * @version $Id$ * @package squirrelmail @@ -74,6 +74,35 @@ function sqMakeNewLine (&$str, $citeLevel, &$column) { } } +/** + * Checks for spaces in strings - only used if PHP doesn't have native ctype support + * + * @author Tomas Kuliavas + * + * You might be able to rewrite the function by adding short evaluation form. + * + * possible problems: + * - iso-2022-xx charsets - hex 20 might be part of other symbol. I might + * be wrong. 0x20 is not used in iso-2022-jp. I haven't checked iso-2022-kr + * and iso-2022-cn mappings. + * + * - no-break space ( ) - it is 8bit symbol, that depends on charset. + * there are at least three different charset groups that have nbsp in + * different places. + * + * I don't see any charset/nbsp options in php ctype either. + * + * @param string $string tested string + * @return bool true when only whitespace symbols are present in test string + */ +function sm_ctype_space($string) { + if ( preg_match('/^[\x09-\x0D]|^\x20/', $string) || $string=='') { + return true; + } else { + return false; + } +} + /** * Wraps text at $wrap characters. While sqWordWrap takes * a single line of text and wraps it, this function works @@ -87,6 +116,13 @@ function sqMakeNewLine (&$str, $citeLevel, &$column) { * @return string the wrapped text */ function &sqBodyWrap (&$body, $wrap) { + //check for ctype support, and fake it if it doesn't exist + if (!function_exists('ctype_space')) { + function ctype_space ($string) { + return sm_ctype_space($string); + } + } + // the newly wrapped text $outString = ''; // current column since the last newline in the outstring @@ -109,7 +145,9 @@ function &sqBodyWrap (&$body, $wrap) { // skip over any spaces interleaved among the cite markers while (($pos < $length) && ($body{$pos} == ' ')) { + $pos++; + } if ($pos >= $length) { break; @@ -119,7 +157,7 @@ function &sqBodyWrap (&$body, $wrap) { // special case: if this is a blank line then maintain it // (i.e. try to preserve original paragraph breaks) // unless they occur at the very beginning of the text - if (($body{$pos} == "\n") && (strlen($outString) != 0)) { + if (($body{$pos} == "\n" ) && (strlen($outString) != 0)) { $outStringLast = $outString{strlen($outString) - 1}; if ($outStringLast != "\n") { $outString .= "\n"; @@ -171,7 +209,10 @@ function &sqBodyWrap (&$body, $wrap) { $pos = $nextNewline + 1; continue; } - + /** + * Set this to false to stop appending short strings to previous lines + */ + $smartwrap = true; // inner loop, (obviously) handles wrapping up to // the next newline while ($pos < $nextNewline) { @@ -179,20 +220,59 @@ function &sqBodyWrap (&$body, $wrap) { while (($pos < $nextNewline) && (ctype_space ($body{$pos}))) { $pos++; } - // if this is a short line then just append it and continue outer loop - if (($outStringCol + $nextNewline - $pos) <= ($wrap - $citeLevel - 1)) { + if (($outStringCol + $nextNewline - $pos) <= ($wrap - $citeLevel - 1) ) { // if this is the final line in the input string then include // any trailing newlines + // echo substr($body,$pos,$wrap). "
"; if (($nextNewline + 1 == $length) && ($body{$nextNewline} == "\n")) { $nextNewline++; } // trim trailing spaces $lastRealChar = $nextNewline; - while (($lastRealChar > $pos) && (ctype_space ($body{$lastRealChar}))) { + while (($lastRealChar > $pos && $lastRealChar < $length) && (ctype_space ($body{$lastRealChar}))) { $lastRealChar--; } + // decide if appending the short string is what we want + if (($nextNewline < $length && $body{$nextNewline} == "\n") && + isset($lastRealChar)) { + $mypos = $pos; + //check the first word: + while (($mypos < $length) && ($body{$mypos} == '>')) { + $mypos++; + // skip over any spaces interleaved among the cite markers + while (($mypos < $length) && ($body{$mypos} == ' ')) { + $mypos++; + } + } +/* + $ldnspacecnt = 0; + if ($mypos == $nextNewline+1) { + while (($mypos < $length) && ($body{$mypos} == ' ')) { + $ldnspacecnt++; + } + } +*/ + + $firstword = substr($body,$mypos,strpos($body,' ',$mypos) - $mypos); + //if ($dowrap || $ldnspacecnt > 1 || ($firstword && ( + if (!$smartwrap || $firstword && ( + $firstword{0} == '-' || + $firstword{0} == '+' || + $firstword{0} == '*' || + $firstword{0} == strtoupper($firstword{0}) || + strpos($firstword,':'))) { + $outString .= substr($body,$pos,($lastRealChar - $pos+1)); + $outStringCol += ($lastRealChar - $pos); + sqMakeNewLine($outString,$citeLevel,$outStringCol); + $nextNewline++; + $pos = $nextNewline; + $outStringCol--; + continue; + } + + } $outString .= substr ($body, $pos, ($lastRealChar - $pos + 1)); $outStringCol += ($lastRealChar - $pos); @@ -207,7 +287,7 @@ function &sqBodyWrap (&$body, $wrap) { // our current line is already too long, break immediately // and restart outer loop if ($eol <= $pos) { - sqMakeNewLine ($outString, $citeLeve, $outStringCol); + sqMakeNewLine ($outString, $citeLevel, $outStringCol); continue; } @@ -270,20 +350,22 @@ function &sqBodyWrap (&$body, $wrap) { * Has a problem with special HTML characters, so call this before * you do character translation. * - * Specifically, ' comes up as 5 characters instead of 1. + * Specifically, &#039; comes up as 5 characters instead of 1. * This should not add newlines to the end of lines. * * @param string line the line of text to wrap, by ref * @param int wrap the maximum line lenth + * @param string charset name of charset used in $line string. Available since v.1.5.1. * @return void */ -function sqWordWrap(&$line, $wrap) { +function sqWordWrap(&$line, $wrap, $charset='') { global $languages, $squirrelmail_language; + // Use custom wrapping function, if translation provides it if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && - function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_wordwrap')) { if (mb_detect_encoding($line) != 'ASCII') { - $line = $languages[$squirrelmail_language]['XTRA_CODE']('wordwrap', $line, $wrap); + $line = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_wordwrap', $line, $wrap); return; } } @@ -302,9 +384,9 @@ function sqWordWrap(&$line, $wrap) { while ($i < count($words)) { /* Force one word to be on a line (minimum) */ $line .= $words[$i]; - $line_len = strlen($beginning_spaces) + strlen($words[$i]) + 2; + $line_len = strlen($beginning_spaces) + sq_strlen($words[$i],$charset) + 2; if (isset($words[$i + 1])) - $line_len += strlen($words[$i + 1]); + $line_len += sq_strlen($words[$i + 1],$charset); $i ++; /* Add more words (as long as they fit) */ @@ -312,7 +394,7 @@ function sqWordWrap(&$line, $wrap) { $line .= ' ' . $words[$i]; $i++; if (isset($words[$i])) - $line_len += strlen($words[$i]) + 1; + $line_len += sq_strlen($words[$i],$charset) + 1; else $line_len += 1; } @@ -561,7 +643,7 @@ function sq_mt_seed($Val) { $Val *= -1; } - if ($Val = 0) { + if ($Val == 0) { return; } @@ -659,7 +741,7 @@ function show_readable_size($bytes) { } /** - * Generates a random string from the caracter set you pass in + * Generates a random string from the character set you pass in * * @param int size the size of the string to generate * @param string chars a string containing the characters to use @@ -756,7 +838,7 @@ function makeComposeLink($url, $text = null, $target='') // build the compose in new window link... - // if javascript is on, use onClick event to handle it + // if javascript is on, use onclick event to handle it if($javascript_on) { sqgetGlobalVar('base_uri', $base_uri, SQ_SESSION); return ''. $text.''; @@ -785,6 +867,19 @@ function sm_print_r() { foreach(func_get_args() as $var) { print_r($var); echo "\n"; + // php has get_class_methods function that can print class methods + if (is_object($var)) { + // get class methods if $var is object + $aMethods=get_class_methods(get_class($var)); + // make sure that $aMethods is array and array is not empty + if (is_array($aMethods) && $aMethods!=array()) { + echo "Object methods:\n"; + foreach($aMethods as $method) { + echo '* ' . $method . "\n"; + } + } + echo "\n"; + } } $buffer = ob_get_contents(); // Grab the print_r output ob_end_clean(); // Silently discard the output & stop buffering @@ -874,7 +969,10 @@ function sq_get_html_translation_table($table,$quote_style=ENT_COMPAT,$charset=' * sq_htmlentities * * Convert all applicable characters to HTML entities. - * Minimal php requirement - v.4.0.5 + * Minimal php requirement - v.4.0.5. + * + * Function is designed for people that want to use full power of htmlentities() in + * i18n environment. * * @param string $string string that has to be sanitized * @param integer $quote_style quote encoding style. Possible values (without quotes): @@ -893,5 +991,243 @@ function sq_htmlentities($string,$quote_style=ENT_COMPAT,$charset='us-ascii') { return str_replace(array_keys($sq_html_ent_table),array_values($sq_html_ent_table),$string); } +/** + * Tests if string contains 8bit symbols. + * + * If charset is not set, function defaults to default_charset. + * $default_charset global must be set correctly if $charset is + * not used. + * @param string $string tested string + * @param string $charset charset used in a string + * @return bool true if 8bit symbols are detected + * @since 1.5.1 and 1.4.4 + */ +function sq_is8bit($string,$charset='') { + global $default_charset; + + if ($charset=='') $charset=$default_charset; + + /** + * Don't use \240 in ranges. Sometimes RH 7.2 doesn't like it. + * Don't use \200-\237 for iso-8859-x charsets. This range + * stores control symbols in those charsets. + * Use preg_match instead of ereg in order to avoid problems + * with mbstring overloading + */ + if (preg_match("/^iso-8859/i",$charset)) { + $needle='/\240|[\241-\377]/'; + } else { + $needle='/[\200-\237]|\240|[\241-\377]/'; + } + return preg_match("$needle",$string); +} + +/** + * Replacement of mb_list_encodings function + * + * This function provides replacement for function that is available only + * in php 5.x. Function does not test all mbstring encodings. Only the ones + * that might be used in SM translations. + * + * Supported strings are stored in session in order to reduce number of + * mb_internal_encoding function calls. + * + * If you want to test all mbstring encodings - fill $list_of_encodings + * array. + * @return array list of encodings supported by php mbstring extension + * @since 1.5.1 + */ +function sq_mb_list_encodings() { + if (! function_exists('mb_internal_encoding')) + return array(); + + // don't try to test encodings, if they are already stored in session + if (sqgetGlobalVar('mb_supported_encodings',$mb_supported_encodings,SQ_SESSION)) + return $mb_supported_encodings; + + // save original encoding + $orig_encoding=mb_internal_encoding(); + + $list_of_encoding=array( + 'pass', + 'auto', + 'ascii', + 'jis', + 'utf-8', + 'sjis', + 'euc-jp', + 'iso-8859-1', + 'iso-8859-2', + 'iso-8859-7', + 'iso-8859-9', + 'iso-8859-15', + 'koi8-r', + 'koi8-u', + 'big5', + 'gb2312', + 'windows-1251', + 'windows-1255', + 'windows-1256', + 'tis-620', + 'iso-2022-jp', + 'euc-kr', + 'utf7-imap'); + + $supported_encodings=array(); + + foreach ($list_of_encoding as $encoding) { + // try setting encodings. suppress warning messages + if (@mb_internal_encoding($encoding)) + $supported_encodings[]=$encoding; + } + + // restore original encoding + mb_internal_encoding($orig_encoding); + + // register list in session + sqsession_register($supported_encodings,'mb_supported_encodings'); + + return $supported_encodings; +} + +/** + * Function returns number of characters in string. + * + * Returned number might be different from number of bytes in string, + * if $charset is multibyte charset. Currently only utf-8 charset is + * supported. + * @param string $str string + * @param string $charset charset + * @since 1.5.1 + * @return integer number of characters in string + */ +function sq_strlen($str, $charset=''){ + // default option + if ($charset=='') return strlen($str); + + // use automatic charset detection, if function call asks for it + if ($charset=='auto') { + global $default_charset; + set_my_charset(); + $charset=$default_charset; + } + + // lowercase charset name + $charset=strtolower($charset); + + // set initial returned length number + $real_length=0; + + // calculate string length according to charset + // function can be modulized same way we modulize decode/encode/htmlentities + if ($charset=='utf-8') { + if (function_exists('mb_strlen')) { + $real_length = mb_strlen($str,'utf-8'); + } else { + // function needs length of string in bytes. + // mbstring overloading might break it + $str_length=strlen($str); + $str_index=0; + while ($str_index < $str_length) { + // start of internal utf-8 multibyte character detection + if (preg_match("/[\xC0-\xDF]/",$str[$str_index]) && + isset($str[$str_index+1]) && + preg_match("/[\x80-\xBF]/",$str[$str_index+1])) { + // two byte utf-8 + $str_index=$str_index+2; + $real_length++; + } elseif (preg_match("/[\xE0-\xEF]/",$str[$str_index]) && + isset($str[$str_index+2]) && + preg_match("/[\x80-\xBF][\x80-\xBF]/",$str[$str_index+1].$str[$str_index+2])) { + // three byte utf-8 + $str_index=$str_index+3; + $real_length++; + } elseif (preg_match("/[\xF0-\xF7]/",$str[$str_index]) && + isset($str[$str_index+3]) && + preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF]/",$str[$str_index+1].$str[$str_index+2].$str[$str_index+3])) { + // four byte utf-8 + $str_index=$str_index+4; + $real_length++; + } elseif (preg_match("/[\xF8-\xFB]/",$str[$str_index]) && + isset($str[$str_index+4]) && + preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF]/", + $str[$str_index+1].$str[$str_index+2].$str[$str_index+3].$str[$str_index+4])) { + // five byte utf-8 + $str_index=$str_index+5; + $real_length++; + } elseif (preg_match("/[\xFC-\xFD]/",$str[$str_index]) && + isset($str[$str_index+5]) && + preg_match("/[\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF]/", + $str[$str_index+1].$str[$str_index+2].$str[$str_index+3].$str[$str_index+4].$str[$str_index+5])) { + // six byte utf-8 + $str_index=$str_index+6; + $real_length++; + } else { + $str_index++; + $real_length++; + } + // end of internal utf-8 multibyte character detection + } + } + // end of utf-8 length detection + } elseif ($charset=='big5') { + // TODO: add big5 string length detection + $real_length=strlen($str); + } elseif ($charset=='gb2312') { + // TODO: add gb2312 string length detection + $real_length=strlen($str); + } elseif ($charset=='gb18030') { + // TODO: add gb18030 string length detection + $real_length=strlen($str); + } elseif ($charset=='euc-jp') { + // TODO: add euc-jp string length detection + $real_length=strlen($str); + } elseif ($charset=='euc-cn') { + // TODO: add euc-cn string length detection + $real_length=strlen($str); + } elseif ($charset=='euc-tw') { + // TODO: add euc-tw string length detection + $real_length=strlen($str); + } elseif ($charset=='euc-kr') { + // TODO: add euc-kr string length detection + $real_length=strlen($str); + } else { + $real_length=strlen($str); + } + return $real_length; +} + +/** + * string padding with multibyte support + * + * @link http://www.php.net/str_pad + * @param string $string original string + * @param integer $width padded string width + * @param string $pad padding symbols + * @param integer $padtype padding type + * (internal php defines, see str_pad() description) + * @param string $charset charset used in original string + * @return string padded string + */ +function sq_str_pad($string, $width, $pad, $padtype, $charset='') { + + $charset = strtolower($charset); + $padded_string = ''; + + switch ($charset) { + case 'utf-8': + case 'big5': + case 'gb2312': + case 'euc-kr': + /* + * all multibyte charsets try to increase width value by + * adding difference between number of bytes and real length + */ + $width = $width - sq_strlen($string,$charset) + strlen($string); + default: + $padded_string=str_pad($string,$width,$pad,$padtype); + } + return $padded_string; +} $PHP_SELF = php_self(); ?> \ No newline at end of file