+/**
+ * Returns the number of characters (not bytes) in a string.
+ *
+ * The result can differ from strlen() when $charset is a multibyte
+ * charset. Only utf-8 is actually decoded at the moment; every other
+ * charset falls back to the byte count.
+ *
+ * For utf-8 the mbstring extension is used when mb_strlen() exists.
+ * Otherwise the string is scanned byte by byte: a lead byte followed by
+ * the full number of continuation bytes (0x80-0xBF) that it announces is
+ * counted as one character, and any byte that does not start such a
+ * sequence is counted as one character on its own.
+ *
+ * @param string $str string
+ * @param string $charset charset of $str. An empty string (default)
+ *  means "count bytes"; 'auto' calls set_my_charset() and then uses the
+ *  global $default_charset.
+ * @since 1.5.1
+ * @return integer number of characters in string
+ */
+function sq_strlen($str, $charset=''){
+    // default option: no charset given, count bytes
+    if ($charset=='') return strlen($str);
+
+    // use automatic charset detection, if function call asks for it
+    if ($charset=='auto') {
+        global $default_charset;
+        set_my_charset();
+        $charset=$default_charset;
+    }
+
+    // charset names are compared in lowercase
+    $charset=strtolower($charset);
+
+    // TODO: add string length detection for big5, gb2312, gb18030,
+    // euc-jp, euc-cn, euc-tw and euc-kr. Until then they are measured
+    // in bytes, exactly like any unknown charset.
+    // function can be modulized same way we modulize decode/encode/htmlentities
+    if ($charset!='utf-8') {
+        return strlen($str);
+    }
+
+    if (function_exists('mb_strlen')) {
+        return mb_strlen($str,'utf-8');
+    }
+
+    // Internal utf-8 length detection. The scan needs the length of the
+    // string in bytes; mb_strlen() does not exist on this path, so
+    // strlen() is used directly.
+    $str_length=strlen($str);
+    $str_index=0;
+    $real_length=0;
+
+    while ($str_index < $str_length) {
+        $lead=ord($str[$str_index]);
+
+        // number of bytes announced by the lead byte
+        if ($lead >= 0xC0 && $lead <= 0xDF) {
+            $sequence=2;
+        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
+            $sequence=3;
+        } elseif ($lead >= 0xF0 && $lead <= 0xF7) {
+            $sequence=4;
+        } elseif ($lead >= 0xF8 && $lead <= 0xFB) {
+            $sequence=5;
+        } elseif ($lead >= 0xFC && $lead <= 0xFD) {
+            $sequence=6;
+        } else {
+            // ascii, stray continuation byte or invalid lead byte
+            $sequence=1;
+        }
+
+        if ($sequence > 1) {
+            if ($str_index + $sequence > $str_length) {
+                // truncated sequence, count the lead byte on its own
+                $sequence=1;
+            } else {
+                // every trailing byte must be a continuation byte;
+                // this includes all five trailing bytes of a six byte
+                // sequence
+                for ($offset=1; $offset < $sequence; $offset++) {
+                    $trail=ord($str[$str_index + $offset]);
+                    if ($trail < 0x80 || $trail > 0xBF) {
+                        $sequence=1;
+                        break;
+                    }
+                }
+            }
+        }
+
+        $str_index=$str_index + $sequence;
+        $real_length++;
+    }
+
+    return $real_length;
+}
+
+/**
+ * Multibyte-aware wrapper around str_pad().
+ *
+ * str_pad() measures the target width in bytes. For the multibyte
+ * charsets listed in the function body, the requested width is first
+ * enlarged by the difference between the byte length of $string and
+ * its length as reported by sq_strlen(), and the result is then handed
+ * to str_pad(). For any other charset the call is passed to str_pad()
+ * unchanged.
+ *
+ * @link http://www.php.net/str_pad
+ * @param string $string original string
+ * @param integer $width padded string width
+ * @param string $pad padding symbols
+ * @param integer $padtype padding type
+ *  (internal php defines, see str_pad() description)
+ * @param string $charset charset used in original string
+ * @return string padded string
+ */
+function sq_str_pad($string, $width, $pad, $padtype, $charset='') {
+    // charsets for which the width correction is applied
+    $multibyte_charsets = array('utf-8', 'big5', 'gb2312', 'euc-kr');
+
+    $charset = strtolower($charset);
+
+    if (in_array($charset, $multibyte_charsets, true)) {
+        // widen the byte-based target by (bytes - characters)
+        $char_count = sq_strlen($string, $charset);
+        $byte_count = strlen($string);
+        $width = $width - $char_count + $byte_count;
+    }
+
+    return str_pad($string, $width, $pad, $padtype);
+}