}
}
+ /* make sure that there are no 8bit chars between $target_pos and suspected end of URL */
+ if (!is_bool($first8bit=sq_strpos_8bit($body,$target_pos,$end))) {
+ $end = $first8bit;
+ }
+
/* Extract URL */
$url = substr($body, $target_pos, $end-$target_pos);
return (array_key_exists(0, $addresses) ? $addresses[0] : '');
}
+/**
+ * Finds first occurrence of 8bit data in the string
+ *
+ * Function finds the first 8bit byte or the first html entity that represents
+ * an 8bit character. Search starts at the $offset position and stops just
+ * before the $maxlength position. If $maxlength is not defined or is bigger
+ * than the provided string, search ends when the string ends.
+ *
+ * Check returned data type in order to avoid confusion between bool(false)
+ * (not found) and int(0) (first char in the string).
+ * @param string $haystack string that is searched
+ * @param integer $offset position of the first inspected byte
+ * @param integer $maxlength position at which the search stops (exclusive)
+ * @return mixed integer with first 8bit character position or boolean false
+ * @since 1.5.2
+ */
+function sq_strpos_8bit($haystack,$offset=0,$maxlength=false) {
+    $ret = false;
+
+    /* translation tables are loaded only when a named entity is actually seen */
+    $specialchars = false;
+    $entities = false;
+
+    if ($maxlength===false || strlen($haystack) < $maxlength) {
+        $maxlength=strlen($haystack);
+    }
+
+    for($i=$offset;$i<$maxlength;$i++) {
+        /* byte values 128-255 are 8bit data. plain ord() check, no regexp needed */
+        if (ord($haystack[$i])>127) {
+            /* we have 8bit char. stop here and return position */
+            $ret = $i;
+            break;
+        } elseif ($haystack[$i]=='&') {
+            $substring = substr($haystack,$i);
+            $is8bit = false;
+            /**
+             * 1. look for "&#(decimal number);" where decimal number is bigger than 127
+             * 2. look for "&#x(hexadecimal number);", where hex number is bigger than 7f.
+             *    The value is converted with hexdec(); comparing hex digits to the
+             *    "\x7f" string is a string comparison that can never succeed.
+             * 3. look for any html character entity that is not 7bit html special char. Use
+             *    own sq_get_html_translation_table() function with 'utf-8' character set in
+             *    order to get all html entities. Entity names may contain digits
+             *    (sup2, frac12), so digits are accepted after the first letter.
+             * The three patterns are mutually exclusive, so only one branch can match.
+             */
+            if (preg_match('/^&#(\d+);/',$substring,$match)) {
+                $is8bit = ($match[1]>127);
+            } elseif (preg_match('/^&#x([0-9a-f]+);/i',$substring,$match)) {
+                $is8bit = (hexdec($match[1])>127);
+            } elseif (preg_match('/^&([a-z][a-z0-9]*);/i',$substring,$match)) {
+                if ($specialchars===false) {
+                    $specialchars = get_html_translation_table(HTML_SPECIALCHARS);
+                }
+                if (!in_array($match[0],$specialchars)) {
+                    if ($entities===false) {
+                        $entities = sq_get_html_translation_table(HTML_ENTITIES,ENT_COMPAT,'utf-8');
+                    }
+                    $is8bit = in_array($match[0],$entities);
+                }
+            }
+
+            if ($is8bit) {
+                /* entity stands for 8bit char. report position of its ampersand */
+                $ret = $i;
+                break;
+            }
+        }
+    }
+    return $ret;
+}
?>
\ No newline at end of file