From 20a60f89f40d29b87104a29a3f79f337fb10eb05 Mon Sep 17 00:00:00 2001 From: fidian Date: Fri, 13 Apr 2001 13:41:08 +0000 Subject: [PATCH] * Improved speed of URL parser * Made useful regexps global so other plugins (like abook_take) could use them * Made host regexp match any TLD because of what New.net and ICANN are squabbling about -- might as well be safe and match a little too much than not match a valid host. git-svn-id: https://svn.code.sf.net/p/squirrelmail/code/trunk/squirrelmail@1224 7612ce4b-ef26-0410-bec9-ea0150e637f0 --- functions/url_parser.php | 78 +++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/functions/url_parser.php b/functions/url_parser.php index 032ab167..6a9e7d41 100644 --- a/functions/url_parser.php +++ b/functions/url_parser.php @@ -4,48 +4,48 @@ $url_parser_php = true; - function replaceBlock ($in, $replace, $start, $end) { + function replaceBlock (&$in, $replace, $start, $end) { $begin = substr($in,0,$start); $end = substr($in,$end,strlen($in)-$end); - $ret = $begin.$replace.$end; - return $ret; + $in = $begin.$replace.$end; } + // Having this defined in just one spot could help when changes need + // to be made to the pattern + // Make sure that the expression is evaluated case insensitively + // + // Here's pretty sophisticated IP matching: + // $IPMatch = '(2[0-5][0-9]|1?[0-9]{1,2})'; + // $IPMatch = '\[?' . $IPMatch . '(\.' . $IPMatch . '){3}\]?'; + // + // Here's enough: + $IP_RegExp_Match = '\\[?[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]?'; + $Host_RegExp_Match = '(' . $IP_RegExp_Match . + '|[0-9a-z]([-.]?[0-9a-z])*\\.[a-z][a-z]+)'; + $Email_RegExp_Match = '[0-9a-z]([-_.]?[0-9a-z])*(%' . $Host_RegExp_Match . + ')?@' . $Host_RegExp_Match; + function parseEmail (&$body) { - global $color; + global $color, $Email_RegExp_Match; $Size = strlen($body); - // Having this defined in just one spot could help when changes need - // to be made to the pattern - // Make sure that the expression is evaluated case insensitively - // - // Here's pretty sophisticated IP matching: - // $IPMatch = '(2[0-5][0-9]|1?[0-9]{1,2})'; - // $IPMatch = '\[?' . $IPMatch . '(\.' . $IPMatch . '){3}\]?'; - // - // Here's enough: - $IPMatch = '\\[?[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]?'; - $Host = '(' . $IPMatch . -'|[0-9a-z]([-.]?[0-9a-z])*\\.[a-wyz][a-z](fo|g|l|m|me|o|op|pa|ro|seum|t|u|v|z)?)'; - $Expression = '[0-9a-z]([-_.]?[0-9a-z])*(%' . $Host . ')?@' . $Host; - /* This is here in case we ever decide to use highlighting of searched text. this does it for email addresses if ($what && ($where == "BODY" || $where == "TEXT")) { - eregi ($Expression, $body, $regs); + eregi ($Email_RegExp_Match, $body, $regs); $oldaddr = $regs[0]; if ($oldaddr) { $newaddr = eregi_replace ($what, "$what", $oldaddr); $body = str_replace ($oldaddr, "$newaddr", $body); } } else { - $body = eregi_replace ($Expression, "\\0", $body); + $body = eregi_replace ($Email_RegExp_Match, "\\0", $body); } */ - $body = eregi_replace ($Expression, "\\0", $body); + $body = eregi_replace ($Email_RegExp_Match, "\\0", $body); // If there are any changes, it'll just get bigger. if ($Size != strlen($body)) @@ -54,20 +54,24 @@ } - function parseUrl (&$body) - { - $url_tokens = array( - 'http://', - 'https://', - 'ftp://', - 'telnet:', // Special case -- doesn't need the slashes - 'gopher://', - 'news://'); + // We don't want to re-initialize this stuff for every line. Save work + // and just do it once here. + $url_parser_url_tokens = array( + 'http://', + 'https://', + 'ftp://', + 'telnet:', // Special case -- doesn't need the slashes + 'gopher://', + 'news://'); - $poss_ends = array(' ', "\n", "\r", '<', '>', ".\r", ".\n", '. ', - ' ', ')', '(', '"', '<', '>', '.<', ']', '[', '{', - '}', "\240"); + $url_parser_poss_ends = array(' ', "\n", "\r", '<', '>', ".\r", ".\n", + '. ', ' ', ')', '(', '"', '<', '>', '.<', + ']', '[', '{', '}', "\240"); + + function parseUrl (&$body) + { + global $url_parser_poss_ends, $url_parser_url_tokens;; $start = 0; $target_pos = strlen($body); @@ -76,7 +80,7 @@ $target_token = ''; // Find the first token to replace - foreach ($url_tokens as $the_token) + foreach ($url_parser_url_tokens as $the_token) { $pos = strpos(strtolower($body), $the_token, $start); if (is_int($pos) && $pos < $target_pos) @@ -91,7 +95,7 @@ if (parseEmail($check_str)) { - $body = replaceBlock($body, $check_str, $start, $target_pos); + replaceBlock($body, $check_str, $start, $target_pos); $target_pos = strlen($check_str) + $start; } @@ -100,7 +104,7 @@ { // Find the end of the URL $end=strlen($body); - foreach ($poss_ends as $key => $val) + foreach ($url_parser_poss_ends as $key => $val) { $enda = strpos($body,$val,$target_pos); if (is_int($enda) && $enda < $end) @@ -114,7 +118,7 @@ if ($url != '' && $url != $target_token) { $url_str = "$url"; - $body = replaceBlock($body,$url_str,$target_pos,$end); + replaceBlock($body,$url_str,$target_pos,$end); $target_pos += strlen($url_str); } else -- 2.25.1