From 8f7163e7fd1f5b7839fff1c1f3d6bcd37379e904 Mon Sep 17 00:00:00 2001 From: fidian Date: Fri, 22 Sep 2000 18:02:35 +0000 Subject: [PATCH] Improved link detection in bodies of mail messages. git-svn-id: https://svn.code.sf.net/p/squirrelmail/code/trunk/squirrelmail@754 7612ce4b-ef26-0410-bec9-ea0150e637f0 --- functions/url_parser.php | 174 ++++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 85 deletions(-) diff --git a/functions/url_parser.php b/functions/url_parser.php index 372a1bad..d9105b8c 100644 --- a/functions/url_parser.php +++ b/functions/url_parser.php @@ -13,18 +13,24 @@ function parseEmail ($body) { global $color; - // Changed the expression to the one in abook_take - // This works very well, especially it looks like you might have - // three instances of it below. Having it defined in - // just one spot could help when you need to change it. - $Expression = "[0-9a-z]([-_.]?[0-9a-z])*@[0-9a-z]([-.]?[0-9a-z])*\\.[a-wyz][a-z](g|l|m|pa|t|u|v)?"; + // Having this defined in just one spot could help when changes need + // to be made to the pattern + // Make sure that the expression is evaluated case insensitively + // + // Here's pretty sophisticated IP matching: + // $IPMatch = '(2[0-5][0-9]|1?[0-9]{1,2})'; + // $IPMatch = '\[?' . $IPMatch . '(\.' . $IPMatch . '){3}\]?'; + // + // Here's enough: + $IPMatch = '\[?[0-9]{1,3}(\.[0-9]{1,3}){3}\]?'; + $Host = '(' . $IPMatch . '|[0-9a-z]([-.]?[0-9a-z])*\.[a-wyz][a-z](g|l|m|pa|t|u|v)?)'; + $Expression = '[0-9a-z]([-_.]?[0-9a-z])*(%' . $Host . ')?@' . $Host; /* This is here in case we ever decide to use highlighting of searched text. this does it for email addresses if ($what && ($where == "BODY" || $where == "TEXT")) { - // Use the $Expression eregi ($Expression, $body, $regs); $oldaddr = $regs[0]; if ($oldaddr) { @@ -32,94 +38,92 @@ $body = str_replace ($oldaddr, "$newaddr", $body); } } else { - // Use the $Expression $body = eregi_replace ($Expression, "\\0", $body); } */ - // Use the $Expression + $body = eregi_replace ($Expression, "\\0", $body); return $body; } - - function parseUrl ($body) { - #Possible ways a URL could finish. - // Removed "--" since it could be part of a URL - $poss_ends=array(" ", "\n", "\r", "<", ">", ".\r", ".\n", ". ", " ", ")", "(", - """, "<", ">", ".<", "]", "[", "{", "}"); - $done=False; - while (!$done) { - #Look for when a URL starts - // Added gopher, news. Modified telnet. - $url_tokens = array( - "http://", - "https://", - "ftp://", - "telnet:", // Special case -- doesn't need the slashes - "gopher://", - "news://"); - for($i = 0; $i < sizeof($url_tokens); $i++) { - // Removed the use of "^^" -- it is unneeded - if(is_int($where = strpos(strtolower($body), $url_tokens[$i], $start))) - break; - } - // Look between $start and $where for email links - $check_str = substr($body, $start, $where); - $new_str = parseEmail($check_str); - - if ($check_str != $new_str) - { - $body = replaceBlock($body, $new_str, $start, $where); - $where = strlen($new_str) + $start; - } - - //$where = strpos(strtolower($body),"http://",$start); - // Fixed this to work with $i instead of $where - if ($i < sizeof($url_tokens)) { - // Removed the "^^" so I removed the next line - //$where = $where - 2; // because we added the ^^ at the begining - # Find the end of that URL - reset($poss_ends); $end=0; - while (list($key, $val) = each($poss_ends)) { - $enda = strpos($body,$val,$where); - if ($end == 0) $end = $enda; - if ($enda < $end and $enda != 0) $end = $enda; - } - if (!$end) $end = strlen($body); - #Extract URL - $url = substr($body,$where,$end-$where); - #Replace URL with HyperLinked Url - // Now this code doesn't simply match on url_tokens - // It will need some more text. This is good. - if ($url != "" && $url != $url_tokens[$i]) { - $url_str = "$url"; - # $body = str_replace($url,$url_str,$body); - # echo "$where, $end
"; - $body = replaceBlock($body,$url_str,$where,$end); - // Removed unnecessary strpos call. Searching - // a string takes longer than just figuring out - // the length. - // $start = strpos($body,"",$where); - $start = $where + strlen($url_str); - } else { - // Proper length increment -- Don't just assume 7 - $start = $where + strlen($url_tokens[$i]); - } - } else { - $done=true; - } - } - // Look after $start for more email links. - $check_str = substr($body, $start); - $new_str = parseEmail($check_str); - - if ($check_str != $new_str) + function parseUrl ($body) + { + $url_tokens = array( + 'http://', + 'https://', + 'ftp://', + 'telnet:', // Special case -- doesn't need the slashes + 'gopher://', + 'news://'); + + $poss_ends = array(' ', '\n', '\r', '<', '>', '.\r', '.\n', '. ', + ' ', ')', '(', '"', '<', '>', '.<', ']', '[', '{', + '}', "\240"); + + $start = 0; + $target_pos = strlen($body); + + while ($start != $target_pos) { - $body = replaceBlock($body, $new_str, $start, strlen($body)); - } + $target_token = ''; + + // Find the first token to replace + foreach ($url_tokens as $the_token) + { + $pos = strpos(strtolower($body), $the_token, $start); + if (is_int($pos) && $pos < $target_pos) + { + $target_pos = $pos; + $target_token = $the_token; + } + } + + // Look for email addresses between $start and $target_pos + $check_str = substr($body, $start, $target_pos); + $new_str = parseEmail($check_str); + + if ($check_str != $new_str) + { + $body = replaceBlock($body, $new_str, $start, $target_pos); + $target_pos = strlen($new_str) + $start; + } - return $body; + // If there was a token to replace, replace it + if ($target_token != '') + { + // Find the end of the URL + $end=strlen($body); + foreach ($poss_ends as $key => $val) + { + $enda = strpos($body,$val,$target_pos); + if (is_int($enda) && $enda < $end) + $end = $enda; + } + + // Extract URL + $url = substr($body, $target_pos, $end-$target_pos); + + // Replace URL with HyperLinked Url, requires 1 char in link + if ($url != '' && $url != $target_token) + { + $url_str = "$url"; + $body = replaceBlock($body,$url_str,$target_pos,$end); + $target_pos += strlen($url_str); + } + else + { + // Not quite a valid link, skip ahead to next chance + $target_pos += strlen($target_token); + } + } + + // Move forward + $start = $target_pos; + $target_pos = strlen($body); + } + + return $body; } - + ?> -- 2.25.1