* Improved speed of URL parser
[squirrelmail.git] / functions / url_parser.php
1 <?php
2 /* URL parsing code to allow links from within emails */
3 /* $Id$ */
4
5 $url_parser_php = true;
6
/**
 * Splice a replacement string into $in, in place.
 *
 * The bytes of $in from offset $start up to (but not including) offset
 * $end are removed and $replace is inserted where they were.
 *
 * @param string $in      String to modify (passed by reference).
 * @param string $replace Text to insert.
 * @param int    $start   Offset of the first byte to be replaced.
 * @param int    $end     Offset of the first byte to keep after the replaced span.
 */
function replaceBlock (&$in, $replace, $start, $end) {
    // Everything before the replaced span.
    $head = substr($in, 0, $start);
    // Everything from $end through the end of the string.
    $tail = substr($in, $end);

    $in = $head . $replace . $tail;
}
12
// The address-matching patterns are built once, here, so that any change
// to them only has to be made in a single place. They are written in
// lower case only, so they must be evaluated case insensitively.
//
// A stricter IP pattern would look like this:
//   $IPMatch = '(2[0-5][0-9]|1?[0-9]{1,2})';
//   $IPMatch = '\[?' . $IPMatch . '(\.' . $IPMatch . '){3}\]?';
//
// The looser form below is sufficient: four dot-separated groups of one
// to three digits, optionally wrapped in square brackets.
$IP_RegExp_Match = '\\[?[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]?';

// A host is either an IP (as above) or a dotted name ending in a
// top-level label of at least two letters.
$Host_RegExp_Match = implode('', array(
    '(',
    $IP_RegExp_Match,
    '|[0-9a-z]([-.]?[0-9a-z])*\\.[a-z][a-z]+)',
));

// local-part, an optional "%host" component, then "@host".
$Email_RegExp_Match = implode('', array(
    '[0-9a-z]([-_.]?[0-9a-z])*(%',
    $Host_RegExp_Match,
    ')?@',
    $Host_RegExp_Match,
));
27
/**
 * Turn every e-mail address found in $body into a link to the compose page.
 *
 * Each match of the global $Email_RegExp_Match pattern is wrapped in
 * <a href="../src/compose.php?send_to=ADDRESS">ADDRESS</a>. Matching is
 * case insensitive.
 *
 * @param string $body Text to scan; modified in place (passed by reference).
 * @return int 1 if at least one address was linked, 0 otherwise.
 */
function parseEmail (&$body) {
    global $Email_RegExp_Match;

    // eregi_replace() (POSIX regex extension) no longer exists in PHP 7+,
    // so the same pattern is run through PCRE with the "i" flag instead.
    // The pattern contains no "/" so it can be used as the delimiter.
    //
    // NOTE: POSIX picked the longest alternative, PCRE picks the first one
    // that matches. A host that starts with an IP and continues as a name
    // (user@10.0.0.1.example.com) is therefore linked only up to the IP.
    $linked = preg_replace(
        '/' . $Email_RegExp_Match . '/i',
        '<a href="../src/compose.php?send_to=\\0">\\0</a>',
        $body,
        -1,
        $count
    );

    // preg_replace() yields null on a PCRE error; leave $body untouched
    // rather than wiping out the message text.
    if ($linked === null) {
        return 0;
    }

    $body = $linked;

    return ($count > 0) ? 1 : 0;
}
55
56
// These lookup tables are built a single time at include time so that
// parseUrl() does not have to rebuild them on every call.

// Lower-case prefixes that mark the beginning of a URL.
$url_parser_url_tokens = array(
    'http://',
    'https://',
    'ftp://',
    'telnet:',    // Special case -- doesn't need the slashes
    'gopher://',
    'news://',
);

// Strings that terminate a URL; the earliest occurrence after the start
// of the URL is taken as its end.
$url_parser_poss_ends = array(
    ' ',
    "\n",
    "\r",
    '<',
    '>',
    ".\r",
    ".\n",
    '.&nbsp;',
    '&nbsp;',
    ')',
    '(',
    '&quot;',
    '&lt;',
    '&gt;',
    '.<',
    ']',
    '[',
    '{',
    '}',
    "\240",
);
70
71
/**
 * Turn URLs and e-mail addresses in $body into HTML links, in place.
 *
 * The text is walked left to right. On each pass the nearest URL token
 * (from $url_parser_url_tokens) at or after $start is located; the text
 * between $start and that token is handed to parseEmail(), and the URL
 * itself (running up to the first terminator from $url_parser_poss_ends)
 * is wrapped in <a href="..." target="_blank">...</a>.
 *
 * @param string $body Text to process; modified in place (passed by reference).
 */
function parseUrl (&$body)
{
    global $url_parser_poss_ends, $url_parser_url_tokens;
    $start = 0;
    $target_pos = strlen($body);

    while ($start != $target_pos)
    {
        $target_token = '';

        // Tokens are lower case, so search a lower-cased copy. It only has
        // to be built once per pass, not once per token.
        $lower_body = strtolower($body);

        // Find the first token to replace
        foreach ($url_parser_url_tokens as $the_token)
        {
            $pos = strpos($lower_body, $the_token, $start);
            if ($pos !== false && $pos < $target_pos)
            {
                $target_pos = $pos;
                $target_token = $the_token;
            }
        }

        // Look for email addresses between $start and $target_pos.
        // substr() takes a LENGTH, not an end offset: passing $target_pos
        // itself made the slice run past the URL token, and splicing that
        // oversized slice back in duplicated the text following it.
        $check_str = substr($body, $start, $target_pos - $start);

        if (parseEmail($check_str))
        {
            replaceBlock($body, $check_str, $start, $target_pos);
            // The slice grew; the URL token moved right by the same amount.
            $target_pos = strlen($check_str) + $start;
        }

        // If there was a token to replace, replace it
        if ($target_token != '')
        {
            // Find the end of the URL: the nearest terminator, or the end
            // of the text when none follows.
            $end = strlen($body);
            foreach ($url_parser_poss_ends as $val)
            {
                $enda = strpos($body, $val, $target_pos);
                if ($enda !== false && $enda < $end)
                {
                    $end = $enda;
                }
            }

            // Extract URL
            $url = substr($body, $target_pos, $end - $target_pos);

            // Replace URL with HyperLinked Url, requires 1 char in link
            if ($url != '' && $url != $target_token)
            {
                $url_str = "<a href=\"$url\" target=\"_blank\">$url</a>";
                replaceBlock($body, $url_str, $target_pos, $end);
                // Continue after the inserted link.
                $target_pos += strlen($url_str);
            }
            else
            {
                // Not quite a valid link, skip ahead to next chance
                $target_pos += strlen($target_token);
            }
        }

        // Move forward
        $start = $target_pos;
        $target_pos = strlen($body);
    }
}
136
137 ?>