X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=functions%2Furl_parser.php;h=eecd579af67c5c54cdb5a2b3231c0d9980fdf70c;hb=353d074afac6827c90f4bb03e846c5e453d3b5b1;hp=32fdc392f04ce52c9eb92fb279e44256bcb4b28d;hpb=6c99d1de81366bceab6c9d6cf12179eedc81f9bc;p=squirrelmail.git diff --git a/functions/url_parser.php b/functions/url_parser.php index 32fdc392..eecd579a 100644 --- a/functions/url_parser.php +++ b/functions/url_parser.php @@ -6,7 +6,7 @@ * This code provides various string manipulation functions that are * used by the rest of the SquirrelMail code. * - * @copyright © 1999-2006 The SquirrelMail Project Team + * @copyright 1999-2018 The SquirrelMail Project Team * @license http://opensource.org/licenses/gpl-license.php GNU Public License * @version $Id$ * @package squirrelmail @@ -35,10 +35,12 @@ function replaceBlock (&$in, $replace, $start, $end) { */ /* Here's enough: */ global $IP_RegExp_Match, $Host_RegExp_Match, $Email_RegExp_Match; +//FIXME: these were written for use in an ereg().... they are now being used in preg()... we need to run some tests to make sure they are fully working still $IP_RegExp_Match = '\\[?[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]?'; $Host_RegExp_Match = '(' . $IP_RegExp_Match . '|[0-9a-z]([-.]?[0-9a-z])*\\.[a-z][a-z]+)'; -$atext = '([a-z0-9!#$&%*+/=?^_`{|}~-]|&)'; +// NB: the backslash in the following line escapes the forward slash, which assumes that the regular expression will be enclosed in /.../ +$atext = '([a-z0-9!#$&%*+\/=?^_`{|}~-]|&)'; $dot_atom = $atext . '+(\.' . $atext . '+)*'; $Email_RegExp_Match = $dot_atom . '(%' . $Host_RegExp_Match . ')?@' . $Host_RegExp_Match; @@ -55,7 +57,7 @@ function parseEmail (&$body) { $addresses = array(); /* Find all the email addresses in the body */ - while(eregi($Email_RegExp_Match, $sbody, $regs)) { + while (preg_match('/' . $Email_RegExp_Match . 
'/i', $sbody, $regs)) { $addresses[$regs[0]] = strtr($regs[0], array('&amp;' => '&')); $start = strpos($sbody, $regs[0]) + strlen($regs[0]); $sbody = substr($sbody, $start); @@ -133,8 +135,8 @@ function parseUrl (&$body) { $target_pos = strlen($check_str) + $start; } - /* If there was a token to replace, replace it */ - if ($target_token == 'mailto:') { // rfc 2368 (mailto URL) + // rfc 2368 (mailto URL) + if ($target_token == 'mailto:') { $target_pos += 7; //skip mailto: $end = $blength; @@ -144,9 +146,15 @@ function parseUrl (&$body) { if ((preg_match($MailTo_PReg_Match, $mailto, $regs)) && ($regs[0] != '')) { //sm_print_r($regs); $mailto_before = $target_token . $regs[0]; - $mailto_params = $regs[10]; + /** + * '+' characters in a mailto URI don't need to be percent-encoded. + * However, when mailto URI data is transported via HTTP, '+' must + * be percent-encoded as %2B so that when the HTTP data is + * percent-decoded, you get '+' back and not a space. + */ + $mailto_params = str_replace("+", "%2B", $regs[10]); if ($regs[1]) { //if there is an email addr before '?', we need to merge it with the params - $to = 'to=' . $regs[1]; + $to = 'to=' . str_replace("+", "%2B", $regs[1]); if (strpos($mailto_params, 'to=') > -1) //already a 'to=' $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params); else { @@ -163,6 +171,7 @@ function parseUrl (&$body) { } } else + /* If there was a token to replace, replace it */ if ($target_token != '') { /* Find the end of the URL */ $end = $blength; @@ -182,7 +191,7 @@ function parseUrl (&$body) { $url = substr($body, $target_pos, $end-$target_pos); /* Needed since lines are not passed with \n or \r */ - while ( ereg("[,\.]$", $url) ) { + while ( preg_match('/[,.]$/', $url) ) { $url = substr( $url, 0, -1 ); $end--; } @@ -216,7 +225,7 @@ function getEmail($string) { $addresses = array(); /* Find all the email addresses in the body */ - while (eregi($Email_RegExp_Match, $string, $regs)) { + while (preg_match('/' . 
$Email_RegExp_Match . '/i', $string, $regs)) { $addresses[$regs[0]] = strtr($regs[0], array('&amp;' => '&')); $start = strpos($string, $regs[0]) + strlen($regs[0]); $string = substr($string, $start);