- */ + * First look for general BODY style declaration, which would be + * like so: + * body {background: blah-blah} + * and change it to .bodyclass so we can just assign it to a <div>

+ */ $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); $secremoveimg = '../images/' . _("sec_remove_eng.png"); /** - * Fix url('blah') declarations. - */ - $content = preg_replace("|url\s*$\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*$|si", - "url(\\1$secremoveimg\\2)", $content); - /** - * Fix url('https*://.*) declarations but only if $view_unsafe_images - * is false. - */ - if (!$view_unsafe_images){ - $content = preg_replace("|url\s*$\s*([\'\"])\s*https*:.*?([\'\"])\s*$|si", - "url(\\1$secremoveimg\\2)", $content); - } - - /** - * Fix urls that refer to cid: - */ - while (preg_match("|url\s*$\s*([\'\"]\s*cid:.*?[\'\"])\s*$|si", - $content, $matches)){ - $cidurl = $matches{1}; - $httpurl = sq_cid2http($message, $id, $cidurl); - $content = preg_replace("|url\s*$\s*$cidurl\s*$|si", - "url($httpurl)", $content); + * Fix url('blah') declarations. + */ + // $content = preg_replace("|url\s*$\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*$|si", + // "url(\\1$secremoveimg\\2)", $content); + // remove NUL + $content = str_replace("\0", "", $content); + // translate ur\l and variations (IE parses that) + $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content); + // NB I insert NUL characters to keep to avoid an infinite loop. They are removed after the loop. + while (preg_match("/url\s*$\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*$/si", $content, $matches)) { + $sProto = strtolower($matches[1]); + switch ($sProto) { + /** + * Fix url('https*://.*) declarations but only if $view_unsafe_images + * is false. + */ + case 'https': + case 'http': + if (!$view_unsafe_images){ + + $sExpr = "/url\s*$\s*[\'\"]?\s*$sProto*:.*[\'\"]?\s*$/si"; + $content = preg_replace($sExpr, "u\0r\0l(\\1$secremoveimg\\2)", $content); + + } else { + $content = preg_replace('/url/i',"u\0r\0l",$content); + } + break; + /** + * Fix urls that refer to cid: + */ + case 'cid': + $cidurl = 'cid:'. 
$matches[2]; + $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox); + // escape parentheses that can modify the regular expression + $cidurl = str_replace(array('(',')'),array('\$','\$'),$cidurl); + $content = preg_replace("|url\s*$\s*$cidurl\s*$|si", + "u\0r\0l($httpurl)", $content); + break; + default: + /** + * replace url with protocol other then the white list + * http,https and cid by an empty string. + */ + $content = preg_replace("/url\s*$\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*$/si", + "", $content); + break; + } } + // remove NUL + $content = str_replace("\0", "", $content); + /** + * Remove any backslashes, entities, and extraneous whitespace. + */ + $contentTemp = $content; + sq_defang($contentTemp); + sq_unspace($contentTemp); /** * Fix stupid css declarations which lead to vulnerabilities * in IE. */ - $match = Array('/expression/i', - '/behaviou*r/i', - '/binding/i'); - $replace = Array('idiocy', 'idiocy', 'idiocy'); - $content = preg_replace($match, $replace, $content); + $match = Array('/\/\*.*\*\//', + '/expression/i', + '/behaviou*r/i', + '/binding/i', + '/include-source/i'); + $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy'); + $contentNew = preg_replace($match, $replace, $contentTemp); + if ($contentNew !== $contentTemp) { + // insecure css declarations are used. From now on we don't care + // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash + $content = $contentNew; + } return array($content, $newpos); } + /** * This function converts cid: url's into the ones that can be viewed in * the browser. @@ -1438,6 +1802,7 @@ function sq_fixstyle($body, $pos, $message, $id){ * @param $message the message object * @param $id the message id * @param $cidurl the cid: url. + * @param $mailbox the message mailbox * @return a string with a http-friendly url */ function sq_cid2http($message, $id, $cidurl, $mailbox){ @@ -1445,17 +1810,52 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * Get rid of quotes. 
*/ $quotchar = substr($cidurl, 0, 1); - $cidurl = str_replace($quotchar, "", $cidurl); + if ($quotchar == '"' || $quotchar == "'"){ + $cidurl = str_replace($quotchar, "", $cidurl); + } else { + $quotchar = ''; + } $cidurl = substr(trim($cidurl), 4); + + $match_str = '/\{.*?\}\//'; + $str_rep = ''; + $cidurl = preg_replace($match_str, $str_rep, $cidurl); + $linkurl = find_ent_id($cidurl, $message); /* in case of non-save cid links $httpurl should be replaced by a sort of unsave link image */ $httpurl = ''; - if ($linkurl) { + + /** + * This is part of a fix for Outlook Express 6.x generating + * cid URLs without creating content-id headers. These images are + * not part of the multipart/related html mail. The html contains + *

references to + * attached images with as goal to render them inline although + * the attachment disposition property is not inline. + */ + + if (empty($linkurl)) { + if (preg_match('/{.*}\//', $cidurl)) { + $cidurl = preg_replace('/{.*}\//','', $cidurl); + if (!empty($cidurl)) { + $linkurl = find_ent_id($cidurl, $message); + } + } + } + + if (!empty($linkurl)) { $httpurl = $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' . - "passed_id=$id&mailbox=" . urlencode($mailbox) . - '&ent_id=' . $linkurl . $quotchar; + "passed_id=$id&mailbox=" . urlencode($mailbox) . + '&ent_id=' . $linkurl . $quotchar; + } else { + /** + * If we couldn't generate a proper img url, drop in a blank image + * instead of sending back empty, otherwise it causes unusual behaviour + */ + $httpurl = $quotchar . SM_PATH . 'images/blank.png' . $quotchar; } + return $httpurl; } @@ -1463,14 +1863,17 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * This function changes the tag into a

<div> tag since we * can't really have a body-within-body. * - * @param $attary an array of attributes and values of <body> - * @return a modified array of attributes to be set for <div>

+ * @param $attary an array of attributes and values of <body> + * @param $mailbox mailbox we're currently reading (for cid2http) + * @param $message current message (for cid2http) + * @param $id current message id (for cid2http) + * @return a modified array of attributes to be set for <div>

*/ -function sq_body2div($attary){ +function sq_body2div($attary, $mailbox, $message, $id){ $me = 'sq_body2div'; $divattary = Array('class' => "'bodyclass'"); - $bgcolor = '#ffffff'; $text = '#000000'; + $has_bgc_stl = $has_txt_stl = false; $styledef = ''; if (is_array($attary) && sizeof($attary) > 0){ foreach ($attary as $attname=>$attvalue){ @@ -1478,16 +1881,24 @@ function sq_body2div($attary){ $attvalue = str_replace($quotchar, "", $attvalue); switch ($attname){ case 'background': + $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox); $styledef .= "background-image: url('$attvalue'); "; break; case 'bgcolor': + $has_bgc_stl = true; $styledef .= "background-color: $attvalue; "; break; case 'text': + $has_txt_stl = true; $styledef .= "color: $attvalue; "; break; } } + // Outlook defines a white bgcolor and no text color. This can lead to + // white text on a white bg with certain themes. + if ($has_bgc_stl && !$has_txt_stl) { + $styledef .= "color: $text; "; + } if (strlen($styledef) > 0){ $divattary{"style"} = "\"$styledef\""; } @@ -1501,7 +1912,7 @@ function sq_body2div($attary){ * special description. * * Since the description is quite lengthy, see it here: - * http://www.mricon.com/html/phpfilter.html + * http://linux.duke.edu/projects/mini/htmlfilter/ * * @param $body the string with HTML you wish to filter * @param $tag_list see description above @@ -1515,8 +1926,8 @@ function sq_body2div($attary){ * @param $id message id * @return sanitized html safe to show on your pages. */ -function sq_sanitize($body, - $tag_list, +function sq_sanitize($body, + $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, @@ -1542,7 +1953,7 @@ function sq_sanitize($body, */ $curpos = 0; $open_tags = Array(); - $trusted = "\n"; + $trusted = "\n\n"; $skip_content = false; /** * Take care of netscape's stupid javascript entities like @@ -1557,8 +1968,8 @@ function sq_sanitize($body, * Take care of

' . - _("Body retrieval error. The reason for this is most probably that the message is malformed.") . - '
' . _("Command:") . "	$cmd
' . _("Response:") . "	$response
' . _("Message:") . "	$message
' . _("FETCH line:") . "	$topline

' . + _("Body retrieval error. The reason for this is most probably that the message is malformed.") . + '
' . _("Command:") . "	$cmd
' . _("Response:") . "	$response
' . _("Message:") . "	$message
' . _("FETCH line:") . "	$topline