- */ + * First look for general BODY style declaration, which would be + * like so: + * body {background: blah-blah} + * and change it to .bodyclass so we can just assign it to a

+ */ $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); $secremoveimg = '../images/' . _("sec_remove_eng.png"); /** - * Fix url('blah') declarations. - */ - $content = preg_replace("|url$([\'\"])\s*\S+script\s*:.*?([\'\"])$|si", - "url(\\1$secremoveimg\\2)", $content); - /** - * Fix url('https*://.*) declarations but only if $view_unsafe_images - * is false. - */ - if (!$view_unsafe_images){ - $content = preg_replace("|url$([\'\"])\s*https*:.*?([\'\"])$|si", - "url(\\1$secremoveimg\\2)", $content); - } - - /** - * Fix urls that refer to cid: - */ - while (preg_match("|url$([\'\"]\s*cid:.*?[\'\"])$|si", $content, - $matches)){ - $cidurl = $matches{1}; - $httpurl = sq_cid2http($message, $id, $cidurl); - $content = preg_replace("|url$$cidurl$|si", - "url($httpurl)", $content); + * Fix url('blah') declarations. + */ + // $content = preg_replace("|url\s*$\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*$|si", + // "url(\\1$secremoveimg\\2)", $content); + // remove NUL + $content = str_replace("\0", "", $content); + // NB I insert NUL characters to keep to avoid an infinite loop. They are removed after the loop. + while (preg_match("/url\s*$\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*$/si", $content, $matches)) { + $sProto = strtolower($matches[1]); + switch ($sProto) { + /** + * Fix url('https*://.*) declarations but only if $view_unsafe_images + * is false. + */ + case 'https': + case 'http': + if (!$view_unsafe_images){ + $sExpr = "/url\s*$\s*([\'\"])\s*$sProto*:.*?([\'\"])\s*$/si"; + $content = preg_replace($sExpr, "u\0r\0l(\\1$secremoveimg\\2)", $content); + } + break; + /** + * Fix urls that refer to cid: + */ + case 'cid': + $cidurl = 'cid:'. $matches[2]; + $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox); + $content = preg_replace("|url\s*$\s*$cidurl\s*$|si", + "u\0r\0l($httpurl)", $content); + break; + default: + /** + * replace url with protocol other then the white list + * http,https and cid by an empty string. + */ + $content = preg_replace("/url\s*$\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*$/si", + "", $content); + break; + } + break; } + // remove NUL + $content = str_replace("\0", "", $content); + + /** + * Remove any backslashes, entities, and extraneous whitespace. + */ + $contentTemp = $content; + sq_defang($contentTemp); + sq_unspace($contentTemp); /** * Fix stupid css declarations which lead to vulnerabilities * in IE. */ $match = Array('/expression/i', - '/behaviou*r/i', - '/binding/i'); - $replace = Array('idiocy', 'idiocy', 'idiocy'); - $content = preg_replace($match, $replace, $content); - return $content; + '/behaviou*r/i', + '/binding/i', + '/include-source/i'); + $replace = Array('idiocy', 'idiocy', 'idiocy', 'idiocy'); + $contentNew = preg_replace($match, $replace, $contentTemp); + if ($contentNew !== $contentTemp) { + // insecure css declarations are used. From now on we don't care + // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash + $content = $contentNew; + } + return array($content, $newpos); } + /** * This function converts cid: url's into the ones that can be viewed in * the browser. @@ -1355,6 +1646,7 @@ function sq_fixstyle($message, $id, $content){ * @param $message the message object * @param $id the message id * @param $cidurl the cid: url. + * @param $mailbox the message mailbox * @return a string with a http-friendly url */ function sq_cid2http($message, $id, $cidurl, $mailbox){ @@ -1362,17 +1654,52 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * Get rid of quotes. */ $quotchar = substr($cidurl, 0, 1); - $cidurl = str_replace($quotchar, "", $cidurl); + if ($quotchar == '"' || $quotchar == "'"){ + $cidurl = str_replace($quotchar, "", $cidurl); + } else { + $quotchar = ''; + } $cidurl = substr(trim($cidurl), 4); + + $match_str = '/\{.*?\}\//'; + $str_rep = ''; + $cidurl = preg_replace($match_str, $str_rep, $cidurl); + $linkurl = find_ent_id($cidurl, $message); /* in case of non-save cid links $httpurl should be replaced by a sort of unsave link image */ $httpurl = ''; - if ($linkurl) { + + /** + * This is part of a fix for Outlook Express 6.x generating + * cid URLs without creating content-id headers. These images are + * not part of the multipart/related html mail. The html contains + *

references to + * attached images with as goal to render them inline although + * the attachment disposition property is not inline. + */ + + if (empty($linkurl)) { + if (preg_match('/{.*}\//', $cidurl)) { + $cidurl = preg_replace('/{.*}\//','', $cidurl); + if (!empty($cidurl)) { + $linkurl = find_ent_id($cidurl, $message); + } + } + } + + if (!empty($linkurl)) { $httpurl = $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' . - "passed_id=$id&mailbox=" . urlencode($mailbox) . - '&ent_id=' . $linkurl . $quotchar; + "passed_id=$id&mailbox=" . urlencode($mailbox) . + '&ent_id=' . $linkurl . $quotchar; + } else { + /** + * If we couldn't generate a proper img url, drop in a blank image + * instead of sending back empty, otherwise it causes unusual behaviour + */ + $httpurl = $quotchar . SM_PATH . 'images/blank.png' . $quotchar; } + return $httpurl; } @@ -1380,14 +1707,17 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * This function changes the tag into a

tag since we * can't really have a body-within-body. * - * @param $attary an array of attributes and values of - * @return a modified array of attributes to be set for

+ * @param $attary an array of attributes and values of + * @param $mailbox mailbox we're currently reading (for cid2http) + * @param $message current message (for cid2http) + * @param $id current message id (for cid2http) + * @return a modified array of attributes to be set for

*/ -function sq_body2div($attary){ +function sq_body2div($attary, $mailbox, $message, $id){ $me = 'sq_body2div'; $divattary = Array('class' => "'bodyclass'"); - $bgcolor = '#ffffff'; $text = '#000000'; + $has_bgc_stl = $has_txt_stl = false; $styledef = ''; if (is_array($attary) && sizeof($attary) > 0){ foreach ($attary as $attname=>$attvalue){ @@ -1395,16 +1725,24 @@ function sq_body2div($attary){ $attvalue = str_replace($quotchar, "", $attvalue); switch ($attname){ case 'background': + $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox); $styledef .= "background-image: url('$attvalue'); "; break; case 'bgcolor': + $has_bgc_stl = true; $styledef .= "background-color: $attvalue; "; break; case 'text': + $has_txt_stl = true; $styledef .= "color: $attvalue; "; break; } } + // Outlook defines a white bgcolor and no text color. This can lead to + // white text on a white bg with certain themes. + if ($has_bgc_stl && !$has_txt_stl) { + $styledef .= "color: $text; "; + } if (strlen($styledef) > 0){ $divattary{"style"} = "\"$styledef\""; } @@ -1418,7 +1756,7 @@ function sq_body2div($attary){ * special description. * * Since the description is quite lengthy, see it here: - * http://www.mricon.com/html/phpfilter.html + * http://linux.duke.edu/projects/mini/htmlfilter/ * * @param $body the string with HTML you wish to filter * @param $tag_list see description above @@ -1432,8 +1770,8 @@ function sq_body2div($attary){ * @param $id message id * @return sanitized html safe to show on your pages. */ -function sq_sanitize($body, - $tag_list, +function sq_sanitize($body, + $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, @@ -1445,10 +1783,11 @@ function sq_sanitize($body, $mailbox ){ $me = 'sq_sanitize'; + $rm_tags = array_shift($tag_list); /** * Normalize rm_tags and rm_tags_with_content. */ - @array_walk($rm_tags, 'sq_casenormalize'); + @array_walk($tag_list, 'sq_casenormalize'); @array_walk($rm_tags_with_content, 'sq_casenormalize'); @array_walk($self_closing_tags, 'sq_casenormalize'); /** @@ -1456,10 +1795,9 @@ function sq_sanitize($body, * false means remove these tags * true means allow these tags */ - $rm_tags = array_shift($tag_list); $curpos = 0; $open_tags = Array(); - $trusted = "\n"; + $trusted = "\n\n"; $skip_content = false; /** * Take care of netscape's stupid javascript entities like @@ -1467,18 +1805,21 @@ function sq_sanitize($body, */ $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body); - while (($curtag=sq_getnxtag($body, $curpos)) != FALSE){ + while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; $free_content = substr($body, $curpos, $lt-$curpos); /** * Take care of . Edit the - * content before we apply it. - */ - $free_content = sq_fixstyle($message, $id, $free_content); + if ($tagname == "style" && $tagtype == 1){ + list($free_content, $curpos) = + sq_fixstyle($body, $gt+1, $message, $id, $mailbox); + if ($free_content != FALSE){ + $trusted .= sq_tagprint($tagname, $attary, $tagtype); + $trusted .= $free_content; + $trusted .= sq_tagprint($tagname, false, 2); + } + continue; } if ($skip_content == false){ $trusted .= $free_content; @@ -1495,13 +1836,12 @@ function sq_sanitize($body, if ($skip_content == false){ if ($tagname == "body"){ $tagname = "div"; - } else { - if (isset($open_tags{$tagname}) && + } + if (isset($open_tags{$tagname}) && $open_tags{$tagname} > 0){ - $open_tags{$tagname}--; - } else { - $tagname = false; - } + $open_tags{$tagname}--; + } else { + $tagname = false; } } } @@ -1515,23 +1855,31 @@ function sq_sanitize($body, * tagtype appropriately. */ if ($tagtype == 1 - && in_array($tagname, $self_closing_tags)){ - $tagtype=3; + && in_array($tagname, $self_closing_tags)){ + $tagtype = 3; } /** * See if we should skip this tag and any content * inside it. */ if ($tagtype == 1 && - in_array($tagname, $rm_tags_with_content)){ + in_array($tagname, $rm_tags_with_content)){ $skip_content = $tagname; } else { - if (($rm_tags == false - && in_array($tagname, $tag_list)) || - ($rm_tags == true && - !in_array($tagname, $tag_list))){ + if (($rm_tags == false + && in_array($tagname, $tag_list)) || + ($rm_tags == true && + !in_array($tagname, $tag_list))){ $tagname = false; } else { + /** + * Convert body into div. + */ + if ($tagname == "body"){ + $tagname = "div"; + $attary = sq_body2div($attary, $mailbox, + $message, $id); + } if ($tagtype == 1){ if (isset($open_tags{$tagname})){ $open_tags{$tagname}++; @@ -1553,13 +1901,6 @@ function sq_sanitize($body, $mailbox ); } - /** - * Convert body into div. - */ - if ($tagname == "body"){ - $tagname = "div"; - $attary = sq_body2div($attary, $message, $id); - } } } } @@ -1589,9 +1930,18 @@ function sq_sanitize($body, * * @param $body the body of the message * @param $id the id of the message + + * @param $message + * @param $mailbox + * @param boolean $take_mailto_links When TRUE, converts mailto: links + * into internal SM compose links + * (optional; default = TRUE) * @return a string with html safe to display in the browser. */ -function magicHTML($body, $id, $message, $mailbox = 'INBOX') { +function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links =true) { + + require_once(SM_PATH . 'functions/url_parser.php'); // for $MailTo_PReg_Match + global $attachment_common_show_images, $view_unsafe_images, $has_unsafe_images; /** @@ -1599,124 +1949,137 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX') { */ $attachment_common_show_images = false; $tag_list = Array( - false, - "object", - "meta", - "html", - "head", - "base", - "link", - "frame", - "iframe" - ); + false, + "object", + "meta", + "html", + "head", + "base", + "link", + "frame", + "iframe", + "plaintext", + "marquee" + ); $rm_tags_with_content = Array( - "script", - "applet", - "embed", - "title" - ); + "script", + "applet", + "embed", + "title", + "frameset", + "xmp", + "xml" + ); $self_closing_tags = Array( - "img", - "br", - "hr", - "input" - ); + "img", + "br", + "hr", + "input", + "outbind" + ); - $force_tag_closing = false; + $force_tag_closing = true; $rm_attnames = Array( - "/.*/" => - Array( - "/target/i", - "/^on.*/i", - "/^dynsrc/i", - "/^data.*/i", - "/^lowsrc.*/i" - ) - ); + "/.*/" => + Array( + "/target/i", + "/^on.*/i", + "/^dynsrc/i", + "/^data.*/i", + "/^lowsrc.*/i" + ) + ); $secremoveimg = "../images/" . _("sec_remove_eng.png"); $bad_attvals = Array( - "/.*/" => + "/.*/" => Array( "/^src|background/i" => + Array( Array( - Array( - "|^([\'\"])\s*\.\./.*([\'\"])|si", - "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", - "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", - "/^([\'\"])\s*about\s*:.*([\'\"])/si" - ), - Array( - "\\1$secremoveimg\\2", - "\\1$secremoveimg\\2", - "\\1$secremoveimg\\2", - "\\1$secremoveimg\\2" - ) + "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", + "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", + "/^([\'\"])\s*about\s*:.*([\'\"])/si" ), + Array( + "\\1$secremoveimg\\2", + "\\1$secremoveimg\\2", + "\\1$secremoveimg\\2", + "\\1$secremoveimg\\2" + ) + ), "/^href|action/i" => + Array( Array( - Array( - "|^([\'\"])\s*\.\./.*([\'\"])|si", - "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", - "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", - "/^([\'\"])\s*about\s*:.*([\'\"])/si" - ), - Array( - "\\1#\\2", - "\\1#\\2", - "\\1#\\2", - "\\1#\\2" - ) + "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", + "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", + "/^([\'\"])\s*about\s*:.*([\'\"])/si" ), - "/^style/i" => Array( - Array( - "/expression/i", - "/binding/i", - "/behaviou*r/i", - "|url$([\'\"])\s*\.\./.*([\'\"])$|si", - "/url$([\'\"])\s*\S+script\s*:.*([\'\"])$/si", - "/url$([\'\"])\s*mocha\s*:.*([\'\"])$/si", - "/url$([\'\"])\s*about\s*:.*([\'\"])$/si" - ), - Array( - "idiocy", - "idiocy", - "idiocy", - "url(\\1#\\2)", - "url(\\1#\\2)", - "url(\\1#\\2)", - "url(\\1#\\2)" - ) - ) + "\\1#\\1", + "\\1#\\1", + "\\1#\\1", + "\\1#\\1" + ) + ), + "/^style/i" => + Array( + Array( + "/expression/i", + "/binding/i", + "/behaviou*r/i", + "/include-source/i", + "/position\s*:\s*absolute/i", + "/url\s*$\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*$/si", + "/url\s*$\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*$/si", + "/url\s*$\s*([\'\"])\s*about\s*:.*([\'\"])\s*$/si", + "/(.*)\s*:\s*url\s*$\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*$/si" + ), + Array( + "idiocy", + "idiocy", + "idiocy", + "idiocy", + "", + "url(\\1#\\1)", + "url(\\1#\\1)", + "url(\\1#\\1)", + "url(\\1#\\1)", + "url(\\1#\\1)", + "\\1:url(\\2#\\3)" + ) ) + ) ); if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { - $view_unsafe_images = false; + $view_unsafe_images = false; } if (!$view_unsafe_images){ /** * Remove any references to http/https if view_unsafe_images set * to false. */ - array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[0], - '/^([\'\"])\s*https*:.*([\'\"])/si'); - array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[1], - "\\1$secremoveimg\\2"); - array_push($bad_attvals{'/.*/'}{'/^style/i'}[0], - '/url$([\'\"])\s*https*:.*([\'\"])$/si'); - array_push($bad_attvals{'/.*/'}{'/^style/i'}[1], - "url(\\1$secremoveimg\\2)"); + array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[0], + '/^([\'\"])\s*https*:.*([\'\"])/si'); + array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[1], + "\\1$secremoveimg\\1"); + array_push($bad_attvals{'/.*/'}{'/^style/i'}[0], + '/url$([\'\"])\s*https*:.*([\'\"])$/si'); + array_push($bad_attvals{'/.*/'}{'/^style/i'}[1], + "url(\\1$secremoveimg\\1)"); } $add_attr_to_tag = Array( - "/^a$/i" => Array('target'=>'"_new"') - ); - $trusted = sq_sanitize($body, - $tag_list, + "/^a$/i" => + Array('target'=>'"_blank"', + 'title'=>'"'._("This external link will open in a new window").'"' + ) + ); + $trusted = sq_sanitize($body, + $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, @@ -1729,8 +2092,169 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX') { ); if (preg_match("|$secremoveimg|i", $trusted)){ $has_unsafe_images = true; - } + } + + // we want to parse mailto's in HTML output, change to SM compose links + // this is a modified version of code from url_parser.php... but Marc is + // right: we need a better filtering implementation; adding this randomly + // here is not a great solution + // + if ($take_mailto_links) { + // parseUrl($trusted); // this even parses URLs inside of tags... too aggressive + global $MailTo_PReg_Match; + $MailTo_PReg_Match = '/mailto:' . substr($MailTo_PReg_Match, 1); + if ((preg_match_all($MailTo_PReg_Match, $trusted, $regs)) && ($regs[0][0] != '')) { + foreach ($regs[0] as $i => $mailto_before) { + $mailto_params = $regs[10][$i]; + // get rid of any tailing quote since we have to add send_to to the end + // + if (substr($mailto_before, strlen($mailto_before) - 1) == '"') + $mailto_before = substr($mailto_before, 0, strlen($mailto_before) - 1); + if (substr($mailto_params, strlen($mailto_params) - 1) == '"') + $mailto_params = substr($mailto_params, 0, strlen($mailto_params) - 1); + + if ($regs[1][$i]) { //if there is an email addr before '?', we need to merge it with the params + $to = 'to=' . $regs[1][$i]; + if (strpos($mailto_params, 'to=') > -1) //already a 'to=' + $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params); + else { + if ($mailto_params) //already some params, append to them + $mailto_params .= '&' . $to; + else + $mailto_params .= '?' . $to; + } + } + + $url_str = preg_replace(array('/to=/i', '/(?\|;]', '_', str_replace(' ', ' ', $filename)); + } + + // A Pox on Microsoft and it's Internet Explorer! + // + // IE has lots of bugs with file downloads. + // It also has problems with SSL. Both of these cause problems + // for us in this function. + // + // See this article on Cache Control headers and SSL + // http://support.microsoft.com/default.aspx?scid=kb;en-us;323308 + // + // The best thing you can do for IE is to upgrade to the latest + // version + //set all the Cache Control Headers for IE + if ($isIE) { + $filename=rawurlencode($filename); + header ("Pragma: public"); + header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1 + header ("Cache-Control: post-check=0, pre-check=0", false); + header ("Cache-Control: private"); + + //set the inline header for IE, we'll add the attachment header later if we need it + header ("Content-Disposition: inline; filename=$filename"); + } + + if (!$force) { + // Try to show in browser window + header ("Content-Disposition: inline; filename=\"$filename\""); + header ("Content-Type: $type0/$type1; name=\"$filename\""); + } else { + // Try to pop up the "save as" box + + // IE makes this hard. It pops up 2 save boxes, or none. + // http://support.microsoft.com/support/kb/articles/Q238/5/88.ASP + // http://support.microsoft.com/default.aspx?scid=kb;EN-US;260519 + // But, according to Microsoft, it is "RFC compliant but doesn't + // take into account some deviations that allowed within the + // specification." Doesn't that mean RFC non-compliant? + // http://support.microsoft.com/support/kb/articles/Q258/4/52.ASP + + // all browsers need the application/octet-stream header for this + header ("Content-Type: application/octet-stream; name=\"$filename\""); + + // http://support.microsoft.com/support/kb/articles/Q182/3/15.asp + // Do not have quotes around filename, but that applied to + // "attachment"... does it apply to inline too? + header ("Content-Disposition: attachment; filename=\"$filename\""); + + if ($isIE && !$isIE6) { + // This combination seems to work mostly. IE 5.5 SP 1 has + // known issues (see the Microsoft Knowledge Base) + + // This works for most types, but doesn't work with Word files + header ("Content-Type: application/download; name=\"$filename\""); + + // These are spares, just in case. :-) + //header("Content-Type: $type0/$type1; name=\"$filename\""); + //header("Content-Type: application/x-msdownload; name=\"$filename\""); + //header("Content-Type: application/octet-stream; name=\"$filename\""); + } else { + // another application/octet-stream forces download for Netscape + header ("Content-Type: application/octet-stream; name=\"$filename\""); + } + } + + //send the content-length header if the calling function provides it + if ($filesize > 0) { + header("Content-Length: $filesize"); + } + +} // end fn SendDownloadHeaders + ?>

' . - _("Body retrieval error. The reason for this is most probably that the message is malformed.") . - '
' . _("Command:") . "	$cmd
' . _("Response:") . "	$response
' . _("Message:") . "	$message
' . _("FETCH line:") . "	$topline

' . + _("Body retrieval error. The reason for this is most probably that the message is malformed.") . + '
' . _("Command:") . "	$cmd
' . _("Response:") . "	$response
' . _("Message:") . "	$message
' . _("FETCH line:") . "	$topline