' => "\n", // '
' => "\n", // '
' => "\n", // '
' => "\n", // '
' => "\n", // '' => "\n", // '

' => "\n", '>' => '>', '<' => '<', '&' => '&', '©' => '©'); // first, completely remove

/**
 * Extracts the text of a style element from the message body and edits
 * the style definitions to make them safer to display.
 *
 * Scanning starts at $pos (the first character after the opening style
 * tag) and runs until a closing style tag is found. HTML comments that
 * start inside the element are skipped over, so a closing style tag
 * hidden inside a comment does not end the scan early.
 *
 * @param  $body     the string that contains the style element
 * @param  $pos      offset in $body where the style content starts
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $mailbox  the message mailbox
 * @return           an array of two elements: the edited style content
 *                   and the offset just past the closing style tag.
 *                   When no closing tag is found the array holds FALSE
 *                   and the length of $body. When the content contains
 *                   8bit or disallowed control characters the content
 *                   element is an empty string.
 */
function sq_fixstyle($body, $pos, $message, $id, $mailbox){
    $me = 'sq_fixstyle';

    // workaround for a closing style tag in between comments
    $content = '';
    $sToken = '';
    $bSucces = false;
    $bEndTag = false;
    for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
        $char = $body[$i];
        switch ($char) {
            case '<':
                // possible start of a tag; remember it but do not copy it yet
                $sToken = $char;
                break;
            case '/':
                if ($sToken == '<') {
                    // "</" seen: start collecting a closing tag
                    $sToken .= $char;
                    $bEndTag = true;
                } else {
                    $content .= $char;
                }
                break;
            case '>':
                if ($bEndTag) {
                    $sToken .= $char;
                    if (preg_match('/\<\/\s*style\s*\>/i',$sToken,$aMatch)) {
                        // found the closing style tag, stop scanning
                        $newpos = $i + 1;
                        $bSucces = true;
                        break 2;
                    } else {
                        // some other closing tag, keep it as content
                        $content .= $sToken;
                    }
                    $bEndTag = false;
                } else {
                    $content .= $char;
                }
                break;
            case '!':
                if ($sToken == '<') {
                    // possible comment
                    if (isset($body[$i+2]) && substr($body,$i,3) == '!--') {
                        // jump to the comment terminator
                        $i = strpos($body,'-->',$i+3);
                        if ($i === false) { // no end comment
                            $i = strlen($body);
                        }
                        $sToken = '';
                    }
                } else {
                    $content .= $char;
                }
                break;
            default:
                if ($bEndTag) {
                    $sToken .= $char;
                } else {
                    $content .= $char;
                }
                break;
        }
    }
    if ($bSucces == FALSE){
        return array(FALSE, strlen($body));
    }

    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to the
     * element that carries the "bodyclass" class.
     */
    // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
    // Nah, this is even better - try to preface all CSS selectors with
    // our class ID "bodyclass" then correct generic "body" selectors
    // TODO: this works pretty good but breaks stuff like this:
    //          @media print { body { font-size: 10pt; } }
    //       but there isn't an easy way to make this regex skip @media
    //       definitions... though lots of the ones in the wild will be
    //       correctly handled because they tend to end with a parenthesis, like:
    //          @media screen and (max-width:480px) { ...
    $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content);
    $content = str_replace('.bodyclass body', '.bodyclass', $content);

    global $use_transparent_security_image;
    if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png';
    else $secremoveimg = '../images/' . _("sec_remove_eng.png");

    /**
     * Fix url('blah') declarations.
     */
    // $content = preg_replace("|url\s*$\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*$|si",
    // "url(\\1$secremoveimg\\2)", $content);

    // first check for 8bit sequences and disallowed control characters
    if (preg_match('/[\16-\37\200-\377]+/',$content)) {
        $content = '';
        return array($content, $newpos);
    }

    // IE Sucks hard. We have a special function for it.
    sq_fixIE_idiocy($content);

    // remove @import line
    $content = preg_replace("/^\s*(@import.*)$/mi","\n\n",$content);

    // translate ur\l and variations (IE parses that)
    // TODO check if the sq_fixIE_idiocy function already handles this.
    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);

    // Collect the value of every url(...) declaration. The parentheses
    // have to be escaped so that they are matched literally; with "$"
    // anchors in their place the pattern cannot capture a url value and
    // nothing would ever be handed to sq_fix_url().
    preg_match_all('/url\s*\((.+)\)/si',$content,$aMatch);
    if (count($aMatch)) {
        $aValue = $aReplace = array();
        foreach($aMatch[1] as $sMatch) {
            // url value
            $urlvalue = $sMatch;
            sq_fix_url('style',$urlvalue, $message, $id, $mailbox,"'");
            $aValue[] = $sMatch;
            $aReplace[] = $urlvalue;
        }
        $content = str_replace($aValue,$aReplace,$content);
    }

    /**
     * Remove any backslashes, entities, and extraneous whitespace.
     */
    $contentTemp = $content;
    sq_defang($contentTemp);
    sq_unspace($contentTemp);

    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     *
     * Also remove "position" attribute, as it can easily be set
     * to "fixed" or "absolute" with "left" and "top" attributes
     * of zero, taking over the whole content frame.  It can also
     * be set to relative and move itself anywhere it wants to,
     * displaying content in areas it shouldn't be allowed to touch.
     */
    $match   = Array('/\/\*.*\*\//', // removes /* blah blah */
                     '/expression/i',
                     '/behaviou*r/i',
                     '/binding/i',
                     '/include-source/i',
                     '/javascript/i',
                     '/script/i',
                     '/position/i');
    $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', '');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        // insecure css declarations are used. From now on we don't care
        // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash
        $content = $contentNew;
    }
    return array($content, $newpos);
}

/**
 * This function converts cid: url's into the ones that can be viewed in
 * the browser.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $cidurl   the cid: url.
 * @param  $mailbox  the message mailbox
 * @return           a string with a http-friendly url. When the first
 *                   character of $cidurl is a quote, the result is
 *                   wrapped in that same quote character.
 */
function sq_cid2http($message, $id, $cidurl, $mailbox){
    /**
     * Get rid of quotes.
     */
    $quotchar = substr($cidurl, 0, 1);
    if ($quotchar == '"' || $quotchar == "'"){
        $cidurl = str_replace($quotchar, "", $cidurl);
    } else {
        $quotchar = '';
    }
    // drop the first four characters (the "cid:" scheme prefix)
    $cidurl = substr(trim($cidurl), 4);

    // strip a "{...}/" part from the content id before the lookup
    $match_str = '/\{.*?\}\//';
    $str_rep = '';
    $cidurl = preg_replace($match_str, $str_rep, $cidurl);

    $linkurl = find_ent_id($cidurl, $message);
    /* in case of non-safe cid links $httpurl should be replaced by a sort of
       unsafe link image */
    $httpurl = '';

    /**
     * This is part of a fix for Outlook Express 6.x generating
     * cid URLs without creating content-id headers. These images are
     * not part of the multipart/related html mail. The html contains
     * references to attached images with as goal to render them inline
     * although the attachment disposition property is not inline.
     */
    if (empty($linkurl)) {
        if (preg_match('/{.*}\//', $cidurl)) {
            $cidurl = preg_replace('/{.*}\//','', $cidurl);
            if (!empty($cidurl)) {
                $linkurl = find_ent_id($cidurl, $message);
            }
        }
    }

    if (!empty($linkurl)) {
        $httpurl = $quotchar . sqm_baseuri() . 'src/download.php?absolute_dl=true&' .
                   "passed_id=$id&mailbox=" . urlencode($mailbox) .
                   '&ent_id=' . $linkurl . $quotchar;
    } else {
        /**
         * If we couldn't generate a proper img url, drop in a blank image
         * instead of sending back empty, otherwise it causes unusual behaviour
         */
        $httpurl = $quotchar . SM_PATH . 'images/blank.png' . $quotchar;
    }

    return $httpurl;
}

/**
 * This function builds the attributes of the div tag that takes the
 * place of the body tag, since we can't really have a body-within-body.
 *
 * The "background", "bgcolor" and "text" attributes are turned into an
 * inline style definition; all other attributes are dropped.
 *
 * @param  $attary   an array of attributes and values of the body tag
 * @param  $mailbox  mailbox we're currently reading (for cid2http)
 * @param  $message  current message (for cid2http)
 * @param  $id       current message id (for cid2http)
 * @return           a modified array of attributes to be set for the
 *                   div tag
 */
function sq_body2div($attary, $mailbox, $message, $id){
    $me = 'sq_body2div';
    $divattary = Array('class' => "'bodyclass'");
    $text = '#000000';
    $has_bgc_stl = $has_txt_stl = false;
    $styledef = '';
    if (is_array($attary) && sizeof($attary) > 0){
        foreach ($attary as $attname=>$attvalue){
            // strip the quote character that surrounds the value
            $quotchar = substr($attvalue, 0, 1);
            $attvalue = str_replace($quotchar, "", $attvalue);
            switch ($attname){
                case 'background':
                    $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox);
                    $styledef .= "background-image: url('$attvalue'); ";
                    break;
                case 'bgcolor':
                    $has_bgc_stl = true;
                    $styledef .= "background-color: $attvalue; ";
                    break;
                case 'text':
                    $has_txt_stl = true;
                    $styledef .= "color: $attvalue; ";
                    break;
            }
        }
        // Outlook defines a white bgcolor and no text color. This can lead to
        // white text on a white bg with certain themes.
        if ($has_bgc_stl && !$has_txt_stl) {
            $styledef .= "color: $text; ";
        }
        if (strlen($styledef) > 0){
            $divattary["style"] = "\"$styledef\"";
        }
    }
    return $divattary;
}

/**
 * This is the main function and the one you should actually be calling.
 * There are several variables you should be aware of and which need
 * special description.
 *
 * Since the description is quite lengthy, see it here:
 * http://linux.duke.edu/projects/mini/htmlfilter/
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             see description above
 * @param $rm_tags_with_content see description above
 * @param $self_closing_tags    see description above
 * @param $force_tag_closing    see description above
 * @param $rm_attnames          see description above
 * @param $bad_attvals          see description above
 * @param $add_attr_to_tag      see description above
 * @param $message              message object
 * @param $id                   message id
 * @param $recursively_called   boolean flag for recursive calls into this function (optional; default FALSE)
 * @return                      sanitized html safe to show on your pages.
*/ function sq_sanitize($body, $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox, $recursively_called=FALSE ){ $me = 'sq_sanitize'; /** * See if tag_list is of tags to remove or tags to allow. * false means remove these tags * true means allow these tags */ $orig_tag_list = $tag_list; $rm_tags = array_shift($tag_list); /** * Normalize rm_tags and rm_tags_with_content. */ @array_walk($tag_list, 'sq_casenormalize'); @array_walk($rm_tags_with_content, 'sq_casenormalize'); @array_walk($self_closing_tags, 'sq_casenormalize'); $curpos = 0; $open_tags = Array(); $trusted = "\n\n"; $skip_content = false; /** * Take care of netscape's stupid javascript entities like * &{alert('boo')}; */ $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body); while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; /** * RCDATA and RAWTEXT tags are handled differently: * next instance of closing tag is used, whether or not * the HTML is well formed before that */ global $rcdata_rawtext_tags; if (!$recursively_called && in_array($tagname, $rcdata_rawtext_tags) && $tagtype === 1){ $closing_tag = false; $closing_tag_offset = $curpos; // seek out the closing tag for the current RCDATA/RAWTEXT tag while (1) { // first we need to move forward to next available closing tag // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax) $next_tag = sq_findnxreg($body, $closing_tag_offset, " */ if ($tagname == "style" && $tagtype == 1){ list($free_content, $curpos) = sq_fixstyle($body, $gt+1, $message, $id, $mailbox); if ($free_content != FALSE){ if ( !empty($attary) ) { $attary = sq_fixatts($tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox ); } $trusted .= sq_tagprint($tagname, $attary, $tagtype); $trusted .= $free_content; $trusted .= sq_tagprint($tagname, false, 2); } continue; } 
if ($skip_content == false){ $trusted .= $free_content; } if ($tagname != FALSE){ if ($tagtype == 2){ if ($skip_content == $tagname){ /** * Got to the end of tag we needed to remove. */ $tagname = false; $skip_content = false; } else { if ($skip_content == false){ if ($tagname == "body"){ $tagname = "div"; } if (isset($open_tags[$tagname]) && $open_tags[$tagname] > 0){ $open_tags[$tagname]--; } else { $tagname = false; } } } } else { /** * $rm_tags_with_content */ if ($skip_content == false){ /** * See if this is a self-closing type and change * tagtype appropriately. */ if ($tagtype == 1 && in_array($tagname, $self_closing_tags)){ $tagtype = 3; } /** * See if we should skip this tag and any content * inside it. */ if ($tagtype == 1 && in_array($tagname, $rm_tags_with_content)){ $skip_content = $tagname; } else { if (($rm_tags == false && in_array($tagname, $tag_list)) || ($rm_tags == true && !in_array($tagname, $tag_list))){ $tagname = false; } else { /** * Convert body into div. */ if ($tagname == "body"){ $tagname = "div"; $attary = sq_body2div($attary, $mailbox, $message, $id); } if ($tagtype == 1){ if (isset($open_tags[$tagname])){ $open_tags[$tagname]++; } else { $open_tags[$tagname]=1; } } /** * This is where we run other checks. */ if (is_array($attary) && sizeof($attary) > 0){ $attary = sq_fixatts($tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox ); } } } } } if ($tagname != false && $skip_content == false){ $trusted .= sq_tagprint($tagname, $attary, $tagtype); } } $curpos = $gt+1; } $trusted .= substr($body, $curpos, strlen($body)-$curpos); if ($force_tag_closing == true){ foreach ($open_tags as $tagname=>$opentimes){ while ($opentimes > 0){ $trusted .= ''; $opentimes--; } } $trusted .= "\n"; } $trusted .= "\n"; return $trusted; } /** * This is a wrapper function to call html sanitizing routines. 
* * @param $body the body of the message * @param $id the id of the message * @param $message * @param $mailbox * @param boolean $take_mailto_links When TRUE, converts mailto: links * into internal SM compose links * (optional; default = TRUE) * @return a string with html safe to display in the browser. */ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links =true) { // require_once(SM_PATH . 'functions/url_parser.php'); // for $MailTo_PReg_Match global $attachment_common_show_images, $view_unsafe_images, $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags, $remove_rcdata_rawtext_tags_and_content; $rcdata_rawtext_tags = array( "noscript", "noframes", "noembed", "textarea", // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below ); /** * Don't display attached images in HTML mode. * * SB: why? */ $attachment_common_show_images = false; $tag_list = Array( false, // remove these tags "meta", "html", "head", "base", "link", "frame", "iframe", "plaintext", "marquee", ); $rm_tags_with_content = Array( "script", "object", "applet", "embed", "title", "frameset", "xmp", "xml", ); if (!$allow_svg_display) $rm_tags_with_content[] = 'svg'; /** * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special * case that they are, but if you prefer to remove them and their contents entirely * (in most cases, should be a safe thing with minimal impact), you can add the * following to config/config_local.php * $remove_rcdata_rawtext_tags_and_content = TRUE; */ if ($remove_rcdata_rawtext_tags_and_content) $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags); $self_closing_tags = Array( "img", "br", "hr", "input", "outbind", ); $force_tag_closing = true; $rm_attnames = Array( "/.*/" => Array( "/target/i", "/^on.*/i", "/^dynsrc/i", "/^data.*/i", "/^lowsrc.*/i", ) ); global $use_transparent_security_image; if ($use_transparent_security_image) $secremoveimg = 
'../images/spacer.png'; else $secremoveimg = '../images/' . _("sec_remove_eng.png"); $bad_attvals = Array( "/.*/" => Array( "/^src|background/i" => Array( Array( "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", "/^([\'\"])\s*about\s*:.*([\'\"])/si" ), Array( "\\1$secremoveimg\\2", "\\1$secremoveimg\\2", "\\1$secremoveimg\\2", ) ), "/^href|action/i" => Array( Array( "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", "/^([\'\"])\s*about\s*:.*([\'\"])/si" ), Array( "\\1#\\1", "\\1#\\1", "\\1#\\1" ) ), "/^style/i" => Array( Array( "/\/\*.*\*\//", "/expression/i", "/binding/i", "/behaviou*r/i", "/include-source/i", // position:relative can also be exploited // to put content outside of email body area // and position:fixed is similarly exploitable // as position:absolute, so we'll remove it // altogether.... // // Does this screw up legitimate HTML messages? // If so, the only fix I see is to allow position // attributes (any values? I think we still have // to block static and fixed) only if $use_iframe // is enabled (1.5.0+) // // was: "/position\s*:\s*absolute/i", // "/position\s*:/i", "/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", "/url\s*$\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*$/si", "/url\s*$\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*$/si", "/url\s*$\s*([\'\"])\s*about\s*:.*([\'\"])\s*$/si", "/(.*)\s*:\s*url\s*$\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*$/si", ), Array( "", "idiocy", "idiocy", "idiocy", "idiocy", "idiocy", "url", "url(\\1#\\1)", "url(\\1#\\1)", "url(\\1#\\1)", "\\1:url(\\2#\\3)" ) ) ) ); // If there's no "view_unsafe_images" variable in the URL, turn unsafe // images off by default. sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET, FALSE); if (!$view_unsafe_images){ /** * Remove any references to http/https if view_unsafe_images set * to false. 
*/ array_push($bad_attvals['/.*/']['/^src|background/i'][0], '/^([\'\"])\s*https*:.*([\'\"])/si'); array_push($bad_attvals['/.*/']['/^src|background/i'][1], "\\1$secremoveimg\\1"); array_push($bad_attvals['/.*/']['/^style/i'][0], '/url$[\'\"]?https?:[^$]*[\'\"]?\)/si'); array_push($bad_attvals['/.*/']['/^style/i'][1], "url(\\1$secremoveimg\\1)"); } $add_attr_to_tag = Array( "/^a$/i" => Array('target'=>'"_blank"', 'title'=>'"'._("This external link will open in a new window").'"' ) ); $trusted = sq_sanitize($body, $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox ); if (strpos($trusted,$secremoveimg)){ $has_unsafe_images = true; } // we want to parse mailto's in HTML output, change to SM compose links // this is a modified version of code from url_parser.php... but Marc is // right: we need a better filtering implementation; adding this randomly // here is not a great solution // if ($take_mailto_links) { // parseUrl($trusted); // this even parses URLs inside of tags... too aggressive global $MailTo_PReg_Match; // some mailers (Microsoft, surprise surprise) produce mailto strings without being // inside an anchor (link) tag, so we have to make sure the regex looks for the // quote before mailto, and we'll also try to convert the non-links back into links $MailTo_PReg_Match = '/([\'"])?mailto:' . 
substr($MailTo_PReg_Match, 1) ; if ((preg_match_all($MailTo_PReg_Match, $trusted, $regs)) && ($regs[0][0] != '')) { foreach ($regs[0] as $i => $mailto_before) { $mailto_params = $regs[11][$i]; // get rid of any leading quote we may have captured but don't care about // $mailto_before = ltrim($mailto_before, '"\''); // get rid of any tailing quote since we have to add send_to to the end // $mailto_before = rtrim($mailto_before, '"\''); $mailto_params = rtrim($mailto_params, '"\''); if ($regs[2][$i]) { //if there is an email addr before '?', we need to merge it with the params $to = 'to=' . $regs[2][$i]; if (strpos($mailto_params, 'to=') > -1) //already a 'to=' $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params); else { if ($mailto_params) //already some params, append to them $mailto_params .= '&' . $to; else $mailto_params .= '?' . $to; } } $url_str = preg_replace(array('/to=/i', '/(?= 6 && strstr($HTTP_USER_AGENT, 'Opera') === false) { $isIE6plus = true; } if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename')) { $filename = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename', $filename, $HTTP_USER_AGENT); } else { $filename = preg_replace('/[\\\\\/:*?"<>|;]/', '_', str_replace(' ', ' ', $filename)); } // A Pox on Microsoft and it's Internet Explorer! // // IE has lots of bugs with file downloads. // It also has problems with SSL. Both of these cause problems // for us in this function. 
// // See this article on Cache Control headers and SSL // http://support.microsoft.com/default.aspx?scid=kb;en-us;323308 // // The best thing you can do for IE is to upgrade to the latest // version //set all the Cache Control Headers for IE if ($isIE) { $filename=rawurlencode($filename); header ("Pragma: public"); header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1 // does nothing - see: https://blogs.msdn.microsoft.com/ieinternals/2009/07/20/internet-explorers-cache-control-extensions/ // header ("Cache-Control: post-check=0, pre-check=0", false); header ("Cache-Control: private"); //set the inline header for IE, we'll add the attachment header later if we need it header ("Content-Disposition: inline; filename=$filename"); } if (!$force) { // Try to show in browser window header ("Content-Disposition: inline; filename=\"$filename\""); header ("Content-Type: $type0/$type1; name=\"$filename\""); } else { // Try to pop up the "save as" box // IE makes this hard. It pops up 2 save boxes, or none. // http://support.microsoft.com/support/kb/articles/Q238/5/88.ASP // http://support.microsoft.com/default.aspx?scid=kb;EN-US;260519 // But, according to Microsoft, it is "RFC compliant but doesn't // take into account some deviations that allowed within the // specification." Doesn't that mean RFC non-compliant? // http://support.microsoft.com/support/kb/articles/Q258/4/52.ASP // all browsers need the application/octet-stream header for this header ("Content-Type: application/octet-stream; name=\"$filename\""); // http://support.microsoft.com/support/kb/articles/Q182/3/15.asp // Do not have quotes around filename, but that applied to // "attachment"... does it apply to inline too? header ("Content-Disposition: attachment; filename=\"$filename\""); if ($isIE && !$isIE6plus) { // This combination seems to work mostly. 
IE 5.5 SP 1 has // known issues (see the Microsoft Knowledge Base) // This works for most types, but doesn't work with Word files header ("Content-Type: application/download; name=\"$filename\""); header ("Content-Type: application/force-download; name=\"$filename\""); // These are spares, just in case. :-) //header("Content-Type: $type0/$type1; name=\"$filename\""); //header("Content-Type: application/x-msdownload; name=\"$filename\""); //header("Content-Type: application/octet-stream; name=\"$filename\""); } else if ($isIE) { // This is to prevent IE for MIME sniffing and auto open a file in IE header ("Content-Type: application/force-download; name=\"$filename\""); } else { // another application/octet-stream forces download for Netscape header ("Content-Type: application/octet-stream; name=\"$filename\""); } } //send the content-length header if the calling function provides it if ($filesize > 0) { header("Content-Length: $filesize"); } } // end fn SendDownloadHeaders

' . _("Body retrieval error. The reason for this is most probably that the message is malformed.") . '
' . _("Command:") . "	$cmd
' . _("Response:") . "	$response
' . _("Message:") . "	$message
' . _("FETCH line:") . "	$topline