X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=functions%2Fmime.php;h=27c0e00846b72973e20f7a3835846073596f45eb;hb=33aab559e47bc6c36748eb7d105b47dbdd6d89c1;hp=11c9f4cfaad3a37e3fe30987624a6becd2e83213;hpb=e11159795f02a528915f2a4b85ac083809298da2;p=squirrelmail.git diff --git a/functions/mime.php b/functions/mime.php index 11c9f4cf..27c0e008 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -6,7 +6,7 @@ * This contains the functions necessary to detect and decode MIME * messages. * - * @copyright © 1999-2007 The SquirrelMail Project Team + * @copyright 1999-2024 The SquirrelMail Project Team * @license http://opensource.org/licenses/gpl-license.php GNU Public License * @version $Id$ * @package squirrelmail @@ -60,19 +60,18 @@ function mime_structure ($bodystructure, $flags=array()) { if (!is_object($msg)) { global $color, $mailbox; - /* removed urldecode because $_GET is auto urldecoded ??? */ displayPageHeader( $color, $mailbox ); $errormessage = _("SquirrelMail could not decode the bodystructure of the message"); $errormessage .= '
'._("The bodystructure provided by your IMAP server:").'

'; - $errormessage .= '
' . htmlspecialchars($read) . '
'; - plain_error_message( $errormessage, $color ); + $errormessage .= '
' . sm_encode_html_special_chars($read) . '
'; + plain_error_message( $errormessage ); echo ''; exit; } if (count($flags)) { foreach ($flags as $flag) { //FIXME: please document why it is we have to check the first char of the flag but we then go ahead and do a full string comparison anyway. Is this a speed enhancement? If not, let's keep it simple and just compare the full string and forget the switch block. - $char = strtoupper($flag{1}); + $char = strtoupper($flag[1]); switch ($char) { case 'S': if (strtolower($flag) == '\\seen') { @@ -93,6 +92,9 @@ function mime_structure ($bodystructure, $flags=array()) { if (strtolower($flag) == '\\flagged') { $msg->is_flagged = true; } + else if (strtolower($flag) == '$forwarded') { + $msg->is_forwarded = true; + } break; case 'M': if (strtolower($flag) == '$mdnsent') { @@ -135,19 +137,27 @@ function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) { $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, TRUE); do { $topline = trim(array_shift($data)); - } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ; + } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH .*BODY.*/i', $topline)) ; + // Matching with "BODY" above is difficult: in most cases "FETCH \(BODY" would work + // but some servers may put other things in the same result, perhaps something such + // as "* 23 FETCH (FLAGS (\Seen) BODY[1] {174}". There is some small chance that + // if the character sequence "BODY" appears in a response where it isn't actually + // a FETCH response data item name, the current regex will break things. The better + // way to do this would be to parse the response correctly and not use a regex. $wholemessage = implode('', $data); - if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) { + if (preg_match('/\{([^\}]*)\}/', $topline, $regs)) { $ret = substr($wholemessage, 0, $regs[1]); /* There is some information in the content info header that could be important * in order to parse html messages. Let's get them here. */ -// if ($ret{0} == '<') { +// if ($ret[0] == '<') { // $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, TRUE); // } - } else if (ereg('"([^"]*)"', $topline, $regs)) { + } else if (preg_match('/"([^"]*)"/', $topline, $regs)) { $ret = $regs[1]; + } else if ((stristr($topline, 'nil') !== false) && (empty($wholemessage))) { + $ret = $wholemessage; } else { global $where, $what, $mailbox, $passed_id, $startMessage; $par = 'mailbox=' . urlencode($mailbox) . '&passed_id=' . $passed_id; @@ -180,7 +190,8 @@ function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) { return $ret; } -function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding, $rStream='php://stdout') { +// TODO: Needs documentation. $ent_id default is usually 1 +function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding, $rStream='php://stdout', $force_crlf='') { /* Don't kill the connection if the browser is over a dialup * and it would take over 30 seconds to download it. @@ -202,9 +213,9 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding, $rStrea } else { $body = mime_fetch_body ($imap_stream, $id, $ent_id); if (is_resource($rStream)) { - fputs($rStream,decodeBody($body,$encoding)); + fputs($rStream,decodeBody($body, $encoding, $force_crlf)); } else { - echo decodeBody($body, $encoding); + echo decodeBody($body, $encoding, $force_crlf); } } @@ -290,7 +301,8 @@ function translateText(&$body, $wrap_at, $charset) { $body_ary = explode("\n", $body); for ($i=0; $i < count($body_ary); $i++) { - $line = $body_ary[$i]; + $line = rtrim($body_ary[$i],"\r"); + if (strlen($line) - 2 >= $wrap_at) { sqWordWrap($line, $wrap_at, $charset); } @@ -341,10 +353,9 @@ function translateText(&$body, $wrap_at, $charset) { * @param string $ent_num (since 1.3.0) message part id * @param integer $id (since 1.3.0) message id * @param string $mailbox (since 1.3.0) imap folder name - * @param boolean $clean (since 1.5.1) Do not output stuff that's irrelevant for the printable version. * @return string html formated message text */ -function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX', $clean=FALSE) { +function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX') { /* This if statement checks for the entity to show as the * primary message. To add more of them, just put them in the * order that is their priority. @@ -353,16 +364,14 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma $show_html_default, $sort, $has_unsafe_images, $passed_ent_id, $use_iframe, $iframe_height, $download_and_unsafe_link, $download_href, $unsafe_image_toggle_href, $unsafe_image_toggle_text, - $oTemplate; - - $nbsp = $oTemplate->fetch('non_breaking_space.tpl'); + $oTemplate, $nbsp; // workaround for not updated config.php if (! isset($use_iframe)) $use_iframe = false; - if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { - $view_unsafe_images = false; - } + // If there's no "view_unsafe_images" variable in the URL, turn unsafe + // images off by default. + sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET, FALSE); $body = ''; $urlmailbox = urlencode($mailbox); @@ -387,24 +396,42 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma */ if ($body_message->header->type1 == 'html') { - if ($show_html_default <> 1) { + // Do we need to make an HTML part viewable as non-HTML plain text? + if ($show_html_default != 1) { $entity_conv = array(' ' => ' ', - '

' => "\n", - '

' => "\n", - '
' => "\n", - '
' => "\n", - '
' => "\n", - '
' => "\n", + // These are better done by regex (below) + // '

' => "\n", + // '

' => "\n", + // '
' => "\n", + // '
' => "\n", + // '
' => "\n", + // '
' => "\n", + // '' => "\n", + // '

' => "\n", '>' => '>', - '<' => '<'); + '<' => '<', + '&' => '&', + '©' => '©'); + // first, completely remove in between comments $iCurrentPos = $pos; @@ -1915,10 +2107,10 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ $bSucces = false; $bEndTag = false; for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) { - $char = $body{$i}; + $char = $body[$i]; switch ($char) { case '<': - $sToken .= $char; + $sToken = $char; break; case '/': if ($sToken == '<') { @@ -1946,7 +2138,7 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ case '!': if ($sToken == '<') { // possible comment - if (isset($body{$i+2}) && substr($body,$i,3) == '!--') { + if (isset($body[$i+2]) && substr($body,$i,3) == '!--') { $i = strpos($body,'-->',$i+3); if ($i === false) { // no end comment $i = strlen($body); @@ -1973,13 +2165,27 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ /** - * First look for general BODY style declaration, which would be - * like so: - * body {background: blah-blah} - * and change it to .bodyclass so we can just assign it to a
- */ - $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); - $secremoveimg = '../images/' . _("sec_remove_eng.png"); + * First look for general BODY style declaration, which would be + * like so: + * body {background: blah-blah} + * and change it to .bodyclass so we can just assign it to a
+ */ + // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); + // Nah, this is even better - try to preface all CSS selectors with + // our
class ID "bodyclass" then correct generic "body" selectors + // TODO: this works pretty good but breaks stuff like this: + // @media print { body { font-size: 10pt; } } + // but there isn't an easy way to make this regex skip @media + // definitions... though lots of the ones in the wild will be + // correctly handled because they tend to end with a parenthesis, like: + // @media screen and (max-width:480px) { ... + $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content); + $content = str_replace('.bodyclass body', '.bodyclass', $content); + + global $use_transparent_security_image; + if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png'; + else $secremoveimg = '../images/' . _("sec_remove_eng.png"); + /** * Fix url('blah') declarations. */ @@ -2014,9 +2220,9 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ $content = str_replace($aValue,$aReplace,$content); } - /** - * Remove any backslashes, entities, and extraneous whitespace. - */ + /** + * Remove any backslashes, entities, and extraneous whitespace. + */ $contentTemp = $content; sq_defang($contentTemp); sq_unspace($contentTemp); @@ -2024,15 +2230,22 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ /** * Fix stupid css declarations which lead to vulnerabilities * in IE. + * + * Also remove "position" attribute, as it can easily be set + * to "fixed" or "absolute" with "left" and "top" attributes + * of zero, taking over the whole content frame. It can also + * be set to relative and move itself anywhere it wants to, + * displaying content in areas it shouldn't be allowed to touch. */ - $match = Array('/\/\*.*\*\//', + $match = Array('/\/\*.*\*\//', // removes /* blah blah */ '/expression/i', '/behaviou*r/i', '/binding/i', '/include-source/i', '/javascript/i', - '/script/i'); - $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy'); + '/script/i', + '/position/i'); + $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', ''); $contentNew = preg_replace($match, $replace, $contentTemp); if ($contentNew !== $contentTemp) { // insecure css declarations are used. From now on we don't care @@ -2070,8 +2283,8 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ $cidurl = preg_replace($match_str, $str_rep, $cidurl); $linkurl = find_ent_id($cidurl, $message); - /* in case of non-save cid links $httpurl should be replaced by a sort of - unsave link image */ + /* in case of non-safe cid links $httpurl should be replaced by a sort of + unsafe link image */ $httpurl = ''; /** @@ -2093,7 +2306,7 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ } if (!empty($linkurl)) { - $httpurl = $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' . + $httpurl = $quotchar . sqm_baseuri() . 'src/download.php?absolute_dl=true&' . "passed_id=$id&mailbox=" . urlencode($mailbox) . '&ent_id=' . $linkurl . $quotchar; } else { @@ -2148,7 +2361,7 @@ function sq_body2div($attary, $mailbox, $message, $id){ $styledef .= "color: $text; "; } if (strlen($styledef) > 0){ - $divattary{"style"} = "\"$styledef\""; + $divattary["style"] = "\"$styledef\""; } } return $divattary; @@ -2172,6 +2385,7 @@ function sq_body2div($attary, $mailbox, $message, $id){ * @param $add_attr_to_tag see description above * @param $message message object * @param $id message id + * @param $recursively_called boolean flag for recursive calls into this function (optional; default FALSE) * @return sanitized html safe to show on your pages. */ function sq_sanitize($body, @@ -2184,21 +2398,26 @@ function sq_sanitize($body, $add_attr_to_tag, $message, $id, - $mailbox + $mailbox, + $recursively_called=FALSE ){ $me = 'sq_sanitize'; + + /** + * See if tag_list is of tags to remove or tags to allow. + * false means remove these tags + * true means allow these tags + */ + $orig_tag_list = $tag_list; $rm_tags = array_shift($tag_list); + /** * Normalize rm_tags and rm_tags_with_content. */ @array_walk($tag_list, 'sq_casenormalize'); @array_walk($rm_tags_with_content, 'sq_casenormalize'); @array_walk($self_closing_tags, 'sq_casenormalize'); - /** - * See if tag_list is of tags to remove or tags to allow. - * false means remove these tags - * true means allow these tags - */ + $curpos = 0; $open_tags = Array(); $trusted = "\n\n"; @@ -2211,6 +2430,47 @@ function sq_sanitize($body, while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; + + /** + * RCDATA and RAWTEXT tags are handled differently: + * next instance of closing tag is used, whether or not + * the HTML is well formed before that + */ + global $rcdata_rawtext_tags; + if (!$recursively_called + && in_array($tagname, $rcdata_rawtext_tags) + && $tagtype === 1){ + $closing_tag = false; + $closing_tag_offset = $curpos; + // seek out the closing tag for the current RCDATA/RAWTEXT tag + while (1) { + // first we need to move forward to next available closing tag + // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax) + $next_tag = sq_findnxreg($body, $closing_tag_offset, " @@ -2219,6 +2479,17 @@ function sq_sanitize($body, list($free_content, $curpos) = sq_fixstyle($body, $gt+1, $message, $id, $mailbox); if ($free_content != FALSE){ + if ( !empty($attary) ) { + $attary = sq_fixatts($tagname, + $attary, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id, + $mailbox + ); + } $trusted .= sq_tagprint($tagname, $attary, $tagtype); $trusted .= $free_content; $trusted .= sq_tagprint($tagname, false, 2); @@ -2241,9 +2512,9 @@ function sq_sanitize($body, if ($tagname == "body"){ $tagname = "div"; } - if (isset($open_tags{$tagname}) && - $open_tags{$tagname} > 0){ - $open_tags{$tagname}--; + if (isset($open_tags[$tagname]) && + $open_tags[$tagname] > 0){ + $open_tags[$tagname]--; } else { $tagname = false; } @@ -2285,10 +2556,10 @@ function sq_sanitize($body, $message, $id); } if ($tagtype == 1){ - if (isset($open_tags{$tagname})){ - $open_tags{$tagname}++; + if (isset($open_tags[$tagname])){ + $open_tags[$tagname]++; } else { - $open_tags{$tagname}=1; + $open_tags[$tagname]=1; } } /** @@ -2347,7 +2618,17 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links // require_once(SM_PATH . 'functions/url_parser.php'); // for $MailTo_PReg_Match global $attachment_common_show_images, $view_unsafe_images, - $has_unsafe_images; + $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags, + $remove_rcdata_rawtext_tags_and_content; + + $rcdata_rawtext_tags = array( + "noscript", + "noframes", + "noembed", + "textarea", + // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below + ); + /** * Don't display attached images in HTML mode. * @@ -2355,8 +2636,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links */ $attachment_common_show_images = false; $tag_list = Array( - false, - "object", + false, // remove these tags "meta", "html", "head", @@ -2365,25 +2645,37 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links "frame", "iframe", "plaintext", - "marquee" + "marquee", ); $rm_tags_with_content = Array( "script", + "object", "applet", "embed", "title", "frameset", "xmp", - "xml" + "xml", ); + if (!$allow_svg_display) + $rm_tags_with_content[] = 'svg'; + /** + * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special + * case that they are, but if you prefer to remove them and their contents entirely + * (in most cases, should be a safe thing with minimal impact), you can add the + * following to config/config_local.php + * $remove_rcdata_rawtext_tags_and_content = TRUE; + */ + if ($remove_rcdata_rawtext_tags_and_content) + $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags); $self_closing_tags = Array( "img", "br", "hr", "input", - "outbind" + "outbind", ); $force_tag_closing = true; @@ -2395,11 +2687,14 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links "/^on.*/i", "/^dynsrc/i", "/^data.*/i", - "/^lowsrc.*/i" + "/^lowsrc.*/i", ) ); - $secremoveimg = "../images/" . _("sec_remove_eng.png"); + global $use_transparent_security_image; + if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png'; + else $secremoveimg = '../images/' . _("sec_remove_eng.png"); + $bad_attvals = Array( "/.*/" => Array( @@ -2437,12 +2732,28 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links "/binding/i", "/behaviou*r/i", "/include-source/i", - "/position\s*:\s*absolute/i", + + // position:relative can also be exploited + // to put content outside of email body area + // and position:fixed is similarly exploitable + // as position:absolute, so we'll remove it + // altogether.... + // + // Does this screw up legitimate HTML messages? + // If so, the only fix I see is to allow position + // attributes (any values? I think we still have + // to block static and fixed) only if $use_iframe + // is enabled (1.5.0+) + // + // was: "/position\s*:\s*absolute/i", + // + "/position\s*:/i", + "/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si", "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si", "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si", - "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si" + "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si", ), Array( "", @@ -2460,21 +2771,23 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links ) ) ); - if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { - $view_unsafe_images = false; - } + + // If there's no "view_unsafe_images" variable in the URL, turn unsafe + // images off by default. + sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET, FALSE); + if (!$view_unsafe_images){ /** * Remove any references to http/https if view_unsafe_images set * to false. */ - array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[0], + array_push($bad_attvals['/.*/']['/^src|background/i'][0], '/^([\'\"])\s*https*:.*([\'\"])/si'); - array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[1], + array_push($bad_attvals['/.*/']['/^src|background/i'][1], "\\1$secremoveimg\\1"); - array_push($bad_attvals{'/.*/'}{'/^style/i'}[0], + array_push($bad_attvals['/.*/']['/^style/i'][0], '/url\([\'\"]?https?:[^\)]*[\'\"]?\)/si'); - array_push($bad_attvals{'/.*/'}{'/^style/i'}[1], + array_push($bad_attvals['/.*/']['/^style/i'][1], "url(\\1$secremoveimg\\1)"); } @@ -2508,19 +2821,25 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links if ($take_mailto_links) { // parseUrl($trusted); // this even parses URLs inside of tags... too aggressive global $MailTo_PReg_Match; - $MailTo_PReg_Match = '/mailto:' . substr($MailTo_PReg_Match, 1) ; + // some mailers (Microsoft, surprise surprise) produce mailto strings without being + // inside an anchor (link) tag, so we have to make sure the regex looks for the + // quote before mailto, and we'll also try to convert the non-links back into links + $MailTo_PReg_Match = '/([\'"])?mailto:' . substr($MailTo_PReg_Match, 1) ; if ((preg_match_all($MailTo_PReg_Match, $trusted, $regs)) && ($regs[0][0] != '')) { foreach ($regs[0] as $i => $mailto_before) { - $mailto_params = $regs[10][$i]; + $mailto_params = $regs[11][$i]; + + // get rid of any leading quote we may have captured but don't care about + // + $mailto_before = ltrim($mailto_before, '"\''); + // get rid of any tailing quote since we have to add send_to to the end // - if (substr($mailto_before, strlen($mailto_before) - 1) == '"') - $mailto_before = substr($mailto_before, 0, strlen($mailto_before) - 1); - if (substr($mailto_params, strlen($mailto_params) - 1) == '"') - $mailto_params = substr($mailto_params, 0, strlen($mailto_params) - 1); + $mailto_before = rtrim($mailto_before, '"\''); + $mailto_params = rtrim($mailto_params, '"\''); - if ($regs[1][$i]) { //if there is an email addr before '?', we need to merge it with the params - $to = 'to=' . $regs[1][$i]; + if ($regs[2][$i]) { //if there is an email addr before '?', we need to merge it with the params + $to = 'to=' . $regs[2][$i]; if (strpos($mailto_params, 'to=') > -1) //already a 'to=' $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params); else { @@ -2545,8 +2864,12 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links // remove \|;]', '_', str_replace(' ', ' ', $filename)); + $filename = preg_replace('/[\\\\\/:*?"<>|;]/', '_', str_replace(' ', ' ', $filename)); } // A Pox on Microsoft and it's Internet Explorer! @@ -2609,7 +2932,8 @@ function SendDownloadHeaders($type0, $type1, $filename, $force, $filesize=0) { $filename=rawurlencode($filename); header ("Pragma: public"); header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1 - header ("Cache-Control: post-check=0, pre-check=0", false); + // does nothing - see: https://blogs.msdn.microsoft.com/ieinternals/2009/07/20/internet-explorers-cache-control-extensions/ + // header ("Cache-Control: post-check=0, pre-check=0", false); header ("Cache-Control: private"); //set the inline header for IE, we'll add the attachment header later if we need it