X-Git-Url: https://vcs.fsf.org/?p=squirrelmail.git;a=blobdiff_plain;f=functions%2Fmime.php;h=56ee0d47c3320ad1b57e88eb8f1519d0aa346bd6;hp=1fff67ed2fa687812a0df7f2c442b9c22dfcc2bb;hb=24e8917ead46268f6a4264900c832b4a5c37ee84;hpb=8439f61d2eaaf3aa5fb0b911e3b410d70232c1c8 diff --git a/functions/mime.php b/functions/mime.php index 1fff67ed..56ee0d47 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -6,7 +6,7 @@ * This contains the functions necessary to detect and decode MIME * messages. * - * @copyright 1999-2012 The SquirrelMail Project Team + * @copyright 1999-2020 The SquirrelMail Project Team * @license http://opensource.org/licenses/gpl-license.php GNU Public License * @version $Id$ * @package squirrelmail @@ -63,7 +63,7 @@ function mime_structure ($bodystructure, $flags=array()) { displayPageHeader( $color, $mailbox ); $errormessage = _("SquirrelMail could not decode the bodystructure of the message"); $errormessage .= '
'._("The bodystructure provided by your IMAP server:").'

'; - $errormessage .= '
' . htmlspecialchars($read) . '
'; + $errormessage .= '
' . sm_encode_html_special_chars($read) . '
'; plain_error_message( $errormessage ); echo ''; exit; @@ -514,7 +514,8 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma * @param integer $id message id */ function buildAttachmentArray($message, $exclude_id, $mailbox, $id) { - global $where, $what, $startMessage, $color, $passed_ent_id, $base_uri; + global $where, $what, $startMessage, $color, $passed_ent_id, + $base_uri, $block_svg_download; $att_ar = $message->getAttachments($exclude_id); $urlMailbox = urlencode($mailbox); @@ -525,6 +526,9 @@ function buildAttachmentArray($message, $exclude_id, $mailbox, $id) { $header = $att->header; $type0 = strtolower($header->type0); $type1 = strtolower($header->type1); + if ($block_svg_download && strpos($type1, 'svg') === 0) + continue; + $name = ''; $links = array(); $links['download link']['text'] = _("Download"); @@ -631,8 +635,14 @@ function buildAttachmentArray($message, $exclude_id, $mailbox, $id) { $this_attachment['DefaultHREF'] = $defaultlink; $this_attachment['DownloadHREF'] = $links['download link']['href']; $this_attachment['ViewHREF'] = isset($links['attachment_common']) ? $links['attachment_common']['href'] : ''; - $this_attachment['Size'] = $header->size; - $this_attachment['ContentType'] = htmlspecialchars($type0 .'/'. $type1); + + // base64 encoded file sizes are misleading, so approximate real size + if (!empty($header->encoding) && strtolower($header->encoding) == 'base64') + $this_attachment['Size'] = $header->size / 4 * 3; + else + $this_attachment['Size'] = $header->size; + + $this_attachment['ContentType'] = sm_encode_html_special_chars($type0 .'/'. $type1); $this_attachment['OtherLinks'] = array(); foreach ($links as $val) { if ($val['text']==_("Download") || $val['text'] == _("View")) @@ -791,7 +801,7 @@ function decodeBody($string, $encoding, $force_crlf='') { * @return string decoded header string */ function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) { - global $languages, $squirrelmail_language,$default_charset; + global $languages, $squirrelmail_language,$default_charset, $fix_broken_base64_encoded_messages; if (is_array($string)) { $string = implode("\n", $string); } @@ -806,6 +816,7 @@ function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) { $iLastMatch = -2; $encoded = true; +// FIXME: spaces are allowed inside quoted-printable encoding, but the following line will bust up any such encoded strings $aString = explode(' ',$string); $ret = ''; foreach ($aString as $chunk) { @@ -831,7 +842,7 @@ function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) { $iLastMatch = $i; $j = $i; if ($htmlsafe) { - $ret .= htmlspecialchars($res[1]); + $ret .= sm_encode_html_special_chars($res[1]); } else { $ret .= $res[1]; } @@ -847,6 +858,13 @@ function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) { switch ($encoding) { case 'B': + // fix broken base64-encoded strings (remove end = padding, + // change any = to + in middle of string, add padding back + // to the end) + if ($fix_broken_base64_encoded_messages) { + $encoded_string_minus_padding = strtr(rtrim($res[4], '='), '=', '+'); + $res[4] = str_pad($encoded_string_minus_padding, strlen($res[4]), '='); + } $replace = base64_decode($res[4]); if ($utfencode) { if ($can_be_encoded) { @@ -860,14 +878,15 @@ function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) { } } else { if ($htmlsafe) { - $replace = htmlspecialchars($replace); + $replace = sm_encode_html_special_chars($replace); } $ret.= $replace; } break; case 'Q': $replace = str_replace('_', ' ', $res[4]); - $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', + $replace = preg_replace_callback('/=([0-9a-f]{2})/i', + create_function ('$matches', 'return chr(hexdec($matches[1]));'), $replace); if ($utfencode) { if ($can_be_encoded) { @@ -881,7 +900,7 @@ function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) { } } else { if ($htmlsafe) { - $replace = htmlspecialchars($replace); + $replace = sm_encode_html_special_chars($replace); } } $ret .= $replace; @@ -901,7 +920,7 @@ function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) { } if (!$encoded && $htmlsafe) { - $ret .= htmlspecialchars($chunk); + $ret .= sm_encode_html_special_chars($chunk); } else { $ret .= $chunk; } @@ -1384,9 +1403,8 @@ function sq_casenormalize(&$val){ function sq_skipspace($body, $offset){ $me = 'sq_skipspace'; preg_match('/^(\s*)/s', substr($body, $offset), $matches); - if (sizeof($matches{1})){ - $count = strlen($matches{1}); - $offset += $count; + if (!empty($matches[1])){ + $offset += strlen($matches[1]); } return $offset; } @@ -1859,7 +1877,9 @@ function sq_fixatts($tagname, /** * Use white list based filtering on attributes which can contain url's */ - else if ($attname == 'href' || $attname == 'src' || $attname == 'background') { + else if ($attname == 'href' || $attname == 'xlink:href' || $attname == 'src' + || $attname == 'poster' || $attname == 'formaction' + || $attname == 'background' || $attname == 'action') { sq_fix_url($attname, $attvalue, $message, $id, $mailbox); $attary{$attname} = $attvalue; } @@ -1896,7 +1916,9 @@ function sq_fix_url($attname, &$attvalue, $message, $id, $mailbox,$sQuote = '"') // images off by default. sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET, FALSE); - $secremoveimg = '../images/' . _("sec_remove_eng.png"); + global $use_transparent_security_image; + if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png'; + else $secremoveimg = '../images/' . _("sec_remove_eng.png"); /** * Replace empty src tags with the blank image. src is only used @@ -2108,8 +2130,22 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ * body {background: blah-blah} * and change it to .bodyclass so we can just assign it to a
*/ - $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); - $secremoveimg = '../images/' . _("sec_remove_eng.png"); + // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); + // Nah, this is even better - try to preface all CSS selectors with + // our
class ID "bodyclass" then correct generic "body" selectors + // TODO: this works pretty good but breaks stuff like this: + // @media print { body { font-size: 10pt; } } + // but there isn't an easy way to make this regex skip @media + // definitions... though lots of the ones in the wild will be + // correctly handled because they tend to end with a parenthesis, like: + // @media screen and (max-width:480px) { ... + $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content); + $content = str_replace('.bodyclass body', '.bodyclass', $content); + + global $use_transparent_security_image; + if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png'; + else $secremoveimg = '../images/' . _("sec_remove_eng.png"); + /** * Fix url('blah') declarations. */ @@ -2161,7 +2197,7 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ * be set to relative and move itself anywhere it wants to, * displaying content in areas it shouldn't be allowed to touch. */ - $match = Array('/\/\*.*\*\//', + $match = Array('/\/\*.*\*\//', // removes /* blah blah */ '/expression/i', '/behaviou*r/i', '/binding/i', @@ -2309,6 +2345,7 @@ function sq_body2div($attary, $mailbox, $message, $id){ * @param $add_attr_to_tag see description above * @param $message message object * @param $id message id + * @param $recursively_called boolean flag for recursive calls into this function (optional; default FALSE) * @return sanitized html safe to show on your pages. */ function sq_sanitize($body, @@ -2321,21 +2358,26 @@ function sq_sanitize($body, $add_attr_to_tag, $message, $id, - $mailbox + $mailbox, + $recursively_called=FALSE ){ $me = 'sq_sanitize'; + + /** + * See if tag_list is of tags to remove or tags to allow. + * false means remove these tags + * true means allow these tags + */ + $orig_tag_list = $tag_list; $rm_tags = array_shift($tag_list); + /** * Normalize rm_tags and rm_tags_with_content. */ @array_walk($tag_list, 'sq_casenormalize'); @array_walk($rm_tags_with_content, 'sq_casenormalize'); @array_walk($self_closing_tags, 'sq_casenormalize'); - /** - * See if tag_list is of tags to remove or tags to allow. - * false means remove these tags - * true means allow these tags - */ + $curpos = 0; $open_tags = Array(); $trusted = "\n\n"; @@ -2348,6 +2390,47 @@ function sq_sanitize($body, while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; + + /** + * RCDATA and RAWTEXT tags are handled differently: + * next instance of closing tag is used, whether or not + * the HTML is well formed before that + */ + global $rcdata_rawtext_tags; + if (!$recursively_called + && in_array($tagname, $rcdata_rawtext_tags) + && $tagtype === 1){ + $closing_tag = false; + $closing_tag_offset = $curpos; + // seek out the closing tag for the current RCDATA/RAWTEXT tag + while (1) { + // first we need to move forward to next available closing tag + // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax) + $next_tag = sq_findnxreg($body, $closing_tag_offset, " @@ -2495,7 +2578,17 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links // require_once(SM_PATH . 'functions/url_parser.php'); // for $MailTo_PReg_Match global $attachment_common_show_images, $view_unsafe_images, - $has_unsafe_images; + $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags, + $remove_rcdata_rawtext_tags_and_content; + + $rcdata_rawtext_tags = array( + "noscript", + "noframes", + "noembed", + "textarea", + // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below + ); + /** * Don't display attached images in HTML mode. * @@ -2503,8 +2596,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links */ $attachment_common_show_images = false; $tag_list = Array( - false, - "object", + false, // remove these tags "meta", "html", "head", @@ -2513,25 +2605,37 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links "frame", "iframe", "plaintext", - "marquee" + "marquee", ); $rm_tags_with_content = Array( "script", + "object", "applet", "embed", "title", "frameset", "xmp", - "xml" + "xml", ); + if (!$allow_svg_display) + $rm_tags_with_content[] = 'svg'; + /** + * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special + * case that they are, but if you prefer to remove them and their contents entirely + * (in most cases, should be a safe thing with minimal impact), you can add the + * following to config/config_local.php + * $remove_rcdata_rawtext_tags_and_content = TRUE; + */ + if ($remove_rcdata_rawtext_tags_and_content) + $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags); $self_closing_tags = Array( "img", "br", "hr", "input", - "outbind" + "outbind", ); $force_tag_closing = true; @@ -2543,11 +2647,14 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links "/^on.*/i", "/^dynsrc/i", "/^data.*/i", - "/^lowsrc.*/i" + "/^lowsrc.*/i", ) ); - $secremoveimg = "../images/" . _("sec_remove_eng.png"); + global $use_transparent_security_image; + if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png'; + else $secremoveimg = '../images/' . _("sec_remove_eng.png"); + $bad_attvals = Array( "/.*/" => Array( @@ -2674,19 +2781,25 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links if ($take_mailto_links) { // parseUrl($trusted); // this even parses URLs inside of tags... too aggressive global $MailTo_PReg_Match; - $MailTo_PReg_Match = '/mailto:' . substr($MailTo_PReg_Match, 1) ; + // some mailers (Microsoft, surprise surprise) produce mailto strings without being + // inside an anchor (link) tag, so we have to make sure the regex looks for the + // quote before mailto, and we'll also try to convert the non-links back into links + $MailTo_PReg_Match = '/([\'"])?mailto:' . substr($MailTo_PReg_Match, 1) ; if ((preg_match_all($MailTo_PReg_Match, $trusted, $regs)) && ($regs[0][0] != '')) { foreach ($regs[0] as $i => $mailto_before) { - $mailto_params = $regs[10][$i]; + $mailto_params = $regs[11][$i]; + + // get rid of any leading quote we may have captured but don't care about + // + $mailto_before = ltrim($mailto_before, '"\''); + // get rid of any tailing quote since we have to add send_to to the end // - if (substr($mailto_before, strlen($mailto_before) - 1) == '"') - $mailto_before = substr($mailto_before, 0, strlen($mailto_before) - 1); - if (substr($mailto_params, strlen($mailto_params) - 1) == '"') - $mailto_params = substr($mailto_params, 0, strlen($mailto_params) - 1); + $mailto_before = rtrim($mailto_before, '"\''); + $mailto_params = rtrim($mailto_params, '"\''); - if ($regs[1][$i]) { //if there is an email addr before '?', we need to merge it with the params - $to = 'to=' . $regs[1][$i]; + if ($regs[2][$i]) { //if there is an email addr before '?', we need to merge it with the params + $to = 'to=' . $regs[2][$i]; if (strpos($mailto_params, 'to=') > -1) //already a 'to=' $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params); else { @@ -2711,8 +2824,12 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links // remove