* This contains the functions necessary to detect and decode MIME
* messages.
*
- * @copyright 1999-2012 The SquirrelMail Project Team
+ * @copyright 1999-2020 The SquirrelMail Project Team
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
* @version $Id$
* @package squirrelmail
displayPageHeader( $color, $mailbox );
$errormessage = _("SquirrelMail could not decode the bodystructure of the message");
$errormessage .= '<br />'._("The bodystructure provided by your IMAP server:").'<br /><br />';
- $errormessage .= '<pre>' . htmlspecialchars($read) . '</pre>';
+ $errormessage .= '<pre>' . sm_encode_html_special_chars($read) . '</pre>';
plain_error_message( $errormessage );
echo '</body></html>';
exit;
* @param integer $id message id
*/
function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
- global $where, $what, $startMessage, $color, $passed_ent_id, $base_uri;
+ global $where, $what, $startMessage, $color, $passed_ent_id,
+ $base_uri, $block_svg_download;
$att_ar = $message->getAttachments($exclude_id);
$urlMailbox = urlencode($mailbox);
$header = $att->header;
$type0 = strtolower($header->type0);
$type1 = strtolower($header->type1);
+ if ($block_svg_download && strpos($type1, 'svg') === 0)
+ continue;
+
$name = '';
$links = array();
$links['download link']['text'] = _("Download");
$this_attachment['DefaultHREF'] = $defaultlink;
$this_attachment['DownloadHREF'] = $links['download link']['href'];
$this_attachment['ViewHREF'] = isset($links['attachment_common']) ? $links['attachment_common']['href'] : '';
- $this_attachment['Size'] = $header->size;
- $this_attachment['ContentType'] = htmlspecialchars($type0 .'/'. $type1);
+
+ // base64 encoded file sizes are misleading, so approximate real size
+ if (!empty($header->encoding) && strtolower($header->encoding) == 'base64')
+ $this_attachment['Size'] = $header->size / 4 * 3;
+ else
+ $this_attachment['Size'] = $header->size;
+
+ $this_attachment['ContentType'] = sm_encode_html_special_chars($type0 .'/'. $type1);
$this_attachment['OtherLinks'] = array();
foreach ($links as $val) {
if ($val['text']==_("Download") || $val['text'] == _("View"))
* @return string decoded header string
*/
function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) {
- global $languages, $squirrelmail_language,$default_charset;
+ global $languages, $squirrelmail_language,$default_charset, $fix_broken_base64_encoded_messages;
if (is_array($string)) {
$string = implode("\n", $string);
}
$iLastMatch = -2;
$encoded = true;
+// FIXME: spaces are allowed inside quoted-printable encoding, but the following line will bust up any such encoded strings
$aString = explode(' ',$string);
$ret = '';
foreach ($aString as $chunk) {
$iLastMatch = $i;
$j = $i;
if ($htmlsafe) {
- $ret .= htmlspecialchars($res[1]);
+ $ret .= sm_encode_html_special_chars($res[1]);
} else {
$ret .= $res[1];
}
switch ($encoding)
{
case 'B':
+ // fix broken base64-encoded strings (remove end = padding,
+ // change any = to + in middle of string, add padding back
+ // to the end)
+ if ($fix_broken_base64_encoded_messages) {
+ $encoded_string_minus_padding = strtr(rtrim($res[4], '='), '=', '+');
+ $res[4] = str_pad($encoded_string_minus_padding, strlen($res[4]), '=');
+ }
$replace = base64_decode($res[4]);
if ($utfencode) {
if ($can_be_encoded) {
}
} else {
if ($htmlsafe) {
- $replace = htmlspecialchars($replace);
+ $replace = sm_encode_html_special_chars($replace);
}
$ret.= $replace;
}
break;
case 'Q':
$replace = str_replace('_', ' ', $res[4]);
- $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))',
+ $replace = preg_replace_callback('/=([0-9a-f]{2})/i',
+ create_function ('$matches', 'return chr(hexdec($matches[1]));'),
$replace);
if ($utfencode) {
if ($can_be_encoded) {
}
} else {
if ($htmlsafe) {
- $replace = htmlspecialchars($replace);
+ $replace = sm_encode_html_special_chars($replace);
}
}
$ret .= $replace;
}
if (!$encoded && $htmlsafe) {
- $ret .= htmlspecialchars($chunk);
+ $ret .= sm_encode_html_special_chars($chunk);
} else {
$ret .= $chunk;
}
function sq_skipspace($body, $offset){
$me = 'sq_skipspace';
preg_match('/^(\s*)/s', substr($body, $offset), $matches);
- if (sizeof($matches{1})){
- $count = strlen($matches{1});
- $offset += $count;
+ if (!empty($matches[1])){
+ $offset += strlen($matches[1]);
}
return $offset;
}
/**
* Use white list based filtering on attributes which can contain url's
*/
- else if ($attname == 'href' || $attname == 'src' || $attname == 'background') {
+ else if ($attname == 'href' || $attname == 'xlink:href' || $attname == 'src'
+ || $attname == 'poster' || $attname == 'formaction'
+ || $attname == 'background' || $attname == 'action') {
sq_fix_url($attname, $attvalue, $message, $id, $mailbox);
$attary{$attname} = $attvalue;
}
// images off by default.
sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET, FALSE);
- $secremoveimg = '../images/' . _("sec_remove_eng.png");
+ global $use_transparent_security_image;
+ if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png';
+ else $secremoveimg = '../images/' . _("sec_remove_eng.png");
/**
* Replace empty src tags with the blank image. src is only used
* body {background: blah-blah}
* and change it to .bodyclass so we can just assign it to a <div>
*/
- $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
- $secremoveimg = '../images/' . _("sec_remove_eng.png");
+ // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+ // Nah, this is even better - try to preface all CSS selectors with
+ // our <div> class ID "bodyclass" then correct generic "body" selectors
+ // TODO: this works fairly well but breaks constructs like this:
+ // @media print { body { font-size: 10pt; } }
+ // but there isn't an easy way to make this regex skip @media
+ // definitions... though lots of the ones in the wild will be
+ // correctly handled because they tend to end with a parenthesis, like:
+ // @media screen and (max-width:480px) { ...
+ $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content);
+ $content = str_replace('.bodyclass body', '.bodyclass', $content);
+
+ global $use_transparent_security_image;
+ if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png';
+ else $secremoveimg = '../images/' . _("sec_remove_eng.png");
+
/**
* Fix url('blah') declarations.
*/
* be set to relative and move itself anywhere it wants to,
* displaying content in areas it shouldn't be allowed to touch.
*/
- $match = Array('/\/\*.*\*\//',
+ $match = Array('/\/\*.*\*\//', // removes /* blah blah */
'/expression/i',
'/behaviou*r/i',
'/binding/i',
* @param $add_attr_to_tag see description above
* @param $message message object
* @param $id message id
+ * @param $recursively_called boolean flag for recursive calls into this function (optional; default FALSE)
* @return sanitized html safe to show on your pages.
*/
function sq_sanitize($body,
$add_attr_to_tag,
$message,
$id,
- $mailbox
+ $mailbox,
+ $recursively_called=FALSE
){
$me = 'sq_sanitize';
+
+ /**
+ * See if tag_list is a list of tags to remove or tags to allow.
+ * false means remove these tags
+ * true means allow these tags
+ */
+ $orig_tag_list = $tag_list;
$rm_tags = array_shift($tag_list);
+
/**
* Normalize rm_tags and rm_tags_with_content.
*/
@array_walk($tag_list, 'sq_casenormalize');
@array_walk($rm_tags_with_content, 'sq_casenormalize');
@array_walk($self_closing_tags, 'sq_casenormalize');
- /**
- * See if tag_list is of tags to remove or tags to allow.
- * false means remove these tags
- * true means allow these tags
- */
+
$curpos = 0;
$open_tags = Array();
$trusted = "\n<!-- begin sanitized html -->\n";
while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
+
+ /**
+ * RCDATA and RAWTEXT tags are handled differently:
+ * next instance of closing tag is used, whether or not
+ * the HTML is well formed before that
+ */
+ global $rcdata_rawtext_tags;
+ if (!$recursively_called
+ && in_array($tagname, $rcdata_rawtext_tags)
+ && $tagtype === 1){
+ $closing_tag = false;
+ $closing_tag_offset = $curpos;
+ // seek out the closing tag for the current RCDATA/RAWTEXT tag
+ while (1) {
+ // first we need to move forward to next available closing tag
+ // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax)
+ $next_tag = sq_findnxreg($body, $closing_tag_offset, "</\s*$tagname");
+ if ($next_tag === false) {
+ $closing_tag = false;
+ break;
+ }
+ // but then we have to make sure it's a well-formed tag
+ $closing_tag = sq_getnxtag($body, $next_tag[0]);
+ if ($closing_tag === false)
+ break;
+ else if ($closing_tag[0] !== false
+ // these should be redundant
+ && $closing_tag[0] === $tagname && $closing_tag[2] === 2) {
+ $trusted .= sq_sanitize(substr($body, $curpos, $closing_tag[4] - $curpos + 1),
+ $orig_tag_list, $rm_tags_with_content, $self_closing_tags,
+ $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag,
+ $message, $id, $mailbox, true);
+ $curpos = $closing_tag[4] + 1;
+ continue 2;
+ }
+ $closing_tag_offset = $next_tag[0] + 1;
+ }
+ if ($closing_tag === false)
+ { /* no-op... there was no closing tag for this RCDATA/RAWTEXT tag - we could probably set $curpos to the end of $body, but this HTML is malformed anyway and should just fall apart on its own */ }
+ }
+
$free_content = substr($body, $curpos, $lt-$curpos);
/**
* Take care of <style>
// require_once(SM_PATH . 'functions/url_parser.php'); // for $MailTo_PReg_Match
global $attachment_common_show_images, $view_unsafe_images,
- $has_unsafe_images;
+ $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags,
+ $remove_rcdata_rawtext_tags_and_content;
+
+ $rcdata_rawtext_tags = array(
+ "noscript",
+ "noframes",
+ "noembed",
+ "textarea",
+ // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below
+ );
+
/**
* Don't display attached images in HTML mode.
*
*/
$attachment_common_show_images = false;
$tag_list = Array(
- false,
- "object",
+ false, // remove these tags
"meta",
"html",
"head",
"frame",
"iframe",
"plaintext",
- "marquee"
+ "marquee",
);
$rm_tags_with_content = Array(
"script",
+ "object",
"applet",
"embed",
"title",
"frameset",
"xmp",
- "xml"
+ "xml",
);
+ if (!$allow_svg_display)
+ $rm_tags_with_content[] = 'svg';
+ /**
+ * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special
+ * case that they are, but if you prefer to remove them and their contents entirely
+ * (in most cases, should be a safe thing with minimal impact), you can add the
+ * following to config/config_local.php
+ * $remove_rcdata_rawtext_tags_and_content = TRUE;
+ */
+ if ($remove_rcdata_rawtext_tags_and_content)
+ $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags);
$self_closing_tags = Array(
"img",
"br",
"hr",
"input",
- "outbind"
+ "outbind",
);
$force_tag_closing = true;
"/^on.*/i",
"/^dynsrc/i",
"/^data.*/i",
- "/^lowsrc.*/i"
+ "/^lowsrc.*/i",
)
);
- $secremoveimg = "../images/" . _("sec_remove_eng.png");
+ global $use_transparent_security_image;
+ if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png';
+ else $secremoveimg = '../images/' . _("sec_remove_eng.png");
+
$bad_attvals = Array(
"/.*/" =>
Array(
if ($take_mailto_links) {
// parseUrl($trusted); // this even parses URLs inside of tags... too aggressive
global $MailTo_PReg_Match;
- $MailTo_PReg_Match = '/mailto:' . substr($MailTo_PReg_Match, 1) ;
+ // some mailers (Microsoft, surprise surprise) produce mailto strings without being
+ // inside an anchor (link) tag, so we have to make sure the regex looks for the
+ // quote before mailto, and we'll also try to convert the non-links back into links
+ $MailTo_PReg_Match = '/([\'"])?mailto:' . substr($MailTo_PReg_Match, 1) ;
if ((preg_match_all($MailTo_PReg_Match, $trusted, $regs)) && ($regs[0][0] != '')) {
foreach ($regs[0] as $i => $mailto_before) {
- $mailto_params = $regs[10][$i];
+ $mailto_params = $regs[11][$i];
+
+ // get rid of any leading quote we may have captured but don't care about
+ //
+ $mailto_before = ltrim($mailto_before, '"\'');
+
// get rid of any tailing quote since we have to add send_to to the end
//
- if (substr($mailto_before, strlen($mailto_before) - 1) == '"')
- $mailto_before = substr($mailto_before, 0, strlen($mailto_before) - 1);
- if (substr($mailto_params, strlen($mailto_params) - 1) == '"')
- $mailto_params = substr($mailto_params, 0, strlen($mailto_params) - 1);
+ $mailto_before = rtrim($mailto_before, '"\'');
+ $mailto_params = rtrim($mailto_params, '"\'');
- if ($regs[1][$i]) { //if there is an email addr before '?', we need to merge it with the params
- $to = 'to=' . $regs[1][$i];
+ if ($regs[2][$i]) { //if there is an email addr before '?', we need to merge it with the params
+ $to = 'to=' . $regs[2][$i];
if (strpos($mailto_params, 'to=') > -1) //already a 'to='
$mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params);
else {
// remove <a href=" and anything after the next quote (we only
// need the uri, not the link HTML) in compose uri
//
- $comp_uri = substr($comp_uri, 9);
- $comp_uri = substr($comp_uri, 0, strpos($comp_uri, '"', 1));
+ // but only do this if the original mailto was in a real anchor tag
+ //
+ if (!empty($regs[1][$i])) {
+ $comp_uri = substr($comp_uri, 9);
+ $comp_uri = substr($comp_uri, 0, strpos($comp_uri, '"', 1));
+ }
$trusted = str_replace($mailto_before, $comp_uri, $trusted);
}
}
$filename=rawurlencode($filename);
header ("Pragma: public");
header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1
- header ("Cache-Control: post-check=0, pre-check=0", false);
+ // does nothing - see: https://blogs.msdn.microsoft.com/ieinternals/2009/07/20/internet-explorers-cache-control-extensions/
+ // header ("Cache-Control: post-check=0, pre-check=0", false);
header ("Cache-Control: private");
//set the inline header for IE, we'll add the attachment header later if we need it