* This contains the functions necessary to detect and decode MIME
* messages.
*
- * @copyright 1999-2019 The SquirrelMail Project Team
+ * @copyright 1999-2021 The SquirrelMail Project Team
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
* @version $Id$
* @package squirrelmail
if (count($flags)) {
foreach ($flags as $flag) {
//FIXME: please document why it is we have to check the first char of the flag but we then go ahead and do a full string comparison anyway. Is this a speed enhancement? If not, let's keep it simple and just compare the full string and forget the switch block.
- $char = strtoupper($flag{1});
+ $char = strtoupper($flag[1]);
switch ($char) {
case 'S':
if (strtolower($flag) == '\\seen') {
/* There is some information in the content info header that could be important
* in order to parse html messages. Let's get them here.
*/
-// if ($ret{0} == '<') {
+// if ($ret[0] == '<') {
// $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, TRUE);
// }
} else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
returning any changes, changes should simply be made to the original
arguments themselves. */
$temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
- &$defaultlink, &$display_filename, &$where, &$what);
+ &$defaultlink, &$display_filename, &$where, &$what,
+ &$type0, &$type1);
do_hook("attachment $type0/$type1", $temp);
/* The API for this hook has changed as of 1.5.2 so that all plugin
arguments are passed in an array instead of each their own plugin
returning any changes, changes should simply be made to the original
arguments themselves. */
$temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
- &$defaultlink, &$display_filename, &$where, &$what);
+ &$defaultlink, &$display_filename, &$where, &$what,
+ &$type0, &$type1);
// Do not let a generic plugin change the default link if a more
// specialized one already did it...
if ($defaultlink != $defaultlink_orig) {
returning any changes, changes should simply be made to the original
arguments themselves. */
$temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
- &$defaultlink, &$display_filename, &$where, &$what);
+ &$defaultlink, &$display_filename, &$where, &$what,
+ &$type0, &$type1);
// Do not let a generic plugin change the default link if a more
// specialized one already did it...
if ($defaultlink != $defaultlink_orig) {
$this_attachment['DefaultHREF'] = $defaultlink;
$this_attachment['DownloadHREF'] = $links['download link']['href'];
$this_attachment['ViewHREF'] = isset($links['attachment_common']) ? $links['attachment_common']['href'] : '';
- $this_attachment['Size'] = $header->size;
+
+ // base64 encoded file sizes are misleading, so approximate real size
+ if (!empty($header->encoding) && strtolower($header->encoding) == 'base64')
+ $this_attachment['Size'] = $header->size / 4 * 3;
+ else
+ $this_attachment['Size'] = $header->size;
+
$this_attachment['ContentType'] = sm_encode_html_special_chars($type0 .'/'. $type1);
$this_attachment['OtherLinks'] = array();
foreach ($links as $val) {
- if ($val['text']==_("Download") || $val['text'] == _("View"))
+ if ($val['text']==_("Download")) {
+ $this_attachment['DownloadHREF'] = $val['href'];
continue;
- if (empty($val['text']) && empty($val['extra']))
+ }
+ if ($val['text']==_("View")) {
+ $this_attachment['ViewHREF'] = $val['href'];
+ continue;
+ }
+
+ // This makes no sense - If 'text' and 'extra' are just concatenated,
+ // there is no point in having 'extra'.... I am going to assume this
+ // was a mistake and am changing 'extra' to be what I think it was
+ // meant to be: additional tag attributes. However, I'm not checking
+ // extensively for plugins that were using this the wrong way (but why would they?)
+ if (empty($val['text']))
continue;
$temp = array();
$temp['HREF'] = $val['href'];
- $temp['Text'] = (empty($val['text']) ? '' : $val['text']) . (empty($val['extra']) ? '' : $val['extra']);
+ $temp['Text'] = $val['text'];
+ $temp['Extra'] = (empty($val['extra']) ? '' : $val['extra']);
$this_attachment['OtherLinks'][] = $temp;
}
$attachments[] = $this_attachment;
$iEncStart = $enc_init = false;
$cur_l = $iOffset = 0;
for($i = 0; $i < $j; ++$i) {
- switch($string{$i})
+ switch($string[$i])
{
case '"':
case '=':
$ret = '';
$iEncStart = false;
} else {
- $ret .= sprintf("=%02X",ord($string{$i}));
+ $ret .= sprintf("=%02X",ord($string[$i]));
}
break;
case '(':
}
break;
default:
- $k = ord($string{$i});
+ $k = ord($string[$i]);
if ($k > 126) {
if ($iEncStart === false) {
// do not start encoding in the middle of a string, also take the rest of the word.
$cur_l = 0;
$ret = '';
} else {
- $ret .= $string{$i};
+ $ret .= $string[$i];
}
}
}
$fulltag = '<' . $tagname;
if (is_array($attary) && sizeof($attary)){
$atts = Array();
- while (list($attname, $attvalue) = each($attary)){
+ foreach ($attary as $attname => $attvalue){
array_push($atts, "$attname=$attvalue");
}
$fulltag .= ' ' . join(" ", $atts);
$matches = Array();
$retarr = Array();
preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches);
- if (!isset($matches{0}) || !$matches{0}){
+ if (!isset($matches[0]) || !$matches[0]){
$retarr = false;
} else {
- $retarr{0} = $offset + strlen($matches{1});
- $retarr{1} = $matches{1};
- $retarr{2} = $matches{2};
+ $retarr[0] = $offset + strlen($matches[1]);
+ $retarr[1] = $matches[1];
+ $retarr[2] = $matches[2];
}
return $retarr;
}
/**
* Yep. So we did.
*/
- $pos += strlen($matches{1});
- if ($matches{2} == "/>"){
+ $pos += strlen($matches[1]);
+ if ($matches[2] == "/>"){
$tagtype = 3;
$pos++;
}
return $retary;
}
case '>':
- $attary{$attname} = '"yes"';
+ $attary[$attname] = '"yes"';
return Array($tagname, $attary, $tagtype, $lt, $pos);
break;
default:
}
list($pos, $attval, $match) = $regary;
$pos++;
- $attary{$attname} = "'" . $attval . "'";
+ $attary[$attname] = "'" . $attval . "'";
} else if ($quot == '"'){
$regary = sq_findnxreg($body, $pos+1, '\"');
if ($regary == false){
}
list($pos, $attval, $match) = $regary;
$pos++;
- $attary{$attname} = '"' . $attval . '"';
+ $attary[$attname] = '"' . $attval . '"';
} else {
/**
* These are hateful. Look for \s, or >.
* If it's ">" it will be caught at the top.
*/
$attval = preg_replace("/\"/s", "&quot;", $attval);
- $attary{$attname} = '"' . $attval . '"';
+ $attary[$attname] = '"' . $attval . '"';
}
} else if (preg_match("|[\w/>]|", $char)) {
/**
* That was attribute type 4.
*/
- $attary{$attname} = '"yes"';
+ $attary[$attname] = '"yes"';
} else {
/**
* An illegal character. Find next '>' and return.
if ($hex){
$numval = hexdec($numval);
}
- $repl{$matches[0][$i]} = chr($numval);
+ $repl[$matches[0][$i]] = chr($numval);
}
$attvalue = strtr($attvalue, $repl);
return true;
$mailbox
){
$me = 'sq_fixatts';
- while (list($attname, $attvalue) = each($attary)){
+ foreach ($attary as $attname => $attvalue){
/**
* See if this attribute should be removed.
*/
if (preg_match($matchtag, $tagname)){
foreach ($matchattrs as $matchattr){
if (preg_match($matchattr, $attname)){
- unset($attary{$attname});
+ unset($attary[$attname]);
continue;
}
}
// entities are used in the attribute value. In 99% of the cases it's there as XSS
// i.e.<div style="{ left:exp&#x0280;essio&#x0274;( alert('XSS') ) }">
$attvalue = "idiocy";
- $attary{$attname} = $attvalue;
+ $attary[$attname] = $attvalue;
}
sq_unspace($attvalue);
$newvalue =
preg_replace($valmatch, $valrepl, $attvalue);
if ($newvalue != $attvalue){
- $attary{$attname} = $newvalue;
+ $attary[$attname] = $newvalue;
$attvalue = $newvalue;
}
}
if ($attname == 'style') {
if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
// 8bit and control characters in style attribute values can be used for XSS, remove them
- $attary{$attname} = '"disallowed character"';
+ $attary[$attname] = '"disallowed character"';
}
preg_match_all("/url\s*\((.+)\)/si",$attvalue,$aMatch);
if (count($aMatch)) {
// url value
$urlvalue = $sMatch;
sq_fix_url($attname, $urlvalue, $message, $id, $mailbox,"'");
- $attary{$attname} = str_replace($sMatch,$urlvalue,$attvalue);
+ $attary[$attname] = str_replace($sMatch,$urlvalue,$attvalue);
}
}
}
|| $attname == 'poster' || $attname == 'formaction'
|| $attname == 'background' || $attname == 'action') {
sq_fix_url($attname, $attvalue, $message, $id, $mailbox);
- $attary{$attname} = $attvalue;
+ $attary[$attname] = $attvalue;
}
}
/**
$bSucces = false;
$bEndTag = false;
for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
- $char = $body{$i};
+ $char = $body[$i];
switch ($char) {
case '<':
$sToken = $char;
case '!':
if ($sToken == '<') {
// possible comment
- if (isset($body{$i+2}) && substr($body,$i,3) == '!--') {
+ if (isset($body[$i+2]) && substr($body,$i,3) == '!--') {
$i = strpos($body,'-->',$i+3);
if ($i === false) { // no end comment
$i = strlen($body);
* body {background: blah-blah}
* and change it to .bodyclass so we can just assign it to a <div>
*/
- $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+ // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+ // Nah, this is even better - try to preface all CSS selectors with
+ // our <div> class ID "bodyclass" then correct generic "body" selectors
+ // TODO: this works pretty well but breaks stuff like this:
+ // @media print { body { font-size: 10pt; } }
+ // but there isn't an easy way to make this regex skip @media
+ // definitions... though lots of the ones in the wild will be
+ // correctly handled because they tend to end with a parenthesis, like:
+ // @media screen and (max-width:480px) { ...
+ $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content);
+ $content = str_replace('.bodyclass body', '.bodyclass', $content);
global $use_transparent_security_image;
if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png';
$styledef .= "color: $text; ";
}
if (strlen($styledef) > 0){
- $divattary{"style"} = "\"$styledef\"";
+ $divattary["style"] = "\"$styledef\"";
}
}
return $divattary;
* @param $add_attr_to_tag see description above
* @param $message message object
* @param $id message id
+ * @param $recursively_called boolean flag for recursive calls into this function (optional; default FALSE)
* @return sanitized html safe to show on your pages.
*/
function sq_sanitize($body,
$add_attr_to_tag,
$message,
$id,
- $mailbox
+ $mailbox,
+ $recursively_called=FALSE
){
$me = 'sq_sanitize';
+
+ /**
+ * See if tag_list is a list of tags to remove or tags to allow.
+ * false means remove these tags
+ * true means allow these tags
+ */
+ $orig_tag_list = $tag_list;
$rm_tags = array_shift($tag_list);
+
/**
* Normalize rm_tags and rm_tags_with_content.
*/
@array_walk($tag_list, 'sq_casenormalize');
@array_walk($rm_tags_with_content, 'sq_casenormalize');
@array_walk($self_closing_tags, 'sq_casenormalize');
- /**
- * See if tag_list is of tags to remove or tags to allow.
- * false means remove these tags
- * true means allow these tags
- */
+
$curpos = 0;
$open_tags = Array();
$trusted = "\n<!-- begin sanitized html -->\n";
while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
+
+ /**
+ * RCDATA and RAWTEXT tags are handled differently:
+ * next instance of closing tag is used, whether or not
+ * the HTML is well formed before that
+ */
+ global $rcdata_rawtext_tags;
+ if (!$recursively_called
+ && in_array($tagname, $rcdata_rawtext_tags)
+ && $tagtype === 1){
+ $closing_tag = false;
+ $closing_tag_offset = $curpos;
+ // seek out the closing tag for the current RCDATA/RAWTEXT tag
+ while (1) {
+ // first we need to move forward to next available closing tag
+ // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax)
+ $next_tag = sq_findnxreg($body, $closing_tag_offset, "</\s*$tagname");
+ if ($next_tag === false) {
+ $closing_tag = false;
+ break;
+ }
+ // but then we have to make sure it's a well-formed tag
+ $closing_tag = sq_getnxtag($body, $next_tag[0]);
+ if ($closing_tag === false)
+ break;
+ else if ($closing_tag[0] !== false
+ // these should be redundant
+ && $closing_tag[0] === $tagname && $closing_tag[2] === 2) {
+ $trusted .= sq_sanitize(substr($body, $curpos, $closing_tag[4] - $curpos + 1),
+ $orig_tag_list, $rm_tags_with_content, $self_closing_tags,
+ $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag,
+ $message, $id, $mailbox, true);
+ $curpos = $closing_tag[4] + 1;
+ continue 2;
+ }
+ $closing_tag_offset = $next_tag[0] + 1;
+ }
+ if ($closing_tag === false)
+ { /* no-op... there was no closing tag for this RCDATA/RAWTEXT tag - we could probably set $curpos to the end of $body, but this HTML is malformed anyway and should just fall apart on its own */ }
+ }
+
$free_content = substr($body, $curpos, $lt-$curpos);
/**
* Take care of <style>
if ($tagname == "body"){
$tagname = "div";
}
- if (isset($open_tags{$tagname}) &&
- $open_tags{$tagname} > 0){
- $open_tags{$tagname}--;
+ if (isset($open_tags[$tagname]) &&
+ $open_tags[$tagname] > 0){
+ $open_tags[$tagname]--;
} else {
$tagname = false;
}
$message, $id);
}
if ($tagtype == 1){
- if (isset($open_tags{$tagname})){
- $open_tags{$tagname}++;
+ if (isset($open_tags[$tagname])){
+ $open_tags[$tagname]++;
} else {
- $open_tags{$tagname}=1;
+ $open_tags[$tagname]=1;
}
}
/**
// require_once(SM_PATH . 'functions/url_parser.php'); // for $MailTo_PReg_Match
global $attachment_common_show_images, $view_unsafe_images,
- $has_unsafe_images, $allow_svg_display;
+ $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags,
+ $remove_rcdata_rawtext_tags_and_content;
+
+ $rcdata_rawtext_tags = array(
+ "noscript",
+ "noframes",
+ "noembed",
+ "textarea",
+ // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below
+ );
+
/**
* Don't display attached images in HTML mode.
*
*/
$attachment_common_show_images = false;
$tag_list = Array(
- false,
+ false, // remove these tags
"meta",
"html",
"head",
);
if (!$allow_svg_display)
$rm_tags_with_content[] = 'svg';
+ /**
+ * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special
+ * case that they are, but if you prefer to remove them and their contents entirely
+ * (in most cases, should be a safe thing with minimal impact), you can add the
+ * following to config/config_local.php
+ * $remove_rcdata_rawtext_tags_and_content = TRUE;
+ */
+ if ($remove_rcdata_rawtext_tags_and_content)
+ $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags);
$self_closing_tags = Array(
"img",
* Remove any references to http/https if view_unsafe_images set
* to false.
*/
- array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[0],
+ array_push($bad_attvals['/.*/']['/^src|background/i'][0],
'/^([\'\"])\s*https*:.*([\'\"])/si');
- array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[1],
+ array_push($bad_attvals['/.*/']['/^src|background/i'][1],
"\\1$secremoveimg\\1");
- array_push($bad_attvals{'/.*/'}{'/^style/i'}[0],
+ array_push($bad_attvals['/.*/']['/^style/i'][0],
'/url\([\'\"]?https?:[^\)]*[\'\"]?\)/si');
- array_push($bad_attvals{'/.*/'}{'/^style/i'}[1],
+ array_push($bad_attvals['/.*/']['/^style/i'][1],
"url(\\1$secremoveimg\\1)");
}