From: graf25 Date: Sun, 28 Apr 2002 21:49:30 +0000 (+0000) Subject: Committing reworked html filtering code. Hopefully, Nick Cleaton will X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=691a2d25c1679688b84aa48e78250f1faf284fa4;p=squirrelmail.git Committing reworked html filtering code. Hopefully, Nick Cleaton will leave us alone now. :) git-svn-id: https://svn.code.sf.net/p/squirrelmail/code/trunk/squirrelmail@2763 7612ce4b-ef26-0410-bec9-ea0150e637f0 --- diff --git a/functions/mime.php b/functions/mime.php index 14caa71b..95baa011 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -1062,439 +1062,957 @@ function encodeHeader ($string) { return( $string ); } -/* - Strips dangerous tags from html messages. -*/ -function MagicHTML( $body, $id ) { - - global $message, $HTTP_SERVER_VARS, - $attachment_common_show_images; - - $attachment_common_show_images = - FALSE; // Don't display attached images in HTML mode - $j = strlen( $body ); // Legnth of the HTML - $ret = ''; // Returned string - $bgcolor = '#ffffff'; // Background style color (defaults to white) - $textcolor = '#000000'; // Foreground style color (defaults to black) - $leftmargin = ''; // Left margin style - $title = ''; // HTML title if any +/* This function trys to locate the entity_id of a specific mime element */ - $i = 0; - while ( $i < $j ) { - if ( $body{$i} == '<' ) { - $pos = $i + 1; - $tag = ''; - while ($body{$pos} == ' ' || $body{$pos} == "\t" || - $body{$pos} == "\n") { - $pos ++; - } - while (strlen($tag) < 4 && $body{$pos} != ' ' && - $body{$pos} != "\t" && $body{$pos} != "\n" && - $pos < $j ) { - if ($body{$pos} == "<"){ - $tag = ''; - $pos++; - } - $tag .= $body{$pos}; - $pos ++; - } - /* - A comment in HTML is only three characters and isn't - guaranteed to have a space after it. This fudges so - it will be caught by the switch statement. - */ - if (ereg("!--", $tag)) { - $tag = "!-- "; - } - switch( strtoupper( $tag ) ) { - // Strips the entire tag and contents - case 'APPL': - case 'EMBE': - case 'FRAM': - case 'SCRI': - case 'OBJE': - $etg = '/' . $tag; - while ( $body{$i+1}.$body{$i+2}.$body{$i+3}.$body{$i+4}.$body{$i+5} <> $etg && - $i < $j ) $i++; - while ( $i < $j && $body{++$i} <> '>' ); - // $ret .= ""; - break; - // Substitute Title - case 'TITL': - $i += 5; - while ( $body{$i} <> '>' && // - $i < $j ) - $i++; - $i++; - $title = ''; - while ( $body{$i} <> '<' && // - $i < $j ) { - $title .= $body{$i}; - $i++; - } - $i += 7; - break; - // Destroy these tags - case 'HTML': - case 'HEAD': - case '/HTM': - case '/HEA': - case '!DOC': - case 'META': - //case 'DIV ': - //case '/DIV': - case '!-- ': - $i += 4; - while ( $body{$i} <> '>' && - $i < $j ) - $i++; - // $i++; - break; - case 'STYL': - $i += 5; - while ( $body{$i} <> '>' && // - $i < $j ) - $i++; - $i++; - // We parse the style to look for interesting stuff - $styleblk = ''; - while ( $body{$i} <> '>' && - $i < $j ) { - // First we get the name of the style - $style = ''; - while ( $body{$i} <> '>' && - $body{$i} <> '<' && - $body{$i} <> '{' && - $i < $j ) { - if ( isnoSep( $body{$i} ) ) - $style .= $body{$i}; - $i++; - } - stripComments( $i, $j, $body ); - $style = strtoupper( trim( $style ) ); - if ( $style == 'BODY' ) { - // Next we look into the definitions of the body style - while ( $body{$i} <> '>' && - $body{$i} <> '}' && - $i < $j ) { - // We look for the background color if any. - if ( substr( $body, $i, 17 ) == 'BACKGROUND-COLOR:' ) { - $i += 17; - $bgcolor = getStyleData( $i, $j, $body ); - } elseif ( substr( $body, $i, 12 ) == 'MARGIN-LEFT:' ) { - $i += 12; - $leftmargin = getStyleData( $i, $j, $body ); - } - $i++; - } - } else { - // Other style are mantained - $styleblk .= "$style "; - while ( $body{$i} <> '>' && - $body{$i} <> '<' && - $body{$i} <> '}' && - $i < $j ) { - $styleblk .= $body{$i}; - $i++; - } - $styleblk .= $body{$i}; - } - stripComments( $i, $j, $body ); - if ( $body{$i} <> '>' ) - $i++; - } - if ( $styleblk <> '' ) - $ret .= " + * @return a string with edited content. + */ +function sq_fixstyle($message, $id, $content){ + global $view_unsafe_images; + $me = "sq_fixstyle"; + /** + * First look for general BODY style declaration, which would be + * like so: + * body {background: blah-blah} + * and change it to .bodyclass so we can just assign it to a
+ */ + $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); + $secremoveimg = "../images/" . _("sec_remove_eng.png"); + /** + * Fix url('blah') declarations. + */ + $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si", + "url(\\1$secremoveimg\\2)", $content); + /** + * Fix url('https*://.*) declarations but only if $view_unsafe_images + * is false. + */ + if (!$view_unsafe_images){ + $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si", + "url(\\1$secremoveimg\\2)", $content); + } + + /** + * Fix urls that refer to cid: + */ + while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content, + $matches)){ + $cidurl = $matches{1}; + $httpurl = sq_cid2http($message, $id, $cidurl); + $content = preg_replace("|url\($cidurl\)|si", + "url($httpurl)", $content); + } -/* This function trys to locate the entity_id of a specific mime element */ + /** + * Fix stupid expression: declarations which lead to vulnerabilities + * in IE. + */ + $content = preg_replace("/expression\s*:/si", "idiocy:", $content); + return $content; +} -function find_ent_id( $id, $message ) { - $ret = ''; - for ($i=0; $ret == '' && $i < count($message->entities); $i++) { - if ( $message->entities[$i]->header->entity_id == '' || $message->entities[$i]->header->type ) { - $ret = find_ent_id( $id, $message->entities[$i] ); - } else { - if ( strcasecmp( $message->entities[$i]->header->id, $id ) == 0 ) - $ret = $message->entities[$i]->header->entity_id; +/** + * This function converts cid: url's into the ones that can be viewed in + * the browser. + * + * @param $message the message object + * @param $id the message id + * @param $cidurl the cid: url. + * @return a string with a http-friendly url + */ +function sq_cid2http($message, $id, $cidurl){ + /** + * Get rid of quotes. + */ + $quotchar = substr($cidurl, 0, 1); + $cidurl = str_replace($quotchar, "", $cidurl); + $cidurl = substr(trim($cidurl), 4); + $httpurl = $quotchar . "../src/download.php?absolute_dl=true&" . + "passed_id=$id&mailbox=" . urlencode($message->header->mailbox) . + "&passed_ent_id=" . find_ent_id($cidurl, $message) . $quotchar; + return $httpurl; +} + +/** + * This function changes the tag into a
tag since we + * can't really have a body-within-body. + * + * @param $attary an array of attributes and values of + * @return a modified array of attributes to be set for
+ */ +function sq_body2div($attary){ + $me = "sq_body2div"; + $divattary = Array("class"=>"'bodyclass'"); + $bgcolor="#ffffff"; + $text="#000000"; + $styledef=""; + if (is_array($attary) && sizeof($attary) > 0){ + foreach ($attary as $attname=>$attvalue){ + $quotchar = substr($attvalue, 0, 1); + $attvalue = str_replace($quotchar, "", $attvalue); + switch ($attname){ + case "background": + $styledef .= "background-image: url('$attvalue'); "; + break; + case "bgcolor": + $styledef .= "background-color: $attvalue; "; + break; + case "text": + $styledef .= "color: $attvalue; "; + } } + if (strlen($styledef) > 0){ + $divattary{"style"} = "\"$styledef\""; + } + } + return $divattary; +} +/** + * This is the main function and the one you should actually be calling. + * There are several variables you should be aware of an which need + * special description. + * + * Since the description is quite lengthy, see it here: + * http://www.mricon.com/html/phpfilter.html + * + * @param $body the string with HTML you wish to filter + * @param $tag_list see description above + * @param $rm_tags_with_content see description above + * @param $self_closing_tags see description above + * @param $force_tag_closing see description above + * @param $rm_attnames see description above + * @param $bad_attvals see description above + * @param $add_attr_to_tag see description above + * @param $message message object + * @param $id message id + * @return sanitized html safe to show on your pages. + */ +function sq_sanitize($body, + $tag_list, + $rm_tags_with_content, + $self_closing_tags, + $force_tag_closing, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id + ){ + $me = "sq_sanitize"; + /** + * Normalize rm_tags and rm_tags_with_content. + */ + @array_walk($rm_tags, 'sq_casenormalize'); + @array_walk($rm_tags_with_content, 'sq_casenormalize'); + @array_walk($self_closing_tags, 'sq_casenormalize'); + /** + * See if tag_list is of tags to remove or tags to allow. + * false means remove these tags + * true means allow these tags + */ + $rm_tags = array_shift($tag_list); + $curpos = 0; + $open_tags = Array(); + $trusted = "\n"; + $skip_content = false; + + while (($curtag=sq_getnxtag($body, $curpos)) != FALSE){ + list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; + $free_content = substr($body, $curpos, $lt-$curpos); + /** + * Take care of . Edit the + * content before we apply it. + */ + $free_content = sq_fixstyle($message, $id, $free_content); + } else if ($tagname == "body"){ + $tagname = "div"; + if ($tagtype == 1){ + $attary = sq_body2div($attary); + } + } + if ($skip_content == false){ + $trusted .= $free_content; + } else { + } + if ($tagname != FALSE){ + if ($tagtype == 2){ + if ($skip_content == $tagname){ + /** + * Got to the end of tag we needed to remove. + */ + $tagname = false; + $skip_content = false; + } else { + if ($skip_content == false){ + if (isset($open_tags{$tagname}) && + $open_tags{$tagname} > 0){ + $open_tags{$tagname}--; + } else { + $tagname = false; + } + } else { + } + } + } else { + /** + * $rm_tags_with_content + */ + if ($skip_content == false){ + /** + * See if this is a self-closing type and change + * tagtype appropriately. + */ + if ($tagtype == 1 + && in_array($tagname, $self_closing_tags)){ + $tagtype=3; + } + /** + * See if we should skip this tag and any content + * inside it. + */ + if ($tagtype == 1 && + in_array($tagname, $rm_tags_with_content)){ + $skip_content = $tagname; + } else { + if (($rm_tags == false + && in_array($tagname, $tag_list)) || + ($rm_tags == true && + !in_array($tagname, $tag_list))){ + $tagname = false; + } else { + if ($tagtype == 1){ + if (isset($open_tags{$tagname})){ + $open_tags{$tagname}++; + } else { + $open_tags{$tagname}=1; + } + } + /** + * This is where we run other checks. + */ + if (is_array($attary) && sizeof($attary) > 0){ + $attary = sq_fixatts($tagname, + $attary, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id + ); + } + } + } + } else { + } + } + if ($tagname != false && $skip_content == false){ + $trusted .= sq_tagprint($tagname, $attary, $tagtype); + } + } else { + } + $curpos = $gt+1; } + $trusted .= substr($body, $curpos, strlen($body)-$curpos); + if ($force_tag_closing == true){ + foreach ($open_tags as $tagname=>$opentimes){ + while ($opentimes > 0){ + $trusted .= ''; + $opentimes--; + } + } + $trusted .= "\n"; + } + $trusted .= "\n"; + return $trusted; +} - return( $ret ); +/** + * This is a wrapper function to call html sanitizing routines. + * + * @param $body the body of the message + * @param $id the id of the message + * @return a string with html safe to display in the browser. + */ +function magicHTML($body, $id){ + global $attachment_common_show_images, $view_unsafe_images, + $has_unsafe_images, $message; + /** + * Don't display attached images in HTML mode. + */ + $attachment_common_show_images = false; + $tag_list = Array( + false, + "object", + "meta", + "html", + "head", + "base" + ); + + $rm_tags_with_content = Array( + "script", + "applet", + "embed", + "title" + ); + + $self_closing_tags = Array( + "img", + "br", + "hr", + "input" + ); + + $force_tag_closing = false; + + $rm_attnames = Array( + "/.*/" => + Array( + "/target/si", + "/^on.*/si" + ) + ); + + $secremoveimg = "../images/" . _("sec_remove_eng.png"); + $bad_attvals = Array( + "/.*/" => + Array( + "/^src|background|href|action/i" => + Array( + Array( + "|^([\'\"])\s*\.\./.*([\'\"])|si", + "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si" + ), + Array( + "\\1$secremoveimg\\2", + "\\1$secremoveimg\\2" + ) + ), + "/^style/si" => + Array( + Array( + "/expression\s*:/si", + "|url\(([\'\"])\s*\.\./.*([\'\"])\)|si", + "/url\(([\'\"])\s*\S+script:.*([\'\"])\)/si" + ), + Array( + "idiocy:", + "url(\\1$secremoveimg\\2)", + "url(\\1$secremoveimg\\2)" + ) + ) + ) + ); + if (!$view_unsafe_images){ + /** + * Remove any references to http/https if view_unsafe_images set + * to false. + */ + $addendum = Array( + "/.*/" => + Array( + "/^src|background/i" => + Array( + Array( + "/^([\'\"])\s*https*:.*([\'\"])/si" + ), + Array( + "\\1$secremoveimg\\2" + ) + ), + "/^style/si" => + Array( + Array( + "/url\(([\'\"])\s*https*:.*([\'\"])\)/si" + ), + Array( + "url(\\1$secremoveimg\\2)" + ) + ) + ) + ); + $bad_attvals = array_merge($bad_attvals, $addendum); + } + $add_attr_to_tag = Array( + "/^a$/si" => Array('target'=>'"_new"') + ); + $trusted = sq_sanitize($body, + $tag_list, + $rm_tags_with_content, + $self_closing_tags, + $force_tag_closing, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id + ); + if (preg_match("|$secremoveimg|si", $trusted)){ + $has_unsafe_images = true; + } + return $trusted; } -?> +?> \ No newline at end of file