+ /**
+ * There are several types of attributes, with optional
+ * [:space:] between members.
+ * Type 1:
+ * attrname[:space:]=[:space:]'CDATA'
+ * Type 2:
+ * attrname[:space:]=[:space:]"CDATA"
+ * Type 3:
+ * attr[:space:]=[:space:]CDATA
+ * Type 4:
+ * attrname
+ *
+ * We leave types 1 and 2 the same, type 3 we check for
+ * '"' and convert to "&quot;" if needed, then wrap in
+ * double quotes. Type 4 we convert into:
+ * attrname="yes".
+ */
+ $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
+ if ($regary == false){
+ /**
+ * Looks like body ended before the end of tag.
+ */
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attname, $match) = $regary;
+ $attname = strtolower($attname);
+ /**
+ * We arrived at the end of attribute name. Several things possible
+ * here:
+ * '>' means the end of the tag and this is attribute type 4
+ * '/' if followed by '>' means the same thing as above
+ * '\s' means a lot of things -- look what it's followed by.
+ * anything else means the attribute is invalid.
+ */
+ switch($match){
+ case "/":
+ /**
+ * This is an xhtml-style tag with a closing / at the
+ * end, like so: <img src="blah"/>. Check if it's followed
+ * by the closing bracket. If not, then this tag is invalid
+ */
+ if (substr($body, $pos, 2) == "/>"){
+ $pos++;
+ $tagtype = 3;
+ } else {
+ $gt = sq_findnxstr($body, $pos, ">");
+ $retary = Array(false, false, false, $lt, $gt);
+ return $retary;
+ }
+ case ">":
+ $attary{$attname} = '"yes"';
+ return Array($tagname, $attary, $tagtype, $lt, $pos);
+ break;
+ default:
+ /**
+ * Skip whitespace and see what we arrive at.
+ */
+ $pos = sq_skipspace($body, $pos);
+ $char = substr($body, $pos, 1);
+ /**
+ * Two things are valid here:
+ * '=' means this is attribute type 1 2 or 3.
+ * \w means this was attribute type 4.
+ * anything else we ignore and re-loop. End of tag and
+ * invalid stuff will be caught by our checks at the beginning
+ * of the loop.
+ */
+ if ($char == "="){
+ $pos++;
+ $pos = sq_skipspace($body, $pos);
+ /**
+ * Here are 3 possibilities:
+ * "'" attribute type 1
+ * '"' attribute type 2
+ * everything else is the content of tag type 3
+ */
+ $quot = substr($body, $pos, 1);
+ if ($quot == "'"){
+ $regary = sq_findnxreg($body, $pos+1, "\'");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ $pos++;
+ $attary{$attname} = "'" . $attval . "'";
+ } else if ($quot == '"'){
+ $regary = sq_findnxreg($body, $pos+1, '\"');
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ $pos++;
+ $attary{$attname} = '"' . $attval . '"';
+ } else {
+ /**
+ * These are hateful. Look for \s, or >.
+ */
+ $regary = sq_findnxreg($body, $pos, "[\s>]");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ /**
+ * If it's ">" it will be caught at the top.
+ */
+ $attval = preg_replace("/\"/s", """, $attval);
+ $attary{$attname} = '"' . $attval . '"';
+ }
+ } else if (preg_match("|[\w/>]|", $char)) {
+ /**
+ * That was attribute type 4.
+ */
+ $attary{$attname} = '"yes"';
+ } else {
+ /**
+ * An illegal character. Find next '>' and return.
+ */
+ $gt = sq_findnxstr($body, $pos, ">");
+ return Array(false, false, false, $lt, $gt);
+ }
+ }
+ }
+ /**
+ * The fact that we got here indicates that the tag end was never
+ * found. Return invalid tag indication so it gets stripped.
+ */
+ return Array(false, false, false, $lt, strlen($body));
+}
+
+/**
+ * Translates entity-encoded characters in an attribute value into the
+ * characters they stand for, so that checks can be run against the
+ * decoded text.
+ *
+ * Named entities from the HTML_ENTITIES translation table are handled
+ * first, then numeric entities for codes 1 to 255 in both decimal
+ * (&#106;) and hexadecimal (&#x6a;) notation. Leading zeros and the
+ * trailing semicolon are optional. The double quote (&quot;, code 34)
+ * and the single quote (code 39) are deliberately left encoded.
+ *
+ * Every pattern requires one more character after the entity (a
+ * non-word character for named and hex entities, a non-digit for
+ * decimal ones), so an entity at the very end of the string is left
+ * untouched.
+ *
+ * @param  $attvalue A string to run entity check against.
+ * @return Translated value.
+ */
+function sq_deent($attvalue){
+    /**
+     * All entities start with "&"; without one there is nothing to do.
+     */
+    if (strpos($attvalue, "&") === false){
+        return $attvalue;
+    }
+    /**
+     * Check named entities first. ENT_COMPAT keeps the table free of a
+     * single-quote entry regardless of the PHP version's default flags.
+     */
+    $trans = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));
+    /**
+     * Leave &quot; in, as it can mess us up.
+     */
+    unset($trans["&quot;"]);
+    foreach ($trans as $ent => $val){
+        $attvalue = preg_replace("/$ent*(\W)/si", "$val\\1", $attvalue);
+    }
+    /**
+     * Now translate numbered entities from 1 to 255 if needed.
+     */
+    if (strpos($attvalue, "#") !== false){
+        $omit = array(34, 39);
+        for ($asc = 1; $asc < 256; $asc++){
+            if (in_array($asc, $omit)){
+                continue;
+            }
+            $chr = chr($asc);
+            /**
+             * A bare backslash would fuse with the "\1" reference and
+             * swallow the captured character, so it has to be doubled.
+             */
+            $repl = ($chr == "\\" ? "\\\\" : $chr) . "\\1";
+            $attvalue = preg_replace("/\&#0*$asc;*(\D)/si", $repl,
+                                     $attvalue);
+            $attvalue = preg_replace("/\&#x0*" . dechex($asc) . ";*(\W)/si",
+                                     $repl, $attvalue);
+        }
+    }
+    return $attvalue;
+}
+
+/**
+ * This function runs various checks against the attributes of one tag.
+ *
+ * For every attribute it:
+ *  - drops the attribute when its name matches an $rm_attnames pattern
+ *    registered for this tag (and then skips all further processing
+ *    of that attribute);
+ *  - decodes entities in the value via sq_deent() and runs the
+ *    $bad_attvals match/replace lists against the decoded value,
+ *    storing the result only when a replacement actually changed it;
+ *  - rewrites values starting with a quote followed by "cid:" through
+ *    sq_cid2http().
+ * Finally, attributes from $add_attr_to_tag whose tag pattern matches
+ * are merged in.
+ *
+ * @param  $tagname         String with the name of the tag.
+ * @param  $attary          Array with all tag attributes.
+ * @param  $rm_attnames     See description for sq_sanitize
+ * @param  $bad_attvals     See description for sq_sanitize
+ * @param  $add_attr_to_tag See description for sq_sanitize
+ * @param  $message         message object
+ * @param  $id              message id
+ * @return Array with modified attributes.
+ */
+function sq_fixatts($tagname,
+                    $attary,
+                    $rm_attnames,
+                    $bad_attvals,
+                    $add_attr_to_tag,
+                    $message,
+                    $id
+                    ){
+    /**
+     * foreach walks a snapshot of $attary, so entries can safely be
+     * unset or overwritten in $attary while looping.
+     */
+    foreach ($attary as $attname => $attvalue){
+        /**
+         * See if this attribute should be removed.
+         */
+        foreach ($rm_attnames as $matchtag => $matchattrs){
+            if (preg_match($matchtag, $tagname)){
+                foreach ($matchattrs as $matchattr){
+                    if (preg_match($matchattr, $attname)){
+                        unset($attary[$attname]);
+                        /**
+                         * Jump to the next attribute. Falling through
+                         * would let the checks below re-add the
+                         * attribute that was just removed.
+                         */
+                        continue 3;
+                    }
+                }
+            }
+        }
+        /**
+         * Remove any entities.
+         */
+        $attvalue = sq_deent($attvalue);
+
+        /**
+         * Now run the checks on the attribute value. $bad_attvals maps
+         * tag pattern => attribute pattern => (matches, replacements).
+         */
+        foreach ($bad_attvals as $matchtag => $matchattrs){
+            if (!preg_match($matchtag, $tagname)){
+                continue;
+            }
+            foreach ($matchattrs as $matchattr => $valary){
+                if (!preg_match($matchattr, $attname)){
+                    continue;
+                }
+                /**
+                 * There are two arrays in valary.
+                 * First is matches.
+                 * Second one is replacements
+                 */
+                list($valmatch, $valrepl) = $valary;
+                $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
+                if ($newvalue != $attvalue){
+                    $attary[$attname] = $newvalue;
+                }
+            }
+        }
+        /**
+         * Turn cid: urls into http-friendly ones.
+         */
+        if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
+            $attary[$attname] = sq_cid2http($message, $id, $attvalue);
+        }
+    }
+    /**
+     * See if we need to append any attributes to this tag.
+     */
+    foreach ($add_attr_to_tag as $matchtag => $addattary){
+        if (preg_match($matchtag, $tagname)){
+            $attary = array_merge($attary, $addattary);
+        }
+    }
+    return $attary;
+}
+
+/**
+ * This function edits the style definition to make them friendly and
+ * usable in squirrelmail.
+ *
+ * It renames "body {...}" rules to ".bodyclass {...}", replaces
+ * url() references that point to *script: (and, unless
+ * $view_unsafe_images is set, to http/https) with the "removed for
+ * security" image, converts cid: urls through sq_cid2http() and
+ * defuses "expression:" declarations.
+ *
+ * @param  $message the message object
+ * @param  $id      the message id
+ * @param  $content a string with whatever is between <style> and </style>
+ * @return a string with edited content.
+ */
+function sq_fixstyle($message, $id, $content){
+    global $view_unsafe_images;
+    /**
+     * First look for general BODY style declaration, which would be
+     * like so:
+     * body {background: blah-blah}
+     * and change it to .bodyclass so we can just assign it to a <div>
+     */
+    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+    $secremoveimg = "../images/" . _("sec_remove_eng.png");
+    /**
+     * Fix url('blah') declarations.
+     */
+    $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si",
+                            "url(\\1$secremoveimg\\2)", $content);
+    /**
+     * Fix url('https*://.*) declarations but only if $view_unsafe_images
+     * is false.
+     */
+    if (!$view_unsafe_images){
+        $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si",
+                                "url(\\1$secremoveimg\\2)", $content);
+    }
+
+    /**
+     * Fix urls that refer to cid:
+     *
+     * The matched text is replaced literally. Feeding the cid url back
+     * into a regular expression would treat characters such as "+" or
+     * "?" as metacharacters; the replacement would then not happen and
+     * this loop would never terminate.
+     */
+    while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content,
+                      $matches)){
+        $httpurl = sq_cid2http($message, $id, $matches[1]);
+        $content = str_replace($matches[0], "url($httpurl)", $content);
+    }
+
+    /**
+     * Fix expression: declarations which lead to vulnerabilities
+     * in IE.
+     */
+    $content = preg_replace("/expression\s*:/si", "idiocy:", $content);
+    return $content;
+}
+
+/**
+ * Builds a download.php url that a browser can fetch for a given
+ * cid: reference.
+ *
+ * The first character of $cidurl is taken to be the quote character;
+ * every occurrence of it is removed, the remainder is trimmed and its
+ * first four characters (the "cid:" prefix) are cut off. The result is
+ * looked up with find_ent_id() and the url is returned wrapped in the
+ * same quote character.
+ *
+ * @param  $message the message object
+ * @param  $id      the message id
+ * @param  $cidurl  the cid: url.
+ * @return a string with a http-friendly url
+ */
+function sq_cid2http($message, $id, $cidurl){
+    /**
+     * Remember the quote, then strip it from the value.
+     */
+    $quote = substr($cidurl, 0, 1);
+    $unquoted = str_replace($quote, "", $cidurl);
+    /**
+     * Drop surrounding whitespace and the leading "cid:".
+     */
+    $cid = substr(trim($unquoted), 4);
+
+    $mailbox = urlencode($message->header->mailbox);
+    $entity = find_ent_id($cid, $message);
+
+    return $quote . "../src/download.php?absolute_dl=true&" .
+        "passed_id=$id&mailbox=" . $mailbox .
+        "&passed_ent_id=" . $entity . $quote;
+}
+
+/**
+ * This function changes the <body> tag into a <div> tag since we
+ * can't really have a body-within-body.
+ *
+ * The returned attribute list always carries class='bodyclass'. The
+ * "background", "bgcolor" and "text" attributes of <body> are turned
+ * into an inline style declaration; every other attribute is dropped.
+ *
+ * @param  $attary an array of attributes and values of <body>
+ * @return a modified array of attributes to be set for <div>
+ */
+function sq_body2div($attary){
+    $divattary = array("class" => "'bodyclass'");
+    $styledef = "";
+    if (is_array($attary) && sizeof($attary) > 0){
+        foreach ($attary as $attname => $attvalue){
+            /**
+             * Strip the surrounding quote character first, then any
+             * remaining quote of either kind. The value ends up inside
+             * style="..." (and inside url('...') for backgrounds), so a
+             * leftover quote would let it break out of the attribute.
+             */
+            $quotchar = substr($attvalue, 0, 1);
+            $attvalue = str_replace($quotchar, "", $attvalue);
+            $attvalue = str_replace(array("'", '"'), "", $attvalue);
+            switch ($attname){
+                case "background":
+                    $styledef .= "background-image: url('$attvalue'); ";
+                    break;
+                case "bgcolor":
+                    $styledef .= "background-color: $attvalue; ";
+                    break;
+                case "text":
+                    $styledef .= "color: $attvalue; ";
+                    break;
+            }
+        }
+        if (strlen($styledef) > 0){
+            $divattary["style"] = "\"$styledef\"";
+        }
+    }
+    return $divattary;
+}
+
+/**
+ * This is the main function and the one you should actually be calling.
+ * There are several variables you should be aware of and which need
+ * special description.
+ *
+ * Since the description is quite lengthy, see it here:
+ * http://www.mricon.com/html/phpfilter.html
+ *
+ * The body is walked tag by tag with sq_getnxtag(). Text between tags
+ * is copied to the output unless it sits inside a tag listed in
+ * $rm_tags_with_content; tags are filtered against $tag_list, their
+ * attributes are cleaned with sq_fixatts(), <body> becomes <div>, and
+ * the content of <style> is passed through sq_fixstyle().
+ *
+ * @param  $body                 the string with HTML you wish to filter
+ * @param  $tag_list             see description above; the first
+ *                               element is a flag (false: remove the
+ *                               listed tags, true: allow only them)
+ * @param  $rm_tags_with_content see description above
+ * @param  $self_closing_tags    see description above
+ * @param  $force_tag_closing    see description above
+ * @param  $rm_attnames          see description above
+ * @param  $bad_attvals          see description above
+ * @param  $add_attr_to_tag      see description above
+ * @param  $message              message object
+ * @param  $id                   message id
+ * @return sanitized html safe to show on your pages.
+ */
+function sq_sanitize($body,
+                     $tag_list,
+                     $rm_tags_with_content,
+                     $self_closing_tags,
+                     $force_tag_closing,
+                     $rm_attnames,
+                     $bad_attvals,
+                     $add_attr_to_tag,
+                     $message,
+                     $id
+                     ){
+    /**
+     * See if tag_list is of tags to remove or tags to allow.
+     * false means remove these tags
+     * true means allow these tags
+     * The flag has to come off the list before the names are
+     * normalized.
+     */
+    $rm_tags = array_shift($tag_list);
+    /**
+     * Normalize tag_list, rm_tags_with_content and self_closing_tags.
+     */
+    @array_walk($tag_list, 'sq_casenormalize');
+    @array_walk($rm_tags_with_content, 'sq_casenormalize');
+    @array_walk($self_closing_tags, 'sq_casenormalize');
+
+    $curpos = 0;
+    $open_tags = array();
+    $trusted = "<!-- begin sanitized html -->\n";
+    /**
+     * Either false, or the name of the tag whose content is currently
+     * being dropped.
+     */
+    $skip_content = false;
+
+    while (($curtag = sq_getnxtag($body, $curpos)) != false){
+        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
+        $free_content = substr($body, $curpos, $lt - $curpos);
+        /**
+         * Take care of <style>
+         */
+        if ($tagname == "style" && $tagtype == 2){
+            /**
+             * This is a closing </style>. Edit the
+             * content before we apply it.
+             */
+            $free_content = sq_fixstyle($message, $id, $free_content);
+        }
+        if ($skip_content == false){
+            $trusted .= $free_content;
+        }
+        if ($tagname != false){
+            if ($tagtype == 2){
+                /**
+                 * A closing tag.
+                 */
+                if ($skip_content == $tagname){
+                    /**
+                     * Got to the end of tag we needed to remove.
+                     */
+                    $tagname = false;
+                    $skip_content = false;
+                } else if ($skip_content == false){
+                    if ($tagname == "body"){
+                        $tagname = "div";
+                    } else if (isset($open_tags[$tagname]) &&
+                               $open_tags[$tagname] > 0){
+                        $open_tags[$tagname]--;
+                    } else {
+                        /**
+                         * Closing tag without a matching opening one.
+                         */
+                        $tagname = false;
+                    }
+                }
+            } else if ($skip_content == false){
+                /**
+                 * See if this is a self-closing type and change
+                 * tagtype appropriately.
+                 */
+                if ($tagtype == 1
+                    && in_array($tagname, $self_closing_tags)){
+                    $tagtype = 3;
+                }
+                /**
+                 * See if we should skip this tag and any content
+                 * inside it.
+                 */
+                if ($tagtype == 1 &&
+                    in_array($tagname, $rm_tags_with_content)){
+                    $skip_content = $tagname;
+                } else if (($rm_tags == false
+                            && in_array($tagname, $tag_list)) ||
+                           ($rm_tags == true &&
+                            !in_array($tagname, $tag_list))){
+                    $tagname = false;
+                } else {
+                    if ($tagtype == 1){
+                        if (isset($open_tags[$tagname])){
+                            $open_tags[$tagname]++;
+                        } else {
+                            $open_tags[$tagname] = 1;
+                        }
+                    }
+                    /**
+                     * This is where we run other checks.
+                     */
+                    if (is_array($attary) && sizeof($attary) > 0){
+                        $attary = sq_fixatts($tagname,
+                                             $attary,
+                                             $rm_attnames,
+                                             $bad_attvals,
+                                             $add_attr_to_tag,
+                                             $message,
+                                             $id
+                                             );
+                    }
+                    /**
+                     * Convert body into div.
+                     */
+                    if ($tagname == "body"){
+                        $tagname = "div";
+                        $attary = sq_body2div($attary);
+                    }
+                }
+            }
+            if ($tagname != false && $skip_content == false){
+                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
+            }
+        }
+        $curpos = $gt + 1;
+    }
+    /**
+     * Whatever follows the last tag.
+     */
+    $trusted .= substr($body, $curpos, strlen($body) - $curpos);
+    if ($force_tag_closing == true){
+        foreach ($open_tags as $tagname => $opentimes){
+            while ($opentimes > 0){
+                $trusted .= '</' . $tagname . '>';
+                $opentimes--;
+            }
+        }
+        $trusted .= "\n";
+    }
+    $trusted .= "<!-- end sanitized html -->\n";
+    return $trusted;
+}
+
+/**
+ * This is a wrapper function to call html sanitizing routines.
+ *
+ * It sets up the tag and attribute rules, runs sq_sanitize() and sets
+ * the global $has_unsafe_images when the "removed for security" image
+ * ended up in the output. As a side effect the global
+ * $attachment_common_show_images is set to false.
+ *
+ * @param  $body    the body of the message
+ * @param  $id      the id of the message
+ * @param  $message the message object
+ * @return a string with html safe to display in the browser.
+ */
+function magicHTML($body, $id, $message){
+    /**
+     * $message must not be listed here: declaring it global would
+     * rebind the parameter to the global variable of the same name.
+     */
+    global $attachment_common_show_images, $view_unsafe_images,
+        $has_unsafe_images;
+    /**
+     * Don't display attached images in HTML mode.
+     */
+    $attachment_common_show_images = false;
+    $tag_list = array(
+        false,
+        "object",
+        "meta",
+        "html",
+        "head",
+        "base"
+    );
+
+    $rm_tags_with_content = array(
+        "script",
+        "applet",
+        "embed",
+        "title"
+    );
+
+    $self_closing_tags = array(
+        "img",
+        "br",
+        "hr",
+        "input"
+    );
+
+    $force_tag_closing = false;
+
+    $rm_attnames = array(
+        "/.*/" =>
+            array(
+                "/target/si",
+                "/^on.*/si"
+            )
+    );
+
+    $secremoveimg = "../images/" . _("sec_remove_eng.png");
+    $bad_attvals = array(
+        "/.*/" =>
+            array(
+                "/^src|background|href|action/i" =>
+                    array(
+                        array(
+                            "|^([\'\"])\s*\.\./.*([\'\"])|si",
+                            "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si"
+                        ),
+                        array(
+                            "\\1$secremoveimg\\2",
+                            "\\1$secremoveimg\\2"
+                        )
+                    ),
+                "/^style/si" =>
+                    array(
+                        array(
+                            "/expression\s*:/si",
+                            "|url\(([\'\"])\s*\.\./.*([\'\"])\)|si",
+                            "/url\(([\'\"])\s*\S+script:.*([\'\"])\)/si"
+                        ),
+                        array(
+                            "idiocy:",
+                            "url(\\1$secremoveimg\\2)",
+                            "url(\\1$secremoveimg\\2)"
+                        )
+                    )
+            )
+    );
+    if (!$view_unsafe_images){
+        /**
+         * Remove any references to http/https if view_unsafe_images set
+         * to false.
+         *
+         * The extra rules are added next to the existing ones. An
+         * array_merge() of two arrays that share the "/.*/" key would
+         * replace that whole entry and throw away the script, "../"
+         * and expression checks defined above.
+         */
+        $bad_attvals["/.*/"]["/^src|background/i"] = array(
+            array(
+                "/^([\'\"])\s*https*:.*([\'\"])/si"
+            ),
+            array(
+                "\\1$secremoveimg\\2"
+            )
+        );
+        $bad_attvals["/.*/"]["/^style/si"][0][] =
+            "/url\(([\'\"])\s*https*:.*([\'\"])\)/si";
+        $bad_attvals["/.*/"]["/^style/si"][1][] =
+            "url(\\1$secremoveimg\\2)";
+    }
+
+    $add_attr_to_tag = array(
+        "/^a$/si" => array('target' => '"_new"')
+    );
+    $trusted = sq_sanitize($body,
+                           $tag_list,
+                           $rm_tags_with_content,
+                           $self_closing_tags,
+                           $force_tag_closing,
+                           $rm_attnames,
+                           $bad_attvals,
+                           $add_attr_to_tag,
+                           $message,
+                           $id
+                           );
+    /**
+     * The image path is plain text, so quote it before using it as a
+     * pattern.
+     */
+    if (preg_match("|" . preg_quote($secremoveimg, "|") . "|si", $trusted)){
+        $has_unsafe_images = true;
+    }
+    return $trusted;
+}
+?>
\ No newline at end of file