+ }
+ }
+ return $attvalue;
+}
+
+/**
+* This function runs various checks against the attributes of one tag:
+* it drops attributes listed in $rm_attnames, rewrites values matched by
+* $bad_attvals, substitutes a blank image for empty src values, converts
+* cid: and outbind:// URLs via sq_cid2http, and finally appends any
+* attributes requested by $add_attr_to_tag.
+*
+* @param $tagname String with the name of the tag.
+* @param $attary Array with all tag attributes (name => value).
+* @param $rm_attnames See description for sq_sanitize
+* @param $bad_attvals See description for sq_sanitize
+* @param $add_attr_to_tag See description for sq_sanitize
+* @param $message message object
+* @param $id message id
+* @param $mailbox mailbox of the message (passed on to sq_cid2http)
+* @return Array with modified attributes.
+*/
+function sq_fixatts($tagname,
+                    $attary,
+                    $rm_attnames,
+                    $bad_attvals,
+                    $add_attr_to_tag,
+                    $message,
+                    $id,
+                    $mailbox
+                    ){
+    /**
+     * foreach iterates over a snapshot of the attributes, so entries of
+     * $attary can be removed or rewritten inside the loop. It also does
+     * not depend on the array's internal pointer, and unlike each() it
+     * is still available in PHP 8.
+     */
+    foreach ($attary as $attname=>$attvalue){
+        /**
+         * See if this attribute should be removed.
+         */
+        foreach ($rm_attnames as $matchtag=>$matchattrs){
+            if (preg_match($matchtag, $tagname)){
+                foreach ($matchattrs as $matchattr){
+                    if (preg_match($matchattr, $attname)){
+                        unset($attary[$attname]);
+                        /**
+                         * Move on to the next attribute. A plain
+                         * "continue" would only advance the innermost
+                         * loop, and the checks below could then put the
+                         * removed attribute back into $attary.
+                         */
+                        continue 3;
+                    }
+                }
+            }
+        }
+        /**
+         * Remove any backslashes, entities, and extraneous whitespace.
+         * The cleaned copy is only used for the checks below; the stored
+         * value is left alone unless one of the checks replaces it.
+         */
+        $attvalue = sq_unbackslash($attvalue);
+        $attvalue = sq_deent($attvalue);
+        $attvalue = sq_unspace($attvalue);
+
+        /**
+         * Remove \r \n \t \0 " " "\\"
+         */
+        $attvalue = str_replace(array("\r", "\n", "\t", "\0", " ", "\\"),
+                                '', $attvalue);
+
+        /**
+         * Now let's run checks on the attvalues.
+         * Every entry of $bad_attvals maps a tag-name regex to a list of
+         * attribute-name regexes, each of which carries a pair of
+         * (patterns, replacements) to apply to the value.
+         */
+        foreach ($bad_attvals as $matchtag=>$matchattrs){
+            if (preg_match($matchtag, $tagname)){
+                foreach ($matchattrs as $matchattr=>$valary){
+                    if (preg_match($matchattr, $attname)){
+                        /**
+                         * There are two arrays in valary.
+                         * First is matches.
+                         * Second one is replacements
+                         */
+                        list($valmatch, $valrepl) = $valary;
+                        $newvalue =
+                            preg_replace($valmatch, $valrepl, $attvalue);
+                        /**
+                         * Strict comparison so that numeric-looking
+                         * strings are never treated as equal by type
+                         * juggling.
+                         */
+                        if ($newvalue !== $attvalue){
+                            $attary[$attname] = $newvalue;
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Replace empty src tags with the blank image. src is only used
+         * for frames, images, and image inputs. Doing a replace should
+         * not affect them working as should be, however it will stop
+         * IE from being kicked off when src for img tags are not set
+         */
+        if (($attname === 'src') && ($attvalue === '""')) {
+            $attary[$attname] = '"' . SM_PATH . 'images/blank.png"';
+        }
+
+        /**
+         * Turn cid: urls into http-friendly ones.
+         */
+        if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
+            $attary[$attname] = sq_cid2http($message, $id, $attvalue, $mailbox);
+        }
+
+        /**
+         * "Hack" fix for Outlook using proprietary outbind:// protocol in
+         * img tags. One day MS might actually make it match something
+         * useful, for now, falling back to using cid2http, so we can grab
+         * the blank.png.
+         */
+        if (preg_match("/^[\'\"]\s*outbind:\/\//si", $attvalue)) {
+            $attary[$attname] = sq_cid2http($message, $id, $attvalue, $mailbox);
+        }
+    }
+    /**
+     * See if we need to append any attributes to this tag.
+     */
+    foreach ($add_attr_to_tag as $matchtag=>$addattary){
+        if (preg_match($matchtag, $tagname)){
+            $attary = array_merge($attary, $addattary);
+        }
+    }
+    return $attary;
+}
+
+/**
+* This function edits the style definition to make them friendly and
+* usable in SquirrelMail. It locates the closing </style> tag in $body,
+* starting at $pos, and cleans up everything found before it.
+*
+* @param $body the string with the HTML being processed
+* @param $pos offset in $body at which the style content starts
+* @param $message the message object
+* @param $id the message id
+* @param $mailbox the message mailbox
+* @return an array of two elements: the edited style content and the
+*         offset in $body at which processing should resume. When no
+*         closing </style> tag is found, array(FALSE, strlen($body)).
+*/
+function sq_fixstyle($body, $pos, $message, $id, $mailbox){
+    global $view_unsafe_images;
+    $ret = sq_findnxreg($body, $pos, '</\s*style\s*>');
+    if ($ret == FALSE){
+        return array(FALSE, strlen($body));
+    }
+    /**
+     * NOTE(review): this relies on sq_findnxreg returning the match
+     * offset in [0], the text preceding the match in [1] and the matched
+     * text in [2] -- confirm against its definition.
+     */
+    $newpos = $ret[0] + strlen($ret[2]);
+    $content = $ret[1];
+    /**
+     * First look for general BODY style declaration, which would be
+     * like so:
+     * body {background: blah-blah}
+     * and change it to .bodyclass so we can just assign it to a <div>
+     */
+    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+    $secremoveimg = '../images/' . _("sec_remove_eng.png");
+    /**
+     * Fix url('blah') declarations.
+     */
+    $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si",
+                            "url(\\1$secremoveimg\\2)", $content);
+    /**
+     * Fix url('https*://.*) declarations but only if $view_unsafe_images
+     * is false.
+     */
+    if (!$view_unsafe_images){
+        $content = preg_replace("|url\s*\(\s*([\'\"])\s*https*:.*?([\'\"])\s*\)|si",
+                                "url(\\1$secremoveimg\\2)", $content);
+    }
+
+    /**
+     * Fix urls that refer to cid:
+     *
+     * The matched text is swapped out literally. Building a second
+     * regex from the cid url is unsafe: the url comes from the message,
+     * and regex metacharacters in it (or the "|" delimiter) can make
+     * the replacement miss the text that was just matched, which would
+     * keep this loop spinning forever.
+     */
+    while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si",
+                      $content, $matches)){
+        $cidurl = $matches[1];
+        $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox);
+        $content = str_replace($matches[0], "url($httpurl)", $content);
+    }
+
+    /**
+     * Fix stupid css declarations which lead to vulnerabilities
+     * in IE.
+     */
+    $match   = array('/expression/i',
+                     '/behaviou*r/i',
+                     '/binding/i',
+                     '/include-source/i');
+    $replace = array('idiocy', 'idiocy', 'idiocy', 'idiocy');
+    $content = preg_replace($match, $replace, $content);
+    return array($content, $newpos);
+}
+
+/**
+* This function converts cid: url's into the ones that can be viewed in
+* the browser.
+*
+* If the url arrives wrapped in single or double quotes, the returned
+* url is wrapped in the same quote character; otherwise it is returned
+* unquoted.
+*
+* @param $message the message object
+* @param $id the message id
+* @param $cidurl the cid: url.
+* @param $mailbox the message mailbox
+* @return a string with a http-friendly url. When the cid cannot be
+*         resolved to a message entity, the url of the blank image.
+*/
+function sq_cid2http($message, $id, $cidurl, $mailbox){
+    /**
+     * Get rid of quotes.
+     */
+    $quotchar = substr($cidurl, 0, 1);
+    if ($quotchar == '"' || $quotchar == "'"){
+        $cidurl = str_replace($quotchar, "", $cidurl);
+    } else {
+        $quotchar = '';
+    }
+    /**
+     * Drop the first four characters, i.e. the "cid:" scheme prefix.
+     */
+    $cidurl = substr(trim($cidurl), 4);
+    $linkurl = find_ent_id($cidurl, $message);
+
+    /**
+     * This is part of a fix for Outlook Express 6.x generating
+     * cid URLs without creating content-id headers. These images are
+     * not part of the multipart/related html mail. The html contains
+     * <img src="cid:{some_id}/image_filename.ext"> references to
+     * attached images with as goal to render them inline although
+     * the attachment disposition property is not inline.
+     **/
+    if (empty($linkurl)) {
+        if (preg_match('/{.*}\//', $cidurl)) {
+            $cidurl = preg_replace('/{.*}\//','', $cidurl);
+            if (!empty($cidurl)) {
+                $linkurl = find_ent_id($cidurl, $message);
+            }
+        }
+    }
+
+    if (!empty($linkurl)) {
+        return $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' .
+               "passed_id=$id&mailbox=" . urlencode($mailbox) .
+               '&ent_id=' . $linkurl . $quotchar;
+    }
+
+    /**
+     * If we couldn't generate a proper img url, drop in a blank image
+     * instead of sending back empty, otherwise it causes unusual
+     * behaviour. The closing quote is required here as well: without
+     * it the caller receives an unbalanced quoted value that breaks the
+     * attribute or url() it is placed into.
+     */
+    return $quotchar . SM_PATH . 'images/blank.png' . $quotchar;
+}
+
+/**
+* This function changes the <body> tag into a <div> tag since we
+* can't really have a body-within-body. The background, bgcolor and
+* text attributes are translated into an inline style definition; all
+* other attributes are dropped.
+*
+* @param $attary an array of attributes and values of <body>
+* @param $mailbox mailbox we're currently reading (for cid2http)
+* @param $message current message (for cid2http)
+* @param $id current message id (for cid2http)
+* @return a modified array of attributes to be set for <div>
+*/
+function sq_body2div($attary, $mailbox, $message, $id){
+    $divattary = array('class' => "'bodyclass'");
+    $text = '#000000';
+    $has_bgc_stl = $has_txt_stl = false;
+    $styledef = '';
+    if (is_array($attary) && sizeof($attary) > 0){
+        foreach ($attary as $attname=>$attvalue){
+            /**
+             * Strip both kinds of quotes from the value. The values end
+             * up inside a double-quoted style attribute (and, for the
+             * background, inside a single-quoted url()), so a stray
+             * quote of either kind would let the value break out of it.
+             * Stripping only real quote characters also leaves unquoted
+             * values such as #ffffff intact.
+             */
+            $attvalue = str_replace(array('"', "'"), '', $attvalue);
+            switch ($attname){
+                case 'background':
+                    $attvalue = sq_cid2http($message, $id,
+                                            $attvalue, $mailbox);
+                    $styledef .= "background-image: url('$attvalue'); ";
+                    break;
+                case 'bgcolor':
+                    $has_bgc_stl = true;
+                    $styledef .= "background-color: $attvalue; ";
+                    break;
+                case 'text':
+                    $has_txt_stl = true;
+                    $styledef .= "color: $attvalue; ";
+                    break;
+            }
+        }
+        // Outlook defines a white bgcolor and no text color. This can lead to
+        // white text on a white bg with certain themes.
+        if ($has_bgc_stl && !$has_txt_stl) {
+            $styledef .= "color: $text; ";
+        }
+        if (strlen($styledef) > 0){
+            $divattary['style'] = "\"$styledef\"";
+        }
+    }
+    return $divattary;
+}
+
+/**
+* This is the main function and the one you should actually be calling.
+* There are several variables you should be aware of and which need
+* special description.
+*
+* Since the description is quite lengthy, see it here:
+* http://linux.duke.edu/projects/mini/htmlfilter/
+*
+* @param $body the string with HTML you wish to filter
+* @param $tag_list see description above
+* @param $rm_tags_with_content see description above
+* @param $self_closing_tags see description above
+* @param $force_tag_closing see description above
+* @param $rm_attnames see description above
+* @param $bad_attvals see description above
+* @param $add_attr_to_tag see description above
+* @param $message message object
+* @param $id message id
+* @return sanitized html safe to show on your pages.
+*/
+function sq_sanitize($body,
+ $tag_list,
+ $rm_tags_with_content,
+ $self_closing_tags,
+ $force_tag_closing,
+ $rm_attnames,
+ $bad_attvals,
+ $add_attr_to_tag,
+ $message,
+ $id,
+ $mailbox
+ ){
+ $me = 'sq_sanitize';
+ $rm_tags = array_shift($tag_list);
+ /**
+ * Normalize rm_tags and rm_tags_with_content.
+ */
+ @array_walk($tag_list, 'sq_casenormalize');
+ @array_walk($rm_tags_with_content, 'sq_casenormalize');
+ @array_walk($self_closing_tags, 'sq_casenormalize');
+ /**
+ * See if tag_list is of tags to remove or tags to allow.
+ * false means remove these tags
+ * true means allow these tags
+ */
+ $curpos = 0;
+ $open_tags = Array();
+ $trusted = "\n<!-- begin sanitized html -->\n";
+ $skip_content = false;
+ /**
+ * Take care of netscape's stupid javascript entities like
+ * &{alert('boo')};
+ */
+ $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body);
+
+ while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
+ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
+ $free_content = substr($body, $curpos, $lt-$curpos);
+ /**
+ * Take care of <style>
+ */
+ if ($tagname == "style" && $tagtype == 1){
+ list($free_content, $curpos) =
+ sq_fixstyle($body, $gt+1, $message, $id, $mailbox);
+ if ($free_content != FALSE){
+ $trusted .= sq_tagprint($tagname, $attary, $tagtype);
+ $trusted .= $free_content;
+ $trusted .= sq_tagprint($tagname, false, 2);
+ }
+ continue;
+ }
+ if ($skip_content == false){
+ $trusted .= $free_content;
+ }
+ if ($tagname != FALSE){
+ if ($tagtype == 2){
+ if ($skip_content == $tagname){
+ /**
+ * Got to the end of tag we needed to remove.
+ */
+ $tagname = false;
+ $skip_content = false;
+ } else {
+ if ($skip_content == false){
+ if ($tagname == "body"){
+ $tagname = "div";
+ }
+ if (isset($open_tags{$tagname}) &&
+ $open_tags{$tagname} > 0){
+ $open_tags{$tagname}--;
+ } else {
+ $tagname = false;
+ }
+ }
+ }