+ }
+
+ /**
+ * Replace empty src tags with the blank image. src is only used
+ * for frames, images, and image inputs. Doing a replace should
+ * not affect them working as should be, however it will stop
+ * IE from being kicked off when src for img tags are not set
+ */
+ if (($attname == 'src') && ($attvalue == '""')) {
+ $attary{$attname} = '"' . SM_PATH . 'images/blank.png"';
+ }
+
+ /**
+ * Turn cid: urls into http-friendly ones.
+ */
+ if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
+ $attary{$attname} = sq_cid2http($message, $id, $attvalue, $mailbox);
+ }
+
+ /**
+ * "Hack" fix for Outlook using the proprietary outbind:// protocol in img tags.
+ * One day MS might actually make it match something useful; for now we fall
+ * back to using cid2http, so we can grab the blank.png.
+ */
+ if (preg_match("/^[\'\"]\s*outbind:\/\//si", $attvalue)) {
+ $attary{$attname} = sq_cid2http($message, $id, $attvalue, $mailbox);
+ }
+
+ }
+ /**
+ * See if we need to append any attributes to this tag.
+ */
+ foreach ($add_attr_to_tag as $matchtag=>$addattary){
+ if (preg_match($matchtag, $tagname)){
+ $attary = array_merge($attary, $addattary);
+ }
+ }
+ return $attary;
+}
+
+/**
+ * Edits the content of a <style> element so that it is friendly and
+ * usable inside a SquirrelMail page.
+ *
+ * The closing </style> tag is located with sq_findnxreg(), starting at
+ * $pos. The text in between is then rewritten:
+ *  - "body {...}" rules are renamed to ".bodyclass {...}" so they can be
+ *    applied to a <div>;
+ *  - http/https url() references are replaced by the "image removed"
+ *    picture unless $view_unsafe_images is set;
+ *  - cid: url() references are converted with sq_cid2http();
+ *  - url() references using any other protocol are removed;
+ *  - if the normalised text contains comments, expression, behavio(u)r,
+ *    binding or include-source, the normalised and neutralised text is
+ *    returned instead of the original.
+ *
+ * @param $body    the string being sanitized
+ * @param $pos     offset in $body where the search for </style> starts
+ * @param $message the message object
+ * @param $id      the message id
+ * @param $mailbox the message mailbox
+ * @return array(edited style content, position computed from the
+ *         sq_findnxreg() result at which scanning should resume), or
+ *         array(FALSE, strlen($body)) when no closing tag is found.
+ */
+function sq_fixstyle($body, $pos, $message, $id, $mailbox){
+    global $view_unsafe_images;
+    $ret = sq_findnxreg($body, $pos, '</\s*style\s*>');
+    if ($ret == FALSE){
+        return array(FALSE, strlen($body));
+    }
+    $newpos = $ret[0] + strlen($ret[2]);
+    $content = $ret[1];
+    /**
+     * First look for general BODY style declaration, which would be
+     * like so:
+     * body {background: blah-blah}
+     * and change it to .bodyclass so we can just assign it to a <div>
+     */
+    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+    $secremoveimg = '../images/' . _("sec_remove_eng.png");
+    /**
+     * Fix url('blah') declarations.
+     */
+    // $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si",
+    // "url(\\1$secremoveimg\\2)", $content);
+    // remove NUL
+    $content = str_replace("\0", "", $content);
+    // translate ur\l and variations (IE parses that)
+    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
+    // NB: every url() that has been dealt with is rewritten as "u\0r\0l" so
+    // that it no longer matches the loop condition. Each branch below must
+    // therefore change $content, otherwise the loop would never end. The NUL
+    // characters are removed after the loop.
+    while (preg_match("/url\s*\(\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*\)/si", $content, $matches)) {
+        $sProto = strtolower($matches[1]);
+        switch ($sProto) {
+            /**
+             * Fix url('https*://.*) declarations but only if $view_unsafe_images
+             * is false.
+             */
+            case 'https':
+            case 'http':
+                if (!$view_unsafe_images){
+
+                    // $sExpr has no capturing groups, so \1 and \2 in the
+                    // replacement contribute nothing.
+                    $sExpr = "/url\s*\(\s*[\'\"]?\s*$sProto*:.*[\'\"]?\s*\)/si";
+                    $content = preg_replace($sExpr, "u\0r\0l(\\1$secremoveimg\\2)", $content);
+
+                } else {
+                    $content = preg_replace('/url/i',"u\0r\0l",$content);
+                }
+                break;
+            /**
+             * Fix urls that refer to cid:
+             */
+            case 'cid':
+                // The second group is greedy, so for a quoted url('cid:...')
+                // it still carries the closing quote (and any trailing
+                // whitespace). Strip those so the lookup sees the bare id.
+                $cidurl = 'cid:' . rtrim($matches[2], "'\" \t\r\n");
+                $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox);
+                // Replace the exact text that was matched. Building a new
+                // regular expression from the matched text failed to match
+                // quoted urls and ids containing regex metacharacters, which
+                // left $content untouched and the loop spinning forever.
+                $content = str_replace($matches[0], "u\0r\0l($httpurl)", $content);
+                break;
+            default:
+                /**
+                 * replace url with protocol other than the white list
+                 * http, https and cid by an empty string.
+                 */
+                $content = preg_replace("/url\s*\(\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*\)/si",
+                                        "", $content);
+                break;
+        }
+    }
+    // remove NUL
+    $content = str_replace("\0", "", $content);
+    /**
+     * Remove any backslashes, entities, and extraneous whitespace.
+     * This is done on a copy; sq_defang() and sq_unspace() are called for
+     * their effect on the argument (presumably taken by reference).
+     */
+    $contentTemp = $content;
+    sq_defang($contentTemp);
+    sq_unspace($contentTemp);
+
+    /**
+     * Fix stupid css declarations which lead to vulnerabilities
+     * in IE.
+     */
+    $match = Array('/\/\*.*\*\//',
+                   '/expression/i',
+                   '/behaviou*r/i',
+                   '/binding/i',
+                   '/include-source/i');
+    $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy');
+    $contentNew = preg_replace($match, $replace, $contentTemp);
+    if ($contentNew !== $contentTemp) {
+        // insecure css declarations are used. From now on we don't care
+        // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash
+        $content = $contentNew;
+    }
+    return array($content, $newpos);
+}
+
+
+/**
+ * Turns a cid: url into a url the browser can actually fetch.
+ *
+ * If the value starts with a single or double quote, every occurrence of
+ * that quote character is removed and the result is wrapped in the same
+ * quote character again. The first four characters of the trimmed value
+ * (the "cid:" prefix) are dropped and the remainder is looked up with
+ * find_ent_id().
+ *
+ * @param $message the message object
+ * @param $id the message id
+ * @param $cidurl the cid: url.
+ * @param $mailbox the message mailbox
+ * @return a string with a download.php url for the referenced entity, or
+ *         the url of images/blank.png when no entity is found
+ */
+function sq_cid2http($message, $id, $cidurl, $mailbox){
+    /**
+     * Remember the surrounding quote (if any) and strip it.
+     */
+    $firstchar = substr($cidurl, 0, 1);
+    $quotchar = '';
+    if ($firstchar == '"' || $firstchar == "'"){
+        $quotchar = $firstchar;
+        $cidurl = str_replace($quotchar, "", $cidurl);
+    }
+    $cidurl = substr(trim($cidurl), 4);
+
+    // drop a leading "{some_id}/" style prefix before the lookup
+    $cidurl = preg_replace('/\{.*?\}\//', '', $cidurl);
+
+    $linkurl = find_ent_id($cidurl, $message);
+
+    /**
+     * This is part of a fix for Outlook Express 6.x generating
+     * cid URLs without creating content-id headers. These images are
+     * not part of the multipart/related html mail. The html contains
+     * <img src="cid:{some_id}/image_filename.ext"> references to
+     * attached images with as goal to render them inline although
+     * the attachment disposition property is not inline.
+     */
+    if (empty($linkurl) && preg_match('/{.*}\//', $cidurl)) {
+        $cidurl = preg_replace('/{.*}\//','', $cidurl);
+        if (!empty($cidurl)) {
+            $linkurl = find_ent_id($cidurl, $message);
+        }
+    }
+
+    /**
+     * If we couldn't generate a proper img url, drop in a blank image
+     * instead of sending back empty, otherwise it causes unusual behaviour
+     * (in case of unsafe cid links this could become a sort of
+     * "unsafe link" image).
+     */
+    if (empty($linkurl)) {
+        return $quotchar . SM_PATH . 'images/blank.png' . $quotchar;
+    }
+
+    $query = 'src/download.php?absolute_dl=true&' .
+             "passed_id=$id&mailbox=" . urlencode($mailbox) .
+             '&ent_id=' . $linkurl;
+    return $quotchar . SM_PATH . $query . $quotchar;
+}
+
+/**
+ * This function changes the <body> tag into a <div> tag since we
+ * can't really have a body-within-body.
+ *
+ * The background, bgcolor and text attributes are translated into an
+ * inline style; every other attribute is dropped. The resulting <div>
+ * always gets class 'bodyclass'.
+ *
+ * @param $attary an array of attributes and values of <body>
+ * @param $mailbox mailbox we're currently reading (for cid2http)
+ * @param $message current message (for cid2http)
+ * @param $id current message id (for cid2http)
+ * @return a modified array of attributes to be set for <div>
+ */
+function sq_body2div($attary, $mailbox, $message, $id){
+    $divattary = Array('class' => "'bodyclass'");
+    $text = '#000000';
+    $has_bgc_stl = $has_txt_stl = false;
+    $styledef = '';
+    if (is_array($attary) && sizeof($attary) > 0){
+        foreach ($attary as $attname=>$attvalue){
+            // Strip the surrounding quote character, but only if the value
+            // really starts with one (same check as sq_cid2http); otherwise
+            // an unquoted value such as #ffffff would lose its first
+            // character.
+            $quotchar = substr($attvalue, 0, 1);
+            if ($quotchar == '"' || $quotchar == "'"){
+                $attvalue = str_replace($quotchar, "", $attvalue);
+            }
+            switch ($attname){
+                case 'background':
+                    $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox);
+                    $styledef .= "background-image: url('$attvalue'); ";
+                    break;
+                case 'bgcolor':
+                    $has_bgc_stl = true;
+                    $styledef .= "background-color: $attvalue; ";
+                    break;
+                case 'text':
+                    $has_txt_stl = true;
+                    $styledef .= "color: $attvalue; ";
+                    break;
+            }
+        }
+        // Outlook defines a white bgcolor and no text color. This can lead to
+        // white text on a white bg with certain themes.
+        if ($has_bgc_stl && !$has_txt_stl) {
+            $styledef .= "color: $text; ";
+        }
+        if (strlen($styledef) > 0){
+            // The style value is emitted inside double quotes. A double
+            // quote that survived in a single-quoted attribute value would
+            // end the attribute early and let the mail inject attributes of
+            // its own, so encode it.
+            $styledef = str_replace('"', '&quot;', $styledef);
+            // square brackets: the {} offset syntax is gone in PHP 8
+            $divattary['style'] = "\"$styledef\"";
+        }
+    }
+    return $divattary;
+}
+
+/**
+ * This is the main function and the one you should actually be calling.
+ * There are several variables you should be aware of and which need
+ * special description.
+ *
+ * Since the description is quite lengthy, see it here:
+ * http://linux.duke.edu/projects/mini/htmlfilter/
+ *
+ * @param $body the string with HTML you wish to filter
+ * @param $tag_list see description above
+ * @param $rm_tags_with_content see description above
+ * @param $self_closing_tags see description above
+ * @param $force_tag_closing see description above
+ * @param $rm_attnames see description above
+ * @param $bad_attvals see description above
+ * @param $add_attr_to_tag see description above
+ * @param $message message object
+ * @param $id message id
+ * @return sanitized html safe to show on your pages.
+ */
+function sq_sanitize($body,
+ $tag_list,
+ $rm_tags_with_content,
+ $self_closing_tags,
+ $force_tag_closing,
+ $rm_attnames,
+ $bad_attvals,
+ $add_attr_to_tag,
+ $message,
+ $id,
+ $mailbox
+ ){
+ $me = 'sq_sanitize';
+ $rm_tags = array_shift($tag_list);
+ /**
+ * Normalize rm_tags and rm_tags_with_content.
+ */
+ @array_walk($tag_list, 'sq_casenormalize');
+ @array_walk($rm_tags_with_content, 'sq_casenormalize');
+ @array_walk($self_closing_tags, 'sq_casenormalize');
+ /**
+ * See if tag_list is of tags to remove or tags to allow.
+ * false means remove these tags
+ * true means allow these tags
+ */
+ $curpos = 0;
+ $open_tags = Array();
+ $trusted = "\n<!-- begin sanitized html -->\n";
+ $skip_content = false;
+ /**
+ * Take care of netscape's stupid javascript entities like
+ * &{alert('boo')};
+ */
+ $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body);
+
+ while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
+ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
+ $free_content = substr($body, $curpos, $lt-$curpos);
+ /**
+ * Take care of <style>
+ */
+ if ($tagname == "style" && $tagtype == 1){
+ list($free_content, $curpos) =
+ sq_fixstyle($body, $gt+1, $message, $id, $mailbox);
+ if ($free_content != FALSE){
+ $trusted .= sq_tagprint($tagname, $attary, $tagtype);
+ $trusted .= $free_content;
+ $trusted .= sq_tagprint($tagname, false, 2);