url_parser translateText => url_parser */

/* -------------------------------------------------------------------------- */
/* MIME DECODING */
/* -------------------------------------------------------------------------- */

/**
 * Get the MIME structure
 *
 * This function gets the structure of a message and stores it in the "message" class.
 * It will return this object for use with all relevant header information and
 * fully parsed into the standard "message" object format.
 *
 * If Message::parseStructure() does not return an object, an error page is
 * printed and the script exits.
 *
 * @param string $bodystructure IMAP response text containing the "bodystructure" item
 * @param array  $flags         IMAP flags of the message; matched case-insensitively
 *                              against \Seen, \Answered, \Deleted, \Flagged,
 *                              $Forwarded and $MDNSent
 * @return object the parsed message object with the matching is_* properties set
 */
function mime_structure ($bodystructure, $flags=array()) {

    /* Isolate the body structure and remove beginning and end parenthesis. */
    $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13));
    $read = trim(substr ($read, 0, -1));
    $i = 0;
    $msg = Message::parseStructure($read,$i);

    if (!is_object($msg)) {
        global $color, $mailbox;

        displayPageHeader( $color, $mailbox );
        $errormessage  = _("SquirrelMail could not decode the bodystructure of the message");
        $errormessage .= '
'._("The bodystructure provided by your IMAP server:").'

';
        $errormessage .= '
' . sm_encode_html_special_chars($read) . '
';
        plain_error_message( $errormessage );
        echo '';
        exit;
    }

    /* Lower-cased flag name => message property it switches on. Comparing the
     * whole flag avoids reading $flag[1], which raised an "Uninitialized string
     * offset" warning for empty or one-character flags. */
    $flag_properties = array(
        '\\seen'     => 'is_seen',
        '\\answered' => 'is_answered',
        '\\deleted'  => 'is_deleted',
        '\\flagged'  => 'is_flagged',
        '$forwarded' => 'is_forwarded',
        '$mdnsent'   => 'is_mdnsent',
    );
    if (is_array($flags)) {
        foreach ($flags as $flag) {
            if (!is_string($flag)) {
                continue;
            }
            $flag_key = strtolower($flag);
            if (isset($flag_properties[$flag_key])) {
                $property = $flag_properties[$flag_key];
                $msg->$property = true;
            }
        }
    }
    //    listEntities($msg);
    return $msg;
}

/* This starts the parsing of a particular structure. It is called recursively,
 * so it can be passed different structures. It returns an object of type
 * $message.
 * First, it checks to see if it is a multipart message. If it is, then it
 * handles that as it sees is necessary. If it is just a regular entity,
 * then it parses it and adds the necessary header information (by calling out
 * to mime_get_elements()
 */
function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) {
    /* Do a bit of error correction. If we couldn't find the entity id, just guess
     * that it is the first one. That is usually the case anyway.
*/
    // No entity id: request the whole message; otherwise request only that part.
    if (!$ent_id) {
        $cmd = "FETCH $id BODY[]";
    } else {
        $cmd = "FETCH $id BODY[$ent_id]";
    }
    // A non-zero $fetch_size appends a <0.size> partial specifier to the command.
    if ($fetch_size!=0) $cmd .= "<0.$fetch_size>";

    $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, TRUE);

    // Drop leading untagged ("*") lines until the FETCH ... BODY line is reached.
    // The (string) cast keeps trim() from being handed NULL once $data is empty.
    do {
        $topline = trim((string) array_shift($data));
    } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH .*BODY.*/i', $topline)) ;
    // Matching with "BODY" above is difficult: in most cases "FETCH \(BODY" would work
    // but some servers may put other things in the same result, perhaps something such
    // as "* 23 FETCH (FLAGS (\Seen) BODY[1] {174}".  There is some small chance that
    // if the character sequence "BODY" appears in a response where it isn't actually
    // a FETCH response data item name, the current regex will break things.  The better
    // way to do this would be to parse the response correctly and not use a regex.

    $wholemessage = implode('', $data);
    if (preg_match('/\{([^\}]*)\}/', $topline, $regs)) {
        // The braces carry the size of the data that follows. Cast it to int:
        // passing the raw capture to substr() raises a TypeError on PHP 8 when
        // the braces are empty or hold non-numeric text.
        $ret = substr($wholemessage, 0, (int) $regs[1]);
        /* There is some information in the content info header that could be important
         * in order to parse html messages. Let's get them here.
         */
//        if ($ret[0] == '<') {
//            $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, TRUE);
//        }
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        // Body delivered as a quoted string on the FETCH line itself.
        $ret = $regs[1];
    } else if ((stristr($topline, 'nil') !== false) && (empty($wholemessage))) {
        // "nil" on the FETCH line and nothing after it: return the empty remainder.
        $ret = $wholemessage;
    } else {
        // None of the recognised shapes matched: report the failure.
        global $where, $what, $mailbox, $passed_id, $startMessage;
        // NOTE(review): $par is assembled here but is not referenced in the
        // visible remainder of this function -- confirm where it is used.
        $par = 'mailbox=' . urlencode($mailbox) . '&passed_id=' . $passed_id;
        if (isset($where) && isset($what)) {
            $par .= '&where=' . urlencode($where) . '&what=' . urlencode($what);
        } else {
            $par .= '&startMessage=' . $startMessage . '&show_more=0';
        }
        $par .= '&response=' . urlencode($response) .
                '&message='  . urlencode($message)  .
                '&topline='  . urlencode($topline);

        echo '
' . '' . '' . '" . '" . '" . '" . "
' . _("Body retrieval error. The reason for this is most probably that the message is malformed.") . '
' . _("Command:") . "$cmd
' . _("Response:") . "$response
' . _("Message:") . "$message
' . _("FETCH line:") . "$topline


";
        $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, TRUE);
        array_shift($data);
        $wholemessage = implode('', $data);

        $ret = $wholemessage;
    }
    return $ret;
}

/**
 * Fetch one message entity, decode it and write it out.
 *
 * base64 bodies are not fetched into a variable here: the FETCH command is
 * handed to sqimap_run_command() together with the 'sqimap_base64_decode'
 * filter name and $rStream. Every other encoding is fetched with
 * mime_fetch_body(), decoded with decodeBody(), and then written to $rStream
 * when that is a resource, or echoed otherwise.
 *
 * @param resource $imap_stream imap connection resource
 * @param mixed    $id          message id used in the FETCH command
 * @param mixed    $ent_id      entity id (usually 1); a false-like value
 *                              selects the whole message (BODY[])
 * @param string   $encoding    transfer encoding of the entity
 * @param mixed    $rStream     output stream resource; any non-resource value
 *                              (including the default string) means "echo"
 * @param string   $force_crlf  passed through to decodeBody()
 * @return void
 */
function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding, $rStream='php://stdout', $force_crlf='') {

    /* A download over a slow link may take longer than the default time
     * limit, so lift it. set_time_limit() must not be called in safe mode.
     */
    if (!ini_get('safe_mode')) {
        set_time_limit(0);
    }

    if (strtolower($encoding) == 'base64') {
        /* base64 encoded attachments are not buffered here; decoding and
         * output are delegated to the filter given to sqimap_run_command().
         */
        $query = $ent_id ? "FETCH $id BODY[$ent_id]" : "FETCH $id BODY[]";
        sqimap_run_command($imap_stream,$query,true,$response,$message,TRUE,'sqimap_base64_decode',$rStream,true);
        return;
    }

    $decoded = decodeBody(mime_fetch_body ($imap_stream, $id, $ent_id), $encoding, $force_crlf);
    if (is_resource($rStream)) {
        fputs($rStream, $decoded);
    } else {
        echo $decoded;
    }

    /*
       TODO, use the same method for quoted printable.
       However, I assume that quoted printable attachments aren't that large
       so the performance gain / memory usage drop will be minimal.
       If we decide to add that then we need to adapt sqimap_fread because
       we need to split the result on \n and fread doesn't stop at \n. That
       means we also should provide $results from sqimap_fread (by ref) to
       the function and set $no_return to false. The $filter function for
       quoted printable should handle unsetting of $results.
    */
    /*
       TODO 2: find out how we write to the output stream php://stdout. fwrite
       doesn't work because 'php://stdout isn't a stream.
    */

    return;
}

/* -[ END MIME DECODING ]----------------------------------------------------------- */

/* This is here for debugging purposes.
It will print out a list * of all the entity IDs that are in the $message object. */ function listEntities ($message) { if ($message) { echo "" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '
';
        for ($i = 0; isset($message->entities[$i]); $i++) {
            echo "$i : ";
            $msg = listEntities($message->entities[$i]);

            if ($msg) {
                echo "return: ";
                return $msg;
            }
        }
    }
}

/**
 * Translate a priority value into its localized label.
 *
 * Only the first character of $priority is inspected: '1' and '2' give
 * "High", '4' and '5' give "Low", anything else (including '3') gives
 * "Normal".
 *
 * @param mixed $priority priority value whose first character is the level
 * @return string localized priority label
 */
function getPriorityStr($priority) {
    $leading_digit = substr($priority,0,1);

    /* Higher than normal priority. */
    if (in_array($leading_digit, array('1', '2'), true)) {
        return _("High");
    }

    /* Lower than normal priority. */
    if (in_array($leading_digit, array('4', '5'), true)) {
        return _("Low");
    }

    /* Everything else counts as normal priority. */
    return _("Normal");
}

/**
 * Look up one entity of a message by delegating to $message->getEntity().
 *
 * @param object $message message object
 * @param mixed  $ent_id  entity id to look up
 * @return mixed whatever $message->getEntity($ent_id) returns
 */
function getEntity ($message, $ent_id) {
    return $message->getEntity($ent_id);
}

/* translateText
 * Extracted from strings.php 23/03/2002
 */
function translateText(&$body, $wrap_at, $charset) {
    global $where, $what;   /* from searching */
    global $color;          /* color theme */

    // require_once(SM_PATH . 'functions/url_parser.php');

    $body_ary = explode("\n", $body);
    for ($i=0; $i < count($body_ary); $i++) {
        $line = rtrim($body_ary[$i],"\r");

        if (strlen($line) - 2 >= $wrap_at) {
            sqWordWrap($line, $wrap_at, $charset);
        }
        $line = charset_decode($charset, $line);
        $line = str_replace("\t", ' ', $line);

        parseUrl ($line);

        $quotes = 0;
        $pos = 0;
        $j = strlen($line);

        while ($pos < $j) {
            if ($line[$pos] == ' ') {
                $pos++;
            } else if (strpos($line, '>', $pos) === $pos) {
                $pos += 4;
                $quotes++;
            } else {
                break;
            }
        }

        if ($quotes % 2) {
            $line = '' . $line . '';
        } elseif ($quotes) {
            $line = '' . $line . '';
        }

        $body_ary[$i] = $line;
    }
    $body = '
' . implode("\n", $body_ary) . '
'; } /** * This returns a parsed string called $body. That string can then * be displayed as the actual message in the HTML. It contains * everything needed, including HTML Tags, Attachments at the * bottom, etc. * * Since 1.2.0 function uses message_body hook. * Till 1.3.0 function included output of formatAttachments(). * * @param resource $imap_stream imap connection resource * @param object $message squirrelmail message object * @param array $color squirrelmail color theme array * @param integer $wrap_at number of characters per line * @param string $ent_num (since 1.3.0) message part id * @param integer $id (since 1.3.0) message id * @param string $mailbox (since 1.3.0) imap folder name * @return string html formated message text */ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX') { /* This if statement checks for the entity to show as the * primary message. To add more of them, just put them in the * order that is their priority. */ global $startMessage, $languages, $squirrelmail_language, $show_html_default, $sort, $has_unsafe_images, $passed_ent_id, $use_iframe, $iframe_height, $download_and_unsafe_link, $download_href, $unsafe_image_toggle_href, $unsafe_image_toggle_text, $oTemplate, $nbsp; // workaround for not updated config.php if (! isset($use_iframe)) $use_iframe = false; // If there's no "view_unsafe_images" variable in the URL, turn unsafe // images off by default. sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET, FALSE); $body = ''; $urlmailbox = urlencode($mailbox); $body_message = getEntity($message, $ent_num); if (($body_message->header->type0 == 'text') || ($body_message->header->type0 == 'rfc822')) { $body = mime_fetch_body ($imap_stream, $id, $ent_num); $body = decodeBody($body, $body_message->header->encoding); if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . 
'_decode')) { if (mb_detect_encoding($body) != 'ASCII') { $body = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode',$body); } } /* As of 1.5.2, $body is passed (and modified) by reference */ do_hook('message_body', $body); /* If there are other types that shouldn't be formatted, add * them here. */ if ($body_message->header->type1 == 'html') { // Do we need to make an HTML part viewable as non-HTML plain text? if ($show_html_default != 1) { $entity_conv = array(' ' => ' ', // These are better done by regex (below) // '

' => "\n", // '

' => "\n", // '
' => "\n", // '
' => "\n", // '
' => "\n", // '
' => "\n", // '' => "\n", // '

' => "\n", '>' => '>', '<' => '<', '&' => '&', '©' => '©'); // first, completely remove * @param $mailbox the message mailbox * @return a string with edited content. */ function sq_fixstyle($body, $pos, $message, $id, $mailbox){ $me = 'sq_fixstyle'; // workaround for in between comments $iCurrentPos = $pos; $content = ''; $sToken = ''; $bSucces = false; $bEndTag = false; for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) { $char = $body[$i]; switch ($char) { case '<': $sToken = $char; break; case '/': if ($sToken == '<') { $sToken .= $char; $bEndTag = true; } else { $content .= $char; } break; case '>': if ($bEndTag) { $sToken .= $char; if (preg_match('/\<\/\s*style\s*\>/i',$sToken,$aMatch)) { $newpos = $i + 1; $bSucces = true; break 2; } else { $content .= $sToken; } $bEndTag = false; } else { $content .= $char; } break; case '!': if ($sToken == '<') { // possible comment if (isset($body[$i+2]) && substr($body,$i,3) == '!--') { $i = strpos($body,'-->',$i+3); if ($i === false) { // no end comment $i = strlen($body); } $sToken = ''; } } else { $content .= $char; } break; default: if ($bEndTag) { $sToken .= $char; } else { $content .= $char; } break; } } if ($bSucces == FALSE){ return array(FALSE, strlen($body)); } /** * First look for general BODY style declaration, which would be * like so: * body {background: blah-blah} * and change it to .bodyclass so we can just assign it to a
*/ // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); // Nah, this is even better - try to preface all CSS selectors with // our
class ID "bodyclass" then correct generic "body" selectors // TODO: this works pretty good but breaks stuff like this: // @media print { body { font-size: 10pt; } } // but there isn't an easy way to make this regex skip @media // definitions... though lots of the ones in the wild will be // correctly handled because they tend to end with a parenthesis, like: // @media screen and (max-width:480px) { ... $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content); $content = str_replace('.bodyclass body', '.bodyclass', $content); global $use_transparent_security_image; if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png'; else $secremoveimg = '../images/' . _("sec_remove_eng.png"); /** * Fix url('blah') declarations. */ // $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si", // "url(\\1$secremoveimg\\2)", $content); // first check for 8bit sequences and disallowed control characters if (preg_match('/[\16-\37\200-\377]+/',$content)) { $content = ''; return array($content, $newpos); } // IE Sucks hard. We have a special function for it. sq_fixIE_idiocy($content); // remove @import line $content = preg_replace("/^\s*(@import.*)$/mi","\n\n",$content); // translate ur\l and variations (IE parses that) // TODO check if the sq_fixIE_idiocy function already handles this. $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content); preg_match_all("/url\s*\((.+)\)/si",$content,$aMatch); if (count($aMatch)) { $aValue = $aReplace = array(); foreach($aMatch[1] as $sMatch) { // url value $urlvalue = $sMatch; sq_fix_url('style',$urlvalue, $message, $id, $mailbox,"'"); $aValue[] = $sMatch; $aReplace[] = $urlvalue; } $content = str_replace($aValue,$aReplace,$content); } /** * Remove any backslashes, entities, and extraneous whitespace. 
*/
    $contentTemp = $content;
    sq_defang($contentTemp);
    sq_unspace($contentTemp);

    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     *
     * Also remove "position" attribute, as it can easily be set
     * to "fixed" or "absolute" with "left" and "top" attributes
     * of zero, taking over the whole content frame.  It can also
     * be set to relative and move itself anywhere it wants to,
     * displaying content in areas it shouldn't be allowed to touch.
     */
    $match   = Array('/\/\*.*\*\//', // removes /* blah blah */
                     '/expression/i',
                     '/behaviou*r/i',
                     '/binding/i',
                     '/include-source/i',
                     '/javascript/i',
                     '/script/i',
                     '/position/i');
    $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', '');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        // insecure css declarations are used. From now on we don't care
        // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash
        $content = $contentNew;
    }
    return array($content, $newpos);
}

/**
 * Turn a cid: URL into a URL the browser can request.
 *
 * The entity the cid refers to is looked up with find_ent_id(). When one is
 * found the result points at src/download.php for that entity; otherwise it
 * points at the blank placeholder image. A quote character found at the
 * start of $cidurl is removed from the value and re-applied around the
 * result.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $cidurl   the cid: url.
 * @param  $mailbox  the message mailbox
 * @return           a string with a http-friendly url
 */
function sq_cid2http($message, $id, $cidurl, $mailbox){
    /* Remember a leading quote (if any) and strip that character from the url. */
    $quote = substr($cidurl, 0, 1);
    if ($quote == '"' || $quote == "'") {
        $cidurl = str_replace($quote, "", $cidurl);
    } else {
        $quote = '';
    }

    /* Cut the first four characters (the "cid:" scheme) and any "{...}/" parts. */
    $cid = substr(trim($cidurl), 4);
    $cid = preg_replace('/\{.*?\}\//', '', $cid);

    $entity = find_ent_id($cid, $message);

    /**
     * This is part of a fix for Outlook Express 6.x generating
     * cid URLs without creating content-id headers. These images are
     * not part of the multipart/related html mail. The html contains
     * references to attached images with as goal to render them inline
     * although the attachment disposition property is not inline.
     */
    if (empty($entity) && preg_match('/{.*}\//', $cid)) {
        $cid = preg_replace('/{.*}\//','', $cid);
        if (!empty($cid)) {
            $entity = find_ent_id($cid, $message);
        }
    }

    if (empty($entity)) {
        /**
         * If we couldn't generate a proper img url, drop in a blank image
         * instead of sending back empty, otherwise it causes unusual behaviour
         */
        return $quote . SM_PATH . 'images/blank.png' . $quote;
    }

    return $quote . sqm_baseuri()
         . 'src/download.php?absolute_dl=true&'
         . "passed_id=$id&mailbox=" . urlencode($mailbox)
         . '&ent_id=' . $entity . $quote;
}

/**
 * This function changes the tag into a
tag since we * can't really have a body-within-body. * * @param $attary an array of attributes and values of * @param $mailbox mailbox we're currently reading (for cid2http) * @param $message current message (for cid2http) * @param $id current message id (for cid2http) * @return a modified array of attributes to be set for
*/
function sq_body2div($attary, $mailbox, $message, $id){
    /* The replacement element always carries the 'bodyclass' class. */
    $div_attributes = Array('class' => "'bodyclass'");

    /* No attributes to translate: the class alone is returned. */
    if (!is_array($attary) || sizeof($attary) <= 0) {
        return $div_attributes;
    }

    $fallback_text_color = '#000000';
    $saw_bgcolor = false;
    $saw_text = false;
    $css = '';

    foreach ($attary as $attname => $attvalue) {
        /* Remove every occurrence of the value's first character
         * (NOTE(review): presumably the surrounding quote -- confirm that
         * callers always pass quoted values). */
        $attvalue = str_replace(substr($attvalue, 0, 1), "", $attvalue);

        if ($attname == 'background') {
            $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox);
            $css .= "background-image: url('$attvalue'); ";
        } elseif ($attname == 'bgcolor') {
            $saw_bgcolor = true;
            $css .= "background-color: $attvalue; ";
        } elseif ($attname == 'text') {
            $saw_text = true;
            $css .= "color: $attvalue; ";
        }
    }

    // Outlook defines a white bgcolor and no text color. This can lead to
    // white text on a white bg with certain themes.
    if ($saw_bgcolor && !$saw_text) {
        $css .= "color: $fallback_text_color; ";
    }

    if (strlen($css) > 0) {
        $div_attributes["style"] = "\"$css\"";
    }

    return $div_attributes;
}

/**
 * This is the main function and the one you should actually be calling.
 * There are several variables you should be aware of an which need
 * special description.
 *
 * Since the description is quite lengthy, see it here:
 * http://linux.duke.edu/projects/mini/htmlfilter/
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             see description above
 * @param $rm_tags_with_content see description above
 * @param $self_closing_tags    see description above
 * @param $force_tag_closing    see description above
 * @param $rm_attnames          see description above
 * @param $bad_attvals          see description above
 * @param $add_attr_to_tag      see description above
 * @param $message              message object
 * @param $id                   message id
 * @param $recursively_called   boolean flag for recursive calls into this function (optional; default FALSE)
 * @return                      sanitized html safe to show on your pages.
*/ function sq_sanitize($body, $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox, $recursively_called=FALSE ){ $me = 'sq_sanitize'; /** * See if tag_list is of tags to remove or tags to allow. * false means remove these tags * true means allow these tags */ $orig_tag_list = $tag_list; $rm_tags = array_shift($tag_list); /** * Normalize rm_tags and rm_tags_with_content. */ @array_walk($tag_list, 'sq_casenormalize'); @array_walk($rm_tags_with_content, 'sq_casenormalize'); @array_walk($self_closing_tags, 'sq_casenormalize'); $curpos = 0; $open_tags = Array(); $trusted = "\n\n"; $skip_content = false; /** * Take care of netscape's stupid javascript entities like * &{alert('boo')}; */ $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body); while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; /** * RCDATA and RAWTEXT tags are handled differently: * next instance of closing tag is used, whether or not * the HTML is well formed before that */ global $rcdata_rawtext_tags; if (!$recursively_called && in_array($tagname, $rcdata_rawtext_tags) && $tagtype === 1){ $closing_tag = false; $closing_tag_offset = $curpos; // seek out the closing tag for the current RCDATA/RAWTEXT tag while (1) { // first we need to move forward to next available closing tag // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax) $next_tag = sq_findnxreg($body, $closing_tag_offset, " */ if ($tagname == "style" && $tagtype == 1){ list($free_content, $curpos) = sq_fixstyle($body, $gt+1, $message, $id, $mailbox); if ($free_content != FALSE){ if ( !empty($attary) ) { $attary = sq_fixatts($tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox ); } $trusted .= sq_tagprint($tagname, $attary, $tagtype); $trusted .= $free_content; $trusted .= sq_tagprint($tagname, false, 2); } continue; } 
if ($skip_content == false){ $trusted .= $free_content; } if ($tagname != FALSE){ if ($tagtype == 2){ if ($skip_content == $tagname){ /** * Got to the end of tag we needed to remove. */ $tagname = false; $skip_content = false; } else { if ($skip_content == false){ if ($tagname == "body"){ $tagname = "div"; } if (isset($open_tags[$tagname]) && $open_tags[$tagname] > 0){ $open_tags[$tagname]--; } else { $tagname = false; } } } } else { /** * $rm_tags_with_content */ if ($skip_content == false){ /** * See if this is a self-closing type and change * tagtype appropriately. */ if ($tagtype == 1 && in_array($tagname, $self_closing_tags)){ $tagtype = 3; } /** * See if we should skip this tag and any content * inside it. */ if ($tagtype == 1 && in_array($tagname, $rm_tags_with_content)){ $skip_content = $tagname; } else { if (($rm_tags == false && in_array($tagname, $tag_list)) || ($rm_tags == true && !in_array($tagname, $tag_list))){ $tagname = false; } else { /** * Convert body into div. */ if ($tagname == "body"){ $tagname = "div"; $attary = sq_body2div($attary, $mailbox, $message, $id); } if ($tagtype == 1){ if (isset($open_tags[$tagname])){ $open_tags[$tagname]++; } else { $open_tags[$tagname]=1; } } /** * This is where we run other checks. */ if (is_array($attary) && sizeof($attary) > 0){ $attary = sq_fixatts($tagname, $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox ); } } } } } if ($tagname != false && $skip_content == false){ $trusted .= sq_tagprint($tagname, $attary, $tagtype); } } $curpos = $gt+1; } $trusted .= substr($body, $curpos, strlen($body)-$curpos); if ($force_tag_closing == true){ foreach ($open_tags as $tagname=>$opentimes){ while ($opentimes > 0){ $trusted .= ''; $opentimes--; } } $trusted .= "\n"; } $trusted .= "\n"; return $trusted; } /** * This is a wrapper function to call html sanitizing routines. 
* * @param $body the body of the message * @param $id the id of the message * @param $message * @param $mailbox * @param boolean $take_mailto_links When TRUE, converts mailto: links * into internal SM compose links * (optional; default = TRUE) * @return a string with html safe to display in the browser. */ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links =true) { // require_once(SM_PATH . 'functions/url_parser.php'); // for $MailTo_PReg_Match global $attachment_common_show_images, $view_unsafe_images, $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags, $remove_rcdata_rawtext_tags_and_content; $rcdata_rawtext_tags = array( "noscript", "noframes", "noembed", "textarea", // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below ); /** * Don't display attached images in HTML mode. * * SB: why? */ $attachment_common_show_images = false; $tag_list = Array( false, // remove these tags "meta", "html", "head", "base", "link", "frame", "iframe", "plaintext", "marquee", ); $rm_tags_with_content = Array( "script", "object", "applet", "embed", "title", "frameset", "xmp", "xml", ); if (!$allow_svg_display) $rm_tags_with_content[] = 'svg'; /** * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special * case that they are, but if you prefer to remove them and their contents entirely * (in most cases, should be a safe thing with minimal impact), you can add the * following to config/config_local.php * $remove_rcdata_rawtext_tags_and_content = TRUE; */ if ($remove_rcdata_rawtext_tags_and_content) $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags); $self_closing_tags = Array( "img", "br", "hr", "input", "outbind", ); $force_tag_closing = true; $rm_attnames = Array( "/.*/" => Array( "/target/i", "/^on.*/i", "/^dynsrc/i", "/^data.*/i", "/^lowsrc.*/i", ) ); global $use_transparent_security_image; if ($use_transparent_security_image) $secremoveimg = 
'../images/spacer.png'; else $secremoveimg = '../images/' . _("sec_remove_eng.png"); $bad_attvals = Array( "/.*/" => Array( "/^src|background/i" => Array( Array( "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", "/^([\'\"])\s*about\s*:.*([\'\"])/si" ), Array( "\\1$secremoveimg\\2", "\\1$secremoveimg\\2", "\\1$secremoveimg\\2", ) ), "/^href|action/i" => Array( Array( "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", "/^([\'\"])\s*about\s*:.*([\'\"])/si" ), Array( "\\1#\\1", "\\1#\\1", "\\1#\\1" ) ), "/^style/i" => Array( Array( "/\/\*.*\*\//", "/expression/i", "/binding/i", "/behaviou*r/i", "/include-source/i", // position:relative can also be exploited // to put content outside of email body area // and position:fixed is similarly exploitable // as position:absolute, so we'll remove it // altogether.... // // Does this screw up legitimate HTML messages? // If so, the only fix I see is to allow position // attributes (any values? I think we still have // to block static and fixed) only if $use_iframe // is enabled (1.5.0+) // // was: "/position\s*:\s*absolute/i", // "/position\s*:/i", "/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si", "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si", "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si", "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si", ), Array( "", "idiocy", "idiocy", "idiocy", "idiocy", "idiocy", "url", "url(\\1#\\1)", "url(\\1#\\1)", "url(\\1#\\1)", "\\1:url(\\2#\\3)" ) ) ) ); // If there's no "view_unsafe_images" variable in the URL, turn unsafe // images off by default. sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET, FALSE); if (!$view_unsafe_images){ /** * Remove any references to http/https if view_unsafe_images set * to false. 
*/ array_push($bad_attvals['/.*/']['/^src|background/i'][0], '/^([\'\"])\s*https*:.*([\'\"])/si'); array_push($bad_attvals['/.*/']['/^src|background/i'][1], "\\1$secremoveimg\\1"); array_push($bad_attvals['/.*/']['/^style/i'][0], '/url\([\'\"]?https?:[^\)]*[\'\"]?\)/si'); array_push($bad_attvals['/.*/']['/^style/i'][1], "url(\\1$secremoveimg\\1)"); } $add_attr_to_tag = Array( "/^a$/i" => Array('target'=>'"_blank"', 'title'=>'"'._("This external link will open in a new window").'"' ) ); $trusted = sq_sanitize($body, $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag, $message, $id, $mailbox ); if (strpos($trusted,$secremoveimg)){ $has_unsafe_images = true; } // we want to parse mailto's in HTML output, change to SM compose links // this is a modified version of code from url_parser.php... but Marc is // right: we need a better filtering implementation; adding this randomly // here is not a great solution // if ($take_mailto_links) { // parseUrl($trusted); // this even parses URLs inside of tags... too aggressive global $MailTo_PReg_Match; // some mailers (Microsoft, surprise surprise) produce mailto strings without being // inside an anchor (link) tag, so we have to make sure the regex looks for the // quote before mailto, and we'll also try to convert the non-links back into links $MailTo_PReg_Match = '/([\'"])?mailto:' . 
substr($MailTo_PReg_Match, 1) ; if ((preg_match_all($MailTo_PReg_Match, $trusted, $regs)) && ($regs[0][0] != '')) { foreach ($regs[0] as $i => $mailto_before) { $mailto_params = $regs[11][$i]; // get rid of any leading quote we may have captured but don't care about // $mailto_before = ltrim($mailto_before, '"\''); // get rid of any tailing quote since we have to add send_to to the end // $mailto_before = rtrim($mailto_before, '"\''); $mailto_params = rtrim($mailto_params, '"\''); if ($regs[2][$i]) { //if there is an email addr before '?', we need to merge it with the params $to = 'to=' . $regs[2][$i]; if (strpos($mailto_params, 'to=') > -1) //already a 'to=' $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params); else { if ($mailto_params) //already some params, append to them $mailto_params .= '&' . $to; else $mailto_params .= '?' . $to; } } $url_str = preg_replace(array('/to=/i', '/(?= 6 && strstr($HTTP_USER_AGENT, 'Opera') === false) { $isIE6plus = true; } if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename')) { $filename = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename', $filename, $HTTP_USER_AGENT); } else { $filename = preg_replace('/[\\\\\/:*?"<>|;]/', '_', str_replace(' ', ' ', $filename)); } // A Pox on Microsoft and it's Internet Explorer! // // IE has lots of bugs with file downloads. // It also has problems with SSL. Both of these cause problems // for us in this function. 
// // See this article on Cache Control headers and SSL // http://support.microsoft.com/default.aspx?scid=kb;en-us;323308 // // The best thing you can do for IE is to upgrade to the latest // version //set all the Cache Control Headers for IE if ($isIE) { $filename=rawurlencode($filename); header ("Pragma: public"); header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1 // does nothing - see: https://blogs.msdn.microsoft.com/ieinternals/2009/07/20/internet-explorers-cache-control-extensions/ // header ("Cache-Control: post-check=0, pre-check=0", false); header ("Cache-Control: private"); //set the inline header for IE, we'll add the attachment header later if we need it header ("Content-Disposition: inline; filename=$filename"); } if (!$force) { // Try to show in browser window header ("Content-Disposition: inline; filename=\"$filename\""); header ("Content-Type: $type0/$type1; name=\"$filename\""); } else { // Try to pop up the "save as" box // IE makes this hard. It pops up 2 save boxes, or none. // http://support.microsoft.com/support/kb/articles/Q238/5/88.ASP // http://support.microsoft.com/default.aspx?scid=kb;EN-US;260519 // But, according to Microsoft, it is "RFC compliant but doesn't // take into account some deviations that allowed within the // specification." Doesn't that mean RFC non-compliant? // http://support.microsoft.com/support/kb/articles/Q258/4/52.ASP // all browsers need the application/octet-stream header for this header ("Content-Type: application/octet-stream; name=\"$filename\""); // http://support.microsoft.com/support/kb/articles/Q182/3/15.asp // Do not have quotes around filename, but that applied to // "attachment"... does it apply to inline too? header ("Content-Disposition: attachment; filename=\"$filename\""); if ($isIE && !$isIE6plus) { // This combination seems to work mostly. 
IE 5.5 SP 1 has // known issues (see the Microsoft Knowledge Base) // This works for most types, but doesn't work with Word files header ("Content-Type: application/download; name=\"$filename\""); header ("Content-Type: application/force-download; name=\"$filename\""); // These are spares, just in case. :-) //header("Content-Type: $type0/$type1; name=\"$filename\""); //header("Content-Type: application/x-msdownload; name=\"$filename\""); //header("Content-Type: application/octet-stream; name=\"$filename\""); } else if ($isIE) { // This is to prevent IE for MIME sniffing and auto open a file in IE header ("Content-Type: application/force-download; name=\"$filename\""); } else { // another application/octet-stream forces download for Netscape header ("Content-Type: application/octet-stream; name=\"$filename\""); } } //send the content-length header if the calling function provides it if ($filesize > 0) { header("Content-Length: $filesize"); } } // end fn SendDownloadHeaders