X-Git-Url: https://vcs.fsf.org/?p=squirrelmail.git;a=blobdiff_plain;f=functions%2Fmime.php;h=7433d7c8b49a3c3c7f5e4b14d777e04d72582771;hp=3322249a7423b964d1da6aaf3073ede298e0cb0f;hb=fd181f534c2c598516e6f5f50fa7acb636225244;hpb=358a78a1aef871123cabcc980cfee1cb317da6c0 diff --git a/functions/mime.php b/functions/mime.php index 3322249a..7433d7c8 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -3,15 +3,17 @@ /** * mime.php * - * Copyright (c) 1999-2003 The SquirrelMail Project Team + * Copyright (c) 1999-2004 The SquirrelMail Project Team * Licensed under the GNU GPL. For full terms see the file COPYING. * * This contains the functions necessary to detect and decode MIME * messages. * * $Id$ + * @package squirrelmail */ +/** The typical includes... */ require_once(SM_PATH . 'functions/imap.php'); require_once(SM_PATH . 'functions/attachment_common.php'); @@ -19,11 +21,13 @@ require_once(SM_PATH . 'functions/attachment_common.php'); /* MIME DECODING */ /* -------------------------------------------------------------------------- */ -/* This function gets the structure of a message and stores it in the "message" class. +/** + * Get the MIME structure + * + * This function gets the structure of a message and stores it in the "message" class. * It will return this object for use with all relevant header information and * fully parsed into the standard "message" object format. */ - function mime_structure ($bodystructure, $flags=array()) { /* Isolate the body structure and remove beginning and end parenthesis. */ @@ -92,8 +96,8 @@ function mime_structure ($bodystructure, $flags=array()) { * to mime_get_elements() */ -function mime_fetch_body($imap_stream, $id, $ent_id=1) { - global $uid_support; +function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) { + global $uid_support; /* Do a bit of error correction. If we couldn't find the entity id, just guess * that it is the first one. That is usually the case anyway. 
*/ @@ -104,6 +108,8 @@ function mime_fetch_body($imap_stream, $id, $ent_id=1) { $cmd = "FETCH $id BODY[$ent_id]"; } + if ($fetch_size!=0) $cmd .= "<0.$fetch_size>"; + $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, $uid_support); do { $topline = trim(array_shift($data)); @@ -157,7 +163,7 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) { /* Don't kill the connection if the browser is over a dialup * and it would take over 30 seconds to download it. - * DonĀ“t call set_time_limit in safe mode. + * Dont call set_time_limit in safe mode. */ if (!ini_get('safe_mode')) { @@ -177,17 +183,17 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) { echo decodeBody($body, $encoding); } - /* + /* TODO, use the same method for quoted printable. However, I assume that quoted printable attachments aren't that large so the performancegain / memory usage drop will be minimal. If we decide to add that then we need to adapt sqimap_fread because - we need to split te result on \n and fread doesn't stop at \n. That + we need to split te result on \n and fread doesn't stop at \n. That means we also should provide $results from sqimap_fread (by ref) to te function and set $no_return to false. The $filter function for - quoted printable should handle unsetting of $results. + quoted printable should handle unsetting of $results. */ - /* + /* TODO 2: find out how we write to the output stream php://stdout. fwrite doesn't work because 'php://stdout isn't a stream. */ @@ -387,12 +393,14 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma translateText($body, $wrap_at, $body_message->header->getParameter('charset')); } - $link = 'read_body.php?passed_id=' . $id . '&ent_id='.$ent_num. + $link = 'passed_id=' . $id . '&ent_id='.$ent_num. '&mailbox=' . $urlmailbox .'&sort=' . $sort . - '&startMessage=' . $startMessage . '&show_more=0'; + '&startMessage=' . $startMessage . 
'&show_more=0'; if (isset($passed_ent_id)) { $link .= '&passed_ent_id='.$passed_ent_id; } + $body .= '
' . _("Download this as a file") . ''; if ($view_unsafe_images) { $text = _("Hide Unsafe Images"); } else { @@ -403,8 +411,10 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma $text = ''; } } - $body .= '
'.$text. - '

' . "\n"; + if($text != '') { + $body .= ' | ' . $text . ''; + } + $body .= '

' . "\n"; } return $body; } @@ -441,11 +451,10 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { } $from_o = $rfc822_header->from; if (is_object($from_o)) { - $from_name = $from_o->getAddress(false); + $from_name = decodeHeader($from_o->getAddress(false)); } else { $from_name = _("Unknown sender"); } - $from_name = decodeHeader(($from_name)); $description = $from_name; } else { $default_page = SM_PATH . 'src/download.php'; @@ -552,7 +561,15 @@ function decodeBody($body, $encoding) { $body = str_replace("\r\n", "\n", $body); $encoding = strtolower($encoding); - if ($encoding == 'quoted-printable' || + $encoding_handler = do_hook_function('decode_body', $encoding); + + + // plugins get first shot at decoding the body + // + if (!empty($encoding_handler) && function_exists($encoding_handler)) { + $body = $encoding_handler('decode', $body); + + } else if ($encoding == 'quoted-printable' || $encoding == 'quoted_printable') { $body = quoted_printable_decode($body); @@ -568,17 +585,25 @@ function decodeBody($body, $encoding) { return $body; } -/* +/** + * Decodes headers + * * This functions decode strings that is encoded according to * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). * Patched by Christian Schmidt 23/03/2002 + * + * @param string $string header string that has to be made readable + * @param boolean $utfencode change message in order to be readable on user's charset. defaults to true + * @param boolean $htmlsave preserve spaces and sanitize html special characters. defaults to true + * @param boolean $decide decide if string can be utfencoded. 
defaults to false + * @return string decoded header string */ -function decodeHeader ($string, $utfencode=true,$htmlsave=true) { +function decodeHeader ($string, $utfencode=true,$htmlsave=true,$decide=false) { global $languages, $squirrelmail_language; if (is_array($string)) { $string = implode("\n", $string); } - + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { $string = $languages[$squirrelmail_language]['XTRA_CODE']('decodeheader', $string); @@ -587,7 +612,7 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) { } $i = 0; $iLastMatch = -2; - $encoded = false; + $encoded = true; $aString = explode(' ',$string); $ret = ''; @@ -601,12 +626,12 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) { $encoded = false; /* if encoded words are not separated by a linear-space-white we still catch them */ $j = $i-1; -// if ($chunk{0} === '=') { /* performance, saves an unnessecarry preg call */ + while ($match = preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) { /* if the last chunk isn't an encoded string then put back the space, otherwise don't */ if ($iLastMatch !== $j) { if ($htmlsave) { - $ret .= ' '; + $ret .= ' '; } else { $ret .= ' '; } @@ -623,8 +648,15 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) { break; case 'Q': $replace = str_replace('_', ' ', $res[4]); - $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', + $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', $replace); + /* decide about valid decoding */ + if ($decide && is_conversion_safe($res[2])) { + $utfencode=true; + $can_be_decoded=true; + } else { + $can_be_decoded=false; + } /* Only encode into entities by default. Some places * don't need the encoding, like the compose form. 
*/ @@ -643,13 +675,12 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) { $chunk = $res[5]; $encoded = true; } -// } if (!$encoded) { if ($htmlsave) { - $ret .= ' '; + $ret .= ' '; } else { $ret .= ' '; - } + } } if (!$encoded && $htmlsave) { @@ -662,19 +693,24 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) { /* remove the first added space */ if ($ret) { if ($htmlsave) { - $ret = substr($ret,6); + $ret = substr($ret,5); } else { $ret = substr($ret,1); } } - + return $ret; } -/* +/** + * Encodes header as quoted-printable + * * Encode a string according to RFC 1522 for use in headers if it * contains 8-bit characters or anything that looks like it should * be encoded. + * + * @param string $string header string, that has to be encoded + * @return string quoted-printable encoded string */ function encodeHeader ($string) { global $default_charset, $languages, $squirrelmail_language; @@ -683,9 +719,10 @@ function encodeHeader ($string) { function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { return $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string); } - if (strtolower($default_charset) == 'iso-8859-1') { - $string = str_replace("\240",' ',$string); - } + // instead of removing nbsp here, we don't add it in decodeHeader + // if (strtolower($default_charset) == 'iso-8859-1') { + // $string = str_replace("\240",' ',$string); + //} // Encode only if the string contains 8-bit characters or =? $j = strlen($string); @@ -752,7 +789,7 @@ function encodeHeader ($string) { // do not start encoding in the middle of a string, also take the rest of the word. 
$sLeadString = substr($string,0,$i); $aLeadString = explode(' ',$sLeadString); - $sToBeEncoded = array_pop($aLeadString); + $sToBeEncoded = array_pop($aLeadString); $iEncStart = $i - strlen($sToBeEncoded); $ret .= $sToBeEncoded; $cur_l += strlen($sToBeEncoded); @@ -809,7 +846,7 @@ function find_ent_id($id, $message) { if (strcasecmp($message->entities[$i]->header->id, $id) == 0) { // if (sq_check_save_extension($message->entities[$i])) { return $message->entities[$i]->entity_id; -// } +// } } } } @@ -828,9 +865,45 @@ function sq_check_save_extension($message) { ** HTMLFILTER ROUTINES */ +/** + * This function is more or less a wrapper around stripslashes. Apparently + * Explorer is stupid enough to just remove the backslashes and then + * execute the content of the attribute as if nothing happened. + * Who does that? + * + * @param attvalue The value of the attribute + * @return attvalue The value of the attribute stripslashed. + */ +function sq_unbackslash($attvalue){ + /** + * Remove any backslashes. See if there are any first. + */ + + if (strstr($attvalue, '\\') !== false){ + $attvalue = stripslashes($attvalue); + } + return $attvalue; +} + +/** + * Kill any tabs, newlines, or carriage returns. Our friends the + * makers of the browser with 95% market value decided that it'd + * be funny to make "java[tab]script" be just as good as "javascript". + * + * @param attvalue The attribute value before extraneous spaces removed. + * @return attvalue The attribute value after extraneous spaces removed. + */ +function sq_unspace($attvalue){ + if (strcspn($attvalue, "\t\r\n") != strlen($attvalue)){ + $attvalue = str_replace(Array("\t", "\r", "\n"), Array('', '', ''), + $attvalue); + } + return $attvalue; +} + /** * This function returns the final tag out of the tag name, an array - * of attributes, and the type of the tag. This function is called by + * of attributes, and the type of the tag. This function is called by * sq_sanitize internally. 
* * @param $tagname the name of the tag. @@ -874,7 +947,7 @@ function sq_casenormalize(&$val){ /** * This function skips any whitespace from the current position within * a string and to the next non-whitespace value. - * + * * @param $body the string * @param $offset the offset within the string where we should start * looking for the next non-whitespace character. @@ -1029,7 +1102,7 @@ function sq_getnxtag($body, $offset){ * '>' indicating the end of the tag entirely. * '\s' indicating the end of the tag name. * '/' indicating that this is type-3 xhtml tag. - * + * * Whatever else we find there indicates an invalid tag. */ switch ($match){ @@ -1117,7 +1190,7 @@ function sq_getnxtag($body, $offset){ * double quotes. Type 4 we convert into: * attrname="yes". */ - $regary = sq_findnxreg($body, $pos, "[^\w\-_]"); + $regary = sq_findnxreg($body, $pos, "[^:\w\-_]"); if ($regary == false){ /** * Looks like body ended before the end of tag. @@ -1238,13 +1311,14 @@ function sq_getnxtag($body, $offset){ * @param $attvalue A string to run entity check against. * @return Translated value. */ + function sq_deent($attvalue){ $me = 'sq_deent'; /** * See if we have to run the checks first. All entities must start * with "&". */ - if (strpos($attvalue, "&") === false){ + if (strpos($attvalue, '&') === false){ return $attvalue; } /** @@ -1255,22 +1329,22 @@ function sq_deent($attvalue){ * Leave " in, as it can mess us up. */ $trans = array_flip($trans); - unset($trans{"""}); + unset($trans{'"'}); while (list($ent, $val) = each($trans)){ - $attvalue = preg_replace("/$ent*(\W)/si", "$val\\1", $attvalue); + $attvalue = preg_replace('/' . $ent . '*/si', $val, $attvalue); } /** * Now translate numbered entities from 1 to 255 if needed. 
*/ - if (strpos($attvalue, "#") !== false){ + if (strpos($attvalue, '#') !== false){ $omit = Array(34, 39); - for ($asc=1; $asc<256; $asc++){ + for ($asc = 256; $asc >= 0; $asc--){ if (!in_array($asc, $omit)){ $chr = chr($asc); - $attvalue = preg_replace("/\�*$asc;*(\D)/si", "$chr\\1", - $attvalue); - $attvalue = preg_replace("/\�*".dechex($asc).";*(\W)/si", - "$chr\\1", $attvalue); + $octrule = '/\�*' . $asc . ';*/si'; + $hexrule = '/\�*' . dechex($asc) . ';*/si'; + $attvalue = preg_replace($octrule, $chr, $attvalue); + $attvalue = preg_replace($hexrule, $chr, $attvalue); } } } @@ -1289,8 +1363,8 @@ function sq_deent($attvalue){ * @param $id message id * @return Array with modified attributes. */ -function sq_fixatts($tagname, - $attary, +function sq_fixatts($tagname, + $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, @@ -1314,9 +1388,17 @@ function sq_fixatts($tagname, } } /** - * Remove any entities. + * Remove any backslashes, entities, and extraneous whitespace. */ + $attvalue = sq_unbackslash($attvalue); $attvalue = sq_deent($attvalue); + $attvalue = sq_unspace($attvalue); + + /** + * Remove \r \n \t \0 " " "\\" + */ + $attvalue = str_replace(Array("\r", "\n", "\t", "\0", " ", "\\"), + Array('', '','','','',''), $attvalue); /** * Now let's run checks on the attvalues. @@ -1334,7 +1416,7 @@ function sq_fixatts($tagname, * Second one is replacements */ list($valmatch, $valrepl) = $valary; - $newvalue = + $newvalue = preg_replace($valmatch, $valrepl, $attvalue); if ($newvalue != $attvalue){ $attary{$attname} = $newvalue; @@ -1364,13 +1446,14 @@ function sq_fixatts($tagname, /** * This function edits the style definition to make them friendly and * usable in squirrelmail. - * + * * @param $message the message object * @param $id the message id * @param $content a string with whatever is between + * @param $mailbox the message mailbox * @return a string with edited content. 
*/ -function sq_fixstyle($body, $pos, $message, $id){ +function sq_fixstyle($body, $pos, $message, $id, $mailbox){ global $view_unsafe_images; $me = 'sq_fixstyle'; $ret = sq_findnxreg($body, $pos, ''); @@ -1400,14 +1483,14 @@ function sq_fixstyle($body, $pos, $message, $id){ $content = preg_replace("|url\s*\(\s*([\'\"])\s*https*:.*?([\'\"])\s*\)|si", "url(\\1$secremoveimg\\2)", $content); } - + /** * Fix urls that refer to cid: */ - while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si", + while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si", $content, $matches)){ $cidurl = $matches{1}; - $httpurl = sq_cid2http($message, $id, $cidurl); + $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox); $content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si", "url($httpurl)", $content); } @@ -1418,8 +1501,9 @@ function sq_fixstyle($body, $pos, $message, $id){ */ $match = Array('/expression/i', '/behaviou*r/i', - '/binding/i'); - $replace = Array('idiocy', 'idiocy', 'idiocy'); + '/binding/i', + '/include-source/i'); + $replace = Array('idiocy', 'idiocy', 'idiocy', 'idiocy'); $content = preg_replace($match, $replace, $content); return array($content, $newpos); } @@ -1431,6 +1515,7 @@ function sq_fixstyle($body, $pos, $message, $id){ * @param $message the message object * @param $id the message id * @param $cidurl the cid: url. + * @param $mailbox the message mailbox * @return a string with a http-friendly url */ function sq_cid2http($message, $id, $cidurl, $mailbox){ @@ -1438,7 +1523,11 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * Get rid of quotes. 
*/ $quotchar = substr($cidurl, 0, 1); - $cidurl = str_replace($quotchar, "", $cidurl); + if ($quotchar == '"' || $quotchar == "'"){ + $cidurl = str_replace($quotchar, "", $cidurl); + } else { + $quotchar = ''; + } $cidurl = substr(trim($cidurl), 4); $linkurl = find_ent_id($cidurl, $message); /* in case of non-save cid links $httpurl should be replaced by a sort of @@ -1456,14 +1545,18 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * This function changes the tag into a
<div> tag since we * can't really have a body-within-body. * - * @param $attary an array of attributes and values of <body> - * @return a modified array of attributes to be set for <div>
+ * @param $attary an array of attributes and values of <body> + * @param $mailbox mailbox we're currently reading (for cid2http) + * @param $message current message (for cid2http) + * @param $id current message id (for cid2http) + * @return a modified array of attributes to be set for <div>
*/ -function sq_body2div($attary){ +function sq_body2div($attary, $mailbox, $message, $id){ $me = 'sq_body2div'; $divattary = Array('class' => "'bodyclass'"); $bgcolor = '#ffffff'; $text = '#000000'; + $has_bgc_stl = $has_txt_stl = false; $styledef = ''; if (is_array($attary) && sizeof($attary) > 0){ foreach ($attary as $attname=>$attvalue){ @@ -1471,16 +1564,25 @@ function sq_body2div($attary){ $attvalue = str_replace($quotchar, "", $attvalue); switch ($attname){ case 'background': + $attvalue = sq_cid2http($message, $id, + $attvalue, $mailbox); $styledef .= "background-image: url('$attvalue'); "; break; case 'bgcolor': + $has_bgc_stl = true; $styledef .= "background-color: $attvalue; "; break; case 'text': + $has_txt_stl = true; $styledef .= "color: $attvalue; "; break; } } + // Outlook defines a white bgcolor and no text color. This can lead to + // white text on a white bg with certain themes. + if ($has_bgc_stl && !$has_txt_stl) { + $styledef .= "color: $text; "; + } if (strlen($styledef) > 0){ $divattary{"style"} = "\"$styledef\""; } @@ -1508,8 +1610,8 @@ function sq_body2div($attary){ * @param $id message id * @return sanitized html safe to show on your pages. */ -function sq_sanitize($body, - $tag_list, +function sq_sanitize($body, + $tag_list, $rm_tags_with_content, $self_closing_tags, $force_tag_closing, @@ -1535,7 +1637,7 @@ function sq_sanitize($body, */ $curpos = 0; $open_tags = Array(); - $trusted = "\n"; + $trusted = "\n\n"; $skip_content = false; /** * Take care of netscape's stupid javascript entities like @@ -1550,8 +1652,8 @@ function sq_sanitize($body, * Take care of