X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=functions%2Fmime.php;h=e8538eb918fb79c9ed0427c72edbc271742ee85a;hb=de1ebc4b317860aeaff8caa585c51e62c53088f6;hp=273e6b3ec0346bada7523e32a3b6df0467a64b66;hpb=b68edc750e3b478f5302681e8fbd289b17447550;p=squirrelmail.git diff --git a/functions/mime.php b/functions/mime.php index 273e6b3e..e8538eb9 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -3,7 +3,7 @@ /** * mime.php * - * Copyright (c) 1999-2002 The SquirrelMail Project Team + * Copyright (c) 1999-2003 The SquirrelMail Project Team * Licensed under the GNU GPL. For full terms see the file COPYING. * * This contains the functions necessary to detect and decode MIME @@ -15,9 +15,9 @@ require_once(SM_PATH . 'functions/imap.php'); require_once(SM_PATH . 'functions/attachment_common.php'); -/* --------------------------------------------------------------------------------- */ -/* MIME DECODING */ -/* --------------------------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ +/* MIME DECODING */ +/* -------------------------------------------------------------------------- */ /* This function gets the structure of a message and stores it in the "message" class. * It will return this object for use with all relevant header information and @@ -29,23 +29,20 @@ function mime_structure ($bodystructure, $flags=array()) { /* Isolate the body structure and remove beginning and end parenthesis. */ $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13)); $read = trim(substr ($read, 0, -1)); - $msg =& new Message(); - $res = $msg->parseStructure($read); - $msg = $res[0]; + $i = 0; + $msg = Message::parseStructure($read,$i); if (!is_object($msg)) { - include_once( '../functions/display_messages.php' ); + include_once(SM_PATH . 'functions/display_messages.php'); global $color, $mailbox; - displayPageHeader( $color, urldecode($mailbox) ); - echo "\n\n" . - '
'; - $errormessage = _("Squirrelmail could not decode the bodystructure of the message"); + /* removed urldecode because $_GET is auto urldecoded ??? */ + displayPageHeader( $color, $mailbox ); + $errormessage = _("SquirrelMail could not decode the bodystructure of the message"); $errormessage .= '
'._("the provided bodystructure by your imap-server").':

'; - $errormessage .= '
' . htmlspecialchars($read) . '
'; + $errormessage .= '
' . htmlspecialchars($read) . '
'; plain_error_message( $errormessage, $color ); echo ''; exit; } - $msg->setEnt('0'); if (count($flags)) { foreach ($flags as $flag) { $char = strtoupper($flag{1}); @@ -84,6 +81,8 @@ function mime_structure ($bodystructure, $flags=array()) { return $msg; } + + /* This starts the parsing of a particular structure. It is called recursively, * so it can be passed different structures. It returns an object of type * $message. @@ -93,16 +92,20 @@ function mime_structure ($bodystructure, $flags=array()) { * to mime_get_elements() */ -function mime_fetch_body($imap_stream, $id, $ent_id) { +function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) { global $uid_support; /* Do a bit of error correction. If we couldn't find the entity id, just guess * that it is the first one. That is usually the case anyway. */ + if (!$ent_id) { - $ent_id = 1; + $cmd = "FETCH $id BODY[]"; + } else { + $cmd = "FETCH $id BODY[$ent_id]"; } - $cmd = "FETCH $id BODY[$ent_id]"; + if ($fetch_size!=0) $cmd .= "<0.$fetch_size>"; + $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, $uid_support); do { $topline = trim(array_shift($data)); @@ -114,9 +117,9 @@ function mime_fetch_body($imap_stream, $id, $ent_id) { /* There is some information in the content info header that could be important * in order to parse html messages. Let's get them here. */ - if ($ret{0} == '<') { - $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, $uid_support); - } +// if ($ret{0} == '<') { +// $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, $uid_support); +// } } else if (ereg('"([^"]*)"', $topline, $regs)) { $ret = $regs[1]; } else { @@ -134,14 +137,12 @@ function mime_fetch_body($imap_stream, $id, $ent_id) { echo '
' . '' . '' . - '" . - '" . - '" . - '" . + '" . + '" . + '" . + '" . "
' . - _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") . - "
" . - _("Submit message") . '

 ' . + _("Body retrieval error. The reason for this is most probably that the message is malformed.") . '
' . _("Command:") . "$cmd
' . _("Response:") . "$response
' . _("Message:") . "$message
' . _("FETCH line:") . "$topline
' . _("Command:") . "$cmd
' . _("Response:") . "$response
' . _("Message:") . "$message
' . _("FETCH line:") . "$topline


"; $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, $uid_support); @@ -153,15 +154,9 @@ function mime_fetch_body($imap_stream, $id, $ent_id) { return $ret; } -function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) { +function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) { global $uid_support; - /* Do a bit of error correction. If we couldn't find the entity id, just guess - * that it is the first one. That is usually the case anyway. - */ - if (!$ent_id) { - $ent_id = 1; - } - $sid = sqimap_session_id($uid_support); + /* Don't kill the connection if the browser is over a dialup * and it would take over 30 seconds to download it. * DonĀ“t call set_time_limit in safe mode. @@ -170,14 +165,35 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) { if (!ini_get('safe_mode')) { set_time_limit(0); } - if ($uid_support) { - $sid_s = substr($sid,0,strpos($sid, ' ')); + /* in case of base64 encoded attachments, do not buffer them. + Instead, echo the decoded attachment directly to screen */ + if (strtolower($encoding) == 'base64') { + if (!$ent_id) { + $query = "FETCH $id BODY[]"; + } else { + $query = "FETCH $id BODY[$ent_id]"; + } + sqimap_run_command($imap_stream,$query,true,$response,$message,$uid_support,'sqimap_base64_decode','php://stdout',true); } else { - $sid_s = $sid; + $body = mime_fetch_body ($imap_stream, $id, $ent_id); + echo decodeBody($body, $encoding); } - $body = mime_fetch_body ($imap_stream, $id, $ent_id); - echo decodeBody($body, $encoding); + /* + TODO, use the same method for quoted printable. + However, I assume that quoted printable attachments aren't that large + so the performancegain / memory usage drop will be minimal. + If we decide to add that then we need to adapt sqimap_fread because + we need to split te result on \n and fread doesn't stop at \n. That + means we also should provide $results from sqimap_fread (by ref) to + te function and set $no_return to false. The $filter function for + quoted printable should handle unsetting of $results. + */ + /* + TODO 2: find out how we write to the output stream php://stdout. fwrite + doesn't work because 'php://stdout isn't a stream. + */ + return; /* fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n"); @@ -296,16 +312,16 @@ function translateText(&$body, $wrap_at, $charset) { } } - if ($quotes > 1) { - if (!isset($color[14])) { - $color[14] = '#FF0000'; - } - $line = '' . $line . ''; - } elseif ($quotes) { + if ($quotes % 2) { if (!isset($color[13])) { $color[13] = '#800000'; } - $line = '' . $line . ''; + $line = '' . $line . ''; + } elseif ($quotes) { + if (!isset($color[14])) { + $color[14] = '#FF0000'; + } + $line = '' . $line . ''; } $body_ary[$i] = $line; @@ -313,7 +329,6 @@ function translateText(&$body, $wrap_at, $charset) { $body = '
' . implode("\n", $body_ary) . '
'; } - /* This returns a parsed string called $body. That string can then * be displayed as the actual message in the HTML. It contains * everything needed, including HTML Tags, Attachments at the @@ -325,9 +340,13 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma * order that is their priority. */ global $startMessage, $username, $key, $imapServerAddress, $imapPort, - $show_html_default, $has_unsafe_images, $view_unsafe_images, $sort; + $show_html_default, $sort, $has_unsafe_images, $passed_ent_id; + global $languages, $squirrelmail_language; + + if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { + $view_unsafe_images = false; + } - $has_unsafe_images= 0; $body = ''; $urlmailbox = urlencode($mailbox); $body_message = getEntity($message, $ent_num); @@ -335,6 +354,13 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma ($body_message->header->type0 == 'rfc822')) { $body = mime_fetch_body ($imap_stream, $id, $ent_num); $body = decodeBody($body, $body_message->header->encoding); + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { + if (mb_detect_encoding($body) != 'ASCII') { + $body = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $body); + } + } $hookResults = do_hook("message_body", $body); $body = $hookResults[1]; @@ -363,18 +389,24 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma translateText($body, $wrap_at, $body_message->header->getParameter('charset')); } - - if ($has_unsafe_images) { - if ($view_unsafe_images) { - $untext = '">' . _("Hide Unsafe Images"); + $link = 'read_body.php?passed_id=' . $id . '&ent_id='.$ent_num. + '&mailbox=' . $urlmailbox .'&sort=' . $sort . + '&startMessage=' . $startMessage . '&show_more=0'; + if (isset($passed_ent_id)) { + $link .= '&passed_ent_id='.$passed_ent_id; + } + if ($view_unsafe_images) { + $text = _("Hide Unsafe Images"); + } else { + if (isset($has_unsafe_images) && $has_unsafe_images) { + $link .= '&view_unsafe_images=1'; + $text = _("View Unsafe Images"); } else { - $untext = '&view_unsafe_images=1">' . _("View Unsafe Images"); + $text = ''; } - $body .= '
'.$text. + '

' . "\n"; } return $body; } @@ -393,54 +425,62 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { $urlMailbox = urlencode($mailbox); foreach ($att_ar as $att) { - $ent = urldecode($att->entity_id); + $ent = $att->entity_id; $header = $att->header; $type0 = strtolower($header->type0); $type1 = strtolower($header->type1); $name = ''; $links['download link']['text'] = _("download"); - $links['download link']['href'] = - "../src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&ent_id=$ent"; + $links['download link']['href'] = SM_PATH . + "src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&ent_id=$ent"; $ImageURL = ''; if ($type0 =='message' && $type1 == 'rfc822') { - $default_page = '../src/read_body.php'; + $default_page = SM_PATH . 'src/read_body.php'; $rfc822_header = $att->rfc822_header; - $filename = decodeHeader($rfc822_header->subject); - + $filename = $rfc822_header->subject; + if (trim( $filename ) == '') { + $filename = 'untitled-[' . $ent . ']' ; + } $from_o = $rfc822_header->from; if (is_object($from_o)) { - $from_name = $from_o->getAddress(false); + $from_name = decodeHeader($from_o->getAddress(false)); } else { $from_name = _("Unknown sender"); } - $from_name = decodeHeader(htmlspecialchars($from_name)); $description = $from_name; } else { - $default_page = '../src/download.php'; + $default_page = SM_PATH . 'src/download.php'; if (is_object($header->disposition)) { - $filename = decodeHeader($header->disposition->getProperty('filename')); + $filename = $header->disposition->getProperty('filename'); if (trim($filename) == '') { $name = decodeHeader($header->disposition->getProperty('name')); if (trim($name) == '') { - if (trim( $header->id ) == '') { - $filename = 'untitled-[' . $ent . ']' ; + $name = $header->getParameter('name'); + if(trim($name) == '') { + if (trim( $header->id ) == '') { + $filename = 'untitled-[' . $ent . ']' ; + } else { + $filename = 'cid: ' . $header->id; + } } else { - $filename = 'cid: ' . $header->id; + $filename = $name; } } else { $filename = $name; } } } else { - if (trim( $header->id ) == '') { - $filename = 'untitled-[' . $ent . ']' ; - } else { - $filename = 'cid: ' . $header->id; + $filename = $header->getParameter('name'); + if (!trim($filename)) { + if (trim( $header->id ) == '') { + $filename = 'untitled-[' . $ent . ']' ; + } else { + $filename = 'cid: ' . $header->id; + } } } - if ($header->description) { - $description = htmlspecialchars($header->description); + $description = decodeHeader($header->description); } else { $description = ''; } @@ -456,7 +496,7 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { . "&passed_id=$id&mailbox=$urlMailbox" . '&ent_id='.$ent.$passed_ent_id_link; if ($where && $what) { - $defaultlink = '&where='. urlencode($where).'&what='.urlencode($what); + $defaultlink .= '&where='. urlencode($where).'&what='.urlencode($what); } /* This executes the attachment hook with a specific MIME-type. * If that doesn't have results, it tries if there's a rule @@ -475,7 +515,7 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { $defaultlink = $hookresults[6]; $attachments .= '' . - "$display_filename " . + ''.decodeHeader($display_filename).' ' . '' . show_readable_size($header->size) . '  ' . "[ $type0/$type1 ] " . @@ -495,18 +535,33 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { unset($links); $attachments .= "\n"; } + $attachmentadd = do_hook_function('attachments_bottom',$attachments); + if ($attachmentadd != '') + $attachments = $attachmentadd; return $attachments; } +function sqimap_base64_decode(&$string) { + $string = str_replace("\r\n", "\n", $string); + $string = base64_decode($string); +} + /* This function decodes the body depending on the encoding type. */ function decodeBody($body, $encoding) { - global $languages, $squirrelmail_language; global $show_html_default; $body = str_replace("\r\n", "\n", $body); $encoding = strtolower($encoding); - if ($encoding == 'quoted-printable' || + $encoding_handler = do_hook_function('decode_body', $encoding); + + + // plugins get first shot at decoding the body + // + if (!empty($encoding_handler) && function_exists($encoding_handler)) { + $body = $encoding_handler('decode', $body); + + } else if ($encoding == 'quoted-printable' || $encoding == 'quoted_printable') { $body = quoted_printable_decode($body); @@ -518,11 +573,6 @@ function decodeBody($body, $encoding) { $body = base64_decode($body); } - if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && - function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { - $body = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $body); - } - // All other encodings are returned raw. return $body; } @@ -532,43 +582,102 @@ function decodeBody($body, $encoding) { * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). * Patched by Christian Schmidt 23/03/2002 */ -function decodeHeader ($string, $utfencode=true) { +function decodeHeader ($string, $utfencode=true,$htmlsave=true) { global $languages, $squirrelmail_language; if (is_array($string)) { $string = implode("\n", $string); } - + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { $string = $languages[$squirrelmail_language]['XTRA_CODE']('decodeheader', $string); + // Do we need to return at this point? + // return $string; } - $i = 0; - while (preg_match('/^(.{' . $i . '})(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=/Ui', - $string, $res)) { - $prefix = $res[1]; - /* Ignore white-space between consecutive encoded-words. */ - if (strspn($res[2], " \t") != strlen($res[2])) { - $prefix .= $res[2]; - } + $iLastMatch = -2; + $encoded = true; - if (ucfirst($res[4]) == 'B') { - $replace = base64_decode($res[5]); - } else { - $replace = str_replace('_', ' ', $res[5]); - $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', + $aString = explode(' ',$string); + $ret = ''; + foreach ($aString as $chunk) { + if ($encoded && $chunk === '') { + continue; + } elseif ($chunk === '') { + $ret .= ' '; + continue; + } + $encoded = false; + /* if encoded words are not separated by a linear-space-white we still catch them */ + $j = $i-1; +// if ($chunk{0} === '=') { /* performance, saves an unnessecarry preg call */ + while ($match = preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) { + /* if the last chunk isn't an encoded string then put back the space, otherwise don't */ + if ($iLastMatch !== $j) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } + } + $iLastMatch = $i; + $j = $i; + $ret .= $res[1]; + $encoding = ucfirst($res[3]); + switch ($encoding) + { + case 'B': + $replace = base64_decode($res[4]); + $ret .= charset_decode($res[2],$replace); + break; + case 'Q': + $replace = str_replace('_', ' ', $res[4]); + $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', $replace); - /* Only encode into entities by default. Some places - * don't need the encoding, like the compose form. - */ - if ($utfencode) { - $replace = charset_decode($res[3], $replace); + /* Only encode into entities by default. Some places + * don't need the encoding, like the compose form. + */ + if ($utfencode) { + $replace = charset_decode($res[2], $replace); + } else { + if ($htmlsave) { + $replace = htmlspecialchars($replace); + } + } + $ret .= $replace; + break; + default: + break; } + $chunk = $res[5]; + $encoded = true; + } +// } + if (!$encoded) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } } - $string = $prefix . $replace . substr($string, strlen($res[0])); - $i = strlen($prefix) + strlen($replace); + + if (!$encoded && $htmlsave) { + $ret .= htmlspecialchars($chunk); + } else { + $ret .= $chunk; + } + ++$i; } - return $string; + /* remove the first added space */ + if ($ret) { + if ($htmlsave) { + $ret = substr($ret,6); + } else { + $ret = substr($ret,1); + } + } + + return $ret; } /* @@ -583,41 +692,120 @@ function encodeHeader ($string) { function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { return $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string); } + if (strtolower($default_charset) == 'iso-8859-1') { + $string = str_replace("\240",' ',$string); + } // Encode only if the string contains 8-bit characters or =? $j = strlen($string); - $l = strstr($string, '=?'); // Must be encoded ? + $max_l = 75 - strlen($default_charset) - 7; + $aRet = array(); $ret = ''; + $iEncStart = $enc_init = false; + $cur_l = $iOffset = 0; for($i = 0; $i < $j; ++$i) { - switch($string{$i}) { - case '=': - $ret .= '=3D'; - break; - case '?': - $ret .= '=3F'; - break; - case '_': - $ret .= '=5F'; - break; - case ' ': - $ret .= '_'; - break; - default: - $k = ord($string{$i}); - if ($k > 126) { - $ret .= sprintf("=%02X", $k); - $l = TRUE; + switch($string{$i}) + { + case '=': + case '<': + case '>': + case ',': + case '?': + case '_': + if ($iEncStart === false) { + $iEncStart = $i; + } + $cur_l+=3; + if ($cur_l > ($max_l-2)) { + /* if there is an stringpart that doesn't need encoding, add it */ + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } else { + $ret .= sprintf("=%02X",ord($string{$i})); + } + break; + case '(': + case ')': + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } + break; + case ' ': + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; } else { - $ret .= $string{$i}; + $ret .= '_'; } - break; + } + break; + default: + $k = ord($string{$i}); + if ($k > 126) { + if ($iEncStart === false) { + // do not start encoding in the middle of a string, also take the rest of the word. + $sLeadString = substr($string,0,$i); + $aLeadString = explode(' ',$sLeadString); + $sToBeEncoded = array_pop($aLeadString); + $iEncStart = $i - strlen($sToBeEncoded); + $ret .= $sToBeEncoded; + $cur_l += strlen($sToBeEncoded); + } + $cur_l += 3; + /* first we add the encoded string that reached it's max size */ + if ($cur_l > ($max_l-2)) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?= "; /* the next part is also encoded => separate by space */ + $cur_l = 3; + $ret = ''; + $iOffset = $i; + $iEncStart = $i; + } + $enc_init = true; + $ret .= sprintf("=%02X", $k); + } else { + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iEncStart = false; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + } else { + $ret .= $string{$i}; + } + } + } + break; } } - if ($l) { - $string = "=?$default_charset?Q?$ret?="; + if ($enc_init) { + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + } else { + $aRet[] = substr($string,$iOffset); + } + $string = implode('',$aRet); } - return $string; } @@ -628,9 +816,9 @@ function find_ent_id($id, $message) { $ret = find_ent_id($id, $message->entities[$i]); } else { if (strcasecmp($message->entities[$i]->header->id, $id) == 0) { - if (sq_check_save_extension($message->entities[$i])) { +// if (sq_check_save_extension($message->entities[$i])) { return $message->entities[$i]->entity_id; - } +// } } } } @@ -649,6 +837,41 @@ function sq_check_save_extension($message) { ** HTMLFILTER ROUTINES */ +/** + * This function is more or less a wrapper around stripslashes. Apparently + * Explorer is stupid enough to just remove the backslashes and then + * execute the content of the attribute as if nothing happened. + * Who does that? + * + * @param attvalue The value of the attribute + * @return attvalue The value of the attribute stripslashed. + */ +function sq_unbackslash($attvalue){ + /** + * Remove any backslashes. See if there are any first. + */ + if (strstr($attvalue, '\\') !== false){ + $attvalue = stripslashes($attvalue); + } + return $attvalue; +} + +/** + * Kill any tabs, newlines, or carriage returns. Our friends the + * makers of the browser with 95% market value decided that it'd + * be funny to make "java[tab]script" be just as good as "javascript". + * + * @param attvalue The attribute value before extraneous spaces removed. + * @return attvalue The attribute value after extraneous spaces removed. + */ +function sq_unspace($attvalue){ + if (strcspn($attvalue, "\t\r\n") != strlen($attvalue)){ + $attvalue = str_replace(Array("\t", "\r", "\n"), Array('', '', ''), + $attvalue); + } + return $attvalue; +} + /** * This function returns the final tag out of the tag name, an array * of attributes, and the type of the tag. This function is called by @@ -749,8 +972,8 @@ function sq_findnxreg($body, $offset, $reg){ $me = 'sq_findnxreg'; $matches = Array(); $retarr = Array(); - preg_match("%^(.*?)($reg)%s", substr($body, $offset), $matches); - if (!$matches{0}){ + preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches); + if (!isset($matches{0}) || !$matches{0}){ $retarr = false; } else { $retarr{0} = $offset + strlen($matches{1}); @@ -879,7 +1102,7 @@ function sq_getnxtag($body, $offset){ /** * This is an invalid tag! Look for the next closing ">". */ - $gt = sq_findnxstr($body, $offset, ">"); + $gt = sq_findnxstr($body, $lt, ">"); return Array(false, false, false, $lt, $gt); } break; @@ -938,7 +1161,7 @@ function sq_getnxtag($body, $offset){ * double quotes. Type 4 we convert into: * attrname="yes". */ - $regary = sq_findnxreg($body, $pos, "[^\w\-_]"); + $regary = sq_findnxreg($body, $pos, "[^:\w\-_]"); if ($regary == false){ /** * Looks like body ended before the end of tag. @@ -1135,9 +1358,11 @@ function sq_fixatts($tagname, } } /** - * Remove any entities. + * Remove any backslashes, entities, and extraneous whitespace. */ + $attvalue = sq_unbackslash($attvalue); $attvalue = sq_deent($attvalue); + $attvalue = sq_unspace($attvalue); /** * Now let's run checks on the attvalues. @@ -1191,9 +1416,15 @@ function sq_fixatts($tagname, * @param $content a string with whatever is between * @return a string with edited content. */ -function sq_fixstyle($message, $id, $content){ +function sq_fixstyle($body, $pos, $message, $id){ global $view_unsafe_images; $me = 'sq_fixstyle'; + $ret = sq_findnxreg($body, $pos, ''); + if ($ret == FALSE){ + return array(FALSE, strlen($body)); + } + $newpos = $ret[0] + strlen($ret[2]); + $content = $ret[1]; /** * First look for general BODY style declaration, which would be * like so: @@ -1205,25 +1436,25 @@ function sq_fixstyle($message, $id, $content){ /** * Fix url('blah') declarations. */ - $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si", + $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si", "url(\\1$secremoveimg\\2)", $content); /** * Fix url('https*://.*) declarations but only if $view_unsafe_images * is false. */ if (!$view_unsafe_images){ - $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si", + $content = preg_replace("|url\s*\(\s*([\'\"])\s*https*:.*?([\'\"])\s*\)|si", "url(\\1$secremoveimg\\2)", $content); } - + /** * Fix urls that refer to cid: */ - while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content, - $matches)){ + while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si", + $content, $matches)){ $cidurl = $matches{1}; $httpurl = sq_cid2http($message, $id, $cidurl); - $content = preg_replace("|url\($cidurl\)|si", + $content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si", "url($httpurl)", $content); } @@ -1231,12 +1462,13 @@ function sq_fixstyle($message, $id, $content){ * Fix stupid css declarations which lead to vulnerabilities * in IE. */ - $match = Array('/expression/si', - '/behaviou*r/si', - '/binding/si'); - $replace = Array('idiocy', 'idiocy', 'idiocy'); + $match = Array('/expression/i', + '/behaviou*r/i', + '/binding/i', + '/include-source/i'); + $replace = Array('idiocy', 'idiocy', 'idiocy', 'idiocy'); $content = preg_replace($match, $replace, $content); - return $content; + return array($content, $newpos); } /** @@ -1253,14 +1485,18 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * Get rid of quotes. */ $quotchar = substr($cidurl, 0, 1); - $cidurl = str_replace($quotchar, "", $cidurl); + if ($quotchar == '"' || $quotchar == "'"){ + $cidurl = str_replace($quotchar, "", $cidurl); + } else { + $quotchar = ''; + } $cidurl = substr(trim($cidurl), 4); $linkurl = find_ent_id($cidurl, $message); /* in case of non-save cid links $httpurl should be replaced by a sort of unsave link image */ $httpurl = ''; if ($linkurl) { - $httpurl = $quotchar . '../src/download.php?absolute_dl=true&' . + $httpurl = $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' . "passed_id=$id&mailbox=" . urlencode($mailbox) . '&ent_id=' . $linkurl . $quotchar; } @@ -1271,10 +1507,13 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){ * This function changes the tag into a
tag since we * can't really have a body-within-body. * - * @param $attary an array of attributes and values of - * @return a modified array of attributes to be set for
+ * @param $attary an array of attributes and values of + * @param $mailbox mailbox we're currently reading (for cid2http) + * @param $message current message (for cid2http) + * @param $id current message id (for cid2http) + * @return a modified array of attributes to be set for
*/ -function sq_body2div($attary){ +function sq_body2div($attary, $mailbox, $message, $id){ $me = 'sq_body2div'; $divattary = Array('class' => "'bodyclass'"); $bgcolor = '#ffffff'; @@ -1286,6 +1525,8 @@ function sq_body2div($attary){ $attvalue = str_replace($quotchar, "", $attvalue); switch ($attname){ case 'background': + $attvalue = sq_cid2http($message, $id, + $attvalue, $mailbox); $styledef .= "background-image: url('$attvalue'); "; break; case 'bgcolor': @@ -1336,10 +1577,11 @@ function sq_sanitize($body, $mailbox ){ $me = 'sq_sanitize'; + $rm_tags = array_shift($tag_list); /** * Normalize rm_tags and rm_tags_with_content. */ - @array_walk($rm_tags, 'sq_casenormalize'); + @array_walk($tag_list, 'sq_casenormalize'); @array_walk($rm_tags_with_content, 'sq_casenormalize'); @array_walk($self_closing_tags, 'sq_casenormalize'); /** @@ -1347,10 +1589,9 @@ function sq_sanitize($body, * false means remove these tags * true means allow these tags */ - $rm_tags = array_shift($tag_list); $curpos = 0; $open_tags = Array(); - $trusted = "\n"; + $trusted = "\n\n"; $skip_content = false; /** * Take care of netscape's stupid javascript entities like @@ -1358,18 +1599,21 @@ function sq_sanitize($body, */ $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body); - while (($curtag=sq_getnxtag($body, $curpos)) != FALSE){ + while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; $free_content = substr($body, $curpos, $lt-$curpos); /** * Take care of . Edit the - * content before we apply it. - */ - $free_content = sq_fixstyle($message, $id, $free_content); + if ($tagname == "style" && $tagtype == 1){ + list($free_content, $curpos) = + sq_fixstyle($body, $gt+1, $message, $id); + if ($free_content != FALSE){ + $trusted .= sq_tagprint($tagname, $attary, $tagtype); + $trusted .= $free_content; + $trusted .= sq_tagprint($tagname, false, 2); + } + continue; } if ($skip_content == false){ $trusted .= $free_content; @@ -1386,13 +1630,12 @@ function sq_sanitize($body, if ($skip_content == false){ if ($tagname == "body"){ $tagname = "div"; + } + if (isset($open_tags{$tagname}) && + $open_tags{$tagname} > 0){ + $open_tags{$tagname}--; } else { - if (isset($open_tags{$tagname}) && - $open_tags{$tagname} > 0){ - $open_tags{$tagname}--; - } else { - $tagname = false; - } + $tagname = false; } } } @@ -1407,7 +1650,7 @@ function sq_sanitize($body, */ if ($tagtype == 1 && in_array($tagname, $self_closing_tags)){ - $tagtype=3; + $tagtype = 3; } /** * See if we should skip this tag and any content @@ -1423,6 +1666,14 @@ function sq_sanitize($body, !in_array($tagname, $tag_list))){ $tagname = false; } else { + /** + * Convert body into div. + */ + if ($tagname == "body"){ + $tagname = "div"; + $attary = sq_body2div($attary, $mailbox, + $message, $id); + } if ($tagtype == 1){ if (isset($open_tags{$tagname})){ $open_tags{$tagname}++; @@ -1444,13 +1695,6 @@ function sq_sanitize($body, $mailbox ); } - /** - * Convert body into div. - */ - if ($tagname == "body"){ - $tagname = "div"; - $attary = sq_body2div($attary, $message, $id); - } } } } @@ -1482,7 +1726,7 @@ function sq_sanitize($body, * @param $id the id of the message * @return a string with html safe to display in the browser. */ -function magicHTML($body, $id, $message, $mailbox = 'INBOX'){ +function magicHTML($body, $id, $message, $mailbox = 'INBOX') { global $attachment_common_show_images, $view_unsafe_images, $has_unsafe_images; /** @@ -1496,14 +1740,20 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX'){ "html", "head", "base", - "link" + "link", + "frame", + "iframe", + "plaintext", + "marquee" ); $rm_tags_with_content = Array( "script", "applet", "embed", - "title" + "title", + "frameset", + "xml" ); $self_closing_tags = Array( @@ -1513,15 +1763,16 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX'){ "input" ); - $force_tag_closing = false; + $force_tag_closing = true; $rm_attnames = Array( "/.*/" => Array( - "/target/si", - "/^on.*/si", - "/^dynsrc/si", - "/^data.*/si" + "/target/i", + "/^on.*/i", + "/^dynsrc/i", + "/^data.*/i", + "/^lowsrc.*/i" ) ); @@ -1532,7 +1783,6 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX'){ "/^src|background/i" => Array( Array( - "|^([\'\"])\s*\.\./.*([\'\"])|si", "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", "/^([\'\"])\s*about\s*:.*([\'\"])/si" @@ -1547,41 +1797,44 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX'){ "/^href|action/i" => Array( Array( - "|^([\'\"])\s*\.\./.*([\'\"])|si", "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si", "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si", "/^([\'\"])\s*about\s*:.*([\'\"])/si" ), Array( - "\\1#\\2", - "\\1#\\2", - "\\1#\\2", - "\\1#\\2" + "\\1#\\1", + "\\1#\\1", + "\\1#\\1", + "\\1#\\1" ) ), - "/^style/si" => + "/^style/i" => Array( Array( - "/expression/si", - "/binding/si", - "/behaviou*r/si", - "|url\(([\'\"])\s*\.\./.*([\'\"])\)|si", - "/url\(([\'\"])\s*\S+script\s*:.*([\'\"])\)/si", - "/url\(([\'\"])\s*mocha\s*:.*([\'\"])\)/si", - "/url\(([\'\"])\s*about\s*:.*([\'\"])\)/si" + "/expression/i", + "/binding/i", + "/behaviou*r/i", + "/include-source/i", + "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si", + "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si", + "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si" ), Array( "idiocy", "idiocy", "idiocy", - "url(\\1#\\2)", - "url(\\1#\\2)", - "url(\\1#\\2)", - "url(\\1#\\2)" + "idiocy", + "url(\\1#\\1)", + "url(\\1#\\1)", + "url(\\1#\\1)", + "url(\\1#\\1)" ) ) ) ); + if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { + $view_unsafe_images = false; + } if (!$view_unsafe_images){ /** * Remove any references to http/https if view_unsafe_images set @@ -1590,16 +1843,19 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX'){ array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[0], '/^([\'\"])\s*https*:.*([\'\"])/si'); array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[1], - "\\1$secremoveimg\\2"); - array_push($bad_attvals{'/.*/'}{'/^style/si'}[0], + "\\1$secremoveimg\\1"); + array_push($bad_attvals{'/.*/'}{'/^style/i'}[0], '/url\(([\'\"])\s*https*:.*([\'\"])\)/si'); - array_push($bad_attvals{'/.*/'}{'/^style/si'}[1], - "url(\\1$secremoveimg\\2)"); + array_push($bad_attvals{'/.*/'}{'/^style/i'}[1], + "url(\\1$secremoveimg\\1)"); } $add_attr_to_tag = Array( - "/^a$/si" => Array('target'=>'"_new"') - ); + "/^a$/i" => + Array('target'=>'"_new"', + 'title'=>'"'._("This external link will open in a new window").'"' + ) + ); $trusted = sq_sanitize($body, $tag_list, $rm_tags_with_content, @@ -1612,7 +1868,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX'){ $id, $mailbox ); - if (preg_match("|$secremoveimg|si", $trusted)){ + if (preg_match("|$secremoveimg|i", $trusted)){ $has_unsafe_images = true; } return $trusted;