X-Git-Url: https://vcs.fsf.org/?p=squirrelmail.git;a=blobdiff_plain;f=functions%2Fmime.php;h=00469acebd13974eff9707f46fbe376c1b9a8f13;hp=19fa5a83adde004d75fe329cae722b3a0134b4b2;hb=5a6fde9e7a1b7494f27a621d8944f7de968bb66d;hpb=6cc08d8b2f590379c5d0a9e2905245d2d2f407b4 diff --git a/functions/mime.php b/functions/mime.php index 19fa5a83..00469ace 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -1,29 +1,33 @@ \n\n" . - '
'; + /* removed urldecode because $_GET is auto urldecoded ??? */ + displayPageHeader( $color, $mailbox ); $errormessage = _("SquirrelMail could not decode the bodystructure of the message"); - $errormessage .= '
'._("the provided bodystructure by your imap-server").':

'; - $errormessage .= '
' . htmlspecialchars($read) . '
'; + $errormessage .= '
'._("the provided bodystructure by your imap-server").':

'; + $errormessage .= '
' . htmlspecialchars($read) . '
'; plain_error_message( $errormessage, $color ); echo ''; exit; } - $msg->setEnt('0'); if (count($flags)) { foreach ($flags as $flag) { $char = strtoupper($flag{1}); @@ -86,25 +88,28 @@ function mime_structure ($bodystructure, $flags=array()) { /* This starts the parsing of a particular structure. It is called recursively, - * so it can be passed different structures. It returns an object of type - * $message. - * First, it checks to see if it is a multipart message. If it is, then it - * handles that as it sees is necessary. If it is just a regular entity, - * then it parses it and adds the necessary header information (by calling out - * to mime_get_elements() - */ - -function mime_fetch_body($imap_stream, $id, $ent_id) { - global $uid_support; +* so it can be passed different structures. It returns an object of type +* $message. +* First, it checks to see if it is a multipart message. If it is, then it +* handles that as it sees is necessary. If it is just a regular entity, +* then it parses it and adds the necessary header information (by calling out +* to mime_get_elements() +*/ + +function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) { /* Do a bit of error correction. If we couldn't find the entity id, just guess - * that it is the first one. That is usually the case anyway. - */ + * that it is the first one. That is usually the case anyway. + */ + if (!$ent_id) { - $ent_id = 1; + $cmd = "FETCH $id BODY[]"; + } else { + $cmd = "FETCH $id BODY[$ent_id]"; } - $cmd = "FETCH $id BODY[$ent_id]"; - $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, $uid_support); + if ($fetch_size!=0) $cmd .= "<0.$fetch_size>"; + + $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, TRUE); do { $topline = trim(array_shift($data)); } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ; @@ -113,11 +118,11 @@ function mime_fetch_body($imap_stream, $id, $ent_id) { if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) { $ret = substr($wholemessage, 0, $regs[1]); /* There is some information in the content info header that could be important - * in order to parse html messages. Let's get them here. - */ - if ($ret{0} == '<') { - $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, $uid_support); - } + * in order to parse html messages. Let's get them here. + */ +// if ($ret{0} == '<') { +// $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, TRUE); +// } } else if (ereg('"([^"]*)"', $topline, $regs)) { $ret = $regs[1]; } else { @@ -132,18 +137,18 @@ function mime_fetch_body($imap_stream, $id, $ent_id) { '&message=' . urlencode($message) . '&topline=' . urlencode($topline); - echo '
' . - '' . - '' . - '" . - '" . - '" . - '" . - "
' . - _("Body retrieval error. The reason for this is most probably that the message is malformed.") . - '
' . _("Command:") . "$cmd
' . _("Response:") . "$response
' . _("Message:") . "$message
' . _("FETCH line:") . "$topline


"; - - $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, $uid_support); + echo '
' . + '' . + '' . + '" . + '" . + '" . + '" . + "
' . + _("Body retrieval error. The reason for this is most probably that the message is malformed.") . + '
' . _("Command:") . "$cmd
' . _("Response:") . "$response
' . _("Message:") . "$message
' . _("FETCH line:") . "$topline


"; + + $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, TRUE); array_shift($data); $wholemessage = implode('', $data); @@ -152,31 +157,45 @@ function mime_fetch_body($imap_stream, $id, $ent_id) { return $ret; } -function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) { - global $uid_support; - /* Do a bit of error correction. If we couldn't find the entity id, just guess - * that it is the first one. That is usually the case anyway. - */ - if (!$ent_id) { - $ent_id = 1; - } - $sid = sqimap_session_id($uid_support); +function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) { + /* Don't kill the connection if the browser is over a dialup - * and it would take over 30 seconds to download it. - * DonĀ“t call set_time_limit in safe mode. - */ + * and it would take over 30 seconds to download it. + * Don't call set_time_limit in safe mode. + */ if (!ini_get('safe_mode')) { set_time_limit(0); } - if ($uid_support) { - $sid_s = substr($sid,0,strpos($sid, ' ')); + /* in case of base64 encoded attachments, do not buffer them. + Instead, echo the decoded attachment directly to screen */ + if (strtolower($encoding) == 'base64') { + if (!$ent_id) { + $query = "FETCH $id BODY[]"; + } else { + $query = "FETCH $id BODY[$ent_id]"; + } + sqimap_run_command($imap_stream,$query,true,$response,$message,TRUE,'sqimap_base64_decode','php://stdout',true); } else { - $sid_s = $sid; - } - $body = mime_fetch_body ($imap_stream, $id, $ent_id); echo decodeBody($body, $encoding); + } + + /* + TODO, use the same method for quoted printable. + However, I assume that quoted printable attachments aren't that large + so the performancegain / memory usage drop will be minimal. + If we decide to add that then we need to adapt sqimap_fread because + we need to split te result on \n and fread doesn't stop at \n. That + means we also should provide $results from sqimap_fread (by ref) to + te function and set $no_return to false. The $filter function for + quoted printable should handle unsetting of $results. + */ + /* + TODO 2: find out how we write to the output stream php://stdout. fwrite + doesn't work because 'php://stdout isn't a stream. + */ + return; /* fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n"); @@ -195,7 +214,7 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) { return; } else { echo decodeBody($read1, $encoding) . - decodeBody($read, $encoding); + decodeBody($read, $encoding); } } else if ($cnt) { echo decodeBody($read, $encoding); @@ -210,11 +229,11 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) { /* -[ END MIME DECODING ]----------------------------------------------------------- */ /* This is here for debugging purposes. It will print out a list - * of all the entity IDs that are in the $message object. - */ +* of all the entity IDs that are in the $message object. +*/ function listEntities ($message) { if ($message) { - echo "" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '
'; + echo "" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '
'; for ($i = 0; isset($message->entities[$i]); $i++) { echo "$i : "; $msg = listEntities($message->entities[$i]); @@ -260,8 +279,8 @@ function getEntity ($message, $ent_id) { } /* translateText - * Extracted from strings.php 23/03/2002 - */ +* Extracted from strings.php 23/03/2002 +*/ function translateText(&$body, $wrap_at, $charset) { global $where, $what; /* from searching */ @@ -273,7 +292,7 @@ function translateText(&$body, $wrap_at, $charset) { for ($i=0; $i < count($body_ary); $i++) { $line = $body_ary[$i]; if (strlen($line) - 2 >= $wrap_at) { - sqWordWrap($line, $wrap_at); + sqWordWrap($line, $wrap_at, $charset); } $line = charset_decode($charset, $line); $line = str_replace("\t", ' ', $line); @@ -295,16 +314,16 @@ function translateText(&$body, $wrap_at, $charset) { } } - if ($quotes > 1) { - if (!isset($color[14])) { - $color[14] = '#FF0000'; - } - $line = '' . $line . ''; - } elseif ($quotes) { + if ($quotes % 2) { if (!isset($color[13])) { $color[13] = '#800000'; } - $line = '' . $line . ''; + $line = '' . $line . ''; + } elseif ($quotes) { + if (!isset($color[14])) { + $color[14] = '#FF0000'; + } + $line = '' . $line . ''; } $body_ary[$i] = $line; @@ -312,28 +331,25 @@ function translateText(&$body, $wrap_at, $charset) { $body = '
' . implode("\n", $body_ary) . '
'; } - -/* This returns a parsed string called $body. That string can then - * be displayed as the actual message in the HTML. It contains - * everything needed, including HTML Tags, Attachments at the - * bottom, etc. - */ -function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX') { +/** +* This returns a parsed string called $body. That string can then +* be displayed as the actual message in the HTML. It contains +* everything needed, including HTML Tags, Attachments at the +* bottom, etc. +* @param clean Do not output stuff that's irrelevant for the printable version. +*/ +function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX', $clean=FALSE) { /* This if statement checks for the entity to show as the - * primary message. To add more of them, just put them in the - * order that is their priority. - */ - global $startMessage, $username, $key, $imapServerAddress, $imapPort, - $show_html_default, $has_unsafe_images, $sort; - - if ( !check_php_version(4,1) ) { - global $_GET; - } - if(isset($_GET['view_unsafe_images'])) { - $view_unsafe_images = $_GET['view_unsafe_images']; + * primary message. To add more of them, just put them in the + * order that is their priority. + */ + global $startMessage, $languages, $squirrelmail_language, + $show_html_default, $sort, $has_unsafe_images, $passed_ent_id; + + if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { + $view_unsafe_images = false; } - $has_unsafe_images= 0; $body = ''; $urlmailbox = urlencode($mailbox); $body_message = getEntity($message, $ent_num); @@ -341,54 +357,78 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma ($body_message->header->type0 == 'rfc822')) { $body = mime_fetch_body ($imap_stream, $id, $ent_num); $body = decodeBody($body, $body_message->header->encoding); + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode')) { + if (mb_detect_encoding($body) != 'ASCII') { + $body = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode',$body); + } + } $hookResults = do_hook("message_body", $body); $body = $hookResults[1]; /* If there are other types that shouldn't be formatted, add - * them here. - */ + * them here. + */ if ($body_message->header->type1 == 'html') { if ($show_html_default <> 1) { $entity_conv = array(' ' => ' ', - '

' => "\n", - '
' => "\n", - '

' => "\n", - '
' => "\n", - '>' => '>', - '<' => '<'); + '

' => "\n", + '

' => "\n", + '
' => "\n", + '
' => "\n", + '
' => "\n", + '
' => "\n", + '>' => '>', + '<' => '<'); $body = strtr($body, $entity_conv); $body = strip_tags($body); $body = trim($body); translateText($body, $wrap_at, - $body_message->header->getParameter('charset')); + $body_message->header->getParameter('charset')); } else { $body = magicHTML($body, $id, $message, $mailbox); } } else { translateText($body, $wrap_at, - $body_message->header->getParameter('charset')); + $body_message->header->getParameter('charset')); + } + + // if this is the clean display (i.e. printer friendly), stop here. + if ( $clean ) { + return $body; } - if ($has_unsafe_images) { - if ($view_unsafe_images) { - $untext = '">' . _("Hide Unsafe Images"); + $link = 'passed_id=' . $id . '&ent_id='.$ent_num. + '&mailbox=' . $urlmailbox .'&sort=' . $sort . + '&startMessage=' . $startMessage . '&show_more=0'; + if (isset($passed_ent_id)) { + $link .= '&passed_ent_id='.$passed_ent_id; + } + $body .= '

' . _("Download this as a file") . ''; + if ($view_unsafe_images) { + $text = _("Hide Unsafe Images"); + } else { + if (isset($has_unsafe_images) && $has_unsafe_images) { + $link .= '&view_unsafe_images=1'; + $text = _("View Unsafe Images"); } else { - $untext = '&view_unsafe_images=1">' . _("View Unsafe Images"); + $text = ''; } - $body .= '
' . $text . ''; + } + $body .= '

' . "\n"; } return $body; } function formatAttachments($message, $exclude_id, $mailbox, $id) { - global $where, $what, $startMessage, $color; - static $ShownHTML = 0; + global $where, $what, $startMessage, $color, $passed_ent_id; $att_ar = $message->getAttachments($exclude_id); @@ -399,56 +439,33 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { $urlMailbox = urlencode($mailbox); foreach ($att_ar as $att) { - $ent = urldecode($att->entity_id); + $ent = $att->entity_id; $header = $att->header; $type0 = strtolower($header->type0); $type1 = strtolower($header->type1); $name = ''; - $links['download link']['text'] = _("download"); - $links['download link']['href'] = - "../src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&ent_id=$ent"; - $ImageURL = ''; + $links['download link']['text'] = _("Download"); + $links['download link']['href'] = SM_PATH . + "src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&ent_id=$ent"; if ($type0 =='message' && $type1 == 'rfc822') { - $default_page = '../src/read_body.php'; + $default_page = SM_PATH . 'src/read_body.php'; $rfc822_header = $att->rfc822_header; - $filename = decodeHeader($rfc822_header->subject); + $filename = $rfc822_header->subject; if (trim( $filename ) == '') { $filename = 'untitled-[' . $ent . ']' ; - } + } $from_o = $rfc822_header->from; if (is_object($from_o)) { - $from_name = $from_o->getAddress(false); + $from_name = decodeHeader($from_o->getAddress(false)); } else { $from_name = _("Unknown sender"); } - $from_name = decodeHeader(htmlspecialchars($from_name)); $description = $from_name; } else { - $default_page = '../src/download.php'; - if (is_object($header->disposition)) { - $filename = decodeHeader($header->disposition->getProperty('filename')); - if (trim($filename) == '') { - $name = decodeHeader($header->disposition->getProperty('name')); - if (trim($name) == '') { - if (trim( $header->id ) == '') { - $filename = 'untitled-[' . $ent . ']' ; - } else { - $filename = 'cid: ' . $header->id; - } - } else { - $filename = $name; - } - } - } else { - if (trim( $header->id ) == '') { - $filename = 'untitled-[' . $ent . ']' ; - } else { - $filename = 'cid: ' . $header->id; - } - } - + $default_page = SM_PATH . 'src/download.php'; + $filename = $att->getFilename(); if ($header->description) { - $description = htmlspecialchars($header->description); + $description = decodeHeader($header->description); } else { $description = ''; } @@ -461,35 +478,36 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { $passed_ent_id_link = ''; } $defaultlink = $default_page . "?startMessage=$startMessage" - . "&passed_id=$id&mailbox=$urlMailbox" - . '&ent_id='.$ent.$passed_ent_id_link.'&absolute_dl=true'; + . "&passed_id=$id&mailbox=$urlMailbox" + . '&ent_id='.$ent.$passed_ent_id_link; if ($where && $what) { - $defaultlink .= '&where='. urlencode($where).'&what='.urlencode($what); + $defaultlink .= '&where='. urlencode($where).'&what='.urlencode($what); } + /* This executes the attachment hook with a specific MIME-type. - * If that doesn't have results, it tries if there's a rule - * for a more generic type. - */ + * If that doesn't have results, it tries if there's a rule + * for a more generic type. + */ $hookresults = do_hook("attachment $type0/$type1", $links, - $startMessage, $id, $urlMailbox, $ent, $defaultlink, - $display_filename, $where, $what); + $startMessage, $id, $urlMailbox, $ent, $defaultlink, + $display_filename, $where, $what); if(count($hookresults[1]) <= 1) { $hookresults = do_hook("attachment $type0/*", $links, - $startMessage, $id, $urlMailbox, $ent, $defaultlink, - $display_filename, $where, $what); + $startMessage, $id, $urlMailbox, $ent, $defaultlink, + $display_filename, $where, $what); } $links = $hookresults[1]; $defaultlink = $hookresults[6]; - $attachments .= '' . - "$display_filename " . - '' . show_readable_size($header->size) . - '  ' . - "[ $type0/$type1 ] " . - ''; + $attachments .= '' . + ''.decodeHeader($display_filename).' ' . + '' . show_readable_size($header->size) . + '  ' . + '[ '.htmlspecialchars($type0).'/'.htmlspecialchars($type1).' ] ' . + ''; $attachments .= '' . $description . ''; - $attachments .= ' '; + $attachments .= ' '; $skipspaces = 1; foreach ($links as $val) { @@ -501,20 +519,55 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) { $attachments .= '' . $val['text'] . ''; } unset($links); - $attachments .= "\n"; + $attachments .= "\n"; } + $attachmentadd = do_hook_function('attachments_bottom',$attachments); + if ($attachmentadd != '') + $attachments = $attachmentadd; return $attachments; } +function sqimap_base64_decode(&$string) { + + // Base64 encoded data goes in pairs of 4 bytes. To achieve on the + // fly decoding (to reduce memory usage) you have to check if the + // data has incomplete pairs + + // Remove the noise in order to check if the 4 bytes pairs are complete + $string = str_replace(array("\r\n","\n", "\r", " "),array('','','',''),$string); + + $sStringRem = ''; + $iMod = strlen($string) % 4; + if ($iMod) { + $sStringRem = substr($string,-$iMod); + // Check if $sStringRem contains padding characters + if (substr($sStringRem,-1) != '=') { + $string = substr($string,0,-$iMod); + } else { + $sStringRem = ''; + } + } + $string = base64_decode($string); + return $sStringRem; +} + + /* This function decodes the body depending on the encoding type. */ function decodeBody($body, $encoding) { - global $languages, $squirrelmail_language; global $show_html_default; $body = str_replace("\r\n", "\n", $body); $encoding = strtolower($encoding); - if ($encoding == 'quoted-printable' || + $encoding_handler = do_hook_function('decode_body', $encoding); + + + // plugins get first shot at decoding the body + // + if (!empty($encoding_handler) && function_exists($encoding_handler)) { + $body = $encoding_handler('decode', $body); + + } else if ($encoding == 'quoted-printable' || $encoding == 'quoted_printable') { $body = quoted_printable_decode($body); @@ -526,106 +579,280 @@ function decodeBody($body, $encoding) { $body = base64_decode($body); } - if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && - function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { - $body = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $body); - } - // All other encodings are returned raw. return $body; } -/* - * This functions decode strings that is encoded according to - * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). - * Patched by Christian Schmidt 23/03/2002 - */ -function decodeHeader ($string, $utfencode=true) { - global $languages, $squirrelmail_language; +/** +* Decodes headers +* +* This functions decode strings that is encoded according to +* RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). +* Patched by Christian Schmidt 23/03/2002 +* +* @param string $string header string that has to be made readable +* @param boolean $utfencode change message in order to be readable on user's charset. defaults to true +* @param boolean $htmlsave preserve spaces and sanitize html special characters. defaults to true +* @param boolean $decide decide if string can be utfencoded. defaults to false +* @return string decoded header string +*/ +function decodeHeader ($string, $utfencode=true,$htmlsave=true,$decide=false) { + global $languages, $squirrelmail_language,$default_charset; if (is_array($string)) { $string = implode("\n", $string); } if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && - function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { - $string = $languages[$squirrelmail_language]['XTRA_CODE']('decodeheader', $string); + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader')) { + $string = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader', $string); + // Do we need to return at this point? + // return $string; } - $i = 0; - while (preg_match('/^(.{' . $i . '})(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=/Ui', - $string, $res)) { - $prefix = $res[1]; - /* Ignore white-space between consecutive encoded-words. */ - if (strspn($res[2], " \t") != strlen($res[2])) { - $prefix .= $res[2]; + $iLastMatch = -2; + $encoded = true; + + $aString = explode(' ',$string); + $ret = ''; + foreach ($aString as $chunk) { + if ($encoded && $chunk === '') { + continue; + } elseif ($chunk === '') { + $ret .= ' '; + continue; } + $encoded = false; + /* if encoded words are not separated by a linear-space-white we still catch them */ + $j = $i-1; + + while ($match = preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) { + /* if the last chunk isn't an encoded string then put back the space, otherwise don't */ + if ($iLastMatch !== $j) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } + } + $iLastMatch = $i; + $j = $i; + if ($htmlsave) { + $ret .= htmlspecialchars($res[1]); + } else { + $ret .= $res[1]; + } + $encoding = ucfirst($res[3]); - if (ucfirst($res[4]) == 'B') { - $replace = base64_decode($res[5]); - } else { - $replace = str_replace('_', ' ', $res[5]); - $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', + /* decide about valid decoding */ + if ($decide && is_conversion_safe($res[2])) { + $utfencode=true; + $can_be_encoded=true; + } else { + $can_be_encoded=false; + } + switch ($encoding) + { + case 'B': + $replace = base64_decode($res[4]); + if ($utfencode) { + if ($can_be_encoded) { + /* convert string to different charset, + * if functions asks for it (usually in compose) + */ + $ret .= charset_convert($res[2],$replace,$default_charset); + } else { + // convert string to html codes in order to display it + $ret .= charset_decode($res[2],$replace); + } + } else { + if ($htmlsave) { + $replace = htmlspecialchars($replace); + } + $ret.= $replace; + } + break; + case 'Q': + $replace = str_replace('_', ' ', $res[4]); + $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', $replace); - /* Only encode into entities by default. Some places - * don't need the encoding, like the compose form. - */ - if ($utfencode) { - $replace = charset_decode($res[3], $replace); + if ($utfencode) { + if ($can_be_encoded) { + /* convert string to different charset, + * if functions asks for it (usually in compose) + */ + $replace = charset_convert($res[2], $replace,$default_charset); + } else { + // convert string to html codes in order to display it + $replace = charset_decode($res[2], $replace); + } + } else { + if ($htmlsave) { + $replace = htmlspecialchars($replace); + } + } + $ret .= $replace; + break; + default: + break; } + $chunk = $res[5]; + $encoded = true; } - $string = $prefix . $replace . substr($string, strlen($res[0])); - $i = strlen($prefix) + strlen($replace); + if (!$encoded) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } + } + + if (!$encoded && $htmlsave) { + $ret .= htmlspecialchars($chunk); + } else { + $ret .= $chunk; + } + ++$i; } - return $string; + /* remove the first added space */ + if ($ret) { + if ($htmlsave) { + $ret = substr($ret,5); + } else { + $ret = substr($ret,1); + } + } + + return $ret; } -/* - * Encode a string according to RFC 1522 for use in headers if it - * contains 8-bit characters or anything that looks like it should - * be encoded. - */ +/** +* Encodes header as quoted-printable +* +* Encode a string according to RFC 1522 for use in headers if it +* contains 8-bit characters or anything that looks like it should +* be encoded. +* +* @param string $string header string, that has to be encoded +* @return string quoted-printable encoded string +*/ function encodeHeader ($string) { global $default_charset, $languages, $squirrelmail_language; if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && - function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { - return $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string); + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader')) { + return call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader', $string); } // Encode only if the string contains 8-bit characters or =? $j = strlen($string); - $l = strstr($string, '=?'); // Must be encoded ? + $max_l = 75 - strlen($default_charset) - 7; + $aRet = array(); $ret = ''; + $iEncStart = $enc_init = false; + $cur_l = $iOffset = 0; for($i = 0; $i < $j; ++$i) { - switch($string{$i}) { - case '=': - $ret .= '=3D'; - break; - case '?': - $ret .= '=3F'; - break; - case '_': - $ret .= '=5F'; - break; - case ' ': - $ret .= '_'; - break; - default: - $k = ord($string{$i}); - if ($k > 126) { - $ret .= sprintf("=%02X", $k); - $l = TRUE; + switch($string{$i}) + { + case '=': + case '<': + case '>': + case ',': + case '?': + case '_': + if ($iEncStart === false) { + $iEncStart = $i; + } + $cur_l+=3; + if ($cur_l > ($max_l-2)) { + /* if there is an stringpart that doesn't need encoding, add it */ + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } else { + $ret .= sprintf("=%02X",ord($string{$i})); + } + break; + case '(': + case ')': + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } + break; + case ' ': + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; } else { - $ret .= $string{$i}; + $ret .= '_'; } - break; + } + break; + default: + $k = ord($string{$i}); + if ($k > 126) { + if ($iEncStart === false) { + // do not start encoding in the middle of a string, also take the rest of the word. + $sLeadString = substr($string,0,$i); + $aLeadString = explode(' ',$sLeadString); + $sToBeEncoded = array_pop($aLeadString); + $iEncStart = $i - strlen($sToBeEncoded); + $ret .= $sToBeEncoded; + $cur_l += strlen($sToBeEncoded); + } + $cur_l += 3; + /* first we add the encoded string that reached it's max size */ + if ($cur_l > ($max_l-2)) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?= "; /* the next part is also encoded => separate by space */ + $cur_l = 3; + $ret = ''; + $iOffset = $i; + $iEncStart = $i; + } + $enc_init = true; + $ret .= sprintf("=%02X", $k); + } else { + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iEncStart = false; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + } else { + $ret .= $string{$i}; + } + } + } + break; } } - if ($l) { - $string = "=?$default_charset?Q?$ret?="; + if ($enc_init) { + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + } else { + $aRet[] = substr($string,$iOffset); + } + $string = implode('',$aRet); } - return $string; } @@ -638,7 +865,16 @@ function find_ent_id($id, $message) { if (strcasecmp($message->entities[$i]->header->id, $id) == 0) { // if (sq_check_save_extension($message->entities[$i])) { return $message->entities[$i]->entity_id; -// } +// } + } elseif (!empty($message->entities[$i]->header->parameters['name'])) { + /** + * This is part of a fix for Outlook Express 6.x generating + * cid URLs without creating content-id headers + * @@JA - 20050207 + */ + if (strcasecmp($message->entities[$i]->header->parameters['name'], $id) == 0) { + return $message->entities[$i]->entity_id; + } } } } @@ -654,19 +890,55 @@ function sq_check_save_extension($message) { /** - ** HTMLFILTER ROUTINES - */ +** HTMLFILTER ROUTINES +*/ + +/** +* This function is more or less a wrapper around stripslashes. Apparently +* Explorer is stupid enough to just remove the backslashes and then +* execute the content of the attribute as if nothing happened. +* Who does that? +* +* @param attvalue The value of the attribute +* @return attvalue The value of the attribute stripslashed. +*/ +function sq_unbackslash($attvalue){ + /** + * Remove any backslashes. See if there are any first. + */ + + if (strstr($attvalue, '\\') !== false){ + $attvalue = stripslashes($attvalue); + } + return $attvalue; +} /** - * This function returns the final tag out of the tag name, an array - * of attributes, and the type of the tag. This function is called by - * sq_sanitize internally. - * - * @param $tagname the name of the tag. - * @param $attary the array of attributes and their values - * @param $tagtype The type of the tag (see in comments). - * @return a string with the final tag representation. - */ +* Kill any tabs, newlines, or carriage returns. Our friends the +* makers of the browser with 95% market value decided that it'd +* be funny to make "java[tab]script" be just as good as "javascript". +* +* @param attvalue The attribute value before extraneous spaces removed. +* @return attvalue The attribute value after extraneous spaces removed. +*/ +function sq_unspace($attvalue){ + if (strcspn($attvalue, "\t\r\n") != strlen($attvalue)){ + $attvalue = str_replace(Array("\t", "\r", "\n"), Array('', '', ''), + $attvalue); + } + return $attvalue; +} + +/** +* This function returns the final tag out of the tag name, an array +* of attributes, and the type of the tag. This function is called by +* sq_sanitize internally. +* +* @param $tagname the name of the tag. +* @param $attary the array of attributes and their values +* @param $tagtype The type of the tag (see in comments). +* @return a string with the final tag representation. +*/ function sq_tagprint($tagname, $attary, $tagtype){ $me = 'sq_tagprint'; @@ -690,26 +962,26 @@ function sq_tagprint($tagname, $attary, $tagtype){ } /** - * A small helper function to use with array_walk. Modifies a by-ref - * value and makes it lowercase. - * - * @param $val a value passed by-ref. - * @return void since it modifies a by-ref value. - */ +* A small helper function to use with array_walk. Modifies a by-ref +* value and makes it lowercase. +* +* @param $val a value passed by-ref. +* @return void since it modifies a by-ref value. +*/ function sq_casenormalize(&$val){ $val = strtolower($val); } /** - * This function skips any whitespace from the current position within - * a string and to the next non-whitespace value. - * - * @param $body the string - * @param $offset the offset within the string where we should start - * looking for the next non-whitespace character. - * @return the location within the $body where the next - * non-whitespace char is located. - */ +* This function skips any whitespace from the current position within +* a string and to the next non-whitespace value. +* +* @param $body the string +* @param $offset the offset within the string where we should start +* looking for the next non-whitespace character. +* @return the location within the $body where the next +* non-whitespace char is located. +*/ function sq_skipspace($body, $offset){ $me = 'sq_skipspace'; preg_match('/^(\s*)/s', substr($body, $offset), $matches); @@ -721,16 +993,16 @@ function sq_skipspace($body, $offset){ } /** - * This function looks for the next character within a string. It's - * really just a glorified "strpos", except it catches if failures - * nicely. - * - * @param $body The string to look for needle in. - * @param $offset Start looking from this position. - * @param $needle The character/string to look for. - * @return location of the next occurance of the needle, or - * strlen($body) if needle wasn't found. - */ +* This function looks for the next character within a string. It's +* really just a glorified "strpos", except it catches if failures +* nicely. +* +* @param $body The string to look for needle in. +* @param $offset Start looking from this position. +* @param $needle The character/string to look for. +* @return location of the next occurance of the needle, or +* strlen($body) if needle wasn't found. +*/ function sq_findnxstr($body, $offset, $needle){ $me = 'sq_findnxstr'; $pos = strpos($body, $needle, $offset); @@ -741,24 +1013,24 @@ function sq_findnxstr($body, $offset, $needle){ } /** - * This function takes a PCRE-style regexp and tries to match it - * within the string. - * - * @param $body The string to look for needle in. - * @param $offset Start looking from here. - * @param $reg A PCRE-style regex to match. - * @return Returns a false if no matches found, or an array - * with the following members: - * - integer with the location of the match within $body - * - string with whatever content between offset and the match - * - string with whatever it is we matched - */ +* This function takes a PCRE-style regexp and tries to match it +* within the string. +* +* @param $body The string to look for needle in. +* @param $offset Start looking from here. +* @param $reg A PCRE-style regex to match. +* @return Returns a false if no matches found, or an array +* with the following members: +* - integer with the location of the match within $body +* - string with whatever content between offset and the match +* - string with whatever it is we matched +*/ function sq_findnxreg($body, $offset, $reg){ $me = 'sq_findnxreg'; $matches = Array(); $retarr = Array(); - preg_match("%^(.*?)($reg)%s", substr($body, $offset), $matches); - if (!$matches{0}){ + preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches); + if (!isset($matches{0}) || !$matches{0}){ $retarr = false; } else { $retarr{0} = $offset + strlen($matches{1}); @@ -769,19 +1041,19 @@ function sq_findnxreg($body, $offset, $reg){ } /** - * This function looks for the next tag. - * - * @param $body String where to look for the next tag. - * @param $offset Start looking from here. - * @return false if no more tags exist in the body, or - * an array with the following members: - * - string with the name of the tag - * - array with attributes and their values - * - integer with tag type (1, 2, or 3) - * - integer where the tag starts (starting "<") - * - integer where the tag ends (ending ">") - * first three members will be false, if the tag is invalid. - */ +* This function looks for the next tag. +* +* @param $body String where to look for the next tag. +* @param $offset Start looking from here. +* @return false if no more tags exist in the body, or +* an array with the following members: +* - string with the name of the tag +* - array with attributes and their values +* - integer with tag type (1, 2, or 3) +* - integer where the tag starts (starting "<") +* - integer where the tag ends (ending ">") +* first three members will be false, if the tag is invalid. +*/ function sq_getnxtag($body, $offset){ $me = 'sq_getnxtag'; if ($offset > strlen($body)){ @@ -792,23 +1064,23 @@ function sq_getnxtag($body, $offset){ return false; } /** - * We are here: - * blah blah - * \---------^ - */ + * We are here: + * blah blah + * \---------^ + */ $pos = sq_skipspace($body, $lt+1); if ($pos >= strlen($body)){ return Array(false, false, false, $lt, strlen($body)); } /** - * There are 3 kinds of tags: - * 1. Opening tag, e.g.: - * - * 2. Closing tag, e.g.: - * - * 3. XHTML-style content-less tag, e.g.: - * - */ + * There are 3 kinds of tags: + * 1. Opening tag, e.g.: + * + * 2. Closing tag, e.g.: + * + * 3. XHTML-style content-less tag, e.g.: + * + */ $tagtype = false; switch (substr($body, $pos, 1)){ case '/': @@ -817,8 +1089,8 @@ function sq_getnxtag($body, $offset){ break; case '!': /** - * A comment or an SGML declaration. - */ + * A comment or an SGML declaration. + */ if (substr($body, $pos+1, 2) == "--"){ $gt = strpos($body, "-->", $pos); if ($gt === false){ @@ -834,18 +1106,17 @@ function sq_getnxtag($body, $offset){ break; default: /** - * Assume tagtype 1 for now. If it's type 3, we'll switch values - * later. - */ + * Assume tagtype 1 for now. If it's type 3, we'll switch values + * later. + */ $tagtype = 1; break; } - $tag_start = $pos; $tagname = ''; /** - * Look for next [\W-_], which will indicate the end of the tag name. - */ + * Look for next [\W-_], which will indicate the end of the tag name. + */ $regary = sq_findnxreg($body, $pos, "[^\w\-_]"); if ($regary == false){ return Array(false, false, false, $lt, strlen($body)); @@ -854,20 +1125,20 @@ function sq_getnxtag($body, $offset){ $tagname = strtolower($tagname); /** - * $match can be either of these: - * '>' indicating the end of the tag entirely. - * '\s' indicating the end of the tag name. - * '/' indicating that this is type-3 xhtml tag. - * - * Whatever else we find there indicates an invalid tag. - */ + * $match can be either of these: + * '>' indicating the end of the tag entirely. + * '\s' indicating the end of the tag name. + * '/' indicating that this is type-3 xhtml tag. + * + * Whatever else we find there indicates an invalid tag. + */ switch ($match){ case '/': /** - * This is an xhtml-style tag with a closing / at the - * end, like so: . Check if it's followed - * by the closing bracket. If not, then this tag is invalid - */ + * This is an xhtml-style tag with a closing / at the + * end, like so: . Check if it's followed + * by the closing bracket. If not, then this tag is invalid + */ if (substr($body, $pos, 2) == "/>"){ $pos++; $tagtype = 3; @@ -881,46 +1152,45 @@ function sq_getnxtag($body, $offset){ break; default: /** - * Check if it's whitespace - */ + * Check if it's whitespace + */ if (!preg_match('/\s/', $match)){ /** - * This is an invalid tag! Look for the next closing ">". - */ - $gt = sq_findnxstr($body, $offset, ">"); + * This is an invalid tag! Look for the next closing ">". + */ + $gt = sq_findnxstr($body, $lt, ">"); return Array(false, false, false, $lt, $gt); } break; } /** - * At this point we're here: - * - * \-------^ - * - * At this point we loop in order to find all attributes. - */ + * At this point we're here: + * + * \-------^ + * + * At this point we loop in order to find all attributes. + */ $attname = ''; - $atttype = false; $attary = Array(); while ($pos <= strlen($body)){ $pos = sq_skipspace($body, $pos); if ($pos == strlen($body)){ /** - * Non-closed tag. - */ + * Non-closed tag. + */ return Array(false, false, false, $lt, $pos); } /** - * See if we arrived at a ">" or "/>", which means that we reached - * the end of the tag. - */ + * See if we arrived at a ">" or "/>", which means that we reached + * the end of the tag. + */ $matches = Array(); if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) { /** - * Yep. So we did. - */ + * Yep. So we did. + */ $pos += strlen($matches{1}); if ($matches{2} == "/>"){ $tagtype = 3; @@ -930,46 +1200,46 @@ function sq_getnxtag($body, $offset){ } /** - * There are several types of attributes, with optional - * [:space:] between members. - * Type 1: - * attrname[:space:]=[:space:]'CDATA' - * Type 2: - * attrname[:space:]=[:space:]"CDATA" - * Type 3: - * attr[:space:]=[:space:]CDATA - * Type 4: - * attrname - * - * We leave types 1 and 2 the same, type 3 we check for - * '"' and convert to """ if needed, then wrap in - * double quotes. Type 4 we convert into: - * attrname="yes". - */ - $regary = sq_findnxreg($body, $pos, "[^\w\-_]"); + * There are several types of attributes, with optional + * [:space:] between members. + * Type 1: + * attrname[:space:]=[:space:]'CDATA' + * Type 2: + * attrname[:space:]=[:space:]"CDATA" + * Type 3: + * attr[:space:]=[:space:]CDATA + * Type 4: + * attrname + * + * We leave types 1 and 2 the same, type 3 we check for + * '"' and convert to """ if needed, then wrap in + * double quotes. Type 4 we convert into: + * attrname="yes". + */ + $regary = sq_findnxreg($body, $pos, "[^:\w\-_]"); if ($regary == false){ /** - * Looks like body ended before the end of tag. - */ + * Looks like body ended before the end of tag. + */ return Array(false, false, false, $lt, strlen($body)); } list($pos, $attname, $match) = $regary; $attname = strtolower($attname); /** - * We arrived at the end of attribute name. Several things possible - * here: - * '>' means the end of the tag and this is attribute type 4 - * '/' if followed by '>' means the same thing as above - * '\s' means a lot of things -- look what it's followed by. - * anything else means the attribute is invalid. - */ + * We arrived at the end of attribute name. Several things possible + * here: + * '>' means the end of the tag and this is attribute type 4 + * '/' if followed by '>' means the same thing as above + * '\s' means a lot of things -- look what it's followed by. + * anything else means the attribute is invalid. + */ switch($match){ case '/': /** - * This is an xhtml-style tag with a closing / at the - * end, like so: . Check if it's followed - * by the closing bracket. If not, then this tag is invalid - */ + * This is an xhtml-style tag with a closing / at the + * end, like so: . Check if it's followed + * by the closing bracket. If not, then this tag is invalid + */ if (substr($body, $pos, 2) == "/>"){ $pos++; $tagtype = 3; @@ -984,27 +1254,27 @@ function sq_getnxtag($body, $offset){ break; default: /** - * Skip whitespace and see what we arrive at. - */ + * Skip whitespace and see what we arrive at. + */ $pos = sq_skipspace($body, $pos); $char = substr($body, $pos, 1); /** - * Two things are valid here: - * '=' means this is attribute type 1 2 or 3. - * \w means this was attribute type 4. - * anything else we ignore and re-loop. End of tag and - * invalid stuff will be caught by our checks at the beginning - * of the loop. - */ + * Two things are valid here: + * '=' means this is attribute type 1 2 or 3. + * \w means this was attribute type 4. + * anything else we ignore and re-loop. End of tag and + * invalid stuff will be caught by our checks at the beginning + * of the loop. + */ if ($char == "="){ $pos++; $pos = sq_skipspace($body, $pos); /** - * Here are 3 possibilities: - * "'" attribute type 1 - * '"' attribute type 2 - * everything else is the content of tag type 3 - */ + * Here are 3 possibilities: + * "'" attribute type 1 + * '"' attribute type 2 + * everything else is the content of tag type 3 + */ $quot = substr($body, $pos, 1); if ($quot == "'"){ $regary = sq_findnxreg($body, $pos+1, "\'"); @@ -1024,28 +1294,28 @@ function sq_getnxtag($body, $offset){ $attary{$attname} = '"' . $attval . '"'; } else { /** - * These are hateful. Look for \s, or >. - */ + * These are hateful. Look for \s, or >. + */ $regary = sq_findnxreg($body, $pos, "[\s>]"); if ($regary == false){ return Array(false, false, false, $lt, strlen($body)); } list($pos, $attval, $match) = $regary; /** - * If it's ">" it will be caught at the top. - */ + * If it's ">" it will be caught at the top. + */ $attval = preg_replace("/\"/s", """, $attval); $attary{$attname} = '"' . $attval . '"'; } } else if (preg_match("|[\w/>]|", $char)) { /** - * That was attribute type 4. - */ + * That was attribute type 4. + */ $attary{$attname} = '"yes"'; } else { /** - * An illegal character. Find next '>' and return. - */ + * An illegal character. Find next '>' and return. + */ $gt = sq_findnxstr($body, $pos, ">"); return Array(false, false, false, $lt, $gt); } @@ -1053,53 +1323,54 @@ function sq_getnxtag($body, $offset){ } } /** - * The fact that we got here indicates that the tag end was never - * found. Return invalid tag indication so it gets stripped. - */ + * The fact that we got here indicates that the tag end was never + * found. Return invalid tag indication so it gets stripped. + */ return Array(false, false, false, $lt, strlen($body)); } /** - * This function checks attribute values for entity-encoded values - * and returns them translated into 8-bit strings so we can run - * checks on them. - * - * @param $attvalue A string to run entity check against. - * @return Translated value. - */ +* This function checks attribute values for entity-encoded values +* and returns them translated into 8-bit strings so we can run +* checks on them. +* +* @param $attvalue A string to run entity check against. +* @return Translated value. +*/ + function sq_deent($attvalue){ $me = 'sq_deent'; /** - * See if we have to run the checks first. All entities must start - * with "&". - */ - if (strpos($attvalue, "&") === false){ + * See if we have to run the checks first. All entities must start + * with "&". + */ + if (strpos($attvalue, '&') === false){ return $attvalue; } /** - * Check named entities first. - */ + * Check named entities first. + */ $trans = get_html_translation_table(HTML_ENTITIES); /** - * Leave " in, as it can mess us up. - */ + * Leave " in, as it can mess us up. + */ $trans = array_flip($trans); - unset($trans{"""}); + unset($trans{'"'}); while (list($ent, $val) = each($trans)){ - $attvalue = preg_replace("/$ent*(\W)/si", "$val\\1", $attvalue); + $attvalue = preg_replace('/' . $ent . '*/si', $val, $attvalue); } /** - * Now translate numbered entities from 1 to 255 if needed. - */ - if (strpos($attvalue, "#") !== false){ + * Now translate numbered entities from 1 to 255 if needed. + */ + if (strpos($attvalue, '#') !== false){ $omit = Array(34, 39); - for ($asc=1; $asc<256; $asc++){ + for ($asc = 256; $asc >= 0; $asc--){ if (!in_array($asc, $omit)){ $chr = chr($asc); - $attvalue = preg_replace("/\�*$asc;*(\D)/si", "$chr\\1", - $attvalue); - $attvalue = preg_replace("/\�*".dechex($asc).";*(\W)/si", - "$chr\\1", $attvalue); + $octrule = '/\�*' . $asc . ';*/si'; + $hexrule = '/\�*' . dechex($asc) . ';*/si'; + $attvalue = preg_replace($octrule, $chr, $attvalue); + $attvalue = preg_replace($hexrule, $chr, $attvalue); } } } @@ -1107,19 +1378,19 @@ function sq_deent($attvalue){ } /** - * This function runs various checks against the attributes. - * - * @param $tagname String with the name of the tag. - * @param $attary Array with all tag attributes. - * @param $rm_attnames See description for sq_sanitize - * @param $bad_attvals See description for sq_sanitize - * @param $add_attr_to_tag See description for sq_sanitize - * @param $message message object - * @param $id message id - * @return Array with modified attributes. - */ -function sq_fixatts($tagname, - $attary, +* This function runs various checks against the attributes. +* +* @param $tagname String with the name of the tag. +* @param $attary Array with all tag attributes. +* @param $rm_attnames See description for sq_sanitize +* @param $bad_attvals See description for sq_sanitize +* @param $add_attr_to_tag See description for sq_sanitize +* @param $message message object +* @param $id message id +* @return Array with modified attributes. +*/ +function sq_fixatts($tagname, + $attary, $rm_attnames, $bad_attvals, $add_attr_to_tag, @@ -1130,8 +1401,8 @@ function sq_fixatts($tagname, $me = 'sq_fixatts'; while (list($attname, $attvalue) = each($attary)){ /** - * See if this attribute should be removed. - */ + * See if this attribute should be removed. + */ foreach ($rm_attnames as $matchtag=>$matchattrs){ if (preg_match($matchtag, $tagname)){ foreach ($matchattrs as $matchattr){ @@ -1143,27 +1414,35 @@ function sq_fixatts($tagname, } } /** - * Remove any entities. - */ + * Remove any backslashes, entities, and extraneous whitespace. + */ + $attvalue = sq_unbackslash($attvalue); $attvalue = sq_deent($attvalue); + $attvalue = sq_unspace($attvalue); /** - * Now let's run checks on the attvalues. - * I don't expect anyone to comprehend this. If you do, - * get in touch with me so I can drive to where you live and - * shake your hand personally. :) - */ + * Remove \r \n \t \0 " " "\\" + */ + $attvalue = str_replace(Array("\r", "\n", "\t", "\0", " ", "\\"), + Array('', '','','','',''), $attvalue); + + /** + * Now let's run checks on the attvalues. + * I don't expect anyone to comprehend this. If you do, + * get in touch with me so I can drive to where you live and + * shake your hand personally. :) + */ foreach ($bad_attvals as $matchtag=>$matchattrs){ if (preg_match($matchtag, $tagname)){ foreach ($matchattrs as $matchattr=>$valary){ if (preg_match($matchattr, $attname)){ /** - * There are two arrays in valary. - * First is matches. - * Second one is replacements - */ + * There are two arrays in valary. + * First is matches. + * Second one is replacements + */ list($valmatch, $valrepl) = $valary; - $newvalue = + $newvalue = preg_replace($valmatch, $valrepl, $attvalue); if ($newvalue != $attvalue){ $attary{$attname} = $newvalue; @@ -1172,16 +1451,38 @@ function sq_fixatts($tagname, } } } + + /** - * Turn cid: urls into http-friendly ones. + * Replace empty src tags with the blank image. src is only used + * for frames, images, and image inputs. Doing a replace should + * not affect them working as should be, however it will stop + * IE from being kicked off when src for img tags are not set */ + if (($attname == 'src') && ($attvalue == '""')) { + $attary{$attname} = '"' . SM_PATH . 'images/blank.png"'; + } + + /** + * Turn cid: urls into http-friendly ones. + */ if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){ $attary{$attname} = sq_cid2http($message, $id, $attvalue, $mailbox); } + + /** + * "Hack" fix for Outlook using propriatary outbind:// protocol in img tags. + * One day MS might actually make it match something useful, for now, falling + * back to using cid2http, so we can grab the blank.png. + */ + if (preg_match("/^[\'\"]\s*outbind:\/\//si", $attvalue)) { + $attary{$attname} = sq_cid2http($message, $id, $attvalue, $mailbox); + } + } /** - * See if we need to append any attributes to this tag. - */ + * See if we need to append any attributes to this tag. + */ foreach ($add_attr_to_tag as $matchtag=>$addattary){ if (preg_match($matchtag, $tagname)){ $attary = array_merge($attary, $addattary); @@ -1191,102 +1492,144 @@ function sq_fixatts($tagname, } /** - * This function edits the style definition to make them friendly and - * usable in squirrelmail. - * - * @param $message the message object - * @param $id the message id - * @param $content a string with whatever is between - * @return a string with edited content. - */ -function sq_fixstyle($message, $id, $content){ +* This function edits the style definition to make them friendly and +* usable in SquirrelMail. +* +* @param $message the message object +* @param $id the message id +* @param $content a string with whatever is between +* @param $mailbox the message mailbox +* @return a string with edited content. +*/ +function sq_fixstyle($body, $pos, $message, $id, $mailbox){ global $view_unsafe_images; $me = 'sq_fixstyle'; + $ret = sq_findnxreg($body, $pos, ''); + if ($ret == FALSE){ + return array(FALSE, strlen($body)); + } + $newpos = $ret[0] + strlen($ret[2]); + $content = $ret[1]; /** - * First look for general BODY style declaration, which would be - * like so: - * body {background: blah-blah} - * and change it to .bodyclass so we can just assign it to a
- */ + * First look for general BODY style declaration, which would be + * like so: + * body {background: blah-blah} + * and change it to .bodyclass so we can just assign it to a
+ */ $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); $secremoveimg = '../images/' . _("sec_remove_eng.png"); /** - * Fix url('blah') declarations. - */ - $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si", + * Fix url('blah') declarations. + */ + $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si", "url(\\1$secremoveimg\\2)", $content); /** - * Fix url('https*://.*) declarations but only if $view_unsafe_images - * is false. - */ + * Fix url('https*://.*) declarations but only if $view_unsafe_images + * is false. + */ if (!$view_unsafe_images){ - $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si", + $content = preg_replace("|url\s*\(\s*([\'\"])\s*https*:.*?([\'\"])\s*\)|si", "url(\\1$secremoveimg\\2)", $content); } - + /** - * Fix urls that refer to cid: - */ - while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content, - $matches)){ + * Fix urls that refer to cid: + */ + while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si", + $content, $matches)){ $cidurl = $matches{1}; - $httpurl = sq_cid2http($message, $id, $cidurl); - $content = preg_replace("|url\($cidurl\)|si", + $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox); + $content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si", "url($httpurl)", $content); } /** - * Fix stupid css declarations which lead to vulnerabilities - * in IE. - */ - $match = Array('/expression/si', - '/behaviou*r/si', - '/binding/si'); - $replace = Array('idiocy', 'idiocy', 'idiocy'); + * Fix stupid css declarations which lead to vulnerabilities + * in IE. + */ + $match = Array('/expression/i', + '/behaviou*r/i', + '/binding/i', + '/include-source/i'); + $replace = Array('idiocy', 'idiocy', 'idiocy', 'idiocy'); $content = preg_replace($match, $replace, $content); - return $content; + return array($content, $newpos); } /** - * This function converts cid: url's into the ones that can be viewed in - * the browser. - * - * @param $message the message object - * @param $id the message id - * @param $cidurl the cid: url. - * @return a string with a http-friendly url - */ +* This function converts cid: url's into the ones that can be viewed in +* the browser. +* +* @param $message the message object +* @param $id the message id +* @param $cidurl the cid: url. +* @param $mailbox the message mailbox +* @return a string with a http-friendly url +*/ function sq_cid2http($message, $id, $cidurl, $mailbox){ /** - * Get rid of quotes. - */ + * Get rid of quotes. + */ $quotchar = substr($cidurl, 0, 1); - $cidurl = str_replace($quotchar, "", $cidurl); + if ($quotchar == '"' || $quotchar == "'"){ + $cidurl = str_replace($quotchar, "", $cidurl); + } else { + $quotchar = ''; + } $cidurl = substr(trim($cidurl), 4); $linkurl = find_ent_id($cidurl, $message); /* in case of non-save cid links $httpurl should be replaced by a sort of - unsave link image */ + unsave link image */ $httpurl = ''; - if ($linkurl) { - $httpurl = $quotchar . '../src/download.php?absolute_dl=true&' . + + /** + * This is part of a fix for Outlook Express 6.x generating + * cid URLs without creating content-id headers. These images are + * not part of the multipart/related html mail. The html contains + * references to + * attached images with as goal to render them inline although + * the attachment disposition property is not inline. + **/ + + if (empty($linkurl)) { + if (preg_match('/{.*}\//', $cidurl)) { + $cidurl = preg_replace('/{.*}\//','', $cidurl); + if (!empty($cidurl)) { + $linkurl = find_ent_id($cidurl, $message); + } + } + } + + if (!empty($linkurl)) { + $httpurl = $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' . "passed_id=$id&mailbox=" . urlencode($mailbox) . '&ent_id=' . $linkurl . $quotchar; + } else { + /** + * If we couldn't generate a proper img url, drop in a blank image + * instead of sending back empty, otherwise it causes unusual behaviour + */ + $httpurl = $quotechar . SM_PATH . 'images/blank.png'; } + return $httpurl; } /** - * This function changes the tag into a
tag since we - * can't really have a body-within-body. - * - * @param $attary an array of attributes and values of - * @return a modified array of attributes to be set for
- */ -function sq_body2div($attary){ +* This function changes the tag into a
tag since we +* can't really have a body-within-body. +* +* @param $attary an array of attributes and values of +* @param $mailbox mailbox we're currently reading (for cid2http) +* @param $message current message (for cid2http) +* @param $id current message id (for cid2http) +* @return a modified array of attributes to be set for
+*/ +function sq_body2div($attary, $mailbox, $message, $id){ $me = 'sq_body2div'; $divattary = Array('class' => "'bodyclass'"); - $bgcolor = '#ffffff'; $text = '#000000'; + $has_bgc_stl = $has_txt_stl = false; $styledef = ''; if (is_array($attary) && sizeof($attary) > 0){ foreach ($attary as $attname=>$attvalue){ @@ -1294,16 +1637,25 @@ function sq_body2div($attary){ $attvalue = str_replace($quotchar, "", $attvalue); switch ($attname){ case 'background': + $attvalue = sq_cid2http($message, $id, + $attvalue, $mailbox); $styledef .= "background-image: url('$attvalue'); "; break; case 'bgcolor': + $has_bgc_stl = true; $styledef .= "background-color: $attvalue; "; break; case 'text': + $has_txt_stl = true; $styledef .= "color: $attvalue; "; break; } } + // Outlook defines a white bgcolor and no text color. This can lead to + // white text on a white bg with certain themes. + if ($has_bgc_stl && !$has_txt_stl) { + $styledef .= "color: $text; "; + } if (strlen($styledef) > 0){ $divattary{"style"} = "\"$styledef\""; } @@ -1312,72 +1664,75 @@ function sq_body2div($attary){ } /** - * This is the main function and the one you should actually be calling. - * There are several variables you should be aware of an which need - * special description. - * - * Since the description is quite lengthy, see it here: - * http://www.mricon.com/html/phpfilter.html - * - * @param $body the string with HTML you wish to filter - * @param $tag_list see description above - * @param $rm_tags_with_content see description above - * @param $self_closing_tags see description above - * @param $force_tag_closing see description above - * @param $rm_attnames see description above - * @param $bad_attvals see description above - * @param $add_attr_to_tag see description above - * @param $message message object - * @param $id message id - * @return sanitized html safe to show on your pages. - */ -function sq_sanitize($body, - $tag_list, - $rm_tags_with_content, - $self_closing_tags, - $force_tag_closing, - $rm_attnames, - $bad_attvals, - $add_attr_to_tag, - $message, - $id, - $mailbox - ){ +* This is the main function and the one you should actually be calling. +* There are several variables you should be aware of an which need +* special description. +* +* Since the description is quite lengthy, see it here: +* http://linux.duke.edu/projects/mini/htmlfilter/ +* +* @param $body the string with HTML you wish to filter +* @param $tag_list see description above +* @param $rm_tags_with_content see description above +* @param $self_closing_tags see description above +* @param $force_tag_closing see description above +* @param $rm_attnames see description above +* @param $bad_attvals see description above +* @param $add_attr_to_tag see description above +* @param $message message object +* @param $id message id +* @return sanitized html safe to show on your pages. +*/ +function sq_sanitize($body, + $tag_list, + $rm_tags_with_content, + $self_closing_tags, + $force_tag_closing, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id, + $mailbox + ){ $me = 'sq_sanitize'; + $rm_tags = array_shift($tag_list); /** - * Normalize rm_tags and rm_tags_with_content. - */ - @array_walk($rm_tags, 'sq_casenormalize'); + * Normalize rm_tags and rm_tags_with_content. + */ + @array_walk($tag_list, 'sq_casenormalize'); @array_walk($rm_tags_with_content, 'sq_casenormalize'); @array_walk($self_closing_tags, 'sq_casenormalize'); /** - * See if tag_list is of tags to remove or tags to allow. - * false means remove these tags - * true means allow these tags - */ - $rm_tags = array_shift($tag_list); + * See if tag_list is of tags to remove or tags to allow. + * false means remove these tags + * true means allow these tags + */ $curpos = 0; $open_tags = Array(); - $trusted = "\n"; + $trusted = "\n\n"; $skip_content = false; /** - * Take care of netscape's stupid javascript entities like - * &{alert('boo')}; - */ + * Take care of netscape's stupid javascript entities like + * &{alert('boo')}; + */ $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body); - while (($curtag=sq_getnxtag($body, $curpos)) != FALSE){ + while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; $free_content = substr($body, $curpos, $lt-$curpos); /** - * Take care of . Edit the - * content before we apply it. - */ - $free_content = sq_fixstyle($message, $id, $free_content); + * Take care of