X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=functions%2Fmime.php;h=202028ee3840cf3499757d2dd6ec953020ed5bf4;hb=fd81e884c200a3e6e2933c6c810adaf35cd8448b;hp=90b8b37a0a8e6e7e4d137fc63a5e9b1d76175209;hpb=cba164a0429d0be0b895bf254d9d7f8ce16015c1;p=squirrelmail.git diff --git a/functions/mime.php b/functions/mime.php index 90b8b37a..a65e5619 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -1,750 +1,1980 @@ entities[] = $msg; - } - } - - - - /* --------------------------------------------------------------------------------- */ - /* MIME DECODING */ - /* --------------------------------------------------------------------------------- */ - - // This function gets the structure of a message and stores it in the "message" class. - // It will return this object for use with all relevant header information and - // fully parsed into the standard "message" object format. - function mime_structure ($imap_stream, $header) { - global $debug_mime; - sqimap_messages_flag ($imap_stream, $header->id, $header->id, "Seen"); - - $id = $header->id; - fputs ($imap_stream, "a001 FETCH $id BODYSTRUCTURE\r\n"); - // - // This should use sqimap_read_data instead of reading it itself - // - $read = fgets ($imap_stream, 10000); - $response = substr($read, 0, 4); - $bodystructure = ""; - while ($response != "a001") { - $bodystructure .= $read; - $read = fgets ($imap_stream, 10000); - $response = substr($read, 0, 4); - } - $read = $bodystructure; - - if ($debug_mime) echo "$read

\n"; - // isolate the body structure and remove beginning and end parenthesis - $read = trim(substr ($read, strpos(strtolower($read), "bodystructure") + 13)); - $read = trim(substr ($read, 0, -1)); - $end = mime_match_parenthesis(0, $read); - while ($end == strlen($read)-1) { - $read = trim(substr ($read, 0, -1)); - $read = trim(substr ($read, 1)); - $end = mime_match_parenthesis(0, $read); - } - - if ($debug_mime) echo "$read

\n"; - - $msg = mime_parse_structure ($read, 0); - $msg->header = $header; - return $msg; - } - - // this starts the parsing of a particular structure. It is called recursively, - // so it can be passed different structures. It returns an object of type - // $message. - // First, it checks to see if it is a multipart message. If it is, then it - // handles that as it sees is necessary. If it is just a regular entity, - // then it parses it and adds the necessary header information (by calling out - // to mime_get_elements() - function mime_parse_structure ($structure, $ent_id) { - global $debug_mime; - if ($debug_mime) echo "START: mime_parse_structure()
\n"; - $msg = new message(); - if (substr($structure, 0, 1) == "(") { - $ent_id = mime_new_element_level($ent_id); - $start = $end = -1; - if ($debug_mime) echo "
$structure
"; - do { - if ($debug_mime) echo "Found entity...
"; - $start = $end+1; - $end = mime_match_parenthesis ($start, $structure); - - $element = substr($structure, $start+1, ($end - $start)-1); - $ent_id = mime_increment_id ($ent_id); - $newmsg = mime_parse_structure ($element, $ent_id); - $msg->addEntity ($newmsg); - } while (substr($structure, $end+1, 1) == "("); - } else { - // parse the elements - if ($debug_mime) echo "
$structure
"; - $msg = mime_get_element ($structure, $msg, $ent_id); - if ($debug_mime) echo "
"; - } - return $msg; - if ($debug_mime) echo "  END: mime_parse_structure()
"; - } - - // Increments the element ID. An element id can look like any of - // the following: 1, 1.2, 4.3.2.4.1, etc. This function increments - // the last number of the element id, changing 1.2 to 1.3. - function mime_increment_id ($id) { - global $debug_mime; - if (strpos($id, ".")) { - $first = substr($id, 0, strrpos($id, ".")); - $last = substr($id, strrpos($id, ".")+1); - $last++; - $new = $first . "." .$last; - } else { - $new = $id + 1; - } - if ($debug_mime) echo "INCREMENT: $new
"; - return $new; - } - - // See comment for mime_increment_id(). - // This adds another level on to the entity_id changing 1.3 to 1.3.0 - // NOTE: 1.3.0 is not a valid element ID. It MUST be incremented - // before it can be used. I left it this way so as not to have - // to make a special case if it is the first entity_id. It - // always increments it, and that works fine. - function mime_new_element_level ($id) { - if (!$id) $id = 0; - else $id = $id . ".0"; - - return $id; - } - - function mime_get_element (&$structure, $msg, $ent_id) { - global $debug_mime; - $elem_num = 1; - $msg->header = new msg_header(); - $msg->header->entity_id = $ent_id; - $properties = array(); - - while (strlen($structure) > 0) { - $structure = trim($structure); - $char = substr($structure, 0, 1); - - if (strtolower(substr($structure, 0, 3)) == "nil") { - $text = ""; - $structure = substr($structure, 3); - } else if ($char == "\"") { - // loop through until we find the matching quote, and return that as a string - $pos = 1; - $char = substr($structure, $pos, 1); - $text = ""; - while ($char != "\"" && $pos < strlen($structure)) { - $text .= $char; - $pos++; - $char = substr($structure, $pos, 1); - } - $structure = substr($structure, strlen($text) + 2); - } else if ($char == "(") { - // comment me - $end = mime_match_parenthesis (0, $structure); - $sub = substr($structure, 1, $end-1); - $properties = mime_get_props($properties, $sub); - $structure = substr($structure, strlen($sub) + 2); - } else { - // loop through until we find a space or an end parenthesis - $pos = 0; - $char = substr($structure, $pos, 1); - $text = ""; - while ($char != " " && $char != ")" && $pos < strlen($structure)) { - $text .= $char; - $pos++; - $char = substr($structure, $pos, 1); - } - $structure = substr($structure, strlen($text)); - } - if ($debug_mime) echo "$elem_num : $text
"; - - // This is where all the text parts get put into the header - switch ($elem_num) { - case 1: - $msg->header->type0 = strtolower($text); - if ($debug_mime) echo "type0 = ".strtolower($text)."
"; - break; - case 2: - $msg->header->type1 = strtolower($text); - if ($debug_mime) echo "type1 = ".strtolower($text)."
"; - break; - case 5: - $msg->header->description = $text; - if ($debug_mime) echo "description = $text
"; - break; - case 6: - $msg->header->encoding = strtolower($text); - if ($debug_mime) echo "encoding = ".strtolower($text)."
"; - break; - case 7: - $msg->header->size = $text; - if ($debug_mime) echo "size = $text
"; - break; + +/** + * mime.php + * + * Copyright (c) 1999-2003 The SquirrelMail Project Team + * Licensed under the GNU GPL. For full terms see the file COPYING. + * + * This contains the functions necessary to detect and decode MIME + * messages. + * + * $Id$ + * @package squirrelmail + */ + +/** The typical includes... */ +require_once(SM_PATH . 'functions/imap.php'); +require_once(SM_PATH . 'functions/attachment_common.php'); + +/* -------------------------------------------------------------------------- */ +/* MIME DECODING */ +/* -------------------------------------------------------------------------- */ + +/** + * Get the MIME structure + * + * This function gets the structure of a message and stores it in the "message" class. + * It will return this object for use with all relevant header information and + * fully parsed into the standard "message" object format. + */ +function mime_structure ($bodystructure, $flags=array()) { + + /* Isolate the body structure and remove beginning and end parenthesis. */ + $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13)); + $read = trim(substr ($read, 0, -1)); + $i = 0; + $msg = Message::parseStructure($read,$i); + if (!is_object($msg)) { + include_once(SM_PATH . 'functions/display_messages.php'); + global $color, $mailbox; + /* removed urldecode because $_GET is auto urldecoded ??? */ + displayPageHeader( $color, $mailbox ); + $errormessage = _("SquirrelMail could not decode the bodystructure of the message"); + $errormessage .= '
'._("the provided bodystructure by your imap-server").':

'; + $errormessage .= '
' . htmlspecialchars($read) . '
'; + plain_error_message( $errormessage, $color ); + echo ''; + exit; + } + if (count($flags)) { + foreach ($flags as $flag) { + $char = strtoupper($flag{1}); + switch ($char) { + case 'S': + if (strtolower($flag) == '\\seen') { + $msg->is_seen = true; + } + break; + case 'A': + if (strtolower($flag) == '\\answered') { + $msg->is_answered = true; + } + break; + case 'D': + if (strtolower($flag) == '\\deleted') { + $msg->is_deleted = true; + } + break; + case 'F': + if (strtolower($flag) == '\\flagged') { + $msg->is_flagged = true; + } + break; + case 'M': + if (strtolower($flag) == '$mdnsent') { + $msg->is_mdnsent = true; + } + break; + default: + break; + } + } + } + // listEntities($msg); + return $msg; +} + + + +/* This starts the parsing of a particular structure. It is called recursively, + * so it can be passed different structures. It returns an object of type + * $message. + * First, it checks to see if it is a multipart message. If it is, then it + * handles that as it sees is necessary. If it is just a regular entity, + * then it parses it and adds the necessary header information (by calling out + * to mime_get_elements() + */ + +function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) { + global $uid_support; + /* Do a bit of error correction. If we couldn't find the entity id, just guess + * that it is the first one. That is usually the case anyway. + */ + + if (!$ent_id) { + $cmd = "FETCH $id BODY[]"; + } else { + $cmd = "FETCH $id BODY[$ent_id]"; + } + + if ($fetch_size!=0) $cmd .= "<0.$fetch_size>"; + + $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, $uid_support); + do { + $topline = trim(array_shift($data)); + } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ; + + $wholemessage = implode('', $data); + if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) { + $ret = substr($wholemessage, 0, $regs[1]); + /* There is some information in the content info header that could be important + * in order to parse html messages. Let's get them here. + */ +// if ($ret{0} == '<') { +// $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, $uid_support); +// } + } else if (ereg('"([^"]*)"', $topline, $regs)) { + $ret = $regs[1]; + } else { + global $where, $what, $mailbox, $passed_id, $startMessage; + $par = 'mailbox=' . urlencode($mailbox) . '&passed_id=' . $passed_id; + if (isset($where) && isset($what)) { + $par .= '&where=' . urlencode($where) . '&what=' . urlencode($what); + } else { + $par .= '&startMessage=' . $startMessage . '&show_more=0'; + } + $par .= '&response=' . urlencode($response) . + '&message=' . urlencode($message) . + '&topline=' . urlencode($topline); + + echo '
' . + '' . + '' . + '" . + '" . + '" . + '" . + "
' . + _("Body retrieval error. The reason for this is most probably that the message is malformed.") . + '
' . _("Command:") . "$cmd
' . _("Response:") . "$response
' . _("Message:") . "$message
' . _("FETCH line:") . "$topline


"; + + $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, $uid_support); + array_shift($data); + $wholemessage = implode('', $data); + + $ret = $wholemessage; + } + return $ret; +} + +function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) { + global $uid_support; + + /* Don't kill the connection if the browser is over a dialup + * and it would take over 30 seconds to download it. + * Dont call set_time_limit in safe mode. + */ + + if (!ini_get('safe_mode')) { + set_time_limit(0); + } + /* in case of base64 encoded attachments, do not buffer them. + Instead, echo the decoded attachment directly to screen */ + if (strtolower($encoding) == 'base64') { + if (!$ent_id) { + $query = "FETCH $id BODY[]"; + } else { + $query = "FETCH $id BODY[$ent_id]"; + } + sqimap_run_command($imap_stream,$query,true,$response,$message,$uid_support,'sqimap_base64_decode','php://stdout',true); + } else { + $body = mime_fetch_body ($imap_stream, $id, $ent_id); + echo decodeBody($body, $encoding); + } + + /* + TODO, use the same method for quoted printable. + However, I assume that quoted printable attachments aren't that large + so the performancegain / memory usage drop will be minimal. + If we decide to add that then we need to adapt sqimap_fread because + we need to split te result on \n and fread doesn't stop at \n. That + means we also should provide $results from sqimap_fread (by ref) to + te function and set $no_return to false. The $filter function for + quoted printable should handle unsetting of $results. + */ + /* + TODO 2: find out how we write to the output stream php://stdout. fwrite + doesn't work because 'php://stdout isn't a stream. + */ + + return; +/* + fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n"); + $cnt = 0; + $continue = true; + $read = fgets ($imap_stream,8192); + + + // This could be bad -- if the section has sqimap_session_id() . ' OK' + // or similar, it will kill the download. + while (!ereg("^".$sid_s." (OK|BAD|NO)(.*)$", $read, $regs)) { + if (trim($read) == ')==') { + $read1 = $read; + $read = fgets ($imap_stream,4096); + if (ereg("^".$sid." (OK|BAD|NO)(.*)$", $read, $regs)) { + return; + } else { + echo decodeBody($read1, $encoding) . + decodeBody($read, $encoding); + } + } else if ($cnt) { + echo decodeBody($read, $encoding); + } + $read = fgets ($imap_stream,4096); + $cnt++; +// break; + } +*/ +} + +/* -[ END MIME DECODING ]----------------------------------------------------------- */ + +/* This is here for debugging purposes. It will print out a list + * of all the entity IDs that are in the $message object. + */ +function listEntities ($message) { + if ($message) { + echo "" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '
'; + for ($i = 0; isset($message->entities[$i]); $i++) { + echo "$i : "; + $msg = listEntities($message->entities[$i]); + + if ($msg) { + echo "return: "; + return $msg; + } + } + } +} + +function getPriorityStr($priority) { + $priority_level = substr($priority,0,1); + + switch($priority_level) { + /* Check for a higher then normal priority. */ + case '1': + case '2': + $priority_string = _("High"); + break; + + /* Check for a lower then normal priority. */ + case '4': + case '5': + $priority_string = _("Low"); + break; + + /* Check for a normal priority. */ + case '3': + default: + $priority_level = '3'; + $priority_string = _("Normal"); + break; + + } + return $priority_string; +} + +/* returns a $message object for a particular entity id */ +function getEntity ($message, $ent_id) { + return $message->getEntity($ent_id); +} + +/* translateText + * Extracted from strings.php 23/03/2002 + */ + +function translateText(&$body, $wrap_at, $charset) { + global $where, $what; /* from searching */ + global $color; /* color theme */ + + require_once(SM_PATH . 'functions/url_parser.php'); + + $body_ary = explode("\n", $body); + for ($i=0; $i < count($body_ary); $i++) { + $line = $body_ary[$i]; + if (strlen($line) - 2 >= $wrap_at) { + sqWordWrap($line, $wrap_at); + } + $line = charset_decode($charset, $line); + $line = str_replace("\t", ' ', $line); + + parseUrl ($line); + + $quotes = 0; + $pos = 0; + $j = strlen($line); + + while ($pos < $j) { + if ($line[$pos] == ' ') { + $pos++; + } else if (strpos($line, '>', $pos) === $pos) { + $pos += 4; + $quotes++; + } else { + break; + } + } + + if ($quotes % 2) { + if (!isset($color[13])) { + $color[13] = '#800000'; + } + $line = '' . $line . ''; + } elseif ($quotes) { + if (!isset($color[14])) { + $color[14] = '#FF0000'; + } + $line = '' . $line . ''; + } + + $body_ary[$i] = $line; + } + $body = '
' . implode("\n", $body_ary) . '
'; +} + +/* This returns a parsed string called $body. That string can then + * be displayed as the actual message in the HTML. It contains + * everything needed, including HTML Tags, Attachments at the + * bottom, etc. + */ +function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX') { + /* This if statement checks for the entity to show as the + * primary message. To add more of them, just put them in the + * order that is their priority. + */ + global $startMessage, $username, $key, $imapServerAddress, $imapPort, + $show_html_default, $sort, $has_unsafe_images, $passed_ent_id; + global $languages, $squirrelmail_language; + + if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { + $view_unsafe_images = false; + } + + $body = ''; + $urlmailbox = urlencode($mailbox); + $body_message = getEntity($message, $ent_num); + if (($body_message->header->type0 == 'text') || + ($body_message->header->type0 == 'rfc822')) { + $body = mime_fetch_body ($imap_stream, $id, $ent_num); + $body = decodeBody($body, $body_message->header->encoding); + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { + if (mb_detect_encoding($body) != 'ASCII') { + $body = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $body); + } + } + $hookResults = do_hook("message_body", $body); + $body = $hookResults[1]; + + /* If there are other types that shouldn't be formatted, add + * them here. + */ + + if ($body_message->header->type1 == 'html') { + if ($show_html_default <> 1) { + $entity_conv = array(' ' => ' ', + '

' => "\n", + '
' => "\n", + '

' => "\n", + '
' => "\n", + '>' => '>', + '<' => '<'); + $body = strtr($body, $entity_conv); + $body = strip_tags($body); + $body = trim($body); + translateText($body, $wrap_at, + $body_message->header->getParameter('charset')); + } else { + $body = magicHTML($body, $id, $message, $mailbox); + } + } else { + translateText($body, $wrap_at, + $body_message->header->getParameter('charset')); + } + $link = 'passed_id=' . $id . '&ent_id='.$ent_num. + '&mailbox=' . $urlmailbox .'&sort=' . $sort . + '&startMessage=' . $startMessage . '&show_more=0'; + if (isset($passed_ent_id)) { + $link .= '&passed_ent_id='.$passed_ent_id; + } + $body .= '

' . _("Download this as a file") . ''; + if ($view_unsafe_images) { + $text = _("Hide Unsafe Images"); + } else { + if (isset($has_unsafe_images) && $has_unsafe_images) { + $link .= '&view_unsafe_images=1'; + $text = _("View Unsafe Images"); + } else { + $text = ''; + } + } + if($text != '') { + $body .= ' | ' . $text . ''; + } + $body .= '

' . "\n"; + } + return $body; +} + + +function formatAttachments($message, $exclude_id, $mailbox, $id) { + global $where, $what, $startMessage, $color; + static $ShownHTML = 0; + + $att_ar = $message->getAttachments($exclude_id); + + if (!count($att_ar)) return ''; + + $attachments = ''; + + $urlMailbox = urlencode($mailbox); + + foreach ($att_ar as $att) { + $ent = $att->entity_id; + $header = $att->header; + $type0 = strtolower($header->type0); + $type1 = strtolower($header->type1); + $name = ''; + $links['download link']['text'] = _("download"); + $links['download link']['href'] = SM_PATH . + "src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&ent_id=$ent"; + $ImageURL = ''; + if ($type0 =='message' && $type1 == 'rfc822') { + $default_page = SM_PATH . 'src/read_body.php'; + $rfc822_header = $att->rfc822_header; + $filename = $rfc822_header->subject; + if (trim( $filename ) == '') { + $filename = 'untitled-[' . $ent . ']' ; + } + $from_o = $rfc822_header->from; + if (is_object($from_o)) { + $from_name = decodeHeader($from_o->getAddress(false)); + } else { + $from_name = _("Unknown sender"); + } + $description = $from_name; + } else { + $default_page = SM_PATH . 'src/download.php'; + if (is_object($header->disposition)) { + $filename = $header->disposition->getProperty('filename'); + if (trim($filename) == '') { + $name = decodeHeader($header->disposition->getProperty('name')); + if (trim($name) == '') { + $name = $header->getParameter('name'); + if(trim($name) == '') { + if (trim( $header->id ) == '') { + $filename = 'untitled-[' . $ent . ']' ; + } else { + $filename = 'cid: ' . $header->id; + } + } else { + $filename = $name; + } + } else { + $filename = $name; + } + } + } else { + $filename = $header->getParameter('name'); + if (!trim($filename)) { + if (trim( $header->id ) == '') { + $filename = 'untitled-[' . $ent . ']' ; + } else { + $filename = 'cid: ' . $header->id; + } + } + } + if ($header->description) { + $description = decodeHeader($header->description); + } else { + $description = ''; + } + } + + $display_filename = $filename; + if (isset($passed_ent_id)) { + $passed_ent_id_link = '&passed_ent_id='.$passed_ent_id; + } else { + $passed_ent_id_link = ''; + } + $defaultlink = $default_page . "?startMessage=$startMessage" + . "&passed_id=$id&mailbox=$urlMailbox" + . '&ent_id='.$ent.$passed_ent_id_link; + if ($where && $what) { + $defaultlink .= '&where='. urlencode($where).'&what='.urlencode($what); + } + /* This executes the attachment hook with a specific MIME-type. + * If that doesn't have results, it tries if there's a rule + * for a more generic type. + */ + $hookresults = do_hook("attachment $type0/$type1", $links, + $startMessage, $id, $urlMailbox, $ent, $defaultlink, + $display_filename, $where, $what); + if(count($hookresults[1]) <= 1) { + $hookresults = do_hook("attachment $type0/*", $links, + $startMessage, $id, $urlMailbox, $ent, $defaultlink, + $display_filename, $where, $what); + } + + $links = $hookresults[1]; + $defaultlink = $hookresults[6]; + + $attachments .= '' . + ''.decodeHeader($display_filename).' ' . + '' . show_readable_size($header->size) . + '  ' . + "[ $type0/$type1 ] " . + ''; + $attachments .= '' . $description . ''; + $attachments .= ' '; + + $skipspaces = 1; + foreach ($links as $val) { + if ($skipspaces) { + $skipspaces = 0; + } else { + $attachments .= '  |  '; + } + $attachments .= '' . $val['text'] . ''; + } + unset($links); + $attachments .= "\n"; + } + $attachmentadd = do_hook_function('attachments_bottom',$attachments); + if ($attachmentadd != '') + $attachments = $attachmentadd; + return $attachments; +} + +function sqimap_base64_decode(&$string) { + $string = str_replace("\r\n", "\n", $string); + $string = base64_decode($string); +} + +/* This function decodes the body depending on the encoding type. */ +function decodeBody($body, $encoding) { + global $show_html_default; + + $body = str_replace("\r\n", "\n", $body); + $encoding = strtolower($encoding); + + $encoding_handler = do_hook_function('decode_body', $encoding); + + + // plugins get first shot at decoding the body + // + if (!empty($encoding_handler) && function_exists($encoding_handler)) { + $body = $encoding_handler('decode', $body); + + } else if ($encoding == 'quoted-printable' || + $encoding == 'quoted_printable') { + $body = quoted_printable_decode($body); + + while (ereg("=\n", $body)) { + $body = ereg_replace ("=\n", '', $body); + } + + } else if ($encoding == 'base64') { + $body = base64_decode($body); + } + + // All other encodings are returned raw. + return $body; +} + +/* + * This functions decode strings that is encoded according to + * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). + * Patched by Christian Schmidt 23/03/2002 + */ +function decodeHeader ($string, $utfencode=true,$htmlsave=true) { + global $languages, $squirrelmail_language; + if (is_array($string)) { + $string = implode("\n", $string); + } + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { + $string = $languages[$squirrelmail_language]['XTRA_CODE']('decodeheader', $string); + // Do we need to return at this point? + // return $string; + } + $i = 0; + $iLastMatch = -2; + $encoded = true; + + $aString = explode(' ',$string); + $ret = ''; + foreach ($aString as $chunk) { + if ($encoded && $chunk === '') { + continue; + } elseif ($chunk === '') { + $ret .= ' '; + continue; + } + $encoded = false; + /* if encoded words are not separated by a linear-space-white we still catch them */ + $j = $i-1; + + while ($match = preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) { + /* if the last chunk isn't an encoded string then put back the space, otherwise don't */ + if ($iLastMatch !== $j) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } + } + $iLastMatch = $i; + $j = $i; + $ret .= $res[1]; + $encoding = ucfirst($res[3]); + switch ($encoding) + { + case 'B': + $replace = base64_decode($res[4]); + $ret .= charset_decode($res[2],$replace); + break; + case 'Q': + $replace = str_replace('_', ' ', $res[4]); + $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', + $replace); + /* Only encode into entities by default. Some places + * don't need the encoding, like the compose form. + */ + if ($utfencode) { + $replace = charset_decode($res[2], $replace); + } else { + if ($htmlsave) { + $replace = htmlspecialchars($replace); + } + } + $ret .= $replace; + break; + default: + break; + } + $chunk = $res[5]; + $encoded = true; + } + + if (!$encoded && $htmlsave) { + $ret .= htmlspecialchars($chunk); + } else { + $ret .= $chunk; + } + + if (!$encoded) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } + } + ++$i; + } + /* remove the first added space */ + if ($ret) { + if ($htmlsave) { + $ret = substr($ret,6); + } else { + $ret = substr($ret,1); + } + } + + return $ret; +} + +/* + * Encode a string according to RFC 1522 for use in headers if it + * contains 8-bit characters or anything that looks like it should + * be encoded. + */ +function encodeHeader ($string) { + global $default_charset, $languages, $squirrelmail_language; + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) { + return $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string); + } + if (strtolower($default_charset) == 'iso-8859-1') { + $string = str_replace("\240",' ',$string); + } + + // Encode only if the string contains 8-bit characters or =? + $j = strlen($string); + $max_l = 75 - strlen($default_charset) - 7; + $aRet = array(); + $ret = ''; + $iEncStart = $enc_init = false; + $cur_l = $iOffset = 0; + for($i = 0; $i < $j; ++$i) { + switch($string{$i}) + { + case '=': + case '<': + case '>': + case ',': + case '?': + case '_': + if ($iEncStart === false) { + $iEncStart = $i; + } + $cur_l+=3; + if ($cur_l > ($max_l-2)) { + /* if there is an stringpart that doesn't need encoding, add it */ + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } else { + $ret .= sprintf("=%02X",ord($string{$i})); + } + break; + case '(': + case ')': + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } + break; + case ' ': + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } else { + $ret .= '_'; + } + } + break; + default: + $k = ord($string{$i}); + if ($k > 126) { + if ($iEncStart === false) { + // do not start encoding in the middle of a string, also take the rest of the word. + $sLeadString = substr($string,0,$i); + $aLeadString = explode(' ',$sLeadString); + $sToBeEncoded = array_pop($aLeadString); + $iEncStart = $i - strlen($sToBeEncoded); + $ret .= $sToBeEncoded; + $cur_l += strlen($sToBeEncoded); + } + $cur_l += 3; + /* first we add the encoded string that reached it's max size */ + if ($cur_l > ($max_l-2)) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?= "; /* the next part is also encoded => separate by space */ + $cur_l = 3; + $ret = ''; + $iOffset = $i; + $iEncStart = $i; + } + $enc_init = true; + $ret .= sprintf("=%02X", $k); + } else { + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iEncStart = false; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + } else { + $ret .= $string{$i}; + } + } + } + break; + } + } + + if ($enc_init) { + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + } else { + $aRet[] = substr($string,$iOffset); + } + $string = implode('',$aRet); + } + return $string; +} + +/* This function trys to locate the entity_id of a specific mime element */ +function find_ent_id($id, $message) { + for ($i = 0, $ret = ''; $ret == '' && $i < count($message->entities); $i++) { + if ($message->entities[$i]->header->type0 == 'multipart') { + $ret = find_ent_id($id, $message->entities[$i]); + } else { + if (strcasecmp($message->entities[$i]->header->id, $id) == 0) { +// if (sq_check_save_extension($message->entities[$i])) { + return $message->entities[$i]->entity_id; +// } + } + } + } + return $ret; +} + +function sq_check_save_extension($message) { + $filename = $message->getFilename(); + $ext = substr($filename, strrpos($filename,'.')+1); + $save_extensions = array('jpg','jpeg','gif','png','bmp'); + return in_array($ext, $save_extensions); +} + + +/** + ** HTMLFILTER ROUTINES + */ + +/** + * This function is more or less a wrapper around stripslashes. Apparently + * Explorer is stupid enough to just remove the backslashes and then + * execute the content of the attribute as if nothing happened. + * Who does that? + * + * @param attvalue The value of the attribute + * @return attvalue The value of the attribute stripslashed. + */ +function sq_unbackslash($attvalue){ + /** + * Remove any backslashes. See if there are any first. + */ + + if (strstr($attvalue, '\\') !== false){ + $attvalue = stripslashes($attvalue); + } + return $attvalue; +} + +/** + * Kill any tabs, newlines, or carriage returns. Our friends the + * makers of the browser with 95% market value decided that it'd + * be funny to make "java[tab]script" be just as good as "javascript". + * + * @param attvalue The attribute value before extraneous spaces removed. + * @return attvalue The attribute value after extraneous spaces removed. + */ +function sq_unspace($attvalue){ + if (strcspn($attvalue, "\t\r\n") != strlen($attvalue)){ + $attvalue = str_replace(Array("\t", "\r", "\n"), Array('', '', ''), + $attvalue); + } + return $attvalue; +} + +/** + * This function returns the final tag out of the tag name, an array + * of attributes, and the type of the tag. This function is called by + * sq_sanitize internally. + * + * @param $tagname the name of the tag. + * @param $attary the array of attributes and their values + * @param $tagtype The type of the tag (see in comments). + * @return a string with the final tag representation. + */ +function sq_tagprint($tagname, $attary, $tagtype){ + $me = 'sq_tagprint'; + + if ($tagtype == 2){ + $fulltag = ''; + } else { + $fulltag = '<' . $tagname; + if (is_array($attary) && sizeof($attary)){ + $atts = Array(); + while (list($attname, $attvalue) = each($attary)){ + array_push($atts, "$attname=$attvalue"); + } + $fulltag .= ' ' . join(" ", $atts); + } + if ($tagtype == 3){ + $fulltag .= ' /'; + } + $fulltag .= '>'; + } + return $fulltag; +} + +/** + * A small helper function to use with array_walk. Modifies a by-ref + * value and makes it lowercase. + * + * @param $val a value passed by-ref. + * @return void since it modifies a by-ref value. + */ +function sq_casenormalize(&$val){ + $val = strtolower($val); +} + +/** + * This function skips any whitespace from the current position within + * a string and to the next non-whitespace value. + * + * @param $body the string + * @param $offset the offset within the string where we should start + * looking for the next non-whitespace character. + * @return the location within the $body where the next + * non-whitespace char is located. + */ +function sq_skipspace($body, $offset){ + $me = 'sq_skipspace'; + preg_match('/^(\s*)/s', substr($body, $offset), $matches); + if (sizeof($matches{1})){ + $count = strlen($matches{1}); + $offset += $count; + } + return $offset; +} + +/** + * This function looks for the next character within a string. It's + * really just a glorified "strpos", except it catches if failures + * nicely. + * + * @param $body The string to look for needle in. + * @param $offset Start looking from this position. + * @param $needle The character/string to look for. + * @return location of the next occurance of the needle, or + * strlen($body) if needle wasn't found. + */ +function sq_findnxstr($body, $offset, $needle){ + $me = 'sq_findnxstr'; + $pos = strpos($body, $needle, $offset); + if ($pos === FALSE){ + $pos = strlen($body); + } + return $pos; +} + +/** + * This function takes a PCRE-style regexp and tries to match it + * within the string. + * + * @param $body The string to look for needle in. + * @param $offset Start looking from here. + * @param $reg A PCRE-style regex to match. + * @return Returns a false if no matches found, or an array + * with the following members: + * - integer with the location of the match within $body + * - string with whatever content between offset and the match + * - string with whatever it is we matched + */ +function sq_findnxreg($body, $offset, $reg){ + $me = 'sq_findnxreg'; + $matches = Array(); + $retarr = Array(); + preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches); + if (!isset($matches{0}) || !$matches{0}){ + $retarr = false; + } else { + $retarr{0} = $offset + strlen($matches{1}); + $retarr{1} = $matches{1}; + $retarr{2} = $matches{2}; + } + return $retarr; +} + +/** + * This function looks for the next tag. + * + * @param $body String where to look for the next tag. + * @param $offset Start looking from here. + * @return false if no more tags exist in the body, or + * an array with the following members: + * - string with the name of the tag + * - array with attributes and their values + * - integer with tag type (1, 2, or 3) + * - integer where the tag starts (starting "<") + * - integer where the tag ends (ending ">") + * first three members will be false, if the tag is invalid. + */ +function sq_getnxtag($body, $offset){ + $me = 'sq_getnxtag'; + if ($offset > strlen($body)){ + return false; + } + $lt = sq_findnxstr($body, $offset, "<"); + if ($lt == strlen($body)){ + return false; + } + /** + * We are here: + * blah blah + * \---------^ + */ + $pos = sq_skipspace($body, $lt+1); + if ($pos >= strlen($body)){ + return Array(false, false, false, $lt, strlen($body)); + } + /** + * There are 3 kinds of tags: + * 1. Opening tag, e.g.: + * + * 2. Closing tag, e.g.: + * + * 3. XHTML-style content-less tag, e.g.: + * + */ + $tagtype = false; + switch (substr($body, $pos, 1)){ + case '/': + $tagtype = 2; + $pos++; + break; + case '!': + /** + * A comment or an SGML declaration. + */ + if (substr($body, $pos+1, 2) == "--"){ + $gt = strpos($body, "-->", $pos); + if ($gt === false){ + $gt = strlen($body); + } else { + $gt += 2; + } + return Array(false, false, false, $lt, $gt); + } else { + $gt = sq_findnxstr($body, $pos, ">"); + return Array(false, false, false, $lt, $gt); + } + break; + default: + /** + * Assume tagtype 1 for now. If it's type 3, we'll switch values + * later. + */ + $tagtype = 1; + break; + } + + $tag_start = $pos; + $tagname = ''; + /** + * Look for next [\W-_], which will indicate the end of the tag name. + */ + $regary = sq_findnxreg($body, $pos, "[^\w\-_]"); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); + } + list($pos, $tagname, $match) = $regary; + $tagname = strtolower($tagname); + + /** + * $match can be either of these: + * '>' indicating the end of the tag entirely. + * '\s' indicating the end of the tag name. + * '/' indicating that this is type-3 xhtml tag. + * + * Whatever else we find there indicates an invalid tag. + */ + switch ($match){ + case '/': + /** + * This is an xhtml-style tag with a closing / at the + * end, like so: . Check if it's followed + * by the closing bracket. If not, then this tag is invalid + */ + if (substr($body, $pos, 2) == "/>"){ + $pos++; + $tagtype = 3; + } else { + $gt = sq_findnxstr($body, $pos, ">"); + $retary = Array(false, false, false, $lt, $gt); + return $retary; + } + case '>': + return Array($tagname, false, $tagtype, $lt, $pos); + break; + default: + /** + * Check if it's whitespace + */ + if (!preg_match('/\s/', $match)){ + /** + * This is an invalid tag! Look for the next closing ">". + */ + $gt = sq_findnxstr($body, $lt, ">"); + return Array(false, false, false, $lt, $gt); + } + break; + } + + /** + * At this point we're here: + * + * \-------^ + * + * At this point we loop in order to find all attributes. + */ + $attname = ''; + $atttype = false; + $attary = Array(); + + while ($pos <= strlen($body)){ + $pos = sq_skipspace($body, $pos); + if ($pos == strlen($body)){ + /** + * Non-closed tag. + */ + return Array(false, false, false, $lt, $pos); + } + /** + * See if we arrived at a ">" or "/>", which means that we reached + * the end of the tag. + */ + $matches = Array(); + if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) { + /** + * Yep. So we did. + */ + $pos += strlen($matches{1}); + if ($matches{2} == "/>"){ + $tagtype = 3; + $pos++; + } + return Array($tagname, $attary, $tagtype, $lt, $pos); + } + + /** + * There are several types of attributes, with optional + * [:space:] between members. + * Type 1: + * attrname[:space:]=[:space:]'CDATA' + * Type 2: + * attrname[:space:]=[:space:]"CDATA" + * Type 3: + * attr[:space:]=[:space:]CDATA + * Type 4: + * attrname + * + * We leave types 1 and 2 the same, type 3 we check for + * '"' and convert to """ if needed, then wrap in + * double quotes. Type 4 we convert into: + * attrname="yes". + */ + $regary = sq_findnxreg($body, $pos, "[^:\w\-_]"); + if ($regary == false){ + /** + * Looks like body ended before the end of tag. + */ + return Array(false, false, false, $lt, strlen($body)); + } + list($pos, $attname, $match) = $regary; + $attname = strtolower($attname); + /** + * We arrived at the end of attribute name. Several things possible + * here: + * '>' means the end of the tag and this is attribute type 4 + * '/' if followed by '>' means the same thing as above + * '\s' means a lot of things -- look what it's followed by. + * anything else means the attribute is invalid. + */ + switch($match){ + case '/': + /** + * This is an xhtml-style tag with a closing / at the + * end, like so: . Check if it's followed + * by the closing bracket. If not, then this tag is invalid + */ + if (substr($body, $pos, 2) == "/>"){ + $pos++; + $tagtype = 3; + } else { + $gt = sq_findnxstr($body, $pos, ">"); + $retary = Array(false, false, false, $lt, $gt); + return $retary; + } + case '>': + $attary{$attname} = '"yes"'; + return Array($tagname, $attary, $tagtype, $lt, $pos); + break; default: - if ($msg->header->type0 == "text" && $elem_num == 8) { - // This is a plain text message, so lets get the number of lines - // that it contains. - $msg->header->num_lines = $text; - if ($debug_mime) echo "num_lines = $text
"; - - } else if ($msg->header->type0 == "message" && $msg->header->type1 == "rfc822" && $elem_num == 8) { - // This is an encapsulated message, so lets start all over again and - // parse this message adding it on to the existing one. - $structure = trim($structure); - if (substr($structure, 0, 1) == "(") { - $e = mime_match_parenthesis (0, $structure); - $structure = substr($structure, 0, $e); - $structure = substr($structure, 1); - $m = mime_parse_structure($structure, $msg->header->entity_id); - - // the following conditional is there to correct a bug that wasn't - // incrementing the entity IDs correctly because of the special case - // that message/rfc822 is. This fixes it fine. - if (substr($structure, 1, 1) != "(") - $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id)); - - // Now we'll go through and reformat the results. - if ($m->entities) { - for ($i=0; $i < count($m->entities); $i++) { - $msg->addEntity($m->entities[$i]); + /** + * Skip whitespace and see what we arrive at. + */ + $pos = sq_skipspace($body, $pos); + $char = substr($body, $pos, 1); + /** + * Two things are valid here: + * '=' means this is attribute type 1 2 or 3. + * \w means this was attribute type 4. + * anything else we ignore and re-loop. End of tag and + * invalid stuff will be caught by our checks at the beginning + * of the loop. + */ + if ($char == "="){ + $pos++; + $pos = sq_skipspace($body, $pos); + /** + * Here are 3 possibilities: + * "'" attribute type 1 + * '"' attribute type 2 + * everything else is the content of tag type 3 + */ + $quot = substr($body, $pos, 1); + if ($quot == "'"){ + $regary = sq_findnxreg($body, $pos+1, "\'"); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); } - } else { - $msg->addEntity($m); - } - $structure = ""; - } - } - break; - } - $elem_num++; - $text = ""; - } - // loop through the additional properties and put those in the various headers - if ($msg->header->type0 != "message") { - for ($i=0; $i < count($properties); $i++) { - $msg->header->{$properties[$i]["name"]} = $properties[$i]["value"]; - if ($debug_mime) echo "".$properties[$i]["name"]." = " . $properties[$i]["value"] . "
"; - } - } - - return $msg; - } - - // I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't - // figure out how to do this part, so I decided to go to bed. I woke up - // in the morning and had a flash of insight. I went to the white-board - // and scribbled it out, then spent a bit programming it, and this is the - // result. Nothing complicated, but I think my brain was fried yesterday. - // Funny how that happens some times. - // - // This gets properties in a nested parenthesisized list. For example, - // this would get passed something like: ("attachment" ("filename" "luke.tar.gz")) - // This returns an array called $props with all paired up properties. - // It ignores the "attachment" for now, maybe that should change later - // down the road. In this case, what is returned is: - // $props[0]["name"] = "filename"; - // $props[0]["value"] = "luke.tar.gz"; - function mime_get_props ($props, $structure) { - global $debug_mime; - while (strlen($structure) > 0) { - $structure = trim($structure); - $char = substr($structure, 0, 1); - - if ($char == "\"") { - $pos = 1; - $char = substr($structure, $pos, 1); - $tmp = ""; - while ($char != "\"" && $pos < strlen($structure)) { - $tmp .= $char; - $pos++; - $char = substr($structure, $pos, 1); - } - $structure = trim(substr($structure, strlen($tmp) + 2)); - $char = substr($structure, 0, 1); - - if ($char == "\"") { - $pos = 1; - $char = substr($structure, $pos, 1); - $value = ""; - while ($char != "\"" && $pos < strlen($structure)) { - $value .= $char; - $pos++; - $char = substr($structure, $pos, 1); - } - $structure = trim(substr($structure, strlen($tmp) + 2)); - - $k = count($props); - $props[$k]["name"] = strtolower($tmp); - $props[$k]["value"] = $value; - } else if ($char == "(") { - $end = mime_match_parenthesis (0, $structure); - $sub = substr($structure, 1, $end-1); - if (! isset($props)) - $props = array(); - $props = mime_get_props($props, $sub); - $structure = substr($structure, strlen($sub) + 2); - } - return $props; - } else if ($char == "(") { - $end = mime_match_parenthesis (0, $structure); - $sub = substr($structure, 1, $end-1); - $props = mime_get_props($props, $sub); - $structure = substr($structure, strlen($sub) + 2); - return $props; - } else { - return $props; - } - } - } - - // Matches parenthesis. It will return the position of the matching - // parenthesis in $structure. For instance, if $structure was: - // ("text" "plain" ("val1name", "1") nil ... ) - // x x - // then this would return 42 to match up those two. - function mime_match_parenthesis ($pos, $structure) { - $char = substr($structure, $pos, 1); - - // ignore all extra characters - // If inside of a string, skip string -- Boundary IDs and other - // things can have ) in them. - if ($char != '(') - return strlen($structure); - while ($pos < strlen($structure)) { - $pos++; - $char = substr($structure, $pos, 1); - if ($char == ")") { - return $pos; - } else if ($char == '"') { - $pos ++; - while (substr($structure, $pos, 1) != '"' && - $pos < strlen($structure)) { - if (substr($structure, $pos, 2) == '\\"') - $pos ++; - elseif (substr($structure, $pos, 2) == '\\\\') - $pos ++; - $pos ++; - } - } else if ($char == "(") { - $pos = mime_match_parenthesis ($pos, $structure); - } - } - echo "Error decoding mime structure. Report this as a bug!
\n"; - return $pos; - } - - function mime_fetch_body ($imap_stream, $id, $ent_id) { - // do a bit of error correction. If we couldn't find the entity id, just guess - // that it is the first one. That is usually the case anyway. - if (!$ent_id) $ent_id = 1; - - fputs ($imap_stream, "a010 FETCH $id BODY[$ent_id]\r\n"); - $data = sqimap_read_data ($imap_stream, 'a010', true, $response, $message); - $topline = array_shift($data); - while (! ereg('\\* [0-9]+ FETCH ', $topline) && data) - $topline = array_shift($data); - $wholemessage = implode('', $data); - - if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) { - return substr($wholemessage, 0, $regs[1]); - } - else if (ereg('"([^"]*)"', $topline, $regs)) { - return $regs[1]; - } - - $str = "Body retrival error. Please report this bug!\n"; - $str .= "Response: $response\n"; - $str .= "Message: $message\n"; - $str .= "FETCH line: $topline"; - $str .= "---------------\n$wholemessage"; - foreach ($data as $d) - { - $str .= htmlspecialchars($d) . "\n"; - } - return $str; - - return "Body retrival error, please report this bug!\n\nTop line is \"$topline\"\n"; - } - - function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) { - // do a bit of error correction. If we couldn't find the entity id, just guess - // that it is the first one. That is usually the case anyway. - if (!$ent_id) $ent_id = 1; - - fputs ($imap_stream, "a001 FETCH $id BODY[$ent_id]\r\n"); - $cnt = 0; - $continue = true; - $read = fgets ($imap_stream,4096); - while (!ereg("^a001 (OK|BAD|NO)(.*)$", $read, $regs)) { - if (trim($read) == ")==") { - $read1 = $read; - $read = fgets ($imap_stream,4096); - if (ereg("^a001 (OK|BAD|NO)(.*)$", $read, $regs)) { - return; - } else { - echo decodeBody($read1, $encoding); - echo decodeBody($read, $encoding); - } - } else if ($cnt) { - echo decodeBody($read, $encoding); - } - $read = fgets ($imap_stream,4096); - $cnt++; - } - } - - /* -[ END MIME DECODING ]----------------------------------------------------------- */ - - - - /** This is the first function called. It decides if this is a multipart - message or if it should be handled as a single entity - **/ - function decodeMime ($imap_stream, &$header) { - global $username, $key, $imapServerAddress, $imapPort; - return mime_structure ($imap_stream, $header); - } - - // This is here for debugging purposese. It will print out a list - // of all the entity IDs that are in the $message object. - function listEntities ($message) { - if ($message) { - if ($message->header->entity_id) - echo "" . $message->header->entity_id . " : " . $message->header->type0 . "/" . $message->header->type1 . "
"; - for ($i = 0; $message->entities[$i]; $i++) { - $msg = listEntities($message->entities[$i], $ent_id); - if ($msg) - return $msg; - } - } - } - - // returns a $message object for a particular entity id - function getEntity ($message, $ent_id) { - if ($message) { - if ($message->header->entity_id == $ent_id && strlen($ent_id) == strlen($message->header->entity_id)) { - return $message; - } else { - for ($i = 0; isset($message->entities[$i]); $i++) { - $msg = getEntity ($message->entities[$i], $ent_id); - if ($msg) - return $msg; - } - } - } - } - - // figures out what entity to display and returns the $message object - // for that entity. - function findDisplayEntity ($message, $textOnly = 1, $next = 'none') - { - global $show_html_default; - - if (! $message) - return; - - // Show text/plain or text/html -- the first one we find. - if ($message->header->type0 == 'text' && - ($message->header->type1 == 'plain' || - $message->header->type1 == 'html')) - { - // If the next part is an HTML version, this will - // all be true. Show it, if the user so desires. - // HTML mails this way all have entity_id of 2. 1 = text/plain - if ($next != 'none' && - $textOnly == 0 && - $next->header->type0 == "text" && - $next->header->type1 == "html" && - $next->header->entity_id == 2 && - $message->header->type1 == "plain" && - isset($show_html_default) && - $show_html_default) - $message = $next; - - if (isset($message->header->entity_id)) - return $message->header->entity_id; - } - else - { - for ($i=0; isset($message->entities[$i]); $i++) - { - $next = 'none'; - if (isset($message->entities[$i + 1])) - $next = $message->entities[$i + 1]; - $entity = findDisplayEntity($message->entities[$i], - $textOnly, $next); - if ($entity != 0) - return $entity; - } - } - return 0; - } - - /** This returns a parsed string called $body. That string can then - be displayed as the actual message in the HTML. It contains - everything needed, including HTML Tags, Attachments at the - bottom, etc. - **/ - function formatBody($imap_stream, $message, $color, $wrap_at) { - // this if statement checks for the entity to show as the - // primary message. To add more of them, just put them in the - // order that is their priority. - global $startMessage, $username, $key, $imapServerAddress, $imapPort; - - $id = $message->header->id; - $urlmailbox = urlencode($message->header->mailbox); - - // Get the right entity and redefine message to be this entity - // Pass the 0 to mean that we want the 'best' viewable one - $ent_num = findDisplayEntity ($message, 0); - $body_message = getEntity($message, $ent_num); - if (($body_message->header->type0 == "text") || - ($body_message->header->type0 == "rfc822")) { - - $body = mime_fetch_body ($imap_stream, $id, $ent_num); - $body = decodeBody($body, $body_message->header->encoding); - - // If there are other types that shouldn't be formatted, add - // them here - if ($body_message->header->type1 != "html") { - translateText($body, $wrap_at, $body_message->header->charset); - } - - $body .= "
". _("Download this as a file") ."

"; - - /** Display the ATTACHMENTS: message if there's more than one part **/ - $body .= ""; - if (isset($message->entities[0])) { - $body .= formatAttachments ($message, $ent_num, $message->header->mailbox, $id); - } - } else { - $body = formatAttachments ($message, -1, $message->header->mailbox, $id); - } - return $body; - } - - // A recursive function that returns a list of attachments with links - // to where to download these attachments - function formatAttachments ($message, $ent_id, $mailbox, $id) { - global $where, $what; - global $startMessage, $color; - static $ShownHTML = 0; - - $body = ""; - if ($ShownHTML == 0) - { - $ShownHTML = 1; - - $body .= "\n"; - $body .= "
\n"; - $body .= _("Attachments") . ':'; - $body .= "
\n"; - - $body .= "\n"; - - $body .= formatAttachments ($message, $ent_id, $mailbox, $id); - - $body .= "
"; - - return $body; - } - - if ($message) { - if (!$message->entities) { - $type0 = strtolower($message->header->type0); - $type1 = strtolower($message->header->type1); - $name = decodeHeader($message->header->name); - - if ($message->header->entity_id != $ent_id) { - $filename = decodeHeader($message->header->filename); - if (trim($filename) == "") { - if (trim($name) == "") { - $display_filename = "untitled-".$message->header->entity_id; - } else { - $display_filename = $name; - $filename = $name; - } - } else { - $display_filename = $filename; - } - - $urlMailbox = urlencode($mailbox); - $ent = urlencode($message->header->entity_id); - - $DefaultLink = - "../src/download.php?startMessage=$startMessage&passed_id=$id&mailbox=$urlMailbox&passed_ent_id=$ent"; - if ($where && $what) - $DefaultLink .= '&where=' . urlencode($where) . '&what=' . urlencode($what); - $Links['download link']['text'] = _("download"); - $Links['download link']['href'] = - "../src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&passed_ent_id=$ent"; - $ImageURL = ''; - - $HookResults = do_hook("attachment $type0/$type1", $Links, - $startMessage, $id, $urlMailbox, $ent, $DefaultLink, - $display_filename, $where, $what); - - $Links = $HookResults[1]; - $DefaultLink = $HookResults[6]; - - $body .= '  '; - $body .= "$display_filename "; - $body .= '' . show_readable_size($message->header->size) . - '  '; - $body .= "[ $type0/$type1 ] "; - $body .= ''; - if ($message->header->description) - $body .= '' . htmlspecialchars($message->header->description) . ''; - $body .= ' '; - - - $SkipSpaces = 1; - foreach ($Links as $Val) - { - if ($SkipSpaces) - { - $SkipSpaces = 0; - } - else - { - $body .= '  |  '; - } - $body .= '' . $Val['text'] . ''; - } - - unset($Links); - - $body .= "\n"; - } - return $body; - } else { - for ($i = 0; $i < count($message->entities); $i++) { - $body .= formatAttachments ($message->entities[$i], $ent_id, $mailbox, $id); + list($pos, $attval, $match) = $regary; + $pos++; + $attary{$attname} = "'" . $attval . "'"; + } else if ($quot == '"'){ + $regary = sq_findnxreg($body, $pos+1, '\"'); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); + } + list($pos, $attval, $match) = $regary; + $pos++; + $attary{$attname} = '"' . $attval . '"'; + } else { + /** + * These are hateful. Look for \s, or >. + */ + $regary = sq_findnxreg($body, $pos, "[\s>]"); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); + } + list($pos, $attval, $match) = $regary; + /** + * If it's ">" it will be caught at the top. + */ + $attval = preg_replace("/\"/s", """, $attval); + $attary{$attname} = '"' . $attval . '"'; + } + } else if (preg_match("|[\w/>]|", $char)) { + /** + * That was attribute type 4. + */ + $attary{$attname} = '"yes"'; + } else { + /** + * An illegal character. Find next '>' and return. + */ + $gt = sq_findnxstr($body, $pos, ">"); + return Array(false, false, false, $lt, $gt); + } + break; + } + } + /** + * The fact that we got here indicates that the tag end was never + * found. Return invalid tag indication so it gets stripped. + */ + return Array(false, false, false, $lt, strlen($body)); +} + +/** + * This function checks attribute values for entity-encoded values + * and returns them translated into 8-bit strings so we can run + * checks on them. + * + * @param $attvalue A string to run entity check against. + * @return Translated value. + */ + +function sq_deent($attvalue){ + $me = 'sq_deent'; + /** + * See if we have to run the checks first. All entities must start + * with "&". + */ + if (strpos($attvalue, '&') === false){ + return $attvalue; + } + /** + * Check named entities first. + */ + $trans = get_html_translation_table(HTML_ENTITIES); + /** + * Leave " in, as it can mess us up. + */ + $trans = array_flip($trans); + unset($trans{'"'}); + while (list($ent, $val) = each($trans)){ + $attvalue = preg_replace('/' . $ent . '*/si', $val, $attvalue); + } + /** + * Now translate numbered entities from 1 to 255 if needed. + */ + if (strpos($attvalue, '#') !== false){ + $omit = Array(34, 39); + for ($asc = 256; $asc >= 0; $asc--){ + if (!in_array($asc, $omit)){ + $chr = chr($asc); + $octrule = '/\�*' . $asc . ';*/si'; + $hexrule = '/\�*' . dechex($asc) . ';*/si'; + $attvalue = preg_replace($octrule, $chr, $attvalue); + $attvalue = preg_replace($hexrule, $chr, $attvalue); } - return $body; - } - } - } + } + } + return $attvalue; +} +/** + * This function runs various checks against the attributes. + * + * @param $tagname String with the name of the tag. + * @param $attary Array with all tag attributes. + * @param $rm_attnames See description for sq_sanitize + * @param $bad_attvals See description for sq_sanitize + * @param $add_attr_to_tag See description for sq_sanitize + * @param $message message object + * @param $id message id + * @return Array with modified attributes. + */ +function sq_fixatts($tagname, + $attary, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id, + $mailbox + ){ + $me = 'sq_fixatts'; + while (list($attname, $attvalue) = each($attary)){ + /** + * See if this attribute should be removed. + */ + foreach ($rm_attnames as $matchtag=>$matchattrs){ + if (preg_match($matchtag, $tagname)){ + foreach ($matchattrs as $matchattr){ + if (preg_match($matchattr, $attname)){ + unset($attary{$attname}); + continue; + } + } + } + } + /** + * Remove any backslashes, entities, and extraneous whitespace. + */ + $attvalue = sq_unbackslash($attvalue); + $attvalue = sq_deent($attvalue); + $attvalue = sq_unspace($attvalue); - /** this function decodes the body depending on the encoding type. **/ - function decodeBody($body, $encoding) { - $body = str_replace("\r\n", "\n", $body); - $encoding = strtolower($encoding); + /** + * Now let's run checks on the attvalues. + * I don't expect anyone to comprehend this. If you do, + * get in touch with me so I can drive to where you live and + * shake your hand personally. :) + */ + foreach ($bad_attvals as $matchtag=>$matchattrs){ + if (preg_match($matchtag, $tagname)){ + foreach ($matchattrs as $matchattr=>$valary){ + if (preg_match($matchattr, $attname)){ + /** + * There are two arrays in valary. + * First is matches. + * Second one is replacements + */ + list($valmatch, $valrepl) = $valary; + $newvalue = + preg_replace($valmatch, $valrepl, $attvalue); + if ($newvalue != $attvalue){ + $attary{$attname} = $newvalue; + } + } + } + } + } + /** + * Turn cid: urls into http-friendly ones. + */ + if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){ + $attary{$attname} = sq_cid2http($message, $id, $attvalue, $mailbox); + } + } + /** + * See if we need to append any attributes to this tag. + */ + foreach ($add_attr_to_tag as $matchtag=>$addattary){ + if (preg_match($matchtag, $tagname)){ + $attary = array_merge($attary, $addattary); + } + } + return $attary; +} - if ($encoding == "quoted-printable") { - $body = quoted_printable_decode($body); +/** + * This function edits the style definition to make them friendly and + * usable in squirrelmail. + * + * @param $message the message object + * @param $id the message id + * @param $content a string with whatever is between + * @param $mailbox the message mailbox + * @return a string with edited content. + */ +function sq_fixstyle($body, $pos, $message, $id, $mailbox){ + global $view_unsafe_images; + $me = 'sq_fixstyle'; + $ret = sq_findnxreg($body, $pos, ''); + if ($ret == FALSE){ + return array(FALSE, strlen($body)); + } + $newpos = $ret[0] + strlen($ret[2]); + $content = $ret[1]; + /** + * First look for general BODY style declaration, which would be + * like so: + * body {background: blah-blah} + * and change it to .bodyclass so we can just assign it to a
+ */ + $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); + $secremoveimg = '../images/' . _("sec_remove_eng.png"); + /** + * Fix url('blah') declarations. + */ + $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si", + "url(\\1$secremoveimg\\2)", $content); + /** + * Fix url('https*://.*) declarations but only if $view_unsafe_images + * is false. + */ + if (!$view_unsafe_images){ + $content = preg_replace("|url\s*\(\s*([\'\"])\s*https*:.*?([\'\"])\s*\)|si", + "url(\\1$secremoveimg\\2)", $content); + } - while (ereg("=\n", $body)) - $body = ereg_replace ("=\n", "", $body); - } else if ($encoding == "base64") { - $body = base64_decode($body); - } + /** + * Fix urls that refer to cid: + */ + while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si", + $content, $matches)){ + $cidurl = $matches{1}; + $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox); + $content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si", + "url($httpurl)", $content); + } - // All other encodings are returned raw. - return $body; - } + /** + * Fix stupid css declarations which lead to vulnerabilities + * in IE. + */ + $match = Array('/expression/i', + '/behaviou*r/i', + '/binding/i', + '/include-source/i'); + $replace = Array('idiocy', 'idiocy', 'idiocy', 'idiocy'); + $content = preg_replace($match, $replace, $content); + return array($content, $newpos); +} +/** + * This function converts cid: url's into the ones that can be viewed in + * the browser. + * + * @param $message the message object + * @param $id the message id + * @param $cidurl the cid: url. + * @param $mailbox the message mailbox + * @return a string with a http-friendly url + */ +function sq_cid2http($message, $id, $cidurl, $mailbox){ + /** + * Get rid of quotes. + */ + $quotchar = substr($cidurl, 0, 1); + if ($quotchar == '"' || $quotchar == "'"){ + $cidurl = str_replace($quotchar, "", $cidurl); + } else { + $quotchar = ''; + } + $cidurl = substr(trim($cidurl), 4); + $linkurl = find_ent_id($cidurl, $message); + /* in case of non-save cid links $httpurl should be replaced by a sort of + unsave link image */ + $httpurl = ''; + if ($linkurl) { + $httpurl = $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' . + "passed_id=$id&mailbox=" . urlencode($mailbox) . + '&ent_id=' . $linkurl . $quotchar; + } + return $httpurl; +} - // This functions decode strings that is encoded according to - // RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). - function decodeHeader ($string) { - if (eregi('=\\?([^?]+)\\?(q|b)\\?([^?]+)\\?=', - $string, $res)) { - if (ucfirst($res[2]) == "B") { - $replace = base64_decode($res[3]); - } else { - $replace = ereg_replace("_", " ", $res[3]); - // Convert lowercase Quoted Printable to uppercase for - // quoted_printable_decode to understand it. - while (ereg("(=(([0-9][abcdef])|([abcdef][0-9])|([abcdef][abcdef])))", $replace, $res)) { - $replace = str_replace($res[1], strtoupper($res[1]), $replace); - } - $replace = quoted_printable_decode($replace); - } +/** + * This function changes the tag into a
tag since we + * can't really have a body-within-body. + * + * @param $attary an array of attributes and values of + * @param $mailbox mailbox we're currently reading (for cid2http) + * @param $message current message (for cid2http) + * @param $id current message id (for cid2http) + * @return a modified array of attributes to be set for
+ */ +function sq_body2div($attary, $mailbox, $message, $id){ + $me = 'sq_body2div'; + $divattary = Array('class' => "'bodyclass'"); + $bgcolor = '#ffffff'; + $text = '#000000'; + $has_bgc_stl = $has_txt_stl = false; + $styledef = ''; + if (is_array($attary) && sizeof($attary) > 0){ + foreach ($attary as $attname=>$attvalue){ + $quotchar = substr($attvalue, 0, 1); + $attvalue = str_replace($quotchar, "", $attvalue); + switch ($attname){ + case 'background': + $attvalue = sq_cid2http($message, $id, + $attvalue, $mailbox); + $styledef .= "background-image: url('$attvalue'); "; + break; + case 'bgcolor': + $has_bgc_stl = true; + $styledef .= "background-color: $attvalue; "; + break; + case 'text': + $has_txt_stl = true; + $styledef .= "color: $attvalue; "; + break; + } + } + // Outlook defines a white bgcolor and no text color. This can lead to + // white text on a white bg with certain themes. + if ($has_bgc_stl && !$has_txt_stl) { + $styledef .= "color: $text; "; + } + if (strlen($styledef) > 0){ + $divattary{"style"} = "\"$styledef\""; + } + } + return $divattary; +} - $replace = charset_decode ($res[1], $replace); - - // Remove the name of the character set. - $string = eregi_replace ('=\\?([^?]+)\\?(q|b)\\?([^?]+)\\?=', - $replace, $string); - - // In case there should be more encoding in the string: recurse - return (decodeHeader($string)); - } else - return ($string); - } - - // Encode a string according to RFC 1522 for use in headers if it - // contains 8-bit characters or anything that looks like it should - // be encoded. - function encodeHeader ($string) { - global $default_charset; - - // Encode only if the string contains 8-bit characters or =? - if (ereg("([\200-\377]|=\\?)", $string)) { - - // First the special characters - $string = str_replace("=", "=3D", $string); - $string = str_replace("?", "=3F", $string); - $string = str_replace("_", "=5F", $string); - $string = str_replace(" ", "_", $string); - - for ( $ch = 127 ; $ch <= 255 ; $ch++ ) { - $replace = chr($ch); - $insert = sprintf("=%02X", $ch); - $string = str_replace($replace, $insert, $string); +/** + * This is the main function and the one you should actually be calling. + * There are several variables you should be aware of an which need + * special description. + * + * Since the description is quite lengthy, see it here: + * http://www.mricon.com/html/phpfilter.html + * + * @param $body the string with HTML you wish to filter + * @param $tag_list see description above + * @param $rm_tags_with_content see description above + * @param $self_closing_tags see description above + * @param $force_tag_closing see description above + * @param $rm_attnames see description above + * @param $bad_attvals see description above + * @param $add_attr_to_tag see description above + * @param $message message object + * @param $id message id + * @return sanitized html safe to show on your pages. + */ +function sq_sanitize($body, + $tag_list, + $rm_tags_with_content, + $self_closing_tags, + $force_tag_closing, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id, + $mailbox + ){ + $me = 'sq_sanitize'; + $rm_tags = array_shift($tag_list); + /** + * Normalize rm_tags and rm_tags_with_content. + */ + @array_walk($tag_list, 'sq_casenormalize'); + @array_walk($rm_tags_with_content, 'sq_casenormalize'); + @array_walk($self_closing_tags, 'sq_casenormalize'); + /** + * See if tag_list is of tags to remove or tags to allow. + * false means remove these tags + * true means allow these tags + */ + $curpos = 0; + $open_tags = Array(); + $trusted = "\n\n"; + $skip_content = false; + /** + * Take care of netscape's stupid javascript entities like + * &{alert('boo')}; + */ + $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body); + + while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ + list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; + $free_content = substr($body, $curpos, $lt-$curpos); + /** + * Take care of