X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=functions%2Fmime.php;h=1419d58854dd7a859d3140abc07a842c07a9886a;hb=da1b55add6a9207dfd72e141e2fe4ec74cf26e3e;hp=a1394a6647e2c632e9f20043219e0024de0f7c86;hpb=8196f7718f60f8b76a36c5d50c76b5d9c2ae2e4e;p=squirrelmail.git diff --git a/functions/mime.php b/functions/mime.php index a1394a66..989018ae 100644 --- a/functions/mime.php +++ b/functions/mime.php @@ -1,1233 +1,2200 @@ entities[] = $msg; + /* Isolate the body structure and remove beginning and end parenthesis. */ + $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13)); + $read = trim(substr ($read, 0, -1)); + $i = 0; + $msg = Message::parseStructure($read,$i); + if (!is_object($msg)) { + include_once(SM_PATH . 'functions/display_messages.php'); + global $color, $mailbox; + /* removed urldecode because $_GET is auto urldecoded ??? */ + displayPageHeader( $color, $mailbox ); + $errormessage = _("SquirrelMail could not decode the bodystructure of the message"); + $errormessage .= '
'._("the provided bodystructure by your imap-server").':

'; + $errormessage .= '
' . htmlspecialchars($read) . '
'; + plain_error_message( $errormessage, $color ); + echo ''; + exit; + } + if (count($flags)) { + foreach ($flags as $flag) { + $char = strtoupper($flag{1}); + switch ($char) { + case 'S': + if (strtolower($flag) == '\\seen') { + $msg->is_seen = true; + } + break; + case 'A': + if (strtolower($flag) == '\\answered') { + $msg->is_answered = true; + } + break; + case 'D': + if (strtolower($flag) == '\\deleted') { + $msg->is_deleted = true; + } + break; + case 'F': + if (strtolower($flag) == '\\flagged') { + $msg->is_flagged = true; + } + break; + case 'M': + if (strtolower($flag) == '$mdnsent') { + $msg->is_mdnsent = true; + } + break; + default: + break; + } + } } + // listEntities($msg); + return $msg; } -/* --------------------------------------------------------------------------------- */ -/* MIME DECODING */ -/* --------------------------------------------------------------------------------- */ -/* This function gets the structure of a message and stores it in the "message" class. - * It will return this object for use with all relevant header information and - * fully parsed into the standard "message" object format. - */ -function mime_structure ($imap_stream, $header) { - - sqimap_messages_flag ($imap_stream, $header->id, $header->id, 'Seen'); - $ssid = sqimap_session_id(); - $lsid = strlen( $ssid ); - $id = $header->id; - fputs ($imap_stream, "$ssid FETCH $id BODYSTRUCTURE\r\n"); - // - // This should use sqimap_read_data instead of reading it itself - // - $read = fgets ($imap_stream, 10000); - $bodystructure = ''; - while ( substr($read, 0, $lsid) <> $ssid && - !feof( $imap_stream ) ) { - $bodystructure .= $read; - $read = fgets ($imap_stream, 10000); - } - $read = $bodystructure; - - // isolate the body structure and remove beginning and end parenthesis - $read = trim(substr ($read, strpos(strtolower($read), 'bodystructure') + 13)); - $read = trim(substr ($read, 0, -1)); - $end = mime_match_parenthesis(0, $read); - while ($end == strlen($read)-1) { - $read = trim(substr ($read, 0, -1)); - $read = trim(substr ($read, 1)); - $end = mime_match_parenthesis(0, $read); - } - - $msg = mime_parse_structure ($read, 0); - $msg->header = $header; - - return( $msg ); -} -/* this starts the parsing of a particular structure. It is called recursively, - * so it can be passed different structures. It returns an object of type - * $message. - * First, it checks to see if it is a multipart message. If it is, then it - * handles that as it sees is necessary. If it is just a regular entity, - * then it parses it and adds the necessary header information (by calling out - * to mime_get_elements() - */ -function mime_parse_structure ($structure, $ent_id) { - - $msg = new message(); - if ($structure{0} == '(') { - $ent_id = mime_new_element_level($ent_id); - $start = $end = -1; - do { - $start = $end+1; - $end = mime_match_parenthesis ($start, $structure); - - $element = substr($structure, $start+1, ($end - $start)-1); - $ent_id = mime_increment_id ($ent_id); - $newmsg = mime_parse_structure ($element, $ent_id); - $msg->addEntity ($newmsg); - } while ($structure{$end+1} == '('); - } else { - // parse the elements - $msg = mime_get_element ($structure, $msg, $ent_id); - } - return $msg; -} +/* This starts the parsing of a particular structure. It is called recursively, +* so it can be passed different structures. It returns an object of type +* $message. +* First, it checks to see if it is a multipart message. If it is, then it +* handles that as it sees is necessary. If it is just a regular entity, +* then it parses it and adds the necessary header information (by calling out +* to mime_get_elements() +*/ -/* Increments the element ID. An element id can look like any of - * the following: 1, 1.2, 4.3.2.4.1, etc. This function increments - * the last number of the element id, changing 1.2 to 1.3. - */ -function mime_increment_id ($id) { +function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) { + /* Do a bit of error correction. If we couldn't find the entity id, just guess + * that it is the first one. That is usually the case anyway. + */ - if (strpos($id, ".")) { - $first = substr($id, 0, strrpos($id, ".")); - $last = substr($id, strrpos($id, ".")+1); - $last++; - $new = $first . "." .$last; + if (!$ent_id) { + $cmd = "FETCH $id BODY[]"; } else { - $new = $id + 1; + $cmd = "FETCH $id BODY[$ent_id]"; } - - return $new; -} - -/* - * See comment for mime_increment_id(). - * This adds another level on to the entity_id changing 1.3 to 1.3.0 - * NOTE: 1.3.0 is not a valid element ID. It MUST be incremented - * before it can be used. I left it this way so as not to have - * to make a special case if it is the first entity_id. It - * always increments it, and that works fine. - */ -function mime_new_element_level ($id) { - - if (!$id) { - $id = 0; - } else { - $id = $id . '.0'; - } - - return( $id ); -} - -function mime_get_element (&$structure, $msg, $ent_id) { - - $elem_num = 1; - $msg->header = new msg_header(); - $msg->header->entity_id = $ent_id; - $properties = array(); - - while (strlen($structure) > 0) { - $structure = trim($structure); - $char = $structure{0}; - - if (strtolower(substr($structure, 0, 3)) == 'nil') { - $text = ''; - $structure = substr($structure, 3); - } else if ($char == '"') { - // loop through until we find the matching quote, and return that as a string - $pos = 1; - $text = ''; - while ( ($char = $structure{$pos} ) <> '"' && $pos < strlen($structure)) { - $text .= $char; - $pos++; - } - $structure = substr($structure, strlen($text) + 2); - } else if ($char == '(') { - // comment me - $end = mime_match_parenthesis (0, $structure); - $sub = substr($structure, 1, $end-1); - $properties = mime_get_props($properties, $sub); - $structure = substr($structure, strlen($sub) + 2); - } else { - // loop through until we find a space or an end parenthesis - $pos = 0; - $char = $structure{$pos}; - $text = ''; - while ($char != ' ' && $char != ')' && $pos < strlen($structure)) { - $text .= $char; - $pos++; - $char = $structure{$pos}; - } - $structure = substr($structure, strlen($text)); - } - - // This is where all the text parts get put into the header - switch ($elem_num) { - case 1: - $msg->header->type0 = strtolower($text); - break; - case 2: - $msg->header->type1 = strtolower($text); - break; - case 4: // Id - // Invisimail enclose images with <> - $msg->header->id = str_replace( '<', '', str_replace( '>', '', $text ) ); - break; - case 5: - $msg->header->description = $text; - break; - case 6: - $msg->header->encoding = strtolower($text); - break; - case 7: - $msg->header->size = $text; - break; - default: - if ($msg->header->type0 == 'text' && $elem_num == 8) { - // This is a plain text message, so lets get the number of lines - // that it contains. - $msg->header->num_lines = $text; - - } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) { - // This is an encapsulated message, so lets start all over again and - // parse this message adding it on to the existing one. - $structure = trim($structure); - if ( $structure{0} == '(' ) { - $e = mime_match_parenthesis (0, $structure); - $structure = substr($structure, 0, $e); - $structure = substr($structure, 1); - $m = mime_parse_structure($structure, $msg->header->entity_id); - - // the following conditional is there to correct a bug that wasn't - // incrementing the entity IDs correctly because of the special case - // that message/rfc822 is. This fixes it fine. - if (substr($structure, 1, 1) != '(') - $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id)); - - // Now we'll go through and reformat the results. - if ($m->entities) { - for ($i=0; $i < count($m->entities); $i++) { - $msg->addEntity($m->entities[$i]); - } - } else { - $msg->addEntity($m); - } - $structure = ""; - } - } - break; - } - $elem_num++; - $text = ""; - } - // loop through the additional properties and put those in the various headers - if ($msg->header->type0 != 'message') { - for ($i=0; $i < count($properties); $i++) { - $msg->header->{$properties[$i]['name']} = $properties[$i]['value']; - } - } - - return $msg; -} -/* - * I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't - * figure out how to do this part, so I decided to go to bed. I woke up - * in the morning and had a flash of insight. I went to the white-board - * and scribbled it out, then spent a bit programming it, and this is the - * result. Nothing complicated, but I think my brain was fried yesterday. - * Funny how that happens some times. - * - * This gets properties in a nested parenthesisized list. For example, - * this would get passed something like: ("attachment" ("filename" "luke.tar.gz")) - * This returns an array called $props with all paired up properties. - * It ignores the "attachment" for now, maybe that should change later - * down the road. In this case, what is returned is: - * $props[0]["name"] = "filename"; - * $props[0]["value"] = "luke.tar.gz"; - */ -function mime_get_props ($props, $structure) { - - while (strlen($structure) > 0) { - $structure = trim($structure); - $char = $structure{0}; - - if ($char == '"') { - $pos = 1; - $tmp = ''; - while ( ( $char = $structure{$pos} ) != '"' && - $pos < strlen($structure)) { - $tmp .= $char; - $pos++; - } - $structure = trim(substr($structure, strlen($tmp) + 2)); - $char = $structure{0}; - - if ($char == '"') { - $pos = 1; - $value = ''; - while ( ( $char = $structure{$pos} ) != '"' && - $pos < strlen($structure) ) { - $value .= $char; - $pos++; - } - $structure = trim(substr($structure, strlen($tmp) + 2)); - - $k = count($props); - $props[$k]['name'] = strtolower($tmp); - $props[$k]['value'] = $value; - } else if ($char == '(') { - $end = mime_match_parenthesis (0, $structure); - $sub = substr($structure, 1, $end-1); - if (! isset($props)) - $props = array(); - $props = mime_get_props($props, $sub); - $structure = substr($structure, strlen($sub) + 2); - } - return $props; - } else if ($char == '(') { - $end = mime_match_parenthesis (0, $structure); - $sub = substr($structure, 1, $end-1); - $props = mime_get_props($props, $sub); - $structure = substr($structure, strlen($sub) + 2); - return $props; - } else { - return $props; - } - } -} + if ($fetch_size!=0) $cmd .= "<0.$fetch_size>"; -/* - * Matches parenthesis. It will return the position of the matching - * parenthesis in $structure. For instance, if $structure was: - * ("text" "plain" ("val1name", "1") nil ... ) - * x x - * then this would return 42 to match up those two. - */ -function mime_match_parenthesis ($pos, $structure) { + $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, TRUE); + do { + $topline = trim(array_shift($data)); + } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ; - $j = strlen( $structure ); - - // ignore all extra characters - // If inside of a string, skip string -- Boundary IDs and other - // things can have ) in them. - if ( $structure{$pos} != '(' ) { - return( $j ); - } - - while ( $pos < $j ) { - $pos++; - if ($structure{$pos} == ')') { - return $pos; - } elseif ($structure{$pos} == '"') { - $pos++; - while ( $structure{$pos} != '"' && - $pos < $j ) { - if (substr($structure, $pos, 2) == '\\"') { - $pos++; - } elseif (substr($structure, $pos, 2) == '\\\\') { - $pos++; - } - $pos++; - } - } elseif ( $structure{$pos} == '(' ) { - $pos = mime_match_parenthesis ($pos, $structure); - } - } - echo _("Error decoding mime structure. Report this as a bug!") . '
'; - return( $pos ); -} - -function mime_fetch_body ($imap_stream, $id, $ent_id ) { - // do a bit of error correction. If we couldn't find the entity id, just guess - // that it is the first one. That is usually the case anyway. - if (!$ent_id) - $ent_id = 1; - $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id]", true, $response, $message); - $topline = array_shift($data); - while (! ereg('\\* [0-9]+ FETCH ', $topline) && $data) - $topline = array_shift($data); $wholemessage = implode('', $data); if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) { - $ret = substr( $wholemessage, 0, $regs[1] ); - /* - There is some information in the content info header that could be important - in order to parse html messages. Let's get them here. + $ret = substr($wholemessage, 0, $regs[1]); + /* There is some information in the content info header that could be important + * in order to parse html messages. Let's get them here. */ - if ( $ret{0} == '<' ) { - $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message); - $base = ''; - $k = 10; - foreach( $data as $d ) { - if ( substr( $d, 0, 13 ) == 'Content-Base:' ) { - $j = strlen( $d ); - $i = 13; - $base = ''; - while ( $i < $j && - ( !isNoSep( $d{$i} ) || $d{$i} == '"' ) ) - $i++; - while ( $i < $j ) { - if ( isNoSep( $d{$i} ) ) - $base .= $d{$i}; - $i++; - } - $k = 0; - } elseif ( $k == 1 && !isnosep( $d{0} ) ) { - $base .= substr( $d, 1 ); - } - $k++; - } - if ( $base <> '' ) - - $ret = "" . $ret; - } +// if ($ret{0} == '<') { +// $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, TRUE); +// } } else if (ereg('"([^"]*)"', $topline, $regs)) { $ret = $regs[1]; } else { global $where, $what, $mailbox, $passed_id, $startMessage; - $par = "mailbox=".urlencode($mailbox)."&passed_id=$passed_id"; + $par = 'mailbox=' . urlencode($mailbox) . '&passed_id=' . $passed_id; if (isset($where) && isset($what)) { - $par .= "&where=".urlencode($where)."&what=".urlencode($what); + $par .= '&where=' . urlencode($where) . '&what=' . urlencode($what); } else { - $par .= "&startMessage=$startMessage&show_more=0"; + $par .= '&startMessage=' . $startMessage . '&show_more=0'; } - $par .= '&response='.urlencode($response).'&message='.urlencode($message). - '&topline='.urlencode($topline); - - echo '' . - _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") . - "Submit message
" . - '' . _("Response:") . "$response
" . - _("Message:") . " $message
" . - _("FETCH line:") . " $topline
"; - - $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message); + $par .= '&response=' . urlencode($response) . + '&message=' . urlencode($message) . + '&topline=' . urlencode($topline); + + echo '
' . + '' . + '' . + '" . + '" . + '" . + '" . + "
' . + _("Body retrieval error. The reason for this is most probably that the message is malformed.") . + '
' . _("Command:") . "$cmd
' . _("Response:") . "$response
' . _("Message:") . "$message
' . _("FETCH line:") . "$topline


"; + + $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, TRUE); array_shift($data); $wholemessage = implode('', $data); - $ret = "---------------\n$wholemessage"; - + $ret = $wholemessage; } - return( $ret ); + return $ret; } -function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) { - // do a bit of error correction. If we couldn't find the entity id, just guess - // that it is the first one. That is usually the case anyway. - if (!$ent_id) { - $ent_id = 1; - } - $sid = sqimap_session_id(); - // Don't kill the connection if the browser is over a dialup - // and it would take over 30 seconds to download it. +function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) { + + /* Don't kill the connection if the browser is over a dialup + * and it would take over 30 seconds to download it. + * Don't call set_time_limit in safe mode. + */ - // donĀ“t call set_time_limit in safe mode. - if (!ini_get("safe_mode")) { + if (!ini_get('safe_mode')) { set_time_limit(0); } - + /* in case of base64 encoded attachments, do not buffer them. + Instead, echo the decoded attachment directly to screen */ + if (strtolower($encoding) == 'base64') { + if (!$ent_id) { + $query = "FETCH $id BODY[]"; + } else { + $query = "FETCH $id BODY[$ent_id]"; + } + sqimap_run_command($imap_stream,$query,true,$response,$message,TRUE,'sqimap_base64_decode','php://stdout',true); + } else { + $body = mime_fetch_body ($imap_stream, $id, $ent_id); + echo decodeBody($body, $encoding); + } + + /* + TODO, use the same method for quoted printable. + However, I assume that quoted printable attachments aren't that large + so the performancegain / memory usage drop will be minimal. + If we decide to add that then we need to adapt sqimap_fread because + we need to split te result on \n and fread doesn't stop at \n. That + means we also should provide $results from sqimap_fread (by ref) to + te function and set $no_return to false. The $filter function for + quoted printable should handle unsetting of $results. + */ + /* + TODO 2: find out how we write to the output stream php://stdout. fwrite + doesn't work because 'php://stdout isn't a stream. + */ + + return; +/* fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n"); $cnt = 0; $continue = true; - $read = fgets ($imap_stream,4096); + $read = fgets ($imap_stream,8192); + + // This could be bad -- if the section has sqimap_session_id() . ' OK' // or similar, it will kill the download. - while (!ereg("^".$sid." (OK|BAD|NO)(.*)$", $read, $regs)) { - if (trim($read) == ')==') { - $read1 = $read; - $read = fgets ($imap_stream,4096); - if (ereg("^".$sid." (OK|BAD|NO)(.*)$", $read, $regs)) { - return; - } else { - echo decodeBody($read1, $encoding) . - decodeBody($read, $encoding); - } - } else if ($cnt) { - echo decodeBody($read, $encoding); - } - $read = fgets ($imap_stream,4096); - $cnt++; + while (!ereg("^".$sid_s." (OK|BAD|NO)(.*)$", $read, $regs)) { + if (trim($read) == ')==') { + $read1 = $read; + $read = fgets ($imap_stream,4096); + if (ereg("^".$sid." (OK|BAD|NO)(.*)$", $read, $regs)) { + return; + } else { + echo decodeBody($read1, $encoding) . + decodeBody($read, $encoding); + } + } else if ($cnt) { + echo decodeBody($read, $encoding); + } + $read = fgets ($imap_stream,4096); + $cnt++; +// break; } +*/ } /* -[ END MIME DECODING ]----------------------------------------------------------- */ +/* This is here for debugging purposes. It will print out a list +* of all the entity IDs that are in the $message object. +*/ +function listEntities ($message) { + if ($message) { + echo "" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '
'; + for ($i = 0; isset($message->entities[$i]); $i++) { + echo "$i : "; + $msg = listEntities($message->entities[$i]); + + if ($msg) { + echo "return: "; + return $msg; + } + } + } +} +function getPriorityStr($priority) { + $priority_level = substr($priority,0,1); -/* This is the first function called. It decides if this is a multipart - message or if it should be handled as a single entity - */ -function decodeMime ($imap_stream, &$header) { - global $username, $key, $imapServerAddress, $imapPort; - return mime_structure ($imap_stream, $header); -} + switch($priority_level) { + /* Check for a higher then normal priority. */ + case '1': + case '2': + $priority_string = _("High"); + break; -// This is here for debugging purposese. It will print out a list -// of all the entity IDs that are in the $message object. -/* -function listEntities ($message) { -if ($message) { - if ($message->header->entity_id) - echo "" . $message->header->entity_id . ' : ' . $message->header->type0 . '/' . $message->header->type1 . '
'; - for ($i = 0; $message->entities[$i]; $i++) { - $msg = listEntities($message->entities[$i], $ent_id); - if ($msg) - return $msg; - } -} + /* Check for a lower then normal priority. */ + case '4': + case '5': + $priority_string = _("Low"); + break; + + /* Check for a normal priority. */ + case '3': + default: + $priority_level = '3'; + $priority_string = _("Normal"); + break; + + } + return $priority_string; } -*/ /* returns a $message object for a particular entity id */ function getEntity ($message, $ent_id) { - if ($message) { - if ($message->header->entity_id == $ent_id && strlen($ent_id) == strlen($message->header->entity_id)) { - return $message; - } else { - for ($i = 0; isset($message->entities[$i]); $i++) { - $msg = getEntity ($message->entities[$i], $ent_id); - if ($msg) { - return $msg; - } - } - } - } + return $message->getEntity($ent_id); } -/* - * figures out what entity to display and returns the $message object - * for that entity. - */ -function findDisplayEntity ($message, $textOnly = 1) { - global $show_html_default; - - $entity = 0; - - if ($message) { - if ( $message->header->type0 == 'multipart' && - ( $message->header->type1 == 'alternative' || - $message->header->type1 == 'related' ) && - $show_html_default && ! $textOnly ) { - $entity = findDisplayEntityHTML($message); +/* translateText +* Extracted from strings.php 23/03/2002 +*/ + +function translateText(&$body, $wrap_at, $charset) { + global $where, $what; /* from searching */ + global $color; /* color theme */ + + require_once(SM_PATH . 'functions/url_parser.php'); + + $body_ary = explode("\n", $body); + for ($i=0; $i < count($body_ary); $i++) { + $line = $body_ary[$i]; + if (strlen($line) - 2 >= $wrap_at) { + sqWordWrap($line, $wrap_at, $charset); } - - // Show text/plain or text/html -- the first one we find. - if ( $entity == 0 && - $message->header->type0 == 'text' && - ( $message->header->type1 == 'plain' || - $message->header->type1 == 'html' ) && - isset($message->header->entity_id) ) { - $entity = $message->header->entity_id; + $line = charset_decode($charset, $line); + $line = str_replace("\t", ' ', $line); + + parseUrl ($line); + + $quotes = 0; + $pos = 0; + $j = strlen($line); + + while ($pos < $j) { + if ($line[$pos] == ' ') { + $pos++; + } else if (strpos($line, '>', $pos) === $pos) { + $pos += 4; + $quotes++; + } else { + break; + } } - - $i = 0; - while ($entity == 0 && isset($message->entities[$i]) ) { - $entity = findDisplayEntity($message->entities[$i], $textOnly); - $i++; + + if ($quotes % 2) { + if (!isset($color[13])) { + $color[13] = '#800000'; + } + $line = '' . $line . ''; + } elseif ($quotes) { + if (!isset($color[14])) { + $color[14] = '#FF0000'; + } + $line = '' . $line . ''; } + + $body_ary[$i] = $line; } - - return( $entity ); + $body = '
' . implode("\n", $body_ary) . '
'; } -/* Shows the HTML version */ -function findDisplayEntityHTML ($message) { - - if ( $message->header->type0 == 'text' && - $message->header->type1 == 'html' && - isset($message->header->entity_id)) { - return $message->header->entity_id; - } - for ($i = 0; isset($message->entities[$i]); $i ++) { - $entity = findDisplayEntityHTML($message->entities[$i]); - if ($entity != 0) { - return $entity; - } +/** + * This returns a parsed string called $body. That string can then + * be displayed as the actual message in the HTML. It contains + * everything needed, including HTML Tags, Attachments at the + * bottom, etc. + * + * Since 1.2.0 function uses message_body hook. + * Till 1.3.0 function included output of formatAttachments(). + * + * @param resource $imap_stream imap connection resource + * @param object $message squirrelmail message object + * @param array $color squirrelmail color theme array + * @param integer $wrap_at number of characters per line + * @param string $ent_num (since 1.3.0) message part id + * @param integer $id (since 1.3.0) message id + * @param string $mailbox (since 1.3.0) imap folder name + * @param boolean $clean (since 1.5.1) Do not output stuff that's irrelevant for the printable version. + * @return string html formated message text + */ +function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX', $clean=FALSE) { + /* This if statement checks for the entity to show as the + * primary message. To add more of them, just put them in the + * order that is their priority. + */ + global $startMessage, $languages, $squirrelmail_language, + $show_html_default, $sort, $has_unsafe_images, $passed_ent_id; + + if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) { + $view_unsafe_images = false; } - - return 0; -} -/* This returns a parsed string called $body. That string can then -be displayed as the actual message in the HTML. It contains -everything needed, including HTML Tags, Attachments at the -bottom, etc. -*/ -function formatBody($imap_stream, $message, $color, $wrap_at) { - // this if statement checks for the entity to show as the - // primary message. To add more of them, just put them in the - // order that is their priority. - global $startMessage, $username, $key, $imapServerAddress, $imapPort, - $show_html_default; - - $id = $message->header->id; - $urlmailbox = urlencode($message->header->mailbox); - - // Get the right entity and redefine message to be this entity - // Pass the 0 to mean that we want the 'best' viewable one - $ent_num = findDisplayEntity ($message, 0); + $body = ''; + $urlmailbox = urlencode($mailbox); $body_message = getEntity($message, $ent_num); if (($body_message->header->type0 == 'text') || ($body_message->header->type0 == 'rfc822')) { - $body = mime_fetch_body ($imap_stream, $id, $ent_num); $body = decodeBody($body, $body_message->header->encoding); + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode')) { + if (mb_detect_encoding($body) != 'ASCII') { + $body = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode',$body); + } + } $hookResults = do_hook("message_body", $body); $body = $hookResults[1]; - - // If there are other types that shouldn't be formatted, add - // them here + + /* If there are other types that shouldn't be formatted, add + * them here. + */ + if ($body_message->header->type1 == 'html') { - if ( $show_html_default <> 1 ) { - $body = strip_tags( $body ); - translateText($body, $wrap_at, $body_message->header->charset); + if ($show_html_default <> 1) { + $entity_conv = array(' ' => ' ', + '

' => "\n", + '

' => "\n", + '
' => "\n", + '
' => "\n", + '
' => "\n", + '
' => "\n", + '>' => '>', + '<' => '<'); + $body = strtr($body, $entity_conv); + $body = strip_tags($body); + $body = trim($body); + translateText($body, $wrap_at, + $body_message->header->getParameter('charset')); } else { - $body = MagicHTML( $body, $id ); + $body = magicHTML($body, $id, $message, $mailbox); } } else { - translateText($body, $wrap_at, $body_message->header->charset); + translateText($body, $wrap_at, + $body_message->header->getParameter('charset')); } - - $body .= "

". _("Download this as a file") ."

"; - - /** Display the ATTACHMENTS: message if there's more than one part **/ - $body .= ""; - if (isset($message->entities[0])) { - $body .= formatAttachments ($message, $ent_num, $message->header->mailbox, $id); + + // if this is the clean display (i.e. printer friendly), stop here. + if ( $clean ) { + return $body; } - $body .= ""; - } else { - $body = formatAttachments ($message, -1, $message->header->mailbox, $id); + + $link = 'passed_id=' . $id . '&ent_id='.$ent_num. + '&mailbox=' . $urlmailbox .'&sort=' . $sort . + '&startMessage=' . $startMessage . '&show_more=0'; + if (isset($passed_ent_id)) { + $link .= '&passed_ent_id='.$passed_ent_id; + } + $body .= '
' . _("Download this as a file") . ''; + if ($view_unsafe_images) { + $text = _("Hide Unsafe Images"); + } else { + if (isset($has_unsafe_images) && $has_unsafe_images) { + $link .= '&view_unsafe_images=1'; + $text = _("View Unsafe Images"); + } else { + $text = ''; + } + } + if($text != '') { + $body .= ' | ' . $text . ''; + } + $body .= '

' . "\n"; } - return ($body); + return $body; } -/* - * A recursive function that returns a list of attachments with links - * to where to download these attachments +/** + * Displays attachment links and information + * FIXME: SM_PATH is used in URLs + * + * Since 1.3.0 function is not included in formatBody() call. + * + * Since 1.0.2 uses attachment $type0/$type1 hook. + * Since 1.2.5 uses attachment $type0/* hook. + * Since 1.5.0 uses attachments_bottom hook. + * + * @param object $message SquirrelMail message object + * @param array $exclude_id message parts that are not attachments. + * @param string $mailbox mailbox name + * @param integer $id message id + * @return string html formated attachment information. */ -function formatAttachments ($message, $ent_id, $mailbox, $id) { - global $where, $what; - global $startMessage, $color; - static $ShownHTML = 0; - - $body = ""; - if ($ShownHTML == 0) { - $ShownHTML = 1; - - $body .= "\n" . - "
\n" . - _("Attachments") . ':' . - "
\n" . - "\n" . - formatAttachments ($message, $ent_id, $mailbox, $id) . - "
"; - - return( $body ); - } - - if ($message) { - if (!$message->entities) { - $type0 = strtolower($message->header->type0); - $type1 = strtolower($message->header->type1); - $name = decodeHeader($message->header->name); - - if ($message->header->entity_id != $ent_id) { - $filename = decodeHeader($message->header->filename); - if (trim($filename) == '') { - if (trim($name) == '') { - if ( trim( $message->header->id ) == '' ) - $display_filename = 'untitled-[' . $message->header->entity_id . ']' ; - else - $display_filename = 'cid: ' . $message->header->id; - // $display_filename = 'untitled-[' . $message->header->entity_id . ']' ; - } else { - $display_filename = $name; - $filename = $name; - } - } else { - $display_filename = $filename; - } - - $urlMailbox = urlencode($mailbox); - $ent = urlencode($message->header->entity_id); - - $DefaultLink = - "../src/download.php?startMessage=$startMessage&passed_id=$id&mailbox=$urlMailbox&passed_ent_id=$ent"; - if ($where && $what) - $DefaultLink .= '&where=' . urlencode($where) . '&what=' . urlencode($what); - $Links['download link']['text'] = _("download"); - $Links['download link']['href'] = - "../src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&passed_ent_id=$ent"; - $ImageURL = ''; - - /* this executes the attachment hook with a specific MIME-type. - * if that doens't have results, it tries if there's a rule - * for a more generic type. */ - $HookResults = do_hook("attachment $type0/$type1", $Links, - $startMessage, $id, $urlMailbox, $ent, $DefaultLink, - $display_filename, $where, $what); - if(count($HookResults[1]) <= 1) { - $HookResults = do_hook("attachment $type0/*", $Links, - $startMessage, $id, $urlMailbox, $ent, $DefaultLink, - $display_filename, $where, $what); - } - - $Links = $HookResults[1]; - $DefaultLink = $HookResults[6]; - - $body .= '  ' . - "$display_filename " . - '' . show_readable_size($message->header->size) . - '  ' . - "[ $type0/$type1 ] " . - ''; - if ($message->header->description) - $body .= '' . htmlspecialchars($message->header->description) . ''; - $body .= ' '; - - - $SkipSpaces = 1; - foreach ($Links as $Val) { - if ($SkipSpaces) { - $SkipSpaces = 0; - } else { - $body .= '  |  '; - } - $body .= '' . $Val['text'] . ''; - } - - unset($Links); - - $body .= "\n"; +function formatAttachments($message, $exclude_id, $mailbox, $id) { + global $where, $what, $startMessage, $color, $passed_ent_id; + + $att_ar = $message->getAttachments($exclude_id); + + if (!count($att_ar)) return ''; + + $attachments = ''; + + $urlMailbox = urlencode($mailbox); + + foreach ($att_ar as $att) { + $ent = $att->entity_id; + $header = $att->header; + $type0 = strtolower($header->type0); + $type1 = strtolower($header->type1); + $name = ''; + $links['download link']['text'] = _("Download"); + $links['download link']['href'] = SM_PATH . + "src/download.php?absolute_dl=true&passed_id=$id&mailbox=$urlMailbox&ent_id=$ent"; + if ($type0 =='message' && $type1 == 'rfc822') { + $default_page = SM_PATH . 'src/read_body.php'; + $rfc822_header = $att->rfc822_header; + $filename = $rfc822_header->subject; + if (trim( $filename ) == '') { + $filename = 'untitled-[' . $ent . ']' ; + } + $from_o = $rfc822_header->from; + if (is_object($from_o)) { + $from_name = decodeHeader($from_o->getAddress(false)); + } else { + $from_name = _("Unknown sender"); + } + $description = $from_name; + } else { + $default_page = SM_PATH . 'src/download.php'; + $filename = $att->getFilename(); + if ($header->description) { + $description = decodeHeader($header->description); + } else { + $description = ''; + } + } + + $display_filename = $filename; + if (isset($passed_ent_id)) { + $passed_ent_id_link = '&passed_ent_id='.$passed_ent_id; + } else { + $passed_ent_id_link = ''; + } + $defaultlink = $default_page . "?startMessage=$startMessage" + . "&passed_id=$id&mailbox=$urlMailbox" + . '&ent_id='.$ent.$passed_ent_id_link; + if ($where && $what) { + $defaultlink .= '&where='. urlencode($where).'&what='.urlencode($what); } - } else { - for ($i = 0; $i < count($message->entities); $i++) { - $body .= formatAttachments ($message->entities[$i], $ent_id, $mailbox, $id); + + /* This executes the attachment hook with a specific MIME-type. + * If that doesn't have results, it tries if there's a rule + * for a more generic type. + */ + $hookresults = do_hook("attachment $type0/$type1", $links, + $startMessage, $id, $urlMailbox, $ent, $defaultlink, + $display_filename, $where, $what); + if(count($hookresults[1]) <= 1) { + $hookresults = do_hook("attachment $type0/*", $links, + $startMessage, $id, $urlMailbox, $ent, $defaultlink, + $display_filename, $where, $what); + } + + $links = $hookresults[1]; + $defaultlink = $hookresults[6]; + + $attachments .= '' . + ''.decodeHeader($display_filename).' ' . + '' . show_readable_size($header->size) . + '  ' . + '[ '.htmlspecialchars($type0).'/'.htmlspecialchars($type1).' ] ' . + ''; + $attachments .= '' . $description . ''; + $attachments .= ' '; + + $skipspaces = 1; + foreach ($links as $val) { + if ($skipspaces) { + $skipspaces = 0; + } else { + $attachments .= '  |  '; + } + $attachments .= '' . (isset($val['text']) && !empty($val['text']) ? $val['text'] : '') . (isset($val['extra']) && !empty($val['extra']) ? $val['extra'] : '') . ''; } - } - return( $body ); - } + unset($links); + $attachments .= "\n"; + } + $attachmentadd = do_hook_function('attachments_bottom',$attachments); + if ($attachmentadd != '') + $attachments = $attachmentadd; + return $attachments; } +function sqimap_base64_decode(&$string) { + + // Base64 encoded data goes in pairs of 4 bytes. To achieve on the + // fly decoding (to reduce memory usage) you have to check if the + // data has incomplete pairs -/** this function decodes the body depending on the encoding type. **/ + // Remove the noise in order to check if the 4 bytes pairs are complete + $string = str_replace(array("\r\n","\n", "\r", " "),array('','','',''),$string); + + $sStringRem = ''; + $iMod = strlen($string) % 4; + if ($iMod) { + $sStringRem = substr($string,-$iMod); + // Check if $sStringRem contains padding characters + if (substr($sStringRem,-1) != '=') { + $string = substr($string,0,-$iMod); + } else { + $sStringRem = ''; + } + } + $string = base64_decode($string); + return $sStringRem; +} + + +/* This function decodes the body depending on the encoding type. */ function decodeBody($body, $encoding) { - $body = str_replace("\r\n", "\n", $body); - $encoding = strtolower($encoding); + global $show_html_default; - global $show_html_default; + $body = str_replace("\r\n", "\n", $body); + $encoding = strtolower($encoding); - if ($encoding == 'quoted-printable') { - $body = quoted_printable_decode($body); + $encoding_handler = do_hook_function('decode_body', $encoding); - while (ereg("=\n", $body)) - $body = ereg_replace ("=\n", "", $body); + // plugins get first shot at decoding the body + // + if (!empty($encoding_handler) && function_exists($encoding_handler)) { + $body = $encoding_handler('decode', $body); + + } else if ($encoding == 'quoted-printable' || + $encoding == 'quoted_printable') { + $body = quoted_printable_decode($body); + + while (ereg("=\n", $body)) { + $body = ereg_replace ("=\n", '', $body); + } - } else if ($encoding == 'base64') { - $body = base64_decode($body); - } + } else if ($encoding == 'base64') { + $body = base64_decode($body); + } - // All other encodings are returned raw. - return $body; + // All other encodings are returned raw. + return $body; } -/* - * This functions decode strings that is encoded according to - * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). - */ -function decodeHeader ($string, $utfencode=true) { - if (eregi('=\\?([^?]+)\\?(q|b)\\?([^?]+)\\?=', - $string, $res)) { - if (ucfirst($res[2]) == 'B') { - $replace = base64_decode($res[3]); - } else { - $replace = str_replace('_', ' ', $res[3]); - // Convert lowercase Quoted Printable to uppercase for - // quoted_printable_decode to understand it. - while (ereg("(=(([0-9][abcdef])|([abcdef][0-9])|([abcdef][abcdef])))", $replace, $res)) { - $replace = str_replace($res[1], strtoupper($res[1]), $replace); +/** +* Decodes headers +* +* This functions decode strings that is encoded according to +* RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text). +* Patched by Christian Schmidt 23/03/2002 +* +* @param string $string header string that has to be made readable +* @param boolean $utfencode change message in order to be readable on user's charset. defaults to true +* @param boolean $htmlsave preserve spaces and sanitize html special characters. defaults to true +* @param boolean $decide decide if string can be utfencoded. defaults to false +* @return string decoded header string +*/ +function decodeHeader ($string, $utfencode=true,$htmlsave=true,$decide=false) { + global $languages, $squirrelmail_language,$default_charset; + if (is_array($string)) { + $string = implode("\n", $string); + } + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader')) { + $string = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader', $string); + // Do we need to return at this point? + // return $string; } - $replace = quoted_printable_decode($replace); - } - /* Only encode into entities by default. Some places - don't need the encoding, like the compose form. */ - if ($utfencode){ - $replace = charset_decode ($res[1], $replace); - } - - // Remove the name of the character set. - $string = eregi_replace ('=\\?([^?]+)\\?(q|b)\\?([^?]+)\\?=', - $replace, $string); - - // In case there should be more encoding in the string: recurse - return (decodeHeader($string)); - } else - return ($string); + $i = 0; + $iLastMatch = -2; + $encoded = true; + + $aString = explode(' ',$string); + $ret = ''; + foreach ($aString as $chunk) { + if ($encoded && $chunk === '') { + continue; + } elseif ($chunk === '') { + $ret .= ' '; + continue; + } + $encoded = false; + /* if encoded words are not separated by a linear-space-white we still catch them */ + $j = $i-1; + + while ($match = preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) { + /* if the last chunk isn't an encoded string then put back the space, otherwise don't */ + if ($iLastMatch !== $j) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } + } + $iLastMatch = $i; + $j = $i; + if ($htmlsave) { + $ret .= htmlspecialchars($res[1]); + } else { + $ret .= $res[1]; + } + $encoding = ucfirst($res[3]); + + /* decide about valid decoding */ + if ($decide && is_conversion_safe($res[2])) { + $utfencode=true; + $can_be_encoded=true; + } else { + $can_be_encoded=false; + } + switch ($encoding) + { + case 'B': + $replace = base64_decode($res[4]); + if ($utfencode) { + if ($can_be_encoded) { + /* convert string to different charset, + * if functions asks for it (usually in compose) + */ + $ret .= charset_convert($res[2],$replace,$default_charset); + } else { + // convert string to html codes in order to display it + $ret .= charset_decode($res[2],$replace); + } + } else { + if ($htmlsave) { + $replace = htmlspecialchars($replace); + } + $ret.= $replace; + } + break; + case 'Q': + $replace = str_replace('_', ' ', $res[4]); + $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))', + $replace); + if ($utfencode) { + if ($can_be_encoded) { + /* convert string to different charset, + * if functions asks for it (usually in compose) + */ + $replace = charset_convert($res[2], $replace,$default_charset); + } else { + // convert string to html codes in order to display it + $replace = charset_decode($res[2], $replace); + } + } else { + if ($htmlsave) { + $replace = htmlspecialchars($replace); + } + } + $ret .= $replace; + break; + default: + break; + } + $chunk = $res[5]; + $encoded = true; + } + if (!$encoded) { + if ($htmlsave) { + $ret .= ' '; + } else { + $ret .= ' '; + } + } + + if (!$encoded && $htmlsave) { + $ret .= htmlspecialchars($chunk); + } else { + $ret .= $chunk; + } + ++$i; + } + /* remove the first added space */ + if ($ret) { + if ($htmlsave) { + $ret = substr($ret,5); + } else { + $ret = substr($ret,1); + } + } + + return $ret; } -/* - * Encode a string according to RFC 1522 for use in headers if it - * contains 8-bit characters or anything that looks like it should - * be encoded. - */ +/** +* Encodes header as quoted-printable +* +* Encode a string according to RFC 1522 for use in headers if it +* contains 8-bit characters or anything that looks like it should +* be encoded. +* +* @param string $string header string, that has to be encoded +* @return string quoted-printable encoded string +*/ function encodeHeader ($string) { - global $default_charset; - + global $default_charset, $languages, $squirrelmail_language; + + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader')) { + return call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader', $string); + } + // Encode only if the string contains 8-bit characters or =? - $j = strlen( $string ); - $l = strstr($string, '=?'); // Must be encoded ? + $j = strlen($string); + $max_l = 75 - strlen($default_charset) - 7; + $aRet = array(); $ret = ''; - for( $i=0; $i < $j; ++$i) { - switch( $string{$i} ) { - case '=': - $ret .= '=3D'; - break; + $iEncStart = $enc_init = false; + $cur_l = $iOffset = 0; + for($i = 0; $i < $j; ++$i) { + switch($string{$i}) + { + case '=': + case '<': + case '>': + case ',': case '?': - $ret .= '=3F'; - break; case '_': - $ret .= '=5F'; - break; + if ($iEncStart === false) { + $iEncStart = $i; + } + $cur_l+=3; + if ($cur_l > ($max_l-2)) { + /* if there is an stringpart that doesn't need encoding, add it */ + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } else { + $ret .= sprintf("=%02X",ord($string{$i})); + } + break; + case '(': + case ')': + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } + break; case ' ': - $ret .= '_'; - break; + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + $iEncStart = false; + } else { + $ret .= '_'; + } + } + break; default: - $k = ord( $string{$i} ); - if ( $k > 126 ) { - $ret .= sprintf("=%02X", $k); - $l = TRUE; - } else - $ret .= $string{$i}; + $k = ord($string{$i}); + if ($k > 126) { + if ($iEncStart === false) { + // do not start encoding in the middle of a string, also take the rest of the word. + $sLeadString = substr($string,0,$i); + $aLeadString = explode(' ',$sLeadString); + $sToBeEncoded = array_pop($aLeadString); + $iEncStart = $i - strlen($sToBeEncoded); + $ret .= $sToBeEncoded; + $cur_l += strlen($sToBeEncoded); + } + $cur_l += 3; + /* first we add the encoded string that reached it's max size */ + if ($cur_l > ($max_l-2)) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?= "; /* the next part is also encoded => separate by space */ + $cur_l = 3; + $ret = ''; + $iOffset = $i; + $iEncStart = $i; + } + $enc_init = true; + $ret .= sprintf("=%02X", $k); + } else { + if ($iEncStart !== false) { + $cur_l++; + if ($cur_l > $max_l) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + $iEncStart = false; + $iOffset = $i; + $cur_l = 0; + $ret = ''; + } else { + $ret .= $string{$i}; + } + } + } + break; } } - - if ( $l ) { - $string = "=?$default_charset?Q?$ret?="; + + if ($enc_init) { + if ($iEncStart !== false) { + $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset); + $aRet[] = "=?$default_charset?Q?$ret?="; + } else { + $aRet[] = substr($string,$iOffset); + } + $string = implode('',$aRet); } - - return( $string ); + return $string; } -/* - Strips dangerous tags from html messages. +/* This function trys to locate the entity_id of a specific mime element */ +function find_ent_id($id, $message) { + for ($i = 0, $ret = ''; $ret == '' && $i < count($message->entities); $i++) { + if ($message->entities[$i]->header->type0 == 'multipart') { + $ret = find_ent_id($id, $message->entities[$i]); + } else { + if (strcasecmp($message->entities[$i]->header->id, $id) == 0) { +// if (sq_check_save_extension($message->entities[$i])) { + return $message->entities[$i]->entity_id; +// } + } elseif (!empty($message->entities[$i]->header->parameters['name'])) { + /** + * This is part of a fix for Outlook Express 6.x generating + * cid URLs without creating content-id headers + * @@JA - 20050207 + */ + if (strcasecmp($message->entities[$i]->header->parameters['name'], $id) == 0) { + return $message->entities[$i]->entity_id; + } + } + } + } + return $ret; +} + +function sq_check_save_extension($message) { + $filename = $message->getFilename(); + $ext = substr($filename, strrpos($filename,'.')+1); + $save_extensions = array('jpg','jpeg','gif','png','bmp'); + return in_array($ext, $save_extensions); +} + + +/** +** HTMLFILTER ROUTINES */ -function MagicHTML( $body, $id ) { - global $message, $HTTP_SERVER_VARS, - $attachment_common_show_images; +/** +* This function is more or less a wrapper around stripslashes. Apparently +* Explorer is stupid enough to just remove the backslashes and then +* execute the content of the attribute as if nothing happened. +* Who does that? +* +* @param attvalue The value of the attribute +* @return attvalue The value of the attribute stripslashed. +*/ +function sq_unbackslash($attvalue){ + /** + * Remove any backslashes. See if there are any first. + */ - $attachment_common_show_images = - FALSE; // Don't display attached images in HTML mode - $j = strlen( $body ); // Legnth of the HTML - $ret = ''; // Returned string - $bgcolor = '#ffffff'; // Background style color (defaults to white) - $textcolor = '#000000'; // Foreground style color (defaults to black) - $leftmargin = ''; // Left margin style - $title = ''; // HTML title if any + if (strstr($attvalue, '\\') !== false){ + $attvalue = stripslashes($attvalue); + } + return $attvalue; +} - $i = 0; - while ( $i < $j ) { - if ( $body{$i} == '<' ) { - $pos = $i + 1; - $tag = ''; - while ($body{$pos} == ' ' || $body{$pos} == "\t" || - $body{$pos} == "\n") { - $pos ++; +/** +* Kill any tabs, newlines, or carriage returns. Our friends the +* makers of the browser with 95% market value decided that it'd +* be funny to make "java[tab]script" be just as good as "javascript". +* +* @param attvalue The attribute value before extraneous spaces removed. +* @return attvalue The attribute value after extraneous spaces removed. +*/ +function sq_unspace($attvalue){ + if (strcspn($attvalue, "\t\r\n") != strlen($attvalue)){ + $attvalue = str_replace(Array("\t", "\r", "\n"), Array('', '', ''), + $attvalue); + } + return $attvalue; +} + +/** +* This function returns the final tag out of the tag name, an array +* of attributes, and the type of the tag. This function is called by +* sq_sanitize internally. +* +* @param $tagname the name of the tag. +* @param $attary the array of attributes and their values +* @param $tagtype The type of the tag (see in comments). +* @return a string with the final tag representation. +*/ +function sq_tagprint($tagname, $attary, $tagtype){ + $me = 'sq_tagprint'; + + if ($tagtype == 2){ + $fulltag = ''; + } else { + $fulltag = '<' . $tagname; + if (is_array($attary) && sizeof($attary)){ + $atts = Array(); + while (list($attname, $attvalue) = each($attary)){ + array_push($atts, "$attname=$attvalue"); } - while (strlen($tag) < 4 && $body{$pos} != ' ' && - $body{$pos} != "\t" && $body{$pos} != "\n") { - $tag .= $body{$pos}; - $pos ++; + $fulltag .= ' ' . join(" ", $atts); + } + if ($tagtype == 3){ + $fulltag .= ' /'; + } + $fulltag .= '>'; + } + return $fulltag; +} + +/** +* A small helper function to use with array_walk. Modifies a by-ref +* value and makes it lowercase. +* +* @param $val a value passed by-ref. +* @return void since it modifies a by-ref value. +*/ +function sq_casenormalize(&$val){ + $val = strtolower($val); +} + +/** +* This function skips any whitespace from the current position within +* a string and to the next non-whitespace value. +* +* @param $body the string +* @param $offset the offset within the string where we should start +* looking for the next non-whitespace character. +* @return the location within the $body where the next +* non-whitespace char is located. +*/ +function sq_skipspace($body, $offset){ + $me = 'sq_skipspace'; + preg_match('/^(\s*)/s', substr($body, $offset), $matches); + if (sizeof($matches{1})){ + $count = strlen($matches{1}); + $offset += $count; + } + return $offset; +} + +/** +* This function looks for the next character within a string. It's +* really just a glorified "strpos", except it catches if failures +* nicely. +* +* @param $body The string to look for needle in. +* @param $offset Start looking from this position. +* @param $needle The character/string to look for. +* @return location of the next occurance of the needle, or +* strlen($body) if needle wasn't found. +*/ +function sq_findnxstr($body, $offset, $needle){ + $me = 'sq_findnxstr'; + $pos = strpos($body, $needle, $offset); + if ($pos === FALSE){ + $pos = strlen($body); + } + return $pos; +} + +/** +* This function takes a PCRE-style regexp and tries to match it +* within the string. +* +* @param $body The string to look for needle in. +* @param $offset Start looking from here. +* @param $reg A PCRE-style regex to match. +* @return Returns a false if no matches found, or an array +* with the following members: +* - integer with the location of the match within $body +* - string with whatever content between offset and the match +* - string with whatever it is we matched +*/ +function sq_findnxreg($body, $offset, $reg){ + $me = 'sq_findnxreg'; + $matches = Array(); + $retarr = Array(); + preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches); + if (!isset($matches{0}) || !$matches{0}){ + $retarr = false; + } else { + $retarr{0} = $offset + strlen($matches{1}); + $retarr{1} = $matches{1}; + $retarr{2} = $matches{2}; + } + return $retarr; +} + +/** +* This function looks for the next tag. +* +* @param $body String where to look for the next tag. +* @param $offset Start looking from here. +* @return false if no more tags exist in the body, or +* an array with the following members: +* - string with the name of the tag +* - array with attributes and their values +* - integer with tag type (1, 2, or 3) +* - integer where the tag starts (starting "<") +* - integer where the tag ends (ending ">") +* first three members will be false, if the tag is invalid. +*/ +function sq_getnxtag($body, $offset){ + $me = 'sq_getnxtag'; + if ($offset > strlen($body)){ + return false; + } + $lt = sq_findnxstr($body, $offset, "<"); + if ($lt == strlen($body)){ + return false; + } + /** + * We are here: + * blah blah + * \---------^ + */ + $pos = sq_skipspace($body, $lt+1); + if ($pos >= strlen($body)){ + return Array(false, false, false, $lt, strlen($body)); + } + /** + * There are 3 kinds of tags: + * 1. Opening tag, e.g.: + * + * 2. Closing tag, e.g.: + * + * 3. XHTML-style content-less tag, e.g.: + * + */ + $tagtype = false; + switch (substr($body, $pos, 1)){ + case '/': + $tagtype = 2; + $pos++; + break; + case '!': + /** + * A comment or an SGML declaration. + */ + if (substr($body, $pos+1, 2) == "--"){ + $gt = strpos($body, "-->", $pos); + if ($gt === false){ + $gt = strlen($body); + } else { + $gt += 2; + } + return Array(false, false, false, $lt, $gt); + } else { + $gt = sq_findnxstr($body, $pos, ">"); + return Array(false, false, false, $lt, $gt); } - switch( strtoupper( $tag ) ) { - // Strips the entire tag and contents - case 'APPL': - case 'EMBB': - case 'FRAM': - case 'SCRI': - case 'OBJE': - $etg = '/' . $tag; - while ( $body{$i+1}.$body{$i+2}.$body{$i+3}.$body{$i+4}.$body{$i+5} <> $etg && - $i < $j ) $i++; - while ( $i < $j && $body{++$i} <> '>' ); - // $ret .= ""; - break; - // Substitute Title - case 'TITL': - $i += 5; - while ( $body{$i} <> '>' && // - $i < $j ) - $i++; - $i++; - $title = ''; - while ( $body{$i} <> '<' && // - $i < $j ) { - $title .= $body{$i}; - $i++; + break; + default: + /** + * Assume tagtype 1 for now. If it's type 3, we'll switch values + * later. + */ + $tagtype = 1; + break; + } + + $tagname = ''; + /** + * Look for next [\W-_], which will indicate the end of the tag name. + */ + $regary = sq_findnxreg($body, $pos, "[^\w\-_]"); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); + } + list($pos, $tagname, $match) = $regary; + $tagname = strtolower($tagname); + + /** + * $match can be either of these: + * '>' indicating the end of the tag entirely. + * '\s' indicating the end of the tag name. + * '/' indicating that this is type-3 xhtml tag. + * + * Whatever else we find there indicates an invalid tag. + */ + switch ($match){ + case '/': + /** + * This is an xhtml-style tag with a closing / at the + * end, like so: . Check if it's followed + * by the closing bracket. If not, then this tag is invalid + */ + if (substr($body, $pos, 2) == "/>"){ + $pos++; + $tagtype = 3; + } else { + $gt = sq_findnxstr($body, $pos, ">"); + $retary = Array(false, false, false, $lt, $gt); + return $retary; + } + case '>': + return Array($tagname, false, $tagtype, $lt, $pos); + break; + default: + /** + * Check if it's whitespace + */ + if (!preg_match('/\s/', $match)){ + /** + * This is an invalid tag! Look for the next closing ">". + */ + $gt = sq_findnxstr($body, $lt, ">"); + return Array(false, false, false, $lt, $gt); + } + break; + } + + /** + * At this point we're here: + * + * \-------^ + * + * At this point we loop in order to find all attributes. + */ + $attname = ''; + $attary = Array(); + + while ($pos <= strlen($body)){ + $pos = sq_skipspace($body, $pos); + if ($pos == strlen($body)){ + /** + * Non-closed tag. + */ + return Array(false, false, false, $lt, $pos); + } + /** + * See if we arrived at a ">" or "/>", which means that we reached + * the end of the tag. + */ + $matches = Array(); + if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) { + /** + * Yep. So we did. + */ + $pos += strlen($matches{1}); + if ($matches{2} == "/>"){ + $tagtype = 3; + $pos++; + } + return Array($tagname, $attary, $tagtype, $lt, $pos); + } + + /** + * There are several types of attributes, with optional + * [:space:] between members. + * Type 1: + * attrname[:space:]=[:space:]'CDATA' + * Type 2: + * attrname[:space:]=[:space:]"CDATA" + * Type 3: + * attr[:space:]=[:space:]CDATA + * Type 4: + * attrname + * + * We leave types 1 and 2 the same, type 3 we check for + * '"' and convert to """ if needed, then wrap in + * double quotes. Type 4 we convert into: + * attrname="yes". + */ + $regary = sq_findnxreg($body, $pos, "[^:\w\-_]"); + if ($regary == false){ + /** + * Looks like body ended before the end of tag. + */ + return Array(false, false, false, $lt, strlen($body)); + } + list($pos, $attname, $match) = $regary; + $attname = strtolower($attname); + /** + * We arrived at the end of attribute name. Several things possible + * here: + * '>' means the end of the tag and this is attribute type 4 + * '/' if followed by '>' means the same thing as above + * '\s' means a lot of things -- look what it's followed by. + * anything else means the attribute is invalid. + */ + switch($match){ + case '/': + /** + * This is an xhtml-style tag with a closing / at the + * end, like so: . Check if it's followed + * by the closing bracket. If not, then this tag is invalid + */ + if (substr($body, $pos, 2) == "/>"){ + $pos++; + $tagtype = 3; + } else { + $gt = sq_findnxstr($body, $pos, ">"); + $retary = Array(false, false, false, $lt, $gt); + return $retary; } - $i += 7; - break; - // Destroy these tags - case 'HTML': - case 'HEAD': - case '/HTM': - case '/HEA': - case '!DOC': - case 'META': - case 'DIV ': - case '/DIV': - case '!-- ': - $i += 4; - while ( $body{$i} <> '>' && - $i < $j ) - $i++; - // $i++; + case '>': + $attary{$attname} = '"yes"'; + return Array($tagname, $attary, $tagtype, $lt, $pos); break; - case 'STYL': - $i += 5; - while ( $body{$i} <> '>' && // - $i < $j ) - $i++; - $i++; - // We parse the style to look for interesting stuff - $styleblk = ''; - while ( $body{$i} <> '>' && - $i < $j ) { - // First we get the name of the style - $style = ''; - while ( $body{$i} <> '>' && - $body{$i} <> '<' && - $body{$i} <> '{' && - $i < $j ) { - if ( isnoSep( $body{$i} ) ) - $style .= $body{$i}; - $i++; - } - stripComments( $i, $j, $body ); - $style = strtoupper( trim( $style ) ); - if ( $style == 'BODY' ) { - // Next we look into the definitions of the body style - while ( $body{$i} <> '>' && - $body{$i} <> '}' && - $i < $j ) { - // We look for the background color if any. - if ( substr( $body, $i, 17 ) == 'BACKGROUND-COLOR:' ) { - $i += 17; - $bgcolor = getStyleData( $i, $j, $body ); - } elseif ( substr( $body, $i, 12 ) == 'MARGIN-LEFT:' ) { - $i += 12; - $leftmargin = getStyleData( $i, $j, $body ); - } - $i++; + default: + /** + * Skip whitespace and see what we arrive at. + */ + $pos = sq_skipspace($body, $pos); + $char = substr($body, $pos, 1); + /** + * Two things are valid here: + * '=' means this is attribute type 1 2 or 3. + * \w means this was attribute type 4. + * anything else we ignore and re-loop. End of tag and + * invalid stuff will be caught by our checks at the beginning + * of the loop. + */ + if ($char == "="){ + $pos++; + $pos = sq_skipspace($body, $pos); + /** + * Here are 3 possibilities: + * "'" attribute type 1 + * '"' attribute type 2 + * everything else is the content of tag type 3 + */ + $quot = substr($body, $pos, 1); + if ($quot == "'"){ + $regary = sq_findnxreg($body, $pos+1, "\'"); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); + } + list($pos, $attval, $match) = $regary; + $pos++; + $attary{$attname} = "'" . $attval . "'"; + } else if ($quot == '"'){ + $regary = sq_findnxreg($body, $pos+1, '\"'); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); } + list($pos, $attval, $match) = $regary; + $pos++; + $attary{$attname} = '"' . $attval . '"'; } else { - // Other style are mantained - $styleblk .= "$style "; - while ( $body{$i} <> '>' && - $body{$i} <> '<' && - $body{$i} <> '}' && - $i < $j ) { - $styleblk .= $body{$i}; - $i++; + /** + * These are hateful. Look for \s, or >. + */ + $regary = sq_findnxreg($body, $pos, "[\s>]"); + if ($regary == false){ + return Array(false, false, false, $lt, strlen($body)); } - $styleblk .= $body{$i}; + list($pos, $attval, $match) = $regary; + /** + * If it's ">" it will be caught at the top. + */ + $attval = preg_replace("/\"/s", """, $attval); + $attary{$attname} = '"' . $attval . '"'; } - stripComments( $i, $j, $body ); - if ( $body{$i} <> '>' ) - $i++; - } - if ( $styleblk <> '' ) - $ret .= " +* @param $mailbox the message mailbox +* @return a string with edited content. +*/ +function sq_fixstyle($body, $pos, $message, $id, $mailbox){ + global $view_unsafe_images; + $me = 'sq_fixstyle'; + $ret = sq_findnxreg($body, $pos, ''); + if ($ret == FALSE){ + return array(FALSE, strlen($body)); } - // And get the color - $ret = ''; - while ( isNoSep( $body{$i} ) && - $i < $j ) { - $ret .= $body{$i}; - $i++; + $newpos = $ret[0] + strlen($ret[2]); + $content = $ret[1]; + /** + * First look for general BODY style declaration, which would be + * like so: + * body {background: blah-blah} + * and change it to .bodyclass so we can just assign it to a
+ */ + $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content); + $secremoveimg = '../images/' . _("sec_remove_eng.png"); + /** + * Fix url('blah') declarations. + */ + $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si", + "url(\\1$secremoveimg\\2)", $content); + /** + * Fix url('https*://.*) declarations but only if $view_unsafe_images + * is false. + */ + if (!$view_unsafe_images){ + $content = preg_replace("|url\s*\(\s*([\'\"])\s*https*:.*?([\'\"])\s*\)|si", + "url(\\1$secremoveimg\\2)", $content); + } + + /** + * Fix urls that refer to cid: + */ + while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si", + $content, $matches)){ + $cidurl = $matches{1}; + $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox); + $content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si", + "url($httpurl)", $content); } - return( $ret ); + /** + * Fix stupid css declarations which lead to vulnerabilities + * in IE. + */ + $match = Array('/expression/i', + '/behaviou*r/i', + '/binding/i', + '/include-source/i'); + $replace = Array('idiocy', 'idiocy', 'idiocy', 'idiocy'); + $content = preg_replace($match, $replace, $content); + return array($content, $newpos); } -/* -Private function for strip_dangerous_tag. Look for event based coded and "remove" it -change on with no (onload -> noload) +/** +* This function converts cid: url's into the ones that can be viewed in +* the browser. +* +* @param $message the message object +* @param $id the message id +* @param $cidurl the cid: url. +* @param $mailbox the message mailbox +* @return a string with a http-friendly url */ +function sq_cid2http($message, $id, $cidurl, $mailbox){ + /** + * Get rid of quotes. + */ + $quotchar = substr($cidurl, 0, 1); + if ($quotchar == '"' || $quotchar == "'"){ + $cidurl = str_replace($quotchar, "", $cidurl); + } else { + $quotchar = ''; + } + $cidurl = substr(trim($cidurl), 4); + $linkurl = find_ent_id($cidurl, $message); + /* in case of non-save cid links $httpurl should be replaced by a sort of + unsave link image */ + $httpurl = ''; + + /** + * This is part of a fix for Outlook Express 6.x generating + * cid URLs without creating content-id headers. These images are + * not part of the multipart/related html mail. The html contains + * references to + * attached images with as goal to render them inline although + * the attachment disposition property is not inline. + **/ -function stripEvent( &$i, $j, &$body, $id, $base ) { - - global $message, $base_uri; + if (empty($linkurl)) { + if (preg_match('/{.*}\//', $cidurl)) { + $cidurl = preg_replace('/{.*}\//','', $cidurl); + if (!empty($cidurl)) { + $linkurl = find_ent_id($cidurl, $message); + } + } + } + + if (!empty($linkurl)) { + $httpurl = $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&' . + "passed_id=$id&mailbox=" . urlencode($mailbox) . + '&ent_id=' . $linkurl . $quotchar; + } else { + /** + * If we couldn't generate a proper img url, drop in a blank image + * instead of sending back empty, otherwise it causes unusual behaviour + */ + $httpurl = $quotchar . SM_PATH . 'images/blank.png'; + } + + return $httpurl; +} - $ret = ''; +/** +* This function changes the tag into a
tag since we +* can't really have a body-within-body. +* +* @param $attary an array of attributes and values of +* @param $mailbox mailbox we're currently reading (for cid2http) +* @param $message current message (for cid2http) +* @param $id current message id (for cid2http) +* @return a modified array of attributes to be set for
+*/ +function sq_body2div($attary, $mailbox, $message, $id){ + $me = 'sq_body2div'; + $divattary = Array('class' => "'bodyclass'"); + $text = '#000000'; + $has_bgc_stl = $has_txt_stl = false; + $styledef = ''; + if (is_array($attary) && sizeof($attary) > 0){ + foreach ($attary as $attname=>$attvalue){ + $quotchar = substr($attvalue, 0, 1); + $attvalue = str_replace($quotchar, "", $attvalue); + switch ($attname){ + case 'background': + $attvalue = sq_cid2http($message, $id, + $attvalue, $mailbox); + $styledef .= "background-image: url('$attvalue'); "; + break; + case 'bgcolor': + $has_bgc_stl = true; + $styledef .= "background-color: $attvalue; "; + break; + case 'text': + $has_txt_stl = true; + $styledef .= "color: $attvalue; "; + break; + } + } + // Outlook defines a white bgcolor and no text color. This can lead to + // white text on a white bg with certain themes. + if ($has_bgc_stl && !$has_txt_stl) { + $styledef .= "color: $text; "; + } + if (strlen($styledef) > 0){ + $divattary{"style"} = "\"$styledef\""; + } + } + return $divattary; +} - while ( $body{$i} <> '>' && - $i < $j ) { - $etg = strtolower($body{$i}.$body{$i+1}.$body{$i+2}); - switch( $etg ) { - case 'src': - // This is probably a src specification - $k = $i + 3; - while( !isNoSep( $body{$k} )) { - $k++; +/** +* This is the main function and the one you should actually be calling. +* There are several variables you should be aware of an which need +* special description. +* +* Since the description is quite lengthy, see it here: +* http://linux.duke.edu/projects/mini/htmlfilter/ +* +* @param $body the string with HTML you wish to filter +* @param $tag_list see description above +* @param $rm_tags_with_content see description above +* @param $self_closing_tags see description above +* @param $force_tag_closing see description above +* @param $rm_attnames see description above +* @param $bad_attvals see description above +* @param $add_attr_to_tag see description above +* @param $message message object +* @param $id message id +* @return sanitized html safe to show on your pages. +*/ +function sq_sanitize($body, + $tag_list, + $rm_tags_with_content, + $self_closing_tags, + $force_tag_closing, + $rm_attnames, + $bad_attvals, + $add_attr_to_tag, + $message, + $id, + $mailbox + ){ + $me = 'sq_sanitize'; + $rm_tags = array_shift($tag_list); + /** + * Normalize rm_tags and rm_tags_with_content. + */ + @array_walk($tag_list, 'sq_casenormalize'); + @array_walk($rm_tags_with_content, 'sq_casenormalize'); + @array_walk($self_closing_tags, 'sq_casenormalize'); + /** + * See if tag_list is of tags to remove or tags to allow. + * false means remove these tags + * true means allow these tags + */ + $curpos = 0; + $open_tags = Array(); + $trusted = "\n\n"; + $skip_content = false; + /** + * Take care of netscape's stupid javascript entities like + * &{alert('boo')}; + */ + $body = preg_replace("/&(\{.*?\};)/si", "&\\1", $body); + + while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){ + list($tagname, $attary, $tagtype, $lt, $gt) = $curtag; + $free_content = substr($body, $curpos, $lt-$curpos); + /** + * Take care of