X-Git-Url: https://vcs.fsf.org/?p=squirrelmail.git;a=blobdiff_plain;f=functions%2Fmime.php;h=7433d7c8b49a3c3c7f5e4b14d777e04d72582771;hp=3322249a7423b964d1da6aaf3073ede298e0cb0f;hb=fd181f534c2c598516e6f5f50fa7acb636225244;hpb=358a78a1aef871123cabcc980cfee1cb317da6c0
diff --git a/functions/mime.php b/functions/mime.php
index 3322249a..7433d7c8 100644
--- a/functions/mime.php
+++ b/functions/mime.php
@@ -3,15 +3,17 @@
/**
* mime.php
*
- * Copyright (c) 1999-2003 The SquirrelMail Project Team
+ * Copyright (c) 1999-2004 The SquirrelMail Project Team
* Licensed under the GNU GPL. For full terms see the file COPYING.
*
* This contains the functions necessary to detect and decode MIME
* messages.
*
* $Id$
+ * @package squirrelmail
*/
+/** The typical includes... */
require_once(SM_PATH . 'functions/imap.php');
require_once(SM_PATH . 'functions/attachment_common.php');
@@ -19,11 +21,13 @@ require_once(SM_PATH . 'functions/attachment_common.php');
/* MIME DECODING */
/* -------------------------------------------------------------------------- */
-/* This function gets the structure of a message and stores it in the "message" class.
+/**
+ * Get the MIME structure
+ *
+ * This function gets the structure of a message and stores it in the "message" class.
* It will return this object for use with all relevant header information and
* fully parsed into the standard "message" object format.
*/
-
function mime_structure ($bodystructure, $flags=array()) {
/* Isolate the body structure and remove beginning and end parenthesis. */
@@ -92,8 +96,8 @@ function mime_structure ($bodystructure, $flags=array()) {
* to mime_get_elements()
*/
-function mime_fetch_body($imap_stream, $id, $ent_id=1) {
- global $uid_support;
+function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) {
+ global $uid_support;
/* Do a bit of error correction. If we couldn't find the entity id, just guess
* that it is the first one. That is usually the case anyway.
*/
@@ -104,6 +108,8 @@ function mime_fetch_body($imap_stream, $id, $ent_id=1) {
$cmd = "FETCH $id BODY[$ent_id]";
}
+ if ($fetch_size!=0) $cmd .= "<0.$fetch_size>";
+
$data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, $uid_support);
do {
$topline = trim(array_shift($data));
@@ -157,7 +163,7 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) {
/* Don't kill the connection if the browser is over a dialup
* and it would take over 30 seconds to download it.
- * DonĀ“t call set_time_limit in safe mode.
+ * Don't call set_time_limit in safe mode.
*/
if (!ini_get('safe_mode')) {
@@ -177,17 +183,17 @@ function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding) {
echo decodeBody($body, $encoding);
}
- /*
+ /*
TODO, use the same method for quoted printable.
However, I assume that quoted printable attachments aren't that large
so the performancegain / memory usage drop will be minimal.
If we decide to add that then we need to adapt sqimap_fread because
- we need to split te result on \n and fread doesn't stop at \n. That
+ we need to split te result on \n and fread doesn't stop at \n. That
means we also should provide $results from sqimap_fread (by ref) to
te function and set $no_return to false. The $filter function for
- quoted printable should handle unsetting of $results.
+ quoted printable should handle unsetting of $results.
*/
- /*
+ /*
TODO 2: find out how we write to the output stream php://stdout. fwrite
doesn't work because 'php://stdout isn't a stream.
*/
@@ -387,12 +393,14 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma
translateText($body, $wrap_at,
$body_message->header->getParameter('charset'));
}
- $link = 'read_body.php?passed_id=' . $id . '&ent_id='.$ent_num.
+ $link = 'passed_id=' . $id . '&ent_id='.$ent_num.
'&mailbox=' . $urlmailbox .'&sort=' . $sort .
- '&startMessage=' . $startMessage . '&show_more=0';
+ '&startMessage=' . $startMessage . '&show_more=0';
if (isset($passed_ent_id)) {
$link .= '&passed_ent_id='.$passed_ent_id;
}
+ $body .= '
' . _("Download this as a file") . '';
if ($view_unsafe_images) {
$text = _("Hide Unsafe Images");
} else {
@@ -403,8 +411,10 @@ function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $ma
$text = '';
}
}
- $body .= ''.$text.
- '
' . "\n";
+ if($text != '') {
+ $body .= ' | ' . $text . '';
+ }
+ $body .= '
' . "\n";
}
return $body;
}
@@ -441,11 +451,10 @@ function formatAttachments($message, $exclude_id, $mailbox, $id) {
}
$from_o = $rfc822_header->from;
if (is_object($from_o)) {
- $from_name = $from_o->getAddress(false);
+ $from_name = decodeHeader($from_o->getAddress(false));
} else {
$from_name = _("Unknown sender");
}
- $from_name = decodeHeader(($from_name));
$description = $from_name;
} else {
$default_page = SM_PATH . 'src/download.php';
@@ -552,7 +561,15 @@ function decodeBody($body, $encoding) {
$body = str_replace("\r\n", "\n", $body);
$encoding = strtolower($encoding);
- if ($encoding == 'quoted-printable' ||
+ $encoding_handler = do_hook_function('decode_body', $encoding);
+
+
+ // plugins get first shot at decoding the body
+ //
+ if (!empty($encoding_handler) && function_exists($encoding_handler)) {
+ $body = $encoding_handler('decode', $body);
+
+ } else if ($encoding == 'quoted-printable' ||
$encoding == 'quoted_printable') {
$body = quoted_printable_decode($body);
@@ -568,17 +585,25 @@ function decodeBody($body, $encoding) {
return $body;
}
-/*
+/**
+ * Decodes headers
+ *
* This functions decode strings that is encoded according to
* RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
* Patched by Christian Schmidt 23/03/2002
+ *
+ * @param string $string header string that has to be made readable
+ * @param boolean $utfencode change message in order to be readable on user's charset. defaults to true
+ * @param boolean $htmlsave preserve spaces and sanitize html special characters. defaults to true
+ * @param boolean $decide decide if string can be utfencoded. defaults to false
+ * @return string decoded header string
*/
-function decodeHeader ($string, $utfencode=true,$htmlsave=true) {
+function decodeHeader ($string, $utfencode=true,$htmlsave=true,$decide=false) {
global $languages, $squirrelmail_language;
if (is_array($string)) {
$string = implode("\n", $string);
}
-
+
if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
$string = $languages[$squirrelmail_language]['XTRA_CODE']('decodeheader', $string);
@@ -587,7 +612,7 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) {
}
$i = 0;
$iLastMatch = -2;
- $encoded = false;
+ $encoded = true;
$aString = explode(' ',$string);
$ret = '';
@@ -601,12 +626,12 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) {
$encoded = false;
/* if encoded words are not separated by a linear-space-white we still catch them */
$j = $i-1;
-// if ($chunk{0} === '=') { /* performance, saves an unnessecarry preg call */
+
while ($match = preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) {
/* if the last chunk isn't an encoded string then put back the space, otherwise don't */
if ($iLastMatch !== $j) {
if ($htmlsave) {
- $ret .= ' ';
+ $ret .= ' ';
} else {
$ret .= ' ';
}
@@ -623,8 +648,15 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) {
break;
case 'Q':
$replace = str_replace('_', ' ', $res[4]);
- $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))',
+ $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))',
$replace);
+ /* decide about valid decoding */
+ if ($decide && is_conversion_safe($res[2])) {
+ $utfencode=true;
+ $can_be_decoded=true;
+ } else {
+ $can_be_decoded=false;
+ }
/* Only encode into entities by default. Some places
* don't need the encoding, like the compose form.
*/
@@ -643,13 +675,12 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) {
$chunk = $res[5];
$encoded = true;
}
-// }
if (!$encoded) {
if ($htmlsave) {
- $ret .= ' ';
+ $ret .= ' ';
} else {
$ret .= ' ';
- }
+ }
}
if (!$encoded && $htmlsave) {
@@ -662,19 +693,24 @@ function decodeHeader ($string, $utfencode=true,$htmlsave=true) {
/* remove the first added space */
if ($ret) {
if ($htmlsave) {
- $ret = substr($ret,6);
+ $ret = substr($ret,5);
} else {
$ret = substr($ret,1);
}
}
-
+
return $ret;
}
-/*
+/**
+ * Encodes header as quoted-printable
+ *
* Encode a string according to RFC 1522 for use in headers if it
* contains 8-bit characters or anything that looks like it should
* be encoded.
+ *
+ * @param string $string header string, that has to be encoded
+ * @return string quoted-printable encoded string
*/
function encodeHeader ($string) {
global $default_charset, $languages, $squirrelmail_language;
@@ -683,9 +719,10 @@ function encodeHeader ($string) {
function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
return $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string);
}
- if (strtolower($default_charset) == 'iso-8859-1') {
- $string = str_replace("\240",' ',$string);
- }
+ // instead of removing nbsp here, we don't add it in decodeHeader
+ // if (strtolower($default_charset) == 'iso-8859-1') {
+ // $string = str_replace("\240",' ',$string);
+ //}
// Encode only if the string contains 8-bit characters or =?
$j = strlen($string);
@@ -752,7 +789,7 @@ function encodeHeader ($string) {
// do not start encoding in the middle of a string, also take the rest of the word.
$sLeadString = substr($string,0,$i);
$aLeadString = explode(' ',$sLeadString);
- $sToBeEncoded = array_pop($aLeadString);
+ $sToBeEncoded = array_pop($aLeadString);
$iEncStart = $i - strlen($sToBeEncoded);
$ret .= $sToBeEncoded;
$cur_l += strlen($sToBeEncoded);
@@ -809,7 +846,7 @@ function find_ent_id($id, $message) {
if (strcasecmp($message->entities[$i]->header->id, $id) == 0) {
// if (sq_check_save_extension($message->entities[$i])) {
return $message->entities[$i]->entity_id;
-// }
+// }
}
}
}
@@ -828,9 +865,45 @@ function sq_check_save_extension($message) {
** HTMLFILTER ROUTINES
*/
+/**
+ * Undo backslash escaping in an attribute value.
+ *
+ * This function is more or less a wrapper around stripslashes. Apparently
+ * Explorer just removes the backslashes and then executes the content of
+ * the attribute as if nothing happened, so the value is unescaped here
+ * before it is inspected any further.
+ *
+ * NOTE(review): stripslashes() turns a doubled backslash into a single
+ * one rather than deleting both, so the returned value can still contain
+ * backslashes -- confirm that callers strip whatever remains.
+ *
+ * @param string $attvalue The value of the attribute
+ * @return string The value of the attribute stripslashed, or the value
+ * unchanged when it contains no backslash.
+ */
+function sq_unbackslash($attvalue){
+ /**
+ * Only call stripslashes() when the value actually contains a
+ * backslash; otherwise the value is returned untouched.
+ */
+
+ if (strstr($attvalue, '\\') !== false){
+ $attvalue = stripslashes($attvalue);
+ }
+ return $attvalue;
+}
+
+/**
+ * Kill any tabs, newlines, or carriage returns in an attribute value.
+ *
+ * Our friends the makers of the browser with 95% market share decided
+ * that it'd be funny to make "java[tab]script" be just as good as
+ * "javascript", so these characters are removed from the value.
+ * Only tab, carriage return and newline are removed here; ordinary
+ * space characters are left in place by this function.
+ *
+ * @param string $attvalue The attribute value before tabs, carriage
+ * returns and newlines are removed.
+ * @return string The attribute value with every tab, carriage return
+ * and newline removed.
+ */
+function sq_unspace($attvalue){
+ if (strcspn($attvalue, "\t\r\n") != strlen($attvalue)){
+ $attvalue = str_replace(Array("\t", "\r", "\n"), Array('', '', ''),
+ $attvalue);
+ }
+ return $attvalue;
+}
+
/**
* This function returns the final tag out of the tag name, an array
- * of attributes, and the type of the tag. This function is called by
+ * of attributes, and the type of the tag. This function is called by
* sq_sanitize internally.
*
* @param $tagname the name of the tag.
@@ -874,7 +947,7 @@ function sq_casenormalize(&$val){
/**
* This function skips any whitespace from the current position within
* a string and to the next non-whitespace value.
- *
+ *
* @param $body the string
* @param $offset the offset within the string where we should start
* looking for the next non-whitespace character.
@@ -1029,7 +1102,7 @@ function sq_getnxtag($body, $offset){
* '>' indicating the end of the tag entirely.
* '\s' indicating the end of the tag name.
* '/' indicating that this is type-3 xhtml tag.
- *
+ *
* Whatever else we find there indicates an invalid tag.
*/
switch ($match){
@@ -1117,7 +1190,7 @@ function sq_getnxtag($body, $offset){
* double quotes. Type 4 we convert into:
* attrname="yes".
*/
- $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
+ $regary = sq_findnxreg($body, $pos, "[^:\w\-_]");
if ($regary == false){
/**
* Looks like body ended before the end of tag.
@@ -1238,13 +1311,14 @@ function sq_getnxtag($body, $offset){
* @param $attvalue A string to run entity check against.
* @return Translated value.
*/
+
function sq_deent($attvalue){
$me = 'sq_deent';
/**
* See if we have to run the checks first. All entities must start
* with "&".
*/
- if (strpos($attvalue, "&") === false){
+ if (strpos($attvalue, '&') === false){
return $attvalue;
}
/**
@@ -1255,22 +1329,22 @@ function sq_deent($attvalue){
* Leave " in, as it can mess us up.
*/
$trans = array_flip($trans);
- unset($trans{"""});
+ unset($trans{'"'});
while (list($ent, $val) = each($trans)){
- $attvalue = preg_replace("/$ent*(\W)/si", "$val\\1", $attvalue);
+ $attvalue = preg_replace('/' . $ent . '*/si', $val, $attvalue);
}
/**
* Now translate numbered entities from 1 to 255 if needed.
*/
- if (strpos($attvalue, "#") !== false){
+ if (strpos($attvalue, '#') !== false){
$omit = Array(34, 39);
- for ($asc=1; $asc<256; $asc++){
+ for ($asc = 256; $asc >= 0; $asc--){
if (!in_array($asc, $omit)){
$chr = chr($asc);
- $attvalue = preg_replace("/\*$asc;*(\D)/si", "$chr\\1",
- $attvalue);
- $attvalue = preg_replace("/\*".dechex($asc).";*(\W)/si",
- "$chr\\1", $attvalue);
+ $octrule = '/\*' . $asc . ';*/si';
+ $hexrule = '/\*' . dechex($asc) . ';*/si';
+ $attvalue = preg_replace($octrule, $chr, $attvalue);
+ $attvalue = preg_replace($hexrule, $chr, $attvalue);
}
}
}
@@ -1289,8 +1363,8 @@ function sq_deent($attvalue){
* @param $id message id
* @return Array with modified attributes.
*/
-function sq_fixatts($tagname,
- $attary,
+function sq_fixatts($tagname,
+ $attary,
$rm_attnames,
$bad_attvals,
$add_attr_to_tag,
@@ -1314,9 +1388,17 @@ function sq_fixatts($tagname,
}
}
/**
- * Remove any entities.
+ * Remove any backslashes, entities, and extraneous whitespace.
*/
+ $attvalue = sq_unbackslash($attvalue);
$attvalue = sq_deent($attvalue);
+ $attvalue = sq_unspace($attvalue);
+
+ /**
+ * Remove \r \n \t \0 " " "\\"
+ */
+ $attvalue = str_replace(Array("\r", "\n", "\t", "\0", " ", "\\"),
+ Array('', '','','','',''), $attvalue);
/**
* Now let's run checks on the attvalues.
@@ -1334,7 +1416,7 @@ function sq_fixatts($tagname,
* Second one is replacements
*/
list($valmatch, $valrepl) = $valary;
- $newvalue =
+ $newvalue =
preg_replace($valmatch, $valrepl, $attvalue);
if ($newvalue != $attvalue){
$attary{$attname} = $newvalue;
@@ -1364,13 +1446,14 @@ function sq_fixatts($tagname,
/**
* This function edits the style definition to make them friendly and
* usable in squirrelmail.
- *
+ *
* @param $message the message object
* @param $id the message id
* @param $content a string with whatever is between
+ * @param $mailbox the message mailbox
* @return a string with edited content.
*/
-function sq_fixstyle($body, $pos, $message, $id){
+function sq_fixstyle($body, $pos, $message, $id, $mailbox){
global $view_unsafe_images;
$me = 'sq_fixstyle';
$ret = sq_findnxreg($body, $pos, '\s*style\s*>');
@@ -1400,14 +1483,14 @@ function sq_fixstyle($body, $pos, $message, $id){
$content = preg_replace("|url\s*\(\s*([\'\"])\s*https*:.*?([\'\"])\s*\)|si",
"url(\\1$secremoveimg\\2)", $content);
}
-
+
/**
* Fix urls that refer to cid:
*/
- while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si",
+ while (preg_match("|url\s*\(\s*([\'\"]\s*cid:.*?[\'\"])\s*\)|si",
$content, $matches)){
$cidurl = $matches{1};
- $httpurl = sq_cid2http($message, $id, $cidurl);
+ $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox);
$content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si",
"url($httpurl)", $content);
}
@@ -1418,8 +1501,9 @@ function sq_fixstyle($body, $pos, $message, $id){
*/
$match = Array('/expression/i',
'/behaviou*r/i',
- '/binding/i');
- $replace = Array('idiocy', 'idiocy', 'idiocy');
+ '/binding/i',
+ '/include-source/i');
+ $replace = Array('idiocy', 'idiocy', 'idiocy', 'idiocy');
$content = preg_replace($match, $replace, $content);
return array($content, $newpos);
}
@@ -1431,6 +1515,7 @@ function sq_fixstyle($body, $pos, $message, $id){
* @param $message the message object
* @param $id the message id
* @param $cidurl the cid: url.
+ * @param $mailbox the message mailbox
* @return a string with a http-friendly url
*/
function sq_cid2http($message, $id, $cidurl, $mailbox){
@@ -1438,7 +1523,11 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){
* Get rid of quotes.
*/
$quotchar = substr($cidurl, 0, 1);
- $cidurl = str_replace($quotchar, "", $cidurl);
+ if ($quotchar == '"' || $quotchar == "'"){
+ $cidurl = str_replace($quotchar, "", $cidurl);
+ } else {
+ $quotchar = '';
+ }
$cidurl = substr(trim($cidurl), 4);
$linkurl = find_ent_id($cidurl, $message);
/* in case of non-save cid links $httpurl should be replaced by a sort of
@@ -1456,14 +1545,18 @@ function sq_cid2http($message, $id, $cidurl, $mailbox){
* This function changes the tag into a tag since we
* can't really have a body-within-body.
*
- * @param $attary an array of attributes and values of
- * @return a modified array of attributes to be set for
+ * @param $attary an array of attributes and values of
+ * @param $mailbox mailbox we're currently reading (for cid2http)
+ * @param $message current message (for cid2http)
+ * @param $id current message id (for cid2http)
+ * @return a modified array of attributes to be set for
*/
-function sq_body2div($attary){
+function sq_body2div($attary, $mailbox, $message, $id){
$me = 'sq_body2div';
$divattary = Array('class' => "'bodyclass'");
$bgcolor = '#ffffff';
$text = '#000000';
+ $has_bgc_stl = $has_txt_stl = false;
$styledef = '';
if (is_array($attary) && sizeof($attary) > 0){
foreach ($attary as $attname=>$attvalue){
@@ -1471,16 +1564,25 @@ function sq_body2div($attary){
$attvalue = str_replace($quotchar, "", $attvalue);
switch ($attname){
case 'background':
+ $attvalue = sq_cid2http($message, $id,
+ $attvalue, $mailbox);
$styledef .= "background-image: url('$attvalue'); ";
break;
case 'bgcolor':
+ $has_bgc_stl = true;
$styledef .= "background-color: $attvalue; ";
break;
case 'text':
+ $has_txt_stl = true;
$styledef .= "color: $attvalue; ";
break;
}
}
+ // Outlook defines a white bgcolor and no text color. This can lead to
+ // white text on a white bg with certain themes.
+ if ($has_bgc_stl && !$has_txt_stl) {
+ $styledef .= "color: $text; ";
+ }
if (strlen($styledef) > 0){
$divattary{"style"} = "\"$styledef\"";
}
@@ -1508,8 +1610,8 @@ function sq_body2div($attary){
* @param $id message id
* @return sanitized html safe to show on your pages.
*/
-function sq_sanitize($body,
- $tag_list,
+function sq_sanitize($body,
+ $tag_list,
$rm_tags_with_content,
$self_closing_tags,
$force_tag_closing,
@@ -1535,7 +1637,7 @@ function sq_sanitize($body,
*/
$curpos = 0;
$open_tags = Array();
- $trusted = "\n";
+ $trusted = "\n\n";
$skip_content = false;
/**
* Take care of netscape's stupid javascript entities like
@@ -1550,8 +1652,8 @@ function sq_sanitize($body,
* Take care of