fixes for getting textOnly entities
[squirrelmail.git] / functions / mime.php
CommitLineData
59177427 1<?php
2ba13803 2
35586184 3/**
4 * mime.php
5 *
15e6162e 6 * Copyright (c) 1999-2002 The SquirrelMail Project Team
35586184 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This contains the functions necessary to detect and decode MIME
10 * messages.
11 *
12 * $Id$
13 */
b74ba498 14
35586184 15require_once('../functions/imap.php');
16require_once('../functions/attachment_common.php');
8beafbbc 17
35586184 18/** Setting up the objects that have the structure for the message **/
class msg_header {
    /*
     * Plain value holder for the fields that may appear in the header of a
     * message or of one of its MIME entities. Every field starts out with
     * the default shown below.
     */

    /* content type (major / minor) and its parameters */
    var $type0 = '';
    var $type1 = '';
    var $boundary = '';
    var $charset = '';
    var $encoding = '';
    var $size = 0;

    /* envelope style fields */
    var $to = array();
    var $from = '';
    var $date = '';
    var $cc = array();
    var $bcc = array();
    var $reply_to = '';
    var $subject = '';

    /* identification and attachment related fields */
    var $id = 0;
    var $mailbox = '';
    var $description = '';
    var $filename = '';
    var $entity_id = 0;
    var $message_id = 0;
    var $name = '';
    var $priority = 3;
    var $type = '';
}
b74ba498 29
class message {
    /*
     * A message (or one MIME entity of a message). The structure is
     * recursive: $entities holds further objects of this same class, one
     * per child entity. See mime.txt for a fuller description.
     */

    /* header of this entity; starts out as an empty string */
    var $header = '';

    /* child entities, in the order they were added */
    var $entities = array();

    /* Appends $msg to the list of child entities. */
    function addEntity ($msg) {
        array_push($this->entities, $msg);
    }
}
8beafbbc 42
451f74a2 43/* --------------------------------------------------------------------------------- */
44/* MIME DECODING */
45/* --------------------------------------------------------------------------------- */
b74ba498 46
/*
 * Retrieves the BODYSTRUCTURE of one message and turns it into a "message"
 * object tree.
 *
 * $imap_stream  open stream to the IMAP server
 * $header       header object of the message; its ->id is used in the FETCH
 *               command and the object itself becomes the header of the
 *               returned message
 *
 * Returns the object built by mime_parse_structure() with ->header set to
 * $header.
 */
function mime_structure ($imap_stream, $header) {

    $ssid = sqimap_session_id();
    $lsid = strlen($ssid);
    $id = $header->id;
    fputs($imap_stream, "$ssid FETCH $id BODYSTRUCTURE\r\n");

    /*
     * Collect every reply line up to (not including) the line that starts
     * with our session id. This should use sqimap_read_data instead of
     * reading the stream itself.
     */
    $bodystructure = '';
    for ($line = fgets($imap_stream, 9216);
         substr($line, 0, $lsid) != $ssid && !feof($imap_stream);
         $line = fgets($imap_stream, 9216)) {
        $bodystructure .= $line;
    }

    /* keep only what follows the BODYSTRUCTURE keyword (13 characters) */
    $keyword_pos = strpos(strtolower($bodystructure), 'bodystructure');
    $read = trim(substr($bodystructure, $keyword_pos + 13));

    /* drop the last character of the reply */
    $read = trim(substr($read, 0, -1));

    /*
     * As long as the first parenthesis is closed by the very last
     * character, the pair wraps the whole string: peel it off.
     */
    for ($end = mime_match_parenthesis(0, $read);
         $end == strlen($read) - 1;
         $end = mime_match_parenthesis(0, $read)) {
        $read = trim(substr($read, 0, -1));
        $read = trim(substr($read, 1));
    }

    $msg = mime_parse_structure($read, 0);
    $msg->header = $header;

    return $msg;
}
b74ba498 85
/*
 * Parses one (sub)structure of a BODYSTRUCTURE reply into a "message"
 * object. It calls itself for every nested entity.
 *
 * If $structure starts with a parenthesis it is a multipart: each
 * parenthesized group at the front is parsed as a child entity and added
 * with addEntity(). Otherwise the structure is a single entity and is
 * handed to mime_get_element().
 *
 * $structure  structure string without its outer parentheses
 * $ent_id     entity id of this level (0 for the top level)
 *
 * Returns the resulting message object.
 */
function mime_parse_structure ($structure, $ent_id) {
    global $mailbox;
    $properties = array();
    $msg = new message();

    /* substr() instead of an offset read so an empty structure is harmless */
    if (substr($structure, 0, 1) == '(') {
        $old_ent_id = $ent_id;
        $ent_id = mime_new_element_level($ent_id);
        /* tail that is re-attached after the multipart info was cut out */
        $structure_end = '';
        $start = $end = -1;
        do {
            $start = $end + 1;
            $end = mime_match_parenthesis($start, $structure);

            /* check if we are dealing with a new entity-level */
            $i = strrpos((string) $ent_id, '.');
            if ($i > 0) {
                $ent = substr($ent_id, $i + 1);
            } else {
                $ent = '';
            }

            /* add "forgotten" parent entities (alternative and related) */
            if ($ent == '0') {
                /*
                 * New entity levels carry the subtype (type1) and the
                 * properties at the end of the structure string, like
                 * (example between the brackets)
                 * [ "RELATED" ("BOUNDARY" "myboundary" "TYPE" "plain/html") ]
                 */

                /* isolate the property list for mime_get_props */
                $startprop = strrpos($structure, '(');
                $properties_str = substr($structure, $startprop);
                $endprop = mime_match_parenthesis($startprop, $structure);

                /* cut off the used properties */
                if ($startprop) {
                    $structure_end = substr($structure, $endprop + 2);
                    $structure = trim(substr($structure, 0, $startprop));
                }

                /* get type1 */
                $pos = strrpos($structure, ' ');
                if (substr($structure, $pos + 1, 1) == '(') {
                    $pos++;
                }
                /*
                 * Skip the separator and the opening quote, drop the
                 * closing quote. The length used to be written as
                 * count($structure)-2, which is -1 for a string on old PHP
                 * and a TypeError on PHP 8.
                 */
                $type1 = strtolower(substr($structure, $pos + 2, -1));

                /* cut off type1 */
                if ($pos && $startprop) {
                    $structure = trim(substr($structure, 0, $pos));
                }

                /* process the found information */
                $properties = mime_get_props($properties, $properties_str);
                if (!is_array($properties)) {
                    $properties = array();
                }
                if (count($properties) > 0) {
                    /*
                     * The header used to be created implicitly by assigning
                     * a property to the empty string, which PHP 8 rejects.
                     * Create the same kind of object explicitly.
                     */
                    if (!is_object($msg->header)) {
                        $msg->header = new stdClass();
                    }
                    $msg->header->entity_id = $old_ent_id;
                    $msg->header->type0 = 'multipart';
                    $msg->header->type1 = $type1;
                    for ($i = 0; $i < count($properties); $i++) {
                        $msg->header->{$properties[$i]['name']} = $properties[$i]['value'];
                    }
                }
                $structure = $structure . ' ' . $structure_end;
            }

            $element = substr($structure, $start + 1, ($end - $start) - 1);
            $ent_id = mime_increment_id($ent_id);
            $newmsg = mime_parse_structure($element, $ent_id);

            /* set mailbox in case of message/rfc822 entities */
            if (isset($newmsg->header->type0) && isset($newmsg->header->type1)) {
                if ($newmsg->header->type0 == 'message' && $newmsg->header->type1 == 'rfc822') {
                    $newmsg->header->mailbox = $mailbox;
                }
            }
            $msg->addEntity($newmsg);

            /* another child follows if the next character opens a group */
        } while (substr($structure, $end + 1, 1) == '(');
    } else {
        /* a single entity: parse its elements */
        $msg = mime_get_element($structure, $msg, $ent_id);
    }
    return $msg;
}
e4a256af 171
164800ad 172
/*
 * Returns the element id that follows $id on the same level. Element ids
 * look like 1, 1.2 or 4.3.2.4.1; only the number after the last dot is
 * increased, so 1.2 becomes 1.3. An id without a dot is simply increased
 * by one.
 */
function mime_increment_id ($id) {

    /* no dot: the id is a plain number */
    if (!strpos($id, '.')) {
        return $id + 1;
    }

    $dot = strrpos($id, '.');
    $tail = substr($id, $dot + 1);
    $tail++;

    return substr($id, 0, $dot) . '.' . $tail;
}
190
/*
 * Opens a new level below the element id $id (see mime_increment_id()),
 * turning 1.3 into 1.3.0. An empty id yields 0.
 *
 * NOTE: 1.3.0 is not a valid element id. It MUST be passed through
 *       mime_increment_id() before it is used. Doing it this way avoids a
 *       special case for the first entity of a level: the id is always
 *       incremented, and that works fine.
 */
function mime_new_element_level ($id) {

    return !$id ? 0 : $id . '.0';
}
209
/*
 * Parses the structure of a single (non multipart) entity into $msg.
 *
 * The structure is consumed token by token. A token is NIL, a quoted
 * string, a {length} literal, a parenthesized list or a bare atom. The
 * position of a token decides which header field receives it; the
 * parenthesized lists are collected as properties and copied into the
 * header at the end.
 *
 * $structure  structure string of the entity; passed by reference and
 *             consumed while parsing
 * $msg        message object that receives a fresh msg_header
 * $ent_id     entity id stored in the header
 *
 * Returns $msg.
 */
function mime_get_element (&$structure, $msg, $ent_id) {

    $elem_num = 1;
    $msg->header = new msg_header();
    $msg->header->entity_id = $ent_id;
    $properties = array();
    $text = '';

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        if ($structure === '') {
            /* only whitespace was left: nothing more to parse */
            break;
        }
        $char = $structure[0];
        $struct_len = strlen($structure);

        if (strtolower(substr($structure, 0, 3)) == 'nil') {
            $text = '';
            $structure = substr($structure, 3);
        } else if ($char == '"') {
            /* collect everything up to the closing quote */
            $pos = 1;
            $text = '';
            while ($pos < $struct_len && ($char = $structure[$pos]) != '"') {
                $text .= $char;
                $pos++;
            }
            /* +2 skips both quotes */
            $structure = substr($structure, strlen($text) + 2);
        } else if ($char == '{') {
            /* literal: read the length between the braces */
            $pos = 1;
            $len = '';
            while ($pos < $struct_len && ($char = $structure[$pos]) != '}') {
                $len .= $char;
                $pos++;
            }
            /* +4 skips "{", "}" and the two characters that follow */
            $structure = substr($structure, strlen($len) + 4);
            $literal_size = (int) $len;
            $text = substr($structure, 0, $literal_size);
            $structure = substr($structure, $literal_size + 1);
        } else if ($char == '(') {
            /* parenthesized list: hand its content to mime_get_props */
            $end = mime_match_parenthesis(0, $structure);
            $sub = substr($structure, 1, $end - 1);
            $parsed = mime_get_props($properties, $sub);
            if (is_array($parsed)) {
                /* keep what we already have if nothing usable came back */
                $properties = $parsed;
            }
            $structure = substr($structure, strlen($sub) + 2);
        } else {
            /* bare atom: runs up to a space or a closing parenthesis */
            $pos = 0;
            $text = '';
            while ($pos < $struct_len) {
                $char = $structure[$pos];
                if ($char == ' ' || $char == ')') {
                    break;
                }
                $text .= $char;
                $pos++;
            }
            if ($text === '') {
                /*
                 * A stray ")" (for example after "NIL)") consumes nothing
                 * above; without skipping it this loop would never end.
                 */
                $structure = substr($structure, 1);
                continue;
            }
            $structure = substr($structure, strlen($text));
        }

        /* This is where all the text parts get put into the header */
        switch ($elem_num) {
            case 1:
                $msg->header->type0 = strtolower($text);
                break;
            case 2:
                $msg->header->type1 = strtolower($text);
                break;
            case 4: // Id
                // Invisimail enclose images with <>
                $msg->header->id = str_replace('<', '', str_replace('>', '', $text));
                break;
            case 5:
                $msg->header->description = $text;
                break;
            case 6:
                $msg->header->encoding = strtolower($text);
                break;
            case 7:
                $msg->header->size = $text;
                break;
            default:
                if ($msg->header->type0 == 'text' && $elem_num == 8) {
                    /* a text part: element 8 is its number of lines */
                    $msg->header->num_lines = $text;

                } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) {
                    /*
                     * An encapsulated message: start all over again and
                     * parse it, adding the result to the existing message.
                     */
                    $structure = trim($structure);
                    if (substr($structure, 0, 1) == '(') {
                        $e = mime_match_parenthesis(0, $structure);
                        $structure = substr($structure, 0, $e);
                        $structure = substr($structure, 1);
                        $m = mime_parse_structure($structure, $msg->header->entity_id);

                        /*
                         * message/rfc822 is a special case for which the
                         * entity ids were not incremented correctly; this
                         * conditional corrects that.
                         */
                        if (substr($structure, 1, 1) != '(') {
                            if (!is_object($m->header)) {
                                /* was created implicitly before PHP 8 */
                                $m->header = new stdClass();
                            }
                            $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id));
                        }

                        /* Now we'll go through and reformat the results. */
                        if ($m->entities) {
                            for ($i = 0; $i < count($m->entities); $i++) {
                                $msg->addEntity($m->entities[$i]);
                            }
                        } else {
                            $msg->addEntity($m);
                        }
                        $structure = "";
                    }
                }
                break;
        }
        $elem_num++;
        $text = "";
    }

    /* copy the collected properties into the header */
    for ($i = 0; $i < count($properties); $i++) {
        $msg->header->{$properties[$i]['name']} = $properties[$i]['value'];
    }

    return $msg;
}
332
/*
 * Collects the name/value pairs found in a (possibly nested)
 * parenthesized property list. (Original implementation dates from
 * June 2000.)
 *
 * For example, when passed
 *     ("attachment" ("filename" "luke.tar.gz"))
 * the "attachment" keyword is ignored and the result is
 *     $props[0]["name"]  = "filename";
 *     $props[0]["value"] = "luke.tar.gz";
 *
 * Names are lowercased, values are stored as found. Parsing stops after
 * the first parenthesized group, or at the first token that is neither a
 * quoted string nor a parenthesis.
 *
 * $props      pairs collected so far; new pairs are appended
 * $structure  the property list to parse
 *
 * Always returns the array of pairs (possibly unchanged).
 */
function mime_get_props ($props, $structure) {

    if (!is_array($props)) {
        $props = array();
    }

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        if ($structure === '') {
            break;
        }
        $char = $structure[0];

        if ($char == '"') {
            /* first quoted string: the name (or a keyword) */
            $len = strlen($structure);
            $pos = 1;
            $tmp = '';
            while ($pos < $len && ($char = $structure[$pos]) != '"') {
                $tmp .= $char;
                $pos++;
            }
            $structure = trim(substr($structure, strlen($tmp) + 2));
            $char = substr($structure, 0, 1);

            if ($char == '"') {
                /* second quoted string: the value belonging to the name */
                $len = strlen($structure);
                $pos = 1;
                $value = '';
                while ($pos < $len && ($char = $structure[$pos]) != '"') {
                    $value .= $char;
                    $pos++;
                }
                $structure = trim(substr($structure, strlen($value) + 2));
                $k = count($props);
                $props[$k]['name'] = strtolower($tmp);
                $props[$k]['value'] = $value;
                /*
                 * The loop goes on with the remaining pairs. There used
                 * to be a recursive call here whose result was thrown
                 * away, so it only cost time.
                 */
            } else if ($char == '(') {
                /* keyword followed by a nested list: parse the list */
                $end = mime_match_parenthesis(0, $structure);
                $sub = substr($structure, 1, $end - 1);
                return mime_get_props($props, $sub);
            }
        } else if ($char == '(') {
            $end = mime_match_parenthesis(0, $structure);
            $sub = substr($structure, 1, $end - 1);
            return mime_get_props($props, $sub);
        } else {
            return $props;
        }
    }

    /* also reached for an empty list or a trailing lone name */
    return $props;
}
402
/*
 * Finds the parenthesis that closes the one at $pos. For instance, if
 * $structure was:
 *     ("text" "plain" ("val1name", "1") nil ... )
 *     x                                         x
 * then calling this with $pos = 0 returns 42, matching up those two.
 *
 * Quoted strings are skipped, including \" and \\ escapes inside them,
 * because boundary ids and other values can contain ")". Nested
 * parentheses are matched recursively.
 *
 * $pos        position of the opening parenthesis
 * $structure  string to search
 *
 * Returns the position of the closing parenthesis. If $pos does not point
 * at "(" the length of $structure is returned. If no closing parenthesis
 * is found an error message is printed and the position reached (at or
 * past the end of the string) is returned.
 */
function mime_match_parenthesis ($pos, $structure) {

    $j = strlen($structure);

    /* ignore all extra characters */
    if ($pos >= $j || $structure[$pos] != '(') {
        return $j;
    }

    while ($pos < $j) {
        $pos++;
        if ($pos >= $j) {
            /* ran off the end without finding the match */
            break;
        }
        $current = $structure[$pos];
        if ($current == ')') {
            return $pos;
        } elseif ($current == '"') {
            /* inside a string: skip to its closing quote */
            $pos++;
            while ($pos < $j && $structure[$pos] != '"') {
                if (substr($structure, $pos, 2) == '\\"') {
                    $pos++;
                } elseif (substr($structure, $pos, 2) == '\\\\') {
                    $pos++;
                }
                $pos++;
            }
        } elseif ($current == '(') {
            /* nested group: jump to its closing parenthesis */
            $pos = mime_match_parenthesis($pos, $structure);
        }
    }
    echo _("Error decoding mime structure. Report this as a bug!") . '<br>';
    return $pos;
}
443
/*
 * Fetches the body of one entity of a message.
 *
 * $imap_stream  open stream to the IMAP server
 * $id           id of the message, used in the FETCH command
 * $ent_id       id of the entity to fetch; 1 is used when it is empty
 *
 * Returns the body text. The FETCH reply line announces it either as a
 * {length} literal, in which case that many bytes of the following data
 * are returned, or as a quoted string. If the reply line has neither
 * form, an error block is printed and the whole message (BODY[] of the
 * global $passed_id) is returned instead.
 */
function mime_fetch_body($imap_stream, $id, $ent_id) {

    /*
     * do a bit of error correction. If we couldn't find the entity id,
     * just guess that it is the first one. That is usually the case anyway.
     */
    if (!$ent_id) {
        $ent_id = 1;
    }
    $cmd = "FETCH $id BODY[$ent_id]";

    $data = sqimap_run_command($imap_stream, $cmd, true, $response, $message);

    /* skip untagged lines until the FETCH reply line shows up */
    do {
        $topline = trim((string) array_shift($data));
    } while ($topline && $topline[0] == '*' && !preg_match('/\* [0-9]+ FETCH.*/i', $topline));
    $wholemessage = implode('', $data);

    if (preg_match('/\{([^}]*)\}/', $topline, $regs)) {
        /*
         * Literal: the number between the braces is the body length.
         * (An extra FETCH of BODY[$ent_id.MIME] used to be issued here for
         * bodies starting with "<"; its result was never used, so the
         * round trip was dropped.)
         */
        $ret = substr($wholemessage, 0, (int) $regs[1]);
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        /* short body sent as a quoted string */
        $ret = $regs[1];
    } else {
        global $where, $what, $mailbox, $passed_id, $startMessage;

        /* parameters for the "submit this message" link */
        $par = 'mailbox=' . urlencode((string) $mailbox) .
               '&amp;passed_id=' . urlencode((string) $passed_id);
        if (isset($where) && isset($what)) {
            $par .= '&amp;where=' . urlencode($where) . "&amp;what=" . urlencode($what);
        } else {
            $par .= '&amp;startMessage=' . urlencode((string) $startMessage) . '&amp;show_more=0';
        }
        $par .= '&amp;response=' . urlencode((string) $response) .
                '&amp;message=' . urlencode((string) $message) .
                '&amp;topline=' . urlencode($topline);

        /*
         * The command, the server reply and the FETCH line are not under
         * our control, so they are escaped before being printed as HTML.
         */
        echo '<tt><br>' .
             '<table width="80%"><tr>' .
             '<tr><td colspan=2>' .
             _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") .
             " <A HREF=\"../src/retrievalerror.php?$par\"><br>" .
             _("Submit message") . '</A><BR>&nbsp;' .
             '</td></tr>' .
             '<td><b>' . _("Command:") . '</td><td>' . htmlspecialchars($cmd) . '</td></tr>' .
             '<td><b>' . _("Response:") . '</td><td>' . htmlspecialchars((string) $response) . '</td></tr>' .
             '<td><b>' . _("Message:") . '</td><td>' . htmlspecialchars((string) $message) . '</td></tr>' .
             '<td><b>' . _("FETCH line:") . '</td><td>' . htmlspecialchars($topline) . '</td></tr>' .
             "</table><BR></tt></font><hr>";

        /* fall back to the complete message, minus the reply line */
        $data = sqimap_run_command($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message);
        array_shift($data);
        $ret = implode('', $data);
    }
    return $ret;
}
d4467150 531
451f74a2 532function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
533 // do a bit of error correction. If we couldn't find the entity id, just guess
534 // that it is the first one. That is usually the case anyway.
535 if (!$ent_id) {
536 $ent_id = 1;
537 }
538 $sid = sqimap_session_id();
539 // Don't kill the connection if the browser is over a dialup
540 // and it would take over 30 seconds to download it.
b7206e1d 541
542