Added headerfield type for use with multipart/related messages
[squirrelmail.git] / functions / mime.php
CommitLineData
59177427 1<?php
2ba13803 2
35586184 3/**
4 * mime.php
5 *
15e6162e 6 * Copyright (c) 1999-2002 The SquirrelMail Project Team
35586184 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This contains the functions necessary to detect and decode MIME
10 * messages.
11 *
12 * $Id$
13 */
b74ba498 14
35586184 15require_once('../functions/imap.php');
16require_once('../functions/attachment_common.php');
8beafbbc 17
35586184 18/** Setting up the objects that have the structure for the message **/
class msg_header {
    /*
     * Plain container for the values that can appear in the header of a
     * message or of one of its entities.  Every field starts out with the
     * neutral default given below; the parsing functions fill them in.
     */

    /* content type, split into its first and second part */
    var $type0 = '';
    var $type1 = '';

    /* content parameters and transfer information */
    var $boundary = '';
    var $charset = '';
    var $encoding = '';
    var $size = 0;

    /* addressing and envelope style fields */
    var $to = array();
    var $from = '';
    var $date = '';
    var $cc = array();
    var $bcc = array();
    var $reply_to = '';
    var $subject = '';

    /* identification of the message / entity */
    var $id = 0;
    var $mailbox = '';
    var $description = '';
    var $filename = '';
    var $entity_id = 0;
    var $message_id = 0;
    var $name = '';

    /* priority defaults to 3 */
    var $priority = 3;

    /* value of a "type" header field / parameter */
    var $type = '';
}
b74ba498 29
class message {
    /*
     * Container for a message.  The structure is recursive: $entities
     * holds further objects of type message, one per contained entity.
     * See the documentation in mime.txt for a fuller description.
     */

    /* header of this message/entity; an empty string until one is set */
    var $header = '';

    /* child entities, in the order in which they were added */
    var $entities = array();

    /* Appends $msg to the list of child entities. */
    function addEntity ($msg) {
        array_push($this->entities, $msg);
    }
}
8beafbbc 42
451f74a2 43/* --------------------------------------------------------------------------------- */
44/* MIME DECODING */
45/* --------------------------------------------------------------------------------- */
b74ba498 46
451f74a2 47/* This function gets the structure of a message and stores it in the "message" class.
48 * It will return this object for use with all relevant header information and
49 * fully parsed into the standard "message" object format.
50 */
function mime_structure ($imap_stream, $header) {
    /*
     * $imap_stream - stream the FETCH command is written to and the
     *                answer is read from
     * $header      - header object of the message; $header->id selects
     *                the message and the object itself becomes the
     *                ->header of the returned structure
     *
     * Returns whatever mime_parse_structure() builds from the answer.
     */
    $ssid = sqimap_session_id();
    $lsid = strlen($ssid);
    $id = $header->id;
    fputs($imap_stream, "$ssid FETCH $id BODYSTRUCTURE\r\n");

    //
    // This should use sqimap_read_data instead of reading it itself
    //
    // Collect every line up to (not including) the one that starts with
    // our session id, or up to the end of the stream.
    $bodystructure = '';
    for ($line = fgets($imap_stream, 9216);
         substr($line, 0, $lsid) != $ssid && !feof($imap_stream);
         $line = fgets($imap_stream, 9216)) {
        $bodystructure .= $line;
    }

    // isolate the body structure: keep what follows the keyword ...
    $keyword_at = strpos(strtolower($bodystructure), 'bodystructure');
    $read = trim(substr($bodystructure, $keyword_at + 13));

    // ... drop the final character of the answer ...
    $read = trim(substr($read, 0, -1));

    // ... and peel off parentheses for as long as one pair encloses
    // the whole remaining string
    $end = mime_match_parenthesis(0, $read);
    while ($end == strlen($read) - 1) {
        $read = trim(substr($read, 0, -1));
        $read = trim(substr($read, 1));
        $end = mime_match_parenthesis(0, $read);
    }

    $msg = mime_parse_structure($read, 0);
    $msg->header = $header;

    return $msg;
}
b74ba498 85
451f74a2 86/* this starts the parsing of a particular structure. It is called recursively,
87 * so it can be passed different structures. It returns an object of type
88 * $message.
89 * First, it checks to see if it is a multipart message. If it is, then it
90 * handles that as it sees is necessary. If it is just a regular entity,
91 * then it parses it and adds the necessary header information (by calling out
92 * to mime_get_element()).
93 */
function mime_parse_structure ($structure, $ent_id) {
    /*
     * $structure - body structure text without its outermost parentheses
     * $ent_id    - entity id of the part described by $structure
     *
     * Returns a message object.  If $structure starts with '(' it is
     * treated as a list of sub-entities, each of which is parsed
     * recursively and added as a child; otherwise the single entity is
     * handed to mime_get_element().
     */
    $properties = array();
    $msg = new message();

    if (substr($structure, 0, 1) != '(') {
        // parse the elements
        return mime_get_element($structure, $msg, $ent_id);
    }

    $old_ent_id = $ent_id;
    $ent_id = mime_new_element_level($ent_id);
    $end = -1;
    do {
        $start = $end + 1;
        $end = mime_match_parenthesis($start, $structure);

        /*
         * add "forgotten" parent entities (alternative and relative)
         *
         * This used to be guarded by
         *     strpos($ent_id, '0') || strpos($ent_id) == 0
         * The one-argument strpos() call only produced a warning and
         * NULL, so the guard was always true; on PHP 8 the same call
         * throws.  The block is therefore run unconditionally, which
         * is what it effectively always did.
         */
        $str = substr($structure, $end + 1);
        $startprop = strrpos($str, '(');
        $endprop = strrpos($str, ')');
        $propstr = substr($str, $startprop + 1, ($endprop - $startprop) - 1);

        // Last space separated word in front of the property list, with
        // its final character cut off.
        // NOTE(review): when a space precedes that word its leading
        // quote is kept -- kept as is, confirm whether that is intended.
        $type1 = trim(substr($str, 0, $startprop));
        $pos = strrpos($type1, ' ');
        $type1 = strtolower(trim(substr($type1, $pos + 1)));
        $type1 = substr($type1, 0, strlen($type1) - 1);

        $properties = mime_get_props($properties, $propstr);
        if (!is_array($properties)) {
            // mime_get_props() may hand back NULL; treat as "none"
            $properties = array();
        }
        if (count($properties) > 0) {
            if (!is_object($msg->header)) {
                // $msg->header starts out as ''.  Older PHP versions
                // silently turned it into a generic object on the
                // first property assignment; do the same explicitly.
                $msg->header = new stdClass();
            }
            $msg->header->entity_id = $old_ent_id;
            $msg->header->type0 = 'multipart';
            $msg->header->type1 = $type1;
            foreach ($properties as $property) {
                $msg->header->{$property['name']} = $property['value'];
            }
        }

        // the text between the matched parentheses is one sub-entity
        $element = substr($structure, $start + 1, ($end - $start) - 1);

        $ent_id = mime_increment_id($ent_id);
        $newmsg = mime_parse_structure($element, $ent_id);
        $msg->addEntity($newmsg);

        // carry on while another parenthesized entity follows directly
    } while (substr($structure, $end + 1, 1) == '(');

    return $msg;
}
e4a256af 143
451f74a2 144/* Increments the element ID. An element id can look like any of
145 * the following: 1, 1.2, 4.3.2.4.1, etc. This function increments
146 * the last number of the element id, changing 1.2 to 1.3.
147 */
function mime_increment_id ($id) {
    /*
     * $id - element id such as 1, 1.2 or 4.3.2.4.1
     *
     * Returns the id with its last number incremented.
     */

    // no dot (after the first character): the id is a single number
    if (!strpos($id, '.')) {
        return $id + 1;
    }

    // split at the last dot and bump only the part behind it
    $dot = strrpos($id, '.');
    $tail = substr($id, $dot + 1);
    $tail++;

    return substr($id, 0, $dot) . '.' . $tail;
}
161
162/*
163 * See comment for mime_increment_id().
164 * This adds another level on to the entity_id changing 1.3 to 1.3.0
165 * NOTE: 1.3.0 is not a valid element ID. It MUST be incremented
166 * before it can be used. I left it this way so as not to have
167 * to make a special case if it is the first entity_id. It
168 * always increments it, and that works fine.
169 */
function mime_new_element_level ($id) {
    /*
     * $id - current element id
     *
     * Returns 0 for an empty id, otherwise the id with '.0' appended.
     * The result still has to go through mime_increment_id() before it
     * is a usable element id.
     */
    return $id ? $id . '.0' : 0;
}
180
function mime_get_element (&$structure, $msg, $ent_id) {
    /*
     * Parses the description of one single entity.
     *
     * $structure - text of the entity description; passed by reference
     *              and consumed while it is parsed
     * $msg       - message object that receives a fresh msg_header
     * $ent_id    - entity id stored in that header
     *
     * Returns $msg with its header filled in.  The elements are read one
     * after the other (nil, quoted string, parenthesized list or bare
     * word) and stored according to their position.  For a
     * message/rfc822 entity the embedded structure is parsed as well and
     * its entities are added to $msg.
     */
    $elem_num = 1;
    $msg->header = new msg_header();
    $msg->header->entity_id = $ent_id;
    $properties = array();
    $text = '';   // stays '' for elements that are parenthesized lists

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        $char = substr($structure, 0, 1);

        if (strtolower(substr($structure, 0, 3)) == 'nil') {
            $text = '';
            $structure = substr($structure, 3);
        } else if ($char == '"') {
            // everything up to the next quote (or the end) is the string
            $close = strpos($structure, '"', 1);
            if ($close === false) {
                $text = (string) substr($structure, 1);
            } else {
                $text = (string) substr($structure, 1, $close - 1);
            }
            $structure = substr($structure, strlen($text) + 2);
        } else if ($char == '(') {
            // a parenthesized list: collect the name/value pairs in it
            $end = mime_match_parenthesis(0, $structure);
            $sub = substr($structure, 1, $end - 1);
            $properties = mime_get_props($properties, $sub);
            if (!is_array($properties)) {
                // mime_get_props() may hand back NULL; treat as "none"
                $properties = array();
            }
            $structure = substr($structure, strlen($sub) + 2);
        } else {
            // a bare word ends at a space or an end parenthesis
            $text = (string) substr($structure, 0, strcspn($structure, ' )'));
            // Always consume at least one character.  A stray ')' at
            // this point used to leave $structure untouched, so the
            // loop never terminated.
            $structure = substr($structure, max(strlen($text), 1));
        }

        // This is where all the text parts get put into the header
        switch ($elem_num) {
            case 1:
                $msg->header->type0 = strtolower($text);
                break;
            case 2:
                $msg->header->type1 = strtolower($text);
                break;
            case 4: // Id
                // Invisimail enclose images with <>
                $msg->header->id = str_replace(array('<', '>'), '', $text);
                break;
            case 5:
                $msg->header->description = $text;
                break;
            case 6:
                $msg->header->encoding = strtolower($text);
                break;
            case 7:
                $msg->header->size = $text;
                break;
            default:
                if ($msg->header->type0 == 'text' && $elem_num == 8) {
                    // This is a plain text message, so lets get the number of lines
                    // that it contains.
                    $msg->header->num_lines = $text;

                } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) {
                    // This is an encapsulated message, so lets start all over again and
                    // parse this message adding it on to the existing one.
                    $structure = trim($structure);
                    if (substr($structure, 0, 1) == '(') {
                        $e = mime_match_parenthesis(0, $structure);
                        $structure = substr($structure, 0, $e);
                        $structure = substr($structure, 1);
                        $m = mime_parse_structure($structure, $msg->header->entity_id);

                        // the following conditional is there to correct a bug that wasn't
                        // incrementing the entity IDs correctly because of the special case
                        // that message/rfc822 is.  This fixes it fine.
                        if (substr($structure, 1, 1) != '(') {
                            if (!is_object($m->header)) {
                                // header may still be ''; older PHP created
                                // a generic object here on its own
                                $m->header = new stdClass();
                            }
                            $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id));
                        }

                        // Now we'll go through and reformat the results.
                        if ($m->entities) {
                            foreach ($m->entities as $entity) {
                                $msg->addEntity($entity);
                            }
                        } else {
                            $msg->addEntity($m);
                        }
                        $structure = "";
                    }
                }
                break;
        }
        $elem_num++;
        $text = "";
    }

    // loop through the additional properties and put those in the various headers
    foreach ($properties as $property) {
        $msg->header->{$property['name']} = $property['value'];
    }

    return $msg;
}
288
289/*
290 * I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't
291 * figure out how to do this part, so I decided to go to bed. I woke up
292 * in the morning and had a flash of insight. I went to the white-board
293 * and scribbled it out, then spent a bit programming it, and this is the
294 * result. Nothing complicated, but I think my brain was fried yesterday.
295 * Funny how that happens some times.
296 *
297 * This gets properties in a nested parenthesized list. For example,
298 * this would get passed something like: ("attachment" ("filename" "luke.tar.gz"))
299 * This returns an array called $props with all paired up properties.
300 * It ignores the "attachment" for now, maybe that should change later
301 * down the road. In this case, what is returned is:
302 * $props[0]["name"] = "filename";
303 * $props[0]["value"] = "luke.tar.gz";
304 */
function mime_get_props ($props, $structure) {
    /*
     * $props     - list of properties collected so far; each entry is
     *              array('name' => ..., 'value' => ...)
     * $structure - text holding quoted "name" "value" pairs, possibly
     *              inside nested parentheses
     *
     * Returns $props with every pair found appended (names lowercased).
     * A quoted string followed by a parenthesized list is skipped and
     * only the list is evaluated.  An array is always returned, also
     * for empty input or when the text ends right after a name.
     */
    if (!is_array($props)) {
        $props = array();
    }

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        $char = substr($structure, 0, 1);

        if ($char == '(') {
            // descend into the list; whatever follows it is ignored
            $end = mime_match_parenthesis(0, $structure);
            return mime_get_props($props, substr($structure, 1, $end - 1));
        }
        if ($char != '"') {
            return $props;
        }

        // first quoted string: the name (or a word in front of a list)
        $close = strpos($structure, '"', 1);
        if ($close === false) {
            $tmp = (string) substr($structure, 1);
        } else {
            $tmp = (string) substr($structure, 1, $close - 1);
        }
        $structure = trim(substr($structure, strlen($tmp) + 2));
        $char = substr($structure, 0, 1);

        if ($char == '"') {
            // second quoted string: the value that belongs to the name
            $close = strpos($structure, '"', 1);
            if ($close === false) {
                $value = (string) substr($structure, 1);
            } else {
                $value = (string) substr($structure, 1, $close - 1);
            }
            $structure = trim(substr($structure, strlen($value) + 2));
            $props[] = array('name' => strtolower($tmp), 'value' => $value);
            // Remaining pairs are picked up by the next loop iteration.
            // (There used to be an additional recursive call here whose
            // result was thrown away; it only repeated the work.)
        } else if ($char == '(') {
            $end = mime_match_parenthesis(0, $structure);
            return mime_get_props($props, substr($structure, 1, $end - 1));
        }
        // anything else is dealt with at the top of the next iteration
    }

    return $props;
}
358
359/*
360 * Matches parenthesis. It will return the position of the matching
361 * parenthesis in $structure. For instance, if $structure was:
362 * ("text" "plain" ("val1name", "1") nil ... )
363 * x x
364 * then this would return 42 to match up those two.
365 */
function mime_match_parenthesis ($pos, $structure) {
    /*
     * $pos       - position of an opening parenthesis in $structure
     * $structure - text to search
     *
     * Returns the position of the parenthesis that closes the one at
     * $pos.  If there is no '(' at $pos the length of $structure is
     * returned.  If no closing parenthesis is found an error message is
     * printed and the length of $structure is returned as well.
     */
    $j = strlen($structure);

    if ($pos >= $j || $structure[$pos] != '(') {
        return $j;
    }

    while ($pos < $j) {
        $pos++;
        if ($pos >= $j) {
            // ran off the end without finding the closing parenthesis
            break;
        }

        $char = $structure[$pos];
        if ($char == ')') {
            return $pos;
        } elseif ($char == '"') {
            // If inside of a string, skip string -- Boundary IDs and other
            // things can have ) in them.
            $pos++;
            while ($pos < $j && $structure[$pos] != '"') {
                // an escaped quote or backslash takes up two characters
                $pair = substr($structure, $pos, 2);
                if ($pair == '\\"' || $pair == '\\\\') {
                    $pos++;
                }
                $pos++;
            }
        } elseif ($char == '(') {
            // nested list: jump to its closing parenthesis
            $pos = mime_match_parenthesis($pos, $structure);
        }
    }

    echo _("Error decoding mime structure.  Report this as a bug!") . '<br>';
    return $pos;
}
399
function mime_fetch_body($imap_stream, $id, $ent_id ) {
    /*
     * $imap_stream - IMAP connection handed to sqimap_run_command()
     * $id          - id of the message
     * $ent_id      - id of the entity to fetch; 1 is used if it is empty
     *
     * Returns the body of the entity.  If the FETCH line announces a
     * {size} literal, that many bytes of the answer are returned; if it
     * carries a quoted string, that string is returned.  Otherwise an
     * error block is printed and the complete message is returned.
     */

    /*
     * do a bit of error correction.  If we couldn't find the entity id, just guess
     * that it is the first one.  That is usually the case anyway.
     */
    if (!$ent_id) {
        $ent_id = 1;
    }

    $cmd = "FETCH $id BODY[$ent_id]";
    $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message);

    // skip untagged lines until the FETCH line itself is reached
    do {
        $topline = trim(array_shift( $data ));
    } while( $topline && $topline[0] == '*' && !preg_match( '/\* [0-9]+ FETCH.*/i', $topline )) ;
    $wholemessage = implode('', $data);

    // ereg() no longer exists in current PHP; these are the same
    // patterns written for preg_match().
    if (preg_match('/\{([^\\\\}]*)\}/', $topline, $regs)) {

        $ret = substr( $wholemessage, 0, (int) $regs[1] );
        /*
         * There is some information in the content info header that could
         * be important in order to parse html messages.  Let's get them here.
         *
         * NOTE(review): the result of this request is currently unused.
         * The code that read Content-Base from it and prepended a
         * <base href=...> tag was disabled because BASE within HTML
         * documents is illegal (see w3 spec).  The request itself is
         * kept so that the conversation with the server does not change.
         */
        if ( substr($ret, 0, 1) == '<' ) {
            $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message);
        }
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        $ret = $regs[1];
    } else {
        global $where, $what, $mailbox, $passed_id, $startMessage;

        // query string for the "submit message" link; every value is
        // url-encoded before it goes into the link
        $par = 'mailbox=' . urlencode($mailbox) .
               '&amp;passed_id=' . urlencode((string) $passed_id);
        if (isset($where) && isset($what)) {
            $par .= '&amp;where='. urlencode($where) . "&amp;what=" . urlencode($what);
        } else {
            $par .= '&amp;startMessage=' . urlencode((string) $startMessage) .
                    '&amp;show_more=0';
        }
        $par .= '&amp;response=' . urlencode($response) .
                '&amp;message=' . urlencode($message).
                '&amp;topline=' . urlencode($topline);

        // Command, response, message and FETCH line come from the
        // request and from the server, so they are escaped for HTML.
        echo   '<tt><br>' .
               '<table width="80%"><tr>' .
               '<tr><td colspan=2>' .
               _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") .
               " <A HREF=\"../src/retrievalerror.php?$par\"><br>" .
               _("Submit message") . '</A><BR>&nbsp;' .
               '</td></tr>' .
               '<td><b>' . _("Command:") . '</td><td>' . htmlspecialchars($cmd) . '</td></tr>' .
               '<td><b>' . _("Response:") . '</td><td>' . htmlspecialchars($response) . '</td></tr>' .
               '<td><b>' . _("Message:") . '</td><td>' . htmlspecialchars($message) . '</td></tr>' .
               '<td><b>' . _("FETCH line:") . '</td><td>' . htmlspecialchars($topline) . '</td></tr>' .
               "</table><BR></tt></font><hr>";

        // fall back to the complete message
        // NOTE(review): this uses the global $passed_id, not the $id
        // parameter -- presumably both name the same message; confirm.
        $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message);
        array_shift($data);
        $wholemessage = implode('', $data);

        $ret = $wholemessage;
    }
    return( $ret );
}
d4467150 488
451f74a2 489function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
490 // do a bit of error correction. If we couldn't find the entity id, just guess
491 // that it is the first one. That is usually the case anyway.
492 if (!$ent_id) {
493 $ent_id = 1;
494 }
495 $sid = sqimap_session_id();
496 // Don't kill the connection if the browser is over a dialup
497 // and it would take over 30 seconds to download it.
b7206e1d 498
499