Optimization:
[squirrelmail.git] / functions / mime.php
CommitLineData
59177427 1<?php
2ba13803 2
35586184 3/**
4 * mime.php
5 *
15e6162e 6 * Copyright (c) 1999-2002 The SquirrelMail Project Team
35586184 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This contains the functions necessary to detect and decode MIME
10 * messages.
11 *
12 * $Id$
13 */
b74ba498 14
35586184 15require_once('../functions/imap.php');
16require_once('../functions/attachment_common.php');
8beafbbc 17
35586184 18/** Setting up the objects that have the structure for the message **/
class msg_header {
    /*
     * Plain value holder for the fields that can show up in a message
     * or entity header. Every field starts out with the default below.
     */
    var $type0 = '';
    var $type1 = '';
    var $boundary = '';
    var $charset = '';
    var $encoding = '';
    var $size = 0;
    var $to = array();
    var $from = '';
    var $date = '';
    var $cc = array();
    var $bcc = array();
    var $reply_to = '';
    var $subject = '';
    var $id = 0;
    var $mailbox = '';
    var $description = '';
    var $filename = '';
    var $entity_id = 0;
    var $message_id = 0;
    var $name = '';
    var $priority = 3;
}
b74ba498 29
class message {
    /*
     * One node of a parsed message. $header holds the header for this
     * node and $entities holds whatever was attached with addEntity(),
     * which lets message objects nest inside one another.
     */
    var $header = '';
    var $entities = array();

    /* Appends $msg to the end of this node's entity list. */
    function addEntity ($msg) {
        array_push($this->entities, $msg);
    }
}
8beafbbc 42
451f74a2 43/* --------------------------------------------------------------------------------- */
44/* MIME DECODING */
45/* --------------------------------------------------------------------------------- */
b74ba498 46
/*
 * Sends "FETCH <id> BODYSTRUCTURE" for the message identified by
 * $header->id, collects the reply up to the tagged completion line, and
 * parses it with mime_parse_structure().
 *
 * $imap_stream  open stream to the IMAP server (written with fputs, read
 *               with fgets)
 * $header       header object of the message; only ->id is read here
 *
 * Returns the object produced by mime_parse_structure(), with its
 * ->header replaced by $header.
 */
function mime_structure ($imap_stream, $header) {

    $ssid = sqimap_session_id();
    $lsid = strlen( $ssid );
    $id = $header->id;
    fputs ($imap_stream, "$ssid FETCH $id BODYSTRUCTURE\r\n");
    //
    // This should use sqimap_read_data instead of reading it itself
    //
    $bodystructure = '';
    $read = fgets ($imap_stream, 9216);
    // Stop on the tagged line, on EOF, and also when fgets() fails without
    // EOF being set (e.g. a read timeout) -- otherwise this loop never ends.
    while ( $read !== false &&
            substr($read, 0, $lsid) <> $ssid &&
            !feof( $imap_stream ) ) {
        $bodystructure .= $read;
        $read = fgets ($imap_stream, 9216);
    }

    // isolate the body structure and remove beginning and end parenthesis
    $start = strpos(strtolower($bodystructure), 'bodystructure');
    if ($start === false) {
        // No BODYSTRUCTURE item in the reply: parse nothing rather than
        // whatever happens to sit at offset 13 of an unrelated response.
        $read = '';
    } else {
        // 13 == strlen('bodystructure')
        $read = trim((string) substr ($bodystructure, $start + 13));
        // drop the parenthesis that closes the FETCH response itself
        $read = trim((string) substr ($read, 0, -1));
    }

    // Peel off outer parentheses for as long as the first '(' is matched
    // by the very last character.
    $end = mime_match_parenthesis(0, $read);
    while ($read !== '' && $end == strlen($read)-1) {
        $read = trim((string) substr ($read, 0, -1));
        $read = trim((string) substr ($read, 1));
        $end = mime_match_parenthesis(0, $read);
    }

    $msg = mime_parse_structure ($read, 0);
    $msg->header = $header;

    return( $msg );
}
b74ba498 84
/*
 * Parses one structure string into a message object. It calls itself
 * recursively, so it can be handed nested structures.
 *
 * If $structure starts with '(' it is treated as a run of adjacent
 * parenthesised parts: each part is parsed recursively under a fresh
 * entity id (a new id level is opened, then incremented once per part)
 * and added as an entity of the returned message. Anything following the
 * last adjacent part is not looked at here.
 *
 * Otherwise the structure is a single entity and is handed to
 * mime_get_element(), which fills in the header.
 *
 * $structure  structure text with its outer parentheses already removed
 * $ent_id     entity id of the enclosing level
 *
 * Returns a message object.
 */
function mime_parse_structure ($structure, $ent_id) {

    $msg = new message();
    // substr() instead of a string offset: safe on an empty structure and
    // does not rely on the {} offset syntax that PHP 8 removed.
    if (substr($structure, 0, 1) == '(') {
        $ent_id = mime_new_element_level($ent_id);
        $end = -1;
        do {
            $start = $end+1;
            $end = mime_match_parenthesis ($start, $structure);

            // text between (not including) the matched parentheses
            $element = (string) substr($structure, $start+1, ($end - $start)-1);
            $ent_id = mime_increment_id ($ent_id);
            $newmsg = mime_parse_structure ($element, $ent_id);
            $msg->addEntity ($newmsg);
            // keep going only while another part follows immediately;
            // at the end of the string substr() yields no character
        } while (substr($structure, $end+1, 1) == '(');
    } else {
        // parse the elements
        $msg = mime_get_element ($structure, $msg, $ent_id);
    }
    return $msg;
}
e4a256af 114
/*
 * Bumps the last component of an element id. Element ids look like
 * 1, 1.2 or 4.3.2.4.1; given 1.2 this returns 1.3. An id without a dot
 * is simply incremented as a number.
 */
function mime_increment_id ($id) {

    // No dot (or a dot only at offset 0): plain numeric increment.
    if (!strpos($id, '.')) {
        return $id + 1;
    }

    // Split at the last dot and increment what follows it.
    $cut = strrpos($id, '.');
    $tail = substr($id, $cut + 1);
    $tail++;

    return substr($id, 0, $cut) . '.' . $tail;
}
132
/*
 * Companion of mime_increment_id(): opens a new level below an entity
 * id, turning 1.3 into 1.3.0. An empty/zero id becomes plain 0.
 *
 * NOTE: an id ending in .0 (or 0 itself) is not a valid element id. It
 *       must be passed through mime_increment_id() before use; doing it
 *       this way means the first entity of a level needs no special case.
 */
function mime_new_element_level ($id) {

    return( $id ? $id . '.0' : 0 );
}
151
/*
 * Parses the elements of a single (non-multipart) entity and stores them
 * in a fresh msg_header on $msg.
 *
 * The structure is consumed from the left, one element at a time. An
 * element is NIL, a "quoted string", a (parenthesised list) or a bare
 * atom. Elements are numbered from 1 and stored by position:
 *   1 type0, 2 type1, 4 id, 5 description, 6 encoding, 7 size,
 *   8 num_lines when type0 is 'text'.
 * For message/rfc822, the parenthesised structure that follows element 8
 * is parsed with mime_parse_structure() and its entities are attached to
 * $msg. Every parenthesised list met along the way is passed to
 * mime_get_props(); the resulting name/value pairs are finally copied
 * onto the header as properties.
 *
 * $structure  (by reference) structure text; it is consumed while parsing
 * $msg        message object that receives the header and sub-entities
 * $ent_id     entity id to record in the header
 *
 * Returns $msg.
 */
function mime_get_element (&$structure, $msg, $ent_id) {

    $elem_num = 1;
    $msg->header = new msg_header();
    $msg->header->entity_id = $ent_id;
    $properties = array();
    // The '(' branch below never assigns $text, so give it a defined value
    // in case a list is the very first element.
    $text = '';

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        $len = strlen($structure);
        if ($len == 0) {
            // only whitespace was left: there is no further element
            break;
        }
        $char = $structure[0];

        if (strtolower(substr($structure, 0, 3)) == 'nil') {
            $text = '';
            $structure = (string) substr($structure, 3);
        } else if ($char == '"') {
            // collect everything up to the closing quote (bounds are
            // checked before the offset is read)
            $pos = 1;
            $text = '';
            while ($pos < $len && $structure[$pos] <> '"') {
                $text .= $structure[$pos];
                $pos++;
            }
            // skip the text plus both quotes
            $structure = (string) substr($structure, strlen($text) + 2);
        } else if ($char == '(') {
            // a parenthesised list: harvest its name/value pairs
            $end = mime_match_parenthesis (0, $structure);
            $sub = (string) substr($structure, 1, $end-1);
            $found = mime_get_props($properties, $sub);
            if (is_array($found)) {
                // keep what we already have if nothing usable came back
                $properties = $found;
            }
            $structure = (string) substr($structure, strlen($sub) + 2);
        } else {
            // bare atom: runs up to the next space or ')'
            $pos = 0;
            $text = '';
            while ($pos < $len && $structure[$pos] != ' ' && $structure[$pos] != ')') {
                $text .= $structure[$pos];
                $pos++;
            }
            if ($pos == 0) {
                // A stray ')' yields an empty atom and consumes nothing,
                // which used to loop forever. Drop it; it is not an element.
                $structure = (string) substr($structure, 1);
                continue;
            }
            $structure = (string) substr($structure, $pos);
        }

        // This is where all the text parts get put into the header
        switch ($elem_num) {
            case 1:
                $msg->header->type0 = strtolower($text);
                break;
            case 2:
                $msg->header->type1 = strtolower($text);
                break;
            case 4: // Id
                // Invisimail enclose images with <>
                $msg->header->id = str_replace( '<', '', str_replace( '>', '', $text ) );
                break;
            case 5:
                $msg->header->description = $text;
                break;
            case 6:
                $msg->header->encoding = strtolower($text);
                break;
            case 7:
                $msg->header->size = $text;
                break;
            default:
                if ($msg->header->type0 == 'text' && $elem_num == 8) {
                    // This is a plain text message, so lets get the number of lines
                    // that it contains.
                    $msg->header->num_lines = $text;

                } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) {
                    // This is an encapsulated message, so lets start all over again and
                    // parse this message adding it on to the existing one.
                    $structure = trim($structure);
                    if (substr($structure, 0, 1) == '(') {
                        $e = mime_match_parenthesis (0, $structure);
                        $structure = (string) substr($structure, 0, $e);
                        $structure = (string) substr($structure, 1);
                        $m = mime_parse_structure($structure, $msg->header->entity_id);

                        // the following conditional is there to correct a bug that wasn't
                        // incrementing the entity IDs correctly because of the special case
                        // that message/rfc822 is. This fixes it fine.
                        // (A multipart result has no header object; only its
                        // entities are used below, so it is left alone.)
                        if (substr($structure, 1, 1) != '(' && is_object($m->header)) {
                            $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id));
                        }

                        // Now we'll go through and reformat the results.
                        if ($m->entities) {
                            foreach ($m->entities as $entity) {
                                $msg->addEntity($entity);
                            }
                        } else {
                            $msg->addEntity($m);
                        }
                        $structure = "";
                    }
                }
                break;
        }
        $elem_num++;
        $text = "";
    }

    // loop through the additional properties and put those in the various headers
    foreach ($properties as $property) {
        // an empty property name cannot be used as an object member
        if (strlen($property['name']) > 0) {
            $msg->header->{$property['name']} = $property['value'];
        }
    }

    return $msg;
}
262
/*
 * Collects name/value pairs from a (possibly nested) parenthesised list
 * whose outer parentheses have already been removed.
 *
 * For example, given   "attachment" ("filename" "luke.tar.gz")
 * the leading "attachment" is ignored and the result gains
 *      $props[n]["name"]  = "filename";
 *      $props[n]["value"] = "luke.tar.gz";
 * A flat list such as  "charset" "us-ascii" "name" "a.txt"  yields one
 * entry per pair. Names are lower-cased, values are kept as they are.
 *
 * Parsing stops at the first thing that is neither a quoted string nor
 * a nested list (for instance NIL). A nested list is parsed recursively
 * and ends parsing of the current level.
 *
 * NOTE: backslash-escaped quotes inside a quoted string are not
 *       understood here; the string ends at the first '"'.
 *
 * $props      pairs collected so far (array of name/value arrays)
 * $structure  list text to parse
 *
 * Always returns the array of pairs, also for empty input.
 */
function mime_get_props ($props, $structure) {

    if (! isset($props)) {
        $props = array();
    }

    $structure = trim($structure);
    while (strlen($structure) > 0) {
        $char = $structure[0];

        if ($char == '(') {
            // nested list: descend into it
            $end = mime_match_parenthesis (0, $structure);
            $sub = (string) substr($structure, 1, $end-1);
            return mime_get_props($props, $sub);
        }
        if ($char != '"') {
            return $props;
        }

        // read the name up to its closing quote
        $len = strlen($structure);
        $pos = 1;
        $tmp = '';
        while ($pos < $len && $structure[$pos] != '"') {
            $tmp .= $structure[$pos];
            $pos++;
        }
        // skip the name plus both quotes
        $structure = trim((string) substr($structure, strlen($tmp) + 2));
        if ($structure === '') {
            // a name with nothing after it is not a pair
            return $props;
        }

        $char = $structure[0];
        if ($char == '"') {
            // read the value
            $len = strlen($structure);
            $pos = 1;
            $value = '';
            while ($pos < $len && $structure[$pos] != '"') {
                $value .= $structure[$pos];
                $pos++;
            }
            // skip the VALUE (not the name) plus both quotes, then go on
            // with the next pair instead of returning after the first one
            $structure = trim((string) substr($structure, strlen($value) + 2));

            $props[] = array('name' => strtolower($tmp), 'value' => $value);
        } else if ($char == '(') {
            // "name" (list): the name is dropped, the list is parsed
            $end = mime_match_parenthesis (0, $structure);
            $sub = (string) substr($structure, 1, $end-1);
            return mime_get_props($props, $sub);
        } else {
            return $props;
        }
    }

    return $props;
}
329
/*
 * Finds the parenthesis that closes the one at offset $pos.
 * For instance, if $structure was:
 *    ("text" "plain" ("val1name", "1") nil ... )
 *     x                                         x
 * then calling this with $pos = 0 returns 42.
 *
 * Quoted strings are skipped as a whole (boundary ids and other things
 * can contain ')'), honouring the \" and \\ escapes. Nested
 * parentheses are matched recursively.
 *
 * $pos        offset of the opening parenthesis
 * $structure  text to search
 *
 * Returns the offset of the matching ')'. If the character at $pos is
 * not '(' (or $pos lies beyond the string) strlen($structure) is
 * returned. If no match exists an error line is echoed and
 * strlen($structure) is returned.
 */
function mime_match_parenthesis ($pos, $structure) {

    $j = strlen( $structure );

    // Every offset below is bounds-checked BEFORE it is read.
    if ( $pos >= $j || $structure[$pos] != '(' ) {
        return( $j );
    }

    while ( $pos < $j ) {
        $pos++;
        if ($pos >= $j) {
            // ran off the end without finding the closing parenthesis
            break;
        }
        $char = $structure[$pos];
        if ($char == ')') {
            return $pos;
        } elseif ($char == '"') {
            // skip the quoted string; $pos ends up on its closing quote
            $pos++;
            while ( $pos < $j && $structure[$pos] != '"' ) {
                if ($structure[$pos] == '\\' && $pos + 1 < $j &&
                    ($structure[$pos + 1] == '"' || $structure[$pos + 1] == '\\')) {
                    // escaped quote or backslash: step over both characters
                    $pos++;
                }
                $pos++;
            }
        } elseif ($char == '(') {
            // jump to the end of the nested list
            $pos = mime_match_parenthesis ($pos, $structure);
        }
    }
    echo _("Error decoding mime structure. Report this as a bug!") . '<br>';
    return( $pos );
}
370
/*
 * Fetches the body of one entity of a message with
 * "FETCH <id> BODY[<ent_id>]" and returns it as a string.
 *
 * The first FETCH line of the reply decides how the body is extracted:
 *   - it carries a {size} marker: the first <size> bytes of the remaining
 *     reply lines are returned;
 *   - it carries a "quoted string": that string is returned;
 *   - neither: an error box with a link to retrievalerror.php is echoed
 *     and the complete message (BODY[]) of the global $passed_id is
 *     fetched and returned instead.
 *
 * $imap_stream  open stream to the IMAP server
 * $id           id of the message
 * $ent_id       entity id within the message; 1 is used when empty
 */
function mime_fetch_body($imap_stream, $id, $ent_id ) {

    /*
     * do a bit of error correction.  If we couldn't find the entity id, just guess
     * that it is the first one.  That is usually the case anyway.
     */
    if (!$ent_id) {
        $ent_id = 1;
    }

    $cmd = "FETCH $id BODY[$ent_id]";
    $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message);

    // skip untagged lines until the FETCH line (or the end) is reached
    do {
        $topline = trim(array_shift( $data ));
    } while( $topline && $topline[0] == '*' && !preg_match( '/\* [0-9]+ FETCH.*/i', $topline )) ;
    $wholemessage = implode('', $data);

    // preg_match() replaces ereg(), which no longer exists in PHP 7+.
    if (preg_match('/\{([^}]*)\}/', $topline, $regs)) {
        // {size}: take exactly that many bytes of what follows.
        // (A former extra "BODY[$ent_id.MIME]" fetch for HTML bodies was
        // dropped: its result was never used, it only cost a round trip.)
        $ret = substr( $wholemessage, 0, (int) $regs[1] );
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        $ret = $regs[1];
    } else {
        global $where, $what, $mailbox, $passed_id, $startMessage;
        // every value is url-encoded before it goes into the link
        $par = 'mailbox=' . urlencode($mailbox) . '&amp;passed_id=' . urlencode($passed_id);
        if (isset($where) && isset($what)) {
            $par .= '&amp;where='. urlencode($where) . "&amp;what=" . urlencode($what);
        } else {
            $par .= '&amp;startMessage=' . urlencode($startMessage) . '&amp;show_more=0';
        }
        $par .= '&amp;response=' . urlencode($response) .
                '&amp;message=' . urlencode($message).
                '&amp;topline=' . urlencode($topline);

        // The command, the server reply and the FETCH line are not under
        // our control (the latter can carry message content), so they are
        // HTML-escaped before being shown.
        echo   '<tt><br>' .
               '<table width="80%"><tr>' .
               '<tr><td colspan=2>' .
               _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") .
               " <A HREF=\"../src/retrievalerror.php?$par\"><br>" .
               _("Submit message") . '</A><BR>&nbsp;' .
               '</td></tr>' .
               '<td><b>' . _("Command:") . '</td><td>' . htmlspecialchars($cmd) . '</td></tr>' .
               '<td><b>' . _("Response:") . '</td><td>' . htmlspecialchars($response) . '</td></tr>' .
               '<td><b>' . _("Message:") . '</td><td>' . htmlspecialchars($message) . '</td></tr>' .
               '<td><b>' . _("FETCH line:") . '</td><td>' . htmlspecialchars($topline) . '</td></tr>' .
               "</table><BR></tt></font><hr>";

        // fall back to the complete message
        $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message);
        array_shift($data);
        $wholemessage = implode('', $data);

        $ret = $wholemessage;
    }
    return( $ret );
}
d4467150 459
451f74a2 460function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
461 // do a bit of error correction. If we couldn't find the entity id, just guess
462 // that it is the first one. That is usually the case anyway.
463 if (!$ent_id) {
464 $ent_id = 1;
465 }
466 $sid = sqimap_session_id();
467 // Don't kill the connection if the browser is over a dialup
468 // and it would take over 30 seconds to download it.
b7206e1d 469
470