Fix & in URLs to & for correctness of HTML, also 100% to "100%" in
[squirrelmail.git] / functions / mime.php
... / ...
CommitLineData
1<?php
2
3/**
4 * mime.php
5 *
6 * Copyright (c) 1999-2002 The SquirrelMail Project Team
7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This contains the functions necessary to detect and decode MIME
10 * messages.
11 *
12 * $Id$
13 */
14
15require_once('../functions/imap.php');
16require_once('../functions/attachment_common.php');
17
18/** Setting up the objects that have the structure for the message **/
class msg_header {
    /*
     * Plain value holder for the header fields of a message or of one
     * MIME entity.  Each field starts out with the default given below.
     * Field order is kept as originally declared.
     */

    /* Lower-cased first and second BODYSTRUCTURE elements
       (compared against 'text', 'message' and 'rfc822' by the parser). */
    var $type0 = '';
    var $type1 = '';

    var $boundary = '';
    var $charset = '';

    /* Filled by mime_get_element() from BODYSTRUCTURE elements 6 and 7. */
    var $encoding = '';
    var $size = 0;

    /* Message level fields; not written by the parser in this file part. */
    var $to = array();
    var $from = '';
    var $date = '';
    var $cc = array();
    var $bcc = array();
    var $reply_to = '';
    var $subject = '';

    /* On a message header this is read as the message number by
       mime_structure(); on an entity header mime_get_element() stores
       BODYSTRUCTURE element 4 here with any '<' and '>' removed. */
    var $id = 0;

    var $mailbox = '';

    /* BODYSTRUCTURE element 5. */
    var $description = '';

    var $filename = '';

    /* Dotted entity number such as 1, 1.2 or 4.3.2.4.1. */
    var $entity_id = 0;

    var $message_id = 0;
    var $name = '';
    var $priority = 3;
}
29
class message {
    /*
     * Node of the parsed message tree.  A node carries one header and a
     * list of child nodes, so a whole multipart message is represented by
     * nesting objects of this class inside each other.
     */

    /* Header of this node; an empty string until a parser assigns one. */
    var $header = '';

    /* Child nodes, in the order they were added. */
    var $entities = array();

    /*
     * Appends $msg to the end of the list of child nodes.
     */
    function addEntity ($msg) {
        array_push($this->entities, $msg);
    }
}
42
43/* --------------------------------------------------------------------------------- */
44/* MIME DECODING */
45/* --------------------------------------------------------------------------------- */
46
47/* This function gets the structure of a message and stores it in the "message" class.
48 * It will return this object for use with all relevant header information and
49 * fully parsed into the standard "message" object format.
50 */
/**
 * Fetches the BODYSTRUCTURE of one message and turns it into a message tree.
 *
 * @param resource $imap_stream  open IMAP connection that is written to and read from
 * @param object   $header       header object; its ->id is used as the message number
 * @return object  result of mime_parse_structure() with ->header set to $header
 */
function mime_structure ($imap_stream, $header) {

    // Ask for the 'Seen' flag to be set on exactly this message first.
    sqimap_messages_flag ($imap_stream, $header->id, $header->id, 'Seen');

    $tag = sqimap_session_id();
    $tag_length = strlen($tag);
    fputs ($imap_stream, $tag . ' FETCH ' . $header->id . " BODYSTRUCTURE\r\n");

    //
    // This should use sqimap_read_data instead of reading it itself
    //
    // Collect every line up to (not including) the one that starts with
    // our tag, or up to the end of the stream.
    $response = '';
    for ($line = fgets ($imap_stream, 9216);
         substr($line, 0, $tag_length) != $tag && !feof( $imap_stream );
         $line = fgets ($imap_stream, 9216)) {
        $response .= $line;
    }

    // Keep only what follows the word "bodystructure" (13 characters long)
    // and drop the final character of the response.
    $keyword_end = strpos(strtolower($response), 'bodystructure') + 13;
    $body = trim(substr ($response, $keyword_end));
    $body = trim(substr ($body, 0, -1));

    // As long as one pair of parentheses wraps the whole string, peel it off.
    while (mime_match_parenthesis(0, $body) == strlen($body) - 1) {
        $body = trim(substr ($body, 0, -1));
        $body = trim(substr ($body, 1));
    }

    $tree = mime_parse_structure ($body, 0);
    $tree->header = $header;

    return $tree;
}
85
86/* this starts the parsing of a particular structure. It is called recursively,
87 * so it can be passed different structures. It returns an object of type
88 * $message.
89 * First, it checks to see if it is a multipart message. If it is, then it
90 * handles that as it sees is necessary. If it is just a regular entity,
91 * then it parses it and adds the necessary header information (by calling out
92 * to mime_get_element()).
93 */
/**
 * Parses one (possibly nested) BODYSTRUCTURE fragment into a message object.
 *
 * A fragment that starts with '(' is treated as a run of adjacent
 * parenthesized parts; each part is parsed recursively and added as an
 * entity.  Anything else is handed to mime_get_element() as a single entity.
 *
 * String positions are read with substr() instead of the {} offset syntax:
 * {} is a parse error on PHP 8, and substr() simply yields an empty result
 * for an empty fragment or when the last part ends the string, where a
 * direct offset read would raise an "uninitialized string offset" message.
 *
 * @param string $structure  fragment with its outermost parentheses removed
 * @param mixed  $ent_id     entity id of the enclosing level (0 at the top)
 * @return object message
 */
function mime_parse_structure ($structure, $ent_id) {

    $msg = new message();
    if (substr($structure, 0, 1) == '(') {
        // Open a new id level; it is incremented before every use below.
        $ent_id = mime_new_element_level($ent_id);
        $end = -1;
        do {
            $start = $end + 1;
            $end = mime_match_parenthesis ($start, $structure);

            // Text between the matching parentheses, without them.
            $element = substr($structure, $start + 1, ($end - $start) - 1);
            $ent_id = mime_increment_id ($ent_id);
            $newmsg = mime_parse_structure ($element, $ent_id);
            $msg->addEntity ($newmsg);
        } while (substr($structure, $end + 1, 1) == '(');
    } else {
        // parse the elements
        $msg = mime_get_element ($structure, $msg, $ent_id);
    }
    return $msg;
}
115
116/* Increments the element ID. An element id can look like any of
117 * the following: 1, 1.2, 4.3.2.4.1, etc. This function increments
118 * the last number of the element id, changing 1.2 to 1.3.
119 */
/**
 * Returns $id with its last dotted component increased by one.
 *
 * @param mixed $id  entity id such as 0, '1', '1.2' or '4.3.2.4.1'
 * @return mixed     $id + 1 when $id has no dot after its first character,
 *                   otherwise the id string with the last component incremented
 */
function mime_increment_id ($id) {

    // No dot (or a dot only at position 0): treat the id as a plain number.
    if (!strpos($id, '.')) {
        return $id + 1;
    }

    $cut = strrpos($id, '.');
    $tail = substr($id, $cut + 1);
    $tail++;

    return substr($id, 0, $cut) . '.' . $tail;
}
133
134/*
135 * See comment for mime_increment_id().
136 * This adds another level on to the entity_id changing 1.3 to 1.3.0
137 * NOTE: 1.3.0 is not a valid element ID. It MUST be incremented
138 * before it can be used. I left it this way so as not to have
139 * to make a special case if it is the first entity_id. It
140 * always increments it, and that works fine.
141 */
/**
 * Opens a new nesting level below entity id $id.
 *
 * @param mixed $id  current entity id
 * @return mixed     0 when $id is empty/false, otherwise $id with '.0' appended;
 *                   the result has to be incremented before it is a usable id
 */
function mime_new_element_level ($id) {

    return $id ? $id . '.0' : 0;
}
152
/**
 * Parses the BODYSTRUCTURE of one non-multipart entity into $msg->header.
 *
 * The structure is consumed token by token.  A token is NIL, a quoted
 * string, a parenthesized list (handed to mime_get_props()) or a bare word.
 * Tokens are numbered from 1 and stored by position: 1 type0, 2 type1,
 * 4 id, 5 description, 6 encoding, 7 size, and 8 either the line count of
 * a text entity or the start of the message wrapped in a message/rfc822.
 *
 * Differences to the previous implementation:
 *  - string positions use [] / substr(); the {} offset syntax is a parse
 *    error on PHP 8;
 *  - bounds are checked before a character is read;
 *  - \" and \\ inside quoted strings are unescaped, the same two escapes
 *    mime_match_parenthesis() recognizes;
 *  - a stray ')' is skipped instead of looping forever;
 *  - the entity id fix-up for message/rfc822 is only applied when the
 *    parsed message really has a header object;
 *  - a non-array result of mime_get_props() and unusable property names
 *    are ignored.
 *
 * @param string $structure  entity structure without its outer parentheses;
 *                           passed by reference and consumed while parsing
 * @param object $msg        message object that receives the new header
 * @param mixed  $ent_id     entity id stored in the new header
 * @return object            $msg
 */
function mime_get_element (&$structure, $msg, $ent_id) {

    $elem_num = 1;
    $msg->header = new msg_header();
    $msg->header->entity_id = $ent_id;
    $properties = array();

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        $len = strlen($structure);
        $char = substr($structure, 0, 1);
        $text = '';

        if (strtolower(substr($structure, 0, 3)) == 'nil') {
            $structure = substr($structure, 3);
        } else if ($char == '"') {
            // Collect everything up to the closing quote.
            $pos = 1;
            while ($pos < $len && $structure[$pos] != '"') {
                if ($structure[$pos] == '\\' && $pos + 1 < $len &&
                    ($structure[$pos + 1] == '"' || $structure[$pos + 1] == '\\')) {
                    // Escaped quote or backslash: keep the character, drop the escape.
                    $pos++;
                }
                $text .= $structure[$pos];
                $pos++;
            }
            // $pos is on the closing quote (or at the end if it is missing).
            $structure = substr($structure, $pos + 1);
        } else if ($char == '(') {
            // Parenthesized list: collect name/value pairs from it.
            $end = mime_match_parenthesis (0, $structure);
            $sub = substr($structure, 1, $end-1);
            $found = mime_get_props($properties, $sub);
            if (is_array($found)) {
                $properties = $found;
            }
            $structure = substr($structure, strlen($sub) + 2);
        } else {
            // Bare word: runs up to the next space or closing parenthesis.
            $pos = 0;
            while ($pos < $len && $structure[$pos] != ' ' && $structure[$pos] != ')') {
                $text .= $structure[$pos];
                $pos++;
            }
            if ($pos == 0 && $len > 0) {
                // A stray ')' would otherwise consume nothing and the loop
                // would never end.  Drop it; it is not an element.
                $structure = substr($structure, 1);
                continue;
            }
            $structure = substr($structure, $pos);
        }

        // This is where all the text parts get put into the header
        switch ($elem_num) {
            case 1:
                $msg->header->type0 = strtolower($text);
                break;
            case 2:
                $msg->header->type1 = strtolower($text);
                break;
            case 4: // Id
                // Invisimail enclose images with <>
                $msg->header->id = str_replace( '<', '', str_replace( '>', '', $text ) );
                break;
            case 5:
                $msg->header->description = $text;
                break;
            case 6:
                $msg->header->encoding = strtolower($text);
                break;
            case 7:
                $msg->header->size = $text;
                break;
            default:
                if ($msg->header->type0 == 'text' && $elem_num == 8) {
                    // This is a plain text message, so lets get the number of lines
                    // that it contains.
                    $msg->header->num_lines = $text;

                } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) {
                    // This is an encapsulated message, so lets start all over again and
                    // parse this message adding it on to the existing one.
                    $structure = trim($structure);
                    if ( substr($structure, 0, 1) == '(' ) {
                        $e = mime_match_parenthesis (0, $structure);
                        $structure = substr($structure, 0, $e);
                        $structure = substr($structure, 1);
                        $m = mime_parse_structure($structure, $msg->header->entity_id);

                        // the following conditional is there to correct a bug that wasn't
                        // incrementing the entity IDs correctly because of the special case
                        // that message/rfc822 is.  This fixes it fine.
                        // A multipart result has no header object (it is still ''),
                        // and assigning a property to that is fatal on PHP 8.
                        if (substr($structure, 1, 1) != '(' && is_object($m->header)) {
                            $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id));
                        }

                        // Now we'll go through and reformat the results.
                        if ($m->entities) {
                            for ($i=0; $i < count($m->entities); $i++) {
                                $msg->addEntity($m->entities[$i]);
                            }
                        } else {
                            $msg->addEntity($m);
                        }
                        $structure = "";
                    }
                }
                break;
        }
        $elem_num++;
    }

    // loop through the additional properties and put those in the various headers
    if ($msg->header->type0 != 'message') {
        $prop_count = count($properties);
        for ($i=0; $i < $prop_count; $i++) {
            $prop_name = $properties[$i]['name'];
            // An empty name or one starting with a NUL byte cannot be used
            // as a property name and would abort the script.
            if (strlen($prop_name) == 0 || $prop_name[0] == "\0") {
                continue;
            }
            $msg->header->{$prop_name} = $properties[$i]['value'];
        }
    }

    return $msg;
}
263
264/*
265 * I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't
266 * figure out how to do this part, so I decided to go to bed. I woke up
267 * in the morning and had a flash of insight. I went to the white-board
268 * and scribbled it out, then spent a bit programming it, and this is the
269 * result. Nothing complicated, but I think my brain was fried yesterday.
270 * Funny how that happens some times.
271 *
272 * This gets properties in a nested parenthesized list. For example,
273 * this would get passed something like: ("attachment" ("filename" "luke.tar.gz"))
274 * This returns an array called $props with all paired up properties.
275 * It ignores the "attachment" for now, maybe that should change later
276 * down the road. In this case, what is returned is:
277 * $props[0]["name"] = "filename";
278 * $props[0]["value"] = "luke.tar.gz";
279 */
/**
 * Collects "name" "value" pairs from a (possibly nested) property list.
 *
 * A quoted name followed by a quoted value is appended to $props as
 * array('name' => lower-cased name, 'value' => value).  A quoted name
 * followed by a parenthesized list (for example "attachment" ("filename"
 * "luke.tar.gz")) is skipped and the list is searched recursively, as is a
 * list that appears where a name is expected.  Parsing stops at the first
 * token that is neither a quoted string nor a list (NIL, a number, ...).
 *
 * Differences to the previous implementation:
 *  - $props is always returned, also for an empty $structure (this used
 *    to return NULL and lose everything collected so far);
 *  - every pair in the list is collected, not only the first one;
 *  - the value is skipped by its own length, not by the length of the name;
 *  - \" and \\ inside quoted strings are unescaped, the same two escapes
 *    mime_match_parenthesis() recognizes;
 *  - string positions use [] with the bounds checked first; the {} offset
 *    syntax is a parse error on PHP 8.
 *
 * @param array  $props      pairs collected so far
 * @param string $structure  list contents without the outer parentheses
 * @return array             $props with the pairs found appended
 */
function mime_get_props ($props, $structure) {

    if (!is_array($props)) {
        $props = array();
    }

    $structure = trim($structure);
    while (strlen($structure) > 0) {
        $char = $structure[0];

        if ($char == '"') {
            // Property name.
            $len = strlen($structure);
            $pos = 1;
            $tmp = '';
            while ($pos < $len && $structure[$pos] != '"') {
                if ($structure[$pos] == '\\' && $pos + 1 < $len &&
                    ($structure[$pos + 1] == '"' || $structure[$pos + 1] == '\\')) {
                    $pos++;
                }
                $tmp .= $structure[$pos];
                $pos++;
            }
            $structure = trim(substr($structure, $pos + 1));
            $char = substr($structure, 0, 1);

            if ($char == '"') {
                // Property value.
                $len = strlen($structure);
                $pos = 1;
                $value = '';
                while ($pos < $len && $structure[$pos] != '"') {
                    if ($structure[$pos] == '\\' && $pos + 1 < $len &&
                        ($structure[$pos + 1] == '"' || $structure[$pos + 1] == '\\')) {
                        $pos++;
                    }
                    $value .= $structure[$pos];
                    $pos++;
                }
                $structure = trim(substr($structure, $pos + 1));

                $k = count($props);
                $props[$k]['name'] = strtolower($tmp);
                $props[$k]['value'] = $value;
            } else if ($char == '(') {
                // The name introduces a nested list; the name itself is ignored.
                $end = mime_match_parenthesis (0, $structure);
                $sub = substr($structure, 1, $end-1);
                $props = mime_get_props($props, $sub);
                $structure = trim(substr($structure, strlen($sub) + 2));
            } else {
                // Name without a usable value (NIL, number or nothing).
                return $props;
            }
        } else if ($char == '(') {
            $end = mime_match_parenthesis (0, $structure);
            $sub = substr($structure, 1, $end-1);
            $props = mime_get_props($props, $sub);
            $structure = trim(substr($structure, strlen($sub) + 2));
        } else {
            return $props;
        }
    }

    return $props;
}
330
331/*
332 * Matches parenthesis. It will return the position of the matching
333 * parenthesis in $structure. For instance, if $structure was:
334 * ("text" "plain" ("val1name", "1") nil ... )
335 * x x
336 * then this would return 42 to match up those two.
337 */
/**
 * Finds the ')' that closes the '(' located at $pos.
 *
 * Quoted strings are skipped, so parentheses inside them do not count;
 * inside a string \" and \\ are treated as escapes.  Nested lists are
 * skipped by recursion.
 *
 * String positions use [] with the bounds checked before each read; the {}
 * offset syntax used before is a parse error on PHP 8 and the old loops
 * read one position past the end of the string.
 *
 * @param int    $pos        position that is expected to hold '('
 * @param string $structure  text to search
 * @return int   position of the matching ')'.  strlen($structure) is
 *               returned when $pos does not hold '(' and also, after an
 *               error message has been printed, when no match exists
 */
function mime_match_parenthesis ($pos, $structure) {

    $j = strlen( $structure );

    // ignore all extra characters
    // If inside of a string, skip string -- Boundary IDs and other
    // things can have ) in them.
    if ( $pos >= $j || $structure[$pos] != '(' ) {
        return( $j );
    }

    while ( $pos < $j ) {
        $pos++;
        if ( $pos >= $j ) {
            // Ran off the end without finding the match.
            break;
        }
        $char = $structure[$pos];
        if ($char == ')') {
            return $pos;
        } elseif ($char == '"') {
            // Skip to the closing quote; the outer loop steps past it.
            $pos++;
            while ( $pos < $j && $structure[$pos] != '"' ) {
                if (substr($structure, $pos, 2) == '\\"') {
                    $pos++;
                } elseif (substr($structure, $pos, 2) == '\\\\') {
                    $pos++;
                }
                $pos++;
            }
        } elseif ( $char == '(' ) {
            // Jump to the end of the nested list.
            $pos = mime_match_parenthesis ($pos, $structure);
        }
    }
    echo _("Error decoding mime structure.  Report this as a bug!") . '<br>';
    return( $pos );
}
371
/**
 * Fetches the raw (still encoded) body of one entity of a message.
 *
 * The FETCH response line decides how the body is taken:
 *  - it announces a literal "{n}": the first n bytes of the remaining
 *    response lines are the body.  If that body starts with '<', the MIME
 *    header of the entity is fetched as well and a Content-Base header, if
 *    present, is prepended as a <base href="..."> tag;
 *  - it contains a quoted string: that string is the body;
 *  - anything else: an error box is printed and the whole message is
 *    fetched and returned instead.
 *
 * Differences to the previous implementation:
 *  - ereg() (removed in PHP 7) is replaced by preg_match();
 *  - string positions use [] / substr(); the {} offset syntax is a parse
 *    error on PHP 8;
 *  - untagged responses in front of the FETCH line are really skipped.
 *    The old test compared the whole line with '*' and accepted single
 *    digit message numbers only, so it could never skip anything;
 *  - the literal size must be numeric and is cast before use;
 *  - everything echoed into the error box is HTML escaped, which also
 *    turns the '&' separators of the link into '&amp;'.
 *
 * @param resource $imap_stream  open IMAP connection
 * @param mixed    $id           message number
 * @param mixed    $ent_id       entity id; 1 is used when it is empty
 * @return string                body of the entity, or the whole message
 *                               when the response could not be understood
 */
function mime_fetch_body($imap_stream, $id, $ent_id ) {

    /*
     * do a bit of error correction.  If we couldn't find the entity id, just guess
     * that it is the first one.  That is usually the case anyway.
     */
    if (!$ent_id) {
        $ent_id = 1;
    }

    $cmd = "FETCH $id BODY[$ent_id]";
    $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message);

    // Drop untagged lines until the FETCH response (or the end) is reached.
    do {
        $topline = array_shift( $data );
    } while( $topline && $topline[0] == '*' && !preg_match( '/\\* [0-9]+ FETCH.*/i', $topline )) ;
    $topline = (string) $topline;
    $wholemessage = implode('', $data);

    if (preg_match('/\\{([0-9]+)\\}/', $topline, $regs)) {

        $ret = substr( $wholemessage, 0, (int) $regs[1] );
        /*
            There is some information in the content info header that could be important
            in order to parse html messages. Let's get them here.
        */
        if ( substr($ret, 0, 1) == '<' ) {
            $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message);
            $base = '';
            // $k counts the lines seen since the Content-Base header; it
            // starts high so that no line looks like a continuation line.
            $k = 10;
            foreach( $data as $d ) {
                if ( substr( $d, 0, 13 ) == 'Content-Base:' ) {
                    $j = strlen( $d );
                    $i = 13;
                    $base = '';
                    // Skip what precedes the value, including an opening quote.
                    while ( $i < $j &&
                            ( !isNoSep( $d[$i] ) || $d[$i] == '"' ) ) {
                        $i++;
                    }
                    // Keep only the characters isNoSep() accepts.
                    while ( $i < $j ) {
                        if ( isNoSep( $d[$i] ) ) {
                            $base .= $d[$i];
                        }
                        $i++;
                    }
                    $k = 0;
                } elseif ( $k == 1 && strlen( $d ) > 0 && !isNoSep( $d[0] ) ) {
                    // Line right after the header that starts with a
                    // separator: taken as a continuation of the value.
                    $base .= substr( $d, 1 );
                }
                $k++;
            }
            if ( $base <> '' ) {
                $ret = "<base href=\"$base\">" . $ret;
            }
        }
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        $ret = $regs[1];
    } else {
        global $where, $what, $mailbox, $passed_id, $startMessage;
        $par = 'mailbox=' . urlencode($mailbox) . "&passed_id=$passed_id";
        if (isset($where) && isset($what)) {
            $par .= '&where='. urlencode($where) . "&what=" . urlencode($what);
        } else {
            $par .= "&startMessage=$startMessage&show_more=0";
        }
        $par .= '&response=' . urlencode($response) .
                '&message=' . urlencode($message).
                '&topline=' . urlencode($topline);

        // All values below come from the request or from the server
        // response, so they are escaped before they are put into the page.
        echo   '<tt><br>' .
               '<table width="80%"><tr>' .
               '<tr><td colspan=2>' .
               _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") .
               ' <A HREF="../src/retrievalerror.php?' . htmlspecialchars($par) . '"><br>' .
               _("Submit message") . '</A><BR>&nbsp;' .
               '</td></tr>' .
               '<td><b>' . _("Command:") . '</td><td>' . htmlspecialchars($cmd) . '</td></tr>' .
               '<td><b>' . _("Response:") . '</td><td>' . htmlspecialchars($response) . '</td></tr>' .
               '<td><b>' . _("Message:") . '</td><td>' . htmlspecialchars($message) . '</td></tr>' .
               '<td><b>' . _("FETCH line:") . '</td><td>' . htmlspecialchars($topline) . '</td></tr>' .
               "</table><BR></tt></font><hr>";

        // NOTE(review): this uses the global $passed_id, not the $id
        // argument -- presumably both name the same message; confirm.
        $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message);
        array_shift($data);
        $wholemessage = implode('', $data);

        $ret = $wholemessage;
    }
    return( $ret );
}
458
459function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
460 // do a bit of error correction. If we couldn't find the entity id, just guess
461 // that it is the first one. That is usually the case anyway.
462 if (!$ent_id) {
463 $ent_id = 1;
464 }
465 $sid = sqimap_session_id();
466 // Don't kill the connection if the browser is over a dialup
467 // and it would take over 30 seconds to download it.
468
469