Rewrote MIME support and made it much, MUCH quicker. All parsing of the
[squirrelmail.git] / functions / mime.php
1 <?php
2 /** mime.php
3 **
4 ** This contains the functions necessary to detect and decode MIME
5 ** messages.
6 **
7 **/
8
// Flag recording that this file has been loaded.
$mime_php = true;

// Pull in each dependency only when its own "loaded" flag is not yet set.
if (!isset($i18n_php)) {
   include "../functions/i18n.php";
}
if (!isset($imap_php)) {
   include "../functions/imap.php";
}
if (!isset($config_php)) {
   include "../config/config.php";
}
17
18
19 /** Setting up the object that has the structure for the message **/
20
#[AllowDynamicProperties]
class msg_header {
   /** msg_header contains generic variables for values that could be
       in a header.

       mime_get_element() also copies every body parameter it finds onto
       this object under the parameter's own name, so properties beyond
       the ones declared here may appear at run time. The attribute line
       above permits that on PHP >= 8.2 and is parsed as an ordinary
       "#" comment by older PHP versions.
   **/

   var $type0, $type1, $boundary, $charset, $encoding;
   var $to, $from, $date, $cc, $bcc, $reply_to, $subject;
   var $id, $mailbox;
   var $entity_id;

   /** Set by mime_get_element() from the body structure. **/
   var $size, $num_lines;

   /** Read by formatAttachments() when listing attachments. **/
   var $filename;
}
30
class message {
   /** message is the object that contains messages.  It is a recursive
       object in that through the $entities variable, it can contain
       more objects of type message.  See documentation in mime.txt for
       a better description of how this works.

       $header   - header object describing this message or part
       $entities - child parts; stays unset until the first addEntity()
                   call, so "if ($msg->entities)" is false for a leaf
   **/
   var $header;
   var $entities;

   /** Appends $msg as the next child part of this message. **/
   function addEntity ($msg) {
      // $entities starts out unset; count() on a non-array is an error
      // on current PHP versions, so create the list on first use.
      if (!is_array($this->entities)) {
         $this->entities = array();
      }
      $this->entities[] = $msg;
   }
}
44
45
46
47 /* --------------------------------------------------------------------------------- */
48 /* MIME DECODING */
49 /* --------------------------------------------------------------------------------- */
50
/** Fetches the BODYSTRUCTURE of a message from the IMAP server and parses
    it into a "message" object.

    $imap_stream - open IMAP connection to write the command to
    $header      - header object of the message; its id selects the
                   message and the object becomes the header of the
                   returned top-level message

    Returns the parsed "message" object.  The message is flagged "Seen"
    as a side effect.
 **/
function mime_structure ($imap_stream, $header) {
   sqimap_messages_flag ($imap_stream, $header->id, $header->id, "Seen");

   $id = $header->id;
   fputs ($imap_stream, "a001 FETCH $id BODYSTRUCTURE\r\n");
   $read = sqimap_read_data ($imap_stream, "a001", true, $a, $b);
   // NOTE: the whole response line is lower-cased before parsing, so
   // every value parsed from it (including file names) is lower case.
   $read = strtolower($read[0]);

   // isolate the body structure and remove beginning and end parenthesis
   $read = trim(substr ($read, strpos($read, "bodystructure") + 13));
   $read = trim(substr ($read, 0, -2));
   $read = trim(substr ($read, 1));

   // mime_parse_structure() takes the starting entity id as a second
   // argument; 0 means "top level" (see mime_new_element_level()).
   $msg = mime_parse_structure ($read, 0);
   $msg->header = $header;
   return $msg;
}
73
/** Recursively turns a body structure string (outer parentheses already
    removed) into a "message" object.

    $structure - the structure text.  If it starts with "(" it is treated
                 as a run of adjacent parenthesised parts, each of which
                 is parsed recursively and added as a child entity;
                 otherwise it is parsed as a single part's field list.
    $ent_id    - entity id of this part (optional, 0 = top level)

    Returns the "message" object for this part.
 **/
function mime_parse_structure ($structure, $ent_id = 0) {
   $msg = new message();
   if (substr($structure, 0, 1) == "(") {
      // Open a new id level; it is incremented before each child is
      // parsed, so the first child never sees the ".0" placeholder.
      $ent_id = mime_new_element_level($ent_id);
      $start = $end = -1;
      do {
         $start = $end+1;
         $end = mime_match_parenthesis ($start, $structure);
         if (!is_int($end)) {
            // No matching ")" -- the structure is malformed; stop here
            // rather than parse garbage.
            break;
         }

         $element = substr($structure, $start+1, ($end - $start)-1);
         $ent_id = mime_increment_id($ent_id);
         $newmsg = mime_parse_structure ($element, $ent_id);
         $msg->addEntity ($newmsg);
      } while (substr($structure, $end+1, 1) == "(");
   } else {
      // Single part: fill a fresh header from the field list.
      // mime_get_element() declares $structure by reference itself, so
      // no "&" is needed (or allowed) at the call site.
      $msg->header = mime_get_element ($structure, new msg_header());
      $msg->header->entity_id = $ent_id;
   }
   return $msg;
}
101
// Increments the element ID.  An element id can look like any of
// the following:  1, 1.2, 4.3.2.4.1, etc.  This function increments
// the last number of the element id, changing 1.2 to 1.3.
//
// An id without a "." is returned as an integer ($id + 1); a dotted id
// is returned as a string with only its last component incremented.
function mime_increment_id ($id) {
   $id_text = (string) $id;
   $dot = strrpos($id_text, ".");

   if ($dot === false) {
      // Plain number (or empty): cast so that "" counts as 0.
      return ((int) $id) + 1;
   }

   // Keep everything up to the last dot, bump what follows it.
   $first = substr($id_text, 0, $dot);
   $last = (int) substr($id_text, $dot + 1);
   return $first . "." . ($last + 1);
}
116
// Adds another level on to the entity id, changing 1.3 to 1.3.0.
// An empty or zero id yields 0, the starting point for the top level.
//
// NOTE: 1.3.0 is not a valid element ID.  It MUST be incremented with
//       mime_increment_id() before it can be used.  It is left this way
//       so that the first entity of a level needs no special case: the
//       caller always increments before use.
function mime_new_element_level ($id) {
   if (!$id) {
      return 0;
   }
   return $id . ".0";
}
130
// Parses the field list of a single body part and stores the values in
// $header.
//
// $structure - field list text, passed by reference and consumed as it
//              is parsed
// $header    - header object to fill in; a new msg_header is created
//              when no object is given
//
// Fields are numbered in the order they appear: 1 -> type0, 2 -> type1,
// 6 -> encoding, 7 -> size, and 8 -> num_lines when type0 is "text".
// Every parenthesised list is handed to mime_get_props() and each
// name/value pair found is stored as $header->{name}.
//
// Returns the filled-in header object.
function mime_get_element (&$structure, $header) {
   if (!is_object($header)) {
      $header = new msg_header();
   }

   $elem_num = 1;
   $text = "";
   $properties = array();

   while (strlen($structure) > 0) {
      $structure = trim($structure);
      $char = substr($structure, 0, 1);

      if (substr($structure, 0, 3) == "nil") {
         $text = "";
         $structure = substr($structure, 3);
      } else if ($char == "\"") {
         // loop through until we find the matching quote, and return that as a string
         $pos = 1;
         $char = substr($structure, $pos, 1);
         while ($char != "\"" && $pos < strlen($structure)) {
            $text .= $char;
            $pos++;
            $char = substr($structure, $pos, 1);
         }
         // +2 skips the opening and closing quote
         $structure = substr($structure, strlen($text) + 2);
      } else if ($char == "(") {
         // A parenthesised list holds name/value properties; collect
         // them and skip past the whole list.
         $end = mime_match_parenthesis (0, $structure);
         $sub = substr($structure, 1, $end-1);
         $found = mime_get_props($properties, $sub);
         if (is_array($found)) {
            $properties = $found;
         }
         $structure = substr($structure, strlen($sub) + 2);
      } else {
         // loop through until we find a space or an end parenthesis
         $pos = 0;
         $char = substr($structure, $pos, 1);
         while ($char != " " && $char != ")" && $pos < strlen($structure)) {
            $text .= $char;
            $pos++;
            $char = substr($structure, $pos, 1);
         }
         if ($pos == 0) {
            // Nothing was consumed (stray ")" or only whitespace left).
            // Drop one character so the loop always makes progress, and
            // do not count it as a field.
            $structure = substr($structure, 1);
            continue;
         }
         $structure = substr($structure, strlen($text));
      }

      // This is where all the text parts get put into the header
      switch ($elem_num) {
         case 1:
            $header->type0 = $text;
            break;
         case 2:
            $header->type1 = $text;
            break;
         case 6:
            $header->encoding = $text;
            break;
         case 7:
            $header->size = $text;
            break;
         default:
            if ($header->type0 == "text" && $elem_num == 8) {
               $header->num_lines = $text;
            }
            break;
      }
      $elem_num++;
      $text = "";
   }

   // loop through the additional properties and put those in the various headers
   for ($i = 0; $i < count($properties); $i++) {
      $name = $properties[$i]["name"];
      if (strlen($name) == 0) {
         // an empty property name cannot be used as an object member
         continue;
      }
      $header->{$name} = $properties[$i]["value"];
   }
   return $header;
}
205
206 // I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't
207 // figure out how to do this part, so I decided to go to bed. I woke up
208 // in the morning and had a flash of insight. I went to the white-board
209 // and scribbled it out, then spent a bit programming it, and this is the
210 // result. Nothing complicated, but I think my brain was fried yesterday.
211 //
// This gets properties in a nested parenthesized list.  For example,
// this would get passed something like:  ("attachment" ("filename" "luke.tar.gz"))
// This returns an array called $props with all paired up properties.
// A quoted string followed by a parenthesised list (the "attachment"
// above) is not stored itself; only the pairs inside the list are.
// In this case, what is returned is:
//    $props[0]["name"] = "filename";
//    $props[0]["value"] = "luke.tar.gz";
//
// $props     - pairs collected so far (anything that is not an array is
//              treated as an empty list)
// $structure - text to scan
//
// Scanning stops at the first token that is neither a quoted string nor
// a parenthesised list.  Always returns the (possibly extended) array.
function mime_get_props ($props, $structure) {
   if (!is_array($props)) {
      $props = array();
   }

   while (strlen($structure) > 0) {
      $structure = trim($structure);
      $char = substr($structure, 0, 1);

      if ($char == "\"") {
         // read the quoted name
         $name = "";
         $pos = 1;
         $char = substr($structure, $pos, 1);
         while ($char != "\"" && $pos < strlen($structure)) {
            $name .= $char;
            $pos++;
            $char = substr($structure, $pos, 1);
         }
         // +2 skips the opening and closing quote
         $structure = trim(substr($structure, strlen($name) + 2));
         $char = substr($structure, 0, 1);

         if ($char == "\"") {
            // read the quoted value that belongs to the name
            $value = "";
            $pos = 1;
            $char = substr($structure, $pos, 1);
            while ($char != "\"" && $pos < strlen($structure)) {
               $value .= $char;
               $pos++;
               $char = substr($structure, $pos, 1);
            }
            $structure = trim(substr($structure, strlen($value) + 2));

            $k = count($props);
            $props[$k]["name"] = $name;
            $props[$k]["value"] = $value;
         } else if ($char == "(") {
            // name followed by a nested list: collect the list's pairs
            $end = mime_match_parenthesis (0, $structure);
            $sub = substr($structure, 1, $end-1);
            $props = mime_get_props($props, $sub);
            $structure = substr($structure, strlen($sub) + 2);
         } else {
            // a name with nothing usable after it ends the scan
            return $props;
         }
      } else if ($char == "(") {
         $end = mime_match_parenthesis (0, $structure);
         $sub = substr($structure, 1, $end-1);
         $props = mime_get_props($props, $sub);
         $structure = substr($structure, strlen($sub) + 2);
      } else {
         return $props;
      }
   }
   return $props;
}
265
// Matches parenthesis.  $pos is the position of an opening parenthesis
// in $structure; the position of the parenthesis that closes it is
// returned.  For instance, with $pos = 0 and $structure being
//    ("text" "plain" ("val1name" "1") nil)
// the position of the final ")" is returned, not that of the ")" which
// closes the inner list.
//
// Parentheses inside double-quoted strings are ignored (a backslash
// inside a quoted string escapes the next character).  If there is no
// matching parenthesis nothing is returned (NULL).
function mime_match_parenthesis ($pos, $structure) {
   $length = strlen($structure);
   $depth = 1;          // the parenthesis at $pos is already open
   $in_quote = false;

   for ($i = $pos + 1; $i < $length; $i++) {
      $char = substr($structure, $i, 1);

      if ($in_quote) {
         if ($char == "\\") {
            $i++;       // skip the escaped character
         } else if ($char == "\"") {
            $in_quote = false;
         }
      } else if ($char == "\"") {
         $in_quote = true;
      } else if ($char == "(") {
         $depth++;
      } else if ($char == ")") {
         $depth--;
         if ($depth == 0) {
            return $i;
         }
      }
   }
   // unbalanced input: fall through without a return value
}
285
// Fetches the body of one entity of a message from the IMAP server.
//
// $imap_stream - open IMAP connection to write the command to
// $id          - id of the message
// $ent_id      - id of the entity (body part) to fetch
//
// Returns the response lines joined into one string, without the first
// and the last line of the response ("" if there is nothing between).
function mime_fetch_body ($imap_stream, $id, $ent_id) {
   // do a bit of error correction.  If we couldn't find the entity id, just guess
   // that it is the first one.  That is usually the case anyway.
   if (!$ent_id) $ent_id = 1;

   fputs ($imap_stream, "a001 FETCH $id BODY[$ent_id]\r\n");
   $read = sqimap_read_data ($imap_stream, "a001", true, $a, $b);

   $text = "";
   $last = count($read) - 1;
   for ($i = 1; $i < $last; $i++) {
      // This fixes a bug in UW.  UW doesn't return what would normally be
      // expected from the BODY fetch command.  It has an extra line at the
      // end.  So if the second from the last line is a ), then remove it.
      if ($i == $last - 1 && trim($read[$i]) == ")") {
         continue;
      }
      $text .= $read[$i];
   }
   return $text;
}
304
305 /* -[ END MIME DECODING ]----------------------------------------------------------- */
306
307
308
/** Entry point for MIME decoding: logs in to the IMAP server with the
    global credentials, selects the mailbox named in $header and returns
    the structure that mime_structure() builds for the message.
    $body is accepted but not used.
 **/
function decodeMime ($body, $header) {
   global $username, $key, $imapServerAddress, $imapPort;

   $connection = sqimap_login($username, $key, $imapServerAddress, $imapPort, 0);
   sqimap_mailbox_select($connection, $header->mailbox);

   $parsed = mime_structure ($connection, $header);
   return $parsed;
}
319
// Searches $message and, depth first, all of its child entities for the
// part whose header entity_id equals $ent_id.
//
// Returns the matching message object, or nothing (NULL) if there is none.
function getEntity ($message, $ent_id) {
   if (!$message) {
      return;
   }

   $current = isset($message->header->entity_id) ? $message->header->entity_id : null;

   if (is_string($current) && is_string($ent_id)) {
      // Two strings are compared exactly: a loose "==" would compare
      // them as numbers and treat "1.10" and "1.1" as the same id.
      $same = (strcmp($current, $ent_id) == 0);
   } else {
      $same = ($current == $ent_id);
   }
   if ($same) {
      return $message;
   }

   if (is_array($message->entities)) {
      for ($i = 0; $i < count($message->entities); $i++) {
         $msg = getEntity ($message->entities[$i], $ent_id);
         if ($msg) {
            return $msg;
         }
      }
   }
}
333
// Finds the entity that should be shown as the message text: the first
// part, searching depth first, whose type is text/plain or text/html.
//
// Returns that part's entity_id, or nothing (NULL) if no such part
// exists.
function findDisplayEntity ($message) {
   if (!$message) {
      return;
   }

   $type0 = isset($message->header->type0) ? $message->header->type0 : "";
   if ($type0 == "text") {
      $type1 = isset($message->header->type1) ? $message->header->type1 : "";
      if ($type1 == "plain" || $type1 == "html") {
         return isset($message->header->entity_id) ? $message->header->entity_id : null;
      }
      // any other text subtype is not displayable
      return;
   }

   if (is_array($message->entities)) {
      for ($i = 0; $i < count($message->entities); $i++) {
         // Keep looking through the siblings until one of them yields a
         // displayable part, instead of stopping at the first child.
         $found = findDisplayEntity($message->entities[$i]);
         if (isset($found)) {
            return $found;
         }
      }
   }
}
348
/** This returns a parsed string called $body.  That string can then
    be displayed as the actual message in the HTML.  It contains
    everything needed, including HTML Tags, Attachments at the
    bottom, etc.

    $message - parsed message object (see mime_structure())
    $color   - colour table; $color[0] is used as the background of the
               attachment list
    $wrap_at - passed through to translateText()
 **/
function formatBody($message, $color, $wrap_at) {
   global $username, $key, $imapServerAddress, $imapPort;

   $id = $message->header->id;
   $urlmailbox = urlencode($message->header->mailbox);

   $imap_stream = sqimap_login($username, $key, $imapServerAddress, $imapPort, 0);
   sqimap_mailbox_select($imap_stream, $message->header->mailbox);

   // findDisplayEntity() decides which entity is shown as the primary
   // message text.
   $ent_num = findDisplayEntity ($message);
   $body = mime_fetch_body ($imap_stream, $id, $ent_num);

   // Charset handed to translateText(): the displayed entity's own
   // charset when it has one, otherwise the top-level header's.
   $charset = "";
   $shown = getEntity ($message, $ent_num);
   if ($shown && isset($shown->header->charset)) {
      $charset = $shown->header->charset;
   } else if (isset($message->header->charset)) {
      $charset = $message->header->charset;
   }

   /** If there are other types that shouldn't be formatted, add
       them here **/
   $body = translateText($body, $wrap_at, $charset);

   $body .= "<BR><SMALL><CENTER><A HREF=\"../src/download.php?absolute_dl=true&passed_id=$id&passed_ent_id=$ent_num&mailbox=$urlmailbox\">". _("Download this as a file") ."</A></CENTER><BR></SMALL>";

   /** Display the ATTACHMENTS: message if there's more than one part **/
   if ($message->entities) {
      $body .= "<TABLE WIDTH=100% CELLSPACING=0 CELLPADDING=4 BORDER=0><TR><TD BGCOLOR=\"$color[0]\">";
      $body .= "<TT><B>ATTACHMENTS:</B></TT>";
      $body .= "</TD></TR><TR><TD BGCOLOR=\"$color[0]\">";

      $body .= formatAttachments ($message, $ent_num, $message->header->mailbox, $id);
      $body .= "</TD></TR></TABLE>";
   }
   return $body;
}
392
// A recursive function that returns a list of attachments with links
// to where to download these attachments.
//
// $message - message (or part) to list
// $ent_id  - id of the entity already shown as the message text; that
//            part is left out of the list
// $mailbox - mailbox name, URL-encoded into each link
// $id      - message id, placed into each link
//
// Returns the HTML for all listed parts ("" if there are none).
function formatAttachments ($message, $ent_id, $mailbox, $id) {
   $body = "";
   if (!$message) {
      return $body;
   }

   if ($message->entities) {
      // container: list every child
      for ($i = 0; $i < count($message->entities); $i++) {
         $body .= formatAttachments ($message->entities[$i], $ent_id, $mailbox, $id);
      }
      return $body;
   }

   $type0 = strtolower($message->header->type0);
   $type1 = strtolower($message->header->type1);

   if ($message->header->entity_id != $ent_id) {
      $filename = isset($message->header->filename) ? $message->header->filename : "";
      if (trim($filename) == "") {
         // label a nameless part with its own entity id so that several
         // nameless parts can be told apart
         $display_filename = "untitled-" . $message->header->entity_id;
      } else {
         $display_filename = $filename;
      }

      $urlMailbox = urlencode($mailbox);
      $ent = urlencode($message->header->entity_id);
      // File name and type come from the message itself, so they are
      // escaped before being placed into the page.
      $body .= "<TT>&nbsp;&nbsp;&nbsp;<A HREF=\"../src/download.php?passed_id=$id&mailbox=$urlMailbox&passed_ent_id=$ent\">" . htmlspecialchars($display_filename) . "</A>&nbsp;&nbsp;<SMALL>(TYPE: " . htmlspecialchars("$type0/$type1") . ")</SMALL></TT><BR>";
   }
   return $body;
}
423
424
/** This function decodes the body depending on the encoding type.

    $body     - the raw body text
    $encoding - content transfer encoding name (case-insensitive)

    "quoted-printable" and "base64" bodies are decoded; all other
    encodings are returned raw.
 **/
function decodeBody($body, $encoding) {
   $encoding = strtolower($encoding);

   if ($encoding == "quoted-printable") {
      $body = quoted_printable_decode($body);

      // Remove any soft line breaks ("=" at the end of a line) that are
      // still present; repeat because a removal can form a new "=\n".
      while (strstr($body, "=\n")) {
         $body = str_replace("=\n", "", $body);
      }
   } else if ($encoding == "base64") {
      $body = base64_decode($body);
   }

   // All other encodings are returned raw.
   return $body;
}
441
442
// This function decodes strings that are encoded according to
// RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
//
// Every encoded word of the form =?charset?Q?text?= or =?charset?B?text?=
// is replaced by its own decoded text, passed through charset_decode().
// Text outside encoded words is returned unchanged.
function decodeHeader ($string) {
   if (!preg_match('/=\?([^?]+)\?(q|b)\?([^?]+)\?=/i', $string, $res)) {
      return ($string);
   }

   if (strtoupper($res[2]) == "B") {
      $replace = base64_decode($res[3]);
   } else {
      // in the "Q" encoding an underscore stands for a space
      $replace = str_replace("_", " ", $res[3]);
      $replace = quoted_printable_decode($replace);
   }

   $replace = charset_decode ($res[1], $replace);

   // Replace only the word that was just decoded.  The remainder is
   // decoded by recursion, so each encoded word gets its own text and
   // already decoded text is never scanned again.
   $start = strpos($string, $res[0]);
   $before = (string) substr($string, 0, $start);
   $after = (string) substr($string, $start + strlen($res[0]));

   return ($before . $replace . decodeHeader($after));
}
465
// Encode a string according to RFC 1522 for use in headers if it
// contains 8-bit characters.
//
// A string without 8-bit characters is returned unchanged.  Otherwise
// the result is a single "Q" encoded word in the global
// $default_charset: spaces become "_", and 8-bit characters as well as
// "=", "?" and "_" (which have a special meaning inside an encoded
// word) become "=XX" with upper-case hex digits.
function encodeHeader ($string) {
   global $default_charset;

   // Encode only if the string contains 8-bit characters
   if (!preg_match('/[\x80-\xFF]/', $string)) {
      return $string;
   }

   $encoded = "";
   $length = strlen($string);
   for ($i = 0; $i < $length; $i++) {
      $char = substr($string, $i, 1);
      $code = ord($char);

      if ($char == " ") {
         $encoded .= "_";
      } else if ($code >= 128 || $char == "=" || $char == "?" || $char == "_") {
         $encoded .= sprintf("=%02X", $code);
      } else {
         $encoded .= $char;
      }
   }

   return "=?$default_charset?Q?" . $encoded . "?=";
}
489
490 ?>