Rewrote MIME support and made it much, MUCH quicker. All parsing of the
[squirrelmail.git] / functions / mime.php
CommitLineData
59177427 1<?php
aceb0d5c 2 /** mime.php
3 **
d068c0ec 4 ** This contains the functions necessary to detect and decode MIME
5 ** messages.
6 **
aceb0d5c 7 **/
8
d068c0ec 9 $mime_php = true;
aceb0d5c 10
1fd97780 11 if (!isset($i18n_php))
12 include "../functions/i18n.php";
8beafbbc 13 if (!isset($imap_php))
14 include "../functions/imap.php";
15 if (!isset($config_php))
16 include "../config/config.php";
17
18
19 /** Setting up the object that has the structure for the message **/
20
/**
 * msg_header contains generic variables for values that could be in a
 * header, either of a whole message or of one MIME entity.
 *
 * Besides the properties declared here, mime_get_element() stores every
 * name/value pair it finds in the parenthesised property lists of a
 * BODYSTRUCTURE response as a property named after the pair.
 */
class msg_header {
    var $type0, $type1, $boundary, $charset, $encoding;
    var $to, $from, $date, $cc, $bcc, $reply_to, $subject;
    var $id, $mailbox;
    var $entity_id;

    // Assigned by mime_get_element() from BODYSTRUCTURE elements 7 and 8.
    var $size, $num_lines;

    // Read by formatAttachments(); stays null when the entity has no name.
    var $filename;
}
30
/**
 * message is the object that contains messages. It is a recursive
 * object in that through the $entities variable, it can contain
 * more objects of type message. See documentation in mime.txt for
 * a better description of how this works.
 */
class message {
    // Header of this message/entity.
    var $header;

    // Child entities, in the order they were added. Starts out as an empty
    // array (still "false" in boolean tests) so it can always be counted
    // and appended to.
    var $entities = array();

    /**
     * Appends $msg to the end of the list of child entities.
     */
    function addEntity ($msg) {
        if (!is_array($this->entities)) {
            $this->entities = array();
        }
        $this->entities[] = $msg;
    }
}
1fd97780 44
bcb432a3 45
bcb432a3 46
8beafbbc 47 /* --------------------------------------------------------------------------------- */
48 /* MIME DECODING */
49 /* --------------------------------------------------------------------------------- */
50
51 /** This function gets the structure of a message and stores it in the "message" class.
52 It will return this object for use with all relevant header information and
53 fully parsed into the standard "message" object format.
54 **/
55 function mime_structure ($imap_stream, $header) {
56 sqimap_messages_flag ($imap_stream, $header->id, $header->id, "Seen");
57
58 $id = $header->id;
59 fputs ($imap_stream, "a001 FETCH $id BODYSTRUCTURE\r\n");
60 $read = sqimap_read_data ($imap_stream, "a001", true, $a, $b);
61 $read = strtolower($read[0]);
62
63 //echo $read."<br><br>";
64 // isolate the body structure and remove beginning and end parenthesis
65 $read = trim(substr ($read, strpos($read, "bodystructure") + 13));
66 $read = trim(substr ($read, 0, -2));
67 $read = trim(substr ($read, 1));
68
69 $msg = mime_parse_structure ($read);
70 $msg->header = $header;
71 return $msg;
72 }
73
/**
 * Recursively turns a (lower-cased) BODYSTRUCTURE fragment into a message.
 *
 * If $structure starts with "(" it is a list of parts: every leading
 * parenthesised group is parsed as a sub-entity and added to the returned
 * message; whatever follows the last group is ignored. Otherwise $structure
 * describes a single part, which is parsed by mime_get_element() into a
 * fresh msg_header stored in ->header.
 *
 * @param $structure  body structure text without its outer parentheses
 * @param $ent_id     entity id of this part (0 / omitted for the top level)
 * @return message
 */
function mime_parse_structure ($structure, $ent_id = 0) {
    $msg = new message();
    if (substr($structure, 0, 1) == "(") {
        $ent_id = mime_new_element_level($ent_id);
        $start = $end = -1;
        do {
            $start = $end + 1;
            $end = mime_match_parenthesis ($start, $structure);
            // No matching ")" found: treat the rest of the string as the
            // element so the loop is guaranteed to terminate.
            if (!$end || $end <= $start) {
                $end = strlen($structure);
            }

            $element = substr($structure, $start + 1, ($end - $start) - 1);
            $ent_id = mime_increment_id($ent_id);
            $newmsg = mime_parse_structure ($element, $ent_id);
            $msg->addEntity ($newmsg);
        } while (substr($structure, $end + 1, 1) == "(");
    } else {
        // parse the elements; mime_get_element() takes $structure by
        // reference (declared in its signature) and consumes it
        $msg->header = mime_get_element ($structure, new msg_header());
        $msg->header->entity_id = $ent_id;
    }
    return $msg;
}
101
/**
 * Increments the element ID. An element id can look like any of the
 * following: 1, 1.2, 4.3.2.4.1, etc. This function increments the last
 * number of the element id, changing 1.2 to 1.3.
 *
 * @param $id  element id (integer or dotted string)
 * @return integer when $id has no dot, otherwise the dotted string with
 *         its last component incremented
 */
function mime_increment_id ($id) {
    $id = (string) $id;
    $dot = strrpos($id, ".");

    if ($dot === false) {
        return (int) $id + 1;
    }

    // Keep everything up to AND including the last dot, bump what follows.
    $first = substr($id, 0, $dot + 1);
    $last = (int) substr($id, $dot + 1);
    return $first . ($last + 1);
}

/**
 * See comment for mime_increment_id().
 * This adds another level on to the entity_id changing 1.3 to 1.3.0
 * NOTE: 1.3.0 is not a valid element ID. It MUST be incremented
 *       before it can be used. It is left this way so that the first
 *       entity_id is not a special case: it is always incremented, and
 *       that works fine.
 *
 * @param $id  element id of the enclosing entity; empty/0 for the top level
 * @return 0 for the top level, otherwise $id with ".0" appended
 */
function mime_new_element_level ($id) {
    if (!$id) {
        return 0;
    }
    return $id . ".0";
}
130
/**
 * Parses the description of one body part into a header object.
 *
 * $structure is consumed token by token. A token is "nil" (empty text), a
 * quoted string, a parenthesised property list (collected through
 * mime_get_props()) or a bare word ending at a space or ")". Tokens are
 * numbered from 1 and stored as follows:
 *   1 -> type0, 2 -> type1, 6 -> encoding, 7 -> size,
 *   8 -> num_lines (only when type0 is "text").
 * Afterwards every collected property is stored on the header under its own
 * name.
 *
 * @param $structure  text to parse (by reference; empty on return)
 * @param $header     header object to fill in; a new msg_header is created
 *                    when this is not an object
 * @return the filled-in header object
 */
function mime_get_element (&$structure, $header) {
    if (!is_object($header)) {
        $header = new msg_header();
    }
    $properties = array();
    $elem_num = 1;

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        if ($structure === "") {
            // only whitespace was left; it is not an element
            break;
        }
        $text = "";
        $char = substr($structure, 0, 1);

        if (substr($structure, 0, 3) == "nil") {
            $structure = (string) substr($structure, 3);
        } else if ($char == "\"") {
            // take everything up to the matching quote (or the end of the
            // string when the quote is never closed) as the text
            $close = strpos($structure, "\"", 1);
            if ($close === false) {
                $close = strlen($structure);
            }
            $text = (string) substr($structure, 1, $close - 1);
            $structure = (string) substr($structure, $close + 1);
        } else if ($char == "(") {
            // a parenthesised list of properties; its pairs are collected
            // and the list itself counts as one (empty) element
            $end = mime_match_parenthesis (0, $structure);
            if (!$end) {
                $end = strlen($structure);
            }
            $sub = (string) substr($structure, 1, $end - 1);
            $found = mime_get_props($properties, $sub);
            if (is_array($found)) {
                // never let a failed lookup wipe what was collected so far
                $properties = $found;
            }
            $structure = (string) substr($structure, $end + 1);
        } else {
            // bare word: runs until a space or an end parenthesis
            $len = strcspn($structure, " )");
            if ($len == 0) {
                // stray ")": drop it, otherwise we would never advance
                $structure = (string) substr($structure, 1);
                continue;
            }
            $text = substr($structure, 0, $len);
            $structure = (string) substr($structure, $len);
        }

        // This is where all the text parts get put into the header
        switch ($elem_num) {
            case 1:
                $header->type0 = $text;
                break;
            case 2:
                $header->type1 = $text;
                break;
            case 6:
                $header->encoding = $text;
                break;
            case 7:
                $header->size = $text;
                break;
            default:
                if ($header->type0 == "text" && $elem_num == 8) {
                    $header->num_lines = $text;
                }
                break;
        }
        $elem_num++;
    }

    // put the additional properties into the header under their own names
    foreach ($properties as $property) {
        if (!isset($property["name"]) || $property["name"] === "") {
            continue;   // a property needs a name to be stored
        }
        $header->{$property["name"]} = $property["value"];
    }
    return $header;
}
205
206 // I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't
207 // figure out how to do this part, so I decided to go to bed. I woke up
208 // in the morning and had a flash of insight. I went to the white-board
209 // and scribbled it out, then spent a bit programming it, and this is the
210 // result. Nothing complicated, but I think my brain was fried yesterday.
211 //
/**
 * This gets properties in a nested parenthesised list. For example, this
 * would get passed something like: "attachment" ("filename" "luke.tar.gz")
 * (the caller has already removed the outer parentheses).
 *
 * It returns $props with every quoted name that is directly followed by a
 * quoted value appended as a pair. A quoted name that is followed by a
 * nested list (the "attachment" above) or by a bare word such as nil is
 * ignored; nested lists are searched recursively. For the example the
 * result is:
 *     $props[0]["name"]  = "filename";
 *     $props[0]["value"] = "luke.tar.gz";
 *
 * @param $props      pairs collected so far (anything that is not an array
 *                    is treated as an empty list)
 * @param $structure  list contents without the enclosing parentheses
 * @return array of array("name" => ..., "value" => ...)
 */
function mime_get_props ($props, $structure) {
    if (!is_array($props)) {
        $props = array();
    }
    $structure = trim($structure);

    while (strlen($structure) > 0) {
        $char = substr($structure, 0, 1);

        if ($char == "(") {
            // nested list: collect its pairs, then carry on behind it
            $end = mime_match_parenthesis (0, $structure);
            if (!$end) {
                $end = strlen($structure);
            }
            $sub = (string) substr($structure, 1, $end - 1);
            $found = mime_get_props($props, $sub);
            if (is_array($found)) {
                $props = $found;
            }
            $structure = trim((string) substr($structure, $end + 1));
            continue;
        }

        if ($char != "\"") {
            // neither a name nor a list: nothing more we can interpret
            return $props;
        }

        // quoted name
        $close = strpos($structure, "\"", 1);
        if ($close === false) {
            $close = strlen($structure);
        }
        $name = (string) substr($structure, 1, $close - 1);
        $structure = trim((string) substr($structure, $close + 1));

        $char = substr($structure, 0, 1);
        if ($char == "\"") {
            // quoted value: this completes a pair
            $close = strpos($structure, "\"", 1);
            if ($close === false) {
                $close = strlen($structure);
            }
            $value = (string) substr($structure, 1, $close - 1);
            $structure = trim((string) substr($structure, $close + 1));

            $props[] = array("name" => $name, "value" => $value);
        } else if ($char != "(") {
            // bare word (e.g. nil) after the name: skip it. A following
            // "(" is left for the next round of the loop.
            $len = strcspn($structure, " ()\"");
            if ($len == 0) {
                $len = 1;   // always advance, even on a stray ")"
            }
            $structure = trim((string) substr($structure, $len));
        }
    }
    return $props;
}
7831268e 265
/**
 * Matches parenthesis. $pos is the position of an opening parenthesis in
 * $structure; the position of the parenthesis that closes it is returned.
 * For instance, for
 *     ("a" ("b" "c") nil)
 * a $pos of 0 returns 18 and a $pos of 5 returns 13.
 *
 * Parentheses inside double-quoted strings are ignored (a backslash inside
 * quotes escapes the next character). When the parenthesis is never closed,
 * strlen($structure) is returned, i.e. the end of the string is treated as
 * the closing position, so callers always get a usable number.
 *
 * @param $pos        position of the opening parenthesis
 * @param $structure  text to search
 * @return integer position of the matching ")" (or the string length)
 */
function mime_match_parenthesis ($pos, $structure) {
    $length = strlen($structure);
    $depth = 0;
    $in_quotes = false;

    for ($i = $pos + 1; $i < $length; $i++) {
        $char = substr($structure, $i, 1);

        if ($in_quotes) {
            if ($char == "\\") {
                $i++;               // skip the escaped character
            } else if ($char == "\"") {
                $in_quotes = false;
            }
        } else if ($char == "\"") {
            $in_quotes = true;
        } else if ($char == "(") {
            $depth++;
        } else if ($char == ")") {
            if ($depth == 0) {
                return $i;
            }
            $depth--;
        }
    }
    return $length;
}
d4467150 285
/**
 * Fetches the raw (still transfer-encoded) text of one entity of a message.
 *
 * Sends "FETCH <id> BODY[<ent_id>]" on $imap_stream and concatenates the
 * lines of the reply, leaving out the first and the last one.
 *
 * @param $imap_stream  stream the IMAP command is written to
 * @param $id           message number
 * @param $ent_id       entity id; 1 is used when it is empty
 * @return string (empty when the reply carries no body lines)
 */
function mime_fetch_body ($imap_stream, $id, $ent_id) {
    // do a bit of error correction. If we couldn't find the entity id, just
    // guess that it is the first one. That is usually the case anyway.
    if (!$ent_id) {
        $ent_id = 1;
    }

    fputs ($imap_stream, "a001 FETCH $id BODY[$ent_id]\r\n");
    $read = sqimap_read_data ($imap_stream, "a001", true, $a, $b);

    $text = "";
    $last = is_array($read) ? count($read) - 1 : 0;
    for ($i = 1; $i < $last; $i++) {
        // This fixes a bug in UW. UW doesn't return what would normally be
        // expected from the BODY fetch command. It has an extra line at the
        // end. So if the second from the last line is a ), then remove it.
        if ($i == $last - 1 && trim($read[$i]) == ")") {
            continue;
        }
        $text .= $read[$i];
    }
    return $text;
}
304
8beafbbc 305 /* -[ END MIME DECODING ]----------------------------------------------------------- */
d4467150 306
aceb0d5c 307
d4467150 308
/**
 * This is the first function called. It logs in to the IMAP server with
 * the global credentials, selects the mailbox named in $header->mailbox
 * and returns the message structure built by mime_structure().
 *
 * $body is accepted but not used.
 */
function decodeMime ($body, $header) {
    global $username, $key, $imapServerAddress, $imapPort;

    $connection = sqimap_login($username, $key, $imapServerAddress, $imapPort, 0);
    sqimap_mailbox_select($connection, $header->mailbox);

    $structure = mime_structure ($connection, $header);
    return $structure;
}
b1dadc61 319
/**
 * Searches $message and, depth first, all of its sub-entities for the
 * entity whose header carries the entity id $ent_id.
 *
 * Ids are compared as strings, so "1.1" and "1.10" are different entities.
 * An empty $ent_id matches the first message that has no entity id of its
 * own.
 *
 * @param $message  message object to search (may be empty)
 * @param $ent_id   entity id to look for
 * @return the matching message object, or null when there is none
 */
function getEntity ($message, $ent_id) {
    if (!$message) {
        return null;
    }

    $wanted = $ent_id ? (string) $ent_id : "";
    $current = "";
    if (isset($message->header) && is_object($message->header)
        && isset($message->header->entity_id) && $message->header->entity_id) {
        $current = (string) $message->header->entity_id;
    }
    if ($current === $wanted) {
        return $message;
    }

    if (isset($message->entities) && is_array($message->entities)) {
        foreach ($message->entities as $entity) {
            $msg = getEntity ($entity, $ent_id);
            if ($msg) {
                return $msg;
            }
        }
    }
    return null;
}
333
/**
 * Finds the entity that should be shown as the body of the message.
 *
 * A message of type text/plain or text/html is its own display entity. A
 * text message of any other subtype yields nothing. For every other type
 * the sub-entities are searched in order, depth first, and the first one
 * that yields an entity id wins.
 *
 * @param $message  message object to search (may be empty)
 * @return entity id of the display entity, or null when none is found
 */
function findDisplayEntity ($message) {
    if (!$message) {
        return null;
    }

    $has_header = isset($message->header) && is_object($message->header);
    $type0 = ($has_header && isset($message->header->type0)) ? $message->header->type0 : "";
    $type1 = ($has_header && isset($message->header->type1)) ? $message->header->type1 : "";

    if ($type0 == "text") {
        if ($type1 == "plain" || $type1 == "html") {
            return isset($message->header->entity_id) ? $message->header->entity_id : null;
        }
        return null;
    }

    if (isset($message->entities) && is_array($message->entities)) {
        foreach ($message->entities as $entity) {
            // keep looking when this branch has nothing displayable
            $found = findDisplayEntity($entity);
            if ($found !== null) {
                return $found;
            }
        }
    }
    return null;
}
8405ee35 348
/**
 * This returns a parsed string called $body. That string can then be
 * displayed as the actual message in the HTML. It contains everything
 * needed, including HTML tags, a download link and, when the message has
 * sub-entities, the list of attachments at the bottom.
 *
 * The entity chosen by findDisplayEntity() is fetched, decoded according
 * to the transfer encoding recorded in its header, and passed through
 * translateText() together with the charset recorded in its header.
 *
 * @param $message  message object as returned by mime_structure()
 * @param $color    colour table; $color[0] is used as the background of
 *                  the attachment box
 * @param $wrap_at  passed through to translateText()
 * @return string of HTML
 */
function formatBody($message, $color, $wrap_at) {
    global $username, $key, $imapServerAddress, $imapPort;

    $id = $message->header->id;
    $urlmailbox = urlencode($message->header->mailbox);

    $imap_stream = sqimap_login($username, $key, $imapServerAddress, $imapPort, 0);
    sqimap_mailbox_select($imap_stream, $message->header->mailbox);

    $ent_num = findDisplayEntity ($message);
    $body = mime_fetch_body ($imap_stream, $id, $ent_num);

    // The encoding and charset that apply to the fetched text are the ones
    // stored in the header of the entity that is being displayed.
    $encoding = "";
    $charset = "";
    $body_message = getEntity ($message, $ent_num);
    if ($body_message && isset($body_message->header) && is_object($body_message->header)) {
        if (isset($body_message->header->encoding)) {
            $encoding = $body_message->header->encoding;
        }
        if (isset($body_message->header->charset)) {
            $charset = $body_message->header->charset;
        }
    }

    // mime_fetch_body() returns the entity exactly as stored on the server
    $body = decodeBody($body, $encoding);

    /** If there are other types that shouldn't be formatted, add
        them here **/
    $body = translateText($body, $wrap_at, $charset);

    $body .= "<BR><SMALL><CENTER><A HREF=\"../src/download.php?absolute_dl=true&passed_id=$id&passed_ent_id=$ent_num&mailbox=$urlmailbox\">". _("Download this as a file") ."</A></CENTER><BR></SMALL>";

    /** Display the ATTACHMENTS: message if there's more than one part **/
    if ($message->entities) {
        $body .= "<TABLE WIDTH=100% CELLSPACING=0 CELLPADDING=4 BORDER=0><TR><TD BGCOLOR=\"$color[0]\">";
        $body .= "<TT><B>ATTACHMENTS:</B></TT>";
        $body .= "</TD></TR><TR><TD BGCOLOR=\"$color[0]\">";

        // formatAttachments() walks the sub-entities recursively
        $body .= formatAttachments ($message, $ent_num, $message->header->mailbox, $id);
        $body .= "</TD></TR></TABLE>";
    }
    return $body;
}
392
/**
 * A recursive function that returns a list of attachments with links to
 * where to download these attachments.
 *
 * Every entity without sub-entities, except the one whose entity id equals
 * $ent_id (the entity already shown as the body), produces one line of
 * HTML. Entities without a filename are listed as "untitled-<their own
 * entity id>". Filename and type come from the message itself and are
 * HTML-escaped before being printed.
 *
 * @param $message  message object to list (may be empty)
 * @param $ent_id   entity id of the displayed body, which is left out
 * @param $mailbox  mailbox name for the download link
 * @param $id       message number for the download link
 * @return string of HTML (empty when there is nothing to list)
 */
function formatAttachments ($message, $ent_id, $mailbox, $id) {
    $body = "";
    if (!$message) {
        return $body;
    }

    if (!empty($message->entities)) {
        foreach ($message->entities as $entity) {
            $body .= formatAttachments ($entity, $ent_id, $mailbox, $id);
        }
        return $body;
    }

    if (!isset($message->header) || !is_object($message->header)) {
        return $body;
    }
    $header = $message->header;

    // compare ids as strings so that "1.1" and "1.10" stay distinct
    $this_id = isset($header->entity_id) ? (string) $header->entity_id : "";
    if ($this_id === (string) $ent_id) {
        return $body;
    }

    $type0 = isset($header->type0) ? strtolower($header->type0) : "";
    $type1 = isset($header->type1) ? strtolower($header->type1) : "";
    $filename = isset($header->filename) ? $header->filename : "";

    if (trim($filename) == "") {
        $display_filename = "untitled-$this_id";
    } else {
        $display_filename = $filename;
    }

    // everything below originates from the mail and must not be able to
    // inject markup into the page
    $display_filename = htmlspecialchars($display_filename);
    $type0 = htmlspecialchars($type0);
    $type1 = htmlspecialchars($type1);

    $urlMailbox = urlencode($mailbox);
    $ent = urlencode($this_id);
    $body .= "<TT>&nbsp;&nbsp;&nbsp;<A HREF=\"../src/download.php?passed_id=$id&mailbox=$urlMailbox&passed_ent_id=$ent\">" . $display_filename . "</A>&nbsp;&nbsp;<SMALL>(TYPE: $type0/$type1)</SMALL></TT><BR>";
    return $body;
}
4809f489 423
424
/**
 * This function decodes the body depending on the encoding type.
 *
 * "quoted-printable" and "base64" (compared case-insensitively) are
 * decoded; all other encodings are returned raw.
 *
 * @param $body      encoded text
 * @param $encoding  name of the content transfer encoding
 * @return decoded text
 */
function decodeBody($body, $encoding) {
    $encoding = strtolower($encoding);

    if ($encoding == "quoted-printable") {
        // Soft line breaks ("=" directly before the end of a line) are
        // removed BEFORE decoding, so that a literal "=" that was sent as
        // "=3D" in front of a line break is not mistaken for one.
        $body = str_replace(array("=\r\n", "=\n"), "", $body);
        $body = quoted_printable_decode($body);
    } else if ($encoding == "base64") {
        $body = base64_decode($body);
    }

    // All other encodings are returned raw.
    return $body;
}
a4c2cd49 441
442
/**
 * This function decodes strings that are encoded according to RFC1522
 * (MIME Part Two: Message Header Extensions for Non-ASCII Text).
 *
 * Every encoded word of the form =?charset?Q?text?= or =?charset?B?text?=
 * is decoded on its own (B: base64; Q: "_" stands for a space, the rest is
 * quoted-printable) and passed through charset_decode() with its charset.
 * Text outside encoded words is copied unchanged, and decoded text is not
 * scanned again.
 *
 * @param $string  header text
 * @return header text with all encoded words decoded
 */
function decodeHeader ($string) {
    $pattern = '/=\?([^?]+)\?(q|b)\?([^?]+)\?=/i';
    $decoded = "";
    $offset = 0;

    while (preg_match($pattern, $string, $res, PREG_OFFSET_CAPTURE, $offset)) {
        $word = $res[0][0];
        $start = $res[0][1];

        // plain text between the previous encoded word and this one
        $decoded .= substr($string, $offset, $start - $offset);

        if (strtoupper($res[2][0]) == "B") {
            $replace = base64_decode($res[3][0]);
        } else {
            $replace = str_replace("_", " ", $res[3][0]);
            $replace = quoted_printable_decode($replace);
        }
        $decoded .= charset_decode ($res[1][0], $replace);

        $offset = $start + strlen($word);
    }

    return $decoded . substr($string, $offset);
}
465
/**
 * Encode a string according to RFC 1522 for use in headers if it contains
 * 8-bit characters.
 *
 * A string without 8-bit characters is returned unchanged. Otherwise the
 * whole string becomes one "Q" encoded word in the global $default_charset:
 * spaces are written as "_", and 8-bit bytes as well as the characters
 * "=", "?" and "_" (which have a special meaning inside an encoded word)
 * are written as "=" followed by two uppercase hex digits.
 *
 * @param $string  header text
 * @return header text, encoded when necessary
 */
function encodeHeader ($string) {
    global $default_charset;

    // Encode only if the string contains 8-bit characters
    if (!preg_match('/[\x80-\xff]/', $string)) {
        return $string;
    }

    $encoded = "";
    $length = strlen($string);
    for ($i = 0; $i < $length; $i++) {
        $char = substr($string, $i, 1);
        $code = ord($char);

        if ($char === " ") {
            $encoded .= "_";
        } else if ($code >= 0x80 || $char === "=" || $char === "?" || $char === "_") {
            $encoded .= sprintf("=%02X", $code);
        } else {
            $encoded .= $char;
        }
    }

    return "=?$default_charset?Q?" . $encoded . "?=";
}
489
9f9d7d28 490?>