Removed orbz.gst-group.co.uk and added sbl.spamhaus.org
[squirrelmail.git] / functions / mime.php
... / ...
CommitLineData
1<?php
2
3/**
4 * mime.php
5 *
6 * Copyright (c) 1999-2002 The SquirrelMail Project Team
7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This contains the functions necessary to detect and decode MIME
10 * messages.
11 *
12 * $Id$
13 */
14
15require_once('../functions/imap.php');
16require_once('../functions/attachment_common.php');
17
18/** Setting up the objects that have the structure for the message **/
class msg_header {
    /*
     * Plain value holder for the header fields of a message or of one
     * MIME entity. Every field starts out with an "empty" default; the
     * parsing code fills in whatever it finds.
     */

    /* Content type, split into two fields (type0 / type1) */
    var $type0 = '';
    var $type1 = '';

    /* Content-type parameters and transfer information */
    var $boundary = '';
    var $charset = '';
    var $encoding = '';
    var $size = 0;

    /* Addressing and envelope fields */
    var $to = array();
    var $from = '';
    var $date = '';
    var $cc = array();
    var $bcc = array();
    var $reply_to = '';
    var $subject = '';

    /* Identification and descriptive fields */
    var $id = 0;
    var $mailbox = '';
    var $description = '';
    var $filename = '';
    var $entity_id = 0;
    var $message_id = 0;
    var $name = '';

    /* Priority; defaults to 3 */
    var $priority = 3;
}
29
class message {
    /*
     * A message (or a single MIME entity of one). The structure is
     * recursive: $entities holds further objects of this same class,
     * one per sub-part. See mime.txt for a fuller description.
     */

    /* Header information for this entity; an empty string until set. */
    var $header = '';

    /* Child entities, in the order in which they were added. */
    var $entities = array();

    /*
     * Appends $msg to the end of the list of child entities.
     */
    function addEntity ($msg) {
        $this->entities[] = $msg;
    }
}
43
44/* --------------------------------------------------------------------------------- */
45/* MIME DECODING */
46/* --------------------------------------------------------------------------------- */
47
/*
 * Fetches the BODYSTRUCTURE of a message from the IMAP server and parses
 * it into the standard "message" object format.
 *
 *   $imap_stream  open stream to the IMAP server
 *   $header       header object of the message; its ->id is the message
 *                 that gets fetched (and flagged as 'Seen')
 *
 * Returns the parsed message object, with ->header set to $header.
 */
function mime_structure ($imap_stream, $header) {

    sqimap_messages_flag($imap_stream, $header->id, $header->id, 'Seen');
    $ssid = sqimap_session_id();
    $lsid = strlen($ssid);
    $id = $header->id;
    fputs($imap_stream, "$ssid FETCH $id BODYSTRUCTURE\r\n");

    //
    // This should use sqimap_read_data instead of reading it itself
    //
    // Collect every line up to (but not including) the tagged completion
    // line. fgets() returns false on a read error; stop in that case too,
    // otherwise the loop would spin while feof() keeps reporting false.
    $bodystructure = '';
    $read = fgets($imap_stream, 10000);
    while ($read !== false &&
           substr($read, 0, $lsid) <> $ssid &&
           !feof($imap_stream)) {
        $bodystructure .= $read;
        $read = fgets($imap_stream, 10000);
    }

    // Isolate the body structure: everything after the BODYSTRUCTURE
    // keyword. If the keyword is missing there is nothing to parse.
    $marker = strpos(strtolower($bodystructure), 'bodystructure');
    if ($marker === false) {
        $read = '';
    } else {
        $read = trim(substr($bodystructure, $marker + 13));
    }

    // Drop the last character (the parenthesis closing the FETCH response),
    // then peel off parenthesis pairs for as long as one pair encloses the
    // whole remaining string.
    $read = trim(substr($read, 0, -1));
    $end = mime_match_parenthesis(0, $read);
    while ($end == strlen($read) - 1) {
        $read = trim(substr($read, 0, -1));
        $read = trim(substr($read, 1));
        $end = mime_match_parenthesis(0, $read);
    }

    $msg = mime_parse_structure($read, 0);
    $msg->header = $header;

    return( $msg );
}
86
/*
 * Starts the parsing of a particular structure. It is called recursively,
 * so it can be passed different structures. It returns an object of type
 * message.
 *
 * If $structure begins with '(' it is treated as a multipart: every
 * consecutive parenthesized group is parsed recursively and added as a
 * child entity, each with its own incremented entity id. Otherwise the
 * structure is a single entity and is handed to mime_get_element().
 *
 *   $structure  body structure text without its outermost parentheses
 *   $ent_id     entity id of the enclosing level
 */
function mime_parse_structure ($structure, $ent_id) {

    $msg = new message();

    // substr() is used to probe single characters so that an empty string
    // or a position past the end simply fails the comparison.
    if (substr($structure, 0, 1) == '(') {
        $ent_id = mime_new_element_level($ent_id);
        $end = -1;
        do {
            $start = $end + 1;
            $end = mime_match_parenthesis($start, $structure);

            // text between the opening and the closing parenthesis
            $element = substr($structure, $start + 1, ($end - $start) - 1);
            $ent_id = mime_increment_id($ent_id);
            $newmsg = mime_parse_structure($element, $ent_id);
            $msg->addEntity($newmsg);
        } while (substr($structure, $end + 1, 1) == '(');
    } else {
        // parse the elements
        $msg = mime_get_element($structure, $msg, $ent_id);
    }
    return $msg;
}
116
/*
 * Bumps the last component of an element id. Element ids look like
 * 1, 1.2 or 4.3.2.4.1; only the number after the final dot changes,
 * so 1.2 becomes 1.3. An id without a dot is simply increased by one.
 */
function mime_increment_id ($id) {

    // No dot in the id: plain numeric increment.
    if (!strpos($id, ".")) {
        return $id + 1;
    }

    // Split at the last dot, bump the tail, and glue it back together.
    $cut = strrpos($id, ".");
    $prefix = substr($id, 0, $cut);
    $counter = substr($id, $cut + 1);
    $counter++;

    return $prefix . "." . $counter;
}
134
/*
 * Companion of mime_increment_id(): opens a new nesting level below an
 * entity id, turning 1.3 into 1.3.0. An empty id yields 0.
 *
 * NOTE: an id ending in 0 (such as 1.3.0) is not a valid element id. It
 *       MUST be incremented before it is used. It is done this way so
 *       that the first entity of a level needs no special case: the id
 *       is always incremented, and that works fine.
 */
function mime_new_element_level ($id) {

    return( $id ? $id . '.0' : 0 );
}
153
/*
 * Parses the body structure of a single (non-multipart) entity and stores
 * what it finds in a fresh msg_header attached to $msg.
 *
 * The structure is consumed token by token. A token is NIL, a quoted
 * string, a parenthesized list (handed to mime_get_props()) or a bare
 * atom. Tokens are counted; tokens 1, 2, 4, 5, 6 and 7 are stored as
 * type0, type1, id, description, encoding and size. Token 8 is the line
 * count for text parts; for message/rfc822 the parenthesized list that
 * follows token 8 is parsed recursively as the encapsulated message.
 *
 *   $structure  structure text; taken by reference and consumed
 *   $msg        message object to fill in
 *   $ent_id     entity id to record in the header
 *
 * Returns $msg.
 */
function mime_get_element (&$structure, $msg, $ent_id) {

    $elem_num = 1;
    $msg->header = new msg_header();
    $msg->header->entity_id = $ent_id;
    $properties = array();
    // Defined up front so the switch below never sees an unset variable
    // when the very first token is a parenthesized list.
    $text = '';

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        $len = strlen($structure);
        $char = substr($structure, 0, 1);

        if (strtolower(substr($structure, 0, 3)) == 'nil') {
            $text = '';
            $structure = substr($structure, 3);
        } else if ($char == '"') {
            // Read up to the next quote and use that as the token text.
            // Backslash-escaped quotes are not recognised here.
            $pos = 1;
            $text = '';
            while ($pos < $len && $structure[$pos] != '"') {
                $text .= $structure[$pos];
                $pos++;
            }
            $structure = substr($structure, strlen($text) + 2);
        } else if ($char == '(') {
            // A parenthesized list: collect its name/value pairs.
            $end = mime_match_parenthesis(0, $structure);
            $sub = substr($structure, 1, $end - 1);
            $found = mime_get_props($properties, $sub);
            if (is_array($found)) {
                // keep what was gathered so far if nothing usable came back
                $properties = $found;
            }
            $structure = substr($structure, strlen($sub) + 2);
        } else {
            // Bare atom: read until a space or a closing parenthesis.
            $pos = 0;
            $text = '';
            while ($pos < $len &&
                   $structure[$pos] != ' ' &&
                   $structure[$pos] != ')') {
                $text .= $structure[$pos];
                $pos++;
            }
            if ($pos == 0 && $len > 0) {
                // A stray ')' at the head would consume nothing and leave
                // the loop stuck on the same input forever; step over it.
                $pos = 1;
            }
            $structure = substr($structure, $pos);
        }

        // This is where all the text parts get put into the header
        switch ($elem_num) {
            case 1:
                $msg->header->type0 = strtolower($text);
                break;
            case 2:
                $msg->header->type1 = strtolower($text);
                break;
            case 4: // Id
                // Invisimail enclose images with <>
                $msg->header->id = str_replace( '<', '', str_replace( '>', '', $text ) );
                break;
            case 5:
                $msg->header->description = $text;
                break;
            case 6:
                $msg->header->encoding = strtolower($text);
                break;
            case 7:
                $msg->header->size = $text;
                break;
            default:
                if ($msg->header->type0 == 'text' && $elem_num == 8) {
                    // This is a plain text message, so lets get the number of lines
                    // that it contains.
                    $msg->header->num_lines = $text;

                } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) {
                    // This is an encapsulated message, so lets start all over again and
                    // parse this message adding it on to the existing one.
                    $structure = trim($structure);
                    if (substr($structure, 0, 1) == '(') {
                        $e = mime_match_parenthesis(0, $structure);
                        $structure = substr($structure, 0, $e);
                        $structure = substr($structure, 1);
                        $m = mime_parse_structure($structure, $msg->header->entity_id);

                        // the following conditional is there to correct a bug that wasn't
                        // incrementing the entity IDs correctly because of the special case
                        // that message/rfc822 is. This fixes it fine.
                        // $m->header is only an object when $m is a single
                        // entity; for a multipart result it is still a string.
                        if (substr($structure, 1, 1) != '(' && is_object($m->header)) {
                            $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id));
                        }

                        // Now we'll go through and reformat the results.
                        if ($m->entities) {
                            for ($i = 0; $i < count($m->entities); $i++) {
                                $msg->addEntity($m->entities[$i]);
                            }
                        } else {
                            $msg->addEntity($m);
                        }
                        $structure = "";
                    }
                }
                break;
        }
        $elem_num++;
        $text = "";
    }

    // loop through the additional properties and put those in the various headers
    // NOTE(review): property names come straight from the parsed structure,
    // so a parameter named like an existing header field overwrites it.
    if ($msg->header->type0 != 'message') {
        $count = count($properties);
        for ($i = 0; $i < $count; $i++) {
            $name = $properties[$i]['name'];
            if ($name === '') {
                // an empty property name cannot be assigned
                continue;
            }
            $msg->header->{$name} = $properties[$i]['value'];
        }
    }

    return $msg;
}
264
/*
 * I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't
 * figure out how to do this part, so I decided to go to bed. I woke up
 * in the morning and had a flash of insight. I went to the white-board
 * and scribbled it out, then spent a bit programming it, and this is the
 * result. Nothing complicated, but I think my brain was fried yesterday.
 * Funny how that happens some times.
 *
 * This gets properties in a nested parenthesized list. For example,
 * this would get passed something like: ("attachment" ("filename" "luke.tar.gz"))
 * This returns an array called $props with all paired up properties.
 * It ignores the "attachment" for now, maybe that should change later
 * down the road. In this case, what is returned is:
 *    $props[0]["name"] = "filename";
 *    $props[0]["value"] = "luke.tar.gz";
 *
 *   $props      properties collected so far; new pairs are appended
 *   $structure  list text without its enclosing parentheses
 *
 * Always returns an array. Property names are lower-cased. Parsing stops
 * at the first token that is neither a quoted string nor a list.
 */
function mime_get_props ($props, $structure) {

    if (!is_array($props)) {
        $props = array();
    }

    while (strlen($structure) > 0) {
        $structure = trim($structure);
        $char = substr($structure, 0, 1);

        if ($char == '"') {
            // First quoted string: the property name (or a keyword such as
            // "attachment" when a list follows it).
            $len = strlen($structure);
            $pos = 1;
            $tmp = '';
            while ($pos < $len && $structure[$pos] != '"') {
                $tmp .= $structure[$pos];
                $pos++;
            }
            $structure = trim(substr($structure, strlen($tmp) + 2));
            $char = substr($structure, 0, 1);

            if ($char == '"') {
                // Second quoted string: the value belonging to $tmp.
                $len = strlen($structure);
                $pos = 1;
                $value = '';
                while ($pos < $len && $structure[$pos] != '"') {
                    $value .= $structure[$pos];
                    $pos++;
                }
                // step over the value just read (quotes included)
                $structure = trim(substr($structure, strlen($value) + 2));

                $k = count($props);
                $props[$k]['name'] = strtolower($tmp);
                $props[$k]['value'] = $value;
            } else if ($char == '(') {
                // A list follows the string: descend into it; the string
                // itself is ignored.
                $end = mime_match_parenthesis(0, $structure);
                $sub = substr($structure, 1, $end - 1);
                $props = mime_get_props($props, $sub);
                $structure = substr($structure, strlen($sub) + 2);
            } else {
                // Lone string followed by something else: stop here.
                return $props;
            }
        } else if ($char == '(') {
            $end = mime_match_parenthesis(0, $structure);
            $sub = substr($structure, 1, $end - 1);
            $props = mime_get_props($props, $sub);
            $structure = substr($structure, strlen($sub) + 2);
        } else {
            return $props;
        }
    }

    return $props;
}
331
/*
 * Matches parenthesis. It will return the position of the matching
 * parenthesis in $structure. For instance, if $structure was:
 *    ("text" "plain" ("val1name", "1") nil ... )
 *     x                                         x
 * then this would return 42 to match up those two.
 *
 *   $pos        position of the opening parenthesis
 *   $structure  text to scan
 *
 * Returns the position of the matching ')'. If there is no '(' at $pos,
 * strlen($structure) is returned. If no matching ')' is found, an error
 * message is printed and the position where scanning stopped is returned.
 */
function mime_match_parenthesis ($pos, $structure) {

    $j = strlen($structure);

    // Nothing to match unless $pos really points at an opening parenthesis.
    if ($pos < 0 || $pos >= $j || $structure[$pos] != '(') {
        return( $j );
    }

    while ($pos < $j) {
        $pos++;
        if ($pos >= $j) {
            // ran off the end without finding the closing parenthesis
            break;
        }
        $char = $structure[$pos];

        if ($char == ')') {
            return $pos;
        } elseif ($char == '"') {
            // If inside of a string, skip string -- Boundary IDs and other
            // things can have ) in them.
            $pos++;
            while ($pos < $j && $structure[$pos] != '"') {
                // a backslash followed by a quote or another backslash is
                // an escape pair: step over both characters
                if ($structure[$pos] == '\\' && $pos + 1 < $j &&
                    ($structure[$pos + 1] == '"' || $structure[$pos + 1] == '\\')) {
                    $pos++;
                }
                $pos++;
            }
        } elseif ($char == '(') {
            // nested list: jump to its closing parenthesis
            $pos = mime_match_parenthesis($pos, $structure);
        }
    }
    echo _("Error decoding mime structure.  Report this as a bug!") . '<br>';
    return( $pos );
}
372
/*
 * Fetches the body of one entity of a message from the IMAP server.
 *
 *   $imap_stream  open stream to the IMAP server
 *   $id           id of the message
 *   $ent_id       entity id of the part to fetch; 1 is used when empty
 *
 * Returns the body text. When the body starts with '<' and the part's
 * MIME headers carry a Content-Base line, a <base href="..."> tag is put
 * in front of it. If the FETCH response line carries neither a {size}
 * literal nor a quoted string, an error notice is printed and the whole
 * message (FETCH BODY[]) is returned behind a separator line.
 */
function mime_fetch_body ($imap_stream, $id, $ent_id ) {
    // do a bit of error correction.  If we couldn't find the entity id, just guess
    // that it is the first one.  That is usually the case anyway.
    if (!$ent_id) {
        $ent_id = 1;
    }

    $data = sqimap_run_command($imap_stream, "FETCH $id BODY[$ent_id]", true, $response, $message);
    if (!is_array($data)) {
        $data = array();
    }

    // Skip ahead to the "* n FETCH" line; what is left in $data afterwards
    // is the body itself.
    $topline = (string) array_shift($data);
    while (!preg_match('/\\* [0-9]+ FETCH /', $topline) && $data) {
        $topline = (string) array_shift($data);
    }
    $wholemessage = implode('', $data);

    if (preg_match('/\\{([^\\\\}]*)\\}/', $topline, $regs)) {
        // {n} literal: the body is the first n characters that follow
        $ret = substr($wholemessage, 0, (int) $regs[1]);
        /*
            There is some information in the content info header that could be important
            in order to parse html messages. Let's get them here.
        */
        if (substr($ret, 0, 1) == '<') {
            $data = sqimap_run_command($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message);
            if (!is_array($data)) {
                $data = array();
            }
            $base = '';
            // $k counts the lines seen since the Content-Base line; it
            // starts at 10 so that no line is taken as a continuation
            // before that header has been seen.
            $k = 10;
            foreach ($data as $d) {
                if (substr($d, 0, 13) == 'Content-Base:') {
                    $j = strlen($d);
                    $i = 13;
                    $base = '';
                    // skip the leading characters that isNoSep() rejects,
                    // and any quote
                    while ($i < $j &&
                           (!isNoSep($d[$i]) || $d[$i] == '"')) {
                        $i++;
                    }
                    // keep only the characters isNoSep() accepts
                    while ($i < $j) {
                        if (isNoSep($d[$i])) {
                            $base .= $d[$i];
                        }
                        $i++;
                    }
                    $k = 0;
                } elseif ($k == 1 && !isnosep((string) substr($d, 0, 1))) {
                    // line directly after Content-Base whose first
                    // character isnosep() rejects: append it minus that
                    // first character
                    $base .= substr($d, 1);
                }
                $k++;
            }
            if ($base <> '') {
                // the value comes from the message itself, so escape it
                // before placing it inside the attribute
                $ret = '<base href="' . htmlspecialchars($base) . '">' . $ret;
            }
        }
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        // short body returned inline as a quoted string
        $ret = $regs[1];
    } else {
        global $where, $what, $mailbox, $passed_id, $startMessage, $color;

        $par = 'mailbox=' . urlencode($mailbox) . '&passed_id=' . urlencode($passed_id);
        if (isset($where) && isset($what)) {
            $par .= '&where=' . urlencode($where) . '&what=' . urlencode($what);
        } else {
            $par .= '&startMessage=' . urlencode($startMessage) . '&show_more=0';
        }
        $par .= '&response=' . urlencode($response) . '&message=' . urlencode($message) .
                '&topline=' . urlencode($topline);

        // The colour used to be written inside a single-quoted string, so
        // the literal text "$color[2]" ended up in the page.
        $font_color = (is_array($color) && isset($color[2])) ? $color[2] : '';

        echo '<b><font color="' . htmlspecialchars($font_color) . '">' .
             _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") .
             '<A HREF="../src/retrievalerror.php?' . htmlspecialchars($par) . '">Submit message</A><BR>' .
             '<tt>' . _("Response:") . htmlspecialchars($response) . '<BR>' .
             _("Message:") . ' ' . htmlspecialchars($message) . '<BR>' .
             _("FETCH line:") . ' ' . htmlspecialchars($topline) . '<BR></tt></font></b>';

        // NOTE(review): this fetch uses the global $passed_id rather than
        // the $id argument -- confirm that the two always agree.
        $data = sqimap_run_command($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message);
        if (!is_array($data)) {
            $data = array();
        }
        array_shift($data);
        $wholemessage = implode('', $data);

        $ret = "---------------\n$wholemessage";
    }
    return( $ret );
}
445
446function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
447 // do a bit of error correction. If we couldn't find the entity id, just guess
448 // that it is the first one. That is usually the case anyway.
449 if (!$ent_id) {
450 $ent_id = 1;
451 }
452 $sid = sqimap_session_id();
453 // Don't kill the connection if the browser is over a dialup
454 // and it would take over 30 seconds to download it.
455
456