Address parsing fixes. This whole addressparsing is driving me nuts and the
[squirrelmail.git] / class / mime / Rfc822Header.class.php
1 <?php
2
3 /**
4 * Rfc822Header.class.php
5 *
6 * Copyright (c) 2003 The SquirrelMail Project Team
7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This contains functions needed to handle mime messages.
10 *
11 * $Id$
12 */
13
14 /*
15 * Rfc822Header class
16 * input: header_string or array
17 */
18 class Rfc822Header {
/* Value produced by getTimeStamp() for the Date header ('' until parsed). */
var $date = '';
/* Raw Subject header value. */
var $subject = '';
/* Address structures parsed from the From header. */
var $from = array();
/* Single address structure parsed from the Sender header ('' until parsed). */
var $sender = '';
/* Address structures parsed from the Reply-To header. */
var $reply_to = array();
/* Address structures parsed from the To header. */
var $to = array();
/* Address structures parsed from the Cc header. */
var $cc = array();
/* Address structures parsed from the Bcc header. */
var $bcc = array();
/* Raw In-Reply-To header value. */
var $in_reply_to = '';
/* Message-ID header value with comments stripped. */
var $message_id = '';
/* References header value with comments stripped. */
var $references = '';
/* Becomes true once a "MIME-Version: 1.0" header has been seen. */
var $mime = false;
/* ContentType object set by parseContentType() ('' until parsed). */
var $content_type = '';
/* Disposition object set by parseDisposition() ('' until parsed). */
var $disposition = '';
/* Raw value of the User-Agent or X-Mailer header. */
var $xmailer = '';
/* Value of the X-Priority header; defaults to 3. */
var $priority = 3;
/* Address structure from a read-receipt header (Disposition-Notification-To,
 * Return-Receipt-To or X-Confirm-Reading-To). */
var $dnt = '';
/* Not assigned by any method of this class. */
var $encoding = '';
/* Mailing-list data filled by mlist(), keyed by list field name. */
var $mlist = array();
/* only needed for constructing headers in smtp.php */
var $more_headers = array();
/**
 * Parse a raw header block and populate this object field by field.
 *
 * The header is unfolded, split into lines on CRLF, and every line of the
 * form "Name: value" whose name contains no space is handed to parseField().
 * If no Content-Type was found, "text/plain; charset=us-ascii" is assumed.
 *
 * @param mixed $hdr header text, or an array of pieces that are concatenated
 */
function parseHeader($hdr) {
    if (is_array($hdr)) {
        $hdr = implode('', $hdr);
    }

    /*
     * Unfold the header. The folding white space must survive as a separator
     * (RFC 2822, 2.2.3); replacing it with nothing glued the last word of one
     * line to the first word of the continuation line.
     */
    $hdr = trim(str_replace(array("\r\n\t", "\r\n "), array(' ', ' '), $hdr));

    /* Each element now represents one complete header line. */
    foreach (explode("\r\n", $hdr) as $line) {
        $pos = strpos($line, ':');
        if ($pos > 0) {
            $field = substr($line, 0, $pos);
            if (strpos($field, ' ') === false) { /* valid field name */
                $value = trim(substr($line, $pos + 1));
                $this->parseField($field, $value);
            }
        }
    }

    if ($this->content_type == '') {
        $this->parseContentType('text/plain; charset=us-ascii');
    }
}
64
/**
 * Remove parenthesised comments from a header value.
 *
 * Quoted strings are copied verbatim (including backslash escapes), so
 * parentheses inside quotes are preserved. Comments may be nested and may
 * contain backslash-escaped characters.
 *
 * @param string $value header value
 * @return string the value without comments
 */
function stripComments($value) {
    $result = '';
    $cnt = strlen($value);
    for ($i = 0; $i < $cnt; ++$i) {
        switch ($value[$i]) {
            case '"':
                /* copy the quoted string unchanged */
                $result .= '"';
                while ((++$i < $cnt) && ($value[$i] != '"')) {
                    if ($value[$i] == '\\') {
                        $result .= '\\';
                        if (++$i >= $cnt) {
                            /* trailing backslash: nothing left to copy */
                            break;
                        }
                    }
                    $result .= $value[$i];
                }
                /* append the closing quote only if there is one */
                if ($i < $cnt) {
                    $result .= $value[$i];
                }
                break;
            case '(':
                /* skip everything up to the matching ')' */
                $depth = 1;
                while (($depth > 0) && (++$i < $cnt)) {
                    switch ($value[$i]) {
                        case '\\':
                            ++$i; /* skip the escaped character */
                            break;
                        case '(':
                            ++$depth;
                            break;
                        case ')':
                            --$depth;
                            break;
                        default:
                            break;
                    }
                }
                break;
            default:
                $result .= $value[$i];
                break;
        }
    }
    return $result;
}
106
/**
 * Store one header field in the matching property of this object.
 *
 * Field names are matched case-insensitively; unknown fields are ignored.
 *
 * @param string $field header field name (without the colon)
 * @param string $value unfolded field value
 */
function parseField($field, $value) {
    $field = strtolower($field);
    switch ($field) {
        case 'date':
            $value = $this->stripComments($value);
            /* split on runs of white space so repeated blanks do not
             * produce empty tokens */
            $d = preg_split('/\s+/', trim($value));
            $this->date = getTimeStamp($d);
            break;
        case 'subject':
            $this->subject = $value;
            break;
        case 'from':
            $this->from = $this->parseAddress($value, true);
            break;
        case 'sender':
            $this->sender = $this->parseAddress($value);
            break;
        case 'reply-to':
            $this->reply_to = $this->parseAddress($value, true);
            break;
        case 'to':
            $this->to = $this->parseAddress($value, true);
            break;
        case 'cc':
            $this->cc = $this->parseAddress($value, true);
            break;
        case 'bcc':
            $this->bcc = $this->parseAddress($value, true);
            break;
        case 'in-reply-to':
            $this->in_reply_to = $value;
            break;
        case 'message-id':
            $this->message_id = $this->stripComments($value);
            break;
        case 'references':
            $this->references = $this->stripComments($value);
            break;
        case 'x-confirm-reading-to':
        case 'return-receipt-to':
        case 'disposition-notification-to':
            $value = $this->stripComments($value);
            $this->dnt = $this->parseAddress($value);
            break;
        case 'mime-version':
            $value = $this->stripComments($value);
            $value = str_replace(' ', '', $value);
            /* only version 1.0 switches the flag on; anything else leaves
             * the current value untouched */
            $this->mime = ($value == '1.0' ? true : $this->mime);
            break;
        case 'content-type':
            $value = $this->stripComments($value);
            $this->parseContentType($value);
            break;
        case 'content-disposition':
            $value = $this->stripComments($value);
            $this->parseDisposition($value);
            break;
        case 'user-agent':
        case 'x-mailer':
            $this->xmailer = $value;
            break;
        case 'x-priority':
            $this->priority = $value;
            break;
        case 'list-post':
        case 'list-reply':
        case 'list-subscribe':
        case 'list-unsubscribe':
        case 'list-archive':
        case 'list-owner':
        case 'list-help':
        case 'list-id':
            /* the mlist key is the field name without its "list-" prefix */
            $this->mlist(substr($field, 5), $this->stripComments($value));
            break;
        default:
            break;
    }
}
/**
 * parseAddress: recursive function for parsing address strings and storing
 * them in address structure objects.
 *
 * Recognised forms:
 *   personal name: encoded: =?charset?Q|B?string?=
 *                  quoted:  "string"
 *                  normal:  string
 *   email:         <mailbox@host>
 *                  mailbox@host
 *
 * This function is also used for validating addresses returned from the
 * compose form, which is why it became rather large.
 * Todo: find a way to clean up this mess a bit (Marc Groot Koerkamp)
 *
 * @param string $address address string to parse
 * @param bool   $ar      return the whole array instead of only the first
 *                        element
 * @param array  $addr_ar addresses parsed so far (results are appended)
 * @param string $group   name of the group currently being parsed
 * @param string $host    default domain name for addresses without one
 * @param mixed  $lookup  callback used to resolve strings without '@'
 *                        (probably nicknames); called with the string and
 *                        expected to return an array that may contain the
 *                        keys 'email' and 'name'
 * @return mixed array of address structure objects when $ar is true, a
 *               single address structure object (or null when nothing was
 *               parsed) otherwise; while a group is being parsed the result
 *               is array($addr_ar, $position)
 */
function parseAddress($address, $ar=false, $addr_ar = array(), $group = '', $host='', $lookup=false) {
    $pos = 0;
    $addr_start = 0;
    $name = $addr = $comment = $is_encoded = '';
    /*
     * In case of 8 bit addresses <SPACE> is somehow represented as
     * NON BREAKING SPACE. This only happens when we validate addresses
     * from the compose form.
     *
     * Note: when other charsets have difficulties with the characters
     *       =,;:<>()"<SPACE>
     * then we should find out the value for those characters and replace
     * them by proper ASCII values before we start parsing.
     */
    $address = str_replace("\240", ' ', $address);

    $address = trim($address);
    $j = strlen($address);

    while ($pos < $j) {
        $char = $address[$pos];
        switch ($char) {
            case '=':
                /* get the encoded personal name */
                if (preg_match('/^(=\?([^?]*)\?(Q|B)\?([^?]*)\?=)(.*)/Ui', substr($address, $pos), $reg)) {
                    $name .= $reg[1];
                    $pos += strlen($reg[1]);
                } else {
                    ++$pos;
                }
                $addr_start = $pos;
                $is_encoded = true;
                break;
            case '"': /* get the personal name */
                ++$pos;
                /* the bounds check avoids reading past a trailing quote */
                if ($pos < $j && $address[$pos] == '"') {
                    ++$pos;
                } else {
                    $personal_start = $personal_end = $pos;
                    while ($pos < $j) {
                        $personal_end = strpos($address, '"', $pos);
                        if (($personal_end-2)>0 && (substr($address, $personal_end-2, 2) === '\\"' ||
                            substr($address, $personal_end-2, 2) === '\\\\')) {
                            $pos = $personal_end+1;
                        } else {
                            $name .= substr($address, $personal_start, $personal_end-$personal_start);
                            break;
                        }
                    }
                    if ($personal_end) {
                        $pos = $personal_end+1;
                    } else {
                        $pos = $j;
                    }
                }
                $addr_start = $pos;
                break;
            case '<': /* get email address */
                $addr_start = $pos;
                $addr_end = strpos($address, '>', $addr_start);
                /* check for missing '>' */
                if ($addr_end === false) {
                    $addr_end = $j;
                }
                $addr = substr($address, $addr_start+1, $addr_end-$addr_start-1);
                /* $addr_end is always past $addr_start here, so never 0 */
                $pos = $addr_end+1;
                break;
            case '(': /* rip off comments */
                $comment_start = $pos;
                /* search from the opening parenthesis so a stray ')'
                 * earlier in the string cannot be picked up */
                $comment_end = strpos($address, ')', $comment_start);
                if ($comment_end !== false) {
                    $comment = substr($address, $comment_start+1, ($comment_end-$comment_start-1));
                    $address = substr($address, 0, $comment_start) . substr($address, $comment_end + 1);
                } else {
                    /* unterminated comment: treat the remainder as the
                     * comment, otherwise the '(' would be seen again on
                     * every iteration and the loop would never end */
                    $comment = (string) substr($address, $comment_start+1);
                    $address = (string) substr($address, 0, $comment_start);
                }
                $j = strlen($address);
                if ($comment_start) {
                    $pos = $comment_start-1;
                } else {
                    $pos = 0;
                }
                break;
            case ';':
                if ($group) {
                    /* NOTE(review): $j is not refreshed after shortening
                     * $address here, and the position later reported to the
                     * caller is relative to the remaining string. Group
                     * handling looks fragile; confirm before relying on it. */
                    $address = substr($address, 0, $pos - 1);
                    ++$pos;
                    break;
                }
                /* no group: ';' is handled like ',' (intentional fall through) */
            case ',': /* we reached a delimiter */
                if (!$name && !$addr) {
                    $addr = substr($address, 0, $pos);
                } else if (!$addr) {
                    $addr = trim(substr($address, $addr_start, $pos));
                } else if ($name == '') {
                    $name = trim(substr($address, 0, $addr_start));
                }
                $at = strpos($addr, '@');
                $addr_structure = new AddressStructure();
                if (!$name && $comment) $name = $comment;
                if (!$is_encoded) {
                    $addr_structure->personal = encodeHeader($name);
                } else {
                    $addr_structure->personal = $name;
                }
                $is_encoded = false;
                $addr_structure->group = $group;
                $grouplookup = false;
                if ($at) {
                    $addr_structure->mailbox = substr($addr, 0, $at);
                    $addr_structure->host = substr($addr, $at+1);
                } else {
                    /* if lookup function */
                    if ($lookup) {
                        $aAddr = call_user_func_array($lookup, array($addr));
                        if (isset($aAddr['email'])) {
                            if (strpos($aAddr['email'], ',')) {
                                /* NOTE(review): with $ar == false the nested
                                 * call returns a single object, not an
                                 * array - confirm the intended behaviour */
                                $grouplookup = true;
                                $addr_ar = $this->parseAddress($aAddr['email'], $ar, $addr_ar, $group, $host, $lookup);
                            } else {
                                $at = strpos($aAddr['email'], '@');
                                $addr_structure->mailbox = substr($aAddr['email'], 0, $at);
                                $addr_structure->host = substr($aAddr['email'], $at+1);
                                if (isset($aAddr['name'])) {
                                    $addr_structure->personal = $aAddr['name'];
                                } else {
                                    $addr_structure->personal = encodeHeader($addr);
                                }
                            }
                        }
                    }
                    if (!$grouplookup && !$addr_structure->mailbox) {
                        $addr_structure->mailbox = trim($addr);
                        if ($host) {
                            $addr_structure->host = $host;
                        }
                    }
                }
                /* continue with whatever follows the delimiter */
                $address = trim(substr($address, $pos+1));
                $j = strlen($address);
                $pos = 0;
                $name = '';
                $addr = '';
                if (!$grouplookup) {
                    $addr_ar[] = $addr_structure;
                }
                break;
            case ':': /* process the group addresses */
                /* group marker */
                $group = substr($address, 0, $pos);
                $address = substr($address, $pos+1);
                /* $host must be passed along; it was previously omitted, which
                 * put $lookup in the $host slot and disabled the lookup */
                $result = $this->parseAddress($address, $ar, $addr_ar, $group, $host, $lookup);
                $addr_ar = $result[0];
                $pos = $result[1];
                $address = substr($address, $pos++);
                $j = strlen($address);
                $group = '';
                break;
            case ' ':
                ++$pos;
                break;
            default:
                /*
                 * this happens in the following situations:
                 * 1: unquoted personal name
                 * 2: email address without < and >
                 * 3: unquoted personal name from compose that should be
                 *    encoded.
                 * If it's a personal name then an email address should
                 * follow; the personal name may not have ',' inside it.
                 * If it's an email address then the personal name is not
                 * set. We should look for the delimiter ',' or a SPACE.
                 */
                /* we need a tokenizer here */

                $i_space = strpos($address, ' ', $pos);
                $i_del = strpos($address, ',', $pos);
                if ($i_space || $i_del) {
                    if ($i_del) { /* extract the string part before the delimiter */
                        $address_part = substr($address, $pos, $i_del-$pos);
                    } else { /* extract the string part starting at pos */
                        $address_part = substr($address, $pos);
                    }
                    if ($i = strpos($address_part, '@')) {
                        /* an email address is following */
                        if (($i+$pos) < $i_space) {
                            $addr_start = $pos;
                            /* multiple addresses are following */
                            if ($i_space < $i_del && $i_del) {
                                /* <space> is present */
                                if ($i_space) {
                                    if ($i = strpos($address_part, '<')) {
                                        $name .= substr($address_part, 0, $i);
                                        $pos = $i+$pos;
                                    } else {
                                        $addr = substr($address, $pos, $i_space-$pos);
                                        $pos = $i_space;
                                    }
                                } else { /* no <space> $i_space === false */
                                    if ($i = strpos($address_part, '<')) {
                                        $name .= substr($address_part, 0, $i);
                                        $pos = $i+$pos;
                                    } else {
                                        $addr = substr($address, $pos);
                                        $pos = $j;
                                    }
                                }
                            } else { /* <space> is available in the next address */
                                     /* OR no delimiter and <space> */
                                if ($i_del) {
                                    /* check for < > addresses */
                                    if ($i = strpos($address_part, '<')) {
                                        $name .= substr($address_part, 0, $i);
                                        $pos = $i+$pos;
                                    } else {
                                        $addr = substr($address, $pos, $i_del-$pos);
                                        $pos = $i_del;
                                    }
                                /* no delimiter */
                                } else if ($i_space) { /* can never happen ? */
                                    if ($i = strpos($address_part, '<')) {
                                        $name .= substr($address_part, 0, $i);
                                        $pos = $i+$pos;
                                    } else {
                                        $addr = substr($address, $pos, $i_space-$pos);
                                        $pos = $i_space+1;
                                    }
                                } else { /* can never happen */
                                    $addr = substr($address, $pos);
                                    $pos = $j;
                                }
                            }
                        } else { /* <space> is located after the user@domain part */
                                 /* or no <space> present */
                            if ($i_space) {
                                if ($i = strpos($address_part, '<')) {
                                    $name .= substr($address_part, 0, $i);
                                    $pos = $i+$pos;
                                } else {
                                    $name .= substr($address, $pos, $i_space-$pos) . ' ';
                                    $addr_start = $i_space+1;
                                    $pos = $i_space+1;
                                }
                            } else { /* no <space> */
                                $addr = substr($address, $pos, $i_del-$pos);
                                $addr_start = $pos;
                                if ($i_del) {
                                    $pos = $i_del;
                                } else { /* can never happen. REMOVE */
                                    $pos = $j;
                                }
                            }
                        }
                    } else {
                        /* email address without domain name, could be an alias */
                        $addr_start = $pos;
                        /* FIXME check for comments */
                        $addr = $address_part;
                        $pos = strlen($address_part) + $pos;
                    }
                } else {
                    /* check for < > addresses; only look ahead of $pos, a
                     * '<' that was already consumed would move $pos
                     * backwards and the loop would never terminate */
                    if ($i = strpos($address, '<', $pos)) {
                        $name .= substr($address, $pos, $i-$pos);
                        $pos = $i;
                    } else {
                        /* FIXME check for comments */
                        $addr = substr($address, $pos);
                        $addr_start = $pos;
                        $pos = $j;
                    }
                }
                break;
        }
    }

    /* handle whatever is left after the last delimiter */
    if (!$name && !$addr) {
        $addr = substr($address, 0, $pos);
    } else if (!$addr) {
        $addr = trim(substr($address, $addr_start, $pos));
    } else if ($name == '') {
        $name = trim(substr($address, 0, $addr_start));
    }
    if (!$name && $comment) {
        $name = $comment;
    } else if ($name && $comment) {
        $name = $name .' ('.$comment.')';
    }
    $at = strpos($addr, '@');
    $addr_structure = new AddressStructure();
    $addr_structure->group = $group;
    if ($at) {
        $addr_structure->mailbox = trim(substr($addr, 0, $at));
        $addr_structure->host = trim(substr($addr, $at+1));
    } else {
        /* if lookup function */
        if ($lookup) {
            $aAddr = call_user_func_array($lookup, array($addr));
            if (isset($aAddr['email'])) {
                if (strpos($aAddr['email'], ',')) {
                    return $this->parseAddress($aAddr['email'], $ar, $addr_ar, $group, $host, $lookup);
                } else {
                    $at = strpos($aAddr['email'], '@');
                    $addr_structure->mailbox = substr($aAddr['email'], 0, $at);
                    $addr_structure->host = substr($aAddr['email'], $at+1);
                    if (isset($aAddr['name']) && $aAddr['name']) {
                        $name = $aAddr['name'];
                    } else {
                        $name = $addr;
                    }
                }
            }
        }
        if (!$addr_structure->mailbox) {
            $addr_structure->mailbox = trim($addr);
            if ($host) {
                $addr_structure->host = $host;
            }
        }
    }
    $name = trim($name);
    if (!$is_encoded && !$group) {
        $name = encodeHeader($name);
    }
    if ($group) {
        if ($addr == '') { /* no addresses found in group */
            $name = $group;
        }
        $addr_structure->personal = $name;
        $addr_ar[] = $addr_structure;
        return (array($addr_ar, $pos+1));
    }
    $addr_structure->personal = $name;
    if ($name || $addr) {
        $addr_ar[] = $addr_structure;
    }
    if ($ar) {
        return ($addr_ar);
    }
    /* nothing parsed (e.g. empty input): no first element to return */
    return (isset($addr_ar[0]) ? $addr_ar[0] : null);
}
589
/**
 * Parse a Content-Type value and store the result in $this->content_type.
 *
 * The part before the first ';' is the media type, the remainder is parsed
 * as parameters. When parameters are present but no charset is given,
 * 'us-ascii' is used.
 *
 * @param string $value Content-Type header value (comments already removed)
 */
function parseContentType($value) {
    $pos = strpos($value, ';');
    $props = '';
    if ($pos > 0) {
        $type = trim(substr($value, 0, $pos));
        $props = trim(substr($value, $pos+1));
    } else {
        $type = $value;
    }
    $content_type = new ContentType($type);
    if ($props) {
        $properties = $this->parseProperties($props);
        if (!isset($properties['charset'])) {
            $properties['charset'] = 'us-ascii';
        }
        /* store the array that carries the default; re-parsing $props
         * here silently discarded the charset fallback */
        $content_type->properties = $properties;
    }
    $this->content_type = $content_type;
}
609
/**
 * Split a ';'-separated parameter list into a key => value array.
 *
 * Entries without '=' or with an empty key are ignored. A value starting
 * with a double quote has its first and last character removed.
 *
 * @param string $value parameter list, e.g. 'charset="utf-8"; name=foo'
 * @return array parameter names mapped to their values
 */
function parseProperties($value) {
    $propResultArray = array();
    foreach (explode(';', $value) as $prop) {
        $prop = trim($prop);
        $pos = strpos($prop, '=');
        if ($pos > 0) {
            $key = trim(substr($prop, 0, $pos));
            $val = trim(substr($prop, $pos+1));
            /* an empty value ("key=") has no first character to inspect */
            if ($val !== '' && $val[0] == '"') {
                $val = substr($val, 1, -1);
            }
            $propResultArray[$key] = $val;
        }
    }
    return $propResultArray;
}
627
/**
 * Parse a Content-Disposition value and store it in $this->disposition.
 *
 * Everything before the first ';' is the disposition name; the rest is
 * handed to parseProperties() and attached as the properties.
 *
 * @param string $value Content-Disposition header value
 */
function parseDisposition($value) {
    $dispositionName = $value;
    $parameterText = '';

    $semicolon = strpos($value, ';');
    if ($semicolon > 0) {
        $dispositionName = trim(substr($value, 0, $semicolon));
        $parameterText = trim(substr($value, $semicolon + 1));
    }

    $parameters = $this->parseProperties($parameterText);
    $disposition = new Disposition($dispositionName);
    $disposition->properties = $parameters;
    $this->disposition = $disposition;
}
642
/**
 * Parse a List-* header value and store it in $this->mlist[$field].
 *
 * The value is a comma separated list of entries, optionally wrapped in
 * angle brackets. An entry starting with "mailto:" is stored under the key
 * 'mailto' (without the prefix), any other entry under 'href'. Later
 * entries of the same kind overwrite earlier ones.
 *
 * @param string $field key to store the result under (e.g. 'post')
 * @param string $value header value
 */
function mlist($field, $value) {
    $res_a = array();
    foreach (explode(',', $value) as $val) {
        $val = trim($val);
        /* an empty entry has no first character to inspect */
        if ($val !== '' && $val[0] == '<') {
            $val = substr($val, 1, -1);
        }
        if (substr($val, 0, 7) == 'mailto:') {
            $res_a['mailto'] = substr($val, 7);
        } else {
            $res_a['href'] = $val;
        }
    }
    $this->mlist[$field] = $res_a;
}
659
/**
 * Get the address strings out of the header.
 *
 * example1: header->getAddr_s('to')
 * example2: header->getAddr_s(array('to', 'cc', 'bcc'))
 *
 * @param mixed  $arr       name of an address property, or an array of names
 * @param string $separator string placed between the addresses
 * @param bool   $encoded   use getEncodedAddress() instead of getAddress()
 * @return string the addresses joined by $separator ('' when there are none)
 */
function getAddr_s($arr, $separator = ',', $encoded = false) {
    $s = '';

    if (is_array($arr)) {
        $parts = array();
        foreach ($arr as $arg) {
            /* keep the result of the recursive call; it was previously
             * thrown away and an undefined variable appended instead */
            $result = $this->getAddr_s($arg, $separator, $encoded);
            if ($result) {
                $parts[] = $result;
            }
        }
        $s = implode($separator, $parts);
    } else {
        $addr = $this->{$arr};
        if (is_array($addr)) {
            $parts = array();
            foreach ($addr as $addr_o) {
                if (is_object($addr_o)) {
                    if ($encoded) {
                        $parts[] = $addr_o->getEncodedAddress();
                    } else {
                        $parts[] = $addr_o->getAddress();
                    }
                }
            }
            $s = implode($separator, $parts);
        } else if (is_object($addr)) {
            if ($encoded) {
                $s = $addr->getEncodedAddress();
            } else {
                $s = $addr->getAddress();
            }
        }
    }
    return $s;
}
701
/**
 * Collect addresses from one or more address properties into an array that
 * maps the lower-cased email address to the personal name.
 *
 * An address is skipped when it is already present in $arr or when it is a
 * key of $excl_arr. The first occurrence of an address wins.
 *
 * @param mixed $arg      name of an address property, or an array of names
 * @param array $excl_arr addresses to leave out (email => anything)
 * @param array $arr      result collected so far
 * @return array email address => personal name
 */
function getAddr_a($arg, $excl_arr = array(), $arr = array()) {
    if (is_array($arg)) {
        foreach ($arg as $argument) {
            $arr = $this->getAddr_a($argument, $excl_arr, $arr);
        }
        return $arr;
    }

    /* treat a single address like a list with one entry so that both
     * shapes are handled by exactly the same rules */
    $addr = $this->{$arg};
    $addr_list = (is_array($addr) ? $addr : array($addr));

    foreach ($addr_list as $next_addr) {
        if (!is_object($next_addr)) {
            continue;
        }
        /* only append "@host" when there is a host; the single-address
         * path used to produce "mailbox@" for an empty host */
        if (isset($next_addr->host) && ($next_addr->host != '')) {
            $email = $next_addr->mailbox . '@' . $next_addr->host;
        } else {
            $email = $next_addr->mailbox;
        }
        $email = strtolower($email);
        if ($email && !isset($arr[$email]) && !isset($excl_arr[$email])) {
            $arr[$email] = $next_addr->personal;
        }
    }
    return $arr;
}
736
/**
 * Look for an address among the To and Cc recipients of this header.
 *
 * A "partial" match has the same mailbox and host; a "full" match also has
 * the same personal name.
 *
 * @param mixed $address address string, or an array of address strings
 * @param bool  $recurs  internal flag: return match details instead of a
 *                       plain boolean
 * @return mixed for an array: the position of the first fully matching
 *               entry, else the position of the first partially matching
 *               entry, else false (use === to tell position 0 from false).
 *               For a string: true when at least a partial match exists,
 *               false otherwise. With $recurs set:
 *               array(partial matches, full match found).
 */
function findAddress($address, $recurs = false) {
    if (is_array($address)) {
        $result = false;
        $i = 0;
        foreach ($address as $argument) {
            $match = $this->findAddress($argument, true);
            if ($match[1]) {
                return $i;
            }
            /* remember only the first partial match; "=== false" keeps
             * position 0 from being overwritten by a later one */
            if ($result === false && count($match[0])) {
                $result = $i;
            }
            ++$i;
        }
        return $result;
    }

    if (!is_array($this->cc)) $this->cc = array();
    $srch_addr = $this->parseAddress($address);
    $results = array();

    /* parseAddress() yields no object for an empty address */
    if (is_object($srch_addr)) {
        $to = (is_array($this->to) ? $this->to : array());
        foreach (array_merge($to, $this->cc) as $candidate) {
            if (!is_object($candidate)) {
                continue;
            }
            if ($candidate->host == $srch_addr->host
                && $candidate->mailbox == $srch_addr->mailbox) {
                $results[] = $srch_addr;
                if ($candidate->personal == $srch_addr->personal) {
                    if ($recurs) {
                        return array($results, true);
                    }
                    return true;
                }
            }
        }
    }

    if ($recurs) {
        return array($results, false);
    }
    /* count the collected matches; counting the boolean $result reported a
     * match for every address */
    return (count($results) > 0);
}
796
/**
 * Return the properties of the parsed content type.
 *
 * Both parameters are accepted for compatibility with existing callers but
 * are not used: they are passed by value, so the assignments that used to
 * be made to them never reached the caller.
 *
 * @param mixed $type0 unused
 * @param mixed $type1 unused
 * @return mixed the properties of $this->content_type
 */
function getContentType($type0, $type1) {
    return $this->content_type->properties;
}
802 }
803
804 ?>