better parsing of email addresses (also used for processing compose form
[squirrelmail.git] / class / mime / Rfc822Header.class.php
CommitLineData
19d470aa 1<?php
2
3/**
4 * Rfc822Header.class.php
5 *
76911253 6 * Copyright (c) 2003 The SquirrelMail Project Team
19d470aa 7 * Licensed under the GNU GPL. For full terms see the file COPYING.
8 *
9 * This contains functions needed to handle mime messages.
10 *
11 * $Id$
12 */
13
/*
 * Rfc822Header class
 * input: header string, or array of header strings (see parseHeader)
 */
18class Rfc822Header {
/* "Date" header, stored as whatever getTimeStamp() returns ('' until parsed) */
var $date = '';
/* raw "Subject" header value */
var $subject = '';
/* address headers parsed in array mode: lists of address structure objects */
var $from = array();
/* "Sender" is parsed in single-address mode ('' until parsed) */
var $sender = '';
var $reply_to = array();
var $to = array();
var $cc = array();
var $bcc = array();
/* raw "In-Reply-To" value; message_id / references have comments stripped */
var $in_reply_to = '';
var $message_id = '';
var $references = '';
/* set to true once a "MIME-Version: 1.0" header has been seen */
var $mime = false;
/* ContentType object assigned by parseContentType() ('' until parsed) */
var $content_type = '';
/* Disposition object assigned by parseDisposition() ('' until parsed) */
var $disposition = '';
/* raw "User-Agent" / "X-Mailer" value */
var $xmailer = '';
/* raw "X-Priority" value; 3 when the header is absent */
var $priority = 3;
/* address taken from a disposition-notification / return-receipt header */
var $dnt = '';
/* mailing list (List-*) data, filled by mlist() and keyed by action name */
var $mlist = array();
/* only needed for constructing headers in smtp.php */
var $more_headers = array();
/*
 * parseHeader: split a raw header block into lines and hand every valid
 * "field: value" line to parseField().
 * input:  $hdr = string with CRLF separated header lines, or an array of
 *                strings which are concatenated first
 * output: none; the object properties are filled in. When no Content-Type
 *         header was found 'text/plain; charset=us-ascii' is assumed.
 */
function parseHeader($hdr) {
    if (is_array($hdr)) {
        $hdr = implode('', $hdr);
    }

    /*
     * Unfold the header: only the CRLF is removed, the whitespace that
     * marked the continuation is kept as a single space. Dropping it as
     * well would glue the last word of a line to the first word of the
     * continuation line.
     */
    $hdr = trim(str_replace(array("\r\n\t", "\r\n "), array(' ', ' '), $hdr));

    /* every element now represents one complete header line */
    foreach (explode("\r\n", $hdr) as $line) {
        $pos = strpos($line, ':');
        if ($pos > 0) {
            $field = substr($line, 0, $pos);
            /* a field name containing a space is not a valid field */
            if (!strstr($field, ' ')) {
                $this->parseField($field, trim(substr($line, $pos+1)));
            }
        }
    }
    if ($this->content_type == '') {
        $this->parseContentType('text/plain; charset=us-ascii');
    }
}
64
/*
 * stripComments: remove (possibly nested) parenthesised comments from a
 * header value. Quoted strings are copied unchanged, so parentheses inside
 * double quotes are kept.
 * input:  $value = string
 * output: string without the comments
 */
function stripComments($value) {
    $result = '';
    $cnt = strlen($value);
    for ($i = 0; $i < $cnt; ++$i) {
        switch ($value[$i]) {
            case '"':
                /* quoted string: copy up to and including the closing quote */
                $result .= '"';
                while ((++$i < $cnt) && ($value[$i] != '"')) {
                    if ($value[$i] == '\\') {
                        $result .= '\\';
                        if (++$i >= $cnt) {
                            /* backslash was the last character */
                            break;
                        }
                    }
                    $result .= $value[$i];
                }
                /* only emit the closing quote when there really is one */
                if ($i < $cnt) {
                    $result .= '"';
                }
                break;
            case '(':
                /* comment: skip until the matching closing parenthesis */
                $depth = 1;
                while (($depth > 0) && (++$i < $cnt)) {
                    switch ($value[$i]) {
                        case '\\':
                            /* escaped character, never a delimiter */
                            ++$i;
                            break;
                        case '(':
                            ++$depth;
                            break;
                        case ')':
                            --$depth;
                            break;
                        default:
                            break;
                    }
                }
                break;
            default:
                $result .= $value[$i];
                break;
        }
    }
    return $result;
}
106
/*
 * parseField: store the value of one header field in the matching property.
 * input:  $field = string, header field name (matched case-insensitively)
 *         $value = string, unfolded field body
 * output: none; unknown fields are ignored.
 */
function parseField($field, $value) {
    $field = strtolower($field);
    switch ($field) {
        case 'date':
            $value = $this->stripComments($value);
            /*
             * split on runs of whitespace so that a double space (for
             * example before a one digit day) does not produce empty
             * elements in the array passed to getTimeStamp()
             */
            $d = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
            $this->date = getTimeStamp($d);
            break;
        case 'subject':
            $this->subject = $value;
            break;
        case 'from':
            $this->from = $this->parseAddress($value, true);
            break;
        case 'sender':
            /* single address, not a list */
            $this->sender = $this->parseAddress($value);
            break;
        case 'reply-to':
            $this->reply_to = $this->parseAddress($value, true);
            break;
        case 'to':
            $this->to = $this->parseAddress($value, true);
            break;
        case 'cc':
            $this->cc = $this->parseAddress($value, true);
            break;
        case 'bcc':
            $this->bcc = $this->parseAddress($value, true);
            break;
        case 'in-reply-to':
            $this->in_reply_to = $value;
            break;
        case 'message-id':
            $this->message_id = $this->stripComments($value);
            break;
        case 'references':
            $this->references = $this->stripComments($value);
            break;
        case 'x-confirm-reading-to':
        case 'return-receipt-to':
        case 'disposition-notification-to':
            $value = $this->stripComments($value);
            $this->dnt = $this->parseAddress($value);
            break;
        case 'mime-version':
            $value = $this->stripComments($value);
            $value = str_replace(' ', '', $value);
            /* any other version leaves the flag as it was */
            $this->mime = ($value == '1.0' ? true : $this->mime);
            break;
        case 'content-type':
            $value = $this->stripComments($value);
            $this->parseContentType($value);
            break;
        case 'content-disposition':
            $value = $this->stripComments($value);
            $this->parseDisposition($value);
            break;
        case 'user-agent':
        case 'x-mailer':
            $this->xmailer = $value;
            break;
        case 'x-priority':
            $this->priority = $value;
            break;
        case 'list-post':
        case 'list-reply':
        case 'list-subscribe':
        case 'list-unsubscribe':
        case 'list-archive':
        case 'list-owner':
        case 'list-help':
        case 'list-id':
            /* the part after "list-" is the key used in $this->mlist */
            $value = $this->stripComments($value);
            $this->mlist(substr($field, 5), $value);
            break;
        default:
            break;
    }
}
/*
 * parseAddress: recursive function for parsing address strings and storing
 * them in address structure objects.
 * input:  $address = string
 *         $ar      = boolean (return the array of parsed addresses instead
 *                    of only the first element)
 *         $addr_ar = array with the addresses parsed so far
 *         $group   = string (name of the group that is being parsed)
 *         $host    = string (default domainname in case of addresses
 *                    without a domainname)
 *         $lookup  = callback function (for looking up address strings
 *                    without @ which are probably nicks). It is called with
 *                    the address string and may return an array with the
 *                    keys 'email' and 'name'.
 * output: $ar true:  array with address structure objects
 *         $ar false: the first address structure object (null when nothing
 *                    was parsed)
 *         while parsing a group ($group set): array($addr_ar, $pos + 1)
 * personal name: encoded: =?charset?Q|B?string?=
 *                quoted:  "string"
 *                normal:  string
 * email        : <mailbox@host>
 *              : mailbox@host
 * This function is also used for validating addresses returned from the
 * compose form, which is why it became rather large.
 * Todo: Find a way to clean up this mess a bit (Marc Groot Koerkamp)
 * NOTE(review): the group handling (':' and ';') is kept exactly as it was;
 * it looks unreliable (';' shortens $address without updating the length
 * used by the loop) and should be verified before it is relied upon.
 */
function parseAddress
    ($address, $ar=false, $addr_ar = array(), $group = '', $host='',$lookup=false) {
    $pos = 0;
    /* start of the token that holds the address; 0 until a token is seen */
    $addr_start = 0;
    $name = $addr = $comment = $is_encoded = '';
    /*
     * in case of 8 bit addresses <SPACE> is somehow represented as
     * NON BREAKING SPACE.
     * This only happens when we validate addresses from the compose form.
     *
     * Note: when other charsets have difficulties with the characters
     * =,;:<>()"<SPACE>
     * then we should find out the value for those characters and replace
     * them by proper ASCII values before we start parsing.
     */
    $address = str_replace("\240",' ',$address);

    $address = trim($address);
    $j = strlen($address);

    while ($pos < $j) {
        $char = $address[$pos];
        switch ($char)
        {
        case '=':
            /* get the encoded personal name */
            if (preg_match('/^(=\?([^?]*)\?(Q|B)\?([^?]*)\?=)(.*)/Ui',substr($address,$pos),$reg)) {
                $name .= $reg[1];
                $pos += strlen($reg[1]);
            }
            ++$pos;
            $addr_start = $pos;
            $is_encoded = true;
            break;
        case '"': /* get the personal name */
            ++$pos;
            if ($pos < $j && $address[$pos] == '"') {
                /* empty quoted string */
                ++$pos;
            } else {
                $personal_start = $pos;
                $personal_end = false;
                /*
                 * look for the closing quote; a backslash escapes the
                 * character that follows it, so \" does not end the name
                 * and \\ followed by " does
                 */
                while ($pos < $j) {
                    if ($address[$pos] == '\\') {
                        $pos += 2;
                    } elseif ($address[$pos] == '"') {
                        $personal_end = $pos;
                        break;
                    } else {
                        ++$pos;
                    }
                }
                if ($personal_end !== false) {
                    $name .= substr($address,$personal_start,$personal_end-$personal_start);
                    $pos = $personal_end+1;
                } else {
                    /* no closing quote: the rest of the string is the name */
                    $name .= substr($address,$personal_start);
                    $pos = $j;
                }
            }
            $addr_start = $pos;
            break;
        case '<': /* get email address */
            $addr_start = $pos;
            $addr_end = strpos($address,'>',$addr_start);
            if ($addr_end) {
                $addr = substr($address,$addr_start+1,$addr_end-$addr_start-1);
                $pos = $addr_end+1;
            } else {
                /* no closing '>': take everything that is left */
                $addr = substr($address,$addr_start+1);
                $pos = $j;
            }
            break;
        case '(': /* rip off comments */
            /*
             * $addr_start is deliberately left alone here: it marks the
             * start of the address token and is used later on to find a
             * personal name in front of the address
             */
            $comment_start = $pos;
            $comment_end = strpos($address,')',$comment_start);
            if ($comment_end !== false) {
                $comment = substr($address, $comment_start+1,($comment_end-$comment_start-1));
                $address = substr($address, 0, $comment_start) . substr($address, $comment_end + 1);
                $j = strlen($address);
                /*
                 * continue with the character that followed the comment;
                 * skipping it would lose a ',' directly after the comment
                 */
                $pos = $comment_start;
            } else {
                /* unterminated comment: step over the '(' */
                $pos = $comment_start + 1;
            }
            break;
        case ',': /* we reached a delimiter */
            if (!$name && !$addr) {
                $addr = substr($address, 0, $pos);
            } else if (!$addr) {
                /* third argument of substr is a length, not an end position */
                $addr = trim(substr($address, $addr_start, max(0, $pos - $addr_start)));
            } else if ($name == '') {
                $name = trim(substr($address, 0, $addr_start));
            }
            $at = strpos($addr, '@');
            $addr_structure = new AddressStructure();
            if (!$name && $comment) $name = $comment;
            if (!$is_encoded) {
                $addr_structure->personal = encodeHeader($name);
            } else {
                $addr_structure->personal = $name;
            }
            $is_encoded = false;
            $addr_structure->group = $group;
            if ($at) {
                $addr_structure->mailbox = substr($addr, 0, $at);
                $addr_structure->host = substr($addr, $at+1);
            } else {
                /* if lookup function */
                if ($lookup) {
                    $aAddr = call_user_func_array($lookup,array($addr));
                    if (isset($aAddr['email'])) {
                        $at = strpos($aAddr['email'], '@');
                        $addr_structure->mailbox = substr($aAddr['email'], 0, $at);
                        $addr_structure->host = substr($aAddr['email'], $at+1);
                        if (isset($aAddr['name'])) {
                            $addr_structure->personal = $aAddr['name'];
                        } else {
                            $addr_structure->personal = encodeHeader($addr);
                        }
                    }
                }
                if (!$addr_structure->mailbox) {
                    $addr_structure->mailbox = trim($addr);
                    if ($host) {
                        $addr_structure->host = $host;
                    }
                }
            }
            /* continue with the remainder, all positions start at 0 again */
            $address = trim(substr($address, $pos+1));
            $j = strlen($address);
            $pos = 0;
            $addr_start = 0;
            $name = '';
            $addr = '';
            /* a comment belongs to one address only */
            $comment = '';
            $addr_ar[] = $addr_structure;
            break;
        case ':': /* process the group addresses */
            /* group marker */
            $group = substr($address, 0, $pos);
            $address = substr($address, $pos+1);
            $result = $this->parseAddress($address, $ar, $addr_ar, $group);
            $addr_ar = $result[0];
            $pos = $result[1];
            $address = substr($address, $pos++);
            $j = strlen($address);
            $group = '';
            break;
        case ';':
            if ($group) {
                $address = substr($address, 0, $pos - 1);
            }
            ++$pos;
            break;
        case ' ':
            ++$pos;
            break;
        default:
            /*
             * this happens in the following situations :
             * 1: unquoted personal name
             * 2: emailaddress without < and >
             * 3: unquoted personal name from compose that should be encoded.
             * if it's a personal name then an emailaddress should follow
             * the personal name may not have ',' inside it
             * If it's an emailaddress then the personal name is not set.
             * we should look for the delimiter ',' or a SPACE
             */
            /* check for emailaddress */
            $i_space = strpos($address,' ',$pos);
            $i_del = strpos($address,',',$pos);
            if ($i_space || $i_del) {
                if ($i_del) {
                    $address_part = substr($address,$pos,$i_del-$pos);
                } else {
                    $address_part = substr($address,$pos);
                }
                if ($i = strpos($address_part,'@')) {
                    /* an email address is following */
                    if (($i+$pos) < $i_space) {
                        /* the @ belongs to the current token */
                        $addr_start = $pos;
                        if ($i_space < $i_del && $i_del) {
                            if ($i_space) {
                                $addr = substr($address,$pos,$i_space-$pos);
                                $pos = $i_space;
                            } else {
                                $addr = substr($address,$pos);
                                $pos = $j;
                            }
                        } else {
                            if ($i_del) {
                                $addr = substr($address,$pos,$i_del-$pos);
                                $pos = $i_del;
                            } else {
                                $addr = substr($address,$pos);
                                $pos = $j;
                            }
                        }
                    } else {
                        if ($i_space) {
                            /* current token is a word of the personal name */
                            $name .= substr($address,$pos,$i_space-$pos) . ' ';
                            $addr_start = $i_space+1;
                            $pos = $i_space+1;
                        } else {
                            $addr = substr($address,$pos,$i_del-$pos);
                            $addr_start = $pos;
                            if ($i_del) {
                                $pos = $i_del;
                            } else {
                                $pos = $j;
                            }
                        }
                    }
                } else {
                    /* email address without domain name, could be an alias */
                    $addr_start = $pos;
                    $addr = $address_part;
                    $pos = strlen($address_part) + $pos;
                }
            } else {
                $addr = substr($address,$pos);
                $addr_start = $pos;
                $pos = $j;
            }
            break;
        }
    }

    /* handle whatever is left after the last delimiter */
    if (!$name && !$addr) {
        $addr = substr($address, 0, $pos);
    } else if (!$addr) {
        $addr = trim(substr($address, $addr_start, max(0, $pos - $addr_start)));
    } else if ($name == '') {
        $name = trim(substr($address, 0, $addr_start));
    }
    if (!$name && $comment) $name = $comment;
    $at = strpos($addr, '@');
    $addr_structure = new AddressStructure();
    $addr_structure->group = $group;
    if ($at) {
        $addr_structure->mailbox = trim(substr($addr, 0, $at));
        $addr_structure->host = trim(substr($addr, $at+1));
    } else {
        /* if lookup function */
        if ($lookup) {
            $aAddr = call_user_func_array($lookup,array($addr));
            if (isset($aAddr['email'])) {
                $at = strpos($aAddr['email'], '@');
                $addr_structure->mailbox = substr($aAddr['email'], 0, $at);
                $addr_structure->host = substr($aAddr['email'], $at+1);
                if (isset($aAddr['name']) && $aAddr['name']) {
                    $name = $aAddr['name'];
                } else {
                    $name = $addr;
                }
            }
        }
        if (!$addr_structure->mailbox) {
            $addr_structure->mailbox = trim($addr);
            if ($host) {
                $addr_structure->host = $host;
            }
        }
    }
    $name = trim($name);
    if (!$is_encoded && !$group) {
        $name = encodeHeader($name);
    }
    if ($group && $addr == '') { /* no addresses found in group */
        $name = $group;
        $addr_structure->personal = $name;
        $addr_ar[] = $addr_structure;
        return (array($addr_ar,$pos+1 ));
    } elseif ($group) {
        $addr_structure->personal = $name;
        $addr_ar[] = $addr_structure;
        return (array($addr_ar,$pos+1 ));
    } else {
        $addr_structure->personal = $name;
        if ($name || $addr) {
            $addr_ar[] = $addr_structure;
        }
    }
    if ($ar) {
        return ($addr_ar);
    }
    /* nothing parsed (empty input): null instead of an undefined offset */
    return (isset($addr_ar[0]) ? $addr_ar[0] : null);
}
516
/*
 * parseContentType: parse the body of a Content-Type header and store the
 * resulting ContentType object in $this->content_type.
 * input:  $value = string like 'type/subtype; key=value; key2="value"'
 * output: none. When parameters are present but no charset is given,
 *         'us-ascii' is stored as charset.
 */
function parseContentType($value) {
    $pos = strpos($value, ';');
    $props = '';
    if ($pos > 0) {
        $type = trim(substr($value, 0, $pos));
        /* the parameters follow the first ';' of the complete value */
        $props = trim(substr($value, $pos+1));
    } else {
        $type = $value;
    }
    $content_type = new ContentType($type);
    if ($props) {
        $properties = $this->parseProperties($props);
        if (!isset($properties['charset'])) {
            $properties['charset'] = 'us-ascii';
        }
        /* store the list that carries the charset default */
        $content_type->properties = $properties;
    }
    $this->content_type = $content_type;
}
536
/*
 * parseProperties: split a ';' separated parameter list into an array.
 * input:  $value = string like 'key=value; key2="value 2"'
 * output: array key => value; surrounding double quotes are removed from
 *         the value. Parts without '=' or with an empty key are skipped.
 */
function parseProperties($value) {
    $propResultArray = array();
    foreach (explode(';', $value) as $prop) {
        $prop = trim($prop);
        $pos = strpos($prop, '=');
        if ($pos > 0) {
            $key = trim(substr($prop, 0, $pos));
            /* cast: older PHP versions return false for an empty substr */
            $val = trim((string) substr($prop, $pos+1));
            /* the value may be empty, so test before looking at offset 0 */
            if ($val !== '' && $val[0] == '"') {
                $val = (string) substr($val, 1, -1);
            }
            $propResultArray[$key] = $val;
        }
    }
    return $propResultArray;
}
554
/*
 * parseDisposition: parse the body of a Content-Disposition header and
 * store the resulting Disposition object in $this->disposition.
 * input:  $value = string like 'attachment; filename="x"'
 * output: none
 */
function parseDisposition($value) {
    $semicolon = strpos($value, ';');
    if ($semicolon > 0) {
        /* disposition name in front of the first ';', parameters behind it */
        $disp_name = trim(substr($value, 0, $semicolon));
        $params = trim(substr($value, $semicolon+1));
    } else {
        $disp_name = $value;
        $params = '';
    }
    $parsed_params = $this->parseProperties($params);
    $disposition = new Disposition($disp_name);
    $disposition->properties = $parsed_params;
    $this->disposition = $disposition;
}
569
/*
 * mlist: parse the value of a List-* header and store it in
 * $this->mlist[$field].
 * input:  $field = string, key to store the result under ('post', 'help', ..)
 *         $value = string, ',' separated list of (usually <> enclosed) urls
 * output: none. The stored array has the key 'mailto' for a mailto: url
 *         (prefix removed) and 'href' for anything else; when several urls
 *         of the same kind are given the last one wins.
 */
function mlist($field, $value) {
    $res_a = array();
    foreach (explode(',', $value) as $val) {
        $val = trim($val);
        if ($val === '') {
            /* empty entry (e.g. trailing ','): must not overwrite 'href' */
            continue;
        }
        if ($val[0] == '<') {
            $val = substr($val, 1, -1);
        }
        if (substr($val, 0, 7) == 'mailto:') {
            $res_a['mailto'] = substr($val, 7);
        } else {
            $res_a['href'] = $val;
        }
    }
    $this->mlist[$field] = $res_a;
}
586
/*
 * getAddr_s: get the address strings out of the header.
 * input:  $arr       = property name, or array of property names
 *         $separator = string placed between the addresses
 *         $encoded   = boolean (use getEncodedAddress() instead of
 *                      getAddress() on the address objects)
 * output: string with the addresses, '' when there are none
 * example1: header->getAddr_s('to').
 * example2: header->getAddr_s(array('to', 'cc', 'bcc'))
 */
function getAddr_s($arr, $separator = ',',$encoded=false) {
    $parts = array();

    if (is_array($arr)) {
        foreach ($arr as $arg) {
            $result = $this->getAddr_s($arg, $separator, $encoded);
            /* skip properties that did not give any address */
            if ($result) {
                $parts[] = $result;
            }
        }
    } else {
        $addr = $this->{$arr};
        if (is_object($addr)) {
            /* single address (e.g. sender): handle it like a list of one */
            $addr = array($addr);
        }
        if (is_array($addr)) {
            foreach ($addr as $addr_o) {
                if (is_object($addr_o)) {
                    if ($encoded) {
                        $parts[] = $addr_o->getEncodedAddress();
                    } else {
                        $parts[] = $addr_o->getAddress();
                    }
                }
            }
        }
    }
    /* implode works for a separator of any length, also an empty one */
    return implode($separator, $parts);
}
628
/*
 * getAddr_a: collect the addresses of one or more header properties.
 * input:  $arg      = property name, or array of property names
 *         $excl_arr = array whose keys are (lowercase) addresses to skip
 *         $arr      = array with the addresses collected so far
 * output: array lowercase email => personal name; the first occurrence of
 *         an address wins.
 */
function getAddr_a($arg, $excl_arr = array(), $arr = array()) {
    if (is_array($arg)) {
        foreach ($arg as $argument) {
            $arr = $this->getAddr_a($argument, $excl_arr, $arr);
        }
        return $arr;
    }

    $addr = $this->{$arg};
    if (is_object($addr)) {
        /* single address (e.g. sender): handle it like a list of one */
        $addr = array($addr);
    }
    if (is_array($addr)) {
        foreach ($addr as $next_addr) {
            if (!is_object($next_addr)) {
                continue;
            }
            /* only add '@host' when there is a host, never a bare '@' */
            if (isset($next_addr->host) && ($next_addr->host != '')) {
                $email = $next_addr->mailbox . '@' . $next_addr->host;
            } else {
                $email = $next_addr->mailbox;
            }
            $email = strtolower($email);
            if ($email && !isset($arr[$email]) && !isset($excl_arr[$email])) {
                $arr[$email] = $next_addr->personal;
            }
        }
    }
    return $arr;
}
d0719411 663
/*
 * findAddress: look for one or more addresses in the to and cc headers.
 * input:  $address = address string, or array of address strings
 *         $recurs  = boolean, used internally for the array form
 * output: $address is an array:
 *             index (counted from 0) of the first address whose mailbox,
 *             host and personal name match; otherwise the index of the
 *             first address whose mailbox and host match; false when
 *             nothing matches. Index 0 is a valid result, so compare the
 *             result with === false.
 *         $address is a string, $recurs false:
 *             true when mailbox and host match an address in to or cc
 *         $address is a string, $recurs true:
 *             array(matches, boolean full match)
 */
function findAddress($address, $recurs = false) {
    if (is_array($address)) {
        $result = false;
        $i = 0;
        foreach ($address as $argument) {
            $match = $this->findAddress($argument, true);
            if ($match[1]) {
                /* full match, no need to look any further */
                return $i;
            }
            /* remember the first partial match only; index 0 counts too */
            if ($result === false && count($match[0])) {
                $result = $i;
            }
            ++$i;
        }
        return $result;
    }

    if (!is_array($this->cc)) $this->cc = array();
    $to = (is_array($this->to) ? $this->to : array());
    $srch_addr = $this->parseAddress($address);
    $results = array();
    if (is_object($srch_addr)) {
        /* first the to addresses, then the cc addresses */
        foreach (array_merge($to, $this->cc) as $rcpt) {
            if ($rcpt->host == $srch_addr->host && $rcpt->mailbox == $srch_addr->mailbox) {
                $results[] = $srch_addr;
                if ($rcpt->personal == $srch_addr->personal) {
                    if ($recurs) {
                        return array($results, true);
                    }
                    return true;
                }
            }
        }
    }
    if ($recurs) {
        return array($results, false);
    }
    /* count the collected matches, not the unrelated boolean */
    return (count($results) > 0);
}
19d470aa 723
/*
 * getContentType: return the properties of the content type.
 * input:  $type0, $type1 = passed by value; the assignments below are not
 *         visible to the caller
 * output: $this->content_type->properties
 * NOTE(review): the parameters look like they were meant to be passed by
 * reference - confirm with the callers before changing the signature.
 */
function getContentType($type0, $type1) {
    $ctype = $this->content_type;
    $type0 = $ctype->type0;
    $type1 = $ctype->type1;
    return $ctype->properties;
}
729}
730
731?>