Correct disappearing addresses when address book entry has email address without...
[squirrelmail.git] / functions / mime.php
CommitLineData
59177427 1<?php
2ba13803 2
35586184 3/**
8bd0068d 4 * mime.php
5 *
8bd0068d 6 * This contains the functions necessary to detect and decode MIME
7 * messages.
8 *
4b5049de 9 * @copyright &copy; 1999-2007 The SquirrelMail Project Team
4b4abf93 10 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
8bd0068d 11 * @version $Id$
12 * @package squirrelmail
13 */
b74ba498 14
202bcbcc 15/**
16 * dependency information
17 functions dependency
18 mime_structure
19 class/mime/Message.class.php
20 Message::parseStructure
21 functions/page_header.php
22 displayPageHeader
23 functions/display_messages.php
24 plain_error_message
25 mime_fetch_body
26 functions/imap_general.php
27 sqimap_run_command
28 mime_print_body_lines
29
30
31
32functions/imap.php
33functions/attachment_common.php
34functions/display_messages.php
35
36magicHtml => url_parser
37translateText => url_parser
38
39*/
40
8beafbbc 41
7c7b74b3 42/* -------------------------------------------------------------------------- */
43/* MIME DECODING */
44/* -------------------------------------------------------------------------- */
b74ba498 45
/**
 * Get the MIME structure
 *
 * Isolates the BODYSTRUCTURE portion of an IMAP FETCH response, hands it to
 * Message::parseStructure() and copies the supplied IMAP flags onto the
 * resulting message object.
 *
 * When the structure cannot be parsed, an error page is printed and the
 * script exits.
 *
 * @param string $bodystructure FETCH response containing the BODYSTRUCTURE item
 * @param array  $flags         IMAP flags set on the message (\Seen, \Answered,
 *                              \Deleted, \Flagged, $MDNSent); others are ignored
 * @return object message object produced by Message::parseStructure()
 */
function mime_structure ($bodystructure, $flags=array()) {

    /* Isolate the body structure and remove beginning and end parenthesis. */
    $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13));
    $read = trim(substr ($read, 0, -1));
    $i = 0;
    $msg = Message::parseStructure($read,$i);

    if (!is_object($msg)) {
        global $color, $mailbox;
        displayPageHeader( $color, $mailbox );
        $errormessage  = _("SquirrelMail could not decode the bodystructure of the message");
        $errormessage .= '<br />'._("The bodystructure provided by your IMAP server:").'<br /><br />';
        $errormessage .= '<pre>' . htmlspecialchars($read) . '</pre>';
        plain_error_message( $errormessage );
        echo '</body></html>';
        exit;
    }

    if (is_array($flags)) {
        foreach ($flags as $flag) {
            /* Flags are case-insensitive. Comparing the whole lower-cased flag
             * is sufficient; no pre-check on a single character is needed
             * (and it would fail on flags shorter than two characters). */
            switch (strtolower($flag)) {
                case '\\seen':
                    $msg->is_seen = true;
                    break;
                case '\\answered':
                    $msg->is_answered = true;
                    break;
                case '\\deleted':
                    $msg->is_deleted = true;
                    break;
                case '\\flagged':
                    $msg->is_flagged = true;
                    break;
                case '$mdnsent':
                    $msg->is_mdnsent = true;
                    break;
                default:
                    break;
            }
        }
    }
    // listEntities($msg);
    return $msg;
}
b74ba498 109
22efa9fb 110
111
/**
 * Fetches the (still encoded) body of one message part from the IMAP server.
 *
 * The FETCH response is expected to carry the body either as a literal
 * ({size} followed by the data), as a quoted string, or as NIL. When none of
 * these is recognised, a diagnostic table is printed and the complete
 * message is fetched instead.
 *
 * @param resource $imap_stream imap connection resource
 * @param integer  $id          message id
 * @param string   $ent_id      message part id; an empty value fetches BODY[]
 * @param integer  $fetch_size  when non-zero, only the first $fetch_size
 *                              octets are requested
 * @return string raw body as delivered by the server
 */
function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) {
    /* Do a bit of error correction.  If we couldn't find the entity id, just guess
     * that it is the first one.  That is usually the case anyway.
     */

    if (!$ent_id) {
        $cmd = "FETCH $id BODY[]";
    } else {
        $cmd = "FETCH $id BODY[$ent_id]";
    }

    if ($fetch_size!=0) $cmd .= "<0.$fetch_size>";

    $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, TRUE);
    if (!is_array($data)) {
        $data = array();
    }

    /* Skip untagged responses until the FETCH line (or the end of the data). */
    do {
        $topline = trim((string) array_shift($data));
    } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ;

    $wholemessage = implode('', $data);
    if (preg_match('/\{([^}]*)\}/', $topline, $regs)) {
        /* literal: {size} announces how many octets belong to the body */
        $ret = substr($wholemessage, 0, (int) $regs[1]);
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        /* quoted string: the body is part of the FETCH line itself */
        $ret = $regs[1];
    } else if ((stristr($topline, 'nil') !== false) && (empty($wholemessage))) {
        $ret = $wholemessage;
    } else {
        // NOTE(review): the fallback fetches $passed_id (global) rather than
        // $id; presumably both name the same message - confirm with callers.
        global $passed_id;

        /* $response, $message and $topline originate from the IMAP server,
         * so they are escaped before being printed. */
        echo '<tt><br />' .
             '<table width="80%">' .
             '<tr><td colspan="2">' .
             _("Body retrieval error. The reason for this is most probably that the message is malformed.") .
             '</td></tr>' .
             '<tr><td><b>' . _("Command:") . '</b></td><td>' . htmlspecialchars((string) $cmd) . '</td></tr>' .
             '<tr><td><b>' . _("Response:") . '</b></td><td>' . htmlspecialchars((string) $response) . '</td></tr>' .
             '<tr><td><b>' . _("Message:") . '</b></td><td>' . htmlspecialchars((string) $message) . '</td></tr>' .
             '<tr><td><b>' . _("FETCH line:") . '</b></td><td>' . htmlspecialchars((string) $topline) . '</td></tr>' .
             '</table><br /></tt><hr />';

        $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, TRUE);
        if (!is_array($data)) {
            $data = array();
        }
        array_shift($data);
        $wholemessage = implode('', $data);

        $ret = $wholemessage;
    }
    return $ret;
}
d4467150 183
/**
 * Sends the decoded body of a message part to the browser or to a stream.
 *
 * base64 encoded parts are decoded on the fly by sqimap_run_command() (using
 * sqimap_base64_decode as filter) so that they are never buffered as a whole.
 * Every other encoding is fetched completely, decoded with decodeBody() and
 * then written out.
 *
 * @param resource $imap_stream imap connection resource
 * @param integer  $id          message id
 * @param string   $ent_id      message part id; an empty value fetches BODY[]
 * @param string   $encoding    transfer encoding of the part
 * @param mixed    $rStream     output stream; when it is not a resource the
 *                              non-base64 output is echoed
 * @return void
 */
function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding, $rStream='php://stdout') {

    /* Don't kill the connection if the browser is over a dialup
     * and it would take over 30 seconds to download it.
     * Don't call set_time_limit in safe mode.
     */
    if (!ini_get('safe_mode')) {
        set_time_limit(0);
    }

    if (strtolower($encoding) != 'base64') {
        /* everything but base64 is buffered, decoded and written in one go */
        $raw = mime_fetch_body ($imap_stream, $id, $ent_id);
        if (is_resource($rStream)) {
            fputs($rStream, decodeBody($raw, $encoding));
        } else {
            echo decodeBody($raw, $encoding);
        }
        return;
    }

    /* in case of base64 encoded attachments, do not buffer them.
       Instead, echo the decoded attachment directly to screen */
    $query = $ent_id ? "FETCH $id BODY[$ent_id]" : "FETCH $id BODY[]";
    sqimap_run_command($imap_stream,$query,true,$response,$message,TRUE,'sqimap_base64_decode',$rStream,true);

    /*
       TODO, use the same method for quoted printable.
       However, I assume that quoted printable attachments aren't that large
       so the performance gain / memory usage drop will be minimal.
       If we decide to add that then we need to adapt sqimap_fread because
       we need to split the result on \n and fread doesn't stop at \n. That
       means we also should provide $results from sqimap_fread (by ref) to
       the function and set $no_return to false. The $filter function for
       quoted printable should handle unsetting of $results.
     */
    /*
       TODO 2: find out how we write to the output stream php://stdout. fwrite
       doesn't work because 'php://stdout' isn't a stream.
     */
}
beb9e459 229
451f74a2 230/* -[ END MIME DECODING ]----------------------------------------------------------- */
d4467150 231
/**
 * This is here for debugging purposes.  It will print out a list
 * of all the entity IDs that are in the $message object.
 *
 * For every entity one line "<id> : <type0>/<type1> parent = <parent id>" is
 * echoed; children are listed recursively, each prefixed with their index.
 * The root entity has no parent, so its parent id is printed as an empty
 * string.
 *
 * @param object $message message (or message part) object
 * @return void
 */
function listEntities ($message) {
    if (!$message) {
        return;
    }

    /* the top level message has no parent object to read the id from */
    $parent_id = isset($message->parent->entity_id) ? $message->parent->entity_id : '';

    echo "<tt>" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $parent_id . '<br />';
    for ($i = 0; isset($message->entities[$i]); $i++) {
        echo "$i : ";
        listEntities($message->entities[$i]);
    }
}
f0c4dc12 249
/**
 * Translates a priority header value into a readable label.
 *
 * Only the first character of the value is inspected: 1 and 2 mean high,
 * 4 and 5 mean low, everything else (including 3) is treated as normal.
 *
 * @param string $priority priority header value
 * @return string translated priority label
 */
function getPriorityStr($priority) {
    $level = substr($priority, 0, 1);

    /* Higher than normal priority. */
    if ($level == '1' || $level == '2') {
        return _("High");
    }

    /* Lower than normal priority. */
    if ($level == '4' || $level == '5') {
        return _("Low");
    }

    /* Normal priority, also used for anything unrecognised. */
    return _("Normal");
}
276
/**
 * Returns the message part with the given entity id.
 *
 * Thin wrapper around the getEntity() method of the message object.
 *
 * @param object $message message object
 * @param string $ent_id  entity id of the wanted part
 * @return mixed whatever $message->getEntity() returns for $ent_id
 */
function getEntity ($message, $ent_id) {
    $entity = $message->getEntity($ent_id);
    return $entity;
}
8beafbbc 281
/**
 * translateText
 * Extracted from strings.php 23/03/2002
 *
 * Prepares a plain text body for display. Each line is wrapped when it is
 * too long, passed through charset_decode() and parseUrl(), and wrapped in
 * a "quote1" / "quote2" span depending on whether it starts with an odd or
 * an even (non-zero) number of "&gt;" quote markers. The result replaces
 * $body, enclosed in <pre> tags.
 *
 * @param string  $body    message text, modified in place
 * @param integer $wrap_at number of characters per line
 * @param string  $charset character set of the text
 * @return void
 */
function translateText(&$body, $wrap_at, $charset) {
    global $where, $what; /* from searching */
    global $color; /* color theme */

    // require_once(SM_PATH . 'functions/url_parser.php');

    $lines = explode("\n", $body);
    foreach ($lines as $idx => $text) {
        if (strlen($text) - 2 >= $wrap_at) {
            sqWordWrap($text, $wrap_at, $charset);
        }
        $text = charset_decode($charset, $text);
        // NOTE(review): the width of the tab replacement may have been
        // collapsed when this listing was extracted - confirm against the
        // repository copy.
        $text = str_replace("\t", ' ', $text);

        parseUrl ($text);

        /* count the quote markers at the start of the line, skipping spaces */
        $depth = 0;
        $length = strlen($text);
        for ($pos = 0; $pos < $length; ) {
            if ($text[$pos] == ' ') {
                $pos++;
                continue;
            }
            if (substr($text, $pos, 4) !== '&gt;') {
                break;
            }
            $pos += 4;
            $depth++;
        }

        if ($depth % 2) {
            $text = '<span class="quote1">' . $text . '</span>';
        } elseif ($depth) {
            $text = '<span class="quote2">' . $text . '</span>';
        }

        $lines[$idx] = $text;
    }
    $body = '<pre>' . implode("\n", $lines) . '</pre>';
}
328
/**
 * This returns a parsed string called $body. That string can then
 * be displayed as the actual message in the HTML. It contains
 * everything needed, including HTML Tags, Attachments at the
 * bottom, etc.
 *
 * Only parts whose primary type is "text" or "rfc822" are rendered; for any
 * other type an empty string is returned. Unless $clean is set, the globals
 * $download_and_unsafe_link, $download_href, $unsafe_image_toggle_href and
 * $unsafe_image_toggle_text are filled in as a side effect.
 *
 * Since 1.2.0 function uses message_body hook.
 * Till 1.3.0 function included output of formatAttachments().
 *
 * @param resource $imap_stream imap connection resource
 * @param object $message squirrelmail message object
 * @param array $color squirrelmail color theme array
 * @param integer $wrap_at number of characters per line
 * @param string $ent_num (since 1.3.0) message part id
 * @param integer $id (since 1.3.0) message id
 * @param string $mailbox (since 1.3.0) imap folder name
 * @param boolean $clean (since 1.5.1) Do not output stuff that's irrelevant for the printable version.
 * @return string html formated message text
 */
function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX', $clean=FALSE) {
    global $startMessage, $languages, $squirrelmail_language,
           $show_html_default, $sort, $has_unsafe_images, $passed_ent_id,
           $use_iframe, $iframe_height, $download_and_unsafe_link,
           $download_href, $unsafe_image_toggle_href, $unsafe_image_toggle_text,
           $oTemplate, $nbsp;

    // workaround for not updated config.php
    if (!isset($use_iframe)) {
        $use_iframe = false;
    }

    if (!sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET)) {
        $view_unsafe_images = false;
    }

    $body = '';
    $urlmailbox = urlencode($mailbox);
    $body_message = getEntity($message, $ent_num);
    $header = $body_message->header;

    /* Only these primary types are shown as the message text. To add more
     * of them, extend this check. */
    if ($header->type0 != 'text' && $header->type0 != 'rfc822') {
        return $body;
    }

    $body = decodeBody(mime_fetch_body ($imap_stream, $id, $ent_num), $header->encoding);

    /* language specific decoding (XTRA_CODE) for non-ASCII bodies */
    if (isset($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $xtra_decode = $languages[$squirrelmail_language]['XTRA_CODE'] . '_decode';
        if (function_exists($xtra_decode) && mb_detect_encoding($body) != 'ASCII') {
            $body = call_user_func($xtra_decode, $body);
        }
    }

    /* As of 1.5.2, $body is passed (and modified) by reference */
    do_hook('message_body', $body);

    /* If there are other types that shouldn't be formatted, add
     * them here.
     */
    if ($header->type1 == 'html') {
        if ($show_html_default <> 1) {
            /* html is not wanted: reduce the message to plain text */
            $entity_conv = array('&nbsp;' => ' ',
                                 '<p>'    => "\n",
                                 '<P>'    => "\n",
                                 '<br>'   => "\n",
                                 '<BR>'   => "\n",
                                 '<br />' => "\n",
                                 '<BR />' => "\n",
                                 '&gt;'   => '>',
                                 '&lt;'   => '<');
            $body = trim(strip_tags(strtr($body, $entity_conv)));
            translateText($body, $wrap_at, $header->getParameter('charset'));
        } elseif ($use_iframe && ! $clean) {
            // $clean is used to remove iframe in printable view.

            /**
             * If we don't add html message between iframe tags,
             * we must detect unsafe images and modify $has_unsafe_images.
             */
            $html_body = magicHTML($body, $id, $message, $mailbox);
            // Convert character set in order to display html mails in different character set
            $html_body = charset_decode($header->getParameter('charset'),$html_body,false,true);

            // creating iframe url
            $iframeurl = sqm_baseuri() . 'src/view_html.php?'
                       . 'mailbox=' . $urlmailbox
                       . '&amp;passed_id=' . $id
                       . '&amp;ent_id=' . $ent_num
                       . '&amp;view_unsafe_images=' . (int) $view_unsafe_images;

            $oTemplate->assign('iframe_url', $iframeurl);
            $oTemplate->assign('html_body', $html_body);

            $body = $oTemplate->fetch('read_html_iframe.tpl');
        } else {
            // old way of html rendering
            /**
             * convert character set. charset_decode does not remove html special chars
             * applied by magicHTML functions and does not sanitize them second time if
             * fourth argument is true.
             */
            $charset = $header->getParameter('charset');
            if (!empty($charset)) {
                $body = charset_decode($charset,$body,false,true);
            }
            $body = magicHTML($body, $id, $message, $mailbox);
        }
    } else {
        translateText($body, $wrap_at, $header->getParameter('charset'));
    }

    // if this is the clean display (i.e. printer friendly), stop here.
    if ($clean) {
        return $body;
    }

    $download_and_unsafe_link = '';

    $link = 'passed_id=' . $id . '&amp;ent_id='.$ent_num.
            '&amp;mailbox=' . $urlmailbox .'&amp;sort=' . $sort .
            '&amp;startMessage=' . $startMessage . '&amp;show_more=0';
    if (isset($passed_ent_id)) {
        $link .= '&amp;passed_ent_id='.$passed_ent_id;
    }
    $download_href = SM_PATH . 'src/download.php?absolute_dl=true&amp;' . $link;
    $download_and_unsafe_link .= "$nbsp|$nbsp"
        . create_hyperlink($download_href, _("Download this as a file"));

    /* decide which (if any) unsafe image toggle is offered */
    if ($view_unsafe_images) {
        $text = _("Hide Unsafe Images");
    } elseif (!empty($has_unsafe_images)) {
        $link .= '&amp;view_unsafe_images=1';
        $text = _("View Unsafe Images");
    } else {
        $text = '';
    }

    if ($text != '') {
        $unsafe_image_toggle_href = SM_PATH . 'src/read_body.php?'.$link;
        $unsafe_image_toggle_text = $text;
        $download_and_unsafe_link .= "$nbsp|$nbsp"
            . create_hyperlink($unsafe_image_toggle_href, $text);
    }

    return $body;
}
b74ba498 480
/**
 * Generate attachments array for passing to templates.  Separated from
 * formatAttachments() below so that the same array can be given to the
 * print-friendly version.
 *
 * For every attachment the hooks "attachment <type0>/<type1>",
 * "attachment <type0>/*" (only when the specific hook changed nothing) and
 * "attachment * / *" (written without the spaces) are run. Hook arguments are
 * passed by reference in one array, so plugins change them in place.
 *
 * Each returned element has the keys Name, Description, DefaultHREF,
 * DownloadHREF, ViewHREF, Size, ContentType and OtherLinks (a list of
 * arrays with the keys HREF and Text).
 *
 * @since 1.5.2
 * @param object $message SquirrelMail message object
 * @param array $exclude_id message parts that are not attachments.
 * @param string $mailbox mailbox name
 * @param integer $id message id
 * @return array list of attachment descriptions, see above
 */
function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
    global $where, $what, $startMessage, $color, $passed_ent_id, $base_uri;

    $att_ar = $message->getAttachments($exclude_id);
    $urlMailbox = urlencode($mailbox);

    $attachments = array();
    foreach ($att_ar as $att) {
        $ent = $att->entity_id;
        $header = $att->header;
        $type0 = strtolower($header->type0);
        $type1 = strtolower($header->type1);
        $links = array();
        $links['download link']['text'] = _("Download");
        $links['download link']['href'] = $base_uri .
            "src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;ent_id=$ent";

        if ($type0 =='message' && $type1 == 'rfc822') {
            $default_page = $base_uri . 'src/read_body.php';
            $rfc822_header = $att->rfc822_header;
            $filename = $rfc822_header->subject;
            if (trim( $filename ) == '') {
                $filename = 'untitled-[' . $ent . ']' ;
            }
            $from_o = $rfc822_header->from;
            if (is_object($from_o)) {
                $from_name = decodeHeader($from_o->getAddress(false));
            } elseif (is_array($from_o) && count($from_o) && is_object($from_o[0])) {
                // something weird happens when a digest message is opened and you return to the digest
                // now the from object is part of an array. Probably the parseHeader call overwrites the info
                // retrieved from the bodystructure in a different way. We need to fix this later.
                // possible starting point, do not fetch header we already have and inspect how
                // the rfc822_header object behaves.
                $from_name = decodeHeader($from_o[0]->getAddress(false));
            } else {
                $from_name = _("Unknown sender");
            }
            $description = _("From").': '.$from_name;
        } else {
            $default_page = $base_uri . 'src/download.php';
            $filename = $att->getFilename();
            if ($header->description) {
                $description = decodeHeader($header->description);
            } else {
                $description = '';
            }
        }

        $display_filename = $filename;
        if (isset($passed_ent_id)) {
            $passed_ent_id_link = '&amp;passed_ent_id='.$passed_ent_id;
        } else {
            $passed_ent_id_link = '';
        }
        $defaultlink = $default_page . "?startMessage=$startMessage"
                     . "&amp;passed_id=$id&amp;mailbox=$urlMailbox"
                     . '&amp;ent_id='.$ent.$passed_ent_id_link;
        if ($where && $what) {
            $defaultlink .= '&amp;where='. urlencode($where).'&amp;what='.urlencode($what);
        }
        // IE does make use of mime content sniffing. Forcing a download
        // prohibit execution of XSS inside an application/octet-stream attachment
        if ($type0 == 'application' && $type1 == 'octet-stream') {
            $defaultlink .= '&amp;absolute_dl=true';
        }

        /* This executes the attachment hook with a specific MIME-type.
         * If that doesn't have results, it tries if there's a rule
         * for a more generic type. Finally, a hook for ALL attachment
         * types is run as well.
         *
         * The API for these hooks has changed as of 1.5.2 so that all plugin
         * arguments are passed in an array instead of each their own plugin
         * argument, and arguments are passed by reference, so instead of
         * returning any changes, changes should simply be made to the original
         * arguments themselves.
         */
        // First remember the default link.
        $defaultlink_orig = $defaultlink;

        $hook_args = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
                           &$defaultlink, &$display_filename, &$where, &$what);
        do_hook("attachment $type0/$type1", $hook_args);

        if (count($links) <= 1 && $defaultlink == $defaultlink_orig) {
            $hook_args = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
                               &$defaultlink, &$display_filename, &$where, &$what);
            do_hook("attachment $type0/*", $hook_args);
        }

        $hook_args = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
                           &$defaultlink, &$display_filename, &$where, &$what);
        // Do not let a generic plugin change the default link if a more
        // specialized one already did it...
        if ($defaultlink != $defaultlink_orig) {
            $dummy = '';
            $hook_args[5] = &$dummy;
        }
        do_hook("attachment */*", $hook_args);

        $this_attachment = array();
        $this_attachment['Name'] = decodeHeader($display_filename);
        $this_attachment['Description'] = $description;
        $this_attachment['DefaultHREF'] = $defaultlink;
        $this_attachment['DownloadHREF'] = $links['download link']['href'];
        $this_attachment['ViewHREF'] = isset($links['attachment_common']) ? $links['attachment_common']['href'] : '';
        $this_attachment['Size'] = $header->size;
        $this_attachment['ContentType'] = htmlspecialchars($type0 .'/'. $type1);
        $this_attachment['OtherLinks'] = array();
        foreach ($links as $val) {
            /* plugins are not obliged to set both 'text' and 'extra' */
            $link_text  = isset($val['text'])  ? $val['text']  : '';
            $link_extra = isset($val['extra']) ? $val['extra'] : '';

            /* download and view links have their own keys above */
            if ($link_text == _("Download") || $link_text == _("View"))
                continue;
            if (empty($link_text) && empty($link_extra))
                continue;

            $other_link = array();
            $other_link['HREF'] = $val['href'];
            $other_link['Text'] = (empty($link_text) ? '' : $link_text) . (empty($link_extra) ? '' : $link_extra);
            $this_attachment['OtherLinks'][] = $other_link;
        }
        $attachments[] = $this_attachment;

        /* drop the reference held by $hook_args before $links is rebuilt */
        unset($links);
    }

    return $attachments;
}
627
/**
 * Displays attachment links and information
 *
 * Builds the attachment list with buildAttachmentArray(), hands it to the
 * template engine as "attachments" and displays read_attachments.tpl.
 *
 * Since 1.3.0 function is not included in formatBody() call.
 *
 * Since 1.0.2 uses attachment $type0/$type1 hook.
 * Since 1.2.5 uses attachment $type0/* hook.
 * Since 1.5.0 uses attachments_bottom hook.
 * Since 1.5.2 uses templates and does *not* return a value.
 *
 * @param object $message SquirrelMail message object
 * @param array $exclude_id message parts that are not attachments.
 * @param string $mailbox mailbox name
 * @param integer $id message id
 */
function formatAttachments($message, $exclude_id, $mailbox, $id) {
    global $oTemplate;

    $attachment_list = buildAttachmentArray($message, $exclude_id, $mailbox, $id);

    $oTemplate->assign('attachments', $attachment_list);
    $oTemplate->display('read_attachments.tpl');
}
b74ba498 651
/**
 * Decodes one chunk of a base64 stream in place.
 *
 * Base64 data comes in groups of 4 bytes. To decode on the fly (and keep
 * memory usage low) a chunk may end in the middle of such a group; the
 * incomplete tail is then cut off and returned so that the caller can
 * prepend it to the next chunk. A tail ending in the padding character '='
 * is not cut off.
 *
 * @param string $string base64 chunk, replaced by the decoded data
 * @return string undecoded remainder of the chunk (empty when there is none)
 */
function sqimap_base64_decode(&$string) {

    // Remove the noise (line breaks and spaces) before counting the bytes
    $string = str_replace(array("\r", "\n", ' '), '', $string);

    $leftover = '';
    $excess = strlen($string) % 4;

    // An incomplete group is only held back when it carries no padding
    if ($excess > 0 && substr($string, -1) !== '=') {
        $leftover = substr($string, -$excess);
        $string = substr($string, 0, -$excess);
    }

    $string = base64_decode($string);
    return $leftover;
}
675
/**
 * Decodes encoded message body
 *
 * Line endings are normalised to "\n" first. A function named by the
 * decode_body hook gets the first shot at decoding; otherwise
 * quoted-printable and base64 are decoded here and every other encoding is
 * returned as is.
 * decode_body hook was added to this function in 1.4.2/1.5.0
 *
 * @param string $body encoded message body
 * @param string $encoding used encoding
 * @return string decoded string
 * @since 1.0
 */
function decodeBody($body, $encoding) {

    $body = str_replace("\r\n", "\n", $body);
    $encoding = strtolower($encoding);

    $encoding_handler = do_hook('decode_body', $encoding);

    // plugins get first shot at decoding the body
    if (!empty($encoding_handler) && function_exists($encoding_handler)) {
        return $encoding_handler('decode', $body);
    }

    switch ($encoding) {
        case 'quoted-printable':
        case 'quoted_printable':
            /**
             * quoted_printable_decode() function is broken in older
             * php versions. Text with \r\n decoding was fixed only
             * in php 4.3.0. Minimal code requirement 4.0.4 +
             * str_replace("\r\n", "\n", $body); call.
             */
            return quoted_printable_decode($body);

        case 'base64':
            return base64_decode($body);
    }

    // All other encodings are returned raw.
    return $body;
}
716
/**
 * Decodes headers
 *
 * This function decodes strings that are encoded according to
 * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
 * Patched by Christian Schmidt <christian@ostenfeld.dk>  23/03/2002
 *
 * The header is split on spaces. Encoded words (=?charset?Q|B?text?=) are
 * decoded; the space between two directly adjacent encoded words is
 * dropped, all other separating spaces are kept (as &#32; when $htmlsave
 * is set).
 *
 * @param string $string header string that has to be made readable
 * @param boolean $utfencode change message in order to be readable on user's charset. defaults to true
 * @param boolean $htmlsave preserve spaces and sanitize html special characters. defaults to true
 * @param boolean $decide decide if string can be utfencoded. defaults to false
 * @return string decoded header string
 */
function decodeHeader ($string, $utfencode=true,$htmlsave=true,$decide=false) {
    global $languages, $squirrelmail_language,$default_charset;
    if (is_array($string)) {
        $string = implode("\n", $string);
    }

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader')) {
        $string = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader', $string);
        // Do we need to return at this point?
        // return $string;
    }
    $i = 0;
    $iLastMatch = -2;
    $encoded = true;

    $aString = explode(' ',$string);
    $ret = '';
    foreach ($aString as $chunk) {
        /* empty chunks stand for consecutive spaces in the header */
        if ($encoded && $chunk === '') {
            continue;
        } elseif ($chunk === '') {
            $ret .= ' ';
            continue;
        }
        $encoded = false;
        /* if encoded words are not separated by a linear-space-white we still catch them */
        $j = $i-1;

        while (preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) {
            /* if the last chunk isn't an encoded string then put back the space, otherwise don't */
            if ($iLastMatch !== $j) {
                if ($htmlsave) {
                    $ret .= '&#32;';
                } else {
                    $ret .= ' ';
                }
            }
            $iLastMatch = $i;
            $j = $i;
            /* text in front of the encoded word */
            if ($htmlsave) {
                $ret .= htmlspecialchars($res[1]);
            } else {
                $ret .= $res[1];
            }
            $encoding = ucfirst($res[3]);

            /* decide about valid decoding */
            if ($decide && is_conversion_safe($res[2])) {
                $utfencode=true;
                $can_be_encoded=true;
            } else {
                $can_be_encoded=false;
            }
            switch ($encoding)
            {
                case 'B':
                    $replace = base64_decode($res[4]);
                    if ($utfencode) {
                        if ($can_be_encoded) {
                            /* convert string to different charset,
                             * if functions asks for it (usually in compose)
                             */
                            $ret .= charset_convert($res[2],$replace,$default_charset,$htmlsave);
                        } else {
                            // convert string to html codes in order to display it
                            $ret .= charset_decode($res[2],$replace);
                        }
                    } else {
                        if ($htmlsave) {
                            $replace = htmlspecialchars($replace);
                        }
                        $ret.= $replace;
                    }
                    break;
                case 'Q':
                    $replace = str_replace('_', ' ', $res[4]);
                    /* =XX hex pairs; a named callback is used because the
                     * /e modifier is not available in current PHP versions */
                    $replace = preg_replace_callback('/=([0-9a-f]{2})/i',
                                                     'sqDecodeHeaderHexPair',
                                                     $replace);
                    if ($utfencode) {
                        if ($can_be_encoded) {
                            /* convert string to different charset,
                             * if functions asks for it (usually in compose)
                             */
                            $replace = charset_convert($res[2], $replace,$default_charset,$htmlsave);
                        } else {
                            // convert string to html codes in order to display it
                            $replace = charset_decode($res[2], $replace);
                        }
                    } else {
                        if ($htmlsave) {
                            $replace = htmlspecialchars($replace);
                        }
                    }
                    $ret .= $replace;
                    break;
                default:
                    break;
            }
            /* continue with whatever follows the encoded word */
            $chunk = $res[5];
            $encoded = true;
        }
        if (!$encoded) {
            if ($htmlsave) {
                $ret .= '&#32;';
            } else {
                $ret .= ' ';
            }
        }

        if (!$encoded && $htmlsave) {
            $ret .= htmlspecialchars($chunk);
        } else {
            $ret .= $chunk;
        }
        ++$i;
    }
    /* remove the first added space */
    if ($ret) {
        if ($htmlsave) {
            $ret = substr($ret,5);
        } else {
            $ret = substr($ret,1);
        }
    }

    return $ret;
}

/**
 * Callback of decodeHeader(): turns one "=XX" match of a Q encoded word
 * into the character with that hexadecimal code.
 *
 * @param array $aMatch regular expression match, $aMatch[1] holds the two hex digits
 * @return string single character
 * @access private
 */
function sqDecodeHeaderHexPair($aMatch) {
    return chr(hexdec($aMatch[1]));
}
858
/**
 * Encodes a header string for use in mail headers.
 *
 * If the active language defines an XTRA_CODE prefix and a function named
 * "<XTRA_CODE>_encodeheader" exists, the work is delegated to that function.
 *
 * If $default_charset is one of the multibyte character sets listed in
 * $mb_charsets, is reported by sq_mb_list_encodings() and the string is
 * 8bit, the string is encoded with encodeHeaderBase64() (B encoding).
 *
 * Otherwise Q encoding is used. The string is only rewritten when it
 * contains at least one byte with a value above 126; in that case the word
 * containing that byte is encoded, together with following characters,
 * until a parenthesis or the length limit closes the encoded-word. Strings
 * without such bytes are returned unchanged.
 *
 * @param string $string header string, that has to be encoded
 * @return string the header string, encoded where needed
 * @todo make $mb_charsets system wide constant
 */
function encodeHeader ($string) {
    global $default_charset, $languages, $squirrelmail_language;

    // Language specific encoder takes precedence over everything else.
    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader')) {
        return call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader', $string);
    }

    // Use B encoding for multibyte charsets.
    // The list used to contain 'gb2313', which is not a character set name;
    // 'gb2312' is the intended entry.
    $mb_charsets = array('utf-8','big5','gb2312','euc-kr');
    if (in_array($default_charset,$mb_charsets) &&
        in_array($default_charset,sq_mb_list_encodings()) &&
        sq_is8bit($string)) {
        return encodeHeaderBase64($string,$default_charset);
    } elseif (in_array($default_charset,$mb_charsets) &&
              sq_is8bit($string) &&
              ! in_array($default_charset,sq_mb_list_encodings())) {
        // Add E_USER_NOTICE error here (can cause 'Cannot add header information' warning in compose.php)
        // trigger_error('encodeHeader: Multibyte character set unsupported by mbstring extension.',E_USER_NOTICE);
    }

    // Q encoding. $ret collects the text of the encoded-word that is being
    // built, $aRet collects finished pieces (plain and encoded) in order.
    $j = strlen($string);
    // room left for encoded text inside one encoded-word
    $max_l = 75 - strlen($default_charset) - 7;
    $aRet = array();
    $ret = '';
    // $iEncStart: offset where the current encoded-word started (false = none)
    // $enc_init:  set once an 8bit byte was seen; without it nothing is rewritten
    $iEncStart = $enc_init = false;
    // $cur_l: length of the current encoded text, $iOffset: start of pending plain text
    $cur_l = $iOffset = 0;
    for($i = 0; $i < $j; ++$i) {
        switch($string[$i])
        {
            case '=':
            case '<':
            case '>':
            case ',':
            case '?':
            case '_':
                if ($iEncStart === false) {
                    $iEncStart = $i;
                }
                $cur_l+=3;
                if ($cur_l > ($max_l-2)) {
                    /* if there is a string part that doesn't need encoding, add it */
                    $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                    $aRet[] = "=?$default_charset?Q?$ret?=";
                    $iOffset = $i;
                    $cur_l = 0;
                    $ret = '';
                    $iEncStart = false;
                } else {
                    $ret .= sprintf("=%02X",ord($string[$i]));
                }
                break;
            case '(':
            case ')':
                // parentheses always close a running encoded-word
                if ($iEncStart !== false) {
                    $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                    $aRet[] = "=?$default_charset?Q?$ret?=";
                    $iOffset = $i;
                    $cur_l = 0;
                    $ret = '';
                    $iEncStart = false;
                }
                break;
            case ' ':
                // inside an encoded-word a space is written as underscore
                if ($iEncStart !== false) {
                    $cur_l++;
                    if ($cur_l > $max_l) {
                        $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                        $aRet[] = "=?$default_charset?Q?$ret?=";
                        $iOffset = $i;
                        $cur_l = 0;
                        $ret = '';
                        $iEncStart = false;
                    } else {
                        $ret .= '_';
                    }
                }
                break;
            default:
                $k = ord($string[$i]);
                if ($k > 126) {
                    if ($iEncStart === false) {
                        // do not start encoding in the middle of a string, also take the rest of the word.
                        $sLeadString = substr($string,0,$i);
                        $aLeadString = explode(' ',$sLeadString);
                        $sToBeEncoded = array_pop($aLeadString);
                        $iEncStart = $i - strlen($sToBeEncoded);
                        $ret .= $sToBeEncoded;
                        $cur_l += strlen($sToBeEncoded);
                    }
                    $cur_l += 3;
                    /* first we add the encoded string that reached its max size */
                    if ($cur_l > ($max_l-2)) {
                        $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                        $aRet[] = "=?$default_charset?Q?$ret?= "; /* the next part is also encoded => separate by space */
                        $cur_l = 3;
                        $ret = '';
                        $iOffset = $i;
                        $iEncStart = $i;
                    }
                    $enc_init = true;
                    $ret .= sprintf("=%02X", $k);
                } else {
                    if ($iEncStart !== false) {
                        $cur_l++;
                        if ($cur_l > $max_l) {
                            $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                            $aRet[] = "=?$default_charset?Q?$ret?=";
                            $iEncStart = false;
                            $iOffset = $i;
                            $cur_l = 0;
                            $ret = '';
                        } else {
                            $ret .= $string[$i];
                        }
                    }
                }
                break;
        }
    }

    // Only replace the input when an 8bit byte was found.
    if ($enc_init) {
        if ($iEncStart !== false) {
            $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
            $aRet[] = "=?$default_charset?Q?$ret?=";
        } else {
            $aRet[] = substr($string,$iOffset);
        }
        $string = implode('',$aRet);
    }
    return $string;
}
b74ba498 1010
/**
 * Encodes string according to rfc2047 B encoding header formatting rules
 *
 * It is the recommended way to encode headers with character sets that store
 * symbols in more than one byte.
 *
 * Function requires mbstring support. If required mbstring functions are missing,
 * function returns false and sets E_USER_WARNING level error message.
 *
 * The string is cut on character boundaries (mb_substr), never inside a
 * multibyte character, so that the base64 text of every encoded-word stays
 * within the length limit. Encoded-words are joined with a single space.
 * mb_encode_mimeheader function provided by php mbstring extension is not
 * used in order to get better control of header encoding.
 *
 * Related documents: rfc 2045 (BASE64 encoding), rfc 2047 (mime header
 * encoding), rfc 2822 (header folding)
 *
 * @param string $string header string that must be encoded
 * @param string $charset character set. Must be supported by mbstring extension.
 *                        Use sq_mb_list_encodings() to detect supported charsets.
 * @return string|false string encoded according to rfc2047 B encoding
 *                      formatting rules, empty string for empty input, or
 *                      false when mbstring functions are missing
 * @since 1.5.1
 * @todo First header line can be wrapped to $iMaxLength - $HeaderFieldLength - 1
 * @todo Do we want to control max length of header?
 * @todo Do we want to control EOL (end-of-line) marker?
 * @todo Do we want to translate error message?
 */
function encodeHeaderBase64($string,$charset) {
    /**
     * Check mbstring function requirements.
     */
    if (! function_exists('mb_strlen') ||
        ! function_exists('mb_substr')) {
        // set E_USER_WARNING
        trigger_error('encodeHeaderBase64: Required mbstring functions are missing.',E_USER_WARNING);
        // return false
        return false;
    }

    // count characters once instead of on every loop iteration
    $iCharCount = mb_strlen($string,$charset);

    // nothing to encode; an encoded-word without text must not be produced
    if ($iCharCount < 1) {
        return '';
    }

    // initial return array
    $aRet = array();

    /**
     * header length = 75 symbols max (same as in encodeHeader)
     * remove $charset length
     * remove =? ? ?= (5 chars)
     * remove 2 more chars (\r\n ?)
     */
    $iMaxLength = 75 - strlen($charset) - 7;

    // set first character position
    $iStartCharNum = 0;

    // encoded text of the part that is currently being built
    $encoded_string = '';

    // loop through all characters. count characters and not bytes.
    for ($iCharNum=1; $iCharNum<=$iCharCount; $iCharNum++) {
        // encode string from starting character to current character.
        $encoded_string = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum,$charset));

        // Check encoded string length
        if (strlen($encoded_string)>$iMaxLength) {
            // if string exceeds max length, reduce number of encoded characters and add encoded string part to array
            $aRet[] = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum-1,$charset));

            // set new starting character
            $iStartCharNum = $iCharNum-1;

            // encode last char (in case it is last character in string)
            $encoded_string = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum,$charset));
        } // if string is shorter than max length - add next character
    }

    // add last encoded string to array
    $aRet[] = $encoded_string;

    // set initial return string
    $sRet = '';

    // loop through encoded strings
    foreach ($aRet as $sEncodedPart) {
        // TODO: Do we want to control EOL (end-of-line) marker
        if ($sRet!='') $sRet.= " ";

        // add header tags and encoded string to return string
        $sRet.= '=?'.$charset.'?B?'.$sEncodedPart.'?=';
    }

    return $sRet;
}
1104
/**
 * Tries to locate the entity_id of a specific mime element.
 *
 * Walks $message->entities in order. Entities whose header type0 is
 * 'multipart' are searched recursively; for all other entities the header id
 * and, when set, the 'name' header parameter are compared with $id
 * (case insensitive).
 *
 * @param string $id id (or name) to look for
 * @param object $message message object with an entities array
 * @return string entity_id of the first matching entity, empty string when
 *                nothing matched
 */
function find_ent_id($id, $message) {
    $found = '';
    $idx = 0;
    while ($found == '' && $idx < count($message->entities)) {
        $entity = $message->entities[$idx];
        $idx++;

        if ($entity->header->type0 == 'multipart') {
            // descend; a non-empty result ends the loop
            $found = find_ent_id($id, $entity);
            continue;
        }

        if (strcasecmp($entity->header->id, $id) == 0) {
            return $entity->entity_id;
        }

        /**
         * This is part of a fix for Outlook Express 6.x generating
         * cid URLs without creating content-id headers
         * @@JA - 20050207
         */
        if (!empty($entity->header->parameters['name'])
            && strcasecmp($entity->header->parameters['name'], $id) == 0) {
            return $entity->entity_id;
        }
    }
    return $found;
}
a3daaaf3 1129
/**
 * Checks whether the file name of a message part ends with one of the
 * image extensions jpg, jpeg, gif, png or bmp.
 *
 * The extension is the text after the last dot. A name without a dot has
 * no extension and is rejected. The comparison is case sensitive.
 *
 * @param object $message object that provides getFilename()
 * @return bool true when the extension is in the list
 */
function sq_check_save_extension($message) {
    $filename = $message->getFilename();
    $dot = strrpos($filename, '.');
    if ($dot === false) {
        // without this check false+1 made substr() start at offset 1, so a
        // dot-less name like "ajpg" was accepted as "jpg"
        return false;
    }
    $ext = substr($filename, $dot + 1);
    $save_extensions = array('jpg','jpeg','gif','png','bmp');
    return in_array($ext, $save_extensions, true);
}
1136
1137
691a2d25 1138/**
8bd0068d 1139 ** HTMLFILTER ROUTINES
1140 */
451f74a2 1141
/**
 * Translates entity-encoded and backslash-escaped values inside an
 * attribute value into 8-bit strings, so that checks can be run on them.
 *
 * Values that contain neither an ampersand nor a backslash are left
 * untouched. Otherwise sq_deent() is applied for decimal entities,
 * hexadecimal entities and backslash followed by digits, again and again
 * until a pass finds nothing to translate; finally stripslashes() is
 * applied.
 *
 * @param $attvalue A string to run entity check against.
 * @return Nothing, modifies a reference value.
 */
function sq_defang(&$attvalue){
    /**
     * Skip this if there aren't ampersands or backslashes.
     */
    $has_amp = (strpos($attvalue, '&') !== false);
    $has_backslash = (strpos($attvalue, '\\') !== false);
    if (!$has_amp && !$has_backslash){
        return;
    }

    // One translation per pass: as soon as one pattern matched, the
    // remaining patterns wait for the next pass.
    do {
        $changed = sq_deent($attvalue, '/\&#0*(\d+);*/s')
            || sq_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
            || sq_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($changed == true);

    $attvalue = stripslashes($attvalue);
}
1173
/**
 * Removes every tab, carriage return, newline, NUL byte and space from an
 * attribute value. Our friends the makers of the browser with 95% market
 * value decided that it'd be funny to make "java[tab]script" be just as
 * good as "javascript".
 *
 * @param attvalue The attribute value before extraneous spaces removed.
 * @return attvalue Nothing, modifies a reference value.
 */
function sq_unspace(&$attvalue){
    // nothing to do when none of the characters is present
    if (strcspn($attvalue, "\t\r\n\0 ") == strlen($attvalue)){
        return;
    }
    $unwanted = array("\t", "\r", "\n", "\0", " ");
    $attvalue = str_replace($unwanted, '', $attvalue);
}
1189
/**
 * Translate all dangerous Unicode characters which are accepted by
 * IE as regular characters.
 *
 * Steps, in this order:
 *  - NUL bytes are removed
 *  - comments (slash-star ... star-slash) are removed, also when they
 *    span several lines
 *  - look-alike characters are replaced by the plain ASCII letter, both
 *    when written as numeric entity and when present as raw bytes
 *  - a backslash in front of an ASCII letter is removed
 *
 * @param attvalue The attribute value before dangerous characters are translated.
 * @return attvalue Nothing, modifies a reference value.
 * @author Marc Groot Koerkamp.
 */
function sq_fixIE_idiocy(&$attvalue) {
    // remove NUL
    $attvalue = str_replace("\0", "", $attvalue);
    // remove comments; "s" lets the dot match newlines, otherwise a comment
    // with a line break inside is not removed
    $attvalue = preg_replace("/(\/\*.*?\*\/)/s","",$attvalue);

    // IE has the evil habit of accepting every possible value for the attribute expression.
    // The table below contains characters which are parsed by IE if they are used in the "expression"
    // attribute value. Key = what to look for, value = replacement.
    $aDangerousChars = array(
        '&#x029F;' => 'l', '&#0671;'  => 'l', /* L UNICODE IPA Extension */
        '&#x0280;' => 'r', '&#0640;'  => 'r', /* R UNICODE IPA Extension */
        '&#x0274;' => 'n', '&#0628;'  => 'n', /* N UNICODE IPA Extension */
        '&#xFF25;' => 'E', '&#65317;' => 'E', /* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
        '&#xFF45;' => 'e', '&#65349;' => 'e', /* Unicode FULLWIDTH LATIN SMALL LETTER E */
        '&#xFF38;' => 'X', '&#65336;' => 'X', /* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
        '&#xFF58;' => 'x', '&#65368;' => 'x', /* Unicode FULLWIDTH LATIN SMALL LETTER X */
        '&#xFF30;' => 'P', '&#65328;' => 'P', /* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
        '&#xFF50;' => 'p', '&#65360;' => 'p', /* Unicode FULLWIDTH LATIN SMALL LETTER P */
        '&#xFF32;' => 'R', '&#65330;' => 'R', /* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
        '&#xFF52;' => 'r', '&#65362;' => 'r', /* Unicode FULLWIDTH LATIN SMALL LETTER R */
        '&#xFF33;' => 'S', '&#65331;' => 'S', /* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
        '&#xFF53;' => 's', '&#65363;' => 's', /* Unicode FULLWIDTH LATIN SMALL LETTER S */
        '&#xFF29;' => 'I', '&#65321;' => 'I', /* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
        '&#xFF49;' => 'i', '&#65353;' => 'i', /* Unicode FULLWIDTH LATIN SMALL LETTER I */
        '&#xFF2F;' => 'O', '&#65327;' => 'O', /* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
        '&#xFF4F;' => 'o', '&#65359;' => 'o', /* Unicode FULLWIDTH LATIN SMALL LETTER O */
        '&#xFF2E;' => 'N', '&#65326;' => 'N', /* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
        '&#xFF4E;' => 'n', '&#65358;' => 'n', /* Unicode FULLWIDTH LATIN SMALL LETTER N */
        '&#xFF2C;' => 'L', '&#65324;' => 'L', /* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
        '&#xFF4C;' => 'l', '&#65356;' => 'l', /* Unicode FULLWIDTH LATIN SMALL LETTER L */
        '&#xFF35;' => 'U', '&#65333;' => 'U', /* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
        '&#xFF55;' => 'u', '&#65365;' => 'u', /* Unicode FULLWIDTH LATIN SMALL LETTER U */
        '&#x207F;' => 'n', '&#8319;'  => 'n', /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
        // The same characters as raw bytes. The original comments label the
        // fullwidth ones "Shift JIS"; the byte sequences are the UTF-8
        // encodings of the code points listed above.
        "\xEF\xBC\xA5" => 'E', /* FULLWIDTH LATIN CAPITAL LETTER E */
        "\xEF\xBD\x85" => 'e', /* FULLWIDTH LATIN SMALL LETTER E */
        "\xEF\xBC\xB8" => 'X', /* FULLWIDTH LATIN CAPITAL LETTER X */
        "\xEF\xBD\x98" => 'x', /* FULLWIDTH LATIN SMALL LETTER X */
        "\xEF\xBC\xB0" => 'P', /* FULLWIDTH LATIN CAPITAL LETTER P */
        "\xEF\xBD\x90" => 'p', /* FULLWIDTH LATIN SMALL LETTER P */
        "\xEF\xBC\xB2" => 'R', /* FULLWIDTH LATIN CAPITAL LETTER R */
        "\xEF\xBD\x92" => 'r', /* FULLWIDTH LATIN SMALL LETTER R */
        "\xEF\xBC\xB3" => 'S', /* FULLWIDTH LATIN CAPITAL LETTER S */
        "\xEF\xBD\x93" => 's', /* FULLWIDTH LATIN SMALL LETTER S */
        "\xEF\xBC\xA9" => 'I', /* FULLWIDTH LATIN CAPITAL LETTER I */
        "\xEF\xBD\x89" => 'i', /* FULLWIDTH LATIN SMALL LETTER I */
        "\xEF\xBC\xAF" => 'O', /* FULLWIDTH LATIN CAPITAL LETTER O */
        "\xEF\xBD\x8F" => 'o', /* FULLWIDTH LATIN SMALL LETTER O */
        "\xEF\xBC\xAE" => 'N', /* FULLWIDTH LATIN CAPITAL LETTER N */
        "\xEF\xBD\x8E" => 'n', /* FULLWIDTH LATIN SMALL LETTER N */
        "\xEF\xBC\xAC" => 'L', /* FULLWIDTH LATIN CAPITAL LETTER L */
        "\xEF\xBD\x8C" => 'l', /* FULLWIDTH LATIN SMALL LETTER L */
        "\xEF\xBC\xB5" => 'U', /* FULLWIDTH LATIN CAPITAL LETTER U */
        "\xEF\xBD\x95" => 'u', /* FULLWIDTH LATIN SMALL LETTER U */
        "\xE2\x81\xBF" => 'n', /* SUPERSCRIPT LATIN SMALL LETTER N */
        "\xCA\x9F"     => 'l', /* L UNICODE IPA Extension */
        "\xCA\x80"     => 'r', /* R UNICODE IPA Extension */
        "\xC9\xB4"     => 'n'  /* N UNICODE IPA Extension */
    );
    $attvalue = str_replace(array_keys($aDangerousChars), array_values($aDangerousChars), $attvalue);

    // Escapes are useful for special characters like "{}[]()'&. In other cases they are
    // used for XSS.
    $attvalue = preg_replace("/(\\\\)([a-zA-Z]{1})/",'$2',$attvalue);
}
1266
/**
 * This function returns the final tag out of the tag name, an array
 * of attributes, and the type of the tag. This function is called by
 * sq_sanitize internally.
 *
 * Tag types: 2 is a closing tag (attributes are ignored), 3 is a tag
 * that is closed with " />", every other value is printed as an opening
 * tag. Attribute values are printed as they are, so they have to carry
 * their own quotes.
 *
 * @param $tagname the name of the tag.
 * @param $attary the array of attributes and their values
 * @param $tagtype The type of the tag (see in comments).
 * @return a string with the final tag representation.
 */
function sq_tagprint($tagname, $attary, $tagtype){
    if ($tagtype == 2){
        return '</' . $tagname . '>';
    }

    $fulltag = '<' . $tagname;
    if (is_array($attary) && sizeof($attary)){
        $atts = Array();
        // foreach instead of each(): each() does not exist in PHP 8 and
        // started at the array's internal pointer, not at the first element
        foreach ($attary as $attname => $attvalue){
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . join(" ", $atts);
    }
    if ($tagtype == 3){
        $fulltag .= ' /';
    }
    $fulltag .= '>';
    return $fulltag;
}
a3daaaf3 1298
/**
 * Small helper for use with array_walk: replaces the given value with
 * its lowercase form.
 *
 * @param $val a value passed by-ref.
 * @return void since it modifies a by-ref value.
 */
function sq_casenormalize(&$val){
    $lowered = strtolower($val);
    $val = $lowered;
}
451f74a2 1309
/**
 * This function skips any whitespace from the current position within
 * a string and to the next non-whitespace value.
 *
 * @param $body the string
 * @param $offset the offset within the string where we should start
 *                looking for the next non-whitespace character.
 * @return the location within the $body where the next
 *         non-whitespace char is located. $offset is returned
 *         unchanged when it does not point at whitespace.
 */
function sq_skipspace($body, $offset){
    $matches = Array();
    // \s* also matches the empty string, $matches[1] is then ''.
    // The old code called sizeof() on that string, which PHP 8 rejects.
    if (preg_match('/^(\s*)/s', substr($body, $offset), $matches)
        && isset($matches[1])){
        $offset += strlen($matches[1]);
    }
    return $offset;
}
a3daaaf3 1329
/**
 * Looks for the next occurrence of a string. A thin wrapper around
 * strpos() that reports "not found" as the length of $body instead of
 * false.
 *
 * @param $body   The string to look for needle in.
 * @param $offset Start looking from this position.
 * @param $needle The character/string to look for.
 * @return location of the next occurrence of the needle, or
 *         strlen($body) if needle wasn't found.
 */
function sq_findnxstr($body, $offset, $needle){
    $found = strpos($body, $needle, $offset);
    return ($found === FALSE) ? strlen($body) : $found;
}
a3daaaf3 1349
/**
 * This function takes a PCRE-style regexp and tries to match it
 * within the string.
 *
 * The regexp is placed inside a pattern that uses "%" as delimiter and
 * the modifiers "s" and "i". The first (leftmost) match at or after
 * $offset is reported.
 *
 * @param $body   The string to look for needle in.
 * @param $offset Start looking from here.
 * @param $reg    A PCRE-style regex to match.
 * @return Returns a false if no matches found, or an array
 *         with the following members:
 *         - integer with the location of the match within $body
 *         - string with whatever content between offset and the match
 *         - string with whatever it is we matched
 */
function sq_findnxreg($body, $offset, $reg){
    $matches = Array();
    preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches);
    // Compare with the empty string; a plain truth test would treat a
    // matched "0" as "nothing found".
    if (!isset($matches[0]) || $matches[0] === ''){
        return false;
    }
    return Array(
        $offset + strlen($matches[1]),
        $matches[1],
        $matches[2]
    );
}
a3daaaf3 1377
/**
 * This function looks for the next tag.
 *
 * Starting at $offset the next "<" is located and the tag that begins
 * there is parsed: tag name (lowercased), tag type and all attributes.
 * Attribute names are lowercased; attribute values are stored including
 * their quotes. Unquoted values are wrapped in double quotes (with '"'
 * turned into "&quot;"), attributes without a value get the value "yes".
 * Comments, SGML declarations and everything that cannot be parsed are
 * reported as invalid tag.
 *
 * @param $body   String where to look for the next tag.
 * @param $offset Start looking from here.
 * @return false if no more tags exist in the body, or
 *         an array with the following members:
 *         - string with the name of the tag
 *         - array with attributes and their values
 *           (false if the tag ended directly after its name)
 *         - integer with tag type (1, 2, or 3)
 *         - integer where the tag starts (starting "<")
 *         - integer where the tag ends (ending ">")
 *         first three members will be false, if the tag is invalid.
 */
function sq_getnxtag($body, $offset){
    if ($offset > strlen($body)){
        return false;
    }
    $lt = sq_findnxstr($body, $offset, "<");
    if ($lt == strlen($body)){
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = sq_skipspace($body, $lt+1);
    if ($pos >= strlen($body)){
        return Array(false, false, false, $lt, strlen($body));
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah" />
     */
    $tagtype = false;
    switch (substr($body, $pos, 1)){
        case '/':
            $tagtype = 2;
            $pos++;
            break;
        case '!':
            /**
             * A comment or an SGML declaration.
             */
            if (substr($body, $pos+1, 2) == "--"){
                $gt = strpos($body, "-->", $pos);
                if ($gt === false){
                    $gt = strlen($body);
                } else {
                    // point at the ">" of "-->"
                    $gt += 2;
                }
                return Array(false, false, false, $lt, $gt);
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                return Array(false, false, false, $lt, $gt);
            }
            break;
        default:
            /**
             * Assume tagtype 1 for now. If it's type 3, we'll switch values
             * later.
             */
            $tagtype = 1;
            break;
    }

    $tagname = '';
    /**
     * Look for next [\W-_], which will indicate the end of the tag name.
     */
    $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
    if ($regary == false){
        return Array(false, false, false, $lt, strlen($body));
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match){
        case '/':
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah" />. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == "/>"){
                $pos++;
                $tagtype = 3;
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                $retary = Array(false, false, false, $lt, $gt);
                return $retary;
            }
            // no break: "/>" ends the tag just like ">" does
        case '>':
            return Array($tagname, false, $tagtype, $lt, $pos);
            break;
        default:
            /**
             * Check if it's whitespace
             */
            if (!preg_match('/\s/', $match)){
                /**
                 * This is an invalid tag! Look for the next closing ">".
                 */
                $gt = sq_findnxstr($body, $lt, ">");
                return Array(false, false, false, $lt, $gt);
            }
            break;
    }

    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attname = '';
    $attary = Array();

    while ($pos <= strlen($body)){
        $pos = sq_skipspace($body, $pos);
        if ($pos == strlen($body)){
            /**
             * Non-closed tag.
             */
            return Array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = Array();
        if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) {
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == "/>"){
                $tagtype = 3;
                $pos++;
            }
            return Array($tagname, $attary, $tagtype, $lt, $pos);
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = sq_findnxreg($body, $pos, "[^:\w\-_]");
        if ($regary == false){
            /**
             * Looks like body ended before the end of tag.
             */
            return Array(false, false, false, $lt, strlen($body));
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch($match){
            case '/':
                /**
                 * This is an xhtml-style tag with a closing / at the
                 * end, like so: <img src="blah" />. Check if it's followed
                 * by the closing bracket. If not, then this tag is invalid
                 */
                if (substr($body, $pos, 2) == "/>"){
                    $pos++;
                    $tagtype = 3;
                } else {
                    $gt = sq_findnxstr($body, $pos, ">");
                    $retary = Array(false, false, false, $lt, $gt);
                    return $retary;
                }
                // no break: "/>" ends the tag just like ">" does
            case '>':
                $attary[$attname] = '"yes"';
                return Array($tagname, $attary, $tagtype, $lt, $pos);
                break;
            default:
                /**
                 * Skip whitespace and see what we arrive at.
                 */
                $pos = sq_skipspace($body, $pos);
                $char = substr($body, $pos, 1);
                /**
                 * Two things are valid here:
                 * '=' means this is attribute type 1 2 or 3.
                 * \w means this was attribute type 4.
                 * anything else we ignore and re-loop. End of tag and
                 * invalid stuff will be caught by our checks at the beginning
                 * of the loop.
                 */
                if ($char == "="){
                    $pos++;
                    $pos = sq_skipspace($body, $pos);
                    /**
                     * Here are 3 possibilities:
                     * "'"  attribute type 1
                     * '"'  attribute type 2
                     * everything else is the content of tag type 3
                     */
                    $quot = substr($body, $pos, 1);
                    if ($quot == "'"){
                        $regary = sq_findnxreg($body, $pos+1, "\'");
                        if ($regary == false){
                            return Array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        // step over the closing quote
                        $pos++;
                        $attary[$attname] = "'" . $attval . "'";
                    } else if ($quot == '"'){
                        $regary = sq_findnxreg($body, $pos+1, '\"');
                        if ($regary == false){
                            return Array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        // step over the closing quote
                        $pos++;
                        $attary[$attname] = '"' . $attval . '"';
                    } else {
                        /**
                         * These are hateful. Look for \s, or >.
                         */
                        $regary = sq_findnxreg($body, $pos, "[\s>]");
                        if ($regary == false){
                            return Array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        /**
                         * If it's ">" it will be caught at the top.
                         */
                        $attval = preg_replace("/\"/s", "&quot;", $attval);
                        $attary[$attname] = '"' . $attval . '"';
                    }
                } else if (preg_match("|[\w/>]|", $char)) {
                    /**
                     * That was attribute type 4.
                     */
                    $attary[$attname] = '"yes"';
                } else {
                    /**
                     * An illegal character. Find next '>' and return.
                     */
                    $gt = sq_findnxstr($body, $pos, ">");
                    return Array(false, false, false, $lt, $gt);
                }
                break;
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return Array(false, false, false, $lt, strlen($body));
}
1668
/**
 * Translates entities into literal values so they can be checked.
 *
 * Every match of $regex is replaced by the single character chr() returns
 * for the number in the first sub-pattern. chr() works on one byte, so
 * numbers above 255 do not give the character the entity stands for.
 *
 * @param $attvalue the by-ref value to check.
 * @param $regex    the regular expression to check against. The first
 *                  sub-pattern has to hold the number.
 * @param $hex      whether the entities are hexadecimal.
 * @return True or False depending on whether there were matches.
 */
function sq_deent(&$attvalue, $regex, $hex=false){
    $matches = Array();
    preg_match_all($regex, $attvalue, $matches);
    if (!is_array($matches) || empty($matches[0])){
        return false;
    }

    $repl = Array();
    for ($i = 0; $i < sizeof($matches[0]); $i++){
        $numval = $matches[1][$i];
        if ($hex){
            $numval = hexdec($numval);
        }
        // chr() needs an integer; very long digit strings must not make it fail
        $repl[$matches[0][$i]] = chr((int) $numval);
    }
    $attvalue = strtr($attvalue, $repl);
    return true;
}
1698
/**
 * This function runs various checks against the attributes.
 *
 * For every attribute of the tag it
 *  - drops the attribute when its name matches $rm_attnames,
 *  - normalizes the value (sq_fixIE_idiocy, sq_defang, sq_unspace),
 *  - applies the match/replace pairs from $bad_attvals,
 *  - filters url() references in style attributes and the values of
 *    href/src/background attributes through sq_fix_url.
 * Finally the attributes from $add_attr_to_tag are merged in.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes.
 * @param  $rm_attnames     See description for sq_sanitize
 * @param  $bad_attvals     See description for sq_sanitize
 * @param  $add_attr_to_tag See description for sq_sanitize
 * @param  $message         message object
 * @param  $id              message id
 * @param  $mailbox         mailbox of the message (passed on to sq_fix_url)
 * @return                  Array with modified attributes.
 */
function sq_fixatts($tagname,
                    $attary,
                    $rm_attnames,
                    $bad_attvals,
                    $add_attr_to_tag,
                    $message,
                    $id,
                    $mailbox
                    ){
    // foreach iterates over a snapshot of $attary, so entries can safely be
    // unset or overwritten while looping.
    foreach ($attary as $attname => $attvalue){
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag=>$matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr){
                    if (preg_match($matchattr, $attname)){
                        unset($attary[$attname]);
                        // Go straight to the next attribute. Falling
                        // through to the checks below could put the
                        // removed attribute back into $attary.
                        continue 3;
                    }
                }
            }
        }
        /**
         * Workaround for IE quirks
         */
        sq_fixIE_idiocy($attvalue);

        /**
         * Remove any backslashes, entities, and extraneous whitespace.
         */
        $oldattvalue = $attvalue;
        sq_defang($attvalue);
        if ($attname == 'style' && $attvalue !== $oldattvalue) {
            // entities are used in the attribute value. In 99% of the cases it's there as XSS
            // i.e.<div style="{ left:exp&#x0280;essio&#x0274;( alert('XSS') ) }">
            $attvalue = "idiocy";
            $attary[$attname] = $attvalue;
        }
        sq_unspace($attvalue);

        /**
         * Now let's run checks on the attvalues.
         * I don't expect anyone to comprehend this. If you do,
         * get in touch with me so I can drive to where you live and
         * shake your hand personally. :)
         */
        foreach ($bad_attvals as $matchtag=>$matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr=>$valary){
                    if (preg_match($matchattr, $attname)){
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue =
                            preg_replace($valmatch, $valrepl, $attvalue);
                        if ($newvalue != $attvalue){
                            $attary[$attname] = $newvalue;
                            $attvalue = $newvalue;
                        }
                    }
                }
            }
        }
        if ($attname == 'style') {
            if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
                // 8bit and control characters in style attribute values can be used for XSS, remove them.
                // Skip the url() handling below: it would overwrite this
                // marker with the original (unsafe) value again.
                $attary[$attname] = '"disallowed character"';
                continue;
            }
            preg_match_all("/url\s*\((.+)\)/si",$attvalue,$aMatch);
            if (!empty($aMatch[1])) {
                foreach($aMatch[1] as $sMatch) {
                    // url value
                    $urlvalue = $sMatch;
                    sq_fix_url($attname, $urlvalue, $message, $id, $mailbox,"'");
                    // accumulate, so that every filtered url ends up in the result
                    $attvalue = str_replace($sMatch,$urlvalue,$attvalue);
                }
                $attary[$attname] = $attvalue;
            }
        }
        /**
         * Use white list based filtering on attributes which can contain url's
         */
        else if ($attname == 'href' || $attname == 'src' || $attname == 'background') {
            sq_fix_url($attname, $attvalue, $message, $id, $mailbox);
            $attary[$attname] = $attvalue;
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag=>$addattary){
        if (preg_match($matchtag, $tagname)){
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
a3daaaf3 1813
/**
 * This function filters url's
 *
 * The value is modified in place. Whatever ends up in $attvalue is always
 * wrapped in quotes again, because callers write it straight into the
 * generated markup.
 *
 * @param  $attname    String with the name of the attribute the url belongs
 *                     to ('href' urls are kept, other attributes are
 *                     treated as image references)
 * @param  $attvalue   String with attribute value to filter (by reference)
 * @param  $message    message object
 * @param  $id         message id
 * @param  $mailbox    mailbox
 * @param  $sQuote     quoting characters around url's; replaced by the quote
 *                     character found around $attvalue, if there is one
 */
function sq_fix_url($attname, &$attvalue, $message, $id, $mailbox,$sQuote = '"') {
    $attvalue = trim($attvalue);
    if ($attvalue && ($attvalue[0] =='"'|| $attvalue[0] == "'")) {
        // remember which quote was used and strip the surrounding quotes
        $sQuote = $attvalue[0];
        $attvalue = trim(substr($attvalue,1,-1));
    }

    if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
        $view_unsafe_images = false;
    }
    $secremoveimg = '../images/' . _("sec_remove_eng.png");
    $blankimg = $sQuote . SM_PATH . 'images/blank.png' . $sQuote;

    /**
     * Replace empty src tags with the blank image.  src is only used
     * for frames, images, and image inputs.  Doing a replace should
     * not affect them working as should be, however it will stop
     * IE from being kicked off when src for img tags are not set
     */
    if ($attvalue == '') {
        $attvalue = '"' . SM_PATH . 'images/blank.png"';
        return;
    }

    // first, disallow 8 bit characters and control characters
    if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
        if ($attname == 'href') {
            $attvalue = $sQuote . 'http://invalid-stuff-detected.example.com' . $sQuote;
        } else {
            $attvalue = $blankimg;
        }
        return;
    }

    // set when the url itself is accepted and has to be put back in quotes
    $bKeepUrl = false;
    $aUrl = parse_url($attvalue);
    if (isset($aUrl['scheme'])) {
        switch(strtolower($aUrl['scheme'])) {
            case 'mailto':
            case 'http':
            case 'https':
            case 'ftp':
                if ($attname == 'href') {
                    $bKeepUrl = true;
                } else if ($view_unsafe_images == false) {
                    $attvalue = $sQuote . $secremoveimg . $sQuote;
                } else if (isset($aUrl['path'])) {
                    // validate image extension.
                    // NOTE(review): 'xjpeg' has no leading dot, unlike the
                    // other entries - looks like a typo, confirm before changing.
                    $ext = strtolower(substr($aUrl['path'],strrpos($aUrl['path'],'.')));
                    if (in_array($ext,array('.jpeg','.jpg','xjpeg','.gif','.bmp','.jpe','.png','.xbm'))) {
                        $bKeepUrl = true;
                    } else {
                        $attvalue = $blankimg;
                    }
                } else {
                    $attvalue = $blankimg;
                }
                break;
            case 'outbind':
                /**
                 * "Hack" fix for Outlook using propriatary outbind:// protocol in img tags.
                 * One day MS might actually make it match something useful, for now, falling
                 * back to using cid2http, so we can grab the blank.png.
                 */
            case 'cid':
                /**
                 * Turn cid: urls into http-friendly ones. The quotes were
                 * stripped above, so sq_cid2http returns an unquoted url
                 * which has to be quoted here.
                 */
                $attvalue = $sQuote . sq_cid2http($message, $id, $attvalue, $mailbox) . $sQuote;
                break;
            default:
                $attvalue = $blankimg;
                break;
        }
    } else if (isset($aUrl['path']) && $aUrl['path'] == $secremoveimg) {
        // the "unsafe image removed" placeholder set by an earlier filter step
        $bKeepUrl = true;
    } else {
        // parse_url did not lead to satisfying result
        $attvalue = $blankimg;
    }

    if ($bKeepUrl) {
        // Restore the quotes that were stripped at the top. Without them
        // a space inside the url would start a new attribute in the
        // generated tag. A quote character inside the url is
        // percent-encoded so it cannot terminate the value early.
        $attvalue = $sQuote
                  . str_replace($sQuote, rawurlencode($sQuote), $attvalue)
                  . $sQuote;
    }
}
1906
/**
 * This function edits the style definition to make them friendly and
 * usable in SquirrelMail.
 *
 * Starting at $pos it collects everything up to the closing </style> tag
 * and then filters the collected style definitions.
 *
 * @param  $body     the string with the html being sanitized
 * @param  $pos      offset in $body where the style content starts
 *                   (first character after the opening <style> tag)
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $mailbox  the message mailbox
 * @return           array(edited content, offset just behind </style>), or
 *                   array(FALSE, strlen($body)) when no closing </style>
 *                   tag was found.
 */
function sq_fixstyle($body, $pos, $message, $id, $mailbox){
    // workaround for </style> in between comments
    $content = '';
    $sToken = '';
    $bSucces = false;
    $bEndTag = false;
    for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
        $char = $body[$i];
        switch ($char) {
          case '<':
            $sToken .= $char;
            break;
          case '/':
            if ($sToken == '<') {
                // "</" : start collecting a possible end tag
                $sToken .= $char;
                $bEndTag = true;
            } else {
                $content .= $char;
            }
            break;
          case '>':
            if ($bEndTag) {
                $sToken .= $char;
                if (preg_match('/\<\/\s*style\s*\>/i',$sToken)) {
                    $newpos = $i + 1;
                    $bSucces = true;
                    break 2;
                } else {
                    $content .= $sToken;
                }
                $bEndTag = false;
            } else {
                $content .= $char;
            }
            break;
          case '!':
            if ($sToken == '<') {
                // possible comment
                if (isset($body[$i+2]) && substr($body,$i,3) == '!--') {
                    // jump to the end of the comment
                    $i = strpos($body,'-->',$i+3);
                    if ($i === false) { // no end comment
                        $i = strlen($body);
                    }
                    $sToken = '';
                }
            } else {
                $content .= $char;
            }
            break;
          default:
            if ($bEndTag) {
                $sToken .= $char;
            } else {
                $content .= $char;
            }
            break;
        }
    }
    if ($bSucces == FALSE){
        return array(FALSE, strlen($body));
    }

    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
    $secremoveimg = '../images/' . _("sec_remove_eng.png");
    /**
     * Fix url('blah') declarations.
     */
    //   $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si",
    //                           "url(\\1$secremoveimg\\2)", $content);

    // first check for 8bit sequences and disallowed control characters
    if (preg_match('/[\16-\37\200-\377]+/',$content)) {
        $content = '<!-- style block removed by html filter due to presence of 8bit characters -->';
        return array($content, $newpos);
    }

    // IE Sucks hard. We have a special function for it.
    sq_fixIE_idiocy($content);

    // remove @import line
    $content = preg_replace("/^\s*(@import.*)$/mi","\n<!-- @import rules forbidden -->\n",$content);

    // translate ur\l and variations (IE parses that)
    // TODO check if the sq_fixIE_idiocy function already handles this.
    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
    preg_match_all("/url\s*\((.+)\)/si",$content,$aMatch);
    if (count($aMatch)) {
        $aValue = $aReplace = array();
        foreach($aMatch[1] as $sMatch) {
            // url value
            $urlvalue = $sMatch;
            sq_fix_url('style',$urlvalue, $message, $id, $mailbox,"'");
            $aValue[] = $sMatch;
            $aReplace[] = $urlvalue;
        }
        $content = str_replace($aValue,$aReplace,$content);
    }

    /**
     * Remove any backslashes, entities, and extraneous whitespace.
     * This is done on a copy which is only used if it turns out to
     * contain insecure declarations.
     */
    $contentTemp = $content;
    sq_defang($contentTemp);
    sq_unspace($contentTemp);

    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     */
    $match   = Array('/\/\*.*\*\//',
                     '/expression/i',
                     '/behaviou*r/i',
                     '/binding/i',
                     '/include-source/i',
                     '/javascript/i',
                     '/script/i');
    $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        // insecure css declarations are used. From now on we don't care
        // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash
        $content = $contentNew;
    }
    return array($content, $newpos);
}
a3daaaf3 2052
0493ed11 2053
/**
 * Converts a cid: url into a url the browser can actually request.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $cidurl   the cid: url, optionally wrapped in quotes.
 * @param  $mailbox  the message mailbox
 * @return           a string with a http-friendly url, wrapped in the same
 *                   quote character the cid: url came with (if any)
 */
function sq_cid2http($message, $id, $cidurl, $mailbox){
    /**
     * Strip the quotes, but remember them so they can be put back
     * around the result.
     */
    $quotchar = substr($cidurl, 0, 1);
    if ($quotchar != '"' && $quotchar != "'"){
        $quotchar = '';
    } else {
        $cidurl = str_replace($quotchar, "", $cidurl);
    }

    // cut off the leading scheme prefix (4 characters, "cid:")
    $cidurl = substr(trim($cidurl), 4);

    // drop any "{...}/" part
    $cidurl = preg_replace('/\{.*?\}\//', '', $cidurl);

    $linkurl = find_ent_id($cidurl, $message);

    /**
     * This is part of a fix for Outlook Express 6.x generating
     * cid URLs without creating content-id headers. These images are
     * not part of the multipart/related html mail. The html contains
     * <img src="cid:{some_id}/image_filename.ext"> references to
     * attached images with as goal to render them inline although
     * the attachment disposition property is not inline.
     */
    if (empty($linkurl) && preg_match('/{.*}\//', $cidurl)) {
        $cidurl = preg_replace('/{.*}\//','', $cidurl);
        if (!empty($cidurl)) {
            $linkurl = find_ent_id($cidurl, $message);
        }
    }

    if (empty($linkurl)) {
        /**
         * No entity found for this cid: hand back a blank image instead
         * of an empty string, otherwise it causes unusual behaviour
         */
        return $quotchar . SM_PATH . 'images/blank.png' . $quotchar;
    }

    return $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&amp;' .
           "passed_id=$id&amp;mailbox=" . urlencode($mailbox) .
           '&amp;ent_id=' . $linkurl . $quotchar;
}
2117
/**
 * This function changes the <body> tag into a <div> tag since we
 * can't really have a body-within-body.
 *
 * The background, bgcolor and text attributes of <body> are translated
 * into one style attribute; all other attributes are dropped.
 *
 * @param  $attary   an array of attributes and values of <body>
 * @param  $mailbox  mailbox we're currently reading (for cid2http)
 * @param  $message  current message (for cid2http)
 * @param  $id       current message id (for cid2http)
 * @return           a modified array of attributes to be set for <div>
 */
function sq_body2div($attary, $mailbox, $message, $id){
    $divattary = Array('class' => "'bodyclass'");
    $text = '#000000';
    $has_bgc_stl = $has_txt_stl = false;
    $styledef = '';
    if (is_array($attary) && sizeof($attary) > 0){
        foreach ($attary as $attname=>$attvalue){
            // the first character is taken to be the quote character
            // and is removed from the value
            $quotchar = substr($attvalue, 0, 1);
            $attvalue = str_replace($quotchar, "", $attvalue);
            switch ($attname){
                case 'background':
                    $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox);
                    $styledef .= "background-image: url('$attvalue'); ";
                    break;
                case 'bgcolor':
                    $has_bgc_stl = true;
                    $styledef .= "background-color: $attvalue; ";
                    break;
                case 'text':
                    $has_txt_stl = true;
                    $styledef .= "color: $attvalue; ";
                    break;
            }
        }
        // Outlook defines a white bgcolor and no text color. This can lead to
        // white text on a white bg with certain themes.
        if ($has_bgc_stl && !$has_txt_stl) {
            $styledef .= "color: $text; ";
        }
        if (strlen($styledef) > 0){
            $divattary["style"] = "\"$styledef\"";
        }
    }
    return $divattary;
}
a3daaaf3 2164
/**
 * This is the main function and the one you should actually be calling.
 * There are several variables you should be aware of an which need
 * special description.
 *
 * Since the description is quite lengthy, see it here:
 * http://linux.duke.edu/projects/mini/htmlfilter/
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             see description above
 * @param $rm_tags_with_content see description above
 * @param $self_closing_tags    see description above
 * @param $force_tag_closing    see description above
 * @param $rm_attnames          see description above
 * @param $bad_attvals          see description above
 * @param $add_attr_to_tag      see description above
 * @param $message              message object
 * @param $id                   message id
 * @param $mailbox              mailbox of the message
 * @return                      sanitized html safe to show on your pages.
 */
function sq_sanitize($body,
                     $tag_list,
                     $rm_tags_with_content,
                     $self_closing_tags,
                     $force_tag_closing,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag,
                     $message,
                     $id,
                     $mailbox
                     ){
    /**
     * See if tag_list is of tags to remove or tags to allow.
     * false  means remove these tags
     * true   means allow these tags
     */
    $rm_tags = array_shift($tag_list);
    /**
     * Normalize rm_tags and rm_tags_with_content.
     */
    @array_walk($tag_list, 'sq_casenormalize');
    @array_walk($rm_tags_with_content, 'sq_casenormalize');
    @array_walk($self_closing_tags, 'sq_casenormalize');

    $curpos = 0;
    $open_tags = Array();
    $trusted = "\n<!-- begin sanitized html -->\n";
    $skip_content = false;
    /**
     * Take care of netscape's stupid javascript entities like
     * &{alert('boo')};
     */
    $body = preg_replace("/&(\{.*?\};)/si", "&amp;\\1", $body);

    while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        $free_content = substr($body, $curpos, $lt-$curpos);
        /**
         * Take care of <style>
         */
        if ($tagname == "style" && $tagtype == 1){
            // keep the text in front of the <style> tag, just like the
            // text in front of any other tag
            if ($skip_content == false){
                $trusted .= $free_content;
            }
            list($style_content, $curpos) =
                sq_fixstyle($body, $gt+1, $message, $id, $mailbox);
            if ($style_content != FALSE){
                // attributes of the <style> tag need the same checks as
                // the attributes of every other tag
                if (is_array($attary) && sizeof($attary) > 0){
                    $attary = sq_fixatts($tagname,
                                         $attary,
                                         $rm_attnames,
                                         $bad_attvals,
                                         $add_attr_to_tag,
                                         $message,
                                         $id,
                                         $mailbox
                                         );
                }
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
                $trusted .= $style_content;
                $trusted .= sq_tagprint($tagname, false, 2);
            }
            continue;
        }
        if ($skip_content == false){
            $trusted .= $free_content;
        }
        if ($tagname != FALSE){
            if ($tagtype == 2){
                if ($skip_content == $tagname){
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname = false;
                    $skip_content = false;
                } else {
                    if ($skip_content == false){
                        if ($tagname == "body"){
                            $tagname = "div";
                        }
                        // only print closing tags that match a tag we opened
                        if (isset($open_tags[$tagname]) &&
                                $open_tags[$tagname] > 0){
                            $open_tags[$tagname]--;
                        } else {
                            $tagname = false;
                        }
                    }
                }
            } else {
                /**
                 * $rm_tags_with_content
                 */
                if ($skip_content == false){
                    /**
                     * See if this is a self-closing type and change
                     * tagtype appropriately.
                     */
                    if ($tagtype == 1
                            && in_array($tagname, $self_closing_tags)){
                        $tagtype = 3;
                    }
                    /**
                     * See if we should skip this tag and any content
                     * inside it.
                     */
                    if ($tagtype == 1 &&
                            in_array($tagname, $rm_tags_with_content)){
                        $skip_content = $tagname;
                    } else {
                        if (($rm_tags == false
                                    && in_array($tagname, $tag_list)) ||
                                ($rm_tags == true &&
                                 !in_array($tagname, $tag_list))){
                            $tagname = false;
                        } else {
                            /**
                             * Convert body into div.
                             */
                            if ($tagname == "body"){
                                $tagname = "div";
                                $attary = sq_body2div($attary, $mailbox,
                                                      $message, $id);
                            }
                            if ($tagtype == 1){
                                if (isset($open_tags[$tagname])){
                                    $open_tags[$tagname]++;
                                } else {
                                    $open_tags[$tagname]=1;
                                }
                            }
                            /**
                             * This is where we run other checks.
                             */
                            if (is_array($attary) && sizeof($attary) > 0){
                                $attary = sq_fixatts($tagname,
                                                     $attary,
                                                     $rm_attnames,
                                                     $bad_attvals,
                                                     $add_attr_to_tag,
                                                     $message,
                                                     $id,
                                                     $mailbox
                                                     );
                            }
                        }
                    }
                }
            }
            if ($tagname != false && $skip_content == false){
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt+1;
    }
    // whatever is left behind the last tag
    $trusted .= substr($body, $curpos, strlen($body)-$curpos);
    if ($force_tag_closing == true){
        foreach ($open_tags as $tagname=>$opentimes){
            while ($opentimes > 0){
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end sanitized html -->\n";
    return $trusted;
}
451f74a2 2339
/**
 * This is a wrapper function to call html sanitizing routines.
 *
 * It sets up the tag and attribute rules, runs sq_sanitize with them,
 * sets the global $has_unsafe_images when an image was replaced by the
 * "unsafe image" placeholder and optionally rewrites mailto: links.
 *
 * @param  $body     the body of the message
 * @param  $id       the id of the message
 * @param  $message  the message object
 * @param  $mailbox  the message mailbox (optional; default = 'INBOX')
 * @param  boolean $take_mailto_links When TRUE, converts mailto: links
 *                                    into internal SM compose links
 *                                    (optional; default = TRUE)
 * @return           a string with html safe to display in the browser.
 */
function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links =true) {

    // require_once(SM_PATH . 'functions/url_parser.php');  // for $MailTo_PReg_Match

    global $attachment_common_show_images, $view_unsafe_images,
           $has_unsafe_images;
    /**
     * Don't display attached images in HTML mode.
     *
     * SB: why?
     */
    $attachment_common_show_images = false;
    // first element false: the tags listed here are the ones to remove
    $tag_list = Array(
            false,
            "object",
            "meta",
            "html",
            "head",
            "base",
            "link",
            "frame",
            "iframe",
            "plaintext",
            "marquee"
            );

    $rm_tags_with_content = Array(
            "script",
            "applet",
            "embed",
            "title",
            "frameset",
            "xmp",
            "xml"
            );

    $self_closing_tags =  Array(
            "img",
            "br",
            "hr",
            "input",
            "outbind"
            );

    $force_tag_closing = true;

    $rm_attnames = Array(
            "/.*/" =>
            Array(
                "/target/i",
                "/^on.*/i",
                "/^dynsrc/i",
                "/^data.*/i",
                "/^lowsrc.*/i"
                )
            );

    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    $bad_attvals = Array(
            "/.*/" =>
            Array(
                "/^src|background/i" =>
                Array(
                    Array(
                        "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                        "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                        "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                        ),
                    Array(
                        "\\1$secremoveimg\\2",
                        "\\1$secremoveimg\\2",
                        "\\1$secremoveimg\\2",
                        )
                    ),
                "/^href|action/i" =>
                Array(
                    Array(
                        "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                        "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                        "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                        ),
                    Array(
                        "\\1#\\1",
                        "\\1#\\1",
                        "\\1#\\1"
                        )
                    ),
                "/^style/i" =>
                Array(
                    Array(
                        "/\/\*.*\*\//",
                        "/expression/i",
                        "/binding/i",
                        "/behaviou*r/i",
                        "/include-source/i",
                        "/position\s*:\s*absolute/i",
                        "/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i",
                        "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si",
                        "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si",
                        "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si",
                        "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si"
                        ),
                    Array(
                        "",
                        "idiocy",
                        "idiocy",
                        "idiocy",
                        "idiocy",
                        "idiocy",
                        "url",
                        "url(\\1#\\1)",
                        "url(\\1#\\1)",
                        "url(\\1#\\1)",
                        "\\1:url(\\2#\\3)"
                        )
                    )
                )
            );
    if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
        $view_unsafe_images = false;
    }
    if (!$view_unsafe_images){
        /**
         * Remove any references to http/https if view_unsafe_images set
         * to false.
         */
        array_push($bad_attvals['/.*/']['/^src|background/i'][0],
                '/^([\'\"])\s*https*:.*([\'\"])/si');
        array_push($bad_attvals['/.*/']['/^src|background/i'][1],
                "\\1$secremoveimg\\1");
        array_push($bad_attvals['/.*/']['/^style/i'][0],
                '/url\([\'\"]?https?:[^\)]*[\'\"]?\)/si');
        array_push($bad_attvals['/.*/']['/^style/i'][1],
                "url(\\1$secremoveimg\\1)");
    }

    $add_attr_to_tag = Array(
            "/^a$/i" =>
            Array('target'=>'"_blank"',
                'title'=>'"'._("This external link will open in a new window").'"'
                )
            );
    $trusted = sq_sanitize($body,
            $tag_list,
            $rm_tags_with_content,
            $self_closing_tags,
            $force_tag_closing,
            $rm_attnames,
            $bad_attvals,
            $add_attr_to_tag,
            $message,
            $id,
            $mailbox
            );
    if (strpos($trusted,$secremoveimg)){
        $has_unsafe_images = true;
    }

    // we want to parse mailto's in HTML output, change to SM compose links
    // this is a modified version of code from url_parser.php... but Marc is
    // right: we need a better filtering implementation; adding this randomly
    // here is not a great solution
    //
    if ($take_mailto_links) {
        // parseUrl($trusted);   // this even parses URLs inside of tags... too aggressive
        global $MailTo_PReg_Match;
        // Build the pattern in a local variable. Assigning it back to the
        // global would prepend one more "mailto:" on every call and change
        // the pattern for all other code using $MailTo_PReg_Match.
        $sMailtoMatch = '/mailto:' . substr($MailTo_PReg_Match, 1) ;
        if ((preg_match_all($sMailtoMatch, $trusted, $regs)) && ($regs[0][0] != '')) {
            foreach ($regs[0] as $i => $mailto_before) {
                $mailto_params = $regs[10][$i];
                // get rid of any tailing quote since we have to add send_to to the end
                //
                if (substr($mailto_before, strlen($mailto_before) - 1) == '"')
                    $mailto_before = substr($mailto_before, 0, strlen($mailto_before) - 1);
                if (substr($mailto_params, strlen($mailto_params) - 1) == '"')
                    $mailto_params = substr($mailto_params, 0, strlen($mailto_params) - 1);

                if ($regs[1][$i]) {    //if there is an email addr before '?', we need to merge it with the params
                    $to = 'to=' . $regs[1][$i];
                    if (strpos($mailto_params, 'to=') !== false)    //already a 'to='
                        $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params);
                    else {
                        if ($mailto_params)    //already some params, append to them
                            $mailto_params .= '&amp;' . $to;
                        else
                            $mailto_params .= '?' . $to;
                    }
                }

                $url_str = preg_replace(array('/to=/i', '/(?<!b)cc=/i', '/bcc=/i'), array('send_to=', 'send_to_cc=', 'send_to_bcc='), $mailto_params);

                // we'll already have target=_blank, no need to allow comp_in_new
                // here (which would be a lot more work anyway)
                //
                global $compose_new_win;
                $temp_comp_in_new = $compose_new_win;
                $compose_new_win = 0;
                $comp_uri = makeComposeLink('src/compose.php' . $url_str, $mailto_before);
                $compose_new_win = $temp_comp_in_new;

                // remove <a href=" and anything after the next quote (we only
                // need the uri, not the link HTML) in compose uri
                //
                $comp_uri = substr($comp_uri, 9);
                $comp_uri = substr($comp_uri, 0, strpos($comp_uri, '"', 1));
                $trusted = str_replace($mailto_before, $comp_uri, $trusted);
            }
        }
    }

    return $trusted;
}
a4a70693 2565
da2415c1 2566/**
8bd0068d 2567 * function SendDownloadHeaders - send file to the browser
2568 *
2569 * Original Source: SM core src/download.php
2570 * moved here to make it available to other code, and separate
2571 * front end from back end functionality.
2572 *
2573 * @param string $type0 first half of mime type
2574 * @param string $type1 second half of mime type
2575 * @param string $filename filename to tell the browser for downloaded file
2576 * @param boolean $force whether to force the download dialog to pop
2577 * @param optional integer $filesize send the Content-Header and length to the browser
2578 * @return void
2579 */
02474e43 2580function SendDownloadHeaders($type0, $type1, $filename, $force, $filesize=0) {
2581 global $languages, $squirrelmail_language;
cfffd60b 2582 $isIE = $isIE6plus = false;
02474e43 2583
2584 sqgetGlobalVar('HTTP_USER_AGENT', $HTTP_USER_AGENT, SQ_SERVER);
2585
2586 if (strstr($HTTP_USER_AGENT, 'compatible; MSIE ') !== false &&
8bd0068d 2587 strstr($HTTP_USER_AGENT, 'Opera') === false) {
cfffd60b 2588 $isIE = true;
02474e43 2589 }
2590
cfffd60b 2591 if (preg_match('/compatible; MSIE ([0-9]+)/', $HTTP_USER_AGENT, $match) &&
2592 ((int)$match[1]) >= 6 && strstr($HTTP_USER_AGENT, 'Opera') === false) {
2593 $isIE6plus = true;
02474e43 2594 }
2595
2596 if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
8bd0068d 2597 function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename')) {
02474e43 2598 $filename =
8bd0068d 2599 call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename', $filename, $HTTP_USER_AGENT);
02474e43 2600 } else {
2601 $filename = ereg_replace('[\\/:\*\?"<>\|;]', '_', str_replace('&nbsp;', ' ', $filename));
2602 }
2603
2604 // A Pox on Microsoft and it's Internet Explorer!
2605 //
2606 // IE has lots of bugs with file downloads.
2607 // It also has problems with SSL. Both of these cause problems
2608 // for us in this function.
2609 //
2610 // See this article on Cache Control headers and SSL
2611 // http://support.microsoft.com/default.aspx?scid=kb;en-us;323308
2612 //
2613 // The best thing you can do for IE is to upgrade to the latest
2614 // version
2615 //set all the Cache Control Headers for IE
2616 if ($isIE) {
2617 $filename=rawurlencode($filename);
2618 header ("Pragma: public");
8bd0068d 2619 header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1
02474e43 2620 header ("Cache-Control: post-check=0, pre-check=0", false);
8bd0068d 2621 header ("Cache-Control: private");
02474e43 2622
2623 //set the inline header for IE, we'll add the attachment header later if we need it
2624 header ("Content-Disposition: inline; filename=$filename");
2625 }
2626
2627 if (!$force) {
2628 // Try to show in browser window
2629 header ("Content-Disposition: inline; filename=\"$filename\"");
2630 header ("Content-Type: $type0/$type1; name=\"$filename\"");
2631 } else {
2632 // Try to pop up the "save as" box
2633
2634 // IE makes this hard. It pops up 2 save boxes, or none.
2635 // http://support.microsoft.com/support/kb/articles/Q238/5/88.ASP
2636 // http://support.microsoft.com/default.aspx?scid=kb;EN-US;260519
2637 // But, according to Microsoft, it is "RFC compliant but doesn't
2638 // take into account some deviations that allowed within the
2639 // specification." Doesn't that mean RFC non-compliant?
2640 // http://support.microsoft.com/support/kb/articles/Q258/4/52.ASP
2641
2642 // all browsers need the application/octet-stream header for this
2643 header ("Content-Type: application/octet-stream; name=\"$filename\"");
2644
2645 // http://support.microsoft.com/support/kb/articles/Q182/3/15.asp
2646 // Do not have quotes around filename, but that applied to
2647 // "attachment"... does it apply to inline too?
2648 header ("Content-Disposition: attachment; filename=\"$filename\"");
2649
cfffd60b 2650 if ($isIE && !$isIE6plus) {
02474e43 2651 // This combination seems to work mostly. IE 5.5 SP 1 has
2652 // known issues (see the Microsoft Knowledge Base)
2653
2654 // This works for most types, but doesn't work with Word files
2655 header ("Content-Type: application/download; name=\"$filename\"");
7e2ff844 2656 header ("Content-Type: application/force-download; name=\"$filename\"");
02474e43 2657 // These are spares, just in case. :-)
2658 //header("Content-Type: $type0/$type1; name=\"$filename\"");
2659 //header("Content-Type: application/x-msdownload; name=\"$filename\"");
2660 //header("Content-Type: application/octet-stream; name=\"$filename\"");
7e2ff844 2661 } else if ($isIE) {
2662 // This is to prevent IE from MIME sniffing and automatically opening a file in IE
2663 header ("Content-Type: application/force-download; name=\"$filename\"");
02474e43 2664 } else {
2665 // another application/octet-stream forces download for Netscape
2666 header ("Content-Type: application/octet-stream; name=\"$filename\"");
2667 }
2668 }
2669
2670 //send the content-length header if the calling function provides it
2671 if ($filesize > 0) {
2672 header("Content-Length: $filesize");
2673 }
07c49f57 2674
8d863f64 2675} // end fn SendDownloadHeaders