test
[squirrelmail.git] / functions / mime.php
CommitLineData
59177427 1<?php
2ba13803 2
35586184 3/**
8bd0068d 4 * mime.php
5 *
8bd0068d 6 * This contains the functions necessary to detect and decode MIME
7 * messages.
8 *
47ccfad4 9 * @copyright &copy; 1999-2006 The SquirrelMail Project Team
4b4abf93 10 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
8bd0068d 11 * @version $Id$
12 * @package squirrelmail
13 */
b74ba498 14
202bcbcc 15/**
16 * dependency information
17 functions dependency
18 mime_structure
19 class/mime/Message.class.php
20 Message::parseStructure
21 functions/page_header.php
22 displayPageHeader
23 functions/display_messages.php
24 plain_error_message
25 mime_fetch_body
26 functions/imap_general.php
27 sqimap_run_command
28 mime_print_body_lines
29
30
31
32functions/imap.php
33functions/attachment_common.php
34functions/display_messages.php
35
36magicHtml => url_parser
37translateText => url_parser
38
39*/
40
8beafbbc 41
7c7b74b3 42/* -------------------------------------------------------------------------- */
43/* MIME DECODING */
44/* -------------------------------------------------------------------------- */
b74ba498 45
/**
 * Get the MIME structure
 *
 * Isolates the parenthesised BODYSTRUCTURE data from an IMAP FETCH response,
 * hands it to Message::parseStructure() and marks the resulting message
 * object with the supplied IMAP flags. If the structure cannot be parsed,
 * an error page is printed and the script exits.
 *
 * @param string $bodystructure FETCH response containing the BODYSTRUCTURE item
 * @param array  $flags         IMAP flags of the message (e.g. '\Seen', '$MDNSent')
 * @return object message object returned by Message::parseStructure()
 */
function mime_structure ($bodystructure, $flags=array()) {

    /* Isolate the body structure and remove beginning and end parenthesis. */
    $read = trim(substr($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13));
    $read = trim(substr($read, 0, -1));
    $i = 0;
    $msg = Message::parseStructure($read, $i);

    if (!is_object($msg)) {
        global $color, $mailbox;
        /* removed urldecode because $_GET is auto urldecoded ??? */
        displayPageHeader( $color, $mailbox );
        $errormessage  = _("SquirrelMail could not decode the bodystructure of the message");
        $errormessage .= '<br />'._("The bodystructure provided by your IMAP server:").'<br /><br />';
        $errormessage .= '<pre>' . htmlspecialchars($read) . '</pre>';
        plain_error_message( $errormessage, $color );
        echo '</body></html>';
        exit;
    }

    /* Lower-cased IMAP flag => message property that records it.
     * Matching the whole flag name replaces the old "$flag{1}" peek at the
     * second character, which no longer parses on PHP 8 and read an
     * undefined offset for flags shorter than two characters.
     */
    $flag_properties = array(
        '\\seen'     => 'is_seen',
        '\\answered' => 'is_answered',
        '\\deleted'  => 'is_deleted',
        '\\flagged'  => 'is_flagged',
        '$mdnsent'   => 'is_mdnsent',
    );

    foreach ($flags as $flag) {
        $flag_key = strtolower($flag);
        if (isset($flag_properties[$flag_key])) {
            $msg->{$flag_properties[$flag_key]} = true;
        }
    }

    return $msg;
}
b74ba498 109
22efa9fb 110
111
/**
 * Fetch the (still transfer-encoded) body of a message part
 *
 * Runs "FETCH <id> BODY[<ent_id>]" and extracts the body from the response,
 * which the server sends either as a literal ({size} followed by the data)
 * or as a quoted string. If neither form is found, an error table is printed
 * and the whole message is fetched instead.
 *
 * @param resource $imap_stream imap connection resource
 * @param integer  $id          message id
 * @param string   $ent_id      message part id; an empty value fetches BODY[]
 * @param integer  $fetch_size  number of octets to request; 0 requests everything
 * @return string body data as sent by the server
 */
function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) {
    /* An empty entity id means "the whole message". */
    if (!$ent_id) {
        $cmd = "FETCH $id BODY[]";
    } else {
        $cmd = "FETCH $id BODY[$ent_id]";
    }

    /* Partial fetch: ask only for the first $fetch_size octets. */
    if ($fetch_size!=0) $cmd .= "<0.$fetch_size>";

    $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, TRUE);

    /* Skip untagged responses until the FETCH response line shows up. */
    do {
        $topline = trim((string) array_shift($data));
    } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ;

    $wholemessage = implode('', $data);
    /* ereg() was removed in PHP 7; preg_match() is the equivalent here. */
    if (preg_match('/\{([^}]*)\}/', $topline, $regs)) {
        /* Literal: {size} announces how many octets of body follow. */
        $ret = substr($wholemessage, 0, (int) $regs[1]);
    } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
        /* Quoted string: the body is on the FETCH line itself. */
        $ret = $regs[1];
    } else {
        /* NOTE(review): the fallback below fetches the global $passed_id,
         * not the $id argument - confirm that this is intended.
         */
        global $passed_id;

        /* Server supplied values are escaped before they are put into HTML. */
        echo '<tt><br />' .
            '<table width="80%"><tr>' .
            '<tr><td colspan="2">' .
            _("Body retrieval error. The reason for this is most probably that the message is malformed.") .
            '</td></tr>' .
            '<tr><td><b>' . _("Command:") . '</td><td>' . htmlspecialchars((string) $cmd) . '</td></tr>' .
            '<tr><td><b>' . _("Response:") . '</td><td>' . htmlspecialchars((string) $response) . '</td></tr>' .
            '<tr><td><b>' . _("Message:") . '</td><td>' . htmlspecialchars((string) $message) . '</td></tr>' .
            '<tr><td><b>' . _("FETCH line:") . '</td><td>' . htmlspecialchars((string) $topline) . '</td></tr>' .
            "</table><br /></tt></font><hr />";

        $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, TRUE);
        /* Drop the FETCH response line, keep everything after it. */
        array_shift($data);
        $ret = implode('', $data);
    }
    return $ret;
}
d4467150 181
/**
 * Fetch a message part and output its decoded content
 *
 * base64 encoded parts are not buffered: the FETCH command is run with the
 * sqimap_base64_decode filter and $rStream handed to sqimap_run_command().
 * Parts in any other encoding are fetched with mime_fetch_body(), decoded
 * with decodeBody() and then written to $rStream when it is a resource, or
 * echoed otherwise.
 *
 * $encoding has an empty default only because a required parameter after the
 * optional $ent_id is deprecated since PHP 8.0; callers are still expected
 * to pass it.
 *
 * @param resource $imap_stream imap connection resource
 * @param integer  $id          message id
 * @param string   $ent_id      message part id; an empty value fetches BODY[]
 * @param string   $encoding    content transfer encoding of the part
 * @param mixed    $rStream     output stream resource; a non-resource value
 *                              (the default) makes the non-base64 path echo
 * @return void
 */
function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding='', $rStream='php://stdout') {

    /* Don't kill the connection if the browser is over a dialup
     * and it would take over 30 seconds to download it.
     * Don't call set_time_limit in safe mode.
     */
    if (!ini_get('safe_mode')) {
        set_time_limit(0);
    }

    /* In case of base64 encoded attachments, do not buffer them.
     * Instead, echo the decoded attachment directly to screen.
     */
    if (strtolower($encoding) == 'base64') {
        if (!$ent_id) {
            $query = "FETCH $id BODY[]";
        } else {
            $query = "FETCH $id BODY[$ent_id]";
        }
        sqimap_run_command($imap_stream,$query,true,$response,$message,TRUE,'sqimap_base64_decode',$rStream,true);
    } else {
        $decoded = decodeBody(mime_fetch_body($imap_stream, $id, $ent_id), $encoding);
        if (is_resource($rStream)) {
            fputs($rStream, $decoded);
        } else {
            echo $decoded;
        }
    }

    /*
       TODO, use the same method for quoted printable.
       However, I assume that quoted printable attachments aren't that large
       so the performance gain / memory usage drop will be minimal.
       If we decide to add that then we need to adapt sqimap_fread because
       we need to split the result on \n and fread doesn't stop at \n. That
       means we also should provide $results from sqimap_fread (by ref) to
       the function and set $no_return to false. The $filter function for
       quoted printable should handle unsetting of $results.
     */
    /*
       TODO 2: find out how we write to the output stream php://stdout. fwrite
       doesn't work because 'php://stdout' isn't a stream.
     */
}
beb9e459 227
451f74a2 228/* -[ END MIME DECODING ]----------------------------------------------------------- */
d4467150 229
/* Debugging helper. Echoes the entity id, content type and parent entity id
 * of $message, then does the same for every entry of $message->entities,
 * each one prefixed with its index in that list.
 */
function listEntities ($message) {
    if (!$message) {
        return;
    }

    $summary = "<tt>" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '<br />';
    echo $summary;

    $index = 0;
    while (isset($message->entities[$index])) {
        echo "$index : ";
        $found = listEntities($message->entities[$index]);

        /* Pass a truthy result of the recursive call straight up. */
        if ($found) {
            echo "return: ";
            return $found;
        }
        $index++;
    }
}
f0c4dc12 247
/**
 * Translate a priority header value into a localized label
 *
 * Only the first character of $priority is looked at: '1' and '2' give
 * "High", '4' and '5' give "Low", anything else gives "Normal".
 *
 * @param string $priority priority value, e.g. "1 (Highest)"
 * @return string localized priority label
 */
function getPriorityStr($priority) {
    $level = substr($priority,0,1);

    /* Higher than normal priority. */
    if ($level == '1' || $level == '2') {
        return _("High");
    }

    /* Lower than normal priority. */
    if ($level == '4' || $level == '5') {
        return _("Low");
    }

    /* '3' and every unrecognised value count as normal priority. */
    return _("Normal");
}
274
/* Looks up the entity with id $ent_id by delegating to $message->getEntity()
 * and returns whatever that method returns.
 */
function getEntity ($message, $ent_id) {
    $entity = $message->getEntity($ent_id);
    return $entity;
}
8beafbbc 279
/* translateText
 * Extracted from strings.php 23/03/2002
 *
 * Rewrites $body in place, line by line: lines that are too long are handed
 * to sqWordWrap(), every line goes through charset_decode() and parseUrl(),
 * and lines that start with quote markers ("&gt;") are wrapped in a
 * quote1/quote2 span depending on the number of markers. The result is put
 * between <pre> tags.
 */
function translateText(&$body, $wrap_at, $charset) {
    global $where, $what; /* from searching */
    global $color; /* color theme */

    // require_once(SM_PATH . 'functions/url_parser.php');

    $rows = explode("\n", $body);
    foreach ($rows as $row_no => $text) {
        if (strlen($text) - 2 >= $wrap_at) {
            sqWordWrap($text, $wrap_at, $charset);
        }
        $text = charset_decode($charset, $text);
        $text = str_replace("\t", ' ', $text);

        parseUrl ($text);

        /* Count the leading quote markers, skipping spaces between them. */
        $depth = 0;
        $length = strlen($text);
        for ($at = 0; $at < $length; ) {
            if ($text[$at] == ' ') {
                $at++;
                continue;
            }
            if (substr($text, $at, 4) !== '&gt;') {
                break;
            }
            $at += 4;
            $depth++;
        }

        /* Odd quote levels use quote1, even (non-zero) levels quote2. */
        if ($depth) {
            $css_class = ($depth % 2) ? 'quote1' : 'quote2';
            $text = '<span class="' . $css_class . '">' . $text . '</span>';
        }

        $rows[$row_no] = $text;
    }
    $body = '<pre>' . implode("\n", $rows) . '</pre>';
}
326
/**
 * Builds the HTML that displays one message part as the message body.
 *
 * Only parts whose primary type is text or rfc822 are rendered; for any
 * other type an empty string is returned. Besides returning the body, the
 * function fills the globals $download_and_unsafe_link, $download_href,
 * $unsafe_image_toggle_href and $unsafe_image_toggle_text (not in clean mode).
 *
 * Since 1.2.0 function uses message_body hook.
 * Till 1.3.0 function included output of formatAttachments().
 *
 * @param resource $imap_stream imap connection resource
 * @param object $message squirrelmail message object
 * @param array $color squirrelmail color theme array
 * @param integer $wrap_at number of characters per line
 * @param string $ent_num (since 1.3.0) message part id
 * @param integer $id (since 1.3.0) message id
 * @param string $mailbox (since 1.3.0) imap folder name
 * @param boolean $clean (since 1.5.1) Do not output stuff that's irrelevant for the printable version.
 * @return string html formated message text
 */
function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX', $clean=FALSE) {
    global $startMessage, $languages, $squirrelmail_language,
           $show_html_default, $sort, $has_unsafe_images, $passed_ent_id,
           $use_iframe, $iframe_height, $download_and_unsafe_link,
           $download_href, $unsafe_image_toggle_href, $unsafe_image_toggle_text;

    // workaround for not updated config.php
    if (! isset($use_iframe)) {
        $use_iframe = false;
    }

    if (! sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET)) {
        $view_unsafe_images = false;
    }

    $body = '';
    $urlmailbox = urlencode($mailbox);
    $body_message = getEntity($message, $ent_num);

    /* Only text and rfc822 entities are shown as the primary message. To add
     * more of them, extend this check in the order of their priority.
     */
    $primary_type = $body_message->header->type0;
    if ($primary_type != 'text' && $primary_type != 'rfc822') {
        return $body;
    }

    $body = decodeBody(mime_fetch_body ($imap_stream, $id, $ent_num), $body_message->header->encoding);

    /* Language specific decoding of non-ASCII bodies. */
    if (isset($languages[$squirrelmail_language]['XTRA_CODE'])) {
        $xtra_decoder = $languages[$squirrelmail_language]['XTRA_CODE'] . '_decode';
        if (function_exists($xtra_decoder) && mb_detect_encoding($body) != 'ASCII') {
            $body = call_user_func($xtra_decoder, $body);
        }
    }

    /* As of 1.5.2, $body is passed (and modified) by reference */
    do_hook('message_body', $body);

    /* If there are other types that shouldn't be formatted, add
     * them here.
     */
    if ($body_message->header->type1 != 'html') {
        translateText($body, $wrap_at,
                      $body_message->header->getParameter('charset'));
    } elseif ($show_html_default <> 1) {
        /* HTML is not wanted: reduce the markup to plain text. */
        $plain_text_map = array('&nbsp;' => ' ',
                                '<p>' => "\n",
                                '<P>' => "\n",
                                '<br>' => "\n",
                                '<BR>' => "\n",
                                '<br />' => "\n",
                                '<BR />' => "\n",
                                '&gt;' => '>',
                                '&lt;' => '<');
        $body = trim(strip_tags(strtr($body, $plain_text_map)));
        translateText($body, $wrap_at,
                      $body_message->header->getParameter('charset'));
    } elseif ($use_iframe && ! $clean) {
        // $clean is used to remove iframe in printable view.

        /**
         * If we don't add html message between iframe tags,
         * we must detect unsafe images and modify $has_unsafe_images.
         */
        $html_body = magicHTML($body, $id, $message, $mailbox);
        // Convert character set in order to display html mails in different character set
        $html_body = charset_decode($body_message->header->getParameter('charset'),$html_body,false,true);

        // creating iframe url
        $iframeurl = sqm_baseuri() . 'src/view_html.php?'
                   . 'mailbox=' . $urlmailbox
                   . '&amp;passed_id=' . $id
                   . '&amp;ent_id=' . $ent_num
                   . '&amp;view_unsafe_images=' . (int) $view_unsafe_images;

        global $oTemplate;
        $oTemplate->assign('iframe_url', $iframeurl);
        $oTemplate->assign('html_body', $html_body);

        $body = $oTemplate->fetch('read_html_iframe.tpl');
    } else {
        // old way of html rendering
        $body = magicHTML($body, $id, $message, $mailbox);
        /**
         * convert character set. charset_decode does not remove html special chars
         * applied by magicHTML functions and does not sanitize them second time if
         * fourth argument is true.
         */
        $body = charset_decode($body_message->header->getParameter('charset'),$body,false,true);
    }

    // if this is the clean display (i.e. printer friendly), stop here.
    if ( $clean ) {
        return $body;
    }

    $download_and_unsafe_link = '';

    $link = 'passed_id=' . $id . '&amp;ent_id=' . $ent_num
          . '&amp;mailbox=' . $urlmailbox . '&amp;sort=' . $sort
          . '&amp;startMessage=' . $startMessage . '&amp;show_more=0';
    if (isset($passed_ent_id)) {
        $link .= '&amp;passed_ent_id='.$passed_ent_id;
    }

    /* The download link is built before the unsafe image switch is added. */
    $download_href = SM_PATH . 'src/download.php?absolute_dl=true&amp;' . $link;
    $download_and_unsafe_link .= '&nbsp;|&nbsp;<a href="'. $download_href .'">' . _("Download this as a file") . '</a>';

    if ($view_unsafe_images) {
        $text = _("Hide Unsafe Images");
    } elseif (isset($has_unsafe_images) && $has_unsafe_images) {
        $link .= '&amp;view_unsafe_images=1';
        $text = _("View Unsafe Images");
    } else {
        $text = '';
    }

    if ($text != '') {
        $unsafe_image_toggle_href = SM_PATH . 'src/read_body.php?'.$link;
        $unsafe_image_toggle_text = $text;
        $download_and_unsafe_link .= '&nbsp;|&nbsp;<a href="'. $unsafe_image_toggle_href .'">' . $text . '</a>';
    }

    return $body;
}
b74ba498 472
/**
 * Generate attachments array for passing to templates. Separated from
 * formatAttachments() below so that the same array can be given to the
 * print-friendly version.
 *
 * Every entry of the returned list is an array with the keys Name,
 * Description, DefaultHREF, DownloadHREF, ViewHREF, Size, ContentType and
 * OtherLinks (a list of arrays with the keys HREF and Text).
 *
 * @since 1.5.2
 * @param object $message SquirrelMail message object
 * @param array $exclude_id message parts that are not attachments.
 * @param string $mailbox mailbox name
 * @param integer $id message id
 * @return array list of attachment description arrays
 */
function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
    global $where, $what, $startMessage, $color, $passed_ent_id, $base_uri;

    $att_ar = $message->getAttachments($exclude_id);
    $urlMailbox = urlencode($mailbox);

    $attachments = array();
    foreach ($att_ar as $att) {
        $ent = $att->entity_id;
        $header = $att->header;
        $type0 = strtolower($header->type0);
        $type1 = strtolower($header->type1);
        $links = array();
        $links['download link']['text'] = _("Download");
        $links['download link']['href'] = $base_uri .
            "src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;ent_id=$ent";

        if ($type0 =='message' && $type1 == 'rfc822') {
            /* Attached messages are opened in the message viewer. */
            $default_page = $base_uri . 'src/read_body.php';
            $rfc822_header = $att->rfc822_header;
            $filename = $rfc822_header->subject;
            if (trim( $filename ) == '') {
                $filename = 'untitled-[' . $ent . ']' ;
            }
            $from_o = $rfc822_header->from;
            if (is_object($from_o)) {
                $from_name = decodeHeader($from_o->getAddress(false));
            } elseif (is_array($from_o) && count($from_o) && is_object($from_o[0])) {
                // something weird happens when a digest message is opened and you return to the digest
                // now the from object is part of an array. Probably the parseHeader call overwrites the info
                // retrieved from the bodystructure in a different way. We need to fix this later.
                // possible starting point, do not fetch header we already have and inspect how
                // the rfc822_header object behaves.
                $from_name = decodeHeader($from_o[0]->getAddress(false));
            } else {
                $from_name = _("Unknown sender");
            }
            $description = _("From").': '.$from_name;
        } else {
            $default_page = $base_uri . 'src/download.php';
            $filename = $att->getFilename();
            if ($header->description) {
                $description = decodeHeader($header->description);
            } else {
                $description = '';
            }
        }

        $display_filename = $filename;
        if (isset($passed_ent_id)) {
            $passed_ent_id_link = '&amp;passed_ent_id='.$passed_ent_id;
        } else {
            $passed_ent_id_link = '';
        }
        $defaultlink = $default_page . "?startMessage=$startMessage"
            . "&amp;passed_id=$id&amp;mailbox=$urlMailbox"
            . '&amp;ent_id='.$ent.$passed_ent_id_link;
        if ($where && $what) {
            $defaultlink .= '&amp;where='. urlencode($where).'&amp;what='.urlencode($what);
        }
        // IE does make use of mime content sniffing. Forcing a download
        // prohibit execution of XSS inside an application/octet-stream attachment
        if ($type0 == 'application' && $type1 == 'octet-stream') {
            $defaultlink .= '&amp;absolute_dl=true';
        }

        /* This executes the attachment hook with a specific MIME-type.
         * If that doesn't have results, it tries if there's a rule
         * for a more generic type. Finally, a hook for ALL attachment
         * types is run as well.
         *
         * The API for these hooks has changed as of 1.5.2 so that all plugin
         * arguments are passed in one array and by reference; plugins change
         * the original arguments instead of returning anything.
         *
         * The argument array is stored in a variable before every call
         * because do_hook() is handed its argument by reference: passing the
         * assignment expression itself raises "Only variables should be
         * passed by reference". A fresh array is built for each hook so one
         * plugin cannot alter the argument list seen by the next hook.
         */
        $hook_args = array(&$links,
            &$startMessage, &$id, &$urlMailbox, &$ent, &$defaultlink,
            &$display_filename, &$where, &$what);
        do_hook("attachment $type0/$type1", $hook_args);
        if(count($links) <= 1) {
            $hook_args = array(&$links,
                &$startMessage, &$id, &$urlMailbox, &$ent, &$defaultlink,
                &$display_filename, &$where, &$what);
            do_hook("attachment $type0/*", $hook_args);
        }
        $hook_args = array(&$links,
            &$startMessage, &$id, &$urlMailbox, &$ent, &$defaultlink,
            &$display_filename, &$where, &$what);
        do_hook("attachment */*", $hook_args);

        $this_attachment = array();
        $this_attachment['Name'] = decodeHeader($display_filename);
        $this_attachment['Description'] = $description;
        $this_attachment['DefaultHREF'] = $defaultlink;
        $this_attachment['DownloadHREF'] = $links['download link']['href'];
        $this_attachment['ViewHREF'] = isset($links['attachment_common']) ? $links['attachment_common']['href'] : '';
        $this_attachment['Size'] = $header->size;
        $this_attachment['ContentType'] = htmlspecialchars($type0 .'/'. $type1);
        $this_attachment['OtherLinks'] = array();
        foreach ($links as $val) {
            /* Links may come without 'text' or 'extra'; treat those as empty
             * instead of reading an undefined index.
             */
            $link_text  = empty($val['text'])  ? '' : $val['text'];
            $link_extra = empty($val['extra']) ? '' : $val['extra'];

            /* Download and View already have their own entries above. */
            if ($link_text == _("Download") || $link_text == _("View"))
                continue;
            if ($link_text === '' && $link_extra === '')
                continue;

            $other_link = array();
            $other_link['HREF'] = $val['href'];
            $other_link['Text'] = $link_text . $link_extra;
            $this_attachment['OtherLinks'][] = $other_link;
        }
        $attachments[] = $this_attachment;

        /* Break the reference bindings before the next attachment. */
        unset($hook_args);
        unset($links);
    }

    return $attachments;
}
610
/**
 * Displays attachment links and information
 *
 * Builds the attachment list with buildAttachmentArray(), assigns it to the
 * global template object as 'attachments' and displays the
 * read_attachments.tpl template.
 *
 * Since 1.3.0 function is not included in formatBody() call.
 *
 * Since 1.0.2 uses attachment $type0/$type1 hook.
 * Since 1.2.5 uses attachment $type0/* hook.
 * Since 1.5.0 uses attachments_bottom hook.
 * Since 1.5.2 uses templates and does *not* return a value.
 *
 * @param object $message SquirrelMail message object
 * @param array $exclude_id message parts that are not attachments.
 * @param string $mailbox mailbox name
 * @param integer $id message id
 */
function formatAttachments($message, $exclude_id, $mailbox, $id) {
    global $oTemplate;

    $attachment_list = buildAttachmentArray($message, $exclude_id, $mailbox, $id);
    $oTemplate->assign('attachments', $attachment_list);

    $oTemplate->display('read_attachments.tpl');
}
b74ba498 634
/**
 * Decodes one chunk of a base64 stream in place
 *
 * Base64 data comes in groups of 4 bytes. To decode on the fly (and keep
 * memory usage low) a chunk must not end in the middle of such a group, so
 * an incomplete trailing group is cut off before decoding and handed back
 * to the caller.
 *
 * @param string $string base64 chunk; replaced by the decoded data
 * @return string the undecoded trailing characters, or an empty string
 */
function sqimap_base64_decode(&$string) {

    // Strip line breaks and spaces so that only base64 characters are counted
    $string = str_replace(array("\r\n","\n", "\r", " "), '', $string);

    $leftover = '';
    $excess = strlen($string) % 4;
    if ($excess) {
        $tail = substr($string, -$excess);
        // A tail that ends in padding belongs to the end of the data and
        // stays in the chunk; anything else is held back.
        if (substr($tail, -1) != '=') {
            $leftover = $tail;
            $string = substr($string, 0, -$excess);
        }
    }

    $string = base64_decode($string);
    return $leftover;
}
658
/**
 * Decodes encoded message body
 *
 * Line endings are normalised to "\n" first. A function name returned by
 * the decode_body hook gets the first chance to decode the body; otherwise
 * quoted-printable and base64 are decoded here and every other encoding is
 * returned as is.
 * decode_body hook was added to this function in 1.4.2/1.5.0
 * @param string $body encoded message body
 * @param string $encoding used encoding
 * @return string decoded string
 * @since 1.0
 */
function decodeBody($body, $encoding) {

    $body = str_replace("\r\n", "\n", $body);
    $encoding = strtolower($encoding);

    // plugins get first shot at decoding the body
    //
    $encoding_handler = do_hook('decode_body', $encoding);
    if (!empty($encoding_handler) && function_exists($encoding_handler)) {
        return $encoding_handler('decode', $body);
    }

    switch ($encoding) {
        case 'quoted-printable':
        case 'quoted_printable':
            /**
             * quoted_printable_decode() function is broken in older
             * php versions. Text with \r\n decoding was fixed only
             * in php 4.3.0. Minimal code requirement 4.0.4 +
             * str_replace("\r\n", "\n", $body); call.
             */
            return quoted_printable_decode($body);
        case 'base64':
            return base64_decode($body);
    }

    // All other encodings are returned raw.
    return $body;
}
699
/**
 * Decodes headers
 *
 * This functions decode strings that is encoded according to
 * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
 * Patched by Christian Schmidt <christian@ostenfeld.dk> 23/03/2002
 *
 * The header is split on spaces and every piece is scanned for
 * "=?charset?Q|B?data?=" encoded words. Spaces between two consecutive
 * encoded words are dropped, all other spaces are kept (as "&#32;" when
 * $htmlsave is set).
 *
 * @param string $string header string that has to be made readable
 * @param boolean $utfencode change message in order to be readable on user's charset. defaults to true
 * @param boolean $htmlsave preserve spaces and sanitize html special characters. defaults to true
 * @param boolean $decide decide if string can be utfencoded. defaults to false
 * @return string decoded header string
 */
function decodeHeader ($string, $utfencode=true,$htmlsave=true,$decide=false) {
    global $languages, $squirrelmail_language,$default_charset;
    if (is_array($string)) {
        $string = implode("\n", $string);
    }

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader')) {
        $string = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader', $string);
        // Do we need to return at this point?
        // return $string;
    }
    $i = 0;             // index of the current non-empty chunk
    $iLastMatch = -2;   // index of the last chunk that held an encoded word
    $encoded = true;    // true while the previous chunk ended in an encoded word

    $aString = explode(' ', (string) $string);
    $ret = '';
    foreach ($aString as $chunk) {
        if ($encoded && $chunk === '') {
            continue;
        } elseif ($chunk === '') {
            $ret .= ' ';
            continue;
        }
        $encoded = false;
        /* if encoded words are not separated by a linear-space-white we still catch them */
        $j = $i-1;

        while (preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) {
            /* if the last chunk isn't an encoded string then put back the space, otherwise don't */
            if ($iLastMatch !== $j) {
                if ($htmlsave) {
                    $ret .= '&#32;';
                } else {
                    $ret .= ' ';
                }
            }
            $iLastMatch = $i;
            $j = $i;
            /* $res[1] is the plain text in front of the encoded word */
            if ($htmlsave) {
                $ret .= htmlspecialchars($res[1]);
            } else {
                $ret .= $res[1];
            }
            $encoding = ucfirst($res[3]);

            /* decide about valid decoding */
            if ($decide && is_conversion_safe($res[2])) {
                $utfencode=true;
                $can_be_encoded=true;
            } else {
                $can_be_encoded=false;
            }
            switch ($encoding)
            {
                case 'B':
                    $replace = base64_decode($res[4]);
                    if ($utfencode) {
                        if ($can_be_encoded) {
                            /* convert string to different charset,
                             * if functions asks for it (usually in compose)
                             */
                            $ret .= charset_convert($res[2],$replace,$default_charset,$htmlsave);
                        } else {
                            // convert string to html codes in order to display it
                            $ret .= charset_decode($res[2],$replace);
                        }
                    } else {
                        if ($htmlsave) {
                            $replace = htmlspecialchars($replace);
                        }
                        $ret.= $replace;
                    }
                    break;
                case 'Q':
                    $replace = str_replace('_', ' ', $res[4]);
                    /* Turn every =XX hex escape into its byte. The /e
                     * modifier formerly used here is gone since PHP 7 and
                     * made preg_replace() return NULL, so a callback does
                     * the conversion.
                     */
                    $replace = preg_replace_callback('/=([0-9a-f]{2})/i',
                        function ($hex) {
                            return chr(hexdec($hex[1]));
                        },
                        $replace);
                    if ($utfencode) {
                        if ($can_be_encoded) {
                            /* convert string to different charset,
                             * if functions asks for it (usually in compose)
                             */
                            $replace = charset_convert($res[2], $replace,$default_charset,$htmlsave);
                        } else {
                            // convert string to html codes in order to display it
                            $replace = charset_decode($res[2], $replace);
                        }
                    } else {
                        if ($htmlsave) {
                            $replace = htmlspecialchars($replace);
                        }
                    }
                    $ret .= $replace;
                    break;
                default:
                    break;
            }
            /* keep scanning whatever follows the encoded word */
            $chunk = $res[5];
            $encoded = true;
        }
        if (!$encoded) {
            if ($htmlsave) {
                $ret .= '&#32;';
            } else {
                $ret .= ' ';
            }
        }

        if (!$encoded && $htmlsave) {
            $ret .= htmlspecialchars($chunk);
        } else {
            $ret .= $chunk;
        }
        ++$i;
    }
    /* remove the first added space */
    if ($ret) {
        if ($htmlsave) {
            $ret = substr($ret,5);
        } else {
            $ret = substr($ret,1);
        }
    }

    return $ret;
}
841
/**
 * Encodes a header value for use in a mail header (RFC 2047 encoded-words).
 *
 * Processing order:
 *  1. If the active language defines XTRA_CODE and a function named
 *     "<XTRA_CODE>_encodeheader" exists, that function does all the work.
 *  2. If $default_charset is one of the multibyte charsets listed below, is
 *     supported by mbstring (sq_mb_list_encodings()) and the string is 8bit
 *     (sq_is8bit()), encodeHeaderBase64() is used (B encoding).
 *  3. Otherwise the string is Q encoded by the loop below. The encoded form
 *     is only returned when at least one byte with an ordinal above 126 was
 *     seen; in every other case the input is returned unchanged.
 *
 * Each encoded-word is limited to 75 - strlen($default_charset) - 7 payload
 * characters; longer input is split into several encoded-words.
 *
 * @param string $string header string, that has to be encoded
 * @return string encoded string, or the original string if nothing had to
 *                be encoded
 * @todo make $mb_charsets system wide constant
 */
function encodeHeader ($string) {
    global $default_charset, $languages, $squirrelmail_language;

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
        function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader')) {
        return call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader', $string);
    }

    // Use B encoding for multibyte charsets.
    // ('gb2312' was misspelled as 'gb2313', so that charset never matched.)
    $mb_charsets = array('utf-8','big5','gb2312','euc-kr');
    if (in_array($default_charset,$mb_charsets) &&
        in_array($default_charset,sq_mb_list_encodings()) &&
        sq_is8bit($string)) {
        return encodeHeaderBase64($string,$default_charset);
    }
    // A multibyte charset that mbstring does not support falls through to
    // Q encoding. No E_USER_NOTICE is raised here on purpose: it can cause a
    // 'Cannot add header information' warning in compose.php.

    // Encode only if the string contains 8-bit characters
    $j = strlen($string);
    $max_l = 75 - strlen($default_charset) - 7;
    $aRet = array();
    $ret = '';
    $iEncStart = $enc_init = false;
    $cur_l = $iOffset = 0;
    for ($i = 0; $i < $j; ++$i) {
        switch ($string[$i])
        {
            case '=':
            case '<':
            case '>':
            case ',':
            case '?':
            case '_':
                if ($iEncStart === false) {
                    $iEncStart = $i;
                }
                $cur_l += 3;
                if ($cur_l > ($max_l-2)) {
                    /* if there is a string part that doesn't need encoding, add it */
                    $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                    $aRet[] = "=?$default_charset?Q?$ret?=";
                    $iOffset = $i;
                    $cur_l = 0;
                    $ret = '';
                    $iEncStart = false;
                } else {
                    $ret .= sprintf("=%02X",ord($string[$i]));
                }
                break;
            case '(':
            case ')':
                /* parentheses always terminate a running encoded-word */
                if ($iEncStart !== false) {
                    $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                    $aRet[] = "=?$default_charset?Q?$ret?=";
                    $iOffset = $i;
                    $cur_l = 0;
                    $ret = '';
                    $iEncStart = false;
                }
                break;
            case ' ':
                /* inside an encoded-word a space is written as '_' */
                if ($iEncStart !== false) {
                    $cur_l++;
                    if ($cur_l > $max_l) {
                        $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                        $aRet[] = "=?$default_charset?Q?$ret?=";
                        $iOffset = $i;
                        $cur_l = 0;
                        $ret = '';
                        $iEncStart = false;
                    } else {
                        $ret .= '_';
                    }
                }
                break;
            default:
                $k = ord($string[$i]);
                if ($k > 126) {
                    if ($iEncStart === false) {
                        // do not start encoding in the middle of a string, also take the rest of the word.
                        $sLeadString = substr($string,0,$i);
                        $aLeadString = explode(' ',$sLeadString);
                        $sToBeEncoded = array_pop($aLeadString);
                        $iEncStart = $i - strlen($sToBeEncoded);
                        $ret .= $sToBeEncoded;
                        $cur_l += strlen($sToBeEncoded);
                    }
                    $cur_l += 3;
                    /* first we add the encoded string that reached its max size */
                    if ($cur_l > ($max_l-2)) {
                        $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                        $aRet[] = "=?$default_charset?Q?$ret?= "; /* the next part is also encoded => separate by space */
                        $cur_l = 3;
                        $ret = '';
                        $iOffset = $i;
                        $iEncStart = $i;
                    }
                    $enc_init = true;
                    $ret .= sprintf("=%02X", $k);
                } else {
                    if ($iEncStart !== false) {
                        $cur_l++;
                        if ($cur_l > $max_l) {
                            $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
                            $aRet[] = "=?$default_charset?Q?$ret?=";
                            $iEncStart = false;
                            $iOffset = $i;
                            $cur_l = 0;
                            $ret = '';
                        } else {
                            $ret .= $string[$i];
                        }
                    }
                }
                break;
        }
    }

    /* only use the assembled parts when an 8bit character was really seen */
    if ($enc_init) {
        if ($iEncStart !== false) {
            $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
            $aRet[] = "=?$default_charset?Q?$ret?=";
        } else {
            $aRet[] = substr($string,$iOffset);
        }
        $string = implode('',$aRet);
    }
    return $string;
}
b74ba498 993
/**
 * Encodes string according to rfc2047 B encoding header formatting rules
 *
 * This is the recommended way to encode headers with character sets that
 * store symbols in more than one byte. The string is cut on character
 * boundaries (mb_substr), never in the middle of a multibyte sequence, so
 * that the base64 payload of every encoded-word stays within
 * 75 - strlen($charset) - 7 characters. Encoded-words are joined with a
 * single space.
 *
 * Function requires mbstring support. If required mbstring functions are
 * missing, function returns false and raises an E_USER_WARNING.
 * mb_encode_mimeheader() is deliberately not used in order to keep control
 * of the header encoding.
 *
 * An empty $string yields an empty string (there is nothing to encode).
 *
 * Related documents: rfc 2045 (BASE64 encoding), rfc 2047 (mime header
 * encoding), rfc 2822 (header folding)
 *
 * @param string $string header string that must be encoded
 * @param string $charset character set. Must be supported by mbstring extension.
 * Use sq_mb_list_encodings() to detect supported charsets.
 * @return string|false string encoded according to rfc2047 B encoding rules,
 *                      false when mbstring functions are unavailable
 * @since 1.5.1
 * @todo First header line can be wrapped to $iMaxLength - $HeaderFieldLength - 1
 * @todo Do we want to control max length of header?
 * @todo Do we want to control EOL (end-of-line) marker?
 * @todo Do we want to translate error message?
 */
function encodeHeaderBase64($string,$charset) {
    /**
     * Check mbstring function requirements.
     */
    if (! function_exists('mb_strlen') ||
        ! function_exists('mb_substr')) {
        trigger_error('encodeHeaderBase64: Required mbstring functions are missing.',E_USER_WARNING);
        return false;
    }

    // nothing to encode; avoids emitting an empty encoded-word
    if ((string) $string === '') {
        return '';
    }

    // encoded parts, one entry per encoded-word
    $aRet = array();

    /**
     * header length = 75 symbols max (same as in encodeHeader)
     * remove $charset length
     * remove =? ? ?= (5 chars)
     * remove 2 more chars (\r\n ?)
     */
    $iMaxLength = 75 - strlen($charset) - 7;

    // first character of the part that is currently being built
    $iStartCharNum = 0;

    // number of characters (not bytes); evaluated once, the string never changes
    $iTotalChars = mb_strlen($string,$charset);

    $encoded_string = '';

    // loop through all characters. count characters and not bytes.
    for ($iCharNum = 1; $iCharNum <= $iTotalChars; $iCharNum++) {
        // encode string from starting character to current character.
        $encoded_string = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum,$charset));

        if (strlen($encoded_string) > $iMaxLength) {
            // too long: store the part without the current character ...
            $aRet[] = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum-1,$charset));

            // ... and start the next part with the current character
            $iStartCharNum = $iCharNum-1;

            // encode it right away (in case it is the last character in string)
            $encoded_string = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum,$charset));
        }
    }

    // add last encoded part
    $aRet[] = $encoded_string;

    // wrap every part in header tags and join the encoded-words with a space
    // TODO: Do we want to control EOL (end-of-line) marker
    $sRet = '';
    foreach ($aRet as $sPart) {
        if ($sRet != '') {
            $sRet .= " ";
        }
        $sRet .= '=?'.$charset.'?B?'.$sPart.'?=';
    }

    return $sRet;
}
1087
/**
 * Tries to locate the entity_id of a specific mime element.
 *
 * Walks $message->entities in order. Entities whose header type0 is
 * 'multipart' are searched recursively; for every other entity the header
 * id is compared case-insensitively with $id. If the id does not match and
 * the entity has a non-empty 'name' header parameter, that name is compared
 * with $id as well (part of a fix for Outlook Express 6.x generating cid
 * URLs without creating content-id headers, @@JA - 20050207).
 *
 * @param string $id      content id (or attachment name) to look for
 * @param object $message message object exposing ->entities; each entity is
 *                        read through ->header->type0, ->header->id,
 *                        ->header->parameters and ->entity_id
 * @return string entity_id of the first match, empty string if none found
 */
function find_ent_id($id, $message) {
    $ret = '';
    $iEntities = count($message->entities);
    for ($i = 0; $ret == '' && $i < $iEntities; $i++) {
        $entity = $message->entities[$i];
        if ($entity->header->type0 == 'multipart') {
            $ret = find_ent_id($id, $entity);
        } elseif (strcasecmp((string) $entity->header->id, (string) $id) == 0) {
            return $entity->entity_id;
        } elseif (!empty($entity->header->parameters['name'])) {
            if (strcasecmp((string) $entity->header->parameters['name'], (string) $id) == 0) {
                return $entity->entity_id;
            }
        }
    }
    return $ret;
}
a3daaaf3 1112
/**
 * Checks whether the filename of a message part ends in a known image
 * extension (jpg, jpeg, gif, png, bmp).
 *
 * The extension is everything after the last dot and is compared
 * case-insensitively. A filename without any dot has no extension and is
 * rejected (previously "agif" was mistaken for extension "gif").
 *
 * @param object $message object providing getFilename()
 * @return bool true if the extension is in the list
 */
function sq_check_save_extension($message) {
    $filename = $message->getFilename();
    $iDot = strrpos($filename, '.');
    if ($iDot === false) {
        return false;
    }
    $ext = strtolower(substr($filename, $iDot + 1));
    $save_extensions = array('jpg','jpeg','gif','png','bmp');
    return in_array($ext, $save_extensions, true);
}
1119
1120
691a2d25 1121/**
8bd0068d 1122 ** HTMLFILTER ROUTINES
1123 */
451f74a2 1124
/**
 * Translates entity-encoded and backslash-escaped characters in an
 * attribute value into literal bytes so checks can be run on the result.
 *
 * Values without '&' and without a backslash are left untouched. Otherwise
 * sq_deent() is applied repeatedly - decimal entities first, then hex
 * entities, then backslash followed by digits (decoded as hex) - until a
 * full pass finds nothing more to translate. Only one kind is translated
 * per pass. Finally stripslashes() removes the remaining backslashes.
 *
 * @param string $attvalue attribute value, modified in place
 * @return void
 */
function sq_defang(&$attvalue){
    /**
     * Skip this if there aren't ampersands or backslashes.
     */
    if (strpos($attvalue, '&') === false
        && strpos($attvalue, '\\') === false){
        return;
    }
    do {
        // short-circuit: the first pattern that matches ends this pass
        $m = sq_deent($attvalue, '/\&#0*(\d+);*/s')
            || sq_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
            || sq_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($m == true);
    $attvalue = stripslashes($attvalue);
}
1156
/**
 * Kills any tabs, newlines, carriage returns, NUL bytes and spaces. Our
 * friends the makers of the browser with 95% market value decided that
 * it'd be funny to make "java[tab]script" be just as good as "javascript".
 *
 * @param string $attvalue attribute value, modified in place
 * @return void
 */
function sq_unspace(&$attvalue){
    // only rewrite the value when it really contains one of the characters
    if (strcspn($attvalue, "\t\r\n\0 ") != strlen($attvalue)){
        $attvalue = str_replace(array("\t", "\r", "\n", "\0", " "),
                                array('', '', '', '', ''), $attvalue);
    }
}
1172
/**
 * Translates all dangerous Unicode or Shift_JIS character entities which
 * are accepted by IE as regular characters.
 *
 * Steps, in this order: NUL bytes are removed, C-style comments are removed,
 * the entities listed in the table below are replaced by the plain letter
 * they stand for, and finally a backslash in front of an ASCII letter is
 * dropped.
 *
 * The table is written as entity => letter pairs so that an entity and its
 * replacement cannot drift apart. (The former pair of parallel arrays had
 * fewer replacements than entities: the fullwidth R/r letters and one
 * superscript n were missing, which shifted every later replacement.)
 *
 * @param string $attvalue attribute value, modified in place
 * @return void
 * @author Marc Groot Koerkamp.
 */
function sq_fixIE_idiocy(&$attvalue) {
    // remove NUL
    $attvalue = str_replace("\0", "", $attvalue);
    // remove comments
    $attvalue = preg_replace("/(\/\*.*?\*\/)/","",$attvalue);

    // IE has the evil habit of accepting every possible value for the attribute expression.
    // The table below contains characters which are parsed by IE if they are used in the "expression"
    // attribute value.
    $aDangerousChars = array(
        '&#x029F;' => 'l', '&#0671;'  => 'l', /* L UNICODE IPA Extension */
        '&#x0280;' => 'r', '&#0640;'  => 'r', /* R UNICODE IPA Extension */
        '&#x0274;' => 'n', '&#0628;'  => 'n', /* N UNICODE IPA Extension */
        /* the two decimal forms below were listed without ';' - both spellings are handled,
           the longer one first so that no stray ';' is left behind */
        '&#xFF25;' => 'E', '&#65317;' => 'E', '&#65317' => 'E', /* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
        '&#xFF45;' => 'e', '&#65349;' => 'e', '&#65349' => 'e', /* Unicode FULLWIDTH LATIN SMALL LETTER E */
        '&#xFF38;' => 'X', '&#65336;' => 'X', /* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
        '&#xFF58;' => 'x', '&#65368;' => 'x', /* Unicode FULLWIDTH LATIN SMALL LETTER X */
        '&#xFF30;' => 'P', '&#65328;' => 'P', /* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
        '&#xFF50;' => 'p', '&#65360;' => 'p', /* Unicode FULLWIDTH LATIN SMALL LETTER P */
        '&#xFF32;' => 'R', '&#65330;' => 'R', /* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
        '&#xFF52;' => 'r', '&#65362;' => 'r', /* Unicode FULLWIDTH LATIN SMALL LETTER R */
        '&#xFF33;' => 'S', '&#65331;' => 'S', /* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
        '&#xFF53;' => 's', '&#65363;' => 's', /* Unicode FULLWIDTH LATIN SMALL LETTER S */
        '&#xFF29;' => 'I', '&#65321;' => 'I', /* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
        '&#xFF49;' => 'i', '&#65353;' => 'i', /* Unicode FULLWIDTH LATIN SMALL LETTER I */
        '&#xFF2F;' => 'O', '&#65327;' => 'O', /* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
        '&#xFF4F;' => 'o', '&#65359;' => 'o', /* Unicode FULLWIDTH LATIN SMALL LETTER O */
        '&#xFF2E;' => 'N', '&#65326;' => 'N', /* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
        '&#xFF4E;' => 'n', '&#65358;' => 'n', /* Unicode FULLWIDTH LATIN SMALL LETTER N */
        '&#xFF2C;' => 'L', '&#65324;' => 'L', /* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
        '&#xFF4C;' => 'l', '&#65356;' => 'l', /* Unicode FULLWIDTH LATIN SMALL LETTER L */
        '&#xFF35;' => 'U', '&#65333;' => 'U', /* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
        '&#xFF55;' => 'u', '&#65365;' => 'u', /* Unicode FULLWIDTH LATIN SMALL LETTER U */
        '&#x207F;' => 'n', '&#8319;'  => 'n', /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
        '&#x8264;' => 'E', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER E */ // in unicode this is some Chinese char range
        '&#x8285;' => 'e', /* Shift JIS FULLWIDTH LATIN SMALL LETTER E */
        '&#x8277;' => 'X', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER X */
        '&#x8298;' => 'x', /* Shift JIS FULLWIDTH LATIN SMALL LETTER X */
        '&#x826F;' => 'P', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER P */
        '&#x8290;' => 'p', /* Shift JIS FULLWIDTH LATIN SMALL LETTER P */
        '&#x8271;' => 'R', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER R */
        '&#x8292;' => 'r', /* Shift JIS FULLWIDTH LATIN SMALL LETTER R */
        '&#x8272;' => 'S', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER S */
        '&#x8293;' => 's', /* Shift JIS FULLWIDTH LATIN SMALL LETTER S */
        '&#x8268;' => 'I', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER I */
        '&#x8289;' => 'i', /* Shift JIS FULLWIDTH LATIN SMALL LETTER I */
        '&#x826E;' => 'O', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER O */
        '&#x828F;' => 'o', /* Shift JIS FULLWIDTH LATIN SMALL LETTER O */
        '&#x826D;' => 'N', /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER N */
        '&#x828E;' => 'n'  /* Shift JIS FULLWIDTH LATIN SMALL LETTER N */
    );
    $attvalue = str_replace(array_keys($aDangerousChars), array_values($aDangerousChars), $attvalue);

    // Escapes are useful for special characters like "{}[]()'&. In other cases they are
    // used for XSS.
    $attvalue = preg_replace("/(\\\\)([a-zA-Z]{1})/",'$2',$attvalue);
}
1241
/**
 * Builds the final tag out of the tag name, an array of attributes, and
 * the type of the tag. This function is called by sq_sanitize internally.
 *
 * Tag types: 2 is a closing tag (attributes are ignored), 3 is an
 * XHTML-style self-closing tag (" /" is put before the ">"), every other
 * value produces a plain opening tag. Attribute values are emitted as they
 * are, so they must already carry their quotes.
 *
 * @param string $tagname the name of the tag.
 * @param array  $attary  the array of attributes (name => value)
 * @param int    $tagtype the type of the tag (see above).
 * @return string the final tag representation.
 */
function sq_tagprint($tagname, $attary, $tagtype){
    if ($tagtype == 2){
        return '</' . $tagname . '>';
    }

    $fulltag = '<' . $tagname;
    if (is_array($attary) && sizeof($attary)){
        $atts = array();
        // foreach instead of each(): each() no longer exists in PHP 8
        foreach ($attary as $attname => $attvalue){
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . join(" ", $atts);
    }
    if ($tagtype == 3){
        $fulltag .= ' /';
    }
    return $fulltag . '>';
}
a3daaaf3 1273
/**
 * A small helper function to use with array_walk. Replaces the value it is
 * given with its lowercase form (strtolower).
 *
 * @param string $val a value passed by-ref.
 * @return void since it modifies a by-ref value.
 */
function sq_casenormalize(&$val){
    $val = strtolower($val);
}
451f74a2 1284
/**
 * Skips any whitespace from the current position within a string and
 * returns the position of the next non-whitespace character.
 *
 * The match is anchored at $offset with \G and run on $body directly, so
 * the rest of the body is not copied on every call.
 *
 * @param string $body   the string
 * @param int    $offset the offset within the string where we should start
 *                       looking for the next non-whitespace character.
 * @return int the location within the $body where the next non-whitespace
 *             char is located; $offset itself if there is no whitespace at
 *             $offset or $offset lies at/after the end of $body.
 */
function sq_skipspace($body, $offset){
    if ($offset >= strlen($body)){
        return $offset;
    }
    $matches = array();
    if (preg_match('/\G\s+/s', $body, $matches, 0, $offset)){
        $offset += strlen($matches[0]);
    }
    return $offset;
}
a3daaaf3 1304
/**
 * Looks for the next occurrence of a string within a string. It's really
 * just a glorified "strpos", except it catches failures nicely.
 *
 * @param string $body   The string to look for needle in.
 * @param int    $offset Start looking from this position.
 * @param string $needle The character/string to look for.
 * @return int location of the next occurrence of the needle, or
 *             strlen($body) if needle wasn't found (also when $offset lies
 *             beyond the end of $body).
 */
function sq_findnxstr($body, $offset, $needle){
    $iLength = strlen($body);
    // strpos() refuses offsets beyond the end of the string
    if ($offset > $iLength){
        return $iLength;
    }
    $pos = strpos($body, $needle, $offset);
    if ($pos === false){
        $pos = $iLength;
    }
    return $pos;
}
a3daaaf3 1324
/**
 * Takes a PCRE-style regexp and tries to match it within the string,
 * starting at $offset. Matching is case-insensitive and "." also matches
 * newlines.
 *
 * $reg is embedded in a pattern delimited by "%", so it must not contain
 * an unescaped "%".
 *
 * @param string $body   The string to look for needle in.
 * @param int    $offset Start looking from here.
 * @param string $reg    A PCRE-style regex to match.
 * @return array|false false if no (non-empty) match was found, or an array
 *                     with the following members:
 *                     - integer with the location of the match within $body
 *                     - string with whatever content between offset and the match
 *                     - string with whatever it is we matched
 */
function sq_findnxreg($body, $offset, $reg){
    $matches = array();
    $bFound = preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches);
    // compare against '' explicitly: a match that is just "0" is a valid match
    if (!$bFound || !isset($matches[0]) || $matches[0] === ''){
        return false;
    }
    $retarr = array();
    $retarr[0] = $offset + strlen($matches[1]);
    $retarr[1] = $matches[1];
    $retarr[2] = $matches[2];
    return $retarr;
}
a3daaaf3 1352
/**
 * Looks for the next tag.
 *
 * Relies on sq_findnxstr(), sq_skipspace() and sq_findnxreg() to move
 * through $body.
 *
 * @param string $body   String where to look for the next tag.
 * @param int    $offset Start looking from here.
 * @return array|false false if no more tags exist in the body, or
 *                     an array with the following members:
 *                     - string with the name of the tag
 *                     - array with attributes and their values
 *                       (false if the tag ended right after its name)
 *                     - integer with tag type (1, 2, or 3)
 *                     - integer where the tag starts (starting "<")
 *                     - integer where the tag ends (ending ">")
 *                     first three members will be false, if the tag is invalid.
 */
function sq_getnxtag($body, $offset){
    if ($offset > strlen($body)){
        return false;
    }
    $lt = sq_findnxstr($body, $offset, "<");
    if ($lt == strlen($body)){
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = sq_skipspace($body, $lt+1);
    if ($pos >= strlen($body)){
        return Array(false, false, false, $lt, strlen($body));
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah" />
     */
    $tagtype = false;
    switch (substr($body, $pos, 1)){
        case '/':
            $tagtype = 2;
            $pos++;
            break;
        case '!':
            /**
             * A comment or an SGML declaration.
             */
            if (substr($body, $pos+1, 2) == "--"){
                $gt = strpos($body, "-->", $pos);
                if ($gt === false){
                    $gt = strlen($body);
                } else {
                    $gt += 2;
                }
                return Array(false, false, false, $lt, $gt);
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                return Array(false, false, false, $lt, $gt);
            }
            break;
        default:
            /**
             * Assume tagtype 1 for now. If it's type 3, we'll switch values
             * later.
             */
            $tagtype = 1;
            break;
    }

    $tagname = '';
    /**
     * Look for next [\W-_], which will indicate the end of the tag name.
     */
    $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
    if ($regary == false){
        return Array(false, false, false, $lt, strlen($body));
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match){
        case '/':
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah" />. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == "/>"){
                $pos++;
                $tagtype = 3;
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                $retary = Array(false, false, false, $lt, $gt);
                return $retary;
            }
            /* intentional fall-through: "/>" ends the tag like ">" does */
        case '>':
            return Array($tagname, false, $tagtype, $lt, $pos);
            break;
        default:
            /**
             * Check if it's whitespace
             */
            if (!preg_match('/\s/', $match)){
                /**
                 * This is an invalid tag! Look for the next closing ">".
                 */
                $gt = sq_findnxstr($body, $lt, ">");
                return Array(false, false, false, $lt, $gt);
            }
            break;
    }

    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attname = '';
    $attary = Array();

    while ($pos <= strlen($body)){
        $pos = sq_skipspace($body, $pos);
        if ($pos == strlen($body)){
            /**
             * Non-closed tag.
             */
            return Array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = Array();
        if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) {
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == "/>"){
                $tagtype = 3;
                $pos++;
            }
            return Array($tagname, $attary, $tagtype, $lt, $pos);
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = sq_findnxreg($body, $pos, "[^:\w\-_]");
        if ($regary == false){
            /**
             * Looks like body ended before the end of tag.
             */
            return Array(false, false, false, $lt, strlen($body));
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch($match){
            case '/':
                /**
                 * This is an xhtml-style tag with a closing / at the
                 * end, like so: <img src="blah" />. Check if it's followed
                 * by the closing bracket. If not, then this tag is invalid
                 */
                if (substr($body, $pos, 2) == "/>"){
                    $pos++;
                    $tagtype = 3;
                } else {
                    $gt = sq_findnxstr($body, $pos, ">");
                    $retary = Array(false, false, false, $lt, $gt);
                    return $retary;
                }
                /* intentional fall-through: "/>" ends the tag like ">" does */
            case '>':
                $attary[$attname] = '"yes"';
                return Array($tagname, $attary, $tagtype, $lt, $pos);
                break;
            default:
                /**
                 * Skip whitespace and see what we arrive at.
                 */
                $pos = sq_skipspace($body, $pos);
                $char = substr($body, $pos, 1);
                /**
                 * Two things are valid here:
                 * '=' means this is attribute type 1 2 or 3.
                 * \w means this was attribute type 4.
                 * anything else we ignore and re-loop. End of tag and
                 * invalid stuff will be caught by our checks at the beginning
                 * of the loop.
                 */
                if ($char == "="){
                    $pos++;
                    $pos = sq_skipspace($body, $pos);
                    /**
                     * Here are 3 possibilities:
                     * "'"  attribute type 1
                     * '"'  attribute type 2
                     * everything else is the content of tag type 3
                     */
                    $quot = substr($body, $pos, 1);
                    if ($quot == "'"){
                        $regary = sq_findnxreg($body, $pos+1, "\'");
                        if ($regary == false){
                            return Array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        $pos++;
                        $attary[$attname] = "'" . $attval . "'";
                    } else if ($quot == '"'){
                        $regary = sq_findnxreg($body, $pos+1, '\"');
                        if ($regary == false){
                            return Array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        $pos++;
                        $attary[$attname] = '"' . $attval . '"';
                    } else {
                        /**
                         * These are hateful. Look for \s, or >.
                         */
                        $regary = sq_findnxreg($body, $pos, "[\s>]");
                        if ($regary == false){
                            return Array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        /**
                         * If it's ">" it will be caught at the top.
                         */
                        $attval = preg_replace("/\"/s", "&quot;", $attval);
                        $attary[$attname] = '"' . $attval . '"';
                    }
                } else if (preg_match("|[\w/>]|", $char)) {
                    /**
                     * That was attribute type 4.
                     */
                    $attary[$attname] = '"yes"';
                } else {
                    /**
                     * An illegal character. Find next '>' and return.
                     */
                    $gt = sq_findnxstr($body, $pos, ">");
                    return Array(false, false, false, $lt, $gt);
                }
                break;
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return Array(false, false, false, $lt, strlen($body));
}
1643
/**
 * Translates entities into literal values so they can be checked.
 *
 * Every match of $regex is replaced by the single byte chr() produces for
 * the number captured in the first sub-pattern. The number is read as
 * hexadecimal when $hex is true.
 *
 * @param string $attvalue the by-ref value to check.
 * @param string $regex    the regular expression to check against; its
 *                         first sub-pattern must capture the number.
 * @param bool   $hex      whether the entities are hexadecimal.
 * @return bool True or False depending on whether there were matches.
 */
function sq_deent(&$attvalue, $regex, $hex=false){
    $matches = array();
    // preg_match_all() gives 0 for "no match" and false on error
    if (!preg_match_all($regex, $attvalue, $matches)){
        return false;
    }
    $repl = array();
    $iMatches = sizeof($matches[0]);
    for ($i = 0; $i < $iMatches; $i++){
        $numval = $matches[1][$i];
        if ($hex){
            $numval = hexdec($numval);
        }
        // chr() wants an integer; the capture is a digit string (or the float
        // hexdec() may return for very long input)
        $repl[$matches[0][$i]] = chr((int) $numval);
    }
    $attvalue = strtr($attvalue, $repl);
    return true;
}
1673
/**
 * This function runs various checks against the attributes of one tag.
 *
 * For every attribute it: drops it when its name is listed in $rm_attnames,
 * normalises the value (IE quirks, entities, whitespace), applies the
 * $bad_attvals regex replacements, substitutes a blank image for an empty
 * src and rewrites cid: / outbind:// urls through sq_cid2http(). Finally
 * the attributes configured in $add_attr_to_tag are merged in.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes.
 * @param  $rm_attnames     See description for sq_sanitize
 * @param  $bad_attvals     See description for sq_sanitize
 * @param  $add_attr_to_tag See description for sq_sanitize
 * @param  $message         message object
 * @param  $id              message id
 * @param  $mailbox         mailbox of the message (passed to sq_cid2http)
 * @return                  Array with modified attributes.
 */
function sq_fixatts($tagname,
                    $attary,
                    $rm_attnames,
                    $bad_attvals,
                    $add_attr_to_tag,
                    $message,
                    $id,
                    $mailbox
                    ){
    // Iterate over a snapshot of the keys: the loop body unsets and
    // overwrites entries of $attary.
    foreach (array_keys($attary) as $attname){
        $attvalue = $attary[$attname];
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag=>$matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr){
                    if (preg_match($matchattr, $attname)){
                        unset($attary[$attname]);
                        // Skip straight to the next attribute. A plain
                        // "continue" would only leave the innermost loop
                        // and let the code below re-add the attribute.
                        continue 3;
                    }
                }
            }
        }
        /**
         * Workaround for IE quirks
         */
        sq_fixIE_idiocy($attvalue);

        /**
         * Remove any backslashes, entities, and extraneous whitespace.
         */
        $oldattvalue = $attvalue;
        sq_defang($attvalue);
        if ($attname == 'style' && $attvalue !== $oldattvalue) {
            // entities are used in the attribute value. In 99% of the cases it's there as XSS
            // i.e.<div style="{ left:exp&#x0280;essio&#x0274;( alert('XSS') ) }">
            $attvalue = "idiocy";
            $attary[$attname] = $attvalue;
        }
        sq_unspace($attvalue);

        /**
         * Now let's run checks on the attvalues: for every tag pattern
         * and attribute-name pattern that applies, run the configured
         * regex replacements against the normalised value.
         */
        foreach ($bad_attvals as $matchtag=>$matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr=>$valary){
                    if (preg_match($matchattr, $attname)){
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue =
                            preg_replace($valmatch, $valrepl, $attvalue);
                        if ($newvalue != $attvalue){
                            $attary[$attname] = $newvalue;
                        }
                    }
                }
            }
        }

        /**
         * Replace empty src tags with the blank image. src is only used
         * for frames, images, and image inputs. Doing a replace should
         * not affect them working as should be, however it will stop
         * IE from being kicked off when src for img tags are not set
         */
        if (($attname == 'src') && ($attvalue == '""')) {
            $attary[$attname] = '"' . SM_PATH . 'images/blank.png"';
        }

        /**
         * Turn cid: urls into http-friendly ones.
         */
        if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
            $attary[$attname] = sq_cid2http($message, $id, $attvalue, $mailbox);
        }

        /**
         * "Hack" fix for Outlook using proprietary outbind:// protocol in img tags.
         * One day MS might actually make it match something useful, for now, falling
         * back to using cid2http, so we can grab the blank.png.
         */
        if (preg_match("/^[\'\"]\s*outbind:\/\//si", $attvalue)) {
            $attary[$attname] = sq_cid2http($message, $id, $attvalue, $mailbox);
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag=>$addattary){
        if (preg_match($matchtag, $tagname)){
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
a3daaaf3 1792
/**
 * This function edits the style definition to make them friendly and
 * usable in SquirrelMail.
 *
 * It scans $body from $pos for the closing </style> tag (skipping html
 * comments), then rewrites the collected css: "body" selectors become
 * ".bodyclass", @import lines are removed, url() declarations are
 * filtered by protocol and dangerous keywords are neutralised.
 *
 * @param  $body    the html being sanitized
 * @param  $pos     offset in $body just after the opening <style> tag
 * @param  $message the message object
 * @param  $id      the message id
 * @param  $mailbox the message mailbox
 * @return          array(edited css, offset just after </style>), or
 *                  array(FALSE, strlen($body)) when no closing tag is found.
 */
function sq_fixstyle($body, $pos, $message, $id, $mailbox){
    global $view_unsafe_images;
    // workaround for </style> in between comments
    // NOTE(review): $sToken is not reset after a stray '<' or after a
    // closing tag other than </style>, so a later </style> can go
    // undetected; the function then reports failure (see below).
    $content = '';
    $sToken = '';
    $bSucces = false;
    $bEndTag = false;
    for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
        $char = $body[$i];
        switch ($char) {
          case '<':
            $sToken .= $char;
            break;
          case '/':
            if ($sToken == '<') {
                $sToken .= $char;
                $bEndTag = true;
            } else {
                $content .= $char;
            }
            break;
          case '>':
            if ($bEndTag) {
                $sToken .= $char;
                if (preg_match('/\<\/\s*style\s*\>/i',$sToken,$aMatch)) {
                    $newpos = $i + 1;
                    $bSucces = true;
                    break 2;
                } else {
                    $content .= $sToken;
                }
                $bEndTag = false;
            } else {
                $content .= $char;
            }
            break;
          case '!':
            if ($sToken == '<') {
                // possible comment
                if (isset($body[$i+2]) && substr($body,$i,3) == '!--') {
                    $i = strpos($body,'-->',$i+3);
                    if ($i === false) { // no end comment
                        $i = strlen($body);
                    }
                    $sToken = '';
                }
            } else {
                $content .= $char;
            }
            break;
          default:
            if ($bEndTag) {
                $sToken .= $char;
            } else {
                $content .= $char;
            }
            break;
        }
    }
    if ($bSucces == FALSE){
        return array(FALSE, strlen($body));
    }

    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
    $secremoveimg = '../images/' . _("sec_remove_eng.png");

    // IE needs special treatment. We have a special function for it.
    sq_fixIE_idiocy($content);

    // remove @import line
    $content = preg_replace("/^\s*(@import.*)$/mi","\n<!-- @import rules forbidden -->\n",$content);

    // translate ur\l and variations (IE parses that)
    // TODO check if the sq_fixIE_idiocy function already handles this.
    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);

    /**
     * Fix url(...) declarations one at a time. Handled declarations are
     * rewritten as "u\0r\0l(" so the loop condition does not match them
     * again; the NUL characters are removed after the loop.
     */
    while (preg_match("/url\s*\(\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*\)/si", $content, $matches)) {
        $sBefore = $content;
        $sProto = strtolower($matches[1]);
        switch ($sProto) {
            /**
             * Fix url('https*://.*) declarations but only if $view_unsafe_images
             * is false.
             */
            case 'https':
            case 'http':
                if (!$view_unsafe_images){
                    $sExpr = "/url\s*\(\s*[\'\"]?\s*$sProto*:.*[\'\"]?\s*\)/si";
                    $content = preg_replace($sExpr, "u\0r\0l(\\1$secremoveimg\\2)", $content);
                } else {
                    $content = preg_replace('/url/i',"u\0r\0l",$content);
                }
                break;
            /**
             * Fix urls that refer to cid:
             */
            case 'cid':
                $cidurl = 'cid:'. $matches[2];
                $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox);
                // The cid comes from the message, so quote every regex
                // metacharacter (and the '|' delimiter), not only parentheses.
                $cidurl = preg_quote($cidurl, '|');
                $content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si",
                                        "u\0r\0l($httpurl)", $content);
                break;
            default:
                /**
                 * replace url with protocol other than the white list
                 * http,https and cid by an empty string.
                 */
                $content = preg_replace("/url\s*\(\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*\)/si",
                                        "", $content);
                break;
        }
        if ($content === null) {
            // PCRE error: fail closed with empty css instead of looping on null
            $content = '';
            break;
        }
        if ($content === $sBefore) {
            // The replacement did not touch the declaration that was just
            // matched (e.g. a quoted url('cid:...')), so the same match
            // would be found forever. Drop the declaration, as is done
            // for unknown protocols, to guarantee termination.
            $content = str_replace($matches[0], '', $content);
        }
    }
    // remove NUL
    $content = str_replace("\0", "", $content);
    /**
     * Remove any backslashes, entities, and extraneous whitespace.
     * This is done on a copy so harmless css is returned unmodified.
     */
    $contentTemp = $content;
    sq_defang($contentTemp);
    sq_unspace($contentTemp);

    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     */
    $match   = array('/\/\*.*\*\//',
                     '/expression/i',
                     '/behaviou*r/i',
                     '/binding/i',
                     '/include-source/i',
                     '/javascript/i',
                     '/script/i');
    $replace = array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        // insecure css declarations are used. From now on we don't care
        // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash
        $content = $contentNew;
    }
    return array($content, $newpos);
}
a3daaaf3 1962
0493ed11 1963
/**
 * Converts a cid: url into a url the browser can actually fetch.
 *
 * A leading quote character, when present, is stripped from the url and
 * put back around the result. When the cid cannot be resolved to an
 * entity of the message, the url of a blank image is returned instead.
 *
 * @param  $message the message object
 * @param  $id      the message id
 * @param  $cidurl  the cid: url.
 * @param  $mailbox the message mailbox
 * @return          a string with a http-friendly url
 */
function sq_cid2http($message, $id, $cidurl, $mailbox){
    /**
     * Remember the surrounding quote (if any) and get rid of it.
     */
    $quotchar = '';
    $firstchar = substr($cidurl, 0, 1);
    if ($firstchar == '"' || $firstchar == "'"){
        $quotchar = $firstchar;
        $cidurl = str_replace($quotchar, "", $cidurl);
    }

    // drop the 4 leading characters (the "cid:" prefix)
    $cidurl = substr(trim($cidurl), 4);

    // drop a "{...}/" part if the url contains one
    $cidurl = preg_replace('/\{.*?\}\//', '', $cidurl);

    $linkurl = find_ent_id($cidurl, $message);

    /**
     * This is part of a fix for Outlook Express 6.x generating
     * cid URLs without creating content-id headers. These images are
     * not part of the multipart/related html mail. The html contains
     * <img src="cid:{some_id}/image_filename.ext"> references to
     * attached images with as goal to render them inline although
     * the attachment disposition property is not inline.
     */
    if (empty($linkurl) && preg_match('/{.*}\//', $cidurl)) {
        $cidurl = preg_replace('/{.*}\//','', $cidurl);
        if (!empty($cidurl)) {
            $linkurl = find_ent_id($cidurl, $message);
        }
    }

    if (empty($linkurl)) {
        /**
         * If we couldn't generate a proper img url, drop in a blank image
         * instead of sending back empty, otherwise it causes unusual behaviour
         */
        return $quotchar . SM_PATH . 'images/blank.png' . $quotchar;
    }

    return $quotchar . SM_PATH . 'src/download.php?absolute_dl=true&amp;' .
           "passed_id=$id&amp;mailbox=" . urlencode($mailbox) .
           '&amp;ent_id=' . $linkurl . $quotchar;
}
2027
/**
 * This function changes the <body> tag into a <div> tag since we
 * can't really have a body-within-body.
 *
 * The background, bgcolor and text attributes are translated into an
 * inline style; every other attribute is discarded. The resulting <div>
 * always carries class 'bodyclass'.
 *
 * @param  $attary   an array of attributes and values of <body>
 * @param  $mailbox  mailbox we're currently reading (for cid2http)
 * @param  $message  current message (for cid2http)
 * @param  $id       current message id (for cid2http)
 * @return           a modified array of attributes to be set for <div>
 */
function sq_body2div($attary, $mailbox, $message, $id){
    $divattary = array('class' => "'bodyclass'");
    $text = '#000000';
    $has_bgc_stl = $has_txt_stl = false;
    $styledef = '';
    if (is_array($attary) && sizeof($attary) > 0){
        foreach ($attary as $attname=>$attvalue){
            // Strip the surrounding quotes, but only when the value really
            // starts with one (same rule as sq_cid2http); otherwise every
            // occurrence of the first character would be removed.
            $quotchar = substr($attvalue, 0, 1);
            if ($quotchar == '"' || $quotchar == "'"){
                $attvalue = str_replace($quotchar, "", $attvalue);
            }
            switch ($attname){
                case 'background':
                    $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox);
                    $styledef .= "background-image: url('$attvalue'); ";
                    break;
                case 'bgcolor':
                    $has_bgc_stl = true;
                    $styledef .= "background-color: $attvalue; ";
                    break;
                case 'text':
                    $has_txt_stl = true;
                    $styledef .= "color: $attvalue; ";
                    break;
            }
        }
        // Outlook defines a white bgcolor and no text color. This can lead to
        // white text on a white bg with certain themes.
        if ($has_bgc_stl && !$has_txt_stl) {
            $styledef .= "color: $text; ";
        }
        if (strlen($styledef) > 0){
            $divattary["style"] = "\"$styledef\"";
        }
    }
    return $divattary;
}
a3daaaf3 2074
/**
 * This is the main function and the one you should actually be calling.
 * There are several variables you should be aware of an which need
 * special description.
 *
 * Since the description is quite lengthy, see it here:
 * http://linux.duke.edu/projects/mini/htmlfilter/
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             see description above
 * @param $rm_tags_with_content see description above
 * @param $self_closing_tags    see description above
 * @param $force_tag_closing    see description above
 * @param $rm_attnames          see description above
 * @param $bad_attvals          see description above
 * @param $add_attr_to_tag      see description above
 * @param $message              message object
 * @param $id                   message id
 * @param $mailbox              mailbox of the message
 * @return                      sanitized html safe to show on your pages.
 */
function sq_sanitize($body,
                     $tag_list,
                     $rm_tags_with_content,
                     $self_closing_tags,
                     $force_tag_closing,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag,
                     $message,
                     $id,
                     $mailbox
                     ){
    /**
     * The first element of tag_list tells whether it is a list of tags
     * to remove or of tags to allow.
     * false means remove these tags
     * true  means allow these tags
     */
    $rm_tags = array_shift($tag_list);
    /**
     * Normalize rm_tags and rm_tags_with_content.
     */
    @array_walk($tag_list, 'sq_casenormalize');
    @array_walk($rm_tags_with_content, 'sq_casenormalize');
    @array_walk($self_closing_tags, 'sq_casenormalize');

    $curpos = 0;
    $open_tags = array();
    $trusted = "\n<!-- begin sanitized html -->\n";
    // false, or the name of the tag whose content is currently being dropped
    $skip_content = false;
    /**
     * Take care of netscape's stupid javascript entities like
     * &{alert('boo')};
     */
    $body = preg_replace("/&(\{.*?\};)/si", "&amp;\\1", $body);

    while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        // text between the previous tag and this one
        $free_content = substr($body, $curpos, $lt-$curpos);
        /**
         * Take care of <style>
         */
        if ($tagname == "style" && $tagtype == 1){
            // keep the text that precedes the <style> tag
            if ($skip_content == false){
                $trusted .= $free_content;
            }
            list($style_content, $curpos) =
                sq_fixstyle($body, $gt+1, $message, $id, $mailbox);
            if ($style_content != FALSE){
                // attributes of <style> get the same checks as any other tag
                if (is_array($attary) && sizeof($attary) > 0){
                    $attary = sq_fixatts($tagname,
                                         $attary,
                                         $rm_attnames,
                                         $bad_attvals,
                                         $add_attr_to_tag,
                                         $message,
                                         $id,
                                         $mailbox
                                         );
                }
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
                $trusted .= $style_content;
                $trusted .= sq_tagprint($tagname, false, 2);
            }
            continue;
        }
        if ($skip_content == false){
            $trusted .= $free_content;
        }
        if ($tagname != FALSE){
            if ($tagtype == 2){
                if ($skip_content == $tagname){
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname = false;
                    $skip_content = false;
                } else {
                    if ($skip_content == false){
                        if ($tagname == "body"){
                            $tagname = "div";
                        }
                        // only print closing tags that match an open tag
                        if (isset($open_tags[$tagname]) &&
                                $open_tags[$tagname] > 0){
                            $open_tags[$tagname]--;
                        } else {
                            $tagname = false;
                        }
                    }
                }
            } else {
                /**
                 * $rm_tags_with_content
                 */
                if ($skip_content == false){
                    /**
                     * See if this is a self-closing type and change
                     * tagtype appropriately.
                     */
                    if ($tagtype == 1
                            && in_array($tagname, $self_closing_tags)){
                        $tagtype = 3;
                    }
                    /**
                     * See if we should skip this tag and any content
                     * inside it.
                     */
                    if ($tagtype == 1 &&
                            in_array($tagname, $rm_tags_with_content)){
                        $skip_content = $tagname;
                    } else {
                        if (($rm_tags == false
                                    && in_array($tagname, $tag_list)) ||
                                ($rm_tags == true &&
                                    !in_array($tagname, $tag_list))){
                            $tagname = false;
                        } else {
                            /**
                             * Convert body into div.
                             */
                            if ($tagname == "body"){
                                $tagname = "div";
                                $attary = sq_body2div($attary, $mailbox,
                                                      $message, $id);
                            }
                            if ($tagtype == 1){
                                if (isset($open_tags[$tagname])){
                                    $open_tags[$tagname]++;
                                } else {
                                    $open_tags[$tagname] = 1;
                                }
                            }
                            /**
                             * This is where we run other checks.
                             */
                            if (is_array($attary) && sizeof($attary) > 0){
                                $attary = sq_fixatts($tagname,
                                                     $attary,
                                                     $rm_attnames,
                                                     $bad_attvals,
                                                     $add_attr_to_tag,
                                                     $message,
                                                     $id,
                                                     $mailbox
                                                     );
                            }
                        }
                    }
                }
            }
            if ($tagname != false && $skip_content == false){
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt+1;
    }
    // whatever follows the last tag
    $trusted .= substr($body, $curpos, strlen($body)-$curpos);
    if ($force_tag_closing == true){
        // close every tag that is still open
        foreach ($open_tags as $tagname=>$opentimes){
            while ($opentimes > 0){
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end sanitized html -->\n";
    return $trusted;
}
451f74a2 2249
/**
 * This is a wrapper function to call html sanitizing routines.
 *
 * It builds the filter configuration (tags to drop, attributes to drop,
 * attribute value replacements), runs sq_sanitize() with it, sets the
 * global $has_unsafe_images when the "image removed" placeholder ended up
 * in the output and optionally rewrites mailto: links into compose links.
 *
 * @param  $body     the body of the message
 * @param  $id       the id of the message
 * @param  $message  the message object
 * @param  $mailbox  the mailbox of the message
 * @param  boolean $take_mailto_links When TRUE, converts mailto: links
 *                         into internal SM compose links
 *                         (optional; default = TRUE)
 * @return           a string with html safe to display in the browser.
 */
function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links =true) {

    // require_once(SM_PATH . 'functions/url_parser.php');  // for $MailTo_PReg_Match

    global $attachment_common_show_images, $view_unsafe_images,
           $has_unsafe_images;
    /**
     * Don't display attached images in HTML mode.
     *
     * SB: why?
     */
    $attachment_common_show_images = false;

    // first element false: the tags listed after it are removed
    $tag_list = array(
            false,
            "object",
            "meta",
            "html",
            "head",
            "base",
            "link",
            "frame",
            "iframe",
            "plaintext",
            "marquee"
            );

    $rm_tags_with_content = array(
            "script",
            "applet",
            "embed",
            "title",
            "frameset",
            "xmp",
            "xml"
            );

    $self_closing_tags = array(
            "img",
            "br",
            "hr",
            "input",
            "outbind"
            );

    $force_tag_closing = true;

    $rm_attnames = array(
            "/.*/" =>
                array(
                    "/target/i",
                    "/^on.*/i",
                    "/^dynsrc/i",
                    "/^data.*/i",
                    "/^lowsrc.*/i"
                    )
            );

    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    // tag pattern => attribute pattern => array(patterns, replacements)
    $bad_attvals = array(
            "/.*/" =>
                array(
                    "/^src|background/i" =>
                        array(
                            array(
                                "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                                "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                                "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                                ),
                            array(
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2"
                                )
                            ),
                    "/^href|action/i" =>
                        array(
                            array(
                                "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                                "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                                "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                                ),
                            array(
                                "\\1#\\1",
                                "\\1#\\1",
                                "\\1#\\1"
                                )
                            ),
                    "/^style/i" =>
                        array(
                            array(
                                "/\/\*.*\*\//",
                                "/expression/i",
                                "/binding/i",
                                "/behaviou*r/i",
                                "/include-source/i",
                                "/position\s*:\s*absolute/i",
                                "/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i",
                                "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si",
                                "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si",
                                "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si",
                                "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si"
                                ),
                            array(
                                "",
                                "idiocy",
                                "idiocy",
                                "idiocy",
                                "idiocy",
                                "",
                                "url",
                                "url(\\1#\\1)",
                                "url(\\1#\\1)",
                                "url(\\1#\\1)",
                                "\\1:url(\\2#\\3)"
                                )
                            )
                    )
            );

    // unsafe images are only shown when explicitly requested in the url
    if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
        $view_unsafe_images = false;
    }
    if (!$view_unsafe_images){
        /**
         * Remove any references to http/https if view_unsafe_images set
         * to false.
         */
        array_push($bad_attvals['/.*/']['/^src|background/i'][0],
                '/^([\'\"])\s*https*:.*([\'\"])/si');
        array_push($bad_attvals['/.*/']['/^src|background/i'][1],
                "\\1$secremoveimg\\1");
        array_push($bad_attvals['/.*/']['/^style/i'][0],
                '/url\([\'\"]?https?:[^\)]*[\'\"]?\)/si');
        array_push($bad_attvals['/.*/']['/^style/i'][1],
                "url(\\1$secremoveimg\\1)");
    }

    $add_attr_to_tag = array(
            "/^a$/i" =>
                array('target'=>'"_blank"',
                      'title'=>'"'._("This external link will open in a new window").'"'
                    )
            );
    $trusted = sq_sanitize($body,
                           $tag_list,
                           $rm_tags_with_content,
                           $self_closing_tags,
                           $force_tag_closing,
                           $rm_attnames,
                           $bad_attvals,
                           $add_attr_to_tag,
                           $message,
                           $id,
                           $mailbox
                           );
    if (preg_match("|$secremoveimg|i", $trusted)){
        $has_unsafe_images = true;
    }

    // we want to parse mailto's in HTML output, change to SM compose links
    // this is a modified version of code from url_parser.php... but Marc is
    // right: we need a better filtering implementation; adding this randomly
    // here is not a great solution
    //
    if ($take_mailto_links) {
        // parseUrl($trusted);   // this even parses URLs inside of tags... too aggressive
        global $MailTo_PReg_Match;
        // Work on a local copy: changing the global itself would prepend
        // "mailto:" once more on every further call within the same request.
        $mailto_regex = '/mailto:' . substr($MailTo_PReg_Match, 1);
        if ((preg_match_all($mailto_regex, $trusted, $regs)) && ($regs[0][0] != '')) {
            foreach ($regs[0] as $i => $mailto_before) {
                $mailto_params = $regs[10][$i];
                // get rid of any tailing quote since we have to add send_to to the end
                //
                if (substr($mailto_before, strlen($mailto_before) - 1) == '"') {
                    $mailto_before = substr($mailto_before, 0, strlen($mailto_before) - 1);
                }
                if (substr($mailto_params, strlen($mailto_params) - 1) == '"') {
                    $mailto_params = substr($mailto_params, 0, strlen($mailto_params) - 1);
                }

                if ($regs[1][$i]) {    //if there is an email addr before '?', we need to merge it with the params
                    $to = 'to=' . $regs[1][$i];
                    if (strpos($mailto_params, 'to=') !== false) {   //already a 'to='
                        $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params);
                    } else {
                        if ($mailto_params) {    //already some params, append to them
                            $mailto_params .= '&amp;' . $to;
                        } else {
                            $mailto_params .= '?' . $to;
                        }
                    }
                }

                $url_str = preg_replace(array('/to=/i', '/(?<!b)cc=/i', '/bcc=/i'), array('send_to=', 'send_to_cc=', 'send_to_bcc='), $mailto_params);

                // we'll already have target=_blank, no need to allow comp_in_new
                // here (which would be a lot more work anyway)
                //
                global $compose_new_win;
                $temp_comp_in_new = $compose_new_win;
                $compose_new_win = 0;
                $comp_uri = makeComposeLink('src/compose.php' . $url_str, $mailto_before);
                $compose_new_win = $temp_comp_in_new;

                // remove <a href=" and anything after the next quote (we only
                // need the uri, not the link HTML) in compose uri
                //
                $comp_uri = substr($comp_uri, 9);
                $comp_uri = substr($comp_uri, 0, strpos($comp_uri, '"', 1));
                $trusted = str_replace($mailto_before, $comp_uri, $trusted);
            }
        }
    }

    return $trusted;
}
a4a70693 2475
/**
 * function SendDownloadHeaders - send file to the browser
 *
 * Original Source: SM core src/download.php
 * moved here to make it available to other code, and separate
 * front end from back end functionality.
 *
 * Only headers are sent; the caller is responsible for sending the
 * file content afterwards.
 *
 * @param string $type0 first half of mime type
 * @param string $type1 second half of mime type
 * @param string $filename filename to tell the browser for downloaded file
 * @param boolean $force whether to force the download dialog to pop
 * @param optional integer $filesize send the Content-Header and length to the browser
 * @return void
 */
function SendDownloadHeaders($type0, $type1, $filename, $force, $filesize=0) {
    global $languages, $squirrelmail_language;
    $isIE = $isIE6plus = false;

    sqgetGlobalVar('HTTP_USER_AGENT', $HTTP_USER_AGENT, SQ_SERVER);

    if (strstr($HTTP_USER_AGENT, 'compatible; MSIE ') !== false &&
            strstr($HTTP_USER_AGENT, 'Opera') === false) {
        $isIE = true;
    }

    if (preg_match('/compatible; MSIE ([0-9]+)/', $HTTP_USER_AGENT, $match) &&
            ((int)$match[1]) >= 6 && strstr($HTTP_USER_AGENT, 'Opera') === false) {
        $isIE6plus = true;
    }

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
            function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename')) {
        // the active translation provides its own filename encoder
        $filename =
            call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename', $filename, $HTTP_USER_AGENT);
    } else {
        // Replace characters that are not allowed in file names with '_'.
        // (Same character set as the former ereg_replace() call; ereg_*
        // functions no longer exist as of PHP 7.)
        $filename = str_replace(
            array('\\', '/', ':', '*', '?', '"', '<', '>', '|', ';'),
            '_',
            str_replace('&nbsp;', ' ', $filename));
    }

    // A Pox on Microsoft and it's Internet Explorer!
    //
    // IE has lots of bugs with file downloads.
    // It also has problems with SSL.  Both of these cause problems
    // for us in this function.
    //
    // See this article on Cache Control headers and SSL
    // http://support.microsoft.com/default.aspx?scid=kb;en-us;323308
    //
    // The best thing you can do for IE is to upgrade to the latest
    // version
    //set all the Cache Control Headers for IE
    if ($isIE) {
        $filename=rawurlencode($filename);
        header ("Pragma: public");
        header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1
        header ("Cache-Control: post-check=0, pre-check=0", false);
        header ("Cache-Control: private");

        //set the inline header for IE, we'll add the attachment header later if we need it
        header ("Content-Disposition: inline; filename=$filename");
    }

    if (!$force) {
        // Try to show in browser window
        header ("Content-Disposition: inline; filename=\"$filename\"");
        header ("Content-Type: $type0/$type1; name=\"$filename\"");
    } else {
        // Try to pop up the "save as" box

        // IE makes this hard.  It pops up 2 save boxes, or none.
        // http://support.microsoft.com/support/kb/articles/Q238/5/88.ASP
        // http://support.microsoft.com/default.aspx?scid=kb;EN-US;260519
        // But, according to Microsoft, it is "RFC compliant but doesn't
        // take into account some deviations that allowed within the
        // specification."  Doesn't that mean RFC non-compliant?
        // http://support.microsoft.com/support/kb/articles/Q258/4/52.ASP

        // all browsers need the application/octet-stream header for this
        header ("Content-Type: application/octet-stream; name=\"$filename\"");

        // http://support.microsoft.com/support/kb/articles/Q182/3/15.asp
        // Do not have quotes around filename, but that applied to
        // "attachment"... does it apply to inline too?
        header ("Content-Disposition: attachment; filename=\"$filename\"");

        if ($isIE && !$isIE6plus) {
            // This combination seems to work mostly.  IE 5.5 SP 1 has
            // known issues (see the Microsoft Knowledge Base)

            // This works for most types, but doesn't work with Word files
            header ("Content-Type: application/download; name=\"$filename\"");
            header ("Content-Type: application/force-download; name=\"$filename\"");
        } else if ($isIE) {
            // This is to prevent IE for MIME sniffing and auto open a file in IE
            header ("Content-Type: application/force-download; name=\"$filename\"");
        } else {
            // another application/octet-stream forces download for Netscape
            header ("Content-Type: application/octet-stream; name=\"$filename\"");
        }
    }

    //send the content-length header if the calling function provides it
    if ($filesize > 0) {
        header("Content-Length: $filesize");
    }

}  // end fn SendDownloadHeaders