23/03/2002
*
@@ -1129,6 +1149,11 @@ function sq_defang(&$attvalue){
return;
}
$m = false;
+ // before deent, translate the dangerous unicode characters and ... to safe values
+ // otherwise the regular expressions do not match.
+
+
+
do {
$m = false;
$m = $m || sq_deent($attvalue, '/\*(\d+);*/s');
@@ -1154,6 +1179,83 @@ function sq_unspace(&$attvalue){
}
}
+/**
+ * Translate characters and escape tricks which IE accepts as regular
+ * characters, so that the filters which run afterwards see plain ASCII.
+ *
+ * The value is cleaned in four steps: NUL bytes are dropped, CSS style
+ * comments are dropped, look-alike characters are mapped to ASCII letters
+ * and a backslash in front of an ASCII letter is dropped.
+ *
+ * @param string $attvalue The value to clean. It is modified in place
+ *                         through the reference.
+ * @return void Nothing, modifies a reference value.
+ * @author Marc Groot Koerkamp.
+ */
+function sq_fixIE_idiocy(&$attvalue) {
+    // remove NUL
+    $attvalue = str_replace("\0", "", $attvalue);
+    // Remove comments. The "s" modifier makes "." match newlines as well,
+    // otherwise a comment which spans several lines is left in place and can
+    // still be used to split a keyword in two.
+    $attvalue = preg_replace("/(\/\*.*?\*\/)/s","",$attvalue);
+
+    // IE has the evil habit of accepting every possible value for the attribute expression.
+    // The table below contains characters which are parsed by IE if they are used in the "expression"
+    // attribute value. Entry N of the first array is replaced by entry N of the second array.
+    // NOTE(review): the leading entries come in pairs of two identical literals; presumably
+    // they were the hexadecimal and decimal entity notation of the character - confirm
+    // against the upstream file before applying.
+    $aDangerousCharsReplacementTable = array(
+        array('ʟ', 'ʟ' ,/* L UNICODE IPA Extension */
+              'ʀ', 'ʀ' ,/* R UNICODE IPA Extension */
+              'ɴ', 'ɴ' ,/* N UNICODE IPA Extension */
+              'E', 'E' ,/* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
+              'e', 'e' ,/* Unicode FULLWIDTH LATIN SMALL LETTER E */
+              'X', 'X',/* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
+              'x', 'x',/* Unicode FULLWIDTH LATIN SMALL LETTER X */
+              'P', 'P',/* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
+              'p', 'p',/* Unicode FULLWIDTH LATIN SMALL LETTER P */
+              'R', 'R',/* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
+              'r', 'r',/* Unicode FULLWIDTH LATIN SMALL LETTER R */
+              'S', 'S',/* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
+              's', 's',/* Unicode FULLWIDTH LATIN SMALL LETTER S */
+              'I', 'I',/* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
+              'i', 'i',/* Unicode FULLWIDTH LATIN SMALL LETTER I */
+              'O', 'O',/* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
+              'o', 'o',/* Unicode FULLWIDTH LATIN SMALL LETTER O */
+              'N', 'N',/* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
+              'n', 'n',/* Unicode FULLWIDTH LATIN SMALL LETTER N */
+              'L', 'L',/* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
+              'l', 'l',/* Unicode FULLWIDTH LATIN SMALL LETTER L */
+              'U', 'U',/* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
+              'u', 'u',/* Unicode FULLWIDTH LATIN SMALL LETTER U */
+              'ⁿ', 'ⁿ' ,/* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
+              // The escaped sequences below are the UTF-8 byte encodings of the
+              // same code points (U+FF25 and so on), not Shift JIS bytes.
+              "\xEF\xBC\xA5", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER E */
+              "\xEF\xBD\x85", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER E */
+              "\xEF\xBC\xB8", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER X */
+              "\xEF\xBD\x98", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER X */
+              "\xEF\xBC\xB0", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER P */
+              "\xEF\xBD\x90", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER P */
+              "\xEF\xBC\xB2", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER R */
+              "\xEF\xBD\x92", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER R */
+              "\xEF\xBC\xB3", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER S */
+              "\xEF\xBD\x93", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER S */
+              "\xEF\xBC\xA9", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER I */
+              "\xEF\xBD\x89", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER I */
+              "\xEF\xBC\xAF", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER O */
+              "\xEF\xBD\x8F", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER O */
+              "\xEF\xBC\xAE", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER N */
+              "\xEF\xBD\x8E", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER N */
+              "\xEF\xBC\xAC", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER L */
+              "\xEF\xBD\x8C", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER L */
+              "\xEF\xBC\xB5", /* UTF-8 bytes of FULLWIDTH LATIN CAPITAL LETTER U */
+              "\xEF\xBD\x95", /* UTF-8 bytes of FULLWIDTH LATIN SMALL LETTER U */
+              "\xE2\x81\xBF", /* UTF-8 bytes of SUPERSCRIPT LATIN SMALL LETTER N */
+              "\xCA\x9F", /* UTF-8 bytes of L, UNICODE IPA Extension */
+              "\xCA\x80", /* UTF-8 bytes of R, UNICODE IPA Extension */
+              "\xC9\xB4"), /* UTF-8 bytes of N, UNICODE IPA Extension */
+        array('l', 'l', 'r','r','n','n',
+              'E','E','e','e','X','X','x','x','P','P','p','p','R','R','r','r','S','S','s','s','I','I',
+              'i','i','O','O','o','o','N','N','n','n','L','L','l','l','U','U','u','u','n','n',
+              'E','e','X','x','P','p','R','r','S','s','I','i','O','o','N','n','L','l','U','u','n','l','r','n'));
+    $attvalue = str_replace($aDangerousCharsReplacementTable[0],$aDangerousCharsReplacementTable[1],$attvalue);
+
+    // Escapes are useful for special characters like "{}[]()'&. In other cases they are
+    // used for XSS, so a backslash followed by an ASCII letter is reduced to the letter.
+    $attvalue = preg_replace("/(\\\\)([a-zA-Z]{1})/",'$2',$attvalue);
+}
+
/**
* This function returns the final tag out of the tag name, an array
* of attributes, and the type of the tag. This function is called by
@@ -1567,6 +1669,8 @@ function sq_getnxtag($body, $offset){
function sq_deent(&$attvalue, $regex, $hex=false){
$me = 'sq_deent';
$ret_match = false;
+ // remove comments
+ //$attvalue = preg_replace("/(\/\*.*\*\/)/","",$attvalue);
preg_match_all($regex, $attvalue, $matches);
if (is_array($matches) && sizeof($matches[0]) > 0){
$repl = Array();
@@ -1620,10 +1724,23 @@ function sq_fixatts($tagname,
}
}
}
+ /**
+ * Workaround for IE quirks
+ */
+ sq_fixIE_idiocy($attvalue);
+
/**
* Remove any backslashes, entities, and extraneous whitespace.
*/
+
+ $oldattvalue = $attvalue;
sq_defang($attvalue);
+ if ($attname == 'style' && $attvalue !== $oldattvalue) {
+ // entities are used in the attribute value. In 99% of the cases it's there as XSS
+ // i.e.
+ $attvalue = "idiocy";
+ $attary{$attname} = $attvalue;
+ }
sq_unspace($attvalue);
/**
@@ -1646,38 +1763,34 @@ function sq_fixatts($tagname,
preg_replace($valmatch, $valrepl, $attvalue);
if ($newvalue != $attvalue){
$attary{$attname} = $newvalue;
+ $attvalue = $newvalue;
}
}
}
}
}
-
- /**
- * Replace empty src tags with the blank image. src is only used
- * for frames, images, and image inputs. Doing a replace should
- * not affect them working as should be, however it will stop
- * IE from being kicked off when src for img tags are not set
- */
- if (($attname == 'src') && ($attvalue == '""')) {
- $attary{$attname} = '"' . SM_PATH . 'images/blank.png"';
- }
-
- /**
- * Turn cid: urls into http-friendly ones.
- */
- if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
- $attary{$attname} = sq_cid2http($message, $id, $attvalue, $mailbox);
+ if ($attname == 'style') {
+ if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
+ // 8bit and control characters in style attribute values can be used for XSS, remove them
+ $attary{$attname} = '"disallowed character"';
+ }
+ preg_match_all("/url\s*\((.+)\)/si",$attvalue,$aMatch);
+ if (count($aMatch)) {
+ foreach($aMatch[1] as $sMatch) {
+ // url value
+ $urlvalue = $sMatch;
+ sq_fix_url($attname, $urlvalue, $message, $id, $mailbox,"'");
+ $attary{$attname} = str_replace($sMatch,$urlvalue,$attvalue);
+ }
+ }
}
-
/**
- * "Hack" fix for Outlook using propriatary outbind:// protocol in img tags.
- * One day MS might actually make it match something useful, for now, falling
- * back to using cid2http, so we can grab the blank.png.
+ * Use white list based filtering on attributes which can contain url's
*/
- if (preg_match("/^[\'\"]\s*outbind:\/\//si", $attvalue)) {
- $attary{$attname} = sq_cid2http($message, $id, $attvalue, $mailbox);
+ else if ($attname == 'href' || $attname == 'src' || $attname == 'background') {
+ sq_fix_url($attname, $attvalue, $message, $id, $mailbox);
+ $attary{$attname} = $attvalue;
}
-
}
/**
* See if we need to append any attributes to this tag.
@@ -1690,6 +1803,98 @@ function sq_fixatts($tagname,
return $attary;
}
+/**
+ * This function filters url's using a white list of schemes.
+ *
+ * The url is rewritten in place and is always handed back wrapped in the
+ * quote character it arrived with (or $sQuote when it had none). Url's which
+ * are not accepted are replaced by a harmless placeholder.
+ *
+ * @param $attname  String with the name of the attribute the url belongs to.
+ *                  'href' is treated as a link, every other name as an
+ *                  image source.
+ * @param $attvalue String with attribute value to filter, modified by reference
+ * @param $message  message object
+ * @param $id       message id
+ * @param $mailbox  mailbox
+ * @param $sQuote   quoting characters around url's, used when $attvalue
+ *                  itself is not quoted
+ */
+function sq_fix_url($attname, &$attvalue, $message, $id, $mailbox,$sQuote = '"') {
+    $attvalue = trim($attvalue);
+    if ($attvalue && ($attvalue[0] =='"'|| $attvalue[0] == "'")) {
+        // remember which quote was used and remove the quotes
+        $sQuote = $attvalue[0];
+        $attvalue = trim(substr($attvalue,1,-1));
+    }
+
+    if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
+        $view_unsafe_images = false;
+    }
+    $secremoveimg = '../images/' . _("sec_remove_eng.png");
+    // every rejected image url is replaced by this value
+    $sBlankImage = $sQuote . SM_PATH . 'images/blank.png' . $sQuote;
+
+    /**
+     * Replace empty src tags with the blank image. src is only used
+     * for frames, images, and image inputs. Doing a replace should
+     * not affect them working as should be, however it will stop
+     * IE from being kicked off when src for img tags are not set
+     */
+    if ($attvalue == '') {
+        // use the caller's quote, a hard coded double quote breaks values
+        // which are embedded with single quotes (style url's)
+        $attvalue = $sBlankImage;
+        return;
+    }
+
+    // first, disallow 8 bit characters and control characters. The quote
+    // character itself is refused too, because the value is quoted again
+    // below and an embedded quote would end the value early.
+    if (preg_match('/[\0-\37\200-\377]+/',$attvalue) || strpos($attvalue, $sQuote) !== false) {
+        if ($attname == 'href') {
+            $attvalue = $sQuote . 'http://invalid-stuff-detected.example.com' . $sQuote;
+        } else {
+            $attvalue = $sBlankImage;
+        }
+        return;
+    }
+
+    $aUrl = parse_url($attvalue);
+    if (!isset($aUrl['scheme'])) {
+        if (isset($aUrl['path']) && $aUrl['path'] == $secremoveimg) {
+            // our own "image removed" placeholder, keep it but quote it again
+            $attvalue = $sQuote . $attvalue . $sQuote;
+        } else {
+            // parse_url did not lead to satisfying result
+            $attvalue = $sBlankImage;
+        }
+        return;
+    }
+
+    switch (strtolower($aUrl['scheme'])) {
+        case 'http':
+        case 'https':
+        case 'ftp':
+            if ($attname == 'href') {
+                // links are allowed, put the quotes back
+                $attvalue = $sQuote . $attvalue . $sQuote;
+            } else if ($view_unsafe_images == false) {
+                $attvalue = $sQuote . $secremoveimg . $sQuote;
+            } else {
+                // validate image extension. A path without a dot has no
+                // extension and is refused.
+                $iDot = isset($aUrl['path']) ? strrpos($aUrl['path'],'.') : false;
+                $ext = ($iDot === false) ? '' : strtolower(substr($aUrl['path'],$iDot));
+                if (in_array($ext,array('.jpeg','.jpg','.gif','.bmp','.jpe','.png','.xbm'),true)) {
+                    $attvalue = $sQuote . $attvalue . $sQuote;
+                } else {
+                    $attvalue = $sBlankImage;
+                }
+            }
+            break;
+        case 'outbind':
+            /**
+             * "Hack" fix for Outlook using proprietary outbind:// protocol in img tags.
+             * One day MS might actually make it match something useful, for now, falling
+             * back to using cid2http, so we can grab the blank.png.
+             */
+        case 'cid':
+            /**
+             * Turn cid: urls into http-friendly ones.
+             * NOTE(review): the url is passed with its quotes, like the
+             * attribute code did before this function existed; presumably
+             * sq_cid2http hands back a value quoted the same way - confirm.
+             */
+            $attvalue = sq_cid2http($message, $id, $sQuote . $attvalue . $sQuote, $mailbox);
+            break;
+        default:
+            // scheme is not on the white list
+            $attvalue = $sBlankImage;
+            break;
+    }
+}
+
+
/**
* This function edits the style definition to make them friendly and
* usable in SquirrelMail.
@@ -1703,12 +1908,70 @@ function sq_fixatts($tagname,
function sq_fixstyle($body, $pos, $message, $id, $mailbox){
global $view_unsafe_images;
$me = 'sq_fixstyle';
- $ret = sq_findnxreg($body, $pos, '\s*style\s*>');
- if ($ret == FALSE){
+ // workaround for in between comments
+ $iCurrentPos = $pos;
+ $content = '';
+ $sToken = '';
+ $bSucces = false;
+ $bEndTag = false;
+ for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
+ $char = $body{$i};
+ switch ($char) {
+ case '<':
+ $sToken .= $char;
+ break;
+ case '/':
+ if ($sToken == '<') {
+ $sToken .= $char;
+ $bEndTag = true;
+ } else {
+ $content .= $char;
+ }
+ break;
+ case '>':
+ if ($bEndTag) {
+ $sToken .= $char;
+ if (preg_match('/\<\/\s*style\s*\>/i',$sToken,$aMatch)) {
+ $newpos = $i + 1;
+ $bSucces = true;
+ break 2;
+ } else {
+ $content .= $sToken;
+ }
+ $bEndTag = false;
+ } else {
+ $content .= $char;
+ }
+ break;
+ case '!':
+ if ($sToken == '<') {
+ // possible comment
+ if (isset($body{$i+2}) && substr($body,$i,3) == '!--') {
+ $i = strpos($body,'-->',$i+3);
+ if ($i === false) { // no end comment
+ $i = strlen($body);
+ }
+ $sToken = '';
+ }
+ } else {
+ $content .= $char;
+ }
+ break;
+ default:
+ if ($bEndTag) {
+ $sToken .= $char;
+ } else {
+ $content .= $char;
+ }
+ break;
+ }
+ }
+ if ($bSucces == FALSE){
return array(FALSE, strlen($body));
}
- $newpos = $ret[0] + strlen($ret[2]);
- $content = $ret[1];
+
+
+
/**
* First look for general BODY style declaration, which would be
* like so:
@@ -1722,52 +1985,35 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){
*/
// $content = preg_replace("|url\s*\(\s*([\'\"])\s*\S+script\s*:.*?([\'\"])\s*\)|si",
// "url(\\1$secremoveimg\\2)", $content);
- // remove NUL
- $content = str_replace("\0", "", $content);
- // translate ur\l and variations (IE parses that)
- $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
- // NB I insert NUL characters to keep to avoid an infinite loop. They are removed after the loop.
- while (preg_match("/url\s*\(\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*\)/si", $content, $matches)) {
- $sProto = strtolower($matches[1]);
- switch ($sProto) {
- /**
- * Fix url('https*://.*) declarations but only if $view_unsafe_images
- * is false.
- */
- case 'https':
- case 'http':
- if (!$view_unsafe_images){
- $sExpr = "/url\s*\(\s*[\'\"]?\s*$sProto*:.*[\'\"]?\s*\)/si";
- $content = preg_replace($sExpr, "u\0r\0l(\\1$secremoveimg\\2)", $content);
+ // first check for 8bit sequences and disallowed control characters
+ if (preg_match('/[\16-\37\200-\377]+/',$content)) {
+ $content = '';
+ return array($content, $newpos);
+ }
- } else {
- $content = preg_replace('/url/i',"u\0r\0l",$content);
- }
- break;
- /**
- * Fix urls that refer to cid:
- */
- case 'cid':
- $cidurl = 'cid:'. $matches[2];
- $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox);
- // escape parentheses that can modify the regular expression
- $cidurl = str_replace(array('(',')'),array('\\(','\\)'),$cidurl);
- $content = preg_replace("|url\s*\(\s*$cidurl\s*\)|si",
- "u\0r\0l($httpurl)", $content);
- break;
- default:
- /**
- * replace url with protocol other then the white list
- * http,https and cid by an empty string.
- */
- $content = preg_replace("/url\s*\(\s*[\'\"]?([^:]+):(.*)?[\'\"]?\s*\)/si",
- "", $content);
- break;
+ // IE Sucks hard. We have a special function for it.
+ sq_fixIE_idiocy($content);
+
+ // remove @import line
+ $content = preg_replace("/^\s*(@import.*)$/mi","\n\n",$content);
+
+ // translate ur\l and variations (IE parses that)
+ // TODO check if the sq_fixIE_idiocy function already handles this.
+ $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);
+ preg_match_all("/url\s*\((.+)\)/si",$content,$aMatch);
+ if (count($aMatch)) {
+ $aValue = $aReplace = array();
+ foreach($aMatch[1] as $sMatch) {
+ // url value
+ $urlvalue = $sMatch;
+ sq_fix_url('style',$urlvalue, $message, $id, $mailbox,"'");
+ $aValue[] = $sMatch;
+ $aReplace[] = $urlvalue;
}
+ $content = str_replace($aValue,$aReplace,$content);
}
- // remove NUL
- $content = str_replace("\0", "", $content);
+
/**
* Remove any backslashes, entities, and extraneous whitespace.
*/
@@ -1783,8 +2029,10 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){
'/expression/i',
'/behaviou*r/i',
'/binding/i',
- '/include-source/i');
- $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy');
+ '/include-source/i',
+ '/javascript/i',
+ '/script/i');
+ $replace = Array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy');
$contentNew = preg_replace($match, $replace, $contentTemp);
if ($contentNew !== $contentTemp) {
// insecure css declarations are used. From now on we don't care
@@ -2102,7 +2350,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
$has_unsafe_images;
/**
* Don't display attached images in HTML mode.
- *
+ *
* SB: why?
*/
$attachment_common_show_images = false;
@@ -2202,7 +2450,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
"idiocy",
"idiocy",
"idiocy",
- "",
+ "idiocy",
"url",
"url(\\1#\\1)",
"url(\\1#\\1)",
@@ -2248,7 +2496,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
$id,
$mailbox
);
- if (preg_match("|$secremoveimg|i", $trusted)){
+ if (strpos($trusted,$secremoveimg)){
$has_unsafe_images = true;
}
@@ -2397,11 +2645,14 @@ function SendDownloadHeaders($type0, $type1, $filename, $force, $filesize=0) {
// This works for most types, but doesn't work with Word files
header ("Content-Type: application/download; name=\"$filename\"");
-
+ header ("Content-Type: application/force-download; name=\"$filename\"");
// These are spares, just in case. :-)
//header("Content-Type: $type0/$type1; name=\"$filename\"");
//header("Content-Type: application/x-msdownload; name=\"$filename\"");
//header("Content-Type: application/octet-stream; name=\"$filename\"");
+ } else if ($isIE) {
+ // This prevents IE from MIME sniffing the content and automatically opening the file
+ header ("Content-Type: application/force-download; name=\"$filename\"");
} else {
// another application/octet-stream forces download for Netscape
header ("Content-Type: application/octet-stream; name=\"$filename\"");