- $i = 0;
- while ( $i < $j ) {
- if ( $body{$i} == '<' ) {
- $pos = $i + 1;
- $tag = '';
- while ($body{$pos} == ' ' || $body{$pos} == "\t" ||
- $body{$pos} == "\n") {
- $pos ++;
- }
- while (strlen($tag) < 4 && $body{$pos} != ' ' &&
- $body{$pos} != "\t" && $body{$pos} != "\n") {
- $tag .= $body{$pos};
- $pos ++;
- }
- switch( strtoupper( $tag ) ) {
- // Strips the entire tag and contents
- case 'APPL':
- case 'EMBB':
- case 'FRAM':
- case 'SCRI':
- case 'OBJE':
- $etg = '/' . $tag;
- while ( $body{$i+1}.$body{$i+2}.$body{$i+3}.$body{$i+4}.$body{$i+5} <> $etg &&
- $i < $j ) $i++;
- while ( $i < $j && $body{++$i} <> '>' );
- // $ret .= "<!-- $tag removed -->";
- break;
- // Substitute Title
- case 'TITL':
- $i += 5;
- while ( $body{$i} <> '>' && // </title>
- $i < $j )
- $i++;
- $i++;
- $title = '';
- while ( $body{$i} <> '<' && // </title>
- $i < $j ) {
- $title .= $body{$i};
- $i++;
- }
- $i += 7;
- break;
- // Destroy these tags
- case 'HTML':
- case 'HEAD':
- case '/HTM':
- case '/HEA':
- case '!DOC':
- case 'META':
- //case 'DIV ':
- //case '/DIV':
- case '!-- ':
- $i += 4;
- while ( $body{$i} <> '>' &&
- $i < $j )
- $i++;
- // $i++;
- break;
- case 'STYL':
- $i += 5;
- while ( $body{$i} <> '>' && // </title>
- $i < $j )
- $i++;
- $i++;
- // We parse the style to look for interesting stuff
- $styleblk = '';
- while ( $body{$i} <> '>' &&
- $i < $j ) {
- // First we get the name of the style
- $style = '';
- while ( $body{$i} <> '>' &&
- $body{$i} <> '<' &&
- $body{$i} <> '{' &&
- $i < $j ) {
- if ( isnoSep( $body{$i} ) )
- $style .= $body{$i};
- $i++;
- }
- stripComments( $i, $j, $body );
- $style = strtoupper( trim( $style ) );
- if ( $style == 'BODY' ) {
- // Next we look into the definitions of the body style
- while ( $body{$i} <> '>' &&
- $body{$i} <> '}' &&
- $i < $j ) {
- // We look for the background color if any.
- if ( substr( $body, $i, 17 ) == 'BACKGROUND-COLOR:' ) {
- $i += 17;
- $bgcolor = getStyleData( $i, $j, $body );
- } elseif ( substr( $body, $i, 12 ) == 'MARGIN-LEFT:' ) {
- $i += 12;
- $leftmargin = getStyleData( $i, $j, $body );
- }
- $i++;
- }
- } else {
- // Other styles are maintained
- $styleblk .= "$style ";
- while ( $body{$i} <> '>' &&
- $body{$i} <> '<' &&
- $body{$i} <> '}' &&
- $i < $j ) {
- $styleblk .= $body{$i};
- $i++;
- }
- $styleblk .= $body{$i};
- }
- stripComments( $i, $j, $body );
- if ( $body{$i} <> '>' )
- $i++;
- }
- if ( $styleblk <> '' )
- $ret .= "<style>$styleblk";
- break;
- case 'BODY':
- if ( $title <> '' )
- $ret .= '<b>' . _("Title:") . " </b>$title<br>\n";
- $ret .= "<TABLE";
- $i += 5;
- if (! isset($base)) {
- $base = '';
- }
- $ret .= stripEvent( $i, $j, $body, $id, $base );
- $ret .= " bgcolor=$bgcolor width=\"100%\"><tr>";
- if ( $leftmargin <> '' )
- $ret .= "<td width=$leftmargin> </td>";
- $ret .= '<td>';
- if (strtolower($bgcolor) == 'ffffff' ||
- strtolower($bgcolor) == '#ffffff')
- $ret .= '<font color=#000000>';
- break;
- case 'BASE':
- $i += 5;
- $base = '';
- while ( !isNoSep( $body{$i} ) &&
- $i < $j ) {
- $i++;
- }
- if ( strcasecmp( substr( $base, 0, 4 ), 'href' ) ) {
- $i += 5;
- while ( !isNoSep( $body{$i} ) &&
- $i < $j ) {
- $i++;
- }
- while ( $body{$i} <> '>' &&
- $i < $j ) {
- if ( $body{$i} <> '"' ) {
- $base .= $body{$i};
- }
- $i++;
- }
- // Debugging $ret .= "<!-- base == $base -->";
- if ( strcasecmp( substr( $base, 0, 4 ), 'file' ) <> 0 ) {
- $ret .= "\n<BASE HREF=\"$base\">\n";
- }
- }
- break;
- case '/BOD':
- $ret .= '</font></td></tr></TABLE>';
- $i += 6;
- break;
- default:
- // The remaining tags can contain event handlers, so let's search for them
- stripComments( $i, $j, $body );
- if (! isset($base)) {
- $base = '';
- }
- $ret .= stripEvent( $i, $j, $body, $id, $base ) . '>';
- // $ret .= "<!-- $tag detected -->";
- }