improved behavior of CS mail message filtering

This commit is contained in:
Graham Knop 2008-04-01 14:42:42 +00:00
parent edc1c55395
commit 357b2b3c5c
4 changed files with 26 additions and 39 deletions

View file

@ -19,6 +19,7 @@
- use UTF8 JSON encoding and decoding universally
- fixed: Import/Export of packages with international text is broken
- fixed: CS rating links are exposed to site spiders
- improved behavior of CS mail message filtering
7.5.8
- moved Gallery utility methods to WebGUI::Utility::Gallery

View file

@ -399,7 +399,7 @@ sub view {
$var{content} = sprintf $i18n->get('no frame error message'), $proxiedUrl;
} else {
$var{content} =~ s/\<style.*?\/style\>//isg if ($self->get("removeStyle"));
$var{content} = WebGUI::HTML::cleanSegment($var{content});
$var{content} = WebGUI::HTML::cleanSegment($var{content}, 1);
$var{content} = WebGUI::HTML::filter($var{content}, $self->get("filterHtml"));
}
}

View file

@ -46,40 +46,36 @@ These methods are available from this package:
#-------------------------------------------------------------------
=head2 cleanSegment ( html )
=head2 cleanSegment ( html , preserveStyleScript )
Returns an HTML segment that has been stripped of the <BODY> tag and anything before it, as well as the </BODY> tag and anything after it. Its main purpose is to get rid of META tags and other garbage from an HTML page that will be used as a segment inside of another page.
B<NOTE:> This filter has one exception: when preserveStyleScript is set, it keeps anything before the <BODY> tag that is enclosed in <STYLE></STYLE> or <SCRIPT></SCRIPT> tags.
=head3 html
The HTML segment you want cleaned.
=head3 preserveStyleScript
With this option set, <style> and <script> tags will be preserved in the output.
=cut
sub cleanSegment {
    # Arguments:
    #   html                - the HTML text to reduce to an embeddable segment.
    #   preserveStyleScript - when true, complete <script>...</script> and
    #                         <style>...</style> elements found before the
    #                         opening <body> tag are kept and prepended to
    #                         the returned segment.
    # Returns the content between the opening <body ...> tag and the closing
    # </body> tag, with carriage returns normalised to newlines. If there is
    # no <body> tag the text is returned as-is apart from that normalisation
    # (and the removal of anything from a closing </body> onward).
    my $html                = shift;
    my $preserveStyleScript = shift;

    # Nothing to clean: return an empty segment instead of operating on undef.
    return '' unless defined $html;

    my $headers = '';

    # Remove the opening <body ...> tag together with everything before it.
    # The leading (.*) is greedy, so if "<body" occurs more than once the
    # last occurrence is the one treated as the start of the content.
    if ($html =~ s{(.*)<body\b.*?>}{}is && $preserveStyleScript) {
        my $head = $1;

        # Collect every complete script or style element from the removed
        # preamble, in document order. \2 forces the closing tag to match
        # the opening one.
        while ($head =~ m{(<(script|style)\b.*?</\2>)}isg) {
            $headers .= $1;
        }
    }

    # Remove the closing </body> tag and everything after it.
    $html =~ s{</body>.*}{}is;

    # Normalise line endings: CRLF first so it becomes a single newline,
    # then any remaining lone carriage returns.
    $html =~ s/\r\n/\n/g;
    $html =~ s/\r/\n/g;

    return $headers . $html;
}
#-------------------------------------------------------------------

View file

@ -82,26 +82,16 @@ sub addPost {
$text = WebGUI::HTML::filter($text, "all");
$text = WebGUI::HTML::format($text, "text");
}
else if ($part->type eq 'text/html') {
$text = WebGUI::HTML::cleanSegment($text);
}
$content .= $text;
} else {
push(@attachments, $part);
}
}
$prefix =~ s/\\/\\\\/g;
$prefix =~ s/\[/\\[/g;
$prefix =~ s/\]/\\]/g;
$prefix =~ s/\(/\\(/g;
$prefix =~ s/\)/\\)/g;
$prefix =~ s/\}/\\}/g;
$prefix =~ s/\{/\\{/g;
$prefix =~ s/\?/\\?/g;
$prefix =~ s/\./\\./g;
$prefix =~ s/\*/\\*/g;
$prefix =~ s/\+/\\+/g;
$prefix =~ s/\|/\\|/g;
$prefix =~ s/\//\\\//g;
my $title = $message->{subject};
$title =~ s/$prefix//;
$title =~ s/\Q$prefix//;
if ($title =~ m/re:/i) {
$title =~ s/re://ig;
$title = "Re: ".$title;