added a new function to WebGUI::HTML to parse html text and extract a list of elements based on a tag

spec. used to extract paragraphs for the synopsis; convert FeedPP properties to scalar context so that
blank fields are rendered properly.
This commit is contained in:
David Delikat 2009-01-23 03:41:49 +00:00
parent 2893944837
commit 16ce54bf4f
4 changed files with 61 additions and 8 deletions

View file

@ -1,4 +1,5 @@
7.6.10
- fixed #9455: fixed synopsis to pick out html paragraphs, fixed FeedPP fields to scalar when blank
- fixed: With autocommit and no comments on, making a shortcut of an asset takes you to that asset's view.
7.6.9

View file

@ -548,11 +548,15 @@ sub getSynopsisAndContent {
my $synopsis = shift;
my $body = shift;
unless ($synopsis) {
$body =~ s/\n/\^\-\;/ unless ($body =~ m/\^\-\;/);
my @content = split(/\^\-\;/,$body);
$synopsis = WebGUI::HTML::filter($content[0],"all");
my @content;
if( $body =~ /<p>/ ) {
@content = WebGUI::HTML::splitTag($body);
} else {
@content = split("\n",$body);
}
shift @content if $content[0] =~ /^\s*$/;
$synopsis = WebGUI::HTML::filter($content[0],"all");
}
$body =~ s/\^\-\;/\n/;
return ($synopsis,$body);
}

View file

@ -196,9 +196,9 @@ sub getTemplateVariables {
my @items = $feed->get_item;
my %var;
$var{channel_title} = WebGUI::HTML::filter($feed->title, 'javascript');
$var{channel_description} = WebGUI::HTML::filter($feed->description, 'javascript');
$var{channel_date} = WebGUI::HTML::filter($feed->get_pubDate_epoch, 'javascript');
$var{channel_copyright} = WebGUI::HTML::filter($feed->copyright, 'javascript');
$var{channel_description} = WebGUI::HTML::filter(scalar($feed->description), 'javascript');
$var{channel_date} = WebGUI::HTML::filter(scalar($feed->get_pubDate_epoch), 'javascript');
$var{channel_copyright} = WebGUI::HTML::filter(scalar($feed->copyright), 'javascript');
$var{channel_link} = WebGUI::HTML::filter($feed->link, 'javascript');
my @image = $feed->image;
$var{channel_image_url} = WebGUI::HTML::filter($image[0], 'javascript');
@ -215,7 +215,7 @@ sub getTemplateVariables {
$item{author} = WebGUI::HTML::filter($object->author, 'javascript');
$item{guid} = WebGUI::HTML::filter($object->guid, 'javascript');
$item{link} = WebGUI::HTML::filter($object->link, 'javascript');
$item{description} = WebGUI::HTML::filter($object->description, 'javascript');
$item{description} = WebGUI::HTML::filter(scalar($object->description), 'javascript');
$item{descriptionFirst100words} = $item{description};
$item{descriptionFirst100words} =~ s/(((\S+)\s+){100}).*/$1/s;
$item{descriptionFirst75words} = $item{descriptionFirst100words};

View file

@ -14,6 +14,7 @@ package WebGUI::HTML;
=cut
use HTML::TokeParser;
use HTML::TagFilter;
use strict;
use WebGUI::Macro;
@ -36,6 +37,7 @@ A package for manipulating and massaging HTML.
$html = WebGUI::HTML::html2text($html);
$html = WebGUI::HTML::makeAbsolute($session, $html);
$html = WebGUI::HTML::processReplacements($session, $html);
@items = WebGUI::HTML::splitTag([$tag,]$html[,$count]); # defaults to ( 'p', $html, 1 )
=head1 METHODS
@ -396,5 +398,51 @@ sub processReplacements {
return $content;
}
#-------------------------------------------------------------------
=head2 WebGUI::HTML::splitTag([$tag,]$html[,$count]);
Splits a block of HTML into an array based on the contents of a single tag.
=head3 tag
The HTML tag to extract from the text. This defaults to 'p', giving a list of paragraphs.
=head3 html
The block of HTML text that will be dissected.
=head3 count
How many items do we want? defaults to 1; returns 1 non-blank item
=cut
# Collect the trimmed text of up to $count non-blank <$tag> elements in $html.
#
# Accepted call forms (the tag and the count are both optional):
#   splitTag($html)
#   splitTag($html, $count)
#   splitTag($tag, $html)
#   splitTag($tag, $html, $count)
#
# $tag defaults to 'p'; $count defaults to 1 (a count of 0 also means 1).
# A second argument consisting only of digits is always read as the count.
# Returns the list of extracted texts, in document order.
sub splitTag {
    my $tag   = shift;
    my $html  = shift;
    my $count = shift || 1;

    # The tag was omitted: what we read as the tag is really the HTML, and
    # what we read as the HTML is either missing or is the count.
    if ( not defined $html or $html =~ /^\d+$/ ) {
        # Look at the argument itself rather than at $1. When the pattern
        # match above is skipped or fails, $1 keeps whatever an earlier
        # successful match left in it and could wrongly overwrite the count.
        $count = $html if $html;
        $html  = $tag;
        $tag   = 'p';    # the default tag is 'p' -- grabs a paragraph
    }

    # Nothing to parse; treat it as empty input instead of warning on undef.
    $html = '' unless defined $html;

    $html =~ s/\&nbsp;//g;    # get rid of all non-breaking spaces

    my @result;
    my $parser = HTML::TokeParser->new( \$html );
    while ( $parser->get_tag($tag) ) {
        my $text = $parser->get_trimmed_text("/$tag");

        # POSIX classes are only recognised inside a bracketed character
        # class; written bare they would match the literal letters of
        # their names and throw away ordinary words.
        next if $text =~ /^[[:space:][:^print:]]*$/;

        push @result, $text;
        last if @result == $count;    # stop once we have a full count
    }
    return @result;
}
1;