added a new function to WebGUI::HTML to parse html text and extract a list of elements based on a tag
spec. used to extract paragraphs for the synopsis; convert FeedPP properties to scalar context so that blank fields are rendered properly.
This commit is contained in:
parent
2893944837
commit
16ce54bf4f
4 changed files with 61 additions and 8 deletions
|
|
@ -14,6 +14,7 @@ package WebGUI::HTML;
|
|||
|
||||
=cut
|
||||
|
||||
use HTML::TokeParser;
|
||||
use HTML::TagFilter;
|
||||
use strict;
|
||||
use WebGUI::Macro;
|
||||
|
|
@ -36,6 +37,7 @@ A package for manipulating and massaging HTML.
|
|||
$html = WebGUI::HTML::html2text($html);
|
||||
$html = WebGUI::HTML::makeAbsolute($session, $html);
|
||||
$html = WebGUI::HTML::processReplacements($session, $html);
|
||||
@text = WebGUI::HTML::splitTag([$tag,]$html[,$count]); # defaults to ( 'p', $html, 1 )
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
|
|
@ -396,5 +398,51 @@ sub processReplacements {
|
|||
return $content;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 WebGUI::HTML::splitTag([$tag,]$html[,$count]);
|
||||
|
||||
Splits a block of HTML into a list holding the text found inside each occurrence of a single tag.
|
||||
|
||||
=head3 tag
|
||||
|
||||
The HTML tag to extract from the text. This defaults to 'p', giving a list of paragraphs.
|
||||
|
||||
=head3 html
|
||||
|
||||
The block of HTML text that will be dissected.
|
||||
|
||||
=head3 count
|
||||
|
||||
How many items do we want? Defaults to 1, which returns the first non-blank item.
|
||||
|
||||
=cut
|
||||
|
||||
# Extract the text held inside the first $count non-blank occurrences of a
# single tag in a block of HTML.
#
# Accepted call forms:
#   splitTag($html)                  first non-blank <p>
#   splitTag($html, $count)          first $count non-blank <p> elements
#   splitTag($tag, $html)            first non-blank <$tag>
#   splitTag($tag, $html, $count)    first $count non-blank <$tag> elements
#
# With fewer than three arguments, a second argument made up only of digits
# is taken to be the count.  An HTML block that is itself just a number must
# therefore be passed using the three argument form.  A missing or zero count
# means 1.
#
# Returns the list of extracted strings, in document order; the list is empty
# when no HTML was supplied or nothing matched.
sub splitTag {
    my ($tag, $html, $count) = @_;
    my @result;

    # Work out which optional arguments were left off.  The capture variable
    # $1 is deliberately not consulted: when the pattern match is skipped it
    # still holds whatever an earlier, unrelated match left behind.
    if (!defined $html) {
        # splitTag($html)
        ($tag, $html) = ('p', $tag);
    }
    elsif (@_ < 3 && $html =~ /\A\d+\z/) {
        # splitTag($html, $count)
        ($tag, $html, $count) = ('p', $tag, $html + 0);
    }

    # Nothing to parse.
    return @result unless defined $html;

    # The parser reports tag names in lower case, so match in lower case.
    $tag = (defined $tag && length $tag) ? lc $tag : 'p';
    $count ||= 1;

    # Turn non-breaking space entities into plain spaces so that a paragraph
    # holding nothing else counts as blank.  A space is substituted instead of
    # deleting the entity so that neighbouring words are not glued together.
    # Raw 0xA0 is left alone: in an undecoded UTF-8 byte string that byte can
    # be part of a longer character.
    $html =~ s/&nbsp;|&#0*160;|&#[xX]0*[aA]0;/ /g;

    my $parser = HTML::TokeParser->new(\$html);

    while ($parser->get_tag($tag)) {
        my $text = $parser->get_trimmed_text("/$tag");

        # Skip elements with no visible content.  POSIX classes only have
        # their special meaning inside a bracketed character class.
        next if $text =~ /\A[[:space:][:^print:]]*\z/;

        push @result, $text;
        last if @result == $count;    # we have as many as were asked for
    }

    return @result;
}
|
||||
|
||||
1;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue