added a new function to WebGUI::HTML to parse html text and extract a list of elements based on a tag

spec. used to extract paragraphs for the synopsis; convert FeedPP properties to scalar context so that blank fields are rendered properly.
2009-01-23 03:41:49 +00:00 · 2009-01-23 03:41:49 +00:00 · 16ce54bf4f
commit 16ce54bf4f
parent 2893944837
4 changed files with 61 additions and 8 deletions
--- a/lib/WebGUI/HTML.pm
+++ b/lib/WebGUI/HTML.pm
@ -14,6 +14,7 @@ package WebGUI::HTML;

 =cut

+use HTML::TokeParser;
 use HTML::TagFilter;
 use strict;
 use WebGUI::Macro;
@ -36,6 +37,7 @@ A package for manipulating and massaging HTML.
 $html = WebGUI::HTML::html2text($html);
 $html = WebGUI::HTML::makeAbsolute($session, $html);
 $html = WebGUI::HTML::processReplacements($session, $html);
+ @items = WebGUI::HTML::splitTag([$tag,]$html[,$count]);   # defaults to ( 'p', $html, 1 )

 =head1 METHODS

@ -396,5 +398,51 @@ sub processReplacements {
 	return $content;
 }

+#-------------------------------------------------------------------
+
+=head2 WebGUI::HTML::splitTag([$tag,]$html[,$count]);
+
+Splits a block of HTML into a list holding the text found inside each occurrence of a single tag.
+
+=head3 tag
+
+The HTML tag to extract from the text.  This defaults to 'p', giving a list of paragraphs.
+
+=head3 html
+
+The block of HTML text that will be dissected.
+
+=head3 count
+
+The maximum number of non-blank items to return.  Defaults to 1.
+
+=cut
+
+sub splitTag {
+
+    my $tag = shift;
+    my $html = shift;
+    my $count = shift || 1;
+
+    # The tag is optional.  If the second argument is missing or is a bare
+    # number then the caller left the tag out: the first argument is really
+    # the HTML and the second (if present) is the count.
+    if( not defined $html or $html =~ /^\d+$/ ) {
+        # Test $html itself rather than $1: when $html is undefined the match
+        # above never runs and $1 would still hold an earlier, unrelated capture.
+        $count = $html if defined $html and $html > 0;
+        $html = $tag;
+        $tag = 'p';                 # the default tag is 'p' -- grabs a paragraph
+    }
+    $tag = 'p' unless defined $tag and length $tag;
+    $tag = lc $tag;                 # the parser reports tag names in lower case
+
+    my @result;
+    return @result unless defined $html;    # nothing to parse
+
+    $html =~ s/\&nbsp;//g;   # get rid of all non-breaking spaces
+
+    my $p = HTML::TokeParser->new(\$html);
+
+    while (my $token = $p->get_tag($tag)) {
+        my $text = $p->get_trimmed_text("/$tag");
+        # POSIX classes are only valid inside a bracketed character class;
+        # written bare they match the literal letters of their names and
+        # would discard real text such as "peace" or "print".
+        next if $text =~ /^[[:space:][:^print:]]*$/;    # skip whitespace
+        push @result, $text;          # add the text between the tags to the result array
+        last if @result == $count;    # if we have a full count then quit
+    }
+
+    return @result;
+}
+
 1;