From 16ce54bf4f4a5ee70845bd31ab961b1f821c0e8b Mon Sep 17 00:00:00 2001
From: David Delikat <david-delikat@usa.net>
Date: Fri, 23 Jan 2009 03:41:49 +0000
Subject: [PATCH] added a new function to WebGUI::HTML to parse html text and
 extract a list of elements based on a tag spec. used to extract paragraphs
 for the synopsis; convert FeedPP properties to scalar context so that blank
 fields are rendered properly.

---
 docs/changelog/7.x.x.txt                      |  1 +
 lib/WebGUI/Asset/Post.pm                      | 12 +++--
 lib/WebGUI/Asset/Wobject/SyndicatedContent.pm |  8 ++--
 lib/WebGUI/HTML.pm                            | 48 +++++++++++++++++++
 4 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/docs/changelog/7.x.x.txt b/docs/changelog/7.x.x.txt
index ae18a4283..5a490c178 100644
--- a/docs/changelog/7.x.x.txt
+++ b/docs/changelog/7.x.x.txt
@@ -1,4 +1,5 @@
 7.6.10
+ - fixed #9455: fixed synopsis to pick out html paragraphs, fixed FeedPP fields to scalar when blank
  - fixed: With autocommit and no comments on, making a shortcut of an asset takes you to that asset's view.
 
 7.6.9
diff --git a/lib/WebGUI/Asset/Post.pm b/lib/WebGUI/Asset/Post.pm
index 79d3770a7..3332d4e71 100644
--- a/lib/WebGUI/Asset/Post.pm
+++ b/lib/WebGUI/Asset/Post.pm
@@ -548,11 +548,15 @@ sub getSynopsisAndContent {
 	my $synopsis = shift;
 	my $body = shift;
 	unless ($synopsis) {
-        	$body =~ s/\n/\^\-\;/ unless ($body =~ m/\^\-\;/);
-       	 	my @content = split(/\^\-\;/,$body);
-		$synopsis = WebGUI::HTML::filter($content[0],"all");
+           my @content;
+           if( $body =~ /<p>/ ) {
+               @content = WebGUI::HTML::splitTag($body);
+           } else {
+       	       @content = split("\n",$body);
+           }
+           shift @content if $content[0] =~ /^\s*$/;
+           $synopsis = WebGUI::HTML::filter($content[0],"all");
 	}
-	$body =~ s/\^\-\;/\n/;
 	return ($synopsis,$body);
 }
 
diff --git a/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm b/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm
index cf0a8fc20..0de8d7730 100644
--- a/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm
+++ b/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm
@@ -196,9 +196,9 @@ sub getTemplateVariables {
 	my @items = $feed->get_item;
 	my %var;
 	$var{channel_title} = WebGUI::HTML::filter($feed->title, 'javascript');
-	$var{channel_description} = WebGUI::HTML::filter($feed->description, 'javascript');
-	$var{channel_date} = WebGUI::HTML::filter($feed->get_pubDate_epoch, 'javascript');
-	$var{channel_copyright} = WebGUI::HTML::filter($feed->copyright, 'javascript');
+	$var{channel_description} = WebGUI::HTML::filter(scalar($feed->description), 'javascript');
+	$var{channel_date} = WebGUI::HTML::filter(scalar($feed->get_pubDate_epoch), 'javascript');
+	$var{channel_copyright} = WebGUI::HTML::filter(scalar($feed->copyright), 'javascript');
 	$var{channel_link} = WebGUI::HTML::filter($feed->link, 'javascript');
 	my @image = $feed->image;
 	$var{channel_image_url} = WebGUI::HTML::filter($image[0], 'javascript');
@@ -215,7 +215,7 @@ sub getTemplateVariables {
         $item{author} = WebGUI::HTML::filter($object->author, 'javascript');
         $item{guid} = WebGUI::HTML::filter($object->guid, 'javascript');
         $item{link} = WebGUI::HTML::filter($object->link, 'javascript');
-        $item{description} = WebGUI::HTML::filter($object->description, 'javascript');
+        $item{description} = WebGUI::HTML::filter(scalar($object->description), 'javascript');
         $item{descriptionFirst100words} = $item{description};
         $item{descriptionFirst100words} =~ s/(((\S+)\s+){100}).*/$1/s;
         $item{descriptionFirst75words} = $item{descriptionFirst100words};
diff --git a/lib/WebGUI/HTML.pm b/lib/WebGUI/HTML.pm
index 67e4f8068..32d94ea7d 100644
--- a/lib/WebGUI/HTML.pm
+++ b/lib/WebGUI/HTML.pm
@@ -14,6 +14,7 @@ package WebGUI::HTML;
 
 =cut
 
+use HTML::TokeParser;
 use HTML::TagFilter;
 use strict;
 use WebGUI::Macro;
@@ -36,6 +37,7 @@ A package for manipulating and massaging HTML.
  $html = WebGUI::HTML::html2text($html);
  $html = WebGUI::HTML::makeAbsolute($session, $html);
  $html = WebGUI::HTML::processReplacements($session, $html);
+ @list = WebGUI::HTML::splitTag([$tag,]$html[,$count]);    # defaults to ( 'p', $html, 1 )
 
 =head1 METHODS
 
@@ -396,5 +398,51 @@ sub processReplacements {
 	return $content;
 }
 
+#-------------------------------------------------------------------
+
+=head2 WebGUI::HTML::splitTag([$tag,]$html[,$count]);
+
+Splits a block of HTML into an array based on the contents of a single tag.
+
+=head3 tag
+
+The HTML tag to extract from the text.  This defaults to 'p', giving a list of paragraphs.
+
+=head3 html
+
+The block of HTML text that will be dissected.
+
+=head3 count
+
+How many items do we want?  Defaults to 1, which returns the first non-blank item.
+
+=cut
+
+sub splitTag {
+    # Accepts (tag, html, count), (html, count) or (html): a missing or
+    # all-digit second argument means the tag was omitted and defaults to 'p'.
+    my ($tag, $html, $count) = @_;
+    if( not defined $html or $html =~ /^\d+$/ ) {
+        ($tag, $html, $count) = ('p', $tag, $html);   # shift arguments right
+    }
+    $count ||= 1;                   # undef or 0 means "just the first item"
+    my @result;
+    return @result unless defined $html;   # nothing to parse
+
+    $html =~ s/\&nbsp;//g;   # get rid of all non-breaking spaces
+
+    my $p = HTML::TokeParser->new(\$html);
+
+    while (my $token = $p->get_tag($tag)) {
+        my $text = $p->get_trimmed_text("/$tag");
+        # POSIX classes must sit inside a bracketed class: [[:space:]], not [:space:]
+        next if $text =~ /^(?:[[:space:]]|[[:^print:]])*$/;    # skip blank text
+        push @result, $text;          # add the text between the tags to the result array
+        last if @result == $count;    # if we have a full count then quit
+    }
+
+    return @result;
+}
+
 1;