Fix the separator macro. Adds a method to WebGUI::HTML that splits

content based on the separator macro, and makes a best effort to repair tags that the splitting breaks by adding the missing start and end tags. Update Post and Article to use it.
2009-07-25 04:30:28 +00:00 · 2009-07-25 04:30:28 +00:00 · 1a74cb056d
commit 1a74cb056d
parent 5dbea78e69
5 changed files with 172 additions and 3 deletions
--- a/lib/WebGUI/Asset/Post.pm
+++ b/lib/WebGUI/Asset/Post.pm
@ -652,7 +652,9 @@ sub getSynopsisAndContent {
 	unless ($synopsis) {
           my @content;
           if( $body =~ /\^\-\;/ ) {
-               @content = split(/\^\-\;/, $body ,2);
+               my @pieces = WebGUI::HTML::splitSeparator($body);
+               $content[0] = shift @pieces;
+               $content[1] = join '', @pieces;
           }
           elsif( $body =~ /<p>/ ) {
               @content = WebGUI::HTML::splitTag($body);
--- a/lib/WebGUI/Asset/Wobject/Article.pm
+++ b/lib/WebGUI/Asset/Wobject/Article.pm
@ -17,6 +17,7 @@ use WebGUI::Cache;
 use WebGUI::Paginator;
 use WebGUI::Asset::Wobject;
 use WebGUI::Storage;
+use WebGUI::HTML;

 our @ISA = qw(WebGUI::Asset::Wobject);

@ -405,7 +406,7 @@ sub view {
 		$var{description} =~ s/\^\-\;//g;
 		$p->setDataByArrayRef([$var{description}]);
 	} else {
-		my @pages = split(/\^\-\;/,$var{description});
+		my @pages = WebGUI::HTML::splitSeparator($var{description});
 		$p->setDataByArrayRef(\@pages);
 		$var{description} = $p->getPage;
 	}
--- a/lib/WebGUI/HTML.pm
+++ b/lib/WebGUI/HTML.pm
@ -401,7 +401,59 @@ sub processReplacements {

 #-------------------------------------------------------------------

-=head2 WebGUI::HTML::splitTag([$tag,]$html[,$count]);
+=head2 splitSeparator ( $content )
+
+Splits the supplied content on the separator macro, ^-;, and returns the
+resulting list of sections.  If the content does not contain the separator
+macro, it is returned unchanged.
+
+If the content contains HTML, and splitting the content would result in
+sections of content missing start or end HTML tags, these are filled in on a
+best effort basis: every element that is still open at the split point is
+closed at the end of that section and reopened at the start of the next one.
+Reopened tags are bare; the attributes of the original start tag are not
+copied.
+
+Unary tags, like br, img, and hr are ignored, whether they are proper XHTML
+or not.  They are never closed or reopened, and they do not affect how the
+tags around them are balanced.
+
+In the special case of the separator macro inside bare paragraph tags,
+
+    <p>^-;</p>,
+
+no empty paragraph tags are generated.
+
+=head3 content
+
+The content to split.
+
+=cut
+
+sub splitSeparator {
+    my $content = shift;
+    return $content unless $content =~ /\^-;/;
+
+    # A separator that sits alone inside a bare paragraph would otherwise
+    # leave an empty <p></p> pair on each side of the split.
+    $content =~ s{<p>\s*\^-;\s*</p>}{\^-;}g;
+
+    # Names of the elements opened, and not yet closed, in the current chunk.
+    my @tagStack = ();
+    my $parser = HTML::Parser->new(
+        api_version => 3,
+        # ignore_tags only suppresses the start/end events of these tags.
+        # ignore_elements would also discard everything that follows an
+        # unclosed <br>, <img> or <hr> until a matching end tag is seen,
+        # hiding real tags from the stack.
+        ignore_tags => [ qw/br img hr/ ],
+        start_h     => [ sub { push @tagStack, $_[0]; }, 'tag'],
+        # Best effort: any end tag closes the most recently opened element.
+        end_h       => [ sub { pop  @tagStack;        }, 'tag'],
+    );
+    my @sections = ();
+    CHUNK: while (my ($leader, $trailer) = split /\^-;/, $content, 2) {
+        # No separator left, so whatever remains is the final section.
+        if (! defined $trailer) {
+            push @sections, $leader;
+            last CHUNK;
+        }
+        $parser->parse($leader);
+        # Flush buffered input and reset the parser so unfinished markup in
+        # this chunk cannot leak into the parse of the next one.
+        $parser->eof;
+        # Innermost element first: close it at the end of this section and
+        # reopen it at the front of the remainder, so nesting order is kept.
+        while( my $tag = pop @tagStack) {
+            my $endTag = '</'.$tag.'>';
+            $tag       = '<'.$tag.'>';
+            $leader  .= $endTag;
+            $trailer  = $tag . $trailer;
+        }
+        push @sections, $leader;
+        $content = $trailer;
+    }
+    return @sections;
+}
+
+#-------------------------------------------------------------------
+
+=head2 splitTag([$tag,]$html[,$count]);

 splits an block of HTML into an array based on the contents of a single tag