Fix the separator macro. Adds a method to WebGUI::HTML that splits

content based on the separator macro, and does a best effort to fill in
tags that the splitting breaks by adding start and end tags.
Update Post and Article to use it.
This commit is contained in:
Colin Kuskie 2009-07-25 04:30:28 +00:00
parent 5dbea78e69
commit 1a74cb056d
5 changed files with 172 additions and 3 deletions

View file

@ -2,6 +2,7 @@
- fixed #10654: Story Archive: Search not working properly
- fixed search indexer concatenates keywords to content.
- fixed Index Story fields for Search (headline, subtitle, location, byline, highlights, story)
- fixed #10619: Pagination macro
7.7.16
- fixed #10590: Session::DateTime->secondsToInterval doesn't allow 7 weeks

View file

@ -652,7 +652,9 @@ sub getSynopsisAndContent {
unless ($synopsis) {
my @content;
if( $body =~ /\^\-\;/ ) {
@content = split(/\^\-\;/, $body ,2);
my @pieces = WebGUI::HTML::splitSeparator($body);
$content[0] = shift @pieces;
$content[1] = join '', @pieces;
}
elsif( $body =~ /<p>/ ) {
@content = WebGUI::HTML::splitTag($body);

View file

@ -17,6 +17,7 @@ use WebGUI::Cache;
use WebGUI::Paginator;
use WebGUI::Asset::Wobject;
use WebGUI::Storage;
use WebGUI::HTML;
our @ISA = qw(WebGUI::Asset::Wobject);
@ -405,7 +406,7 @@ sub view {
$var{description} =~ s/\^\-\;//g;
$p->setDataByArrayRef([$var{description}]);
} else {
my @pages = split(/\^\-\;/,$var{description});
my @pages = WebGUI::HTML::splitSeparator($var{description});
$p->setDataByArrayRef(\@pages);
$var{description} = $p->getPage;
}

View file

@ -401,7 +401,59 @@ sub processReplacements {
#-------------------------------------------------------------------
=head2 WebGUI::HTML::splitTag([$tag,]$html[,$count]);
=head2 splitSeparator ( $content )
Splits the supplied content on the separator macro, ^-;. Returns an array
of content. If the content contains HTML, and splitting the content would
result in sections of content missing start or end HTML tags, these are filled
in. Unary tags, like br, img, and hr are ignored, whether they are proper XHTML
or not.
In the special case of the separator macro inside bare paragraph tags,
<p>^-;</p>,
no empty paragraph tags are generated.
=head3 content
The content to split.
=cut
sub splitSeparator {
    # Splits $content on the separator macro (^-;) and returns the list of
    # sections.  Every HTML element that is still open where a split happens
    # is closed at the end of the section and reopened, using its original
    # start tag (attributes included), at the start of the following section.
    # Content without the macro is returned unchanged as a single element.
    my $content = shift;
    return $content unless defined $content && $content =~ /\^-;/;

    # A separator sitting alone in a paragraph would otherwise leave an empty
    # <p></p> pair behind in the neighbouring sections.
    $content =~ s{<p>\s*\^-;\s*</p>}{^-;}g;

    # Elements that never have an end tag; they must not be tracked as open.
    my %isVoid = map { $_ => 1 }
        qw/area base br col embed hr img input link meta param source track wbr/;

    # Stack of currently open elements, innermost last.
    # Each entry is [ tag name, original text of the start tag ].
    my @openTags = ();
    my $parser = HTML::Parser->new(
        api_version => 3,
        start_h     => [
            sub {
                my ($name, $text) = @_;
                # In its default mode the parser can report "<br/>" with the
                # slash as part of the tag name, so strip it before comparing.
                $name =~ s{/\z}{};
                return if $isVoid{$name};
                push @openTags, [ $name, $text ];
            },
            'tagname, text',
        ],
        end_h       => [
            sub {
                my ($name) = @_;
                # Close the nearest matching open element, together with
                # anything left open inside it.  End tags that match nothing
                # (stray or void) are ignored instead of popping blindly.
                for my $index (reverse 0 .. $#openTags) {
                    next unless $openTags[$index][0] eq $name;
                    splice @openTags, $index;
                    return;
                }
                return;
            },
            'tagname',
        ],
    );

    my @sections = ();
    CHUNK: while (my ($leader, $trailer) = split /\^-;/, $content, 2) {
        if (! defined $trailer) {
            # No separator left, the remainder is the final section.
            push @sections, $leader;
            last CHUNK;
        }

        # Each chunk is parsed as a document of its own.  eof() flushes
        # buffered text and resets the parser so that no state leaks into
        # the next chunk.
        @openTags = ();
        $parser->parse($leader);
        $parser->eof;

        # Unwind innermost first: end tags are appended in closing order and
        # start tags are prepended so the outermost ends up first.
        while (my $open = pop @openTags) {
            my ($name, $startTag) = @{ $open };
            $leader  .= '</' . $name . '>';
            $trailer  = $startTag . $trailer;
        }
        push @sections, $leader;

        # The trailer now begins with the reopened tags, so the next pass
        # sees them as open elements again.
        $content = $trailer;
    }
    return @sections;
}
#-------------------------------------------------------------------
=head2 splitTag([$tag,]$html[,$count]);
Splits a block of HTML into an array based on the contents of a single tag.

113
t/HTML/splitSeparator.t Normal file
View file

@ -0,0 +1,113 @@
#-------------------------------------------------------------------
# WebGUI is Copyright 2001-2009 Plain Black Corporation.
#-------------------------------------------------------------------
# Please read the legal notices (docs/legal.txt) and the license
# (docs/license.txt) that came with this distribution before using
# this software.
#-------------------------------------------------------------------
# http://www.plainblack.com info@plainblack.com
#-------------------------------------------------------------------
use FindBin;
use strict;
use warnings;
use lib "$FindBin::Bin/../lib";

use WebGUI::Test;
use WebGUI::HTML;
use WebGUI::Session;

use Test::More;
use Test::Deep;
use Data::Dumper;

# Fetched as in the original script; the variable itself is not used below.
my $session = WebGUI::Test->session;

# Each entry describes one call to WebGUI::HTML::splitSeparator:
#   inputText - content handed to splitSeparator
#   output    - array ref with the sections expected back, in order
#   comment   - test description
my @testArray = (
    {
        inputText => q!section_1!,
        output    => [ qw/section_1/ ],
        comment   => 'bare text, no macro',
    },
    {
        inputText => q!section_1^-;section_2!,
        output    => [ qw/section_1 section_2/ ],
        comment   => 'bare text, 2 sections',
    },
    {
        # Nothing follows the final macro, so no empty section is produced.
        inputText => q!section_1^-;!,
        output    => [ qw/section_1/ ],
        comment   => 'bare text, trailing macro',
    },
    {
        inputText => q!<p>section_1</p><p>^-;</p><p>section_2</p>!,
        output    => [ qw{<p>section_1</p> <p>section_2</p>} ],
        comment   => 'paragraph text, 2 sections, macro in separate paragraph',
    },
    {
        inputText => q!<p>section_1</p><p> ^-;</p><p>section_2</p>!,
        output    => [ qw{<p>section_1</p> <p>section_2</p>} ],
        comment   => 'paragraph text, 2 sections, macro in separate paragraph with whitespace',
    },
    {
        inputText => q!<p>section_1</p><p>^-; </p><p>section_2</p>!,
        output    => [ qw{<p>section_1</p> <p>section_2</p>} ],
        comment   => 'paragraph text, 2 sections, macro in separate paragraph with trailing whitespace',
    },
    {
        inputText => q!<p>section_1</p><p> ^-;</p><p>section_2</p><p>^-;</p><p>section_3</p>!,
        output    => [ qw{<p>section_1</p> <p>section_2</p> <p>section_3</p>} ],
        comment   => 'paragraph text, 3 sections, macros in separate paragraphs with whitespace',
    },
    {
        inputText => q!<p>section_1^-;section_2</p>!,
        output    => [ qw{<p>section_1</p> <p>section_2</p>} ],
        comment   => 'paragraph text, 2 sections, macro inside tags',
    },
    {
        inputText => q!<p><b>section_1^-;section_2</b>trailer</p>!,
        output    => [ qw{<p><b>section_1</b></p> <p><b>section_2</b>trailer</p>} ],
        comment   => 'paragraph text, 2 sections, macro inside 2 nested tags',
    },
    {
        inputText => q!<p>section_1^-;<br />section_2</p>!,
        output    => [ '<p>section_1</p>', '<p><br />section_2</p>' ],
        comment   => 'paragraph text, 2 sections, macro inside tags, with br self-close',
    },
    {
        inputText => q!<p>section_1^-;<br>section_2</p>!,
        output    => [ '<p>section_1</p>', '<p><br>section_2</p>' ],
        comment   => 'paragraph text, 2 sections, macro inside tags, with br unclosed',
    },
    {
        inputText => q!<p>section_1<br>trailer_1^-;section_2</p>!,
        output    => [ '<p>section_1<br>trailer_1</p>', '<p>section_2</p>' ],
        comment   => 'paragraph text, 2 sections, macro inside tags, with br unclosed in first section',
    },
    {
        inputText => q!<p>Very^-;long^-;paragraph</p>!,
        output    => [ '<p>Very</p>', '<p>long</p>', '<p>paragraph</p>' ],
        comment   => 'paragraph text, 3 sections, macros inside tags',
    },
    {
        inputText => q!<p><b>Very^-;long</b>^-;paragraph</p>!,
        output    => [ '<p><b>Very</b></p>', '<p><b>long</b></p>', '<p>paragraph</p>' ],
        comment   => 'paragraph text, 3 sections, macros inside tags, nesting first two tags',
    },
    {
        inputText => q!<p><b>Very^-;long^-;paragraph</b></p>!,
        output    => [ '<p><b>Very</b></p>', '<p><b>long</b></p>', '<p><b>paragraph</b></p>' ],
        comment   => 'paragraph text, 3 sections, macros inside tags, nesting all 3 sections',
    },
    {
        inputText => q!<p><b>Very^-;long^-;</b>paragraph</p>!,
        output    => [ '<p><b>Very</b></p>', '<p><b>long</b></p>', '<p><b></b>paragraph</p>' ],
        comment   => 'paragraph text, 3 sections, macros inside tags, bridge right after macro',
    },
);

# One assertion per entry, so the plan follows the table size.
my $numTests = scalar @testArray;

plan tests => $numTests;

foreach my $testSet (@testArray) {
    my @output = WebGUI::HTML::splitSeparator($testSet->{inputText});
    my $ok = cmp_deeply(
        \@output,
        $testSet->{output},
        $testSet->{comment}
    );
    # Show what was actually returned to make a failure easier to diagnose.
    if (!$ok) {
        diag Dumper \@output;
    }
}