Work around bugs in HTML::Entities, which doesn't output strict UTF-8, and
DBD::mysql, which doesn't encode high (8-bit) characters correctly. This shows up
in the Post synopsis, WebGUI::HTML::splitTag and Post->getSynopsisAndContent.
This commit is contained in:
Colin Kuskie 2009-07-07 22:24:00 +00:00
parent d6e242f8b2
commit b89486d5f3
4 changed files with 121 additions and 89 deletions

View file

@ -7,6 +7,7 @@
- fixed #10601: Edit field in Thing: no submit button
- fixed #10560: 7.7.2 upgrade script leaves rssFromParent assets behind (Wes Morgan)
- fixed #10612: strange i18n, with regard to label
- fixed #9935: German characters in Collaboration System
7.7.13
- fixed #10574: Creating Calendar Entry

View file

@ -434,6 +434,7 @@ sub splitTag {
while (my $token = $p->get_tag($tag)) {
my $text = $p->get_trimmed_text("/$tag");
utf8::upgrade($text); ##PATCH to work around HTML::Entities and DBD::mysql
next if $text =~ /^([[:space:]]|[[:^print:]])*$/; # skip whitespace
push @result, $text; # add the text between the tags to the result array
last if @result == $count; # if we have a full count then quit

View file

@ -26,7 +26,7 @@ use strict;
use lib "$FindBin::Bin/../lib";
use WebGUI::Test;
use WebGUI::Session;
use Test::More tests => 5; # increment this value for each test you create
use Test::More tests => 9; # increment this value for each test you create
use WebGUI::Asset::Wobject::Collaboration;
use WebGUI::Asset::Post;
use WebGUI::Asset::Post::Thread;
@ -115,13 +115,27 @@ ok($post->canEdit(), "User in groupToEditPost group can edit post after the time
# Re-check edit permission with the session's user set to $groupIdEditUser
# (presumably $session->user switches the current user -- confirm).
$session->user({userId => $groupIdEditUser->userId});
ok($post->canEdit(), "User in groupIdEditUserGroup group can edit post after the timeout");
# getSynopsisAndContent
# Each call passes an empty string as the first argument and the body text as
# the second, and returns a (synopsis, content) pair; only the synopsis is
# checked here.  Presumably an empty first argument means "derive the synopsis
# from the body" -- confirm against Post::getSynopsisAndContent.
my ($synopsis, $content) = $post->getSynopsisAndContent('', q|Brandheiße Neuigkeiten rund um's Klettern für euch aus der Region |);
is($synopsis, q|Brandheiße Neuigkeiten rund um's Klettern für euch aus der Region |, 'getSynopsisAndContent: UTF8 characters okay');
# Store the synopsis on the post so that it can be read back through a freshly
# instantiated asset below.
$post->update({synopsis => $synopsis});
## DBD::mysql has a bug where 8-bit characters are not encoded properly, and
## HTML::Entities produces 8-bit (non-strict) utf8 characters.  This quick
## round-trip test makes sure the patch in splitTag works correctly.
my $dbPost = WebGUI::Asset->newByDynamicClass($session, $post->getId);
# The pattern uses '.' in place of the two non-ASCII characters, so only the
# surrounding ASCII text is matched literally.
like($dbPost->get('synopsis'), qr/Brandhei.e Neuigkeiten rund um's Klettern f.r euch aus der Region /, 'patch test for DBD::Mysql and HTML::Entities');
# Plain text containing literal < and > is expected back unchanged.
($synopsis, $content) = $post->getSynopsisAndContent('', q|less than < greater than >|);
is($synopsis, q|less than < greater than >|, '... HTML escaped characters okay');
# Text wrapped in <p> with &lt;/&gt; entities is expected back with the tag
# removed and the entities decoded.
($synopsis, $content) = $post->getSynopsisAndContent('', q|<p>less than &lt; greater than &gt;</p>|);
is($synopsis, q|less than < greater than >|, '... HTML entities decoded by HTML::splitTag');
# Placeholder: a deliberately failing test inside a Test::More TODO block, so
# it is reported as an expected failure rather than breaking the run.
TODO: {
local $TODO = "Tests to make later";
ok(0, 'Whole lot more work to do here');
}
# Empty END block: no cleanup is performed here.
END {
}
# vim: syntax=perl filetype=perl

View file

@ -30,86 +30,102 @@ my $session = WebGUI::Test->session;
# call type = 4: only the text; return scalar
# Table of WebGUI::HTML::splitTag test cases, one hash per case:
#   inputText - HTML text handed to splitTag
#   output    - expected result; when several pieces are returned the test
#               loop joins them with ';' before comparing
#   style     - which call form the test loop uses (1 to 4)
#   tag       - tag name passed to splitTag (used by styles 1 and 2)
#   count     - count passed to splitTag (used by styles 1 and 3)
#   comment   - description passed to is()
# NOTE(review): the first ten cases appear twice in this view; the two copies
# differ only in one comment string ('get nonblank(nbsp)...' versus
# 'first nonblank(nbsp)...').  Presumably these are the removed and added
# sides of a re-indentation diff -- confirm against the real file.
my @testArray = (
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph!,
style => 1,
tag => 'p',
count => 1,
comment => 'get one paragraph, no defaults',
},
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph!,
style => 4,
tag => 'p',
count => 1,
comment => 'get one paragraph, all defaults',
},
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph;Second Paragraph!,
style => 1,
tag => 'p',
count => -1,
comment => 'get all paragraphs, no defaults',
},
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph;Second Paragraph!,
style => 3,
tag => 'p',
count => -1,
comment => 'get all paragraphs, default tag',
},
{
inputText => q!<p> </p><p>First nonblank Paragraph</p> <p>Second Paragraph</p>!,
output => q!First nonblank Paragraph!,
style => 4,
tag => 'p',
count => -1,
comment => 'get nonblank paragraph, all defaults',
},
{
inputText => q!<p> &nbsp;</p><p>First nonblank Paragraph</p> <p>Second Paragraph</p>!,
output => q!First nonblank Paragraph!,
style => 4,
tag => 'p',
count => -1,
comment => 'get nonblank(nbsp) paragraph, all defaults',
},
{
inputText => q!<li>First List Item</li> <li>Second List Item</li> <li>Third List Item</li>!,
output => q!First List Item;Second List Item;Third List Item!,
style => 1,
tag => 'li',
count => -1,
comment => 'get all list items, no defaults',
},
{
inputText => q!<li>First List Item</li> <li>Second List Item</li> <li>Third List Item</li>!,
output => q!First List Item;Second List Item!,
style => 1,
tag => 'li',
count => 2,
comment => 'get first 2 list items, no defaults',
},
{
inputText => q!<tr><td>First Item</td> <td>Second Item</td></tr>!,
output => q!First Item Second Item!,
style => 1,
tag => 'tr',
count => -1,
comment => 'get table row ignore table data',
},
{
inputText => q!<tr><td>First Item</td> <td>Second Item</td></tr>!,
output => q!First Item;Second Item!,
style => 1,
tag => 'td',
count => -1,
comment => 'get table data ignore table row',
},
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph!,
style => 1,
tag => 'p',
count => 1,
comment => 'get one paragraph, no defaults',
},
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph!,
style => 4,
tag => 'p',
count => 1,
comment => 'get one paragraph, all defaults',
},
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph;Second Paragraph!,
style => 1,
tag => 'p',
count => -1,
comment => 'get all paragraphs, no defaults',
},
{
inputText => q!<p>First Paragraph</p> <p>Second Paragraph</p>!,
output => q!First Paragraph;Second Paragraph!,
style => 3,
tag => 'p',
count => -1,
comment => 'get all paragraphs, default tag',
},
{
inputText => q!<p> </p><p>First nonblank Paragraph</p> <p>Second Paragraph</p>!,
output => q!First nonblank Paragraph!,
style => 4,
tag => 'p',
count => -1,
comment => 'get nonblank paragraph, all defaults',
},
{
inputText => q!<p> &nbsp;</p><p>First nonblank Paragraph</p> <p>Second Paragraph</p>!,
output => q!First nonblank Paragraph!,
style => 4,
tag => 'p',
count => -1,
comment => 'first nonblank(nbsp) paragraph, all defaults',
},
{
inputText => q!<li>First List Item</li> <li>Second List Item</li> <li>Third List Item</li>!,
output => q!First List Item;Second List Item;Third List Item!,
style => 1,
tag => 'li',
count => -1,
comment => 'get all list items, no defaults',
},
{
inputText => q!<li>First List Item</li> <li>Second List Item</li> <li>Third List Item</li>!,
output => q!First List Item;Second List Item!,
style => 1,
tag => 'li',
count => 2,
comment => 'get first 2 list items, no defaults',
},
{
inputText => q!<tr><td>First Item</td> <td>Second Item</td></tr>!,
output => q!First Item Second Item!,
style => 1,
tag => 'tr',
count => -1,
comment => 'get table row ignore table data',
},
{
inputText => q!<tr><td>First Item</td> <td>Second Item</td></tr>!,
output => q!First Item;Second Item!,
style => 1,
tag => 'td',
count => -1,
comment => 'get table data ignore table row',
},
# Entities inside the paragraph are expected to come back decoded.
{
inputText => q!<p>less than &lt; greater than &gt;</p>!,
output => q!less than < greater than >!,
style => 1,
tag => 'p',
count => -1,
comment => 'Entities get decoded',
},
# Non-ASCII text; note the space before </p> in the input is absent from the
# expected output.
{
inputText => q!<p>Brandheiße Neuigkeiten rund um's Klettern für euch aus der Region </p>!,
output => q!Brandheiße Neuigkeiten rund um's Klettern für euch aus der Region!,
style => 1,
tag => 'p',
count => -1,
comment => 'Get a paragraph tag with long characters',
},
);
my $numTests = scalar @testArray;
@ -119,13 +135,13 @@ plan tests => $numTests;
# Result of the most recent splitTag call.  It is declared outside the loop, so
# a test set whose style is not 1-4 would be compared against the previous
# iteration's value.
my $output;
# Run every test set, picking the splitTag call form from its 'style' key and
# comparing the result with the expected 'output' string.
# NOTE(review): each call line below appears twice in this view -- presumably
# the removed and added sides of a whitespace-only diff; confirm against the
# real file.
foreach my $testSet (@testArray) {
if( $testSet->{style} == 1 ) {
# style 1: tag, text and count are all passed; the returned list is joined with ';'
$output = join ';',WebGUI::HTML::splitTag($testSet->{tag},$testSet->{inputText}, $testSet->{count});
$output = join ';',WebGUI::HTML::splitTag($testSet->{tag},$testSet->{inputText}, $testSet->{count});
} elsif( $testSet->{style} == 2 ) {
# style 2: tag and text only; the call is made in scalar context
$output = WebGUI::HTML::splitTag($testSet->{tag},$testSet->{inputText});
$output = WebGUI::HTML::splitTag($testSet->{tag},$testSet->{inputText});
} elsif( $testSet->{style} == 3 ) {
# style 3: text and count only (no tag); the returned list is joined with ';'
$output = join ';', WebGUI::HTML::splitTag($testSet->{inputText}, $testSet->{count});
$output = join ';', WebGUI::HTML::splitTag($testSet->{inputText}, $testSet->{count});
} elsif( $testSet->{style} == 4 ) {
# style 4: only the text is passed; the call is made in scalar context
$output = WebGUI::HTML::splitTag($testSet->{inputText});
$output = WebGUI::HTML::splitTag($testSet->{inputText});
}
is($output, $testSet->{output}, $testSet->{comment});
}