Decode HTML entities sent by rich editors. Add tests for the decoding
inside the search indexer. Gotcha note. Fixes #10797.
This commit is contained in:
parent
91b37aae36
commit
f68db111aa
4 changed files with 43 additions and 13 deletions
|
|
@ -1,4 +1,5 @@
|
|||
7.7.19
|
||||
- fixed #10797: searching non-ASCII characters (e.g. wiki), part 2
|
||||
|
||||
7.7.18
|
||||
- fixed #10801: Payment Methods: can select a blank method
|
||||
|
|
|
|||
|
|
@ -7,6 +7,17 @@ upgrading from one version to the next, or even between multiple
|
|||
versions. Be sure to heed the warnings contained herein as they will
|
||||
save you many hours of grief.
|
||||
|
||||
7.7.19
|
||||
--------------------------------------------------------------------
|
||||
* The search indexer was not properly indexing non-ASCII content
|
||||
entered via TinyMCE. The behaviour has been corrected. If your
|
||||
site has content in languages other than English, you should
|
||||
re-index the site. This can be done with the search.pl utility
|
||||
script by running:
|
||||
perl search.pl --configFile=www.mysite.com.conf --indexsite
|
||||
or, to index all of the sites on the server:
|
||||
perl search.pl --indexall
|
||||
|
||||
7.7.18
|
||||
--------------------------------------------------------------------
|
||||
* The search indexer was not properly indexing non-ASCII content. The
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ package WebGUI::Search::Index;
|
|||
=cut
|
||||
|
||||
use strict;
|
||||
use HTML::Entities;
|
||||
|
||||
=head1 NAME
|
||||
|
||||
|
|
@ -82,7 +83,6 @@ sub addKeywords {
|
|||
my $self = shift;
|
||||
my $text = join(" ", @_);
|
||||
|
||||
$text = WebGUI::HTML::filter($text, "all");
|
||||
$text = $self->_filterKeywords($text);
|
||||
my ($keywords) = $self->session->db->quickArray("select keywords from assetIndex where assetId=?",[$self->getId]);
|
||||
$self->session->db->write("update assetIndex set keywords =? where assetId=?", [$keywords.' '.$text, $self->getId]);
|
||||
|
|
@ -166,7 +166,8 @@ sub DESTROY {
|
|||
|
||||
=head2 _filterKeywords ( $keywords )
|
||||
|
||||
Perform filtering and cleaning up of the keywords before submitting them.
|
||||
Perform filtering and cleaning up of the keywords before submitting them. Ideographic characters are padded
|
||||
so that they are still searchable. HTML entities are decoded.
|
||||
|
||||
=head3 $keywords
|
||||
|
||||
|
|
@ -179,6 +180,8 @@ sub _filterKeywords {
|
|||
my $keywords = shift;
|
||||
|
||||
$keywords = WebGUI::HTML::filter($keywords, "all");
|
||||
$keywords = HTML::Entities::decode_entities($keywords);
|
||||
utf8::upgrade($keywords);
|
||||
|
||||
# split into 'words'. Ideographic characters (such as Chinese) are
|
||||
# treated as distinct words. Everything else is space delimited.
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ WebGUI::Test->tagsToRollback(
|
|||
#----------------------------------------------------------------------------
|
||||
# Tests
|
||||
|
||||
plan tests => 16; # Increment this number for each test you create
|
||||
plan tests => 15; # Increment this number for each test you create
|
||||
|
||||
use_ok( 'WebGUI::Search::Index' );
|
||||
|
||||
|
|
@ -123,7 +123,7 @@ $article->update({
|
|||
} );
|
||||
$indexer = WebGUI::Search::Index->create( $article );
|
||||
|
||||
ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
||||
ok ( $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
||||
"assetId exists in assetIndex"
|
||||
);
|
||||
cmp_deeply (
|
||||
|
|
@ -149,7 +149,7 @@ cmp_deeply (
|
|||
),
|
||||
lineage => $article->get('lineage'),
|
||||
},
|
||||
"Index has correct information"
|
||||
"Index has synopsis information in keywords"
|
||||
);
|
||||
|
||||
|
||||
|
|
@ -161,9 +161,7 @@ $article->update({
|
|||
});
|
||||
$indexer = WebGUI::Search::Index->create( $article );
|
||||
|
||||
ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
||||
"assetId exists in assetIndex"
|
||||
);
|
||||
$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ]);
|
||||
cmp_deeply (
|
||||
$row,
|
||||
{
|
||||
|
|
@ -187,7 +185,7 @@ cmp_deeply (
|
|||
),
|
||||
lineage => $article->get('lineage'),
|
||||
},
|
||||
"Index has correct information"
|
||||
"Index has description in keywords"
|
||||
);
|
||||
|
||||
|
||||
|
|
@ -199,9 +197,7 @@ $article->update({
|
|||
});
|
||||
$indexer = WebGUI::Search::Index->create( $article );
|
||||
|
||||
ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
||||
"assetId exists in assetIndex"
|
||||
);
|
||||
$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] );
|
||||
cmp_deeply (
|
||||
$row,
|
||||
{
|
||||
|
|
@ -224,7 +220,26 @@ cmp_deeply (
|
|||
),
|
||||
lineage => $article->get('lineage'),
|
||||
},
|
||||
"Index has correct information"
|
||||
"Index has synopsis and description in keywords"
|
||||
);
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
# Test that HTML entities are decoded.
|
||||
$article->update({
|
||||
description => "sch&ouml;n ca&ntilde;&oacute;n",
|
||||
});
|
||||
$indexer = WebGUI::Search::Index->create( $article );
|
||||
|
||||
$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] );
|
||||
cmp_deeply (
|
||||
$row,
|
||||
superhashof({
|
||||
keywords => all( # keywords must contain both words of the description, with their entities decoded
|
||||
re("sch\xF6n"),
|
||||
re("ca\xF1\xF3n"),
|
||||
),
|
||||
}),
|
||||
"Index has decoded entities"
|
||||
);
|
||||
|
||||
#vim:ft=perl
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue