Decode HTML entities sent by rich editors. Add tests for the decoding
inside the search indexer. Add a gotcha note about re-indexing. Fixes #10797.
This commit is contained in:
parent
91b37aae36
commit
f68db111aa
4 changed files with 43 additions and 13 deletions
|
|
@ -1,4 +1,5 @@
|
||||||
7.7.19
|
7.7.19
|
||||||
|
- fixed #10797: searching non-ASCII characters (e.g. wiki), part 2
|
||||||
|
|
||||||
7.7.18
|
7.7.18
|
||||||
- fixed #10801: Payment Methods: can select a blank method
|
- fixed #10801: Payment Methods: can select a blank method
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,17 @@ upgrading from one version to the next, or even between multiple
|
||||||
versions. Be sure to heed the warnings contained herein as they will
|
versions. Be sure to heed the warnings contained herein as they will
|
||||||
save you many hours of grief.
|
save you many hours of grief.
|
||||||
|
|
||||||
|
7.7.19
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
* The search indexer was not properly indexing non-ASCII content
|
||||||
|
entered via TinyMCE. The behaviour has been corrected. If your
|
||||||
|
site has content in languages other than English, you should
|
||||||
|
re-index the site. This can be done with the search.pl utility
|
||||||
|
script by running
|
||||||
|
perl search.pl --configFile=www.mysite.com.conf --indexsite
|
||||||
|
or, to index all of the sites on the server:
|
||||||
|
perl search.pl --indexall
|
||||||
|
|
||||||
7.7.18
|
7.7.18
|
||||||
--------------------------------------------------------------------
|
--------------------------------------------------------------------
|
||||||
* The search indexer was not properly indexing non-ASCII content. The
|
* The search indexer was not properly indexing non-ASCII content. The
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ package WebGUI::Search::Index;
|
||||||
=cut
|
=cut
|
||||||
|
|
||||||
use strict;
|
use strict;
|
||||||
|
use HTML::Entities;
|
||||||
|
|
||||||
=head1 NAME
|
=head1 NAME
|
||||||
|
|
||||||
|
|
@ -82,7 +83,6 @@ sub addKeywords {
|
||||||
my $self = shift;
|
my $self = shift;
|
||||||
my $text = join(" ", @_);
|
my $text = join(" ", @_);
|
||||||
|
|
||||||
$text = WebGUI::HTML::filter($text, "all");
|
|
||||||
$text = $self->_filterKeywords($text);
|
$text = $self->_filterKeywords($text);
|
||||||
my ($keywords) = $self->session->db->quickArray("select keywords from assetIndex where assetId=?",[$self->getId]);
|
my ($keywords) = $self->session->db->quickArray("select keywords from assetIndex where assetId=?",[$self->getId]);
|
||||||
$self->session->db->write("update assetIndex set keywords =? where assetId=?", [$keywords.' '.$text, $self->getId]);
|
$self->session->db->write("update assetIndex set keywords =? where assetId=?", [$keywords.' '.$text, $self->getId]);
|
||||||
|
|
@ -166,7 +166,8 @@ sub DESTROY {
|
||||||
|
|
||||||
=head2 _filterKeywords ( $keywords )
|
=head2 _filterKeywords ( $keywords )
|
||||||
|
|
||||||
Perform filtering and cleaning up of the keywords before submitting them.
|
Perform filtering and cleaning up of the keywords before submitting them. Ideographic characters are padded
|
||||||
|
so that they are still searchable. HTML entities are decoded.
|
||||||
|
|
||||||
=head3 $keywords
|
=head3 $keywords
|
||||||
|
|
||||||
|
|
@ -179,6 +180,8 @@ sub _filterKeywords {
|
||||||
my $keywords = shift;
|
my $keywords = shift;
|
||||||
|
|
||||||
$keywords = WebGUI::HTML::filter($keywords, "all");
|
$keywords = WebGUI::HTML::filter($keywords, "all");
|
||||||
|
$keywords = HTML::Entities::decode_entities($keywords);
|
||||||
|
utf8::upgrade($keywords);
|
||||||
|
|
||||||
# split into 'words'. Ideographic characters (such as Chinese) are
|
# split into 'words'. Ideographic characters (such as Chinese) are
|
||||||
# treated as distinct words. Everything else is space delimited.
|
# treated as distinct words. Everything else is space delimited.
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ WebGUI::Test->tagsToRollback(
|
||||||
#----------------------------------------------------------------------------
|
#----------------------------------------------------------------------------
|
||||||
# Tests
|
# Tests
|
||||||
|
|
||||||
plan tests => 16; # Increment this number for each test you create
|
plan tests => 15; # Increment this number for each test you create
|
||||||
|
|
||||||
use_ok( 'WebGUI::Search::Index' );
|
use_ok( 'WebGUI::Search::Index' );
|
||||||
|
|
||||||
|
|
@ -123,7 +123,7 @@ $article->update({
|
||||||
} );
|
} );
|
||||||
$indexer = WebGUI::Search::Index->create( $article );
|
$indexer = WebGUI::Search::Index->create( $article );
|
||||||
|
|
||||||
ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
ok ( $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
||||||
"assetId exists in assetIndex"
|
"assetId exists in assetIndex"
|
||||||
);
|
);
|
||||||
cmp_deeply (
|
cmp_deeply (
|
||||||
|
|
@ -149,7 +149,7 @@ cmp_deeply (
|
||||||
),
|
),
|
||||||
lineage => $article->get('lineage'),
|
lineage => $article->get('lineage'),
|
||||||
},
|
},
|
||||||
"Index has correct information"
|
"Index has synopsis information in keywords"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -161,9 +161,7 @@ $article->update({
|
||||||
});
|
});
|
||||||
$indexer = WebGUI::Search::Index->create( $article );
|
$indexer = WebGUI::Search::Index->create( $article );
|
||||||
|
|
||||||
ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ]);
|
||||||
"assetId exists in assetIndex"
|
|
||||||
);
|
|
||||||
cmp_deeply (
|
cmp_deeply (
|
||||||
$row,
|
$row,
|
||||||
{
|
{
|
||||||
|
|
@ -187,7 +185,7 @@ cmp_deeply (
|
||||||
),
|
),
|
||||||
lineage => $article->get('lineage'),
|
lineage => $article->get('lineage'),
|
||||||
},
|
},
|
||||||
"Index has correct information"
|
"Index has description in keywords"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -199,9 +197,7 @@ $article->update({
|
||||||
});
|
});
|
||||||
$indexer = WebGUI::Search::Index->create( $article );
|
$indexer = WebGUI::Search::Index->create( $article );
|
||||||
|
|
||||||
ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
|
$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] );
|
||||||
"assetId exists in assetIndex"
|
|
||||||
);
|
|
||||||
cmp_deeply (
|
cmp_deeply (
|
||||||
$row,
|
$row,
|
||||||
{
|
{
|
||||||
|
|
@ -224,7 +220,26 @@ cmp_deeply (
|
||||||
),
|
),
|
||||||
lineage => $article->get('lineage'),
|
lineage => $article->get('lineage'),
|
||||||
},
|
},
|
||||||
"Index has correct information"
|
"Index has synopsis and description in keywords"
|
||||||
|
);
|
||||||
|
|
||||||
|
#----------------------------------------------------------------------------
|
||||||
|
# Test that HTML entities are decoded.
|
||||||
|
$article->update({
|
||||||
|
description => "sch&ouml;n ca&ntilde;&oacute;n",
|
||||||
|
});
|
||||||
|
$indexer = WebGUI::Search::Index->create( $article );
|
||||||
|
|
||||||
|
$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] );
|
||||||
|
cmp_deeply (
|
||||||
|
$row,
|
||||||
|
superhashof({
|
||||||
|
keywords => all( # keywords contains title, menuTitle, every part of the URL and every keyword
|
||||||
|
re("sch\xF6n"),
|
||||||
|
re("ca\xF1\xF3n"),
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
"Index has decoded entities"
|
||||||
);
|
);
|
||||||
|
|
||||||
#vim:ft=perl
|
#vim:ft=perl
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue