Decode HTML entities sent by rich editors. Add tests for the decoding
inside the search indexer. Add a gotcha note. Fixes #10797.
This commit is contained in:
parent
91b37aae36
commit
f68db111aa
4 changed files with 43 additions and 13 deletions
|
|
@@ -15,6 +15,7 @@ package WebGUI::Search::Index;
|
|||
=cut
|
||||
|
||||
use strict;
|
||||
use HTML::Entities;
|
||||
|
||||
=head1 NAME
|
||||
|
||||
|
|
@@ -82,7 +83,6 @@ sub addKeywords {
|
|||
my $self = shift;
|
||||
my $text = join(" ", @_);
|
||||
|
||||
$text = WebGUI::HTML::filter($text, "all");
|
||||
$text = $self->_filterKeywords($text);
|
||||
my ($keywords) = $self->session->db->quickArray("select keywords from assetIndex where assetId=?",[$self->getId]);
|
||||
$self->session->db->write("update assetIndex set keywords =? where assetId=?", [$keywords.' '.$text, $self->getId]);
|
||||
|
|
@@ -166,7 +166,8 @@ sub DESTROY {
|
|||
|
||||
=head2 _filterKeywords ( $keywords )
|
||||
|
||||
Perform filtering and cleaning up of the keywords before submitting them.
|
||||
Perform filtering and cleaning up of the keywords before submitting them. Ideographic characters are padded
|
||||
so that they are still searchable. HTML entities are decoded.
|
||||
|
||||
=head3 $keywords
|
||||
|
||||
|
|
@@ -179,6 +180,8 @@ sub _filterKeywords {
|
|||
my $keywords = shift;
|
||||
|
||||
$keywords = WebGUI::HTML::filter($keywords, "all");
|
||||
$keywords = HTML::Entities::decode_entities($keywords);
|
||||
utf8::upgrade($keywords);
|
||||
|
||||
# split into 'words'. Ideographic characters (such as Chinese) are
|
||||
# treated as distinct words. Everything else is space delimited.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue