fixed #10797: Unable to search for non-ASCII text

This commit is contained in:
Graham Knop 2009-08-20 19:24:31 +00:00
parent a5204a7386
commit 4984e19d68
4 changed files with 49 additions and 34 deletions

View file

@ -175,18 +175,27 @@ A string containing keywords.
=cut
sub _filterKeywords {
    # Normalise a raw keyword string into a space-separated list of words.
    #
    # Arguments: the invocant (unused here) and a string of keywords.
    # Returns:   a single string with the cleaned words joined by one space.
    #
    # Each ideographic character (e.g. Chinese) becomes its own word and is
    # wrapped in '' padding; everything else is split on whitespace.  Leading
    # and trailing punctuation is stripped from every word.
    my $self     = shift;
    my $keywords = shift;

    # Filtering is delegated to WebGUI::HTML::filter using its "all" mode
    # (defined outside this block).
    $keywords = WebGUI::HTML::filter($keywords, "all");

    # split() emits undef for the capture group whenever the \s+ alternative
    # is the one that matched, so test definedness before comparing;
    # otherwise every whitespace separator raises an "uninitialized" warning.
    my @raw_words = grep { defined $_ && $_ ne '' }
        split /\s+|(\p{Ideographic})/, $keywords;

    my @words;
    for my $raw_word (@raw_words) {
        my $word = $raw_word;

        # remove punctuation characters at the start and end of each word.
        $word =~ s/\A\p{isPunct}+//;
        $word =~ s/\p{isPunct}+\z//;

        # A word that was nothing but punctuation is now empty; skip it so
        # the joined result does not contain runs of spaces.
        next if $word eq '';

        # we add padding to ideographic characters to avoid minimum word
        # length limits on indexing
        if ($word =~ /\p{Ideographic}/) {
            $word = qq{''$word''};
        }

        push @words, $word;
    }

    return join q{ }, @words;
}
#-------------------------------------------------------------------