- Added Chinese character support to the search engine and indexer, thanks to Zhou Xiaopeng.
This commit is contained in:
JT Smith 2006-07-13 15:41:30 +00:00
parent 6ad290ff63
commit 1845a3e1d5
4 changed files with 25 additions and 0 deletions

View file

@ -270,6 +270,17 @@ sub search {
unless ($keywords =~ m/"|\*/) { # do wildcards for people, like they'd expect
my @terms = split(' ',$keywords);
for (my $i = 0; $i < scalar(@terms); $i++) {
#-------------- Edited by zxp for Chinese Word Segment
utf8::decode($terms[$i]);
my @segs = split /([A-z|\d]+|\S)/, $terms[$i];
$terms[$i] = join " ",@segs;
$terms[$i] =~ s/\s{2,}/ /g;
$terms[$i] =~ s/(^\s|\s$)//g;
$terms[$i] =~ s/\s/\'\'/g;
if($terms[$i] =~ m/\'/) { # has non-latin letter in terms
$terms[$i] = '"' . $terms[$i] . '"';
}
#-------------- Edited by zxp end
$terms[$i] .= "*";
}
$keywords = join(" ", @terms);