Add tests for ideogram searching. ft_min_word_len must be 2 or less.
This commit is contained in:
parent
d9cf707963
commit
ea51ba559e
4 changed files with 36 additions and 3 deletions
|
|
@ -18,6 +18,11 @@ save you many hours of grief.
|
|||
or, to index all of the sites on the server:
|
||||
perl search.pl --indexall
|
||||
|
||||
* For searching content with ideograms, ft_min_word_len must be
|
||||
set to 2 in the MySQL config file. WebGUI will safely pad the
|
||||
characters so that it does not need to be set to 1. The WRE
|
||||
has ft_min_word_len set to 2 by default.
|
||||
|
||||
7.7.18
|
||||
--------------------------------------------------------------------
|
||||
* The search indexer was not properly indexing non-ASCII content. The
|
||||
|
|
|
|||
|
|
@ -383,7 +383,7 @@ sub search {
|
|||
for my $term (@terms) {
|
||||
# we add padding to ideographic characters to avoid minimum word length limits on indexing
|
||||
if ($term =~ /\p{Ideographic}/) {
|
||||
$term = qq{''$term''};
|
||||
$term = q{''}.$term.q{''};
|
||||
}
|
||||
$term .= q{*};
|
||||
next
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ sub _filterKeywords {
|
|||
$word =~ s/\p{isPunct}+\z//;
|
||||
# we add padding to ideographic characters to avoid minimum word length limits on indexing
|
||||
if ($word =~ /\p{Ideographic}/) {
|
||||
$word = qq{''$word''};
|
||||
$word = q{''}.$word.q{''};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
30
t/Search.t
30
t/Search.t
|
|
@ -17,6 +17,7 @@ use FindBin;
|
|||
use strict;
|
||||
use lib "$FindBin::Bin/lib";
|
||||
use Test::More;
|
||||
use Test::Deep;
|
||||
use WebGUI::Test; # Must use this before any other WebGUI modules
|
||||
use WebGUI::Session;
|
||||
|
||||
|
|
@ -28,12 +29,13 @@ my $session = WebGUI::Test->session;
|
|||
#----------------------------------------------------------------------------
|
||||
# Tests
|
||||
|
||||
plan tests => 7; # Increment this number for each test you create
|
||||
plan tests => 10; # Increment this number for each test you create
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
# put your tests here
|
||||
|
||||
use_ok('WebGUI::Search');
|
||||
use_ok('WebGUI::Search::Index');
|
||||
|
||||
my $search = WebGUI::Search->new($session);
|
||||
|
||||
|
|
@ -50,6 +52,32 @@ ok( $search->_isStopword('anybody+'), '_isStopword: regex metacharacter +
|
|||
ok( $search->_isStopword('maybe?'), '_isStopword: regex metacharacter ? does not crash the search');
|
||||
ok(! $search->_isStopword('private.+'), '_isStopword: regex metacharacters .+ do not crash the search');
|
||||
|
||||
################################################
|
||||
#
|
||||
# Chinese ideograph handling
|
||||
#
|
||||
################################################
|
||||
{
    # The description and keyword literals below are UTF-8 source text.
    use utf8;

    # Create an article to index. Its description holds ideographs while its
    # title is plain ASCII, so both kinds of keyword can be exercised against
    # the same asset.
    my $article = WebGUI::Asset->getImportNode( $session )->addChild( {
        className   => 'WebGUI::Asset::Wobject::Article',
        title       => 'Chinese ideograph experiment',
        description => "甲骨文",
    } );

    # Commit the working version tag, then hand it to WebGUI::Test
    # (tagsToRollback) so the test harness can undo it.
    my $tag = WebGUI::VersionTag->getWorking( $session );
    $tag->commit;
    WebGUI::Test->tagsToRollback($tag);

    WebGUI::Search::Index->create( $article );

    # Search on a single ideograph taken from the description.
    my $ideographIds = WebGUI::Search->new($session)
        ->search({ keywords => "甲", })
        ->getAssetIds;
    cmp_deeply( $ideographIds, [ $article->getId ], 'ideograph search works');

    # Control case: an ordinary ASCII keyword (presumably matched through the
    # title) must still find the same asset.
    my $asciiIds = WebGUI::Search->new($session)
        ->search({ keywords => "Chinese", })
        ->getAssetIds;
    cmp_deeply( $asciiIds, [ $article->getId ], 'basic test for search works');
}
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
# Cleanup
|
||||
END {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue