Add tests for ideogram searching. ft_min_word_len must be 2 or less.

This commit is contained in:
Colin Kuskie 2009-08-21 21:29:43 +00:00
parent d9cf707963
commit ea51ba559e
4 changed files with 36 additions and 3 deletions

View file

@ -18,6 +18,11 @@ save you many hours of grief.
or, to index all of the sites on the server:
perl search.pl --indexall
* For searching content with ideograms, ft_min_word_len must be
set to 2 or less in the MySQL config file. WebGUI safely pads
ideographic characters, so the value does not need to be lowered
to 1. The WRE has ft_min_word_len set to 2 by default.
7.7.18
--------------------------------------------------------------------
* The search indexer was not properly indexing non-ASCII content. The

View file

@ -383,7 +383,7 @@ sub search {
for my $term (@terms) {
# we add padding to ideographic characters to avoid minimum word length limits on indexing
if ($term =~ /\p{Ideographic}/) {
$term = qq{''$term''};
$term = q{''}.$term.q{''};
}
$term .= q{*};
next

View file

@ -193,7 +193,7 @@ sub _filterKeywords {
$word =~ s/\p{isPunct}+\z//;
# we add padding to ideographic characters to avoid minimum word length limits on indexing
if ($word =~ /\p{Ideographic}/) {
$word = qq{''$word''};
$word = q{''}.$word.q{''};
}
}

View file

@ -17,6 +17,7 @@ use FindBin;
use strict;
use lib "$FindBin::Bin/lib";
use Test::More;
use Test::Deep;
use WebGUI::Test; # Must use this before any other WebGUI modules
use WebGUI::Session;
@ -28,12 +29,13 @@ my $session = WebGUI::Test->session;
#----------------------------------------------------------------------------
# Tests
plan tests => 7; # Increment this number for each test you create
plan tests => 10; # Increment this number for each test you create
#----------------------------------------------------------------------------
# put your tests here
use_ok('WebGUI::Search');
use_ok('WebGUI::Search::Index');
my $search = WebGUI::Search->new($session);
@ -50,6 +52,32 @@ ok( $search->_isStopword('anybody+'), '_isStopword: regex metacharacter +
ok( $search->_isStopword('maybe?'), '_isStopword: regex metacharacter ? does not crash the search');
ok(! $search->_isStopword('private.+'), '_isStopword: regex metacharacters .+ do not crash the search');
################################################
#
# Chinese ideograph handling
#
################################################
{
    use utf8;    # the string literals in this block contain UTF-8 ideographs

    # Create an article to index: ASCII title, ideographic description.
    my $article = WebGUI::Asset->getImportNode( $session )->addChild( {
        className   => 'WebGUI::Asset::Wobject::Article',
        title       => 'Chinese ideograph experiment',
        description => "甲骨文",
    } );

    # Commit the working version tag and register it for rollback
    # so the article is removed again after the test run.
    my $tag = WebGUI::VersionTag->getWorking( $session );
    $tag->commit;
    WebGUI::Test->tagsToRollback($tag);
    WebGUI::Search::Index->create( $article );

    # Search for a single ideograph that appears in the article's description.
    # Each search gets its own searcher and its own result variable, so no
    # lexical is declared twice in this scope.
    my $ideographSearcher = WebGUI::Search->new($session);
    my $ideographIds      = $ideographSearcher->search({ keywords => "甲", })->getAssetIds;
    cmp_deeply( $ideographIds, [ $article->getId ], 'ideograph search works');

    # Search for a plain ASCII word that appears in the article's title.
    my $asciiSearcher = WebGUI::Search->new($session);
    my $asciiIds      = $asciiSearcher->search({ keywords => "Chinese", })->getAssetIds;
    cmp_deeply( $asciiIds, [ $article->getId ], 'basic test for search works');
}
#----------------------------------------------------------------------------
# Cleanup
END {