Migrated IndexedSearch

This commit is contained in:
Len Kranendonk 2005-02-13 21:42:31 +00:00
parent 1b8828c5c0
commit 531365296c
2 changed files with 0 additions and 1263 deletions

View file

@ -1,596 +0,0 @@
package WebGUI::Wobject::IndexedSearch;
use strict;
use Time::HiRes;
use WebGUI::Wobject::IndexedSearch::Search;
use WebGUI::HTMLForm;
use WebGUI::HTML;
use WebGUI::Macro;
use WebGUI::International;
use WebGUI::Session;
use WebGUI::SQL;
use WebGUI::Wobject;
use Tie::IxHash;
use WebGUI::Utility;
use WebGUI::Paginator;
use WebGUI::Page;
our @ISA = qw(WebGUI::Wobject);
#-------------------------------------------------------------------
# Returns the display name of this wobject: international message 17
# looked up in the wobject's own namespace.
sub name {
    my $self = shift;
    return WebGUI::International::get(17, $self->get("namespace"));
}
#-------------------------------------------------------------------
# Constructor. Builds a WebGUI::Wobject with the extended properties of the
# indexed search (index name, search filters, pagination and highlight
# settings) and reblesses it into $class.
sub new {
    my ($class, $property) = @_;

    my %extended = (
        indexName        => { defaultValue => 'default' },
        searchRoot       => { fieldType => 'checkList', defaultValue => 'any' },
        forceSearchRoots => { fieldType => 'yesNo', defaultValue => 1 },
        paginateAfter    => { defaultValue => 10 },
        highlight        => { defaultValue => 1 },
        previewLength    => { defaultValue => 130 },
    );

    # Filters that are select lists and default to "any".
    foreach my $filterName (qw(users namespaces languages contentTypes)) {
        $extended{$filterName} = { fieldType => 'selectList', defaultValue => 'any' };
    }

    # highlight_1 .. highlight_5: default highlight colors.
    my @defaultColors = ('#ffff66', '#A0FFFF', '#99ff99', '#ff9999', '#ff66ff');
    foreach my $index (0 .. $#defaultColors) {
        $extended{'highlight_'.($index + 1)} = { defaultValue => $defaultColors[$index] };
    }

    my $self = WebGUI::Wobject->new(
        -useTemplate        => 1,
        -useMetaData        => 1,
        -properties         => $property,
        -extendedProperties => \%extended,
    );
    return bless $self, $class;
}
#-------------------------------------------------------------------
# Returns the constant 5.
# NOTE(review): presumably the UI level attached to this wobject - confirm
# against WebGUI::Wobject.
sub uiLevel {
    my $level = 5;
    return $level;
}
#-------------------------------------------------------------------
# Builds the edit form of the search wobject and passes its three sections
# (properties, layout, privileges) to the parent class' www_edit.
# Returns an HTML error message instead of the form when the docInfo table
# cannot be read, or when it does not contain any index yet.
sub www_edit {
my $self = shift;
my (@data, %indexName);
tie my %searchRoot, 'Tie::IxHash';
my $layout = WebGUI::HTMLForm->new;
my $properties = WebGUI::HTMLForm->new;
my $privileges = WebGUI::HTMLForm->new;
# Unconditional read to catch installation errors.
my $sth = WebGUI::SQL->unconditionalRead("select distinct(indexName), indexName from IndexedSearch_docInfo");
unless ($sth->errorCode < 1) {
return "<p><b>" . WebGUI::International::get(1,$self->get("namespace")) . $sth->errorMessage."</b></p>";
}
while (@data = $sth->array) {
$indexName{$data[0]} = $data[1];
}
$sth->finish;
# No rows at all: nothing has been indexed yet, so there is nothing to configure.
unless(%indexName) {
return "<p><b>" . WebGUI::International::get(2,$self->get("namespace")) .
"<p>" . WebGUI::International::get(3,$self->get("namespace")) . "</b></p>";
}
# Index to use
# $properties->radioList( -name=>'indexName',
# -options=>\%indexName,
# -label=>WebGUI::International::get(5,$self->get("namespace")),
# -value=>$self->getValue("indexName"),
# -vertical=>1
# );
# NOTE: For now we're limiting each site to one index. Will allow more in the future.
# NOTE(review): the hidden value is "IndexedSearch_default" while new() declares
# 'default' as the property default - confirm which one is intended.
$properties->hidden(
-name=>"indexName",
-value=>"IndexedSearch_default"
);
# Page roots
# Options, in order: "any", the current page, then every non-system page
# whose parentId is '0'.
%searchRoot = ( 'any'=>WebGUI::International::get(15,$self->get("namespace")),
$session{page}{pageId}=>WebGUI::International::get(4,$self->get("namespace")),
WebGUI::SQL->buildHash("select pageId,title from page where parentId='0' and isSystem<>1 order by title")
);
# Multi-valued properties are stored newline-separated, hence the split("\n", ...).
$properties->checkList ( -name=>'searchRoot',
-options=>\%searchRoot,
-label=>WebGUI::International::get(6,$self->get("namespace")),
-value=>[ split("\n", $self->getValue("searchRoot")) ],
-multiple=>1,
-vertical=>1,
);
$properties->yesNo(
-name=>'forceSearchRoots',
-label=>WebGUI::International::get('edit-forceSearchRoots-label',$self->get("namespace")),
-value=>$self->getValue("forceSearchRoots")
);
# Content of specific user
$properties->selectList ( -name=>'users',
-options=>$self->_getUsers(),
-label=>WebGUI::International::get(7,$self->get("namespace")),
-value=>[ split("\n", $self->getValue("users")) ],
-multiple=>1,
-size=>5
);
# Content in specific namespaces
$properties->selectList ( -name=>'namespaces',
-options=>$self->_getNamespaces,
-label=>WebGUI::International::get(8,$self->get("namespace")),
-value=>[ split("\n", $self->getValue("namespaces")) ],
-multiple=>1,
-size=>5
);
# Content in specific language
$properties->checkList ( -name=>'languages',
-options=>$self->_getLanguages(),
-label=>WebGUI::International::get(9,$self->get("namespace")),
-value=>[ split("\n", $self->getValue("languages")) ],
-multiple=>1,
);
# Only specific content types
# The "content" shortcut entry is removed from the list offered in the edit form.
my $contentTypes = $self->_getContentTypes();
delete $contentTypes->{content};
$properties->checkList ( -name=>'contentTypes',
-options=>$contentTypes,
-label=>WebGUI::International::get(10,$self->get("namespace")),
-value=>[ split("\n", $self->getValue("contentTypes")) ],
-multiple=>1,
-vertical=>1,
);
# Layout section: pagination, preview length and highlighting.
$layout->integer ( -name=>'paginateAfter',
-label=>WebGUI::International::get(11,$self->get("namespace")),
-value=>$self->getValue("paginateAfter"),
);
$layout->integer ( -name=>'previewLength',
-label=>WebGUI::International::get(12,$self->get("namespace")),
-value=>$self->getValue("previewLength"),
);
$layout->yesNo ( -name=>'highlight',
-label=>WebGUI::International::get(13,$self->get("namespace")),
-value=>$self->getValue("highlight"),
);
# Color picker for highlight colors
$layout->raw ( -value=>'
<SCRIPT LANGUAGE="Javascript" SRC="'.$session{config}{extrasURL}.'/wobject/IndexedSearch/ColorPicker2.js"></SCRIPT>
<SCRIPT LANGUAGE="JavaScript">
var cp = new ColorPicker("window");
</SCRIPT>'
);
# One text field per highlight color (highlight_1 .. highlight_5), each with a
# "Pick" link that opens the color picker created above.
for (1..5) {
my $highlight = "highlight_$_";
$layout->text ( -name=>$highlight,
-label=>WebGUI::International::get(14,$self->get("namespace")) ." $_:",
-size=>7,
-value=>$self->getValue($highlight),
-subtext=>qq{
<A HREF="#" onClick="cp.select($highlight,'$highlight');
return false;" NAME="$highlight" ID="$highlight">Pick</A>}
);
}
# NOTE(review): the heading is a hard-coded English string, unlike every other
# label in this form - confirm whether it should be internationalized.
return $self->SUPER::www_edit(
-properties=>$properties->printRowsOnly,
-layout=>$layout->printRowsOnly,
-privileges=>$privileges->printRowsOnly,
-heading=>"Edit Search",
-helpId=>"search add/edit"
);
}
#-------------------------------------------------------------------
# Renders the search form and, when the query yields results, one page of
# result details through the wobject's template.
#
# The query is assembled from the form fields query, exactPhrase, allWords,
# atLeastOne and without. Besides the results, the template variables hold
# one loop per filter (namespaces, contentTypes, contentTypesSimple, users,
# languages, searchRoots) describing the available options and which of
# them are currently selected.
sub www_view {
    my $self = shift;
    $self->logView() if ($session{setting}{passiveProfilingEnabled});
    my %var;

    # Do some query handling
    $var{exactPhrase} = $session{form}{exactPhrase};
    $var{allWords} = $session{form}{allWords};
    $var{atLeastOne} = $session{form}{atLeastOne};
    $var{without} = $session{form}{without};
    $var{query} = $session{form}{query};
    $var{query} .= qq{ +"$var{exactPhrase}"} if ($var{exactPhrase});
    $var{query} .= " ".join(" ",map("+".$_,split(/\s+/,$var{allWords}))) if ($var{allWords});
    $var{query} .= qq{ $var{atLeastOne}} if ($var{atLeastOne});
    $var{query} .= " ".join(" ",map("-".$_,split(/\s+/,$var{without}))) if ($var{without});

    # Remove macro's from query
    $var{query} = WebGUI::Macro::negate($var{query});

    # Set some standard vars
    $var{submit} = WebGUI::Form::submit({value=>WebGUI::International::get(16, $self->get("namespace"))});
    $var{"int.search"} = WebGUI::International::get(16,$self->get("namespace"));
    $var{wid} = $self->get("wobjectId");
    $var{numberOfResults} = '0';
    $var{"select_".$self->getValue("paginateAfter")} = "selected";

    # Do the search
    my $startTime = Time::HiRes::time();
    my $filter = $self->_buildFilter;
    my $search = WebGUI::Wobject::IndexedSearch::Search->new($self->getValue('indexName'));
    $search->open;
    my $results = $search->search($var{query},$filter);
    $var{duration} = Time::HiRes::time() - $startTime;
    $var{duration} = sprintf("%.3f", $var{duration}); # Duration rounded to 3 decimal places

    # Let's see if the search returned any results
    if (defined ($results)) {
        $var{numberOfResults} = scalar(@$results);

        # Deal with pagination: the paginator URL has to carry the query and
        # every submitted filter value along to the next page.
        my $url = "wid=".$self->get("wobjectId")."&func=view&query=".WebGUI::URL::escape($var{query});
        foreach my $field (qw(users namespaces languages contentTypes)) {
            foreach my $value ($session{cgi}->param($field)) {
                $url .= "&$field=".WebGUI::URL::escape($value);
            }
        }
        $url .= "&paginateAfter=".$self->getValue("paginateAfter");
        my $p = WebGUI::Paginator->new(WebGUI::URL::page($url), $self->getValue("paginateAfter"));
        $p->setDataByArrayRef($results);
        $var{startNr} = 1;
        if ($session{form}{pn}) {
            $var{startNr} = (($session{form}{pn} - 1) * $self->getValue("paginateAfter")) + 1;
        }
        my @highlightColors = map { $self->getValue("highlight_$_") } (1..5);
        $var{queryHighlighted} = $search->highlight($var{query}, undef, \@highlightColors);

        # Get result details for this page
        if ($p->getPageNumber > $p->getNumberOfPages) {
            # A page beyond the last one was requested: show no results.
            $var{numberOfResults} = 0;
            $var{resultsLoop} = [];
        } else {
            $var{resultsLoop} = $search->getDetails($p->getPageData,
                highlightColors => \@highlightColors,
                previewLength => $self->getValue('previewLength'),
                highlight => $self->getValue('highlight')
            );
            # Pagination variables
            $var{endNr} = $var{startNr}+(scalar(@{$var{resultsLoop}}))-1;
            $p->appendTemplateVars(\%var);
        }
    }

    # Tells whether $value is among the submitted values of form field $field.
    # Must be called in scalar context.
    my $isSelected = sub {
        my ($field, $value) = @_;
        if (scalar $session{cgi}->param($field)) {
            return isIn($value, $session{cgi}->param($field));
        }
        # \Q..\E: the option value is data, not a regular expression.
        return ($session{form}{$field} =~ /\Q$value\E/);
    };

    # Create a loop with namespaces
    $var{namespaces} = [];
    my $namespaces = $self->_getNamespaces('restricted');
    foreach my $namespace (keys %$namespaces) {
        my $selected = $isSelected->('namespaces', $namespace);
        push(@{$var{namespaces}}, { value => $namespace, name => $namespaces->{$namespace}, selected => $selected });
    }

    # Create a loop with contentTypes
    #
    # And while we are busy we also create a loop with simplified contentTypes
    # This means: wobject, page, wobjectDetail are masked in one option: content
    $var{contentTypes} = [];
    $var{contentTypesSimple} = [];
    my $contentTypes = $self->_getContentTypes('restricted');
    foreach my $type (keys %$contentTypes) {
        my $selected = $isSelected->('contentTypes', $type);
        unless ($type =~ /^content$/) { # No shortcut in the detailed contentType list
            push(@{$var{contentTypes}}, { value => $type,
                name => $contentTypes->{$type},
                selected => $selected,
                'type_'.$type => 1 });
        }
        # The alternation has to be grouped; without the group the anchors
        # only apply to the first and the last alternative.
        unless ($type =~ /^(?:page|wobject|wobjectDetail)$/) { # No details in the simple contentType list
            push(@{$var{contentTypesSimple}}, { value => $type,
                name => $contentTypes->{$type},
                selected => $selected,
                'type_'.$type => 1 });
        }
    }

    # Create a loop with users
    $var{users} = [];
    my $users = $self->_getUsers('restricted');
    foreach my $user (keys %$users) {
        my $selected = $isSelected->('users', $user);
        push(@{$var{users}}, { value => $user, name => $users->{$user}, selected => $selected });
    }

    # Create a loop with languages
    $var{languages} = [];
    my $languages = $self->_getLanguages('restricted');
    foreach my $language (keys %$languages) {
        my $selected = $isSelected->('languages', $language);
        push(@{$var{languages}}, { value => $language, name => $languages->{$language}, selected => $selected });
    }

    # Create a loop with searchable page roots
    my @roots = split(/\n/, $self->get('searchRoot'));
    my %checked = map {$_=>1} $session{cgi}->param("searchRoot");
    if (isIn('any', @roots)) {
        # "any": offer every daughter of the anonymous root page.
        foreach my $rootData (WebGUI::Page->getAnonymousRoot->daughters) {
            push (@{$var{searchRoots}}, {
                value => $rootData->{'pageId'},
                menuTitle => $rootData->{'menuTitle'},
                title => $rootData->{'title'},
                urlizedTitle => $rootData->{'urlizedTitle'},
                checked => $checked{$rootData->{'pageId'}},
            });
            $var{"rootPage.".$rootData->{'urlizedTitle'}.".id"} = $rootData->{'pageId'};
            $var{"rootPage.".$rootData->{'urlizedTitle'}.".checked"} = $checked{$rootData->{'pageId'}};
        }
    } else {
        foreach my $rootId (@roots) {
            my $rootData = WebGUI::Page->new($rootId);
            push (@{$var{searchRoots}}, {
                value => $rootData->get('pageId'),
                menuTitle => $rootData->get('menuTitle'),
                title => $rootData->get('title'),
                urlizedTitle => $rootData->get('urlizedTitle'),
                checked => $checked{$rootData->get('pageId')},
            });
            $var{"rootPage.".$rootData->get('urlizedTitle').".id"} = $rootData->get('pageId');
            $var{"rootPage.".$rootData->get('urlizedTitle').".checked"} = $checked{$rootData->get('pageId')};
        }
    }
    $var{"anyRootPage.checked"} = $checked{'any'};

    # close the search
    $search->close;
    return $self->processTemplate($self->get("templateId"),\%var);
}
#-------------------------------------------------------------------
# Returns an array reference with the pageIds of all pages that have to be
# searched: every search root plus the pages below it, in pre-order.
#
# The roots are the ones configured on the wobject, unless the visitor
# submitted own roots and is allowed to narrow the search; in that case
# only submitted roots that are also configured are used.
sub _buildPageList {
    my $self = shift;
    my @requested = $session{cgi}->param("searchRoot");
    my @roots;

    if (@requested
        && !$self->getValue("forceSearchRoots")
        && !isIn('any', @requested)
    ) {
        my @configured = split(/\n+/, $self->get("searchRoot"));
        foreach my $candidate (@requested) {
            next unless (isIn($candidate, @configured));
            push(@roots, $candidate);
        }
    } else {
        @roots = split(/\n+/, $self->get("searchRoot"));
    }

    my @pages;
    my $collect = sub {
        push(@pages, $_[0]->get('pageId'));
    };
    foreach my $rootId (@roots) {
        WebGUI::Page->new($rootId)->traversePreOrder($collect);
    }
    return [ @pages ];
}
#-------------------------------------------------------------------
# Builds the filter that is handed to the search object: a hash reference
# mapping a column name of IndexedSearch_docInfo to an array reference of
# allowed values. Values submitted by the visitor take precedence over the
# values configured on the wobject; "any" means "do not filter".
#
# All values are stored unquoted: the consumer of the filter quotes them
# when it builds the SQL statement.
sub _buildFilter {
    my $self = shift;
    my %filter = ();

    # pages
    if ($self->get('searchRoot') !~ /any/i) {
        $filter{pageId} = $self->_buildPageList;
    }

    # languages
    if ($session{form}{languages} && ! isIn('any', $session{cgi}->param('languages'))) {
        $filter{languageId} = [ $session{cgi}->param('languages') ];
    } elsif ($self->getValue('languages') !~ /any/i) {
        $filter{languageId} = [ split(/\n/, $self->getValue('languages')) ];
    }
    # Some content (i.e. profiles) don't have a language.
    # They must be found as well.
    push(@{$filter{languageId}}, '0') if (exists $filter{languageId});

    # content-types
    if ($session{form}{contentTypes} && ! isIn('any', $session{cgi}->param('contentTypes'))) {
        $filter{contentType} = [ $session{cgi}->param('contentTypes') ];
        # contentType "content" is a shortcut for "page", "wobject" and "wobjectDetail"
        if (isIn('content', $session{cgi}->param('contentTypes'))) {
            push(@{$filter{contentType}}, qw/page wobject wobjectDetail/);
        }
    } elsif ($self->getValue('contentTypes') !~ /any/i) {
        $filter{contentType} = [ split(/\n/, $self->getValue('contentTypes')) ];
    }

    # users
    if ($session{form}{users} && ! isIn('any', $session{cgi}->param('users'))) {
        $filter{ownerId} = [];
        foreach my $submitted ($session{cgi}->param('users')) {
            my $user = $submitted;
            if ($user =~ /\D/) {
                # Not a userId but a username, optionally with * wildcards:
                # resolve it to the userId of the first matching user.
                $user =~ s/\*/%/g;
                ($user) = WebGUI::SQL->buildArray("select userId from users where username like ".quote($user));
            }
            # The id is stored as is. Quoting it here as well would quote it
            # twice, and the filter would never match an owner.
            push(@{$filter{ownerId}}, $user) if (defined $user && $user =~ /^\d+$/);
        }
    } elsif ($self->getValue('users') !~ /any/i) {
        $filter{ownerId} = [ split(/\n/, $self->getValue('users')) ];
    }

    # namespaces
    if ($session{form}{namespaces} && ! isIn('any', $session{cgi}->param('namespaces'))) {
        $filter{namespace} = [ $session{cgi}->param('namespaces') ];
    } elsif ($self->getValue('namespaces') !~ /any/i) {
        $filter{namespace} = [ split(/\n/, $self->getValue('namespaces')) ];
    }

    # delete $filter{ownerId} if it is an empty array reference
    if (exists($filter{ownerId})) {
        delete $filter{ownerId} unless (scalar(@{$filter{ownerId}}));
    }
    return \%filter;
}
#-------------------------------------------------------------------
# Returns a reference to an ordered (Tie::IxHash) hash of the languages that
# can be searched. The first entry is always "any".
#
# With a true $restricted and a language restriction configured on the
# wobject, only the configured languages are listed; otherwise every
# languageId found in IndexedSearch_docInfo is.
sub _getLanguages {
    my $self = shift;
    my $restricted = shift;
    my $indexed = WebGUI::SQL->buildHashRef("select distinct(languageId) from IndexedSearch_docInfo");
    tie my %languages, 'Tie::IxHash';

    unless ($restricted and $self->get('languages') !~ /any/i) {
        %languages = ('any' => WebGUI::International::get(24,$self->get("namespace")) , %$indexed);
        return \%languages;
    }

    $languages{any} = WebGUI::International::get(24,$self->get("namespace"));
    foreach my $languageId (split(/\n/, $self->get('languages'))) {
        $languages{$languageId} = $indexed->{$languageId};
    }
    return \%languages;
}
#-------------------------------------------------------------------
# Returns a reference to an ordered (Tie::IxHash) hash mapping namespace to
# display name. The first entry is always "any".
#
# With a true $restricted and a namespace restriction configured on the
# wobject, only the configured namespaces are listed; otherwise every
# namespace found in IndexedSearch_docInfo is. The display name comes from
# the wobject class itself; namespaces without a loadable wobject class are
# shown with their first letter capitalized.
sub _getNamespaces {
    my ($self, $restricted) = @_;

    my %international;
    foreach my $wobject (@{$session{config}{wobjects}}) {
        my $cmd = "WebGUI::Wobject::".$wobject;
        my $load = 'use '.$cmd;
        eval($load);
        if ($@) {
            # A wobject that does not compile cannot be instantiated either.
            # Log it and fall back to the capitalized namespace below.
            WebGUI::ErrorHandler::warn("Wobject failed to compile: $cmd.".$@);
            next;
        }
        my $w = $cmd->new({namespace=>$wobject, wobjectId=>'new'});
        $international{$wobject} = $w->name;
    }

    tie my %namespaces, 'Tie::IxHash';
    $namespaces{any} = WebGUI::International::get(18,$self->get("namespace"));

    my @listed;
    if ($restricted and $self->get('namespaces') !~ /any/i) {
        @listed = split(/\n/, $self->get('namespaces'));
    } else {
        @listed = WebGUI::SQL->buildArray("select distinct(namespace) from IndexedSearch_docInfo order by namespace");
    }
    foreach my $namespace (@listed) {
        $namespaces{$namespace} = $international{$namespace} || ucfirst($namespace);
    }
    return \%namespaces;
}
#-------------------------------------------------------------------
# Returns a reference to an ordered (Tie::IxHash) hash mapping content type
# to display name. The first two entries are always "any" and "content",
# the latter being a shortcut for page, wobject and wobjectDetail.
#
# With a true $restricted and a content type restriction configured on the
# wobject, only the configured types follow; otherwise every contentType
# found in IndexedSearch_docInfo does.
sub _getContentTypes {
    my $self = shift;
    my $restricted = shift;
    my %label = ( 'page' => WebGUI::International::get(2),
        'wobject' => WebGUI::International::get(19,$self->get("namespace")),
        'wobjectDetail' => WebGUI::International::get(20,$self->get("namespace")),
        'content' => WebGUI::International::get(21,$self->get("namespace")),
        'discussion' => WebGUI::International::get(892),
        'profile' => WebGUI::International::get(22,$self->get("namespace")),
        'any' => WebGUI::International::get(23,$self->get("namespace")),
    );

    tie my %contentTypes, 'Tie::IxHash';
    $contentTypes{any} = $label{any};
    $contentTypes{content} = $label{content}; # shortcut for page, wobject and wobjectDetail

    if ($restricted and $self->get('contentTypes') !~ /any/i) {
        # Configured types: types without a label get no display name.
        foreach my $type (split(/\n/, $self->get('contentTypes'))) {
            $contentTypes{$type} = $label{$type};
        }
    } else {
        # Indexed types: types without a label are shown capitalized.
        foreach my $type (WebGUI::SQL->buildArray("select distinct(contentType) from IndexedSearch_docInfo order by contentType")) {
            $contentTypes{$type} = $label{$type} || ucfirst($type);
        }
    }
    return \%contentTypes;
}
#-------------------------------------------------------------------
# Returns a hash (as a list) whose keys are the pageIds of all pages below
# the page $searchRoot; every value is 1.
#
# This is a plain function, not a method: the first argument is the pageId
# of the search root.
sub _getSearchablePages {
    my $searchRoot = shift;
    my %pages;

    # Read all children first, so the statement handle can be released
    # before descending into the subtrees.
    my @children;
    my $sth = WebGUI::SQL->read("select pageId from page where parentId = ".quote($searchRoot));
    while (my %data = $sth->hash) {
        push(@children, $data{pageId});
    }
    $sth->finish;

    foreach my $child (@children) {
        $pages{$child} = 1;
        # Merge the subtree into the result instead of recopying the
        # complete result hash for every child.
        my %descendants = _getSearchablePages($child);
        @pages{keys %descendants} = values %descendants;
    }
    return %pages;
}
#-------------------------------------------------------------------
# Returns a reference to an ordered (Tie::IxHash) hash of the users whose
# content can be searched. The first entry is always "any".
#
# With a true $restricted and a user restriction configured on the wobject,
# the configured values are listed, each one as key and as label; otherwise
# all users are, as userId => username, ordered by username.
sub _getUsers {
    my $self = shift;
    my $restricted = shift;
    tie my %users, 'Tie::IxHash';

    unless ($restricted and $self->get('users') !~ /any/i) {
        %users = ( 'any' => WebGUI::International::get(25,$self->get("namespace")),
            WebGUI::SQL->buildHash("select userId, username from users order by username")
        );
        return \%users;
    }

    $users{any} = WebGUI::International::get(25,$self->get("namespace"));
    foreach my $configured (split(/\n/, $self->get('users'))) {
        $users{$configured} = $configured;
    }
    return \%users;
}
1;

View file

@ -1,667 +0,0 @@
package WebGUI::Wobject::IndexedSearch::Search;
=head1 LEGAL
-------------------------------------------------------------------
WebGUI is Copyright 2001-2005 Plain Black Corporation.
-------------------------------------------------------------------
Please read the legal notices (docs/legal.txt) and the license
(docs/license.txt) that came with this distribution before using
this software.
-------------------------------------------------------------------
http://www.plainblack.com info@plainblack.com
-------------------------------------------------------------------
=cut
use strict;
use DBIx::FullTextSearch;
use WebGUI::DateTime;
use WebGUI::SQL;
use WebGUI::URL;
use WebGUI::HTML;
use WebGUI::ErrorHandler;
use WebGUI::Grouping;
use DBIx::FullTextSearch::StopList;
use WebGUI::Utility;
use WebGUI::Session;
use WebGUI::Privilege;
use HTML::Highlight;
use WebGUI::Macro;
=head1 NAME
Package WebGUI::Wobject::IndexedSearch::Search
=head1 DESCRIPTION
Search implementation for WebGUI.
=head1 SYNOPSIS
use WebGUI::Wobject::IndexedSearch::Search;
my $search = WebGUI::Wobject::IndexedSearch::Search->new();
$search->indexDocument( { text => 'Index this text',
location => 'http://www.mysite.com/index.pl/faq#45',
languageId => 3,
namespace => 'FAQ'
});
my $hits = $search->search("+foo -bar koo", { namespace => ['Article', 'FAQ'] });
$search->close;
=head1 SEE ALSO
This package is an extension to DBIx::FullTextSearch and HTML::Highlight.
See those packages for documentation of their methods.
=head1 METHODS
These methods are available from this package:
=cut
#-------------------------------------------------------------------
# Returns the crumb trail of the page with the given pageId as HTML: one
# link per page, from the topmost ancestor down to the page itself, each
# followed by " &gt; ". The page with pageId "0" is not part of the trail.
# Returns an empty string when the page does not exist.
#
# This is a plain function: the only argument is the pageId.
sub _recurseCrumbTrail {
    my $pageId = shift;
    my %data;
    my $output = '';
    tie %data, 'Tie::CPHash';
    %data = WebGUI::SQL->quickHash("select pageId,parentId,menuTitle,urlizedTitle from page where pageId=".quote($pageId));

    # Unknown page: there is nothing to link to. Without this check an
    # empty link would be added to the trail.
    return $output unless (defined $data{pageId});

    if ($data{pageId}) {
        # Ancestors first, so the trail reads from the top down.
        $output .= _recurseCrumbTrail($data{parentId});
    }
    if ($data{pageId} ne "0") {
        $output .= '<a class="crumbTrail" href="'.WebGUI::URL::gateway($data{urlizedTitle})
            .'">'.$data{menuTitle}.'</a> &gt; ';
    }
    return $output;
}
#-------------------------------------------------------------------
=head2 close ( )
Closes the DBIx::FullTextSearch session.
=cut
# Closes the search session by running the object's own DESTROY method.
sub close {
    my ($self) = @_;
    return $self->DESTROY();
}
#-------------------------------------------------------------------
=head2 create ( [ %options ] )
Creates a new DBIx::FullTextSearch index.
=head3 %options
Options to pass to DBIx::FullTextSearch.
The default options that are used are:
( backend => column, word_length => 20, stoplist => undef )
Please refer to the DBIx::FullTextSearch documentation for a complete list of options.
=cut
# Creates the full text index of this object and resets the next docId to 1.
# %options are merged over the object's default create options.
# Returns the DBIx::FullTextSearch object, or undef (after reporting a
# fatal error) when the index could not be created.
sub create {
    my $self = shift;
    my %options = (%{$self->{_createOptions}}, @_);

    # Stemming is optional: without Lingua::Stem the index is built unstemmed.
    if ($options{stemmer}) {
        eval "use Lingua::Stem";
        if ($@) {
            WebGUI::ErrorHandler::warn("IndexedSearch: Can't use stemmer: $@");
            delete $options{stemmer};
        }
    }

    # The stoplist is stored per index; create it on first use and pass the
    # per-index name on.
    if ($options{stoplist}) {
        my $stoplistName = $self->getIndexName."_".$options{stoplist};
        unless ($self->existsTable($stoplistName."_stoplist")) {
            DBIx::FullTextSearch::StopList->create_default($self->getDbh, $stoplistName, $options{stoplist});
        }
        $options{stoplist} = $stoplistName;
    }

    my $fts = DBIx::FullTextSearch->create($self->getDbh, $self->getIndexName, %options);
    $self->{_fts} = $fts;
    unless (defined $fts) {
        WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to create index.\n$DBIx::FullTextSearch::errstr");
        return undef;
    }
    $self->{_docId} = 1;
    return $self->{_fts};
}
#-------------------------------------------------------------------
=head2 existsTable ( tableName )
Returns true if tableName exists in database.
=head3 tableName
The name of table.
=cut
# Tells whether $table is among the tables reported by "show tables".
sub existsTable {
    my $self = shift;
    my $table = shift;
    my @tables = WebGUI::SQL->buildArray("show tables");
    return isIn($table, @tables);
}
#-------------------------------------------------------------------
=head2 getDetails ( docIdList , [ %options ] )
Returns an array reference containing details for each docId.
=head3 docIdList
An array reference containing docIds.
=head3 previewLength
The maximum number of characters in each of the context sections. Defaults to "80".
=head3 highlight
A boolean indicating whether or not to enable highlight. Defaults to "1".
=head3 highlightColors
A reference to an array of CSS color identificators.
=head3
=cut
# Returns an array reference with one hash reference per document in
# $docIdList, in the order of that list. Each hash holds the columns of
# IndexedSearch_docInfo (minus the two shortcut columns) plus username,
# userProfile, header, body and crumbTrail.
#
# Options: previewLength, highlight and highlightColors.
sub getDetails {
    my ($self, $docIdList, %options) = @_;
    my @searchDetails;

    # Without docIds the statement below would contain "in ()", which is
    # not valid SQL.
    return \@searchDetails unless ($docIdList && @{$docIdList});
    my $docIds = quoteAndJoin($docIdList);

    # Display names of the namespaces, as provided by the wobject classes.
    my %namespace;
    foreach my $wobject (@{$session{config}{wobjects}}) {
        my $cmd = "WebGUI::Wobject::".$wobject;
        my $load = 'use '.$cmd;
        eval($load);
        if ($@) {
            # A wobject that does not compile cannot be instantiated either.
            # Log it; its namespace is shown capitalized instead.
            WebGUI::ErrorHandler::warn("Wobject failed to compile: $cmd.".$@);
            next;
        }
        my $w = $cmd->new({namespace=>$wobject, wobjectId=>'new'});
        $namespace{$wobject} = $w->name;
    }

    my $sql = "select * from IndexedSearch_docInfo where docId in ($docIds) and indexName = ".quote($self->getIndexName) ;
    $sql .= " ORDER BY FIELD(docId, $docIds)"; # Maintain $docIdList order
    my $sth = WebGUI::SQL->read($sql);
    while (my %data = $sth->hash) {
        $data{namespace} = $namespace{$data{namespace}} || ucfirst($data{namespace});
        if ($data{ownerId}) {
            ($data{username}) = WebGUI::SQL->quickArray("select username from users where userId = ".quote($data{ownerId}));
            $data{userProfile} = WebGUI::URL::page("op=viewProfile&uid=$data{ownerId}");
        }

        # A shortcut is either the text itself or a select statement that
        # returns the text.
        if ($data{bodyShortcut} =~ /^\s*select /i) {
            $data{body} = (WebGUI::SQL->quickArray($data{bodyShortcut}))[0];
        } else {
            $data{body} = $data{bodyShortcut};
        }
        if ($data{headerShortcut} =~ /^\s*select /i) {
            $data{header} = (WebGUI::SQL->quickArray($data{headerShortcut}))[0];
        } else {
            $data{header} = $data{headerShortcut};
        }
        delete($data{bodyShortcut});
        delete($data{headerShortcut});

        if ($data{body}) {
            $data{body} = WebGUI::Macro::filter($data{body});
            $data{body} = WebGUI::HTML::filter($data{body},'all');
            $data{body} = $self->preview($data{body}, $options{previewLength});
            $data{body} = $self->highlight($data{body},undef, $options{highlightColors}) if ($options{highlight});
        }
        if ($data{header}) {
            $data{header} = WebGUI::Macro::filter($data{header});
            $data{header} = WebGUI::HTML::filter($data{header},'all');
            $data{header} = $self->highlight($data{header},undef, $options{highlightColors}) if ($options{highlight});
            # NOTE(review): the location is only turned into a gateway URL for
            # documents that have a header - confirm that this is intended.
            $data{location} = WebGUI::URL::gateway($data{location});
        }
        $data{crumbTrail} = _recurseCrumbTrail($data{pageId});
        $data{crumbTrail} =~ s/\s*\&gt;\s*$//; # no separator after the last crumb
        push(@searchDetails, \%data);
    }
    $sth->finish;
    return \@searchDetails;
}
#-------------------------------------------------------------------
=head2 getDbh ( )
Returns the object's database handler.
=cut
# Accessor for the database handler stored in the object (_dbh).
sub getDbh {
    my ($self) = @_;
    return $self->{_dbh};
}
#-------------------------------------------------------------------
=head2 getDocId ( )
Returns the next docId for this object.
=cut
# Accessor for the next free docId stored in the object (_docId).
sub getDocId {
    my ($self) = @_;
    return $self->{_docId};
}
#-------------------------------------------------------------------
=head2 getIndexName ( )
Returns the full index name of this object.
=cut
# Accessor for the index name stored in the object (_indexName).
sub getIndexName {
    my ($self) = @_;
    return $self->{_indexName};
}
#-------------------------------------------------------------------
=head2 _queryToWords ( [ query ] )
Converts a DBIx::FullTextSearch query to (\@Words, \@Wildcards) suitable to pass to HTML::Highlight
=cut
# Converts a full text search query into two parallel array references,
# (\@words, \@wildcards), as expected by HTML::Highlight:
#   - a plain word gets the wildcard '*',
#   - a word written with an asterisk gets the wildcard '%',
#   - a quoted phrase is kept as one word with an undefined wildcard.
# Boolean operators (AND, OR, NOT), excluded words (-word) and words of
# fewer than two characters are skipped.
#
# $query defaults to the query of the last search. The result is cached in
# the object, per query.
sub _queryToWords {
    my ($self, $query) = @_;
    # Only fall back when no query was passed. $query must not be declared
    # again here: a second "my" masks the argument, and the passed query
    # would never be used.
    $query ||= $self->{_query};
    $query = '' unless (defined $query);

    # Return the processed words / wildcards from memory if it's cached.
    if ($self->{$query."words"} && $self->{$query."wildcards"}) {
        return ($self->{$query."words"}, $self->{$query."wildcards"});
    }

    # deal with quotes: splitting on the quote character yields the text
    # outside and inside of quotes in turns, starting outside.
    my $inQuote = 0;
    my (@words, @wildcards);
    foreach my $segment (split(/\"/, $query)) {
        if ($inQuote) {
            if ($segment ne '') {
                push(@words, $segment);
                push(@wildcards, undef); # Exact match
            }
        } else {
            foreach my $term (split(/\s+/, $segment)) {
                next if ($term =~ /^AND$/i);    # boolean AND
                next if ($term =~ /^OR$/i);     # boolean OR
                next if ($term =~ /^NOT$/i);    # boolean NOT
                next if ($term =~ /^\-/);       # exclude word
                next if ($term =~ /^.{0,1}$/);  # at least 2 characters
                # '%' matches any character, '*' also matches the plural of the word
                my $wildcard = ($term =~ /\*/) ? '%' : '*';
                $term =~ s/['"()+*]+//g;        # remove query operators and quotes
                next if ($term eq '');          # nothing but operators
                push(@words, $term);
                push(@wildcards, $wildcard);
            }
        }
        $inQuote = $inQuote ? 0 : 1;
    }

    # Store words / wildcards in memory
    $self->{$query."words"} = \@words;
    $self->{$query."wildcards"} = \@wildcards;
    return (\@words, \@wildcards);
}
#-------------------------------------------------------------------
=head2 highlight ( text [ , query , colors ] )
highlight words or patterns in HTML documents.
=head3 text
The text to highlight
=head3 query
A query containing the words to highlight. Defaults to the last used $search->search query.
Special case: When query contains only an asterisk '*', no highlighting is applied.
=head3 colors
A reference to an array of CSS color identificators.
=cut
# Returns $text with the words of $query highlighted by HTML::Highlight.
# $query defaults to the query of the last search; $colors is a reference
# to an array of CSS colors. When the query is a single asterisk, the text
# is returned unchanged.
sub highlight {
    my ($self, $text, $query, $colors) = @_;
    # Only fall back when no query was passed. $query must not be declared
    # again here: a second "my" masks the argument, and the passed query
    # would never be used.
    $query ||= $self->{_query};
    return $text if (defined $query && $query =~ /^\s*\*\s*$/); # query = '*', no highlight

    my ($words, $wildcards) = $self->_queryToWords($query);
    my $hl = HTML::Highlight->new(
        words => $words,
        wildcards => $wildcards,
        colors => $colors
    );
    return $hl->highlight($text);
}
#-------------------------------------------------------------------
=head2 indexDocument ( hashRef )
Adds a document to the index.
This method doesn't store the document itself. Instead, it stores information about words
in the document in such a structured way that it makes easy and fast to look up what
documents contain certain words and return id's of the documents.
=head3 text
The text to index.
=head3 location
The location of the document. Most likely an URL.
=head3 contentType
The content type of this document.
=head3 docId
The unique Id of this document. Defaults to the next empty docId.
=head3 pageId
The pageId of the page on which this document resides. Defaults to 0.
=head3 wobjectId
The wobjectID of the wobject that holds this document. Defaults to 0.
=head3 ownerId
The ownerId of the document. Defaults to 3.
=head3 languageId
The languageId of this document. Defaults to undef.
=head3 namespace
The namespace of this document. Defaults to 'WebGUI'.
=head3 page_groupIdView
Id of group authorized to view this page. Defaults to '7' (everyone)
=head3 wobject_groupIdView
Id of group authorized to view this wobject. Defaults to '7' (everyone)
=head3 wobject_special_groupIdView
Id of group authorized to view the details of this wobject.
=head3 headerShortcut
An sql statement that returns the header (title, question, subject, name, whatever)
of this document.
=head3 bodyShortcut
An sql statement that returns the body (description, answer, message, whatever)
of this document.
=cut
# Adds one document to the index: the text goes to the full text index, the
# remaining fields of the $document hash reference go to a new row in
# IndexedSearch_docInfo.
sub indexDocument {
my ($self, $document) = @_;
# The document is stored under its own docId, or under the object's next free
# docId when it does not bring one.
$self->{_fts}->index_document($document->{docId} || $self->{_docId}, $document->{text});
my $docId = ($document->{docId} || $self->{_docId});
# Missing fields default to: pageId and wobjectId 0, namespace 'WebGUI', the
# three groupIdView fields 7, ownerId 3. dateIndexed is the current time.
WebGUI::SQL->write("insert into IndexedSearch_docInfo ( docId,
indexName,
pageId,
wobjectId,
languageId,
namespace,
location,
page_groupIdView,
wobject_groupIdView,
wobject_special_groupIdView,
headerShortcut,
bodyShortcut,
contentType,
ownerId,
dateIndexed )
values ( ".
quote($docId).", ".
quote($self->getIndexName).", ".
quote($document->{pageId} || 0).", ".
quote($document->{wobjectId} || 0).", ".
quote($document->{languageId}).", ".
quote($document->{namespace} || 'WebGUI')." , ".
quote($document->{location}).", ".
quote($document->{page_groupIdView} || 7).", ".
quote($document->{wobject_groupIdView} || 7).", ".
quote($document->{wobject_special_groupIdView} || 7).", ".
quote($document->{headerShortcut})." ,".
quote($document->{bodyShortcut})." ,".
quote($document->{contentType})." ,".
quote($document->{ownerId} || 3).",
".WebGUI::DateTime::time()." )"
);
# NOTE(review): the counter is advanced even when the document supplied its own
# docId - confirm that explicit and generated docIds cannot collide.
$self->{_docId}++;
}
#-------------------------------------------------------------------
=head2 new ( [ indexName , dbh ] )
Constructor.
=head3 indexName
The name of the index to open. Defaults to 'default'.
=head3 $dbh
Database handler to use. Defaults to $WebGUI::Session::session{dbh}.
=cut
# Constructor. $indexName defaults to 'default', $dbh to the database
# handler of the WebGUI session. The object also carries the default
# options used when an index is created.
sub new {
    my ($class, $indexName, $dbh) = @_;

    my %createOptions = (
        backend => 'column',
        word_length => 20,
        filter => 'map { lc $_ if ($_ !~ /\^.*;/) }',
    );
    my %object = (
        _indexName => $indexName || 'default',
        _dbh => $dbh || $WebGUI::Session::session{dbh},
        _createOptions => \%createOptions,
    );
    return bless \%object, $class;
}
#-------------------------------------------------------------------
=head2 open ( )
Opens an existing DBIx::FullTextSearch index.
=cut
# Opens the existing full text index of this object and sets the next
# docId to one more than the highest docId stored for the index.
# Returns the DBIx::FullTextSearch object, or undef (after reporting a
# fatal error) when the index could not be opened.
sub open {
    my $self = shift;
    my $indexName = $self->getIndexName;

    $self->{_fts} = DBIx::FullTextSearch->open($self->getDbh, $indexName);
    unless (defined $self->{_fts}) {
        WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to open index.\n$DBIx::FullTextSearch::errstr");
        return undef;
    }

    my ($highestDocId) = WebGUI::SQL->quickArray("select max(docId) from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName));
    $self->{_docId} = $highestDocId;
    $self->{_docId}++;
    return $self->{_fts};
}
#-------------------------------------------------------------------
=head2 preview ( text , [ previewLength , query ] )
Returns a context preview in which words from a search query appear in the resulting documents.
The words are always in the middle of each of the sections.
=head3 text
The text to preview
=head3 previewLength
The maximum number of characters in each of the context sections. Defaults to 80.
A preview length of "0" means no preview,
while a negative preview length returns the complete text.
=head3 query
A query containing the words to highlight. Defaults to the last used $search->search query.
=cut
# Returns a preview of $text: the context in which the words of $query
# appear, or the beginning of the text when the query is empty, is a single
# asterisk, or none of its words is found.
#
# $previewLength is the maximum number of characters of each context
# section and defaults to 80; 0 returns an empty string, a negative value
# returns the complete text. $query defaults to the query of the last
# search.
sub preview {
    my ($self, $text, $previewLength, $query) = @_;
    $previewLength = 80 if (not defined $previewLength);
    return '' unless ($previewLength);
    return $text if ($previewLength < 0);
    # Only fall back when no query was passed. $query must not be declared
    # again here: a second "my" masks the argument, and the passed query
    # would never be used.
    $query ||= $self->{_query};

    # Strips the markup and cuts the text at the last whitespace within
    # the first $previewLength characters.
    my $leadingText = sub {
        my $plain = WebGUI::HTML::filter($_[0],'all');
        $plain =~ s/^(.{1,$previewLength})\s+.*$/$1/s;
        return $plain;
    };

    if (not $query or ($query =~ /^\s*\*\s*$/)) { # Query is '*' or empty.
        $text = $leadingText->($text);
    } else {
        my ($words, $wildcards) = $self->_queryToWords($query);
        my $hl = HTML::Highlight->new(
            words => $words,
            wildcards => $wildcards
        );
        my $preview = join('... ',@{$hl->preview_context($text, $previewLength)});
        $text = $preview ? $preview : $leadingText->($text);
    }

    $text =~ s/^(\s|&nbsp;)+//;
    $text =~ s/(\s|&nbsp;)+$//;
    if ($text ne '') {
        $text = '<STRONG>... </STRONG>'.$text if ($text !~ /^[A-Z]+/); # ... broken up at the beginning
        $text .='<STRONG> ...</STRONG>' if ($text !~ /\.$/); # broken up at the end ...
    }
    return $text;
}
#-------------------------------------------------------------------
=head2 recreate ( [ %options ] )
Like create, but first drops the existing index. Useful when rebuilding the index.
=head3 %options
Options to pass to WebGUI::Wobject::IndexedSearch::Search->create()
=cut
# Rebuilds the index from scratch: drops the existing full text index (if
# it can be opened), creates a new one with %options and removes all
# document information stored for this index.
# Returns the value returned by create().
sub recreate {
    my ($self, %options) = @_;
    $self->{_fts} = DBIx::FullTextSearch->open($self->getDbh, $self->getIndexName);
    if (defined $self->{_fts}) {
        $self->{_fts}->drop;
    }
    # create() takes nothing but the options; index name and database handler
    # are read from the object. Passing them as well would turn them into a
    # bogus option pair.
    $self->{_fts} = $self->create(%options);
    WebGUI::SQL->write("delete from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName));
    return $self->{_fts};
}
#-------------------------------------------------------------------
=head2 search ( query, \%filter )
Returns an array reference of docId's of documents that match the query.
If the search has no results, undef is returned.
=head3 query
user input string. Will be parsed into can-include, must-include and must-not-include words and phrases.
Special case: when query is an asterisk (*), then no full text search is done, and results are returned
using \%filter.
Examples are:
+"this is a phrase" -koo +bar foo
(foo OR baz) AND (bar OR caz)
=head3 filter
A hash reference containing filter elements.
Example:
{
languageId => [ 1, 3 ],
namespace => [ 'Article', 'USS' ]
}
=cut
# Returns an array reference with the docIds of the documents that match
# $query, that pass the filter and that the current user may view, in the
# order delivered by the full text search. Returns undef when nothing
# matches.
#
# $filter is a hash reference mapping a column name of IndexedSearch_docInfo
# to an array reference of allowed (unquoted) values. When the query is a
# single asterisk no full text search is done; the result then depends on
# filter and privileges only.
sub search {
    my ($self, $query, $filter) = @_;
    $self->{_query} = $query;
    my $noFtsSearch = ($query =~ /^\s*\*\s*$/); # query = '*', no full text search

    # Declaration and conditional assignment are kept apart: the behaviour
    # of "my ... unless ..." is undefined.
    my @fts_docIds;
    @fts_docIds = $self->{_fts}->search($query) unless ($noFtsSearch);

    if (@fts_docIds || $noFtsSearch) {
        my $groups = quoteAndJoin($self->_getGroups);
        my $docIds = quoteAndJoin(\@fts_docIds);
        my $sql = "select docId from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName);
        $sql .= " and docId in ($docIds)" unless $noFtsSearch;
        $sql .= " and page_groupIdView in ($groups)";
        $sql .= " and wobject_special_groupIdView in ($groups)";
        if ($session{setting}{wobjectPrivileges}) {
            $sql .= " and wobject_groupIdView in ($groups)";
        }
        # The filter keys go into the statement as column names.
        foreach my $filterElement (keys %{$filter}) {
            $sql .= " AND $filterElement in (".quoteAndJoin($filter->{$filterElement}).")";
        }
        # Keep @fts_docIds list order
        $sql .= " ORDER BY FIELD(docID,$docIds)" unless $noFtsSearch;
        my $filteredDocIds = WebGUI::SQL->buildArrayRef($sql);
        return $filteredDocIds if (ref $filteredDocIds eq 'ARRAY' and @{$filteredDocIds});
    }
    return undef;
}
#-------------------------------------------------------------------
=head2 _getGroups ( )
Returns an array reference containing all groupIds of groups the user is in.
=cut
# Returns an array reference with the groupIds of all groups for which
# WebGUI::Grouping::isInGroup returns true.
sub _getGroups {
    my @allGroups = WebGUI::SQL->buildArray("select groupId from groups");
    my @memberOf;
    foreach my $candidate (@allGroups) {
        next unless (WebGUI::Grouping::isInGroup($candidate));
        push(@memberOf, $candidate);
    }
    return \@memberOf;
}
#-------------------------------------------------------------------
# Destructor; also run explicitly by close(). Calls DESTROY on the full text
# search object, if the object holds one.
sub DESTROY {
    my ($self) = @_;
    my $fts = $self->{_fts};
    if (ref($fts)) {
        $fts->DESTROY();
    }
}
1;