diff --git a/lib/WebGUI/Wobject/IndexedSearch.pm b/lib/WebGUI/Wobject/IndexedSearch.pm
deleted file mode 100644
index d7ff7d5d4..000000000
--- a/lib/WebGUI/Wobject/IndexedSearch.pm
+++ /dev/null
@@ -1,596 +0,0 @@
package WebGUI::Wobject::IndexedSearch;

use strict;
use Time::HiRes;
use WebGUI::Wobject::IndexedSearch::Search;
use WebGUI::HTMLForm;
use WebGUI::HTML;
use WebGUI::Macro;
use WebGUI::International;
use WebGUI::Session;
use WebGUI::SQL;
use WebGUI::Wobject;
use Tie::IxHash;
use WebGUI::Utility;
use WebGUI::Paginator;
use WebGUI::Page;

our @ISA = qw(WebGUI::Wobject);

#-------------------------------------------------------------------
# name ( )
# Returns international string 17 looked up in this object's namespace.
sub name {
    my $self = shift;
    return WebGUI::International::get(17, $self->get("namespace"));
}

#-------------------------------------------------------------------
# new ( property )
# Constructor. Builds a WebGUI::Wobject with the extended properties of the
# indexed search (index name, filters, paging, highlight colours) and
# reblesses it into the calling class.
sub new {
    my ($class, $property) = @_;

    my %extended = (
        indexName        => { defaultValue => 'default' },
        searchRoot       => { fieldType => 'checkList',  defaultValue => 'any' },
        forceSearchRoots => { fieldType => 'yesNo',      defaultValue => 1 },
        users            => { fieldType => 'selectList', defaultValue => 'any' },
        namespaces       => { fieldType => 'selectList', defaultValue => 'any' },
        languages        => { fieldType => 'selectList', defaultValue => 'any' },
        contentTypes     => { fieldType => 'selectList', defaultValue => 'any' },
        paginateAfter    => { defaultValue => 10 },
        highlight        => { defaultValue => 1 },
        previewLength    => { defaultValue => 130 },
    );

    # highlight_1 .. highlight_5: default colours of the highlighted terms.
    my @highlightDefaults = ('#ffff66', '#A0FFFF', '#99ff99', '#ff9999', '#ff66ff');
    for my $i (0 .. $#highlightDefaults) {
        $extended{'highlight_'.($i + 1)} = { defaultValue => $highlightDefaults[$i] };
    }

    my $self = WebGUI::Wobject->new(
        -useTemplate        => 1,
        -useMetaData        => 1,
        -properties         => $property,
        -extendedProperties => \%extended,
    );
    return bless $self, $class;
}

#-------------------------------------------------------------------
# uiLevel ( )
# Returns the UI level of this wobject, which is always 5.
sub uiLevel {
    return 5;
}

#-------------------------------------------------------------------
# www_edit ( )
# Builds the add/edit form of the search wobject. Returns an error message
# instead of the form when the docInfo table cannot be read or no index
# has been built yet.
sub www_edit {
    my $self = shift;
    my $namespace = $self->get("namespace");
    my (@data, %indexName);

    tie my %searchRoot, 'Tie::IxHash';

    my $layout     = WebGUI::HTMLForm->new;
    my $properties = WebGUI::HTMLForm->new;
    my $privileges = WebGUI::HTMLForm->new;

    # Unconditional read to catch installation errors.
    # NOTE(review): the string literals of the two error returns below contain
    # only newlines in the source as received; any markup they once carried is
    # not visible here -- confirm against the original file.
    my $sth = WebGUI::SQL->unconditionalRead("select distinct(indexName), indexName from IndexedSearch_docInfo");
    unless ($sth->errorCode < 1) {
        return "\n" . WebGUI::International::get(1,$namespace) . $sth->errorMessage."\n";
    }
    while (@data = $sth->array) {
        $indexName{$data[0]} = $data[1];
    }
    $sth->finish;
    unless (%indexName) {
        return "" . WebGUI::International::get(2,$namespace) .
               "\n" . WebGUI::International::get(3,$namespace) . "\n";
    }

    # Index to use
#    $properties->radioList(	-name=>'indexName',
#				-options=>\%indexName,
#				-label=>WebGUI::International::get(5,$self->get("namespace")),
#				-value=>$self->getValue("indexName"),
#				-vertical=>1
#			);
    # NOTE: For now we're limiting each site to one index. Will allow more in the future.
    $properties->hidden(
        -name  => "indexName",
        -value => "IndexedSearch_default"
    );

    # Page roots
    %searchRoot = (
        'any' => WebGUI::International::get(15,$namespace),
        $session{page}{pageId} => WebGUI::International::get(4,$namespace),
        WebGUI::SQL->buildHash("select pageId,title from page where parentId='0' and isSystem<>1 order by title")
    );
    $properties->checkList(
        -name     => 'searchRoot',
        -options  => \%searchRoot,
        -label    => WebGUI::International::get(6,$namespace),
        -value    => [ split("\n", $self->getValue("searchRoot")) ],
        -multiple => 1,
        -vertical => 1,
    );
    $properties->yesNo(
        -name  => 'forceSearchRoots',
        -label => WebGUI::International::get('edit-forceSearchRoots-label',$namespace),
        -value => $self->getValue("forceSearchRoots")
    );

    # Content of specific user
    $properties->selectList(
        -name     => 'users',
        -options  => $self->_getUsers(),
        -label    => WebGUI::International::get(7,$namespace),
        -value    => [ split("\n", $self->getValue("users")) ],
        -multiple => 1,
        -size     => 5
    );

    # Content in specific namespaces
    $properties->selectList(
        -name     => 'namespaces',
        -options  => $self->_getNamespaces,
        -label    => WebGUI::International::get(8,$namespace),
        -value    => [ split("\n", $self->getValue("namespaces")) ],
        -multiple => 1,
        -size     => 5
    );

    # Content in specific language
    $properties->checkList(
        -name     => 'languages',
        -options  => $self->_getLanguages(),
        -label    => WebGUI::International::get(9,$namespace),
        -value    => [ split("\n", $self->getValue("languages")) ],
        -multiple => 1,
    );

    # Only specific content types. The "content" shortcut is a search-time
    # convenience only, so it is not offered in the edit form.
    my $contentTypes = $self->_getContentTypes();
    delete $contentTypes->{content};
    $properties->checkList(
        -name     => 'contentTypes',
        -options  => $contentTypes,
        -label    => WebGUI::International::get(10,$namespace),
        -value    => [ split("\n", $self->getValue("contentTypes")) ],
        -multiple => 1,
        -vertical => 1,
    );
    $layout->integer(
        -name  => 'paginateAfter',
        -label => WebGUI::International::get(11,$namespace),
        -value => $self->getValue("paginateAfter"),
    );
    $layout->integer(
        -name  => 'previewLength',
        -label => WebGUI::International::get(12,$namespace),
        -value => $self->getValue("previewLength"),
    );
    $layout->yesNo(
        -name  => 'highlight',
        -label => WebGUI::International::get(13,$namespace),
        -value => $self->getValue("highlight"),
    );

    # Color picker for highlight colors
    # NOTE(review): this literal holds only whitespace in the source as
    # received; the colour picker markup itself is not visible -- confirm.
    $layout->raw(
        -value => '

		'
    );
    for my $n (1..5) {
        my $highlight = "highlight_$n";
        $layout->text(
            -name    => $highlight,
            -label   => WebGUI::International::get(14,$namespace) ." $n:",
            -size    => 7,
            -value   => $self->getValue($highlight),
            -subtext => qq{
				Pick}
        );
    }

    return $self->SUPER::www_edit(
        -properties => $properties->printRowsOnly,
        -layout     => $layout->printRowsOnly,
        -privileges => $privileges->printRowsOnly,
        -heading    => "Edit Search",
        -helpId     => "search add/edit"
    );
}

#-------------------------------------------------------------------
# _isSelected ( field, key )
# Tells whether option "key" of form field "field" was chosen by the visitor.
# Uses the multi-valued CGI parameter when present, otherwise matches the key
# literally against the flattened form value. Call in scalar context.
sub _isSelected {
    my ($self, $field, $key) = @_;
    if (scalar $session{cgi}->param($field)) {
        return isIn($key, $session{cgi}->param($field));
    }
    # \Q..\E: the key is data, not a pattern.
    return scalar($session{form}{$field} =~ /\Q$key\E/);
}

#-------------------------------------------------------------------
# www_view ( )
# Runs the search described by the submitted form and returns the processed
# template: result list, paging variables and the loops that drive the
# filter controls (namespaces, content types, users, languages, page roots).
sub www_view {
    my $self = shift;
    $self->logView() if ($session{setting}{passiveProfilingEnabled});
    my $namespace = $self->get("namespace");
    my %var;

    # Do some query handling: fold the "advanced search" fields into one query
    $var{exactPhrase} = $session{form}{exactPhrase};
    $var{allWords}    = $session{form}{allWords};
    $var{atLeastOne}  = $session{form}{atLeastOne};
    $var{without}     = $session{form}{without};
    $var{query}       = $session{form}{query};
    $var{query} .= qq{ +"$var{exactPhrase}"} if ($var{exactPhrase});
    $var{query} .= " ".join(" ",map("+".$_,split(/\s+/,$var{allWords}))) if ($var{allWords});
    $var{query} .= qq{ $var{atLeastOne}} if ($var{atLeastOne});
    $var{query} .= " ".join(" ",map("-".$_,split(/\s+/,$var{without}))) if ($var{without});

    # Remove macros from query
    $var{query} = WebGUI::Macro::negate($var{query});

    # Set some standard vars
    $var{submit}          = WebGUI::Form::submit({value=>WebGUI::International::get(16, $namespace)});
    $var{"int.search"}    = WebGUI::International::get(16,$namespace);
    $var{wid}             = $self->get("wobjectId");
    $var{numberOfResults} = '0';
    $var{"select_".$self->getValue("paginateAfter")} = "selected";

    # Do the search
    my $startTime = Time::HiRes::time();
    my $filter    = $self->_buildFilter;
    my $search    = WebGUI::Wobject::IndexedSearch::Search->new($self->getValue('indexName'));
    $search->open;
    my $results = $search->search($var{query},$filter);
    $var{duration} = Time::HiRes::time() - $startTime;
    $var{duration} = sprintf("%.3f", $var{duration}); # Duration rounded to 3 decimal places

    # Let's see if the search returned any results
    if (defined ($results)) {
        $var{numberOfResults} = scalar(@$results);

        # Deal with pagination: the page links must carry the current filters
        my $url = "wid=".$self->get("wobjectId")."&func=view&query=".WebGUI::URL::escape($var{query});
        for my $field (qw(users namespaces languages contentTypes)) {
            for my $value ($session{cgi}->param($field)) {
                $url .= "&$field=".WebGUI::URL::escape($value);
            }
        }
        $url .= "&paginateAfter=".$self->getValue("paginateAfter");
        my $p = WebGUI::Paginator->new(WebGUI::URL::page($url), $self->getValue("paginateAfter"));
        $p->setDataByArrayRef($results);
        $var{startNr} = 1;
        if ($session{form}{pn}) {
            $var{startNr} = (($session{form}{pn} - 1) * $self->getValue("paginateAfter")) + 1;
        }

        my @highlightColors = map { $self->getValue("highlight_$_") } (1..5);
        $var{queryHighlighted} = $search->highlight($var{query}, undef, \@highlightColors);

        # Get result details for this page
        if ($p->getPageNumber > $p->getNumberOfPages) {
            $var{numberOfResults} = 0;
            $var{resultsLoop} = [];
        } else {
            $var{resultsLoop} = $search->getDetails($p->getPageData,
                highlightColors => \@highlightColors,
                previewLength   => $self->getValue('previewLength'),
                highlight       => $self->getValue('highlight')
            );
            # Pagination variables
            $var{endNr} = $var{startNr}+(scalar(@{$var{resultsLoop}}))-1;
            $p->appendTemplateVars(\%var);
        }
    }

    # Create a loop with namespaces
    $var{namespaces} = [];
    my $namespaces = $self->_getNamespaces('restricted');
    foreach my $key (keys %$namespaces) {
        my $selected = $self->_isSelected('namespaces', $key);
        push(@{$var{namespaces}}, { value => $key, name => $namespaces->{$key}, selected => $selected });
    }

    # Create a loop with contentTypes
    #
    # And while we are busy we also create a loop with simplified contentTypes
    # This means: wobject, page, wobjectDetail are masked in one option: content
    $var{contentTypes} = [];
    $var{contentTypesSimple} = [];
    my $contentTypes = $self->_getContentTypes('restricted');
    foreach my $key (keys %$contentTypes) {
        my $selected = $self->_isSelected('contentTypes', $key);
        unless ($key =~ /^content$/) { # No shortcut in the detailed contentType list
            push(@{$var{contentTypes}}, {
                value      => $key,
                name       => $contentTypes->{$key},
                selected   => $selected,
                'type_'.$key => 1 });
        }
        # The alternation is grouped so that both anchors apply to every type.
        unless ($key =~ /^(?:page|wobject|wobjectDetail)$/) { # No details in the simple contentType list
            push(@{$var{contentTypesSimple}}, {
                value      => $key,
                name       => $contentTypes->{$key},
                selected   => $selected,
                'type_'.$key => 1 });
        }
    }

    # Create a loop with users
    $var{users} = [];
    my $users = $self->_getUsers('restricted');
    foreach my $key (keys %$users) {
        my $selected = $self->_isSelected('users', $key);
        push(@{$var{users}}, { value => $key, name => $users->{$key}, selected => $selected });
    }

    # Create a loop with languages
    $var{languages} = [];
    my $languages = $self->_getLanguages('restricted');
    foreach my $key (keys %$languages) {
        my $selected = $self->_isSelected('languages', $key);
        push(@{$var{languages}}, { value => $key, name => $languages->{$key}, selected => $selected });
    }

    # Create a loop with searchable page roots
    my @roots   = split(/\n/, $self->get('searchRoot'));
    my %checked = map {$_=>1} $session{cgi}->param("searchRoot");
    if (isIn('any', @roots)) {
        foreach my $rootData (WebGUI::Page->getAnonymousRoot->daughters) {
            push (@{$var{searchRoots}}, {
                value        => $rootData->{'pageId'},
                menuTitle    => $rootData->{'menuTitle'},
                title        => $rootData->{'title'},
                urlizedTitle => $rootData->{'urlizedTitle'},
                checked      => $checked{$rootData->{'pageId'}},
            });
            $var{"rootPage.".$rootData->{'urlizedTitle'}.".id"}      = $rootData->{'pageId'};
            $var{"rootPage.".$rootData->{'urlizedTitle'}.".checked"} = $checked{$rootData->{'pageId'}};
        }
    } else {
        foreach my $rootId (@roots) {
            my $rootData = WebGUI::Page->new($rootId);
            push (@{$var{searchRoots}}, {
                value        => $rootData->get('pageId'),
                menuTitle    => $rootData->get('menuTitle'),
                title        => $rootData->get('title'),
                urlizedTitle => $rootData->get('urlizedTitle'),
                checked      => $checked{$rootData->get('pageId')},
            });
            $var{"rootPage.".$rootData->get('urlizedTitle').".id"}      = $rootData->get('pageId');
            $var{"rootPage.".$rootData->get('urlizedTitle').".checked"} = $checked{$rootData->get('pageId')};
        }
    }
    $var{"anyRootPage.checked"} = $checked{'any'};

    # close the search
    $search->close;

    return $self->processTemplate($self->get("templateId"),\%var);
}

#-------------------------------------------------------------------
# _buildPageList ( )
# Returns an array reference with the ids of every page below the search
# roots. The configured roots are used when the visitor chose none, chose
# 'any', or when forceSearchRoots is set; otherwise the visitor's choice is
# used, limited to the configured roots.
sub _buildPageList {
    my $self = shift;
    my (@roots, @pages);

    my @userSpecifiedRoots = $session{cgi}->param("searchRoot");

    if ((scalar(@userSpecifiedRoots) == 0)
        || ($self->getValue("forceSearchRoots"))
        || (isIn('any', @userSpecifiedRoots))
    ) {
        @roots = split(/\n+/, $self->get("searchRoot"));
    } else {
        my @allowedRoots = split(/\n+/, $self->get("searchRoot"));
        @roots = grep { isIn($_, @allowedRoots) } @userSpecifiedRoots;
    }
    foreach my $pageId (@roots) {
        WebGUI::Page->new($pageId)->traversePreOrder(
            sub {
                push(@pages, $_[0]->get('pageId'));
            }
        );
    }

    return [ @pages ];
}

#-------------------------------------------------------------------
# _buildFilter ( )
# Returns a hash reference with the filter elements handed to the search
# (pageId, languageId, contentType, ownerId, namespace). For each element the
# visitor's form choice wins over the configured value; 'any' means no filter.
sub _buildFilter {
    my $self = shift;
    my %filter = ();

    # pages
    if ($self->get('searchRoot') !~ /any/i) {
        $filter{pageId} = $self->_buildPageList;
    }

    # languages
    if ($session{form}{languages} && ! isIn('any', $session{cgi}->param('languages'))) {
        $filter{languageId} = [ $session{cgi}->param('languages') ];
    } elsif ($self->getValue('languages') !~ /any/i) {
        $filter{languageId} = [ split(/\n/, $self->getValue('languages')) ];
    }
    # Some content (i.e. profiles) don't have a language. They must be found as well.
    push(@{$filter{languageId}}, '0') if (exists $filter{languageId});

    # content-types
    if ($session{form}{contentTypes} && ! isIn('any', $session{cgi}->param('contentTypes'))) {
        $filter{contentType} = [ $session{cgi}->param('contentTypes') ];

        # contentType "content" is a shortcut for "page", "wobject" and "wobjectDetail"
        if (isIn('content', $session{cgi}->param('contentTypes'))) {
            push(@{$filter{contentType}}, qw/page wobject wobjectDetail/);
        }
    } elsif ($self->getValue('contentTypes') !~ /any/i) {
        $filter{contentType} = [ split(/\n/, $self->getValue('contentTypes')) ];
    }

    # users: a non-numeric value is treated as a username pattern ('*' is the
    # wildcard) and resolved to the first matching userId.
    if ($session{form}{users} && ! isIn('any', $session{cgi}->param('users'))) {
        $filter{ownerId} = [];
        foreach my $user ($session{cgi}->param('users')) {
            if ($user =~ /\D/) {
                $user =~ s/\*/%/g;
                ($user) = WebGUI::SQL->buildArray("select userId from users where username like ".quote($user));
            }
            push(@{$filter{ownerId}}, quote($user)) if ($user =~ /^\d+$/);
        }
    } elsif ($self->getValue('users') !~ /any/i) {
        $filter{ownerId} = [ split(/\n/, $self->getValue('users')) ];
    }

    # namespaces
    if ($session{form}{namespaces} && ! isIn('any', $session{cgi}->param('namespaces'))) {
        $filter{namespace} = [ $session{cgi}->param('namespaces') ];
    } elsif ($self->getValue('namespaces') !~ /any/i) {
        $filter{namespace} = [ split(/\n/, $self->getValue('namespaces')) ];
    }

    # delete $filter{ownerId} if it is an empty array reference
    if (exists($filter{ownerId})) {
        delete $filter{ownerId} unless (scalar(@{$filter{ownerId}}));
    }
    return \%filter;
}

#-------------------------------------------------------------------
# _getLanguages ( [ restricted ] )
# Returns an ordered hash reference of selectable languages, 'any' first.
# With "restricted" true and a configured language list, only the configured
# languages are offered; otherwise every language found in the index.
sub _getLanguages {
    my ($self, $restricted) = @_;
    my $international = WebGUI::SQL->buildHashRef("select distinct(languageId) from IndexedSearch_docInfo");
    tie my %languages, 'Tie::IxHash';
    if ($restricted and $self->get('languages') !~ /any/i) {
        $languages{any} = WebGUI::International::get(24,$self->get("namespace"));
        foreach my $id (split /\n/, $self->get('languages')) {
            $languages{$id} = $international->{$id};
        }
    } else {
        %languages = ('any' => WebGUI::International::get(24,$self->get("namespace")) , %$international);
    }
    return \%languages;
}

#-------------------------------------------------------------------
# _getNamespaces ( [ restricted ] )
# Returns an ordered hash reference namespace => display name, 'any' first.
# Display names come from the name() of each configured wobject class; a
# namespace without such a class falls back to its ucfirst'ed id.
sub _getNamespaces {
    my ($self, $restricted) = @_;
    my %international;
    foreach my $wobject (@{$session{config}{wobjects}}) {
        my $cmd  = "WebGUI::Wobject::".$wobject;
        my $load = 'use '.$cmd;
        eval($load);    # class name is only known at run time
        WebGUI::ErrorHandler::warn("Wobject failed to compile: $cmd.".$@) if($@);
        my $w = $cmd->new({namespace=>$wobject, wobjectId=>'new'});
        $international{$wobject} = $w->name;
    }
    tie my %namespaces, 'Tie::IxHash';
    $namespaces{any} = WebGUI::International::get(18,$self->get("namespace"));
    my @ids;
    if ($restricted and $self->get('namespaces') !~ /any/i) {
        @ids = split /\n/, $self->get('namespaces');
    } else {
        @ids = WebGUI::SQL->buildArray("select distinct(namespace) from IndexedSearch_docInfo order by namespace");
    }
    foreach my $id (@ids) {
        $namespaces{$id} = $international{$id} || ucfirst($id);
    }
    return \%namespaces;
}

#-------------------------------------------------------------------
# _getContentTypes ( [ restricted ] )
# Returns an ordered hash reference content type => display name. 'any' and
# the 'content' shortcut always come first, followed by the configured types
# (restricted) or by every type found in the index.
sub _getContentTypes {
    my ($self, $restricted) = @_;
    my $namespace = $self->get("namespace");
    my %international = (
        'page'          => WebGUI::International::get(2),
        'wobject'       => WebGUI::International::get(19,$namespace),
        'wobjectDetail' => WebGUI::International::get(20,$namespace),
        'content'       => WebGUI::International::get(21,$namespace),
        'discussion'    => WebGUI::International::get(892),
        'profile'       => WebGUI::International::get(22,$namespace),
        'any'           => WebGUI::International::get(23,$namespace),
    );
    tie my %contentTypes, 'Tie::IxHash';
    if ($restricted and $self->get('contentTypes') !~ /any/i) {
        $contentTypes{any}     = $international{any};
        $contentTypes{content} = $international{content}; # shortcut for page, wobject and wobjectDetail
        foreach my $type (split /\n/, $self->get('contentTypes')) {
            $contentTypes{$type} = $international{$type};
        }
    } else {
        %contentTypes = (
            'any'     => $international{any},
            'content' => $international{content}, # shortcut for page, wobject and wobjectDetail
        );
        foreach my $type (WebGUI::SQL->buildArray("select distinct(contentType) from IndexedSearch_docInfo order by contentType")) {
            $contentTypes{$type} = $international{$type} || ucfirst($type);
        }
    }
    return \%contentTypes;
}

#-------------------------------------------------------------------
# _getSearchablePages ( searchRoot )
# Plain function (no $self). Returns a hash whose keys are the ids of all
# pages below searchRoot, found by recursing over page.parentId.
sub _getSearchablePages {
    my $searchRoot = shift;
    my %pages;
    my $sth = WebGUI::SQL->read("select pageId from page where parentId = ".quote($searchRoot));
    while (my %data = $sth->hash) {
        $pages{$data{pageId}} = 1;
        %pages = (%pages, _getSearchablePages($data{pageId}) );
    }
    return %pages;
}

#-------------------------------------------------------------------
# _getUsers ( [ restricted ] )
# Returns an ordered hash reference of selectable users, 'any' first. With
# "restricted" true and a configured user list, the configured values are
# offered (value and label are the same); otherwise userId => username of
# every user, ordered by username.
sub _getUsers {
    my ($self, $restricted) = @_;
    tie my %users, 'Tie::IxHash';
    if ($restricted and $self->get('users') !~ /any/i) {
        $users{any} = WebGUI::International::get(25,$self->get("namespace"));
        foreach my $user (split /\n/, $self->get('users')) {
            $users{$user} = $user;
        }
    } else {
        %users = (
            'any' => WebGUI::International::get(25,$self->get("namespace")),
            WebGUI::SQL->buildHash("select userId, username from users order by username")
        );
    }
    return \%users;
}

1;
diff --git a/lib/WebGUI/Wobject/IndexedSearch/Search.pm b/lib/WebGUI/Wobject/IndexedSearch/Search.pm
deleted file mode 100644
index 79cbb4f82..000000000
--- a/lib/WebGUI/Wobject/IndexedSearch/Search.pm
+++ /dev/null
@@ -1,667 +0,0 @@
package WebGUI::Wobject::IndexedSearch::Search;

=head1 LEGAL

 -------------------------------------------------------------------
  WebGUI is Copyright 2001-2005 Plain Black Corporation.
 -------------------------------------------------------------------
  Please read the legal notices (docs/legal.txt) and the license
  (docs/license.txt) that came with this distribution before using
  this software.
 -------------------------------------------------------------------
  http://www.plainblack.com                     info@plainblack.com
 -------------------------------------------------------------------

=cut

use strict;
use DBIx::FullTextSearch;
use WebGUI::DateTime;
use WebGUI::SQL;
use WebGUI::URL;
use WebGUI::HTML;
use WebGUI::ErrorHandler;
use WebGUI::Grouping;
use DBIx::FullTextSearch::StopList;
use WebGUI::Utility;
use WebGUI::Session;
use WebGUI::Privilege;
use HTML::Highlight;
use WebGUI::Macro;

=head1 NAME

Package WebGUI::Wobject::IndexedSearch::Search

=head1 DESCRIPTION

Search implementation for WebGUI.

=head1 SYNOPSIS

 use WebGUI::Wobject::IndexedSearch::Search;
 my $search = WebGUI::Wobject::IndexedSearch::Search->new();
 $search->indexDocument( { text => 'Index this text',
                           location => 'http://www.mysite.com/index.pl/faq#45',
                           languageId => 3,
                           namespace => 'FAQ'
                         });
 my $hits = $search->search("+foo -bar koo", { namespace => ['Article', 'FAQ'] } );

 $search->close;


=head1 SEE ALSO

This package is an extension to DBIx::FullTextSearch and HTML::Highlight.
See those packages for documentation of their methods.

=head1 METHODS

These methods are available from this package:

=cut

#-------------------------------------------------------------------
# _recurseCrumbTrail ( pageId )
# Plain function. Returns the crumb trail of a page as a string: the menu
# titles from the top-most ancestor down to the page itself, each followed
# by ' > '. The page with id "0" is never part of the trail.
# NOTE(review): Tie::CPHash is not loaded by this file; presumably another
# module (WebGUI::SQL?) loads it -- confirm.
# NOTE(review): the empty string prepended to the title looks like the
# remainder of link markup that is not present in the source as received.
sub _recurseCrumbTrail {
    my ($pageId) = @_;
    my (%data, $output);
    tie %data, 'Tie::CPHash';
    %data = WebGUI::SQL->quickHash("select pageId,parentId,menuTitle,urlizedTitle from page where pageId=".quote($pageId));
    if ($data{pageId}) {
        # Ancestors first, so the trail reads from the root downwards.
        $output .= _recurseCrumbTrail($data{parentId});
    }
    # A page id that has no row must not produce an empty crumb.
    if (defined $data{pageId} && $data{pageId} ne "0") {
        $output .= ''.$data{menuTitle}.' > ';
    }
    return $output;
}

#-------------------------------------------------------------------

=head2 close ( )

Closes the DBIx::FullTextSearch session.

=cut

sub close {
    my $self = shift;
    $self->DESTROY();
}

#-------------------------------------------------------------------

=head2 create ( [ %options ] )

Creates a new DBIx::FullTextSearch index.

=head3 %options

Options to pass to DBIx::FullTextSearch. They are merged over the default
options set up by the constructor, which are:

 ( backend => 'column', word_length => 20, filter => 'map { lc $_ if ($_ !~ /\^.*;/) }' )

Please refer to the DBIx::FullTextSearch documentation for a complete list of options.

=head3 stemmer and stoplist

If the C<stemmer> option is set but Lingua::Stem cannot be loaded, a warning
is logged and the option is dropped. If the C<stoplist> option is set, the
default stoplist of that name is created for this index when its table does
not exist yet, and the option is replaced by the index specific stoplist name.

=cut

sub create {
    my ($self, %options) = @_;
    %options = (%{$self->{_createOptions}}, %options);

    # Stemming is optional: fall back to an unstemmed index when the
    # module is missing.
    if ($options{stemmer}) {
        unless (eval { require Lingua::Stem; 1 }) {
            WebGUI::ErrorHandler::warn("IndexedSearch: Can't use stemmer: $@");
            delete $options{stemmer};
        }
    }

    if ($options{stoplist}) {
        my $stoplistName = $self->getIndexName."_".$options{stoplist};
        if (not $self->existsTable($stoplistName."_stoplist")) {
            DBIx::FullTextSearch::StopList->create_default($self->getDbh, $stoplistName, $options{stoplist});
        }
        $options{stoplist} = $stoplistName;
    }

    $self->{_fts} = DBIx::FullTextSearch->create($self->getDbh, $self->getIndexName, %options);
    if (not defined $self->{_fts}) {
        WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to create index.\n$DBIx::FullTextSearch::errstr");
        return undef;
    }
    $self->{_docId} = 1;    # a fresh index starts numbering its documents at 1
    return $self->{_fts};
}

#-------------------------------------------------------------------

=head2 existsTable ( tableName )

Returns true if tableName exists in database.

=head3 tableName

The name of table.

=cut

sub existsTable {
    my ($self, $table) = @_;
    return isIn($table, WebGUI::SQL->buildArray("show tables"));
}

#-------------------------------------------------------------------

=head2 getDetails ( docIdList , [ %options ] )

Returns an array reference containing details for each docId.

=head3 docIdList

An array reference containing docIds.

=head3 %options

The following keys are recognised:

=head3 previewLength

The maximum number of characters in each of the context sections. Defaults to "80".

=head3 highlight

A boolean indicating whether or not to enable highlight. Highlighting is
only applied when this option is passed and true.

=head3 highlightColors

A reference to an array of CSS color identifiers.
- -=head3 - -=cut - -sub getDetails { - my ($self, $docIdList, %options) = @_; - my $docIds = quoteAndJoin($docIdList); - my (@searchDetails, %namespace); - foreach my $wobject (@{$session{config}{wobjects}}){ - my $cmd = "WebGUI::Wobject::".$wobject; - my $load = 'use '.$cmd; - eval($load); - WebGUI::ErrorHandler::warn("Wobject failed to compile: $cmd.".$@) if($@); - my $w = $cmd->new({namespace=>$wobject, wobjectId=>'new'}); - $namespace{$wobject} = $w->name; - } - my $sql = "select * from IndexedSearch_docInfo where docId in ($docIds) and indexName = ".quote($self->getIndexName) ; - $sql .= " ORDER BY FIELD(docId, $docIds)"; # Maintain $docIdList order - my $sth = WebGUI::SQL->read($sql); - while (my %data = $sth->hash) { - $data{namespace} = $namespace{$data{namespace}} || ucfirst($data{namespace}); - if ($data{ownerId}) { - ($data{username}) = WebGUI::SQL->quickArray("select username from users where userId = ".quote($data{ownerId})); - $data{userProfile} = WebGUI::URL::page("op=viewProfile&uid=$data{ownerId}"); - } - if ($data{bodyShortcut} =~ /^\s*select /i) { - $data{body} = (WebGUI::SQL->quickArray($data{bodyShortcut}))[0]; - } else { - $data{body} = $data{bodyShortcut}; - } - if ($data{headerShortcut} =~ /^\s*select /i) { - $data{header} = (WebGUI::SQL->quickArray($data{headerShortcut}))[0]; - } else { - $data{header} = $data{headerShortcut}; - } - delete($data{bodyShortcut}); - delete($data{headerShortcut}); - if($data{body}) { - $data{body} = WebGUI::Macro::filter($data{body}); - $data{body} = WebGUI::HTML::filter($data{body},'all'); - $data{body} = $self->preview($data{body}, $options{previewLength}); - $data{body} = $self->highlight($data{body},undef, $options{highlightColors}) if ($options{highlight}); - } - if($data{header}) { - $data{header} = WebGUI::Macro::filter($data{header}); - $data{header} = WebGUI::HTML::filter($data{header},'all'); - $data{header} = $self->highlight($data{header},undef, $options{highlightColors}) if ($options{highlight}); 
- $data{location} = WebGUI::URL::gateway($data{location}); - } - $data{crumbTrail} = _recurseCrumbTrail($data{pageId}); - $data{crumbTrail} =~ s/\s*\>\s*$//; - push(@searchDetails, \%data); - } - $sth->finish; - return \@searchDetails; -} - -#------------------------------------------------------------------- - -=head2 getDbh ( ) - -Returns the object's database handler. - -=cut - -sub getDbh { - my $self = shift; - return $self->{_dbh}; -} - -#------------------------------------------------------------------- - -=head2 getDocId ( ) - -Returns the next docId for this object. - -=cut - -sub getDocId { - my $self=shift; - return $self->{_docId}; -} - -#------------------------------------------------------------------- - -=head2 getIndexName ( ) - -Returns the full index name of this object. - -=cut - -sub getIndexName { - my $self = shift; - return $self->{_indexName}; -} - -#------------------------------------------------------------------- - -=head2 _queryToWords ( [ query ] ) - -Converts a DBIx::FullTextSearch query to (\@Words, \@Wildcards) suitable to pass to HTML::Highlight - -=cut - -sub _queryToWords { - my ($self, $query) = @_; - my $query ||= $self->{_query}; - - # Return the processed words / wildcards from memory if it's cached. 
- if ($self->{$query."words"} && $self->{$query."wildcards"}) { - return ($self->{$query."words"}, $self->{$query."wildcards"}); - } - - # deal with quotes - my $inQuote=0; - my (@words, @wildcards); - foreach (split(/\"/, $query)) { - if($inQuote == 0) { - foreach (split(/\s+/, $_)) { - next if (/^AND$/i); # boolean AND - next if (/^OR$/i); # boolean OR - next if (/^NOT$/i); # boolean OR - next if (/^\-/); # exclude word - next if (/^.{0,1}$/); # at least 2 characters - if (/\*/) { - push(@wildcards, '%'); # match any character - } else { - push(@wildcards, '*'); # Also match plural of word - } - s/['"()+*]+//g; # remove query operators and quotes - push(@words, $_); - } - } else { - my $phrase = $_; - push(@words, qq/$phrase/); - push(@wildcards, undef); # Exact match - } - $inQuote = ++$inQuote % 2; - } - # Store words / wildcards in memory - $self->{$query."words"} = \@words; - $self->{$query."wildcards"} = \@wildcards; - - return (\@words, \@wildcards); -} - -#------------------------------------------------------------------- - -=head2 highlight ( text [ , query , colors ] ) - -highlight words or patterns in HTML documents. - -=head3 text - -The text to highlight - -=head3 query - -A query containing the words to highlight. Defaults to the last used $search->search query. -Special case: When query contains only an asterisk '*', no highlighting is applied. - -=head3 colors - -A reference to an array of CSS color identificators. - -=cut - -sub highlight { - my ($self, $text, $query, $colors) = @_; - my $query ||= $self->{_query}; - return $text if ($query =~ /^\s*\*\s*$/); # query = '*', no highlight - my ($words, $wildcards) = $self->_queryToWords($query); - my $hl = new HTML::Highlight ( words => $words, - wildcards => $wildcards, - colors => $colors - ); - return $hl->highlight($text); -} - -#------------------------------------------------------------------- - -=head2 indexDocument ( hashRef ) - -Adds a document to the index. 
- -This method doesn't store the document itself. Instead, it stores information about words -in the document in such a structured way that it makes easy and fast to look up what -documents contain certain words and return id's of the documents. - -=head3 text - -The text to index. - -=head3 location - -The location of the document. Most likely an URL. - -=head3 contentType - -The content type of this document. - -=head3 docId - -The unique Id of this document. Defaults to the next empty docId. - -=head3 pageId - -The pageId of the page on which this document resides. Defaults to 0. - -=head3 wobjectId - -The wobjectID of the wobject that holds this document. Defaults to 0. - -=head3 ownerId - -The ownerId of the document. Defaults to 3. - -=head3 languageId - -The languageId of this document. Defaults to undef. - -=head3 namespace - -The namespace of this document. Defaults to 'WebGUI'. - -=head3 page_groupIdView - -Id of group authorized to view this page. Defaults to '7' (everyone) - -=head3 wobject_groupIdView - -Id of group authorized to view this wobject. Defaults to '7' (everyone) - -=head3 wobject_special_groupIdView - -Id of group authorized to view the details of this wobject. - -=head3 headerShortcut - -An sql statement that returns the header (title, question, subject, name, whatever) -of this document. - -=head3 bodyShortcut - -An sql statement that returns the body (description, answer, message, whatever) -of this document. - -=cut - -sub indexDocument { - my ($self, $document) = @_; - $self->{_fts}->index_document($document->{docId} || $self->{_docId}, $document->{text}); - my $docId = ($document->{docId} || $self->{_docId}); - WebGUI::SQL->write("insert into IndexedSearch_docInfo ( docId, - indexName, - pageId, - wobjectId, - languageId, - namespace, - location, - page_groupIdView, - wobject_groupIdView, - wobject_special_groupIdView, - headerShortcut, - bodyShortcut, - contentType, - ownerId, - dateIndexed ) - values ( ". - quote($docId).", ". 
- quote($self->getIndexName).", ". - quote($document->{pageId} || 0).", ". - quote($document->{wobjectId} || 0).", ". - quote($document->{languageId}).", ". - quote($document->{namespace} || 'WebGUI')." , ". - quote($document->{location}).", ". - quote($document->{page_groupIdView} || 7).", ". - quote($document->{wobject_groupIdView} || 7).", ". - quote($document->{wobject_special_groupIdView} || 7).", ". - quote($document->{headerShortcut})." ,". - quote($document->{bodyShortcut})." ,". - quote($document->{contentType})." ,". - quote($document->{ownerId} || 3).", - ".WebGUI::DateTime::time()." )" - ); - $self->{_docId}++; -} - -#------------------------------------------------------------------- - -=head2 new ( [ indexName , dbh ] ) - -Constructor. - -=head3 indexName - -The name of the index to open. Defaults to 'default'. - -=head3 $dbh - -Database handler to use. Defaults to $WebGUI::Session::session{dbh}. - -=cut - -sub new { - my ($class, $indexName, $dbh) = @_; - $indexName = $indexName || 'default'; - my $self = { _indexName => $indexName, - _dbh => $dbh || $WebGUI::Session::session{dbh}, - _createOptions => {( backend => 'column', - word_length => 20, - filter => 'map { lc $_ if ($_ !~ /\^.*;/) }' - )}, - }; - bless $self, $class; -} - -#------------------------------------------------------------------- - -=head2 open ( ) - -Opens an existing DBIx::FullTextSearch index. 
=cut

sub open {
    my ($self) = @_;
    $self->{_fts} = DBIx::FullTextSearch->open($self->getDbh, $self->getIndexName);
    if (not defined $self->{_fts}) {
        WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to open index.\n$DBIx::FullTextSearch::errstr");
        return undef;
    }

    # Prime the docId counter with the highest docId stored for this index plus one,
    # so indexDocument() can hand out ids without a lookup per document.
    ($self->{_docId}) = WebGUI::SQL->quickArray("select max(docId) from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName));
    $self->{_docId}++;
    return $self->{_fts};
}

#-------------------------------------------------------------------

=head2 preview ( text , [ previewLength , query ] )

Returns a context preview in which words from a search query appear in the resulting documents.
The words are always in the middle of each of the sections.

When the query is empty or a lone asterisk (*), or when no context could be found,
the preview is the filtered text cut off at a whitespace boundary instead.

=head3 text

The text to preview.

=head3 previewLength

The maximum number of characters in each of the context sections. Defaults to 80.
A preview length of "0" means no preview,
while a negative preview length returns the complete text.

=head3 query

A query containing the words to highlight. Defaults to the last used $search->search query.

=cut

sub preview {
    my ($self, $text, $previewLength, $query) = @_;
    $previewLength = 80 if (not defined $previewLength);
    return '' unless ($previewLength);
    return $text if ($previewLength < 0);

    # Fall back to the query stored on the object only when the caller passed none.
    # (This must not be a fresh "my" declaration: that would shadow the argument
    # and silently discard an explicitly passed query.)
    $query ||= $self->{_query};

    # Try a keyword-in-context preview, unless the query is empty or just '*'.
    my $preview;
    if ($query and $query !~ /^\s*\*\s*$/) {
        my ($words, $wildcards) = $self->_queryToWords($query);
        my $hl = HTML::Highlight->new(
            words     => $words,
            wildcards => $wildcards,
        );
        $preview = join('... ', @{ $hl->preview_context($text, $previewLength) });
    }

    if ($preview) {
        $text = $preview;
    }
    else {
        # No context available: filter the text and keep at most $previewLength
        # characters, cutting at whitespace.
        $text = WebGUI::HTML::filter($text,'all');
        $text =~ s/^(.{1,$previewLength})\s+.*$/$1/s;
    }

    # Trim the edges before deciding whether ellipses are needed.
    $text =~ s/^(\s| )+//;
    $text =~ s/(\s| )+$//;
    if ($text ne '') {
        $text = '... '.$text if ($text !~ /^[A-Z]+/);   # ... broken up at the beginning
        $text .=' ...' if ($text !~ /\.$/);             # broken up at the end ...
    }
    return $text;
}

#-------------------------------------------------------------------

=head2 recreate ( [ %options ] )

Like create, but first drops the existing index. Useful when rebuilding the index.
The rows of this index in IndexedSearch_docInfo are deleted as well.

=head3 %options

Options to pass to WebGUI::IndexedSearch->create()

=cut

sub recreate {
    my ($self, %options) = @_;

    # Drop the current index, if there is one that can be opened.
    $self->{_fts} = DBIx::FullTextSearch->open($self->getDbh, $self->getIndexName);
    if (defined $self->{_fts}) {
        $self->{_fts}->drop;
    }

    $self->{_fts} = $self->create($self->getIndexName, $self->getDbh, %options);

    # The document metadata of the old index is stale now; remove it.
    WebGUI::SQL->write("delete from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName));
    return $self->{_fts};
}

#-------------------------------------------------------------------

=head2 search ( query, \%filter )

Returns an array reference of docId's of documents that match the query.
If the search has no results, undef is returned.

=head3 query

User input string. Will be parsed into can-include, must-include and must-not-include words and phrases.
Special case: when query is an asterisk (*), then no full text search is done, and results are returned
using \%filter.

Examples are:

    +"this is a phrase" -koo +bar foo
    (foo OR baz) AND (bar OR caz)

=head3 filter

A hash reference containing filter elements.

=pod

Example:

    {
        language  => [ 1, 3 ],
        namespace => [ 'Article', 'USS' ]
    }

Each key is used as a column name of IndexedSearch_docInfo, each value is an array
reference with the values that are allowed for that column.

=cut

sub search {
    my ($self, $query, $filter) = @_;

    # Remember the query on the object for later use.
    $self->{_query} = $query;

    # query = '*', no full text search
    my $noFtsSearch = ($query =~ /^\s*\*\s*$/);

    # Declare first, assign conditionally: a "my" combined with a statement
    # modifier has undefined behaviour in Perl.
    my @fts_docIds;
    @fts_docIds = $self->{_fts}->search($query) unless $noFtsSearch;

    if (@fts_docIds || $noFtsSearch) {
        my $groups = quoteAndJoin($self->_getGroups);
        my $docIds = quoteAndJoin(\@fts_docIds);

        # Restrict the hits to this index and to documents the user may view.
        my $sql = "select docId from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName);
        $sql .= " and docId in ($docIds)" unless $noFtsSearch;
        $sql .= " and page_groupIdView in ($groups)";
        $sql .= " and wobject_special_groupIdView in ($groups)";
        if ($session{setting}{wobjectPrivileges}) {
            $sql .= " and wobject_groupIdView in ($groups)";
        }

        # NOTE(review): filter keys are interpolated as column names without any
        # quoting; only the values are quoted. Callers must pass trusted keys.
        foreach my $filterElement (keys %{ $filter || {} }) {
            $sql .= " AND $filterElement in (".quoteAndJoin($filter->{$filterElement}).")";
        }

        # Keep @fts_docIds list order
        $sql .= " ORDER BY FIELD(docID,$docIds)" unless $noFtsSearch;

        my $filteredDocIds = WebGUI::SQL->buildArrayRef($sql);
        return $filteredDocIds if (ref($filteredDocIds) eq 'ARRAY' and @{$filteredDocIds});
    }

    # Documented contract: no results yields undef.
    return undef;
}

#-------------------------------------------------------------------

=head2 _getGroups ( )

Returns an array reference containing all groupIds of groups the user is in.

=cut

sub _getGroups {
    my @groups;

    # Test every known group for membership.
    foreach my $groupId (WebGUI::SQL->buildArray("select groupId from groups")) {
        push(@groups, $groupId) if (WebGUI::Grouping::isInGroup($groupId));
    }
    return \@groups;
}

#-------------------------------------------------------------------
# Destructor: explicitly invokes DESTROY on the full text search object, if one is held.
sub DESTROY {
    my $self = shift;
    if (ref($self->{_fts})) {
        $self->{_fts}->DESTROY();
    }
}

1;