diff --git a/lib/WebGUI/Asset/Wobject/IndexedSearch.pm b/lib/WebGUI/Asset/Wobject/IndexedSearch.pm
new file mode 100644
index 000000000..c38bc52a7
--- /dev/null
+++ b/lib/WebGUI/Asset/Wobject/IndexedSearch.pm
@@ -0,0 +1,564 @@
+package WebGUI::Asset::Wobject::IndexedSearch;
+
+use strict;
+use Time::HiRes;
+use WebGUI::Asset::Wobject::IndexedSearch::Search;
+use WebGUI::HTMLForm;
+use WebGUI::HTML;
+use WebGUI::Macro;
+use WebGUI::International;
+use WebGUI::Session;
+use WebGUI::SQL;
+use Tie::IxHash;
+use WebGUI::Utility;
+use WebGUI::Paginator;
+use WebGUI::Asset::Wobject;
+
+our @ISA = qw(WebGUI::Asset::Wobject);
+
+
+#-------------------------------------------------------------------
+# definition ( definition )
+#
+# Class method. Appends this asset's table name, class name and property
+# table to the array reference passed in, then hands the extended list to
+# the parent class and returns whatever the parent returns.
+sub definition {
+    my ($class, $definition) = @_;
+
+    my %properties = (
+        templateId       => { fieldType => "template",   defaultValue => "PBtmpl0000000000000034" },
+        linkURL          => { fieldType => 'url',        defaultValue => undef },
+        linkTitle        => { fieldType => 'text',       defaultValue => undef },
+        indexName        => { fieldType => 'text',       defaultValue => 'default' },
+        searchRoot       => { fieldType => 'checkList',  defaultValue => 'any' },
+        forceSearchRoots => { fieldType => 'yesNo',      defaultValue => 1 },
+        users            => { fieldType => 'selectList', defaultValue => 'any' },
+        namespaces       => { fieldType => 'selectList', defaultValue => 'any' },
+        contentTypes     => { fieldType => 'selectList', defaultValue => 'any' },
+        paginateAfter    => { defaultValue => 10 },
+        highlight        => { defaultValue => 1 },
+        previewLength    => { defaultValue => 130 },
+    );
+
+    # highlight_1 .. highlight_5: one default color per highlighted term.
+    my @highlightDefaults = ('#ffff66', '#A0FFFF', '#99ff99', '#ff9999', '#ff66ff');
+    foreach my $slot (1 .. scalar(@highlightDefaults)) {
+        $properties{"highlight_$slot"} = { defaultValue => $highlightDefaults[$slot - 1] };
+    }
+
+    push(@{$definition}, {
+        tableName  => 'IndexedSearch',
+        className  => 'WebGUI::Asset::Wobject::IndexedSearch',
+        properties => \%properties,
+    });
+    return $class->SUPER::definition($definition);
+}
+
+#-------------------------------------------------------------------
+# getUiLevel ( )
+#
+# Returns the UI level of this asset, which is always 5.
+sub getUiLevel {
+    return 5;
+}
+
+#-------------------------------------------------------------------
+# getEditForm ( )
+#
+# Builds the tabbed edit form on top of the parent's form and returns it.
+# If IndexedSearch_docInfo cannot be read, or holds no index at all, a
+# plain message string is returned instead of a form object.
+sub getEditForm {
+    my $self = shift;
+    my (@data, %indexName);
+    my $tabform = $self->SUPER::getEditForm();
+    tie my %searchRoot, 'Tie::IxHash';
+
+    # Unconditional read to catch installation errors.
+    my $sth = WebGUI::SQL->unconditionalRead("select distinct(indexName), indexName from IndexedSearch_docInfo");
+    unless ($sth->errorCode < 1) {
+        return "\n\n" . WebGUI::International::get(1,"IndexedSearch") . $sth->errorMessage."\n\n";
+    }
+    while (@data = $sth->array) {
+        $indexName{$data[0]} = $data[1];
+    }
+    $sth->finish;
+    unless(%indexName) {
+        return "\n\n" . WebGUI::International::get(2,"IndexedSearch") .
+            "\n\n" . WebGUI::International::get(3,"IndexedSearch") . "\n\n";
+    }
+
+    # Index to use
+#    $tabform->getTab("properties")->radioList( -name=>'indexName',
+#        -options=>\%indexName,
+#        -label=>WebGUI::International::get(5,"IndexedSearch"),
+#        -value=>$self->getValue("indexName"),
+#        -vertical=>1
+#    );
+    # NOTE: For now we're limiting each site to one index. Will allow more in the future.
+
+    $tabform->getTab("properties")->hidden(
+        -name=>"indexName",
+        -value=>"IndexedSearch_default"
+    );
+
+    # Page roots
+    #%searchRoot = ( 'any'=>WebGUI::International::get(15,"IndexedSearch"),
+    #    $session{page}{pageId}=>WebGUI::International::get(4,"IndexedSearch"),
+    #    WebGUI::SQL->buildHash("select pageId,title from page where parentId='0' and isSystem<>1 order by title")
+    #    );
+    #$tabform->getTab("properties")->checkList ( -name=>'searchRoot',
+    #    -options=>\%searchRoot,
+    #    -label=>WebGUI::International::get(6,"IndexedSearch"),
+    #    -value=>[ split("\n", $self->getValue("searchRoot")) ],
+    #    -multiple=>1,
+    #    -vertical=>1,
+    #    );
+    $tabform->getTab("properties")->yesNo(
+        -name=>'forceSearchRoots',
+        -label=>WebGUI::International::get('edit-forceSearchRoots-label',"IndexedSearch"),
+        -value=>$self->getValue("forceSearchRoots")
+    );
+
+    # Content of specific user
+    $tabform->getTab("properties")->selectList (
+        -name=>'users',
+        -options=>$self->_getUsers(),
+        -label=>WebGUI::International::get(7,"IndexedSearch"),
+        -value=>[ split("\n", $self->getValue("users")) ],
+        -multiple=>1,
+        -size=>5
+    );
+
+    # Content in specific namespaces
+    $tabform->getTab("properties")->selectList (
+        -name=>'namespaces',
+        -options=>$self->_getNamespaces,
+        -label=>WebGUI::International::get(8,"IndexedSearch"),
+        -value=>[ split("\n", $self->getValue("namespaces")) ],
+        -multiple=>1,
+        -size=>5
+    );
+
+    # Only specific content types. The "content" shortcut is a search-form
+    # convenience only, so it is not offered to the editor.
+    my $contentTypes = $self->_getContentTypes();
+    delete $contentTypes->{content};
+    $tabform->getTab("properties")->checkList (
+        -name=>'contentTypes',
+        -options=>$contentTypes,
+        -label=>WebGUI::International::get(10,"IndexedSearch"),
+        -value=>[ split("\n", $self->getValue("contentTypes")) ],
+        -multiple=>1,
+        -vertical=>1,
+    );
+    $tabform->getTab("display")->template(
+        -value=>$self->getValue("templateId"),
+        -namespace=>"IndexedSearch"
+    );
+    $tabform->getTab("display")->integer (
+        -name=>'paginateAfter',
+        -label=>WebGUI::International::get(11,"IndexedSearch"),
+        -value=>$self->getValue("paginateAfter"),
+    );
+    $tabform->getTab("display")->integer (
+        -name=>'previewLength',
+        -label=>WebGUI::International::get(12,"IndexedSearch"),
+        -value=>$self->getValue("previewLength"),
+    );
+    $tabform->getTab("display")->yesNo (
+        -name=>'highlight',
+        -label=>WebGUI::International::get(13,"IndexedSearch"),
+        -value=>$self->getValue("highlight"),
+    );
+
+    # Color picker for highlight colors
+    # NOTE(review): in this copy the raw value below (and the subtext in the
+    # loop) is almost empty; the color-picker markup the comment refers to
+    # is not present here -- confirm against the original file.
+    $tabform->getTab("display")->raw ( -value=>'
+
+        '
+    );
+    for (1..5) {
+        my $highlight = "highlight_$_";
+        $tabform->getTab("display")->text (
+            -name=>$highlight,
+            -label=>WebGUI::International::get(14,"IndexedSearch") ." $_:",
+            -size=>7,
+            -value=>$self->getValue($highlight),
+            -subtext=>qq{
+ Pick}
+        );
+    }
+    return $tabform;
+}
+
+#-------------------------------------------------------------------
+# getIcon ( [ small ] )
+#
+# Returns the URL of this asset's icon below the configured extras URL;
+# the small variant is returned when "small" is true.
+sub getIcon {
+    my $self = shift;
+    my $small = shift;
+    return $session{config}{extrasURL}.'/assets/small/search.gif' if ($small);
+    return $session{config}{extrasURL}.'/assets/search.gif';
+}
+
+#-------------------------------------------------------------------
+# getName ( )
+#
+# Returns the internationalized name of this asset type.
+sub getName {
+    return WebGUI::International::get(17,"IndexedSearch");
+}
+
+#-------------------------------------------------------------------
+# _isFormValueSelected ( field, value )
+#
+# Private helper for view(). Returns true when "value" was submitted for the
+# form field "field": an exact match against the multi-valued CGI parameter
+# when there is one, otherwise a literal substring match against the
+# flattened form value.
+sub _isFormValueSelected {
+    my ($self, $field, $value) = @_;
+    if (scalar $session{cgi}->param($field)) {
+        return isIn($value, $session{cgi}->param($field));
+    }
+    # \Q..\E: the value is data, not a pattern (class names contain "::",
+    # user supplied ids may contain anything).
+    return scalar($session{form}{$field} =~ /\Q$value\E/);
+}
+
+#-------------------------------------------------------------------
+# view ( )
+#
+# Assembles the query from the submitted form fields, runs the search,
+# paginates the hits and returns the processed template.
+sub view {
+    my $self = shift;
+    my (%var, @resultsLoop);
+
+    # Do some query handling
+    $var{exactPhrase} = $session{form}{exactPhrase};
+    $var{allWords} = $session{form}{allWords};
+    $var{atLeastOne} = $session{form}{atLeastOne};
+    $var{without} = $session{form}{without};
+    $var{query} = $session{form}{query};
+    $var{query} .= qq/ +"$var{exactPhrase}"/ if ($var{exactPhrase});
+    $var{query} .= " ".join(" ",map("+".$_,split(/\s+/,$var{allWords}))) if ($var{allWords});
+    $var{query} .= qq{ $var{atLeastOne}} if ($var{atLeastOne});
+    $var{query} .= " ".join(" ",map("-".$_,split(/\s+/,$var{without}))) if ($var{without});
+
+    # Remove macros from query
+    $var{query} = WebGUI::Macro::negate($var{query});
+
+    # Set some standard vars
+    $var{submit} = WebGUI::Form::submit({value=>WebGUI::International::get(16, "IndexedSearch")});
+    $var{actionURL} = $self->getUrl;
+    $var{"int.search"} = WebGUI::International::get(16,"IndexedSearch");
+    $var{numberOfResults} = '0';
+    $var{"select_".$self->getValue("paginateAfter")} = "selected";
+
+    # Do the search
+    my $startTime = Time::HiRes::time();
+    my $filter = $self->_buildFilter;
+    my $search = WebGUI::Asset::Wobject::IndexedSearch::Search->new($self->getValue('indexName'));
+    $search->open;
+    my $results = $search->search($var{query},$filter);
+    $var{duration} = Time::HiRes::time() - $startTime;
+    $var{duration} = sprintf("%.3f", $var{duration}); # Duration rounded to 3 decimal places
+
+    # Let's see if the search returned any results
+    if (defined ($results)) {
+        $var{numberOfResults} = scalar(@$results);
+
+        # Deal with pagination: carry the query and the multi-valued
+        # restrictions along in the page links.
+        my $url = "query=".WebGUI::URL::escape($var{query});
+        foreach my $field (qw(users namespaces contentTypes)) {
+            foreach my $value ($session{cgi}->param($field)) {
+                $url .= "&$field=".WebGUI::URL::escape($value);
+            }
+        }
+        $url .= "&paginateAfter=".$self->getValue("paginateAfter");
+        my $p = WebGUI::Paginator->new(WebGUI::URL::page($url), $self->getValue("paginateAfter"));
+        $p->setDataByArrayRef($results);
+        $var{startNr} = 1;
+        if($session{form}{pn}) {
+            $var{startNr} = (($session{form}{pn} - 1) * $self->getValue("paginateAfter")) + 1;
+        }
+
+        my @highlightColors = map { $self->getValue("highlight_$_") } (1..5);
+        $var{queryHighlighted} = $search->highlight($var{query}, undef, \@highlightColors);
+
+        # Get result details for this page
+        if($p->getPageNumber > $p->getNumberOfPages) {
+            $var{numberOfResults} = 0;
+            $var{resultsLoop} = [];
+        } else {
+            $var{resultsLoop} = $search->getDetails($p->getPageData,
+                highlightColors => \@highlightColors,
+                previewLength => $self->getValue('previewLength'),
+                highlight => $self->getValue('highlight')
+            );
+            # Pagination variables
+            $var{endNr} = $var{startNr}+(scalar(@{$var{resultsLoop}}))-1;
+            $p->appendTemplateVars(\%var);
+        }
+    }
+
+    # Create a loop with namespaces
+    $var{namespaces} = [];
+    my $namespaces = $self->_getNamespaces('restricted');
+    foreach my $namespace (keys %$namespaces) {
+        push(@{$var{namespaces}}, {
+            value    => $namespace,
+            name     => $namespaces->{$namespace},
+            selected => $self->_isFormValueSelected('namespaces', $namespace),
+        });
+    }
+
+    # Create a loop with contentTypes
+    #
+    # And while we are busy we also create a loop with simplified contentTypes
+    # This means: wobject, page, wobjectDetail are masked in one option: content
+
+    $var{contentTypes} = [];
+    $var{contentTypesSimple} = [];
+    my $contentTypes = $self->_getContentTypes('restricted');
+    foreach my $type (keys %$contentTypes) {
+        my $selected = $self->_isFormValueSelected('contentTypes', $type);
+        unless($type =~ /^content$/) { # No shortcut in the detailed contentType list
+            push(@{$var{contentTypes}}, {
+                value => $type,
+                name => $contentTypes->{$type},
+                selected => $selected,
+                'type_'.$type => 1 });
+        }
+        # The alternation must be grouped, otherwise the anchors only bind
+        # to the first and last alternative.
+        unless($type =~ /^(?:page|wobject|wobjectDetail)$/) { # No details in the simple contentType list
+            push(@{$var{contentTypesSimple}}, {
+                value => $type,
+                name => $contentTypes->{$type},
+                selected => $selected,
+                'type_'.$type => 1 });
+        }
+    }
+
+    # Create a loop with users
+    $var{users} = [];
+    my $users = $self->_getUsers('restricted');
+    foreach my $user (keys %$users) {
+        push(@{$var{users}}, {
+            value    => $user,
+            name     => $users->{$user},
+            selected => $self->_isFormValueSelected('users', $user),
+        });
+    }
+
+    # Create a loop with searchable page roots
+    my $rootData;
+    my @roots = split(/\n/, $self->get('searchRoot'));
+    my %checked = map {$_=>1} $session{cgi}->param("searchRoot");
+    #if (isIn('any', @roots)) {
+    #    foreach $rootData (WebGUI::Page->getAnonymousRoot->daughters) {
+    #        push (@{$var{searchRoots}}, {
+    #            value => $rootData->{'pageId'},
+    #            menuTitle => $rootData->{'menuTitle'},
+    #            title => $rootData->{'title'},
+    #            urlizedTitle => $rootData->{'urlizedTitle'},
+    #            checked => $checked{$rootData->{'pageId'}},
+    #        });
+    #        $var{"rootPage.".$rootData->{'urlizedTitle'}.".id"} = $rootData->{'pageId'};
+    #        $var{"rootPage.".$rootData->{'urlizedTitle'}.".checked"} = $checked{$rootData->{'pageId'}};
+    #    }
+    #} else {
+    #    foreach (@roots) {
+    #        $rootData = WebGUI::Page->new($_);
+    #        push (@{$var{searchRoots}}, {
+    #            value => $rootData->get('pageId'),
+    #            menuTitle => $rootData->get('menuTitle'),
+    #            title => $rootData->get('title'),
+    #            urlizedTitle => $rootData->get('urlizedTitle'),
+    #            checked => $checked{$rootData->get('pageId')},
+    #        });
+    #        $var{"rootPage.".$rootData->get('urlizedTitle').".id"} = $rootData->get('pageId');
+    #        $var{"rootPage.".$rootData->get('urlizedTitle').".checked"} = $checked{$rootData->get('pageId')};
+    #    }
+    #}
+    $var{"anyRootPage.checked"} = $checked{'any'};
+    # close the search
+    $search->close;
+    return $self->processTemplate(\%var, $self->get("templateId"));
+}
+
+#-------------------------------------------------------------------
+# www_edit ( )
+#
+# Renders the edit form in the admin console, or the "insufficient
+# privileges" page when the current user may not edit this asset.
+sub www_edit {
+    my $self = shift;
+    return $self->getAdminConsole->render(WebGUI::Privilege::insufficient()) unless $self->canEdit;
+    $self->getAdminConsole->setHelp("search add/edit");
+    return $self->getAdminConsole->render($self->getEditForm->print,WebGUI::International::get("26","Article"));
+}
+
+#-------------------------------------------------------------------
+# _buildPageList ( )
+#
+# Works out which search roots apply (the configured ones, or the subset
+# of them the visitor asked for) and returns an array reference of page
+# ids below them. The traversal is commented out, so as the code stands
+# the returned list is always empty.
+sub _buildPageList {
+    my ($self, @userSpecifiedRoots, @roots, @allowedRoots, $pageId, @pages);
+    $self = shift;
+
+    @userSpecifiedRoots = $session{cgi}->param("searchRoot");
+
+    if ((scalar(@userSpecifiedRoots) == 0)
+        || ($self->getValue("forceSearchRoots"))
+        || (isIn('any', @userSpecifiedRoots))
+    ) {
+        @roots = split(/\n+/i, $self->get("searchRoot"));
+    } else {
+        @allowedRoots = split(/\n+/, $self->get("searchRoot"));
+
+        foreach (@userSpecifiedRoots) {
+            push (@roots, $_) if (isIn($_, @allowedRoots));
+        }
+    }
+    #foreach $pageId (@roots) {
+    #    WebGUI::Page->new($pageId)->traversePreOrder(
+    #        sub {
+    #            push(@pages, $_[0]->get('pageId'));
+    #        }
+    #    );
+    #}
+
+    return [ @pages ];
+}
+
+#-------------------------------------------------------------------
+# _buildFilter ( )
+#
+# Returns a hash reference mapping IndexedSearch_docInfo column names to
+# array references of allowed (unquoted) values, built from the submitted
+# form and from this asset's configuration. Columns whose value list ends
+# up empty are left out, so the caller never builds an empty "in ()".
+sub _buildFilter {
+    my $self = shift;
+    my %filter = ();
+
+    # pages
+    if($self->get('searchRoot') !~ /any/i) {
+        $filter{pageId} = $self->_buildPageList;
+    }
+
+    # content-types
+    if($session{form}{contentTypes} && ! isIn('any', $session{cgi}->param('contentTypes'))) {
+        $filter{contentType} = [ $session{cgi}->param('contentTypes') ];
+
+        # contentType "content" is a shortcut for "page", "wobject" and "wobjectDetail"
+        if (isIn('content', $session{cgi}->param('contentTypes'))) {
+            push(@{$filter{contentType}}, qw/Asset assetDetail/);
+        }
+    } elsif ($self->getValue('contentTypes') !~ /any/i) {
+        $filter{contentType} = [ split(/\n/, $self->getValue('contentTypes')) ];
+    }
+
+    # users
+    if($session{form}{users} && ! isIn('any', $session{cgi}->param('users'))) {
+        $filter{ownerId} = [];
+        foreach my $submitted ($session{cgi}->param('users')) {
+            my $user = $submitted;
+            if ($user =~ /\D/) {
+                # Not numeric: treat it as a username pattern, "*" being the wildcard.
+                $user =~ s/\*/%/g;
+                ($user) = WebGUI::SQL->buildArray("select userId from users where username like ".quote($user));
+            }
+            # Store the bare id. The search quotes filter values itself,
+            # quoting here as well would make the id unmatchable.
+            push(@{$filter{ownerId}}, $user) if ($user =~ /^\d+$/);
+        }
+    } elsif ($self->getValue('users') !~ /any/i) {
+        $filter{ownerId} = [ split(/\n/, $self->getValue('users')) ];
+    }
+
+    # namespaces
+    if($session{form}{namespaces} && ! isIn('any', $session{cgi}->param('namespaces'))) {
+        $filter{namespace} = [ $session{cgi}->param('namespaces') ];
+    } elsif ($self->getValue('namespaces') !~ /any/i) {
+        $filter{namespace} = [ split(/\n/, $self->getValue('namespaces')) ];
+    }
+
+    # delete every filter that is an empty array reference
+    foreach my $column (keys %filter) {
+        delete $filter{$column} unless (scalar(@{$filter{$column}}));
+    }
+    return \%filter;
+}
+
+#-------------------------------------------------------------------
+# _getNamespaces ( [ restricted ] )
+#
+# Returns an ordered hash reference of namespace => display name, always
+# starting with "any". With a true "restricted" argument and a configured
+# namespace list other than "any", only the configured namespaces are
+# listed; otherwise every namespace found in IndexedSearch_docInfo is.
+sub _getNamespaces {
+    my ($self, $restricted) = @_;
+    my %international;
+    foreach my $class (@{$session{config}{assets}}) {
+        my $load = 'use '.$class;
+        eval($load);
+        if ($@) {
+            WebGUI::ErrorHandler::warn("Couldn't compile ".$class." because ".$@);
+        } else {
+            $international{$class} = eval{$class->getName()};
+        }
+    }
+    tie my %namespaces, 'Tie::IxHash';
+    if ($restricted and $self->get('namespaces') !~ /any/i) {
+        $namespaces{any} = WebGUI::International::get(18,"IndexedSearch");
+        foreach (split/\n/, $self->get('namespaces')) {
+            $namespaces{$_} = $international{$_} || ucfirst($_);
+        }
+    } else {
+        $namespaces{any} = WebGUI::International::get(18,"IndexedSearch");
+        foreach (WebGUI::SQL->buildArray("select distinct(namespace) from IndexedSearch_docInfo order by namespace")) {
+            $namespaces{$_} = $international{$_} ||ucfirst($_);
+        }
+    }
+    return \%namespaces;
+}
+
+#-------------------------------------------------------------------
+# _getContentTypes ( [ restricted ] )
+#
+# Returns an ordered hash reference of contentType => display name, always
+# starting with "any" and the "content" shortcut. Types without a
+# translation fall back to their capitalized id in both branches.
+sub _getContentTypes {
+    my ($self, $restricted) = @_;
+    my %international = (
+        'page' => WebGUI::International::get(2),
+        'wobject' => WebGUI::International::get(19,"IndexedSearch"),
+        'wobjectDetail' => WebGUI::International::get(20,"IndexedSearch"),
+        'content' => WebGUI::International::get(21,"IndexedSearch"),
+        'discussion' => WebGUI::International::get(892),
+        'profile' => WebGUI::International::get(22,"IndexedSearch"),
+        'any' => WebGUI::International::get(23,"IndexedSearch"),
+    );
+    tie my %contentTypes, 'Tie::IxHash';
+    if ($restricted and $self->get('contentTypes') !~ /any/i) {
+        $contentTypes{any} = $international{any};
+        $contentTypes{content} = $international{content}; # shortcut for page, wobject and wobjectDetail
+        foreach (split/\n/, $self->get('contentTypes')) {
+            $contentTypes{$_} = $international{$_} || ucfirst($_);
+        }
+    } else {
+        %contentTypes = (
+            'any' => $international{any},
+            'content' => $international{content}, # shortcut for page, wobject and wobjectDetail
+        );
+        foreach (WebGUI::SQL->buildArray("select distinct(contentType) from IndexedSearch_docInfo order by contentType")) {
+            $contentTypes{$_} = $international{$_} || ucfirst($_);
+        }
+    }
+    return \%contentTypes;
+}
+
+#-------------------------------------------------------------------
+# _getSearchablePages ( searchRoot )
+#
+# Plain function (no invocant). Recursively collects the ids of all pages
+# below searchRoot from the "page" table and returns them as the keys of a
+# hash (every value is 1).
+# NOTE(review): nothing in this part of the file calls this function.
+sub _getSearchablePages {
+    my $searchRoot = shift;
+    my %pages;
+    my $sth = WebGUI::SQL->read("select pageId from page where parentId = ".quote($searchRoot));
+    while (my %data = $sth->hash) {
+        $pages{$data{pageId}} = 1;
+        %pages = (%pages, _getSearchablePages($data{pageId}) );
+    }
+    return %pages;
+}
+
+#-------------------------------------------------------------------
+# _getUsers ( [ restricted ] )
+#
+# Returns an ordered hash reference starting with "any". With a true
+# "restricted" argument and a configured user list other than "any", the
+# configured entries map to themselves; otherwise all users are listed as
+# userId => username, ordered by username.
+sub _getUsers {
+    my ($self, $restricted) = @_;
+    tie my %users, 'Tie::IxHash';
+    if ($restricted and $self->get('users') !~ /any/i) {
+        $users{any} = WebGUI::International::get(25,"IndexedSearch");
+        foreach (split/\n/, $self->get('users')) {
+            $users{$_} = $_;
+        }
+    } else {
+        %users = (
+            'any' => WebGUI::International::get(25,"IndexedSearch"),
+            WebGUI::SQL->buildHash("select userId, username from users order by username")
+        );
+    }
+    return \%users;
+}
+
+1;
diff --git a/lib/WebGUI/Asset/Wobject/IndexedSearch/Search.pm b/lib/WebGUI/Asset/Wobject/IndexedSearch/Search.pm
new file mode 100644
index 000000000..90dd31b30
--- /dev/null
+++ b/lib/WebGUI/Asset/Wobject/IndexedSearch/Search.pm
@@ -0,0 +1,637 @@
+package WebGUI::Asset::Wobject::IndexedSearch::Search;
+
+=head1 LEGAL
+
+ -------------------------------------------------------------------
+  WebGUI is Copyright 2001-2004 Plain Black Corporation.
+ -------------------------------------------------------------------
+  Please read the legal notices (docs/legal.txt) and the license
+  (docs/license.txt) that came with this distribution before using
+  this software.
+ -------------------------------------------------------------------
+  http://www.plainblack.com                     info@plainblack.com
+ -------------------------------------------------------------------
+
+=cut
+
+use strict;
+use DBIx::FullTextSearch;
+use WebGUI::DateTime;
+use WebGUI::SQL;
+use WebGUI::URL;
+use WebGUI::HTML;
+use WebGUI::ErrorHandler;
+use WebGUI::Grouping;
+use DBIx::FullTextSearch::StopList;
+use WebGUI::Utility;
+use WebGUI::Session;
+use WebGUI::Privilege;
+use HTML::Highlight;
+use WebGUI::Macro;
+
+=head1 NAME
+
+Package WebGUI::Asset::Wobject::IndexedSearch::Search
+
+=head1 DESCRIPTION
+
+Search implementation for WebGUI.
+
+=head1 SYNOPSIS
+
+ use WebGUI::Asset::Wobject::IndexedSearch::Search;
+ my $search = WebGUI::Asset::Wobject::IndexedSearch::Search->new();
+ $search->open;
+ $search->indexDocument( { text => 'Index this text',
+                           location => 'http://www.mysite.com/index.pl/faq#45',
+                           languageId => 3,
+                           namespace => 'FAQ'
+                         });
+ my $hits = $search->search("+foo -bar koo",{ namespace => ['Article', 'FAQ']} );
+
+ $search->close;
+
+
+=head1 SEE ALSO
+
+This package is an extension to DBIx::FullTextSearch and HTML::Highlight.
+See those packages for documentation of their methods.
+
+=head1 METHODS
+
+These methods are available from this package:
+
+=cut
+
+#-------------------------------------------------------------------
+# _recurseCrumbTrail ( assetId )
+#
+# Plain function (no invocant). Walks from the given asset up through its
+# parentId chain and returns the menu titles from the top down, each one
+# followed by ' > '. Returns an empty string once an id has no row in the
+# asset table, so the trail never starts with an empty entry.
+# NOTE(review): Tie::CPHash is not loaded by this file; presumably one of
+# the modules used above loads it -- confirm.
+sub _recurseCrumbTrail {
+    my ($assetId) = @_;
+    my %data;
+    tie %data, 'Tie::CPHash';
+    %data = WebGUI::SQL->quickHash("select assetId,parentId,menuTitle,url from asset where assetId=".quote($assetId));
+
+    # No such asset: we walked past the top of the tree.
+    return '' unless ($data{assetId});
+
+    my $output = _recurseCrumbTrail($data{parentId});
+    if ($data{assetId} ne "0") {
+        $output .= ''.$data{menuTitle}.' > ';
+    }
+    return $output;
+}
+
+#-------------------------------------------------------------------
+
+=head2 close ( )
+
+Closes the DBIx::FullTextSearch session.
+
+=cut
+
+sub close {
+    my ($self) = @_;
+    $self->DESTROY();
+}
+
+#-------------------------------------------------------------------
+
+=head2 create ( [ %options ] )
+
+Creates a new DBIx::FullTextSearch index and resets the next docId to 1.
+Returns the DBIx::FullTextSearch object.
+
+=head3 %options
+
+Options to pass to DBIx::FullTextSearch. They are merged over the defaults
+set up by the constructor:
+
+( backend => 'column', word_length => 20, filter => ... )
+
+A "stemmer" option is dropped, with a warning, when Lingua::Stem cannot be
+loaded. A "stoplist" option names a stoplist language; the default stoplist
+for it is created on first use.
+
+Please refer to the DBIx::FullTextSearch documentation for a complete list of options.
+
+=cut
+
+sub create {
+    my ($self, %overrides) = @_;
+    my %options = (%{$self->{_createOptions}}, %overrides);
+
+    if($options{stemmer}) {
+        eval "use Lingua::Stem";
+        if ($@) {
+            WebGUI::ErrorHandler::warn("IndexedSearch: Can't use stemmer: $@");
+            delete $options{stemmer};
+        }
+    }
+
+    if(my $language = $options{stoplist}) {
+        # Stoplists are kept per index: <indexName>_<language>
+        my $stoplistName = $self->getIndexName."_".$language;
+        unless ($self->existsTable($stoplistName."_stoplist")) {
+            DBIx::FullTextSearch::StopList->create_default($self->getDbh, $stoplistName, $language);
+        }
+        $options{stoplist} = $stoplistName;
+    }
+
+    my $fts = DBIx::FullTextSearch->create($self->getDbh, $self->getIndexName, %options);
+    $self->{_fts} = $fts;
+    unless (defined $fts) {
+        WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to create index.\n$DBIx::FullTextSearch::errstr");
+        return undef;
+    }
+    $self->{_docId} = 1;
+    return $fts;
+}
+
+#-------------------------------------------------------------------
+
+=head2 existsTable ( tableName )
+
+Returns true if tableName exists in database.
+
+=head3 tableName
+
+The name of table.
+
+=cut
+
+sub existsTable {
+    my ($self, $tableName) = @_;
+    my @tables = WebGUI::SQL->buildArray("show tables");
+    return isIn($tableName, @tables);
+}
+
+#-------------------------------------------------------------------
+
+=head2 getDetails ( docIdList , [ %options ] )
+
+Returns an array reference containing details for each docId.
+
+=head3 docIdList
+
+An array reference containing docIds.
+
+=head3 previewLength
+
+The maximum number of characters in each of the context sections. Defaults to "80".
+
+=head3 highlight
+
+A boolean indicating whether or not to enable highlight. Highlighting is
+only applied when a true value is passed.
+
+=head3 highlightColors
+
+A reference to an array of CSS color identificators.
+
+=cut
+
+sub getDetails {
+    my ($self, $docIdList, %options) = @_;
+
+    # Nothing to look up; also avoids building "docId in ()".
+    return [] unless ($docIdList && @{$docIdList});
+
+    my $docIds = quoteAndJoin($docIdList);
+    my (@searchDetails);
+    my $sql = "select * from IndexedSearch_docInfo where docId in ($docIds) and indexName = ".quote($self->getIndexName) ;
+    $sql .= " ORDER BY FIELD(docId, $docIds)"; # Maintain $docIdList order
+    my $sth = WebGUI::SQL->read($sql);
+    while (my %data = $sth->hash) {
+        if ($data{ownerId}) {
+            ($data{username}) = WebGUI::SQL->quickArray("select username from users where userId = ".quote($data{ownerId}));
+            $data{userProfile} = WebGUI::URL::page("op=viewProfile&uid=$data{ownerId}");
+        }
+        # A shortcut is either the text itself or a select statement that fetches it.
+        if ($data{bodyShortcut} =~ /^\s*select /i) {
+            $data{body} = (WebGUI::SQL->quickArray($data{bodyShortcut}))[0];
+        } else {
+            $data{body} = $data{bodyShortcut};
+        }
+        if ($data{headerShortcut} =~ /^\s*select /i) {
+            $data{header} = (WebGUI::SQL->quickArray($data{headerShortcut}))[0];
+        } else {
+            $data{header} = $data{headerShortcut};
+        }
+        delete($data{bodyShortcut});
+        delete($data{headerShortcut});
+        if($data{body}) {
+            $data{body} = WebGUI::Macro::filter($data{body});
+            $data{body} = WebGUI::HTML::filter($data{body},'all');
+            $data{body} = $self->preview($data{body}, $options{previewLength});
+            $data{body} = $self->highlight($data{body},undef, $options{highlightColors}) if ($options{highlight});
+        }
+        if($data{header}) {
+            $data{header} = WebGUI::Macro::filter($data{header});
+            $data{header} = WebGUI::HTML::filter($data{header},'all');
+            $data{header} = $self->highlight($data{header},undef, $options{highlightColors}) if ($options{highlight});
+            # NOTE(review): the location is only turned into a gateway URL
+            # when the document has a header -- confirm this is intended.
+            $data{location} = WebGUI::URL::gateway($data{location});
+        }
+        $data{crumbTrail} = _recurseCrumbTrail($data{assetId});
+        $data{crumbTrail} =~ s/\s*\>\s*$//;
+        push(@searchDetails, \%data);
+    }
+    $sth->finish;
+    return \@searchDetails;
+}
+
+#-------------------------------------------------------------------
+
+=head2 getDbh ( )
+
+Returns the object's database handler.
+
+=cut
+
+sub getDbh {
+    my $self = shift;
+    return $self->{_dbh};
+}
+
+#-------------------------------------------------------------------
+
+=head2 getDocId ( )
+
+Returns the next docId for this object.
+
+=cut
+
+sub getDocId {
+    my $self=shift;
+    return $self->{_docId};
+}
+
+#-------------------------------------------------------------------
+
+=head2 getIndexName ( )
+
+Returns the full index name of this object.
+
+=cut
+
+sub getIndexName {
+    my $self = shift;
+    return $self->{_indexName};
+}
+
+#-------------------------------------------------------------------
+
+=head2 _queryToWords ( [ query ] )
+
+Converts a DBIx::FullTextSearch query to (\@Words, \@Wildcards) suitable to pass to HTML::Highlight
+
+=head3 query
+
+The query to convert. Defaults to the last used $search->search query.
+
+=cut
+
+sub _queryToWords {
+    my ($self, $query) = @_;
+    # Plain assignment: a second "my" here would hide the argument.
+    $query ||= $self->{_query};
+
+    # Return the processed words / wildcards from memory if it's cached.
+    if ($self->{$query."words"} && $self->{$query."wildcards"}) {
+        return ($self->{$query."words"}, $self->{$query."wildcards"});
+    }
+
+    # deal with quotes: every second piece of the split is a quoted phrase
+    my $inQuote=0;
+    my (@words, @wildcards);
+    foreach my $piece (split(/\"/, $query)) {
+        if($inQuote == 0) {
+            foreach my $term (split(/\s+/, $piece)) {
+                next if ($term =~ /^AND$/i);       # boolean AND
+                next if ($term =~ /^OR$/i);        # boolean OR
+                next if ($term =~ /^NOT$/i);       # boolean NOT
+                next if ($term =~ /^\-/);          # exclude word
+                next if ($term =~ /^.{0,1}$/);     # at least 2 characters
+                my $wildcard = ($term =~ /\*/)
+                    ? '%'                          # match any character
+                    : '*';                         # Also match plural of word
+                $term =~ s/['"()+*]+//g;           # remove query operators and quotes
+                next if ($term eq '');             # it was operators only
+                push(@words, $term);
+                push(@wildcards, $wildcard);
+            }
+        } elsif ($piece ne '') {
+            push(@words, $piece);
+            push(@wildcards, undef);               # Exact match
+        }
+        $inQuote = $inQuote ? 0 : 1;
+    }
+    # Store words / wildcards in memory
+    $self->{$query."words"} = \@words;
+    $self->{$query."wildcards"} = \@wildcards;
+
+    return (\@words, \@wildcards);
+}
+
+#-------------------------------------------------------------------
+
+=head2 highlight ( text [ , query , colors ] )
+
+highlight words or patterns in HTML documents.
+
+=head3 text
+
+The text to highlight
+
+=head3 query
+
+A query containing the words to highlight. Defaults to the last used $search->search query.
+Special case: When query contains only an asterisk '*', no highlighting is applied.
+
+=head3 colors
+
+A reference to an array of CSS color identificators.
+
+=cut
+
+sub highlight {
+    my ($self, $text, $query, $colors) = @_;
+    # Plain assignment: a second "my" here would hide the argument.
+    $query ||= $self->{_query};
+    return $text if ($query =~ /^\s*\*\s*$/); # query = '*', no highlight
+    my ($words, $wildcards) = $self->_queryToWords($query);
+    my $hl = HTML::Highlight->new(
+        words => $words,
+        wildcards => $wildcards,
+        colors => $colors
+    );
+    return $hl->highlight($text);
+}
+
+#-------------------------------------------------------------------
+
+=head2 indexDocument ( hashRef )
+
+Adds a document to the index.
+ +This method doesn't store the document itself. Instead, it stores information about words +in the document in such a structured way that it makes easy and fast to look up what +documents contain certain words and return id's of the documents. + +=head3 text + +The text to index. + +=head3 location + +The location of the document. Most likely an URL. + +=head3 contentType + +The content type of this document. + +=head3 docId + +The unique Id of this document. Defaults to the next empty docId. + +=head3 assetId + +The assetId of the asset that holds this content. Defaults to NULL. + +=head3 ownerId + +The ownerId of the document. Defaults to 3. + +=head3 namespace + +The namespace of this document. Defaults to 'WebGUI'. + +=head3 groupIdView + +Id of group authorized to view this content. Defaults to '7' (everyone) + +=head3 special_groupIdView + +Id of group authorized to view the details of this content. + +=head3 headerShortcut + +An sql statement that returns the header (title, question, subject, name, whatever) +of this document. + +=head3 bodyShortcut + +An sql statement that returns the body (description, answer, message, whatever) +of this document. + +=cut + +sub indexDocument { + my ($self, $document) = @_; + $self->{_fts}->index_document($document->{docId} || $self->{_docId}, $document->{text}); + my $docId = ($document->{docId} || $self->{_docId}); + WebGUI::SQL->write("insert into IndexedSearch_docInfo ( docId, + indexName, + assetId, + groupIdView, + special_groupIdView, + namespace, + location, + headerShortcut, + bodyShortcut, + contentType, + ownerId, + dateIndexed ) + values ( ". + quote($docId).", ". + quote($self->getIndexName).", ". + quote($document->{assetId}).", ". + quote($document->{groupIdView} || "7").", ". + quote($document->{special_groupIdView} || "7").", ". + quote($document->{namespace} || 'WebGUI')." , ". + quote($document->{location}).", ". + quote($document->{headerShortcut})." ,". + quote($document->{bodyShortcut})." ,". 
+ quote($document->{contentType})." ,". + quote($document->{ownerId} || 3).", + ".WebGUI::DateTime::time()." )" + ); + $self->{_docId}++; +} + +#------------------------------------------------------------------- + +=head2 new ( [ indexName , dbh ] ) + +Constructor. + +=head3 indexName + +The name of the index to open. Defaults to 'default'. + +=head3 $dbh + +Database handler to use. Defaults to $WebGUI::Session::session{dbh}. + +=cut + +sub new { + my ($class, $indexName, $dbh) = @_; + $indexName = $indexName || 'default'; + my $self = { _indexName => $indexName, + _dbh => $dbh || $WebGUI::Session::session{dbh}, + _createOptions => {( backend => 'column', + word_length => 20, + filter => 'map { lc $_ if ($_ !~ /\^.*;/) }' + )}, + }; + bless $self, $class; +} + +#------------------------------------------------------------------- + +=head2 open ( ) + +Opens an existing DBIx::FullTextSearch index. + +=cut + +sub open { + my ($self) = @_; + $self->{_fts} = DBIx::FullTextSearch->open($self->getDbh, $self->getIndexName); + if (not defined $self->{_fts}) { + WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to open index.\n$DBIx::FullTextSearch::errstr"); + return undef; + } + ($self->{_docId}) = WebGUI::SQL->quickArray("select max(docId) from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName)); + $self->{_docId}++; + return $self->{_fts}; +} + +#------------------------------------------------------------------- + +=head2 preview ( text , [ previewLength , query ] ) + +Returns a context preview in which words from a search query appear in the resulting documents. +The words are always in the middle of each of the sections. + +=head3 text + +The text to preview + +=head3 previewLength + +The maximum number of characters in each of the context sections. Defaults to 80. +A preview length of "0" means no preview, +while a negative preview length returns the complete text. + +=head3 query + +A query containing the words to highlight. 
Defaults to the last used $search->search query.

=cut

sub preview {
    my ($self, $text, $previewLength, $query) = @_;
    $previewLength = 80 if (not defined $previewLength);
    return '' unless ($previewLength);
    return $text if ($previewLength < 0);

    # Fall back to the last search query only when the caller supplied none.
    # Declaring $query again with "my" at this point would mask the parameter
    # and silently throw away the caller's query.
    $query ||= $self->{_query};

    # Plain fallback preview: run the text through WebGUI::HTML::filter
    # (presumably strips all markup - confirm), then keep the longest leading
    # chunk of at most $previewLength characters that is followed by whitespace.
    my $plainPreview = sub {
        my ($plain) = @_;
        $plain = WebGUI::HTML::filter($plain,'all');
        $plain =~ s/^(.{1,$previewLength})\s+.*$/$1/s;
        return $plain;
    };

    if (not $query or $query =~ /^\s*\*\s*$/) {     # Query is '*' or empty.
        $text = $plainPreview->($text);
    } else {
        my ($words, $wildcards) = $self->_queryToWords($query);
        my $hl = HTML::Highlight->new(
            words     => $words,
            wildcards => $wildcards
        );
        my $preview = join('... ',@{$hl->preview_context($text, $previewLength)});
        # No context sections found: use the plain preview instead.
        $text = $preview ? $preview : $plainPreview->($text);
    }

    # Trim leading and trailing whitespace.
    $text =~ s/^(\s| )+//;
    $text =~ s/(\s| )+$//;
    if ($text ne '') {
        $text = '... '.$text if ($text !~ /^[A-Z]+/);   # ... broken up at the beginning
        $text .=' ...' if ($text !~ /\.$/);             # broken up at the end ...
    }
    return $text;
}

#-------------------------------------------------------------------

=head2 recreate ( [ %options ] )

Like create, but first drops the existing index. Useful when rebuilding the index.

=head3 %options

Options to pass to WebGUI::IndexedSearch->create()

=cut

sub recreate {
    my ($self, %options) = @_;

    # Drop the current index if one can be opened.
    $self->{_fts} = DBIx::FullTextSearch->open($self->getDbh, $self->getIndexName);
    if (defined $self->{_fts}) {
        $self->{_fts}->drop;
    }

    $self->{_fts} = $self->create($self->getIndexName, $self->getDbh, %options);

    # The per-document rows belong to the dropped index, so remove them too.
    WebGUI::SQL->write("delete from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName));
    return $self->{_fts};
}

#-------------------------------------------------------------------

=head2 search ( query, \%filter )

Returns an array reference of docId's of documents that match the query.
If the search has no results, undef is returned.

=head3 query

User input string. Will be parsed into can-include, must-include and must-not-include words and phrases.
Special case: when query is an asterisk (*), then no full text search is done, and results are returned
using \%filter.

Examples are:

 "this is a phrase" -koo +bar foo
 (foo OR baz) AND (bar OR caz)

=head3 filter

A hash reference containing filter elements.

Example:

 {
     language  => [ 1, 3 ],
     namespace => [ 'Article', 'USS' ]
 }

=cut

sub search {
    my ($self, $query, $filter) = @_;
    $self->{_query} = $query;
    my $noFtsSearch = ($query =~ /^\s*\*\s*$/);     # query = '*', no full text search

    # Declare first, then assign conditionally. A "my" combined with a
    # statement modifier has undefined behaviour, and the array could keep
    # the hits of an earlier call.
    my @fts_docIds;
    @fts_docIds = $self->{_fts}->search($query) unless $noFtsSearch;

    if (@fts_docIds || $noFtsSearch) {
        my $groups = quoteAndJoin($self->_getGroups);
        my $docIds = quoteAndJoin(\@fts_docIds);
        my $sql = "select docId from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName);
        $sql .= " and docId in ($docIds)" unless $noFtsSearch;

        # The same group list is applied to both view columns.
        $sql .= " and groupIdView in ($groups)";
        $sql .= " and special_groupIdView in ($groups)";

        # NOTE(review): filter keys are interpolated as column names without
        # quoting; callers must only pass trusted keys.
        foreach my $filterElement (keys %{$filter}) {
            $sql .= " AND $filterElement in (".quoteAndJoin($filter->{$filterElement}).")";
        }

        # Keep @fts_docIds list order
        $sql .= " ORDER BY FIELD(docID,$docIds)" unless $noFtsSearch;

        my $filteredDocIds = WebGUI::SQL->buildArrayRef($sql);
        return $filteredDocIds if (ref $filteredDocIds eq 'ARRAY' and @{$filteredDocIds});
    }
    return undef;
}

#-------------------------------------------------------------------

=head2 _getGroups ( )

Returns an array reference containing all groupIds of groups the user is in.

=cut

sub _getGroups {
    my @memberOf = ();

    # Test every groupId in the groups table and keep the ones isInGroup accepts.
    for my $candidate (WebGUI::SQL->buildArray("select groupId from groups")) {
        next unless (WebGUI::Grouping::isInGroup($candidate));
        push(@memberOf, $candidate);
    }
    return \@memberOf;
}

#-------------------------------------------------------------------
# Calls DESTROY on the wrapped full text search object, if one is held.
sub DESTROY {
    my ($self) = @_;
    return unless (ref($self->{_fts}));
    $self->{_fts}->DESTROY();
}

1;