webgui/lib/WebGUI/Search.pm

419 lines
12 KiB
Perl

package WebGUI::Search;
=head1 LEGAL
-------------------------------------------------------------------
WebGUI is Copyright 2001-2006 Plain Black Corporation.
-------------------------------------------------------------------
Please read the legal notices (docs/legal.txt) and the license
(docs/license.txt) that came with this distribution before using
this software.
-------------------------------------------------------------------
http://www.plainblack.com info@plainblack.com
-------------------------------------------------------------------
=cut
use strict;
use WebGUI::Asset;
=head1 NAME
Package WebGUI::Search
=head1 DESCRIPTION
A package for creating queries with the WebGUI Search Engine.
=head1 SYNOPSIS
use WebGUI::Search;
=head1 METHODS
These methods are available from this package:
=cut
#-------------------------------------------------------------------
=head2 _getQuery ( columnsToSelect )
This is a private method and should never be used outside of this class.
=cut
sub _getQuery {
    # Assembles the SQL select statement for the current search state.
    #   columnsToSelect - array reference of column expressions to select.
    # Reads _score, _join, _isPublic and _where from the object and returns
    # the statement as a string; nothing is executed here.
    my ($self, $selectsRef) = @_;

    # The relevance expression, when present, is selected as the last column.
    my @selects = @$selectsRef;
    push @selects, $self->{_score} if $self->{_score};

    my $joins = $self->{_join} ? join(" ", @{ $self->{_join} }) : '';

    my $sql = 'select ' . join(', ', @selects) . ' from assetIndex ' . $joins . ' where ';
    $sql .= 'isPublic = 1 and ' if $self->{_isPublic};
    $sql .= '(' . $self->{_where} . ')';

    # Results are only ordered when a score column was selected.
    $sql .= ' order by score desc' if $self->{_score};

    return $sql;
}
#-------------------------------------------------------------------
=head2 getAssetIds ( )
Returns an array reference containing all the asset ids of the assets that matched.
=cut
sub getAssetIds {
    # Runs the current search selecting only assetIndex.assetId and returns
    # an array reference holding the first column of every row, in the order
    # the rows were returned.
    my ($self) = @_;

    my $sql = $self->_getQuery(['assetIndex.assetId']);
    my $sth = $self->session->db->prepare($sql);
    $sth->execute($self->{_params});

    my @assetIds;
    while (my @row = $sth->array) {
        push @assetIds, $row[0];
    }
    return \@assetIds;
}
#-------------------------------------------------------------------
=head2 getAssets ( )
Returns an array reference containing asset objects for those that matched.
=cut
sub getAssets {
    # Runs the current search and instantiates a WebGUI::Asset for every row
    # from its asset id, class name and revision date. Rows for which the
    # constructor returns undef are logged as a warning and left out.
    # Returns an array reference of the asset objects.
    my ($self) = @_;

    my @wanted = qw(assetIndex.assetId assetIndex.className assetIndex.revisionDate);
    my $sql = $self->_getQuery(\@wanted);
    my $sth = $self->session->db->prepare($sql);
    $sth->execute($self->{_params});

    my @found;
    while (my @row = $sth->array) {
        my ($id, $className, $revisionDate) = @row;
        my $asset = WebGUI::Asset->new($self->session, $id, $className, $revisionDate);
        if (defined $asset) {
            push @found, $asset;
        }
        else {
            # The index row outlived the asset it describes.
            $self->session->errorHandler->warn("Search index contains assetId $id even though it no longer exists.");
        }
    }
    return \@found;
}
#-------------------------------------------------------------------
=head2 getPaginatorResultSet ( currentURL, paginateAfter, pageNumber, formVar )
Returns a paginator object containing the search result set data.
=head3 currentURL
The URL of the current page including attributes. The page number will be appended to this in all links generated by the paginator.
=head3 paginateAfter
The number of rows to display per page. If left blank it defaults to 50.
=head3 pageNumber
By default the page number will be determined by looking at $self->session->form->process("pn"). If that is empty the page number will be defaulted to "1". If you'd like to override the page number specify it here.
=head3 formVar
Specify the form variable the paginator should use in its links. Defaults to "pn".
=cut
sub getPaginatorResultSet {
    # Builds the standard result query (the ten assetIndex columns plus any
    # extra columns stored in _columns) and hands it, together with the
    # placeholder values, to a new WebGUI::Paginator.
    #   currentURL, paginateAfter, pageNumber, formVar - passed through to
    #   WebGUI::Paginator->new unchanged.
    # Returns the paginator object.
    my ($self, $url, $paginate, $pageNumber, $formVar) = @_;

    my @columns = map { "assetIndex.$_" } qw(
        assetId      title        url          synopsis     ownerUserId
        groupIdView  groupIdEdit  creationDate revisionDate className
    );
    if ($self->{_columns}) {
        push @columns, @{ $self->{_columns} };
    }

    my $sql   = $self->_getQuery(\@columns);
    my $pager = WebGUI::Paginator->new($self->session, $url, $paginate, $pageNumber, $formVar);
    $pager->setDataByQuery($sql, undef, undef, $self->{_params});
    return $pager;
}
#-------------------------------------------------------------------
=head2 getResultSet ( )
Returns a WebGUI::SQL::ResultSet object containing the search results with
columns labeled "assetId", "title", "url", "synopsis", "ownerUserId",
"groupIdView", "groupIdEdit", "creationDate", "revisionDate", and "className",
in addition to any columns passed as rules.
=cut
sub getResultSet {
    # Prepares and executes the standard result query (the ten assetIndex
    # columns plus any extra columns stored in _columns) and returns the
    # executed statement object so the caller can fetch rows from it.
    my ($self) = @_;

    my @columns = map { "assetIndex.$_" } qw(
        assetId      title        url          synopsis     ownerUserId
        groupIdView  groupIdEdit  creationDate revisionDate className
    );
    if ($self->{_columns}) {
        push @columns, @{ $self->{_columns} };
    }

    my $sql = $self->_getQuery(\@columns);
    my $sth = $self->session->db->prepare($sql);
    $sth->execute($self->{_params});
    return $sth;
}
#-------------------------------------------------------------------
=head2 new ( session [ , isPublic ] )
Constructor.
=head3 session
A reference to the current session.
=head3 isPublic
A boolean indicating whether this search should search all internal data (0), or just public data (1). Defaults to just public data (1).
=cut
sub new {
    # Constructor.
    #   session  - stored on the object as _session.
    #   isPublic - only the value "0" turns the public-only filter off; any
    #              other value, including a missing one, leaves it on.
    # Returns the new object blessed into the invoking class.
    my ($class, $session, $publicFlag) = @_;
    my $self = {
        _session  => $session,
        _isPublic => ($publicFlag eq "0" ? 0 : 1),
    };
    return bless $self, $class;
}
#-------------------------------------------------------------------
=head2 rawClause ( sql [, placeholders ] )
Tells the search engine to use a custom sql where clause that you've designed for the assetIndex table instead of using the API to build it. It also returns a reference to the object so you can join a result method with it like this:
my $assetIds = WebGUI::Search->new($session)->rawClause($sql, $params)->getAssetIds;
=head3 sql
The where clause to execute. It should not actually contain the "where" term itself.
=head3 placeholders
A list of placeholder parameters to go along with the query. See WebGUI::SQL::ResultSet::execute() for details.
=cut
sub rawClause {
    # Stores a caller-supplied where clause and its placeholder values on the
    # object, replacing whatever was stored before.
    #   sql          - the where clause, without the "where" keyword.
    #   placeholders - the placeholder values handed to execute().
    # Returns $self so a result method can be chained onto the call.
    my ($self, $sql, $placeholders) = @_;
    @{$self}{qw(_where _params)} = ($sql, $placeholders);
    return $self;
}
#-------------------------------------------------------------------
=head2 search ( rules )
A rules engine for WebGUI's search system. It also returns a reference to the search object so that you can join a result method with it like:
my $assetIds = WebGUI::Search->new($session)->search(\%rules)->getAssetIds;
=head3 rules
A hash reference containing rules for a search. Each key names a rule and its value holds the parameters of that rule. Here's an example rule set:
{ keywords => "something to search for", lineage => [ "000001000005", "000001000074000003" ] };
=head4 keywords
This rule limits the search results to assets that match keyword criteria.
keywords => "foo bar"
=head4 lineage
This rule limits the search to a specific set of descendants in the asset tree. An array reference of asset lineages to match against.
lineage => [ "000001000003", "000001000024000005" ]
=head4 classes
This rule limits the search to a specific set of asset classes. An array reference of class names.
classes => [ "WebGUI::Asset::Wobject::Article", "WebGUI::Asset::Snippet" ]
=head4 creationDate
This rule limits the search to a creation date range. It has two parameters: "start" and "end". Start and end represent the start and end dates to search in, which are represented as epoch dates. If start is not specified, it is infinity into the past. If end date is not specified, it is infinity into the future.
creationDate => {
start=>1110011,
end=>30300003
}
=head4 revisionDate
This rule limits the search to a revision date range. It has two parameters: "start" and "end". Start and end represent the start and end dates to search in, which are represented as epoch dates. If start is not specified, it is infinity into the past. If end date is not specified, it is infinity into the future.
revisionDate => {
start=>1110011,
end=>30300003
}
=head4 where
This rule adds an additional where clause to the search.
where => 'className NOT LIKE "WebGUI::Asset::Wobject%"'
=head4 join
This rule adds table join clauses to the query. It may be a single join clause or an array reference of join clauses.
join => 'join assetData on assetId = assetData.assetId'
=head4 columns
This rule allows for additional columns to be returned by getResultSet() and getPaginatorResultSet(). It may be a single column or an array reference of columns.
columns => ['assetData.title','assetData.description']
TODO: 'where' and 'join' were added hackishly. It'd be nicer to see a data
structure for 'join', and the ability to have multiple 'where' clauses with
placeholders and parameters.
=cut
sub search {
    # Translates a rules hash reference into a where clause plus placeholder
    # values and stores both on the object for the result methods to use.
    #   rules - hash reference; the recognised keys are keywords, lineage,
    #           classes, creationDate, revisionDate, where, join and columns.
    # The caller's rules hash is not modified.
    # Returns $self so a result method can be chained onto the call.
    my $self    = shift;
    my $rules   = shift;
    my @params  = ();
    my @clauses = ();

    # The score select carries a placeholder whose value is pushed below, so
    # one left over from an earlier keyword search on this object would leave
    # the statement with more placeholders than values.
    delete $self->{_score};

    if ($rules->{keywords}) {
        my $keywords = $rules->{keywords};
        unless ($keywords =~ m/"|\*/) { # do wildcards for people, like they'd expect
            my @terms = split(' ', $keywords);
            foreach my $term (@terms) {
                utf8::decode($term);

                # Set a leading boolean-mode operator aside before segmenting;
                # otherwise it is split off as its own segment and ends up
                # quoted inside the term. Terms without an operator get "+"
                # so that results need to match ALL keywords.
                my $operator = ($term =~ s/^([+-])//) ? $1 : "+";

                # Word segmentation (added for Chinese): every run of
                # [A-z|\d] characters and every other single non-space
                # character becomes a segment, and segments are glued
                # together with ''. A term that needed gluing is wrapped in
                # double quotes.
                # NOTE(review): the class [A-z|\d] also matches [ \ ] ^ _ `
                # and | -- possibly [A-Za-z\d] was meant; left unchanged
                # because it affects how terms are segmented.
                my @segs = split /([A-z|\d]+|\S)/, $term;
                $term = join " ", @segs;
                $term =~ s/\s{2,}/ /g;
                $term =~ s/(^\s|\s$)//g;
                $term =~ s/\s/\'\'/g;
                if ($term =~ m/\'/) {
                    $term = '"' . $term . '"';
                }

                $term = $operator . $term . "*";
            }
            $keywords = join(" ", @terms);
        }
        # One value for the score expression in the select list and one for
        # the boolean-mode match in the where clause.
        push(@params, $keywords, $keywords);
        $self->{_score} = "match (keywords) against (?) as score";
        push(@clauses, "match (keywords) against (? in boolean mode)");
    }

    if ($rules->{lineage}) {
        my @lineages = grep { defined } @{ $rules->{lineage} };
        if (@lineages) {
            push(@params, map { $_ . "%" } @lineages);
            push(@clauses, join(" or ", ("lineage like ?") x @lineages));
        }
    }

    if ($rules->{classes}) {
        my @classes = grep { defined } @{ $rules->{classes} };
        if (@classes) {
            push(@params, @classes);
            push(@clauses, join(" or ", ("className=?") x @classes));
        }
    }

    # A missing start or end leaves that side of the date range open.
    foreach my $column (qw(creationDate revisionDate)) {
        next unless $rules->{$column};
        my $start = $rules->{$column}{start} || 0;
        my $end   = $rules->{$column}{end}   || 9999999999999999999999;
        push(@clauses, "$column between ? and ?");
        push(@params, $start, $end);
    }

    if ($rules->{where}) {
        push(@clauses, $rules->{where});
    }

    # Joins and extra columns are consumed later by _getQuery and the result
    # set methods. A single value is wrapped in a fresh array reference
    # instead of being written back into the caller's hash. Values stored by
    # an earlier call are kept when the rule is absent.
    if ($rules->{join}) {
        my $join = $rules->{join};
        $self->{_join} = (ref $join eq "ARRAY") ? $join : [$join];
    }
    if ($rules->{columns}) {
        my $columns = $rules->{columns};
        $self->{_columns} = (ref $columns eq "ARRAY") ? $columns : [$columns];
    }

    $self->{_params} = \@params;

    # With no clauses the old code stored "()", which is not valid SQL; an
    # always-true condition expresses "no restriction" instead.
    $self->{_where} = scalar(@clauses)
        ? "(" . join(") and (", @clauses) . ")"
        : "1=1";

    return $self;
}
#-------------------------------------------------------------------
=head2 session ( )
Returns a reference to the current session.
=cut
sub session {
    # Accessor: returns the session stored on the object as _session.
    return $_[0]->{_session};
}
1;