339 lines
8.7 KiB
Perl
339 lines
8.7 KiB
Perl
package WebGUI::Search::Index;
|
|
|
|
=head1 LEGAL
|
|
|
|
-------------------------------------------------------------------
|
|
WebGUI is Copyright 2001-2009 Plain Black Corporation.
|
|
-------------------------------------------------------------------
|
|
Please read the legal notices (docs/legal.txt) and the license
|
|
(docs/license.txt) that came with this distribution before using
|
|
this software.
|
|
-------------------------------------------------------------------
|
|
http://www.plainblack.com info@plainblack.com
|
|
-------------------------------------------------------------------
|
|
|
|
=cut
|
|
|
|
use strict;
|
|
use HTML::Entities;
|
|
|
|
=head1 NAME
|
|
|
|
Package WebGUI::Search::Index
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
A package for working with the WebGUI Search Engine.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use WebGUI::Search::Index;
|
|
|
|
=head1 METHODS
|
|
|
|
These methods are available from this package:
|
|
|
|
=cut
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 addFile ( path )
|
|
|
|
Use an external filter defined in the config file as searchIndexerPlugins to pull keywords from a file and
|
|
add them to the index.
|
|
|
|
=head3 path
|
|
|
|
The path to the filename to index, including the filename.
|
|
|
|
=cut
|
|
|
|
# Pull keywords out of the file at $path via getKeywordsForFile() and, if the
# extracted text has at least one non-whitespace character, hand it to
# addKeywords(). Returns addKeywords()'s result, or nothing when there is no
# usable text.
sub addFile {
    my ($self, $path) = @_;

    my $extracted = $self->getKeywordsForFile($path);

    # Nothing (or only whitespace) came back from the filter: leave the index alone.
    if ($extracted !~ /\S/) {
        return;
    }

    return $self->addKeywords($extracted);
}
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 addKeywords ( text )
|
|
|
|
Add more text to the keywords index for this asset.
|
|
|
|
=head3 text
|
|
|
|
A string (or array of strings) of text. You may optionally also put HTML here, and it will be automatically filtered.
|
|
|
|
=cut
|
|
|
|
# Append more text to the keywords already stored in assetIndex for this asset
# (the row matching this object's id and the asset's url).
#
# Arguments: one or more strings and/or array references of strings. HTML is
# allowed; everything is passed through _filterKeywords() before being stored.
# Returns the result of the database write.
sub addKeywords {
    my $self = shift;

    # The documented contract is "a string (or array of strings)". Expand array
    # references so their contents are indexed instead of "ARRAY(0x...)".
    my $text = join(" ", map { ref $_ eq 'ARRAY' ? @{$_} : $_ } @_);
    $text = $self->_filterKeywords($text);

    my $db  = $self->session->db;
    my @key = ($self->getId, $self->asset->get('url'));

    # Read the current keywords, then write them back with the new text appended.
    my ($keywords) = $db->quickArray("select keywords from assetIndex where assetId=? and url=?", [@key]);
    return $db->write("update assetIndex set keywords =? where assetId=? and url=?", [$keywords.' '.$text, @key]);
}
|
|
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 addRecord ( %fields )
|
|
|
|
Adds a duplicate record for the current asset, along with fields that are overridden.
|
|
|
|
=head3 %fields
|
|
|
|
A hash of fields to override in the record. Entries for url and keywords are mandatory, and
|
|
no record will be added unless they exist in the hash.
|
|
|
|
The lineage entry cannot be overridden.
|
|
|
|
=cut
|
|
|
|
# Write an additional assetIndex row for the current asset, starting from the
# asset's existing row and overriding it with the supplied %fields.
#
# %fields must contain true values for both url and keywords; otherwise nothing
# is written and the method returns early. The keywords are run through
# _filterKeywords(). The lineage always comes from the existing row, whatever
# the caller passed. Returns the result of executing the statement.
sub addRecord {
    my ($self, %fields) = @_;
    if (!($fields{url} && $fields{keywords})) {
        return;
    }

    # Load the asset's own index row; it supplies every column not overridden.
    my $asset   = $self->asset;
    my %current = $self->session->db->quickHash(
        'select * from assetIndex where assetId=? and url=?',
        [$asset->get('assetId'), $asset->get('url')],
    );

    $fields{keywords} = $self->_filterKeywords($fields{keywords});

    # Caller's fields win over the stored row, except lineage, which is forced back.
    my %record = (%current, %fields, lineage => $current{lineage});

    my @columns = qw/assetId url title creationDate revisionDate ownerUserId groupIdView groupIdEdit lineage className synopsis keywords subId/;
    my $sth = $self->session->db->prepare("replace into assetIndex (assetId, url, title, creationDate, revisionDate,
        ownerUserId, groupIdView, groupIdEdit, lineage, className, synopsis, keywords, subId) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )");
    return $sth->execute([@record{@columns}]);
}
|
|
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 asset ( )
|
|
|
|
Returns a reference to the asset object we're indexing.
|
|
|
|
=cut
|
|
|
|
# Accessor: the asset object stored in this indexer's _asset slot.
sub asset {
    my ($self) = @_;
    return $self->{_asset};
}
|
|
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 create ( asset )
|
|
|
|
Constructor that also creates the initial index of an asset.
|
|
|
|
=cut
|
|
|
|
# Constructor that (re)builds the index entry for $asset.
#
# Builds an indexer with new(), removes any rows already stored for the asset,
# then inserts one fresh assetIndex row. The keywords column is assembled from
# the asset's title, menu title, synopsis, url (with "/", "-" and "_" turned
# into spaces), filtered description and its WebGUI::Keyword keywords. The
# synopsis column falls back to the first 255 characters of the filtered
# description when the asset has no synopsis. Returns the indexer object.
sub create {
    my ($class, $asset) = @_;

    my $self = $class->new($asset);
    $self->delete;

    # Break the url into separate words so its parts are searchable.
    my $urlWords = $asset->get("url");
    $urlWords =~ s{[/_-]}{ }g;

    my $description = WebGUI::HTML::filter($asset->get('description'), "all");

    my @sources = (
        $asset->get("title"),
        $asset->get("menuTitle"),
        $asset->get("synopsis"),
        $urlWords,
        $description,
        WebGUI::Keyword->new($self->session)->getKeywordsForAsset({asset=>$asset}),
    );
    my $keywords = $self->_filterKeywords(join(" ", @sources));

    my $synopsis = $asset->get("synopsis") || substr($description,0,255);

    my $sth = $self->session->db->prepare("insert into assetIndex (assetId, title, url, creationDate, revisionDate,
        ownerUserId, groupIdView, groupIdEdit, lineage, className, synopsis, keywords) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )");

    # Column values, in the same order as the column list above.
    my @copied = qw(title url creationDate revisionDate ownerUserId groupIdView groupIdEdit lineage className);
    $sth->execute([
        $asset->getId,
        (map { $asset->get($_) } @copied),
        $synopsis,
        $keywords,
    ]);

    return $self;
}
|
|
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 delete ( )
|
|
|
|
Deletes this indexed asset.
|
|
|
|
=cut
|
|
|
|
# Remove every assetIndex row whose assetId matches this indexer's id.
# Returns the result of executing the delete statement.
sub delete {
    my ($self) = @_;
    my $sql = "delete from assetIndex where assetId=?";
    return $self->session->db->prepare($sql)->execute([$self->getId]);
}
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 DESTROY ( )
|
|
|
|
Destructor.
|
|
|
|
=cut
|
|
|
|
# Destructor. It only clears its own local copy of the object reference and
# touches nothing else.
sub DESTROY {
    my ($self) = @_;
    $self = undef;
}
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 _filterKeywords ( $keywords )
|
|
|
|
Perform filtering and cleaning up of the keywords before submitting them. Ideographic characters are padded
|
|
so that they are still searchable. HTML entities are decoded.
|
|
|
|
=head3 $keywords
|
|
|
|
A string containing keywords.
|
|
|
|
=cut
|
|
|
|
# Clean up a keyword string before it is stored in the index.
#
# $keywords is passed through WebGUI::HTML::filter(..., "all"), HTML entities
# are decoded, and the text is split into words: whitespace separates ordinary
# words, and every ideographic character becomes a word of its own. Leading and
# trailing punctuation is stripped from each word. Returns the surviving words
# joined by single spaces.
sub _filterKeywords {
    my $self     = shift;
    my $keywords = shift;

    $keywords = WebGUI::HTML::filter($keywords, "all");
    $keywords = HTML::Entities::decode_entities($keywords);
    utf8::upgrade($keywords);

    my @words;
    WORD:
    for my $word ( split /\s+|(\p{Ideographic})/, $keywords ) {
        # split yields undef for the capture group whenever the separator was
        # plain whitespace, and '' between adjacent separators.
        next WORD if !defined $word || $word eq '';

        # remove punctuation characters at the start and end of each word.
        $word =~ s/\A\p{isPunct}+//;
        $word =~ s/\p{isPunct}+\z//;

        # A token made only of punctuation is now empty; drop it so it does not
        # leave a stray extra space in the result.
        next WORD if $word eq '';

        # we add padding to ideographic characters to avoid minimum word length limits on indexing
        if ($word =~ /\p{Ideographic}/) {
            $word = q{''}.$word.q{''};
        }

        push @words, $word;
    }

    return join q{ }, @words;
}
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 getId ( )
|
|
|
|
Returns the ID used to create this object.
|
|
|
|
=cut
|
|
|
|
# Accessor: the id stored in this indexer's _id slot.
sub getId {
    my ($self) = @_;
    return $self->{_id};
}
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 getKeywordsForFile ( path )
|
|
|
|
Use an external filter defined in the config file as searchIndexerPlugins to get keywords
|
|
from a file.
|
|
|
|
=head3 path
|
|
|
|
The path to the filename to index, including the filename.
|
|
|
|
=cut
|
|
|
|
# Run the external filter configured for a file's extension and return what it
# prints.
#
# The filter command is looked up in the "searchIndexerPlugins" config entry
# using the lower-cased extension of $path, and is invoked with $path as its
# final argument. Returns the filter's output, or undef when the path has no
# extension, no filter is configured for it, or the pipe cannot be opened.
sub getKeywordsForFile {
    my $self = shift;
    my $path = shift;
    my $filters = $self->session->config->get("searchIndexerPlugins");
    my $content;
    if ($path =~ m/\.(\w+)$/) {
        my $type = lc($1);
        if ($filters && $filters->{$type}) {
            # The configured filter is a command line and may carry its own
            # arguments, so it still goes through the shell. The path is data:
            # single-quote it (escaping embedded single quotes) so spaces and
            # shell metacharacters in a file name reach the filter as one literal
            # argument instead of being interpreted.
            # NOTE(review): assumes a POSIX-style shell -- confirm for other platforms.
            (my $quotedPath = $path) =~ s/'/'\\''/g;
            open my $fh, '-|', "$filters->{$type} '$quotedPath'" or return undef; # open pipe to filter
            $content = do { local $/; <$fh> }; # slurp the filter's output
            close $fh;
        }
    }
    return $content;
}
|
|
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 setIsPublic ( boolean )
|
|
|
|
Sets the status of whether this asset will appear in public searches.
|
|
|
|
=cut
|
|
|
|
# Store $boolean in the isPublic column of every assetIndex row for this
# indexer's id. Returns the result of executing the update.
sub setIsPublic {
    my ($self, $boolean) = @_;
    my $sql = "update assetIndex set isPublic=? where assetId=?";
    return $self->session->db->prepare($sql)->execute([$boolean, $self->getId]);
}
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 new ( asset )
|
|
|
|
Constructor.
|
|
|
|
=head3 asset
|
|
|
|
A reference to an asset object.
|
|
|
|
=cut
|
|
|
|
# Constructor. Keeps the asset itself plus the session and id obtained from it,
# and returns the new object blessed into $class.
sub new {
    my ($class, $asset) = @_;
    return bless {
        _asset   => $asset,
        _session => $asset->session,
        _id      => $asset->getId,
    }, $class;
}
|
|
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 session ( )
|
|
|
|
Returns a reference to the current session.
|
|
|
|
=cut
|
|
|
|
# Accessor: the session stored in this indexer's _session slot.
sub session {
    my ($self) = @_;
    return $self->{_session};
}
|
|
|
|
#-------------------------------------------------------------------
|
|
|
|
=head2 updateSynopsis ( text )
|
|
|
|
Overrides the asset's default synopsis with a new chunk of text.
|
|
|
|
NOTE: This doesn't change the asset itself, only the synopsis in the search index.
|
|
|
|
=head3 text
|
|
|
|
The text to put in place of the current synopsis.
|
|
|
|
=cut
|
|
|
|
# Replace the synopsis column of every assetIndex row for this indexer's id
# with $text. Only the search index is changed, not the asset. Returns the
# result of executing the update.
sub updateSynopsis {
    my ($self, $text) = @_;
    my $sql = "update assetIndex set synopsis=? where assetId=?";
    return $self->session->db->prepare($sql)->execute([$text,$self->getId]);
}
|
|
|
|
|
|
|
|
1;
|
|
|