From fd3fb924f3ae3818c4ac853c47ec2ef6d9dc4d58 Mon Sep 17 00:00:00 2001 From: JT Smith Date: Fri, 20 Jan 2006 21:18:09 +0000 Subject: [PATCH] added search indexer --- docs/upgrades/upgrade_6.8.5-6.9.0.pl | 7 ++-- etc/WebGUI.conf.original | 9 +++++ lib/WebGUI/Asset.pm | 27 ++++++++------ lib/WebGUI/Asset/File.pm | 15 ++++++++ lib/WebGUI/Asset/Post.pm | 24 ++++++++++++ lib/WebGUI/Asset/RichEdit.pm | 15 ++++++++ lib/WebGUI/Asset/Snippet.pm | 13 +++++++ lib/WebGUI/Asset/Template.pm | 15 ++++++++ lib/WebGUI/Asset/Wobject/Poll.pm | 15 ++++++++ lib/WebGUI/Asset/Wobject/Product.pm | 20 ++++++++++ lib/WebGUI/Asset/_NewAsset.skeleton | 15 ++++++++ lib/WebGUI/Search/Index.pm | 55 +++++++++++++++++++++++++++- 12 files changed, 214 insertions(+), 16 deletions(-) diff --git a/docs/upgrades/upgrade_6.8.5-6.9.0.pl b/docs/upgrades/upgrade_6.8.5-6.9.0.pl index 32fc397e7..85a8c3bd3 100644 --- a/docs/upgrades/upgrade_6.8.5-6.9.0.pl +++ b/docs/upgrades/upgrade_6.8.5-6.9.0.pl @@ -36,12 +36,13 @@ sub addSearchEngine { startDate bigint, endDate bigint, creationDate bigint, - modifiedDate bigint, + revisionDate bigint, ownerUserId varchar(22) binary, - groupToView varchar(22) binary, - groupToEdit varchar(22) binary, + groupIdView varchar(22) binary, + groupIdEdit varchar(22) binary, lineage varchar(255), className varchar(255), + isPublic int not null default 1, keywords mediumtext, fulltext (keywords) )"); diff --git a/etc/WebGUI.conf.original b/etc/WebGUI.conf.original index a5c8646c3..ea1109d5b 100644 --- a/etc/WebGUI.conf.original +++ b/etc/WebGUI.conf.original @@ -117,6 +117,15 @@ templateParsers = WebGUI::Asset::Template::HTMLTemplate defaultTemplateParser = WebGUI::Asset::Template::HTMLTemplate +# Specify external helper apps that will enable WebGUI's search +# engine to index content in various uploaded file formats. The +# helpers must take the path to the file as an argument and +# return either text or html content. 
+ +searchIndexerPlugins = txt => "/bin/cat", \ + readme => "/bin/cat", \ + html => "/bin/cat", \ + htm => "/bin/cat" # Specify a the list of assets you want to appear in your # Add Content menus. diff --git a/lib/WebGUI/Asset.pm b/lib/WebGUI/Asset.pm index fb28d5def..daa3b7b5e 100644 --- a/lib/WebGUI/Asset.pm +++ b/lib/WebGUI/Asset.pm @@ -28,6 +28,7 @@ use WebGUI::AdminConsole; use WebGUI::Cache; use WebGUI::Form; use WebGUI::HTMLForm; +use WebGUI::Search::Index; use WebGUI::TabForm; use WebGUI::Utility; @@ -741,18 +742,6 @@ sub getImportNode { return WebGUI::Asset->newByDynamicClass($session, "PBasset000000000000002"); } -#------------------------------------------------------------------- - -=head2 getIndexerParams ( ) - -Override this method and return a hash reference that includes the properties necessary to index the content of the wobject. -Currently does nothing. - -=cut - -sub getIndexerParams { - return {}; -} #------------------------------------------------------------------- @@ -993,6 +982,20 @@ sub getValue { } +#------------------------------------------------------------------- + +=head2 indexContent ( ) + +Returns an indexer object for this asset. When this method is called the asset's base content gets stored in the index. This method is often overridden so that a particular asset can insert additional content other than the basic properties. Such uses include indexing attached files or collateral data. 
+ +=cut + +sub indexContent { + my $self = shift; + return WebGUI::Search::Index->create($self); +} + + #------------------------------------------------------------------- =head2 new ( session, assetId [, className, revisionDate ] ) diff --git a/lib/WebGUI/Asset/File.pm b/lib/WebGUI/Asset/File.pm index b64acbe25..a55e95c7d 100644 --- a/lib/WebGUI/Asset/File.pm +++ b/lib/WebGUI/Asset/File.pm @@ -192,6 +192,21 @@ sub getStorageLocation { } +#------------------------------------------------------------------- + +=head2 indexContent ( ) + +Indexes the content of the attached file. See WebGUI::Asset::indexContent() for additional details. + +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + $indexer->addFile($self->getStorageLocation->getPath($self->get("filename"))); +} + + #------------------------------------------------------------------- sub processPropertiesFromFormPost { my $self = shift; diff --git a/lib/WebGUI/Asset/Post.pm b/lib/WebGUI/Asset/Post.pm index 417825114..fa3ac2d04 100644 --- a/lib/WebGUI/Asset/Post.pm +++ b/lib/WebGUI/Asset/Post.pm @@ -532,6 +532,30 @@ sub hasRated { #------------------------------------------------------------------- +=head2 indexContent ( ) + +Indexes the post content, the user defined fields, the username, and any attached files. See WebGUI::Asset::indexContent() for additional details. 
+ +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + $indexer->addKeywords($self->get("content")); + $indexer->addKeywords($self->get("userDefined1")); + $indexer->addKeywords($self->get("userDefined2")); + $indexer->addKeywords($self->get("userDefined3")); + $indexer->addKeywords($self->get("userDefined4")); + $indexer->addKeywords($self->get("userDefined5")); + $indexer->addKeywords($self->get("username")); + my $storage = $self->getStorageLocation; + foreach my $file (@{$storage->getFiles}) { + $indexer->addFile($storage->getPath($file)); + } +} + +#------------------------------------------------------------------- + =head2 incrementViews ( ) Increments the views counter for this post. diff --git a/lib/WebGUI/Asset/RichEdit.pm b/lib/WebGUI/Asset/RichEdit.pm index 4a76511d3..e1b864723 100644 --- a/lib/WebGUI/Asset/RichEdit.pm +++ b/lib/WebGUI/Asset/RichEdit.pm @@ -489,6 +489,21 @@ sub getRichEditor { } +#------------------------------------------------------------------- + +=head2 indexContent ( ) + +Marks the indexed asset as not public. See WebGUI::Asset::indexContent() for additional details. + +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + $indexer->setIsPublic(0); +} + + #------------------------------------------------------------------- sub view { my $self = shift; diff --git a/lib/WebGUI/Asset/Snippet.pm b/lib/WebGUI/Asset/Snippet.pm index ed6445b3a..c2f366689 100644 --- a/lib/WebGUI/Asset/Snippet.pm +++ b/lib/WebGUI/Asset/Snippet.pm @@ -142,7 +142,20 @@ sub getToolbar { return $self->SUPER::getToolbar(); } +#------------------------------------------------------------------- +=head2 indexContent ( ) + +Indexes the content of the snippet and marks it as not public. See WebGUI::Asset::indexContent() for additional details. 
+ +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + $indexer->addKeywords($self->get("snippet")); + $indexer->setIsPublic(0); +} #------------------------------------------------------------------- diff --git a/lib/WebGUI/Asset/Template.pm b/lib/WebGUI/Asset/Template.pm index c5fbcd26c..a4d661204 100644 --- a/lib/WebGUI/Asset/Template.pm +++ b/lib/WebGUI/Asset/Template.pm @@ -229,6 +229,21 @@ sub getParser { } +#------------------------------------------------------------------- + +=head2 indexContent ( ) + +Marks the indexed asset as not public. See WebGUI::Asset::indexContent() for additional details. + +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + $indexer->setIsPublic(0); +} + + #------------------------------------------------------------------- =head2 process ( vars ) diff --git a/lib/WebGUI/Asset/Wobject/Poll.pm b/lib/WebGUI/Asset/Wobject/Poll.pm index 6c0c7e890..e626f4301 100644 --- a/lib/WebGUI/Asset/Wobject/Poll.pm +++ b/lib/WebGUI/Asset/Wobject/Poll.pm @@ -242,6 +242,21 @@ sub getEditForm { return $tabform; } +#------------------------------------------------------------------- + +=head2 indexContent ( ) + +Indexes the question and answers. See WebGUI::Asset::indexContent() for additional details. + +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + $indexer->addKeywords($self->get("question")." 
".$self->get("answers")); +} + + #------------------------------------------------------------------- sub processPropertiesFromFormPost { my $self = shift; diff --git a/lib/WebGUI/Asset/Wobject/Product.pm b/lib/WebGUI/Asset/Wobject/Product.pm index b2afdc375..31b341bba 100644 --- a/lib/WebGUI/Asset/Wobject/Product.pm +++ b/lib/WebGUI/Asset/Wobject/Product.pm @@ -275,6 +275,26 @@ sub getThumbnailUrl { return $store->getUrl($self->getThumbnailFilename($store)); } +#------------------------------------------------------------------- + +=head2 indexContent ( ) + +Indexes the product's features, benefits, and specifications (name, value and units joined by spaces). See WebGUI::Asset::indexContent() for additional details. + +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + my @data = $self->session->db->buildArray("select feature from Product_feature where assetId=".$self->session->db->quote($self->getId)); + $indexer->addKeywords(join(" ", @data)); + @data = $self->session->db->buildArray("select benefit from Product_benefit where assetId=".$self->session->db->quote($self->getId)); + $indexer->addKeywords(join(" ", @data)); + @data = $self->session->db->buildArray("select concat(name,' ',value,' ',units) from Product_specification where assetId=".$self->session->db->quote($self->getId)); + $indexer->addKeywords(join(" ", @data)); +} + + #------------------------------------------------------------------- sub purge { my $self = shift; diff --git a/lib/WebGUI/Asset/_NewAsset.skeleton b/lib/WebGUI/Asset/_NewAsset.skeleton index c4627feec..bcd0945aa 100644 --- a/lib/WebGUI/Asset/_NewAsset.skeleton +++ b/lib/WebGUI/Asset/_NewAsset.skeleton @@ -176,6 +176,21 @@ sub getIcon { } +#------------------------------------------------------------------- + +=head2 indexContent ( ) + +Marks the indexed asset as not public. See WebGUI::Asset::indexContent() for additional details. 
+ +=cut + +sub indexContent { + my $self = shift; + my $indexer = $self->SUPER::indexContent; + $indexer->setIsPublic(0); +} + + #------------------------------------------------------------------- =head2 processPropertiesFromFormPost ( ) diff --git a/lib/WebGUI/Search/Index.pm b/lib/WebGUI/Search/Index.pm index 0b4343979..0b1ce0d64 100644 --- a/lib/WebGUI/Search/Index.pm +++ b/lib/WebGUI/Search/Index.pm @@ -36,6 +36,30 @@ These methods are available from this package: =cut +#------------------------------------------------------------------- + +=head2 addFile ( path ) + +Runs the external filter that the searchIndexerPlugins config setting maps to the file's extension and adds the filter's output to the index keywords. Nothing is run or indexed when the file has no extension or no filter is configured for it, and output that is empty or only whitespace is ignored. The path is escaped with \Q...\E before it is handed to the shell. + +=head3 path + +The path to the filename to index, including the filename. + +=cut + +sub addFile { + my $self = shift; + my $path = shift; + my ($ext) = $path =~ m/\.(\w+)$/; + my $type = defined $ext ? lc($ext) : ""; + my $filters = $self->session->config->get("searchIndexerPlugins"); + my $filter = $filters->{$type}; + my $content = $filter ? `$filter \Q$path\E` : ""; + $self->addKeywords($content) if (defined $content && $content !~ m/^\s*$/); +} + + #------------------------------------------------------------------- =head2 addKeywords ( text ) @@ -57,6 +81,20 @@ sub addKeywords { } +#------------------------------------------------------------------- + +=head2 asset ( ) + +Returns a reference to the asset object we're indexing. + +=cut + +sub asset { + my $self = shift; + return $self->{_asset}; +} + + #------------------------------------------------------------------- =head2 create ( asset ) @@ -125,6 +163,21 @@ sub getId { #------------------------------------------------------------------- +=head2 setIsPublic ( boolean ) + +Sets the status of whether this asset will appear in public searches. The boolean is written to the isPublic column of this asset's row in the assetIndex table. + +=cut + +sub setIsPublic { + my $self = shift; + my $boolean = shift; + my $set = $self->session->db->prepare("update assetIndex set isPublic=? 
where assetId=?"); + $set->execute($boolean, $self->getId); +} + +#------------------------------------------------------------------- + =head2 new ( asset ) Constructor. @@ -138,7 +191,7 @@ A reference to an asset object. sub new { my $class = shift; my $asset = shift; - my $self = {_session=>$asset->session, _id=>$asset->getId}; + my $self = bless {_asset=>$asset, _session=>$asset->session, _id=>$asset->getId}, $class; return $self; }