diff --git a/docs/gotcha.txt b/docs/gotcha.txt
index 3f2b538af..793e4e178 100644
--- a/docs/gotcha.txt
+++ b/docs/gotcha.txt
@@ -17,12 +17,6 @@ save you many hours of grief.
 	* You need to upgrade to the latest 6.8 release before you can upgrade
 	  to 6.9.
 
-	* Due to new features in 6.9 there are several new perl modules you
-	  need to install before the upgrade:
-
-		Bit::Vector::Minimal
-		Plucene
-
 	* If you wish to use one of the new optional template engines you'll
 	  need to install the perl modules for them:
 
diff --git a/docs/upgrades/upgrade_6.8.5-6.9.0.pl b/docs/upgrades/upgrade_6.8.5-6.9.0.pl
index 5ef8f18af..32fc397e7 100644
--- a/docs/upgrades/upgrade_6.8.5-6.9.0.pl
+++ b/docs/upgrades/upgrade_6.8.5-6.9.0.pl
@@ -26,6 +26,43 @@ removeFiles();
 
 finish($session); # this line required
 
+#-------------------------------------------------
+sub addSearchEngine {
+	print "\tUpgrading search engine.\n" unless ($quiet);
+	$session->db->write("create table assetIndex (
+		assetId varchar(22) binary not null primary key,
+		title varchar(255),
+		synopsis text,
+		startDate bigint,
+		endDate bigint,
+		creationDate bigint,
+		revisionDate bigint,
+		ownerUserId varchar(22) binary,
+		groupIdView varchar(22) binary,
+		groupIdEdit varchar(22) binary,
+		lineage varchar(255),
+		className varchar(255),
+		keywords mediumtext,
+		fulltext (keywords)
+		)"); # column names must match the insert in WebGUI::Search::Index::create (revisionDate, groupIdView, groupIdEdit)
+	my @searchParents = $session->db->buildArray("select parentId from asset where className='WebGUI::Asset::Wobject::IndexedSearch'"); # NOTE(review): fetched but never used in this sub
+	my @searchIds = $session->db->buildArray("select assetId from asset where className='WebGUI::Asset::Wobject::IndexedSearch'");
+	$session->db->write("delete from asset where className='WebGUI::Asset::Wobject::IndexedSearch'");
+	my $deleteWobject = $session->db->prepare("delete from wobject where assetId=?");
+	my $deleteAssetData = $session->db->prepare("delete from assetData where assetId=?");
+	foreach my $id (@searchIds) {
+		$deleteWobject->execute([$id]); # ResultSet::execute takes an array ref of placeholder values
+		$deleteAssetData->execute([$id]);
+	}
+	$deleteWobject->finish;
+	$deleteAssetData->finish;
+	$session->db->write("drop table if exists IndexedSearch");
+	$session->db->write("drop table if exists IndexedSearch_default");
+	$session->db->write("drop table if exists IndexedSearch_default_data");
+	$session->db->write("drop table if exists IndexedSearch_default_words");
+	$session->db->write("drop table if exists IndexedSearch_docInfo");
+}
+
 #-------------------------------------------------
 sub templateParsers {
 	print "\tAdding support for multiple template parsers.\n" unless ($quiet);
@@ -46,6 +83,8 @@ sub removeFiles {
 	unlink '../../lib/WebGUI/Style.pm';
 	unlink '../../lib/WebGUI/Setting.pm';
 	unlink '../../lib/WebGUI/Grouping.pm';
+	unlink '../../lib/WebGUI/Asset/Wobject/IndexedSearch.pm';
+	rmtree('../../lib/WebGUI/Asset/Wobject/IndexedSearch');
 }
 
 
diff --git a/lib/WebGUI/SQL/ResultSet.pm b/lib/WebGUI/SQL/ResultSet.pm
index d1c9a316a..935f9727e 100644
--- a/lib/WebGUI/SQL/ResultSet.pm
+++ b/lib/WebGUI/SQL/ResultSet.pm
@@ -115,9 +115,8 @@ An array reference containing a list of values to be used in the placeholders de
 
 sub execute {
 	my $self = shift;
-	my $placeholders = shift || [];
 	my $sql = $self->{_sql};
-	$self->sth->execute(@{$placeholders}) or $self->db->session->errorHandler->fatal("Couldn't execute prepared statement: $sql Root cause: ". $self->errorMessage);
+	$self->sth->execute(@{ shift() || [] }) or $self->db->session->errorHandler->fatal("Couldn't execute prepared statement: $sql Root cause: ". $self->errorMessage); # flatten the array ref: DBI wants the bind values as a list
 }
 
 
diff --git a/lib/WebGUI/Search/DateTimeFilter.pm b/lib/WebGUI/Search/DateTimeFilter.pm
deleted file mode 100644
index 4977d887e..000000000
--- a/lib/WebGUI/Search/DateTimeFilter.pm
+++ /dev/null
@@ -1,91 +0,0 @@
-package WebGUI::Search::DateTimeFilter;
-
-=head1 LEGAL
-
- -------------------------------------------------------------------
-  WebGUI is Copyright 2001-2006 Plain Black Corporation.
- ------------------------------------------------------------------- - Please read the legal notices (docs/legal.txt) and the license - (docs/license.txt) that came with this distribution before using - this software. - ------------------------------------------------------------------- - http://www.plainblack.com info@plainblack.com - ------------------------------------------------------------------- - -=cut - -use strict; -use base 'Plucene::Search::Filter'; -use Bit::Vector::Minimal; -use Plucene::Index::Term; -use WebGUI::Utility; - -=head1 DESCRIPTION - -This package is a replacement for Plucene::Search::DateFilter that uses epochs rather than Time::Piece objects. - -=cut - - -#------------------------------------------------------------------- - -=head2 new ( field [, from, to] ) - -Constructor. - -=head3 field - -The field name to build the filter for. - -=head3 from - -An epoch date to start the search from. Defaults to now minus one year. - -=head3 to - -An epoch date to end searching on. Defaults to now. - -=cut - -sub new { - my $class = shift; - my $args = shift; - bless { - field => $args->{field}, - from => toBase36(($args->{from}||time()-60*60*24*365)*1000), - to => toBase36(($args->{to}||time())*1000), - }, $class; -} - -#------------------------------------------------------------------- - -=head2 bits ( ) - -The actual filter method required by Plucene::Search::IndexSearcher. 
- -=cut - -sub bits { - my ($self, $reader) = @_; - my $bits = Bit::Vector::Minimal->new(size => $reader->max_doc); - my $enum = $reader->terms( - Plucene::Index::Term->new({ - field => $self->{field}, - text => $self->{from} })); - return $bits unless $enum->term; - my $termdocs = $reader->term_docs; - - my $stop = Plucene::Index::Term->new({ - field => $self->{field}, - text => $self->{to} }); - while ($enum->term->le($stop)) { - $termdocs->seek($enum->term); - $bits->set($termdocs->doc) while $termdocs->next; - last unless $enum->next; - } - return $bits; -} - -1; - - diff --git a/lib/WebGUI/Search/Index.pm b/lib/WebGUI/Search/Index.pm index a04dd3b18..0b4343979 100644 --- a/lib/WebGUI/Search/Index.pm +++ b/lib/WebGUI/Search/Index.pm @@ -16,14 +16,6 @@ package WebGUI::Search::Index; use strict; use warnings; -use Plucene::Analysis::SimpleAnalyzer; -use Plucene::Document; -use Plucene::Document::Field; -use Plucene::Index::Reader; -use Plucene::Index::Writer; -use Plucene::Index::Term; -use File::Spec::Functions qw(catfile); -use WebGUI::Utility; =head1 NAME @@ -46,168 +38,63 @@ These methods are available from this package: #------------------------------------------------------------------- -=head2 addDate ( key, epoch ) +=head2 addKeywords ( text ) -Adds a date field to the index which may later be used to search on date ranges. - -=head3 key - -A unique label to store this data. - -=head3 epoch - -A date represented as the number of seconds since January 1, 1970. - -=cut - -sub addDate { - my $self = shift; - my $key = shift; - my $epoch = shift; - $self->addKeyword($key, toBase36($epoch*1000)); -} - -#------------------------------------------------------------------- - -=head2 addKeyword ( key, text ) - -Adds some text that is stored and indexed, but not tokenized. This is best for single word items like keys. - -=head3 key - -A unique label to store this data. +Add more text to the keywords index for this asset. =head3 text -A string of text. 
+A string of text. You may optionally also put HTML here, and it will be automatically filtered.
 
 =cut
 
-sub addKeyword {
+sub addKeywords {
 	my $self = shift;
-	my $key = shift;
 	my $text = shift;
-	$self->{_doc}->add(Plucene::Document::Field->Keyword($key=>$text));
-}
-
-#-------------------------------------------------------------------
-
-=head2 addRawText ( text )
-
-This should be used when you're just dumping a big block of raw text into the search indexer. It doesn't store the raw text, just indexes it for key words.
-
-=head3 text
-
-A string of text.
-
-=cut
-
-sub addRawText {
-	my $self = shift;
-	$self->{_raw} .= ' '.shift;
+	$text = WebGUI::HTML::filter($text, "all");
+	my $add = $self->session->db->prepare("update assetIndex set keywords=concat(keywords,' ',?) where assetId = ?");
+	$add->execute([$text, $self->getId]);
 }
 
 #-------------------------------------------------------------------
 
-=head2 addText ( key, text )
+=head2 create ( asset )
 
-Adds some text that is stored, indexed, and tokenized. This is best for simple phrases like titles and subjects.
-
-=head3 key
-
-A unique label to store this data.
-
-=head3 text
-
-A string of text.
+Constructor that also creates the initial index of an asset.
 
 =cut
 
-sub addText {
-	my $self = shift;
-	my $key = shift;
-	my $text = shift;
-	$self->{_doc}->add(Plucene::Document::Field->Text($key => $text));
-	$self->addRawText($text);
+sub create {
+	my $class = shift;
+	my $asset = shift;
+	my $self = $class->new($asset);
+	$self->delete; # drop any existing row so the insert below cannot collide on the primary key
+	my $url = $asset->get("url");
+	$url =~ s/\/|\-|\_/ /g; # split the URL into separate searchable words
+	my $description = WebGUI::HTML::filter($asset->get("description") || "", "all"); # NOTE(review): assumes the asset exposes a "description" property -- confirm
+	my $keywords = join(" ",$asset->get("title"), $asset->get("menuTitle"), $asset->get("synopsis"), $url, $description);
+	my $add = $self->session->db->prepare("insert into assetIndex (assetId, title, startDate, endDate, creationDate, revisionDate,
+		ownerUserId, groupIdView, groupIdEdit, lineage, className, synopsis, keywords) values ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )"); # 13 columns, 13 placeholders
+	$add->execute([$asset->getId, $asset->get("title"), $asset->get("startDate"), $asset->get("endDate"), $asset->get("creationDate"),
+		$asset->get("revisionDate"), $asset->get("ownerUserId"), $asset->get("groupIdView"), $asset->get("groupIdEdit"),
+		$asset->get("lineage"), $asset->get("className"), $asset->get("synopsis"), $keywords]);
+	return $self;
+}
 
-#-------------------------------------------------------------------
-
-=head2 addUnindexed ( key, text )
-
-Adds some text that is stored but not indexed or tokenized. This should be used sparingly, if ever, and is just a way to store extra metadata with search content that will not actually be used in search matches.
-
-=head3 key
-
-A unique label to store this data.
-
-=head3 text
-
-A string of text.
-
-=cut
-
-sub addUnindexed {
-	my $self = shift;
-	my $key = shift;
-	my $text = shift;
-	$self->{_doc}->add(Plucene::Document::Field->UnIndexed($key=>$text));
-}
-
-#-------------------------------------------------------------------
-
-=head2 addUnstored ( key, text )
-
-Adds some text that is indexed and tokenized, but is not stored verbatim. This is best for big test blocks like descriptions.
-
-=head3 key
-
-A unique label to store this data.
-
-=head3 text
-
-A string of text.
-
-=cut
-
-sub addUnstored {
-	my $self = shift;
-	my $key = shift;
-	my $text = shift;
-	$self->{_doc}->add(Plucene::Document::Field->UnStored($key => $text));
-	$self->addRawText($text);
-}
-
-#-------------------------------------------------------------------
-
-=head2 commit ( )
-
-Writes the data added using the various add methods to the index. This is the last thing should do and it must be done or the index will not be created.
-
-=cut
-
-sub commit {
-	my $self = shift;
-	my $writer = Plucene::Index::Writer->new( $self->{_path}, Plucene::Analysis::SimpleAnalyzer->new(), -e catfile($self->{_path}, "segments") ? 0 : 1);
-	$self->{_doc}->add(Plucene::Document::Field->UnStored(_raw_=> $self->{_raw}));
-	$writer->add_document($self->{_doc});
-	undef $writer;
-	$self->DESTROY;
-}
 
 #-------------------------------------------------------------------
 
 =head2 delete ( )
 
-Deletes this indexed item.
+Deletes this indexed asset.
 
 =cut
 
 sub delete {
 	my $self = shift;
-	# note: currently this method does nothing because stuff is actually deleted when you call the constructor
-	$self->DESTROY;
+	my $delete = $self->session->db->prepare("delete from assetIndex where assetId=?");
+	$delete->execute([$self->getId]);
 }
 
 #-------------------------------------------------------------------
@@ -220,7 +107,6 @@ Deconstructor.
 
 sub DESTROY {
 	my $self = shift;
-	delete $self->{_doc};
 	undef $self;
 }
 
@@ -239,53 +125,24 @@ sub getId {
 
 #-------------------------------------------------------------------
 
-=head2 new ( session , id )
+=head2 new ( asset )
 
 Constructor.
 
-=head3 session
+=head3 asset
 
-A reference to the current session.
-
-=head3 id
-
-The unique ID for this record in the index. Should be the assetId for the content you're indexing.
+A reference to an asset object.
 
 =cut
 
 sub new {
 	my $class = shift;
-	my $session = shift;
-	my $id = shift;
-	my $doc = Plucene::Document->new;
-	my $self = {_path => "/tmp/plucy1", _p=>$session->config->get("uploadsPath")."/assetindex", _session=>$session, _doc=>$doc, _id=>$id};
-	bless $self;
-	if (-f $self->{_path}."/segments") { # don't make the following checks unless the index has been initialized
-		my $reader = Plucene::Index::Reader->open($self->{_path});
-		my $term = Plucene::Index::Term->new({ field => 'id', text => $self->getId });
-		if ($reader->doc_freq($term)) { # delete the existing index if it already exists
-			$reader->delete_term(Plucene::Index::Term->new({ field => "id", text => $self->getId }));
-			$reader->close;
-		}
-	}
-	$doc->add(Plucene::Document::Field->Keyword(id => $id)); # create a new index for this id
+	my $asset = shift;
+	my $self = bless {_session=>$asset->session, _id=>$asset->getId}, $class; # must be blessed: create() calls $self->delete, and session()/getId() are methods
 	return $self;
 }
 
 
-#-------------------------------------------------------------------
-
-=head2 optimize ( session )
-
-=cut
-
-sub optimize {
-	my $class = shift;
-	my $session = shift;
-	Plucene::Index::Writer->new( "/tmp/plucy1", Plucene::Analysis::SimpleAnalyzer->new(), -e catfile("/tmp/plucy1", "segments") ? 0 : 1)->optimize;
-}
-
-
 #-------------------------------------------------------------------
 
 =head2 session ( )
@@ -299,10 +156,28 @@ sub session {
 	return $self->{_session};
 }
 
+#-------------------------------------------------------------------
 
+=head2 updateSynopsis ( text )
 
+Overrides the asset's default synopsis with a new chunk of text.
 
+NOTE: This doesn't change the asset itself, only the synopsis in the search index.
+
+=head3 text
+
+The text to put in place of the current synopsis.
+
+=cut
+
+sub updateSynopsis {
+	my $self = shift;
+	my $text = shift;
+	my $add = $self->session->db->prepare("update assetIndex set synopsis=? where assetId=?");
+	$add->execute([$text,$self->getId]);
+}
 
 
 1;
+
 
diff --git a/lib/WebGUI/Utility.pm b/lib/WebGUI/Utility.pm
index 9f6e4f049..699ec71fe 100644
--- a/lib/WebGUI/Utility.pm
+++ b/lib/WebGUI/Utility.pm
@@ -23,7 +23,7 @@ use Tie::IxHash;
 our @ISA = qw(Exporter);
 our @EXPORT = qw(&isBetween &makeTabSafe &makeArrayTabSafe &randomizeHash &commify &randomizeArray
 	&formatBytes &sortHashDescending &sortHash &isIn &makeCommaSafe &makeArrayCommaSafe &randint &round
-	&fromBase36 &toBase36);
+	);
 
 =head1 NAME
 
@@ -39,7 +39,6 @@ This package provides miscellaneous but useful utilities to the WebGUI programme
  use WebGUI::Utility;
  $string = commify($integer);
  $size = formatBytes($integer);
- $number = fromBase36($string);
  $boolean = isIn($value, @array);
  makeArrayCommaSafe(\@array);
  makeArrayTabSafe(\@array);
@@ -50,7 +49,6 @@ This package provides miscellaneous but useful utilities to the WebGUI programme
  $hashRef = randomizeHash(\%hash);
  %hash = sortHash(%hash);
  %hash = sortHashDescending(%hash);
- $string = toBase36($number);
 
 =head1 METHODS
 
@@ -101,27 +99,6 @@ sub formatBytes {
 	}
 }
 
-#-------------------------------------------------------------------
-
-=head2 fromBase36 ( string )
-
-Returns a number that has been decoded from base36.
-
-=head3 string
-
-A base 36 encoded string.
-
-=cut
-
-sub fromBase36 {
-	my $string = shift;
-	my $exponent = 0;
-	my $number;
-	for (reverse split //, $string) {
-		$number += ($_ =~ /\d/ ? $_ : (ord($_) - 87)) * (36**$exponent++);
-	}
-	return $number;
-}
 
 #-------------------------------------------------------------------
 
@@ -385,29 +362,6 @@ sub sortHashDescending {
 	return %newHash;
 }
 
-#-------------------------------------------------------------------
-
-=head2 toBase36 ( number )
-
-Returns a string that is base36 encoded.
-
-=head3 number
-
-A number that you wish to encode.
-
-=cut
-
-sub toBase36 {
-	my $number = shift;
-	my $string = "";
-	while ($number) {
-		my $quot = $number % 36;
-		$string = ($quot < 10 ? $quot : chr($quot + 87)) . $string;
-		$number = int($number / 36);
-	}
-	$string = "0$string" while length($string) < 9;
-	$string;
-}
 
 
 1;
diff --git a/sbin/testEnvironment.pl b/sbin/testEnvironment.pl
index 0c7614e7b..99841b879 100644
--- a/sbin/testEnvironment.pl
+++ b/sbin/testEnvironment.pl
@@ -77,8 +77,6 @@ checkModule("Parse::PlainConfig",1.1);
 checkModule("XML::RSSLite",0.11);
 checkModule("JSON",0.991);
 checkModule("Finance::Quote",1.08);
-checkModule("Bit::Vector::Minimal",1.3);
-checkModule("Plucene",1.24);
 #checkModule("POE",0.3202);
 #checkModule("POE::Component::IKC::Server",0.18);
 #checkModule("POE::Component::JobQueue",0.5402);