243 lines
7 KiB
Perl
Executable file
243 lines
7 KiB
Perl
Executable file
#!/usr/bin/env perl
|
|
|
|
#-------------------------------------------------------------------
|
|
# WebGUI is Copyright 2001-2009 Plain Black Corporation.
|
|
#-------------------------------------------------------------------
|
|
# Please read the legal notices (docs/legal.txt) and the license
|
|
# (docs/license.txt) that came with this distribution before using
|
|
# this software.
|
|
#-------------------------------------------------------------------
|
|
# http://www.plainblack.com info@plainblack.com
|
|
#-------------------------------------------------------------------
|
|
|
|
use strict;
|
|
use File::Basename ();
|
|
use File::Spec;
|
|
|
|
# Absolute path of the WebGUI root: the parent of the directory that holds
# this script. It is computed inside BEGIN so that WebGUI's lib directory is
# already on @INC when the "use WebGUI::..." statements are compiled.
my $webguiRoot;
BEGIN {
    $webguiRoot = File::Spec->rel2abs(
        File::Spec->catdir( File::Basename::dirname(__FILE__), File::Spec->updir )
    );
    unshift @INC, File::Spec->catdir( $webguiRoot, 'lib' );
}

# Put the extra library directories listed in preload.custom on @INC.
# This loop runs at run time, so it only helps modules that are loaded
# after this point.
# A preload.custom in the current directory is used when present (the
# historical behaviour). Otherwise fall back to the one next to this script,
# so the script also works when started from another directory.
my $preloadCustom = ( -e 'preload.custom' )
    ? 'preload.custom'
    : File::Spec->catfile( File::Basename::dirname(__FILE__), 'preload.custom' );

foreach my $libDir ( readLines($preloadCustom) ) {
    if ( !-d $libDir ) {
        warn "WARNING: Not adding lib directory '$libDir' from preload.custom: Directory does not exist.\n";
        next;
    }
    unshift @INC, $libDir;
}
|
|
|
|
use Getopt::Long;
|
|
use WebGUI::Asset;
|
|
use WebGUI::Config;
|
|
use WebGUI::Session;
|
|
use WebGUI::Search;
|
|
use WebGUI::Search::Index;
|
|
use Time::HiRes;
|
|
use Pod::Usage;
|
|
|
|
# Turn on autoflush for the currently selected output handle, so progress
# lines appear as soon as they are printed.
$| = 1;

# Command line option values; GetOptions below fills them in.
my $search     = "";    # --search text
my $help       = 0;     # --help
my $indexsite  = 0;     # --indexsite
my $configFile = "";    # --configFile name
my $indexAll   = "";    # --indexall
my $updatesite = 0;     # --updatesite

# GetOptions returns false when it meets an unknown or malformed option
# (it has already reported the problem on STDERR). Show the usage and exit
# instead of carrying on with a half-understood command line.
GetOptions(
    'indexall'     => \$indexAll,
    'configFile=s' => \$configFile,
    'search=s'     => \$search,
    'help'         => \$help,
    'indexsite'    => \$indexsite,
    'updatesite'   => \$updatesite,
) or pod2usage(2);
|
|
|
|
# --help: print the full manual and exit.
pod2usage( verbose => 2 ) if $help;

if ($configFile) {
    # Make sure an action was requested BEFORE opening the session.
    # pod2usage() exits the program, so calling it after the session is open
    # would skip the var->end / close calls below.
    # length() is used so that a search for the literal text "0" counts.
    my $haveSearch = length $search;
    pod2usage() unless $indexsite || $updatesite || $haveSearch;

    my $session = WebGUI::Session->open($webguiRoot, $configFile);
    if ($indexsite) {
        reindexSite($session);
    } elsif ($updatesite) {
        updateSite($session);
    } else {
        searchSite($session, $search);
    }
    $session->var->end;
    $session->close;
} elsif ($indexAll) {
    # --indexall needs no --configFile: it walks every site config itself.
    reindexAllSites();
} else {
    pod2usage();
}
|
|
|
|
#-------------------------------------------------------------------
|
|
# Rebuilds the search index of every site whose config is returned by
# WebGUI::Config->readAllConfigs. Each site gets its own session, which is
# ended and closed before the next site is started. Progress is printed to
# the selected output handle.
sub reindexAllSites {
    my $allConfigs = WebGUI::Config->readAllConfigs($webguiRoot);
    for my $configName (keys %$allConfigs) {
        print "Indexing $configName...\n";

        my $siteSession = WebGUI::Session->open($webguiRoot, $configName);
        reindexSite($siteSession);
        $siteSession->var->end;
        $siteSession->close;

        print "Finished indexing $configName.\n";
    }
}
|
|
|
|
|
|
#-------------------------------------------------------------------
|
|
# Rebuilds the search index for the site behind $session.
#
# Every asset whose row has state='published' is instantiated. Assets that
# are published and either approved or archived are re-indexed, and one
# progress line (id, title, seconds taken) is printed for each. Afterwards,
# assetIndex rows for any asset that was not indexed in this run are deleted.
#
# Parameters:
#   $session - an open WebGUI::Session for the site to index
sub reindexSite {
    my $session  = shift;
    my $siteTime = [Time::HiRes::gettimeofday()];
    my $rs = $session->db->read("select assetId, className from asset where state='published'");
    my @searchableAssetIds;
    while (my ($id, $class) = $rs->array) {
        my $asset = WebGUI::Asset->new($session, $id, $class);
        if ( !$asset ) {
            warn sprintf "- Asset %s (%s) could not be instantiated\n", $id, $class;
            next;
        }
        if ($asset->get("state") eq "published" && ($asset->get("status") eq "approved" || $asset->get("status") eq "archived")) {
            print $asset->getId."\t".$asset->getTitle."\t";
            my $t = [Time::HiRes::gettimeofday()];
            $asset->indexContent;
            print "(".Time::HiRes::tv_interval($t).")\n";
            push @searchableAssetIds, $id;
        }
    }

    # Delete indexes of assets that are no longer searchable.
    # When nothing was indexed the delete is skipped, so the existing index
    # is left untouched rather than wiped.
    # (A plain "if" block is used because "my $x = ... if COND" has
    # undefined behaviour in Perl.)
    if (@searchableAssetIds) {
        my $list = $session->db->quoteAndJoin(\@searchableAssetIds);
        $session->db->write("delete from assetIndex where assetId not in (".$list.")");
    }
    print "\nSite indexing took ".Time::HiRes::tv_interval($siteTime)." seconds.\n";
}
|
|
|
|
#-------------------------------------------------------------------
|
|
# Runs a keyword search against the site behind $session and prints one
# "assetId<TAB>title" line per result, followed by the elapsed time.
#
# Parameters:
#   $session  - an open WebGUI::Session for the site to search
#   $keywords - the search text, passed through as the "keywords" rule
sub searchSite {
    my ($session, $keywords) = @_;
    my $started = [Time::HiRes::gettimeofday()];

    # NOTE(review): the meaning of the second constructor argument (0) is
    # defined by WebGUI::Search and is not visible here.
    my $searcher = WebGUI::Search->new($session, 0);
    $searcher->search({ keywords => $keywords });

    my $results = $searcher->getResultSet;
    while (my $row = $results->hashRef) {
        print "$row->{assetId}\t$row->{title}\n";
    }

    my $elapsed = Time::HiRes::tv_interval($started);
    print "\nSearch took $elapsed seconds.\n";
}
|
|
|
|
#-------------------------------------------------------------------
|
|
# Indexes the published assets of the site behind $session that have no
# assetIndex row yet, leaving existing index entries alone.
#
# Every asset whose row has state='published' is kept in the index; only
# those without an assetIndex row are instantiated, and of those only the
# approved or archived ones are indexed. One progress line (id, title,
# seconds taken) is printed per indexed asset. Afterwards, assetIndex rows
# for assets that are no longer published are deleted.
#
# Parameters:
#   $session - an open WebGUI::Session for the site to update
sub updateSite {
    my $session  = shift;
    my $siteTime = [Time::HiRes::gettimeofday()];
    my $rs = $session->db->read("select assetId, className from asset where state='published'");
    my @searchableAssetIds;
    while (my ($id, $class) = $rs->array) {
        # Recorded before any further checks: a published asset keeps its
        # index entry whether or not it gets (re)indexed below.
        push @searchableAssetIds, $id;

        my ($done) = $session->db->quickArray("select count(*) from assetIndex where assetId=?", [$id]);
        next if $done;

        my $asset = WebGUI::Asset->new($session, $id, $class);
        if ( !defined $asset ) {
            # Same diagnostic as reindexSite, so broken assets are visible.
            warn sprintf "- Asset %s (%s) could not be instantiated\n", $id, $class;
            next;
        }

        my $status = $asset->get("status");
        next unless $status eq "approved" || $status eq "archived";

        print $asset->getId."\t".$asset->getTitle."\t";
        my $t = [Time::HiRes::gettimeofday()];
        $asset->indexContent;
        print "(".Time::HiRes::tv_interval($t).")\n";
    }

    # Delete indexes of assets that are no longer searchable.
    # When no published asset was found the delete is skipped, so the
    # existing index is left untouched rather than wiped.
    # (A plain "if" block is used because "my $x = ... if COND" has
    # undefined behaviour in Perl.)
    if (@searchableAssetIds) {
        my $list = $session->db->quoteAndJoin(\@searchableAssetIds);
        $session->db->write("delete from assetIndex where assetId not in (".$list.")");
    }
    print "\nSite indexing took ".Time::HiRes::tv_interval($siteTime)." seconds.\n";
}
|
|
|
|
#-------------------------------------------------
|
|
# Reads a simple list file and returns its meaningful lines.
#
# Everything from a "#" to the end of the line is treated as a comment and
# removed, leading and trailing whitespace is stripped, and lines that are
# empty afterwards are skipped.
#
# Parameters:
#   $file - path of the file to read
#
# Returns the remaining lines as a list, in file order. If the file cannot
# be opened, an empty list is returned and no error is raised.
sub readLines {
    my $file = shift;
    my @lines;
    if (open(my $fh, '<', $file)) {
        while (my $line = <$fh>) {
            $line =~ s/#.*//;
            $line =~ s/^\s+//;
            $line =~ s/\s+$//;
            # Compare against the empty string: a plain truth test would
            # also throw away a line that consists of just "0".
            next if $line eq '';
            push @lines, $line;
        }
        close $fh;
    }
    return @lines;
}
|
|
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
search - Reindex and search a WebGUI site.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
search --configFile config.conf --indexsite
|
|
|
|
search --configFile config.conf --updatesite
|
|
|
|
search --configFile config.conf --search text
|
|
|
|
search --indexall
|
|
|
|
search --help
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This WebGUI utility script helps maintain search indexes on
|
|
any site. It can be used to build the index for an entire site,
|
|
build the index only for new content, and perform searches.
|
|
|
|
=over
|
|
|
|
=item B<--configFile config.conf>
|
|
|
|
The WebGUI config file to use. Only the file name needs to be specified,
|
|
since it will be looked up inside WebGUI's configuration directory.
|
|
This parameter is required for every action except B<--indexall>
and B<--help>.
|
|
|
|
=item B<--indexsite>
|
|
|
|
Reindexes the entire site specified in the config file. This process
|
|
may take a while (even hours) depending on the amount of content the
|
|
site has. Server performance will suffer somewhat during the
|
|
indexing process. This option requires a B<--configFile> to be
|
|
specified.
|
|
|
|
=item B<--updatesite>
|
|
|
|
Indexes content that has not been indexed for the site specified in
|
|
the config file, keeping the indexes for already indexed content
|
|
intact. This is useful if the B<--indexsite> had to be stopped
|
|
partway through. This option requires a B<--configFile> to be
|
|
specified.
|
|
|
|
=item B<--search text>
|
|
|
|
Searches the site specified in the config file for a given keyword or
|
|
phrase, returning the results. This option requires a B<--configFile>
|
|
to be specified.
|
|
|
|
=item B<--indexall>
|
|
|
|
Reindexes B<all> the sites. Note that this can take many hours and
|
|
will affect performance of the server during the indexing process.
|
|
|
|
=item B<--help>
|
|
|
|
Shows this documentation, then exits.
|
|
|
|
=back
|
|
|
|
=head1 AUTHOR
|
|
|
|
Copyright 2001-2009 Plain Black Corporation.
|
|
|
|
=cut
|