adding search files to the repository
This commit is contained in:
parent
ef8e8704c5
commit
aa59f59f7a
18 changed files with 10298 additions and 0 deletions
1243
lib/DBIx/FullTextSearch.pm
Normal file
1243
lib/DBIx/FullTextSearch.pm
Normal file
File diff suppressed because it is too large
Load diff
263
lib/DBIx/FullTextSearch/Blob.pm
Normal file
263
lib/DBIx/FullTextSearch/Blob.pm
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
|
||||
package DBIx::FullTextSearch::Blob;
|
||||
use strict;
|
||||
|
||||
# Backend constructor: stores the supplied $fts value under the 'fts'
# key of a fresh hash and blesses that hash into $class.
sub open {
    my $class = shift;
    my $fts   = shift;
    my %object = ( 'fts' => $fts );
    return bless \%object, $class;
}
|
||||
# Create the data table for this backend.
#
# Called with the class name and the $fts hash; reads 'data_table',
# 'word_length' and 'dbh' from it.  On success the table name is
# appended to $fts->{'created_tables'} and nothing is returned; on
# failure the DBI error string is returned.
sub _create_tables {
    my ($class, $fts) = @_;

    # MySQL does not accept a literal DEFAULT on BLOB/TEXT columns (an
    # error in strict SQL mode), so idx is declared without one.  Every
    # insert issued by this backend supplies idx explicitly.
    my $CREATE_DATA = <<EOF;
create table $fts->{'data_table'} (
word varchar($fts->{'word_length'}) binary
default '' not null,
idx longblob not null,
primary key (word)
)
EOF
    my $dbh = $fts->{'dbh'};
    $dbh->do($CREATE_DATA) or return $dbh->errstr;
    push @{$fts->{'created_tables'}}, $fts->{'data_table'};
    return;
}
|
||||
|
||||
# Append document $id to the blob of every word in %$words
# (word => occurrence count).
#
# For each word a packed (doc id, count) record is appended to the
# word's existing row; words for which the update touched no row are
# collected and inserted afterwards with one multi-row insert.
# Returns the sum of the counts in %$words.
sub add_document {
    my ($self, $id, $words) = @_;
    my $fts = $self->{'fts'};
    my ($dbh, $data_table) = @{$fts}{'dbh', 'data_table'};

    # the append statement is prepared once and cached on the object
    unless (defined $self->{'adding_update_sth'}) {
        $self->{'adding_update_sth'} = $dbh->prepare(
            "update $data_table set idx = concat(idx, ?)
            where word = ?");
    }
    my $append_sth = $self->{'adding_update_sth'};

    my $record_format =
        $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'doc_id_bits'}}
        . $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'count_bits'}};

    my @new_rows;
    my $total = 0;
    foreach my $term (keys %$words) {
        my $count  = $words->{$term};
        my $record = pack $record_format, $id, $count;
        my $affected = $append_sth->execute($record, $term);
        # nothing was updated, so the word still needs a row of its own
        if ($affected == 0) {
            push @new_rows, $term, $record;
        }
        $total += $count;
    }

    if (@new_rows) {
        my $placeholders = join ',', ('(?, ?)') x (@new_rows / 2);
        $dbh->do("insert into $data_table values " . $placeholders,
            {}, @new_rows);
    }

    return $total;
}
|
||||
|
||||
# Remove documents from the index: every id in the argument list is
# passed to update_document together with an empty word hash.
sub delete_document {
    my ($self, @doc_ids) = @_;
    foreach my $doc_id (@doc_ids) {
        $self->update_document($doc_id, {});
    }
}
|
||||
|
||||
# Replace the index entries of document $id with the words in %$words
# (word => occurrence count).  An empty hash removes the document.
#
# The data table is locked and every existing word row is visited:
# find_position locates the document's record inside the row's blob,
# and the record is replaced, inserted or removed with one UPDATE.
# Words of the document that have no row yet are inserted at the end
# with a single multi-row insert.
#
# The caller's hash is not modified.  Returns the number of distinct
# words indexed for the document.  If a statement dies, the table lock
# is released before the error is re-thrown.
sub update_document {
    my ($self, $id, $words) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $data_table = $fts->{'data_table'};

    # splice statement: keep the first ? bytes of idx, add the new
    # record (possibly empty), then continue from byte position ?
    my $update_sth = ( $self->{'update_update_sth'} ||=
        $dbh->prepare("update $data_table set idx =
            concat(substring(idx, 1, ?), ?, substring(idx, ?))
            where word = ?") );

    my $packstring = $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'doc_id_bits'}}
        . $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'count_bits'}};
    my $packlength = length pack $packstring, 0, 0;

    # work on a copy so the caller's hash is not emptied as a side effect
    my %pending = %{ $words || {} };
    my @insert_values;
    my $num_words = 0;

    $dbh->do("lock tables $data_table write");

    # everything between lock and unlock runs inside eval so that the
    # lock is released even when a statement dies (e.g. RaiseError)
    my $ok = eval {
        my $select_sth = $dbh->prepare("select word from $data_table");
        $select_sth->execute;

        while (my ($word) = $select_sth->fetchrow_array) {
            my $in_document = defined $pending{$word};
            my $value = $in_document
                ? pack($packstring, $id, $pending{$word}) : '';
            delete $pending{$word};
            $num_words++ if $in_document;

            # find_position returns the byte offset of the record for
            # document $id in this word's blob and a flag telling
            # whether such a record is already present there
            my ($pos, $present) = $self->find_position($word, $id);
            if (not defined $pos) {
                # no offset could be determined; only try an insert
                # when there is actually something to store
                push @insert_values, $word, $value if $in_document;
            }
            elsif ($in_document or $present) {
                # SQL substring() is 1-based, so the tail of the blob
                # starts one past $pos -- and one record further when
                # an existing record is being replaced or removed
                my $spos = $pos + 1;
                $spos += $packlength if $present;
                $update_sth->execute($pos, $value, $spos, $word);
            }
            # otherwise the word is neither in the document nor in the
            # blob: the row would be rewritten unchanged, so skip it
        }

        # words of the document that have no row in the table yet
        for my $word (keys %pending) {
            push @insert_values, $word,
                pack($packstring, $id, $pending{$word});
            $num_words++;
        }

        if (@insert_values) {
            my $sql_str = "insert into $data_table values "
                . join(',', ('(?, ?)') x (@insert_values / 2));
            $dbh->do($sql_str, {}, @insert_values);
        }
        1;
    };
    my $error = $@;

    $dbh->do("unlock tables");
    die $error unless $ok;

    return $num_words;
}
|
||||
|
||||
# Locate the record of document $id inside the blob stored for $word.
#
# The blob is treated as a sequence of fixed-size packed
# (doc id, count) records ordered by doc id.  Returns a two-element
# list: the byte offset of the record (or of the place where it would
# have to be inserted) and a flag that is 1 when a record for $id is
# present and 0 when it is not.  Returns an empty list when the row or
# the requested part of the blob cannot be read.
#
# The search is an interpolation search over the records.  Once the
# remaining interval is no larger than $fts->{'blob_direct_fetch'}
# records, the whole interval is fetched and scanned in Perl.
sub find_position {
    my ($self, $word, $id) = @_;

    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $data_table = $fts->{'data_table'};

    # Sth to read the length of the blob holding the document/count info
    my $get_length_sth = ( $self->{'get_length_sth'} ||=
        $dbh->prepare("select
            length(idx) from $data_table where word = ?") );
    my $length = $dbh->selectrow_array($get_length_sth, {}, $word);
    return unless defined $length;

    # the record size follows from the configured bit widths
    my $packstring = $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'doc_id_bits'}}
        . $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'count_bits'}};
    my $packlength = length pack $packstring, 0, 0;

    # from here on $bot, $top and $med count records, not bytes
    my ($bot, $top) = (0, int($length / $packlength));

    # first guess: proportional to $id when the largest doc id is known
    # (and non-zero, to avoid dividing by zero), the middle otherwise
    my $med = $fts->{'max_doc_id'}
        ? int($top * $id / $fts->{'max_doc_id'})
        : int(($top - $bot) / 2);

    my $blob_direct_fetch = $fts->{'blob_direct_fetch'};

    # we divide the interval
    while ($bot != $top) {
        # keep the guess inside the current interval
        $med = $top - 1 if $med >= $top;
        $med = $bot if $med < $bot;

        if ($top - $bot <= $blob_direct_fetch) {
            my $get_interval_sth = ( $self->{'get_interval_sth'} ||=
                $dbh->prepare("select substring(idx,?,?) from $data_table where word = ?") );
            my $alldata = $dbh->selectrow_array($get_interval_sth,
                {},
                $bot * $packlength + 1,
                ($top - $bot) * $packlength,
                $word);
            return unless defined $alldata;

            # scan the fetched records for $id or the first larger id
            for (my $n = 0; $n * $packlength < length $alldata; $n++) {
                my ($doc) = unpack $packstring,
                    substr($alldata, $n * $packlength, $packlength);
                if ($doc == $id) { return (($bot + $n) * $packlength, 1); }
                if ($doc > $id)  { return (($bot + $n) * $packlength, 0); }
            }
            return ($top * $packlength, 0);
        }

        # Probe a single record.  A whole record is fetched: a fixed
        # two-byte read would be too short for doc ids wider than 16
        # bits and the unpack below would then yield nothing.
        my ($val) = $dbh->selectrow_array(
            "select substring(idx, ?, ?) from $data_table
            where word = ?", {}, ($med * $packlength) + 1, $packlength,
            $word);
        ($val) = unpack $packstring, $val;

        if (not defined $val) { return; }
        if ($val == $id) { return ($med * $packlength, 1); }
        elsif ($val < $id) { $bot = $med + 1; }
        else { $top = $med; }

        # next guess by interpolation; a probed id of 0 cannot be used
        # as a divisor, so restart from the bottom of the interval
        $med = $val ? int($med * $id / $val) : $bot;
    }
    return ($bot * $packlength, 0);
}
|
||||
|
||||
# Look up documents by word.
#
# Each argument is bound to a "word like ?" query.  The blob of every
# matching row is decoded as fixed-size packed (doc id, count) records
# and the counts are summed per document.  Returns a hash reference
# mapping doc id to the summed count.
sub contains_hashref {
    my ($self, @patterns) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $data_table = $fts->{'data_table'};

    my $record_format =
        $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'doc_id_bits'}}
        . $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'count_bits'}};
    my $record_size = length pack $record_format, 0, 0;

    # the lookup statement is prepared once and cached on the object
    unless (defined $self->{'get_idx_sth'}) {
        $self->{'get_idx_sth'} = $dbh->prepare(
            "select idx from $data_table where word like ?"
            );
    }
    my $sth = $self->{'get_idx_sth'};

    my %total_for;
    foreach my $pattern (@patterns) {
        $sth->execute($pattern);
        while (my ($blob) = $sth->fetchrow_array) {
            next unless defined $blob;
            my $size = length $blob;
            for (my $offset = 0; $offset < $size; $offset += $record_size) {
                my @record = unpack $record_format,
                    substr($blob, $offset, $record_size);
                next unless @record;
                my ($doc, $count) = @record;
                $total_for{$doc} += $count;
            }
        }
        $sth->finish;
    }
    return \%total_for;
}
|
||||
|
||||
*parse_and_index_data = \&DBIx::FullTextSearch::parse_and_index_data_count;
|
||||
|
||||
1;
|
||||
|
||||
83
lib/DBIx/FullTextSearch/BlobFast.pm
Normal file
83
lib/DBIx/FullTextSearch/BlobFast.pm
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
|
||||
package DBIx::FullTextSearch::BlobFast;
|
||||
use DBIx::FullTextSearch::Blob;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch::Blob !;
|
||||
use strict;
|
||||
|
||||
# Mark documents as deleted without rewriting existing records: one
# packed (doc id, 0) record per id in the argument list is appended to
# the blob of every row of the data table in a single UPDATE.
# Always returns 1.
sub delete_document {
    my ($self, @doc_ids) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $data_table = $fts->{'data_table'};

    my $record_format =
        $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'doc_id_bits'}}
        . $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'count_bits'}};

    # one zero-count record per document id, concatenated
    my $zero_records = join '',
        map { pack $record_format, $_, 0 } @doc_ids;

    $dbh->do("
        update $data_table set idx = concat(idx, ?)
        ", {}, $zero_records);
    return 1;
}
|
||||
|
||||
# Re-index a document: the id (first argument after $self) is passed
# to delete_document, then all arguments are handed to add_document,
# whose result is returned.
sub update_document {
    my ($self, @args) = @_;
    my $id = $args[0];
    $self->delete_document($id);
    return $self->add_document(@args);
}
|
||||
|
||||
# Look up documents by word.
#
# Each argument is bound to a "word like ?" query.  Every matching
# blob is read backwards, record by record, so that only the last
# record stored for a document counts; a last record with a zero count
# contributes nothing.  Returns a hash reference mapping doc id to the
# summed count.
sub contains_hashref {
    my ($self, @patterns) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $data_table = $fts->{'data_table'};

    my $record_format =
        $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'doc_id_bits'}}
        . $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'count_bits'}};
    my $record_size = length pack $record_format, 0, 0;

    # the lookup statement is prepared once and cached on the object
    unless (defined $self->{'get_idx_sth'}) {
        $self->{'get_idx_sth'} = $dbh->prepare(
            "select idx from $data_table where word like ?"
            );
    }
    my $sth = $self->{'get_idx_sth'};

    my %total_for;
    foreach my $pattern (@patterns) {
        $sth->execute($pattern);
        while (my ($blob) = $sth->fetchrow_array) {
            next unless defined $blob;
            my %seen;
            # walk from the last record towards the first
            for (my $offset = length($blob) - $record_size;
                    $offset >= 0; $offset -= $record_size) {
                my ($doc_id, $count) = unpack $record_format,
                    substr($blob, $offset, $record_size);
                next if $seen{$doc_id}++;   # a later record already decided
                next unless $count;         # zero count: nothing to add
                $total_for{$doc_id} += $count;
            }
        }
        $sth->finish;
    }
    return \%total_for;
}
|
||||
|
||||
*parse_and_index_data = \&DBIx::FullTextSearch::parse_and_index_data_count;
|
||||
|
||||
1;
|
||||
|
||||
188
lib/DBIx/FullTextSearch/Column.pm
Normal file
188
lib/DBIx/FullTextSearch/Column.pm
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
|
||||
package DBIx::FullTextSearch::Column;
|
||||
use strict;
|
||||
|
||||
# Backend constructor: returns a new object of class $class whose only
# key, 'fts', holds the value passed in.
sub open {
    my ($class, $fts) = @_;
    my $self = {};
    $self->{'fts'} = $fts;
    bless $self, $class;
    return $self;
}
|
||||
|
||||
# Destructor: calls finish() on the cached word-id lookup statement if
# one was stored under 'select_wordid_sth'.
sub DESTROY {
    my $self = shift;
    my $sth = $self->{'select_wordid_sth'};
    $sth->finish() if defined $sth;
}
|
||||
|
||||
# Create the tables used by this backend: the data table with one
# (word_id, doc_id[, count]) row per word occurrence record, and the
# word-id table mapping words to numeric ids.
#
# $fts->{'word_id_table'} defaults to "<table>_words" when it is not
# set.  Each successfully created table name is appended to
# $fts->{'created_tables'}.  Returns the DBI error string as soon as a
# statement fails, nothing on success.
sub _create_tables {
    my ($class, $fts) = @_;
    my $dbh = $fts->{'dbh'};

    my $word_id_type =
        $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'word_id_bits'}};
    my $doc_id_type =
        $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'doc_id_bits'}};

    # the count column exists only when a count width is configured
    my $count_column = $fts->{'count_bits'}
        ? "count $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'count_bits'}} unsigned,"
        : '';

    my $data_sql = <<EOF;
create table $fts->{'data_table'} (
word_id $word_id_type unsigned not null,
doc_id $doc_id_type unsigned not null,
$count_column
index (word_id),
index (doc_id)
)
EOF

    unless (defined $fts->{'word_id_table'}) {
        $fts->{'word_id_table'} = $fts->{'table'}.'_words';
    }

    my $word_sql = <<EOF;
create table if not exists $fts->{'word_id_table'} (
word varchar($fts->{'word_length'}) binary
default '' not null,
id $word_id_type unsigned not null auto_increment,
primary key (id),
unique (word)
)
EOF

    foreach my $step ( [ $data_sql, $fts->{'data_table'} ],
                       [ $word_sql, $fts->{'word_id_table'} ] ) {
        my ($sql, $table) = @$step;
        $dbh->do($sql) or return $dbh->errstr;
        push @{$fts->{'created_tables'}}, $table;
    }
    return;
}
|
||||
|
||||
# Index document $id with the words in %$words (word => count).
#
# Every word is resolved to a numeric id: first from the per-object
# cache in $self->{'wordids'}, then from the word-id table, and
# finally by allocating max(id)+1 for a new word.  New words are
# written to the word-id table while it is locked; afterwards one row
# per word -- (word_id, doc_id) plus the count when 'count_bits' is
# set -- is inserted into the data table.
#
# All values are passed as bind parameters instead of being
# interpolated into the SQL text.  If a statement dies while the
# word-id table is locked, the lock is released before the error is
# re-thrown.  Returns the number of words indexed.
sub add_document {
    my ($self, $id, $words) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $word_id_table = $fts->{'word_id_table'};
    my $data_table = $fts->{'data_table'};
    my $count_bits = $fts->{'count_bits'};

    if (not defined $self->{'select_wordid_sth'}) {
        $self->{'select_wordid_sth'} = $dbh->prepare("
            select id from $word_id_table where word = ?
            ");
    }
    my $select_sth = $self->{'select_wordid_sth'};

    my @new_words;      # flat list of (word, id) pairs to insert
    my @data_binds;     # flat list of bind values for the data rows
    my $num_words = 0;

    $dbh->do("lock tables $word_id_table write");
    my $ok = eval {
        my ($maxid) = $dbh->selectrow_array("select max(id)
            from $word_id_table");
        foreach my $word (keys %$words) {
            my $wordid = $self->{'wordids'}->{$word};
            if (!defined $wordid) {
                $select_sth->execute($word);
                ($wordid) = $select_sth->fetchrow_array();
                unless ($wordid) {
                    # unknown word: allocate the next free id
                    $wordid = ++$maxid;
                    push @new_words, $word, $wordid;
                }
                $self->{'wordids'}->{$word} = $wordid;
            }
            push @data_binds, $wordid, $id,
                ($count_bits ? $words->{$word} : ());
            $num_words++;
        }
        if (@new_words) {
            $dbh->do("insert into $word_id_table values "
                . join(',', ('(?,?)') x (@new_words / 2)),
                undef, @new_words);
        }
        1;
    };
    my $error = $@;
    $dbh->do("unlock tables");
    die $error unless $ok;

    if ($num_words) {
        my $row = $count_bits ? '(?,?,?)' : '(?,?)';
        $dbh->do("insert into $data_table values "
            . join(',', ($row) x $num_words), undef, @data_binds);
    }
    return $num_words;
}
|
||||
|
||||
# Delete all data-table rows of the given documents: one prepared
# "delete ... where doc_id = ?" statement is executed once per id in
# the argument list.
sub delete_document {
    my ($self, @doc_ids) = @_;
    my $fts = $self->{'fts'};
    my $delete_sth = $fts->{'dbh'}->prepare(
        "delete from $fts->{'data_table'} where doc_id = ?");
    foreach my $doc_id (@doc_ids) {
        $delete_sth->execute($doc_id);
    }
}
|
||||
|
||||
# Re-index a document: delete its current rows, then add it again with
# the new word hash.  Returns whatever add_document returns.
sub update_document {
    my $self = shift;
    my ($id, $words) = @_;
    $self->delete_document($id);
    return $self->add_document($id, $words);
}
|
||||
|
||||
# Look up documents by word.
#
# Each argument is bound to a "word like ?" query joining the data and
# word-id tables.  The second selected column is the stored count when
# 'count_bits' is set and the constant 1 otherwise.  Returns a hash
# reference mapping doc id to the summed value of that column.
sub contains_hashref {
    my ($self, @patterns) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $data_table = $fts->{'data_table'};
    my $word_id_table = $fts->{'word_id_table'};
    my $count_bits = $fts->{'count_bits'};

    # the lookup statement is prepared once and cached on the object
    my $sth = $self->{'get_data_sth'};
    if (not defined $sth) {
        my $count_column = $count_bits ? 'count' : '1';
        $sth = $self->{'get_data_sth'} = $dbh->prepare(
            "select doc_id, $count_column
            from $data_table, $word_id_table
            where word like ?
            and id = word_id" );
    }

    my %total_for;
    foreach my $pattern (@patterns) {
        $sth->execute($pattern);
        while (my @row = $sth->fetchrow_array) {
            my ($doc, $count) = @row;
            $total_for{$doc} += $count;
        }
        $sth->finish;
    }
    return \%total_for;
}
|
||||
|
||||
# Return the words that occur in at least $k percent of the documents.
#
# The per-word row count in the data table is divided by
# $fts->document_count and compared with $k/100; the qualifying word
# ids are then translated to words through the word-id table.
# Returns a reference to an array of words, or nothing when no word
# qualifies or the first query fails.
sub common_word {
    my ($self, $k) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};

    my $num = $fts->document_count;

    # $k is given as a percentage
    $k /= 100;

    my $SQL = <<EOF;
select word_id, count(*)/? as k
from $fts->{'data_table'}
group by word_id
having k >= ?
EOF
    my $ary_ref = $dbh->selectcol_arrayref($SQL, {}, $num, $k);
    # selectcol_arrayref returns undef on error; dereferencing that
    # under strict would die, so treat it like "no common words"
    return unless $ary_ref and @$ary_ref;

    my $QUESTION_MARKS = join ',', ('?') x scalar(@$ary_ref);

    $SQL = <<EOF;
select word
from $fts->{'word_id_table'}
where id IN ($QUESTION_MARKS)
EOF
    return $dbh->selectcol_arrayref($SQL, {}, @$ary_ref);
}
|
||||
|
||||
*parse_and_index_data = \&DBIx::FullTextSearch::parse_and_index_data_count;
|
||||
|
||||
1;
|
||||
|
||||
27
lib/DBIx/FullTextSearch/File.pm
Normal file
27
lib/DBIx/FullTextSearch/File.pm
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
|
||||
package DBIx::FullTextSearch::File;
|
||||
use DBIx::FullTextSearch::String;
|
||||
use strict;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch::String !;
|
||||
|
||||
# Index the contents of a file.
#
# Reads the whole of $file, appends " $extra_data" when extra data is
# given, and passes the file name and the text to the parent class's
# index_document.  When the file cannot be opened, an error message is
# stored in $self->{'errstr'} and nothing is returned.
sub index_document {
    my ($self, $file, $extra_data) = @_;

    # Three-argument open with a lexical handle: the file name is
    # taken literally (never as a mode or a pipe command) and no
    # package-global FILE handle is touched.
    my $fh;
    unless (open $fh, '<', $file) {
        $self->{'errstr'} = "Reading the file `$file' failed: $!";
        return;
    }

    # slurp the complete file in one read
    my $data = do { local $/ = undef; <$fh> };
    close $fh;

    $data .= " $extra_data" if $extra_data;
    $self->SUPER::index_document($file, $data);
}
|
||||
|
||||
1;
|
||||
|
||||
182
lib/DBIx/FullTextSearch/Phrase.pm
Normal file
182
lib/DBIx/FullTextSearch/Phrase.pm
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
package DBIx::FullTextSearch::Phrase;
|
||||
use strict;
|
||||
use DBIx::FullTextSearch::Column;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch::Column !;
|
||||
|
||||
# Backend constructor: the new object is a hash blessed into $class
# that keeps the given $fts value under the key 'fts'.
sub open {
    my ($class, $fts) = @_;
    my $self = bless {}, $class;
    $self->{'fts'} = $fts;
    return $self;
}
|
||||
|
||||
# Destructor: when a word-id lookup statement is cached under
# 'select_wordid_sth', finish() is called on it.
sub DESTROY {
    my ($self) = @_;
    return unless defined $self->{'select_wordid_sth'};
    $self->{'select_wordid_sth'}->finish();
}
|
||||
|
||||
|
||||
# Create the tables used by this backend: the data table holding one
# (word_id, doc_id, idx) row per word and document, and the word-id
# table mapping words to numeric ids.
#
# $fts->{'word_id_table'} defaults to "<table>_words" when it is not
# set.  Each successfully created table name is appended to
# $fts->{'created_tables'}.  Returns the DBI error string as soon as a
# statement fails, nothing on success.
sub _create_tables {
    my ($class, $fts) = @_;

    # MySQL does not accept a literal DEFAULT on BLOB/TEXT columns (an
    # error in strict SQL mode), so idx is declared without one.  The
    # insert in add_document always supplies idx.
    my $CREATE_DATA = <<EOF;
create table $fts->{'data_table'} (
word_id $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'word_id_bits'}} unsigned not null,
doc_id $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'doc_id_bits'}} unsigned not null,
idx longblob not null,
index (word_id),
index (doc_id)
)
EOF

    $fts->{'word_id_table'} = $fts->{'table'}.'_words'
        unless defined $fts->{'word_id_table'};

    my $CREATE_WORD_ID = <<EOF;
create table if not exists $fts->{'word_id_table'} (
word varchar($fts->{'word_length'}) binary
default '' not null,
id $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'word_id_bits'}} unsigned not null auto_increment,
primary key (id),
unique (word)
)
EOF

    my $dbh = $fts->{'dbh'};
    $dbh->do($CREATE_DATA) or return $dbh->errstr;
    push @{$fts->{'created_tables'}}, $fts->{'data_table'};
    $dbh->do($CREATE_WORD_ID) or return $dbh->errstr;
    push @{$fts->{'created_tables'}}, $fts->{'word_id_table'};
    return;
}
|
||||
|
||||
# Index document $id with the words in %$words, where each value is a
# reference to the array of positions at which the word occurs.
#
# Every word is resolved to a numeric id: first from the per-object
# cache in $self->{'wordids'}, then from the word-id table, and
# finally by allocating max(id)+1 for a new word.  New words are
# written to the word-id table while it is locked; afterwards one
# (word_id, doc_id, packed positions) row per word is inserted into
# the data table.
#
# All values are passed as bind parameters instead of being
# interpolated into the SQL text.  If a statement dies while the
# word-id table is locked, the lock is released before the error is
# re-thrown.  Returns the number of words indexed.
sub add_document {
    my ($self, $id, $words) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $word_id_table = $fts->{'word_id_table'};
    my $data_table = $fts->{'data_table'};

    if (not defined $self->{'select_wordid_sth'}) {
        $self->{'select_wordid_sth'} = $dbh->prepare("
            select id from $word_id_table where word = ?
            ");
    }
    my $select_sth = $self->{'select_wordid_sth'};

    # all positions of a word are packed with the same template
    my $packstring =
        $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'position_bits'}} . '*';

    my @new_words;      # flat list of (word, id) pairs to insert
    my @data_binds;     # flat list of (word_id, doc_id, idx) values
    my $num_words = 0;

    $dbh->do("lock tables $word_id_table write");
    my $ok = eval {
        my ($maxid) = $dbh->selectrow_array("select max(id)
            from $word_id_table");
        foreach my $word (keys %$words) {
            my $wordid = $self->{'wordids'}->{$word};
            if (!defined $wordid) {
                $select_sth->execute($word);
                ($wordid) = $select_sth->fetchrow_array();
                unless ($wordid) {
                    # unknown word: allocate the next free id
                    $wordid = ++$maxid;
                    push @new_words, $word, $wordid;
                }
                $self->{'wordids'}->{$word} = $wordid;
            }
            push @data_binds, $wordid, $id,
                pack($packstring, @{$words->{$word}});
            $num_words++;
        }
        if (@new_words) {
            $dbh->do("insert into $word_id_table values "
                . join(',', ('(?,?)') x (@new_words / 2)),
                undef, @new_words);
        }
        1;
    };
    my $error = $@;
    $dbh->do("unlock tables");
    die $error unless $ok;

    if ($num_words) {
        $dbh->do("insert into $data_table values "
            . join(',', ('(?,?,?)') x $num_words), undef, @data_binds);
    }
    return $num_words;
}
|
||||
|
||||
# Re-index a document: all data-table rows with doc_id = $id are
# deleted, then the document is added again with the new word hash.
# Returns whatever add_document returns.
sub update_document {
    my ($self, $id, $words) = @_;
    my $fts = $self->{'fts'};
    $fts->{'dbh'}->do(
        "delete from $fts->{'data_table'} where doc_id = ?", {}, $id);

    return $self->add_document($id, $words);
}
|
||||
|
||||
# Look up documents by phrase.
#
# Each argument is split on whitespace into words.  For every word the
# positions per document are read from the data table; starting with
# the second word, a position is kept only if the previous word of the
# phrase was found at the position directly before it in the same
# document.  What survives for the last word is counted per document.
# Returns a hash reference mapping doc id to that count, summed over
# all phrases given.
sub contains_hashref {
    my ($self, @phrases) = @_;
    my $fts = $self->{'fts'};
    my $dbh = $fts->{'dbh'};
    my $data_table = $fts->{'data_table'};
    my $word_id_table = $fts->{'word_id_table'};

    my $position_format =
        $DBIx::FullTextSearch::BITS_TO_PACK{$fts->{'position_bits'}} . '*';

    my $SQL = qq{
        select doc_id, idx
        from $data_table, $word_id_table
        where word like ?
        and id = word_id
    };

    my %hits;
    foreach my $phrase (@phrases) {
        my @words = split(' ', $phrase);

        # one prepared and executed statement per word of the phrase
        my @sths = map {
            my $sth = $dbh->prepare($SQL);
            $sth->execute($_);
            $sth;
        } @words;

        my (%prev_pos, %cur_pos);
        foreach my $i (0 .. $#words) {
            if ($i > 0) {
                %prev_pos = %cur_pos;
                %cur_pos = ();
            }

            # positions of this word, per document
            while (my ($doc, $data) = $sths[$i]->fetchrow_array) {
                $cur_pos{$doc}->{$_} = 1
                    foreach unpack $position_format, $data;
            }
            next if $i == 0;

            # keep only positions that directly follow the previous word
            foreach my $doc (keys %cur_pos) {
                my $positions = $cur_pos{$doc};
                my @unmatched = grep {
                    !($_ > 0 && exists $prev_pos{$doc}{$_ - 1})
                } keys %$positions;
                delete @{$positions}{@unmatched};
                delete $cur_pos{$doc} unless %$positions;
            }
        }

        foreach my $doc (keys %cur_pos) {
            $hits{$doc} += scalar(keys %{$cur_pos{$doc}});
        }
    }
    return \%hits;
}
|
||||
|
||||
*parse_and_index_data = \&DBIx::FullTextSearch::parse_and_index_data_list;
|
||||
|
||||
1;
|
||||
271
lib/DBIx/FullTextSearch/StopList.pm
Normal file
271
lib/DBIx/FullTextSearch/StopList.pm
Normal file
|
|
@ -0,0 +1,271 @@
|
|||
package DBIx::FullTextSearch::StopList;
|
||||
use strict;
|
||||
|
||||
use Carp;
|
||||
|
||||
sub create_default {
|
||||
my ($class, $dbh, $TABLE, $language) = @_;
|
||||
|
||||
croak("Error: no language specified") unless $language;
|
||||
|
||||
$language = lc $language;
|
||||
|
||||
my @stopList;
|
||||
|
||||
if($language eq 'english'){
|
||||
@stopList = qw/ a about after all also an and any are as at be because been but by can co corp could for from had has have he her his if in inc into is it its last more most mr mrs ms mz no not of on one only or other out over s says she so some such than that the their there they this to up was we were when which who will with would /;
|
||||
} elsif ($language eq 'czech'){
|
||||
@stopList = qw/ a aby ale ani a¾ bude by byl byla bylo být co èi dal¹í do i jak jako je jeho jejich jen je¹tì ji¾ jsem jsme jsou k kde kdy¾ korun která které který kteøí let mezi má mù¾e na nebo není ne¾ o od pak po podle pouze pro proti první pøed pøi roce roku øekl s se si své tak také tedy to tom tím u u¾ v ve v¹ak z za ze ¾e/;
|
||||
} elsif ($language eq 'danish'){
|
||||
@stopList = qw/ af aldrig alle altid bagved De de der du efter eller en endnu et få fjernt for foran fra gennem god han her hos hovfor hun hurtig hvad hvem hvonår hvor hvordan hvorhen I i imod ja jeg langsom lidt mange måske med meget mellem mere mindre når nede nej nok nu og oppe på rask sammen temmelig til uden udenfor under ved vi /;
|
||||
} elsif ($language eq 'dutch'){
|
||||
@stopList = qw/ aan aangaande aangezien achter achterna afgelopen al aldaar aldus alhoewel alias alle allebei alleen alsnog altijd altoos ander andere anders anderszins behalve behoudens beide beiden ben beneden bent bepaald betreffende bij binnen binnenin boven bovenal bovendien bovengenoemd bovenstaand bovenvermeld buiten daar daarheen daarin daarna daarnet daarom daarop daarvanlangs dan dat de die dikwijls dit door doorgaand dus echter eer eerdat eerder eerlang eerst elk elke en enig enigszins enkel er erdoor even eveneens evenwel gauw gedurende geen gehad gekund geleden gelijk gemoeten gemogen geweest gewoon gewoonweg haar had hadden hare heb hebben hebt heeft hem hen het hierbeneden hierboven hij hoe hoewel hun hunne ik ikzelf in inmiddels inzake is jezelf jij jijzelf jou jouw jouwe juist jullie kan klaar kon konden krachtens kunnen kunt later liever maar mag meer met mezelf mij mijn mijnent mijner mijzelf misschien mocht mochten moest moesten moet moeten mogen na naar nadat net niet noch nog nogal nu of ofschoon om omdat omhoog omlaag omstreeks omtrent omver onder ondertussen ongeveer ons onszelf onze ook op opnieuw opzij over overeind overigens pas precies reeds rond rondom sedert sinds sindsdien slechts sommige spoedig steeds tamelijk tenzij terwijl thans tijdens toch toen toenmaals toenmalig tot totdat tussen uit uitgezonderd vaak van vandaan vanuit vanwege veeleer verder vervolgens vol volgens voor vooraf vooral vooralsnog voorbij voordat voordezen voordien voorheen voorop vooruit vrij vroeg waar waarom wanneer want waren was wat weer weg wegens wel weldra welk welke wie wiens wier wij wijzelf zal ze zelfs zichzelf zij zijn zijne zo zodra zonder zou zouden zowat zulke zullen zult /;
|
||||
} elsif ($language eq 'finnish'){
|
||||
@stopList = qw/ aina alla ansiosta ehkä ei enemmän ennen etessa haikki hän he hitaasti hoikein hyvin ilman ja jälkeen jos kanssa kaukana kenties keskellä kesken koskaan kuinkan kukka kyllä kylliksi lähellä läpi liian lla lla luona me mikä miksi milloin milloinkan minä missä miten nopeasti nyt oikea oikealla paljon siellä sinä ssa sta suoraan tai takana takia tarpeeksi tässä te ulkopuolella vähän vahemmän vasen vasenmalla vastan vielä vieressä yhdessä ylös /;
|
||||
} elsif ($language eq 'french'){
|
||||
@stopList = qw/ a à afin ailleurs ainsi alors après attendant au aucun aucune au-dessous au-dessus auprès auquel aussi aussitôt autant autour aux auxquelles auxquels avec beaucoup ça ce ceci cela celle celles celui cependant certain certaine certaines certains ces cet cette ceux chacun chacune chaque chez combien comme comment concernant dans de dedans dehors déjà delà depuis des dès desquelles desquels dessus donc donné dont du duquel durant elle elles en encore entre et étaient était étant etc eux furent grâce hormis hors ici il ils jadis je jusqu jusque la là laquelle le lequel les lesquelles lesquels leur leurs lors lorsque lui ma mais malgré me même mêmes mes mien mienne miennes miens moins moment mon moyennant ne ni non nos notamment notre nôtre notres nôtres nous nulle nulles on ou où par parce parmi plus plusieurs pour pourquoi près puis puisque quand quant que quel quelle quelque quelques-unes quelques-uns quelqu''un quelqu''une quels qui quiconque quoi quoique sa sans sauf se selon ses sien sienne siennes siens soi soi-même soit sont suis sur ta tandis tant te telle telles tes tienne tiennes tiens toi ton toujours tous toute toutes très trop tu un une vos votre vôtre vôtres vous vu y /;
|
||||
} elsif ($language eq 'german'){
|
||||
@stopList = qw/ ab aber allein als also am an auch auf aus außer bald bei beim bin bis bißchen bist da dabei dadurch dafür dagegen dahinter damit danach daneben dann daran darauf daraus darin darüber darum darunter das daß dasselbe davon davor dazu dazwischen dein deine deinem deinen deiner deines dem demselben den denn der derselben des desselben dessen dich die dies diese dieselbe dieselben diesem diesen dieser dieses dir doch dort du ebenso ehe ein eine einem einen einer eines entlang er es etwa etwas euch euer eure eurem euren eurer eures für fürs ganz gar gegen genau gewesen her herein herum hin hinter hintern ich ihm ihn Ihnen ihnen ihr Ihre ihre Ihrem ihrem Ihren ihren Ihrer ihrer Ihres ihres im in ist ja je jedesmal jedoch jene jenem jenen jener jenes kaum kein keine keinem keinen keiner keines man mehr mein meine meinem meinen meiner meines mich mir mit nach nachdem nämlich neben nein nicht nichts noch nun nur ob ober obgleich oder ohne paar sehr sei sein seine seinem seinen seiner seines seit seitdem selbst sich Sie sie sind so sogar solch solche solchem solchen solcher solches sondern sonst soviel soweit über um und uns unser unsre unsrem unsren unsrer unsres vom von vor während war wäre wären warum was wegen weil weit welche welchem welchen welcher welches wem wen wenn wer weshalb wessen wie wir wo womit zu zum zur zwar zwischen zwischens /;
|
||||
} elsif ($language eq 'italian'){
|
||||
@stopList = qw/ a affinchè agl'' agli ai al all'' alla alle allo anzichè avere bensì che chi cioè come comunque con contro cosa da dachè dagl'' dagli dai dal dall'' dalla dalle dallo degl'' degli dei del dell'' delle dello di dopo dove dunque durante e egli eppure essere essi finché fino fra giacchè gl'' gli grazie I il in inoltre io l'' la le lo loro ma mentre mio ne neanche negl'' negli nei nel nell'' nella nelle nello nemmeno neppure noi nonchè nondimeno nostro o onde oppure ossia ovvero per perchè perciò però poichè prima purchè quand''anche quando quantunque quasi quindi se sebbene sennonchè senza seppure si siccome sopra sotto su subito sugl'' sugli sui sul sull'' sulla sulle sullo suo talchè tu tuo tuttavia tutti un una uno voi vostr/;
|
||||
} elsif ($language eq 'portuguese'){
|
||||
@stopList = qw/ a abaixo adiante agora ali antes aqui até atras bastante bem com como contra debaixo demais depois depressa devagar direito e ela elas êle eles em entre eu fora junto longe mais menos muito não ninguem nós nunca onde ou para por porque pouco próximo qual quando quanto que quem se sem sempre sim sob sobre talvez todas todos vagarosamente você vocês /;
|
||||
} elsif ($language eq 'spanish'){
|
||||
@stopList = qw/ a acá ahí ajena ajenas ajeno ajenos al algo algún alguna algunas alguno algunos allá allí aquel aquella aquellas aquello aquellos aquí cada cierta ciertas cierto ciertos como cómo con conmigo consigo contigo cualquier cualquiera cualquieras cuan cuán cuanta cuánta cuantas cuántas cuanto cuánto cuantos cuántos de dejar del demás demasiada demasiadas demasiado demasiados el él ella ellas ellos esa esas ese esos esta estar estas este estos hacer hasta jamás junto juntos la las lo los mas más me menos mía mientras mío misma mismas mismo mismos mucha muchas muchísima muchísimas muchísimo muchísimos mucho muchos muy nada ni ninguna ningunas ninguno ningunos no nos nosotras nosotros nuestra nuestras nuestro nuestros nunca os otra otras otro otros para parecer poca pocas poco pocos por porque que qué querer quien quién quienes quienesquiera quienquiera ser si sí siempre sín Sr Sra Sres Sta suya suyas suyo suyos tal tales tan tanta tantas tanto tantos te tener ti toda todas todo todos tomar tú tuya tuyo un una unas unos usted ustedes varias varios vosotras vosotros vuestra vuestras vuestro vuestros y yo /;
|
||||
} elsif ($language eq 'swedish'){
|
||||
@stopList = qw/ ab aldrig all alla alltid än ännu ånyo är att av avser avses bakom bra bredvid dä där de dem den denna deras dess det detta du efter efterät eftersom ej eller emot en ett fastän för fort framför från genom gott hamske han här hellre hon hos hur i in ingen innan inte ja jag långsamt långt lite man med medan mellan mer mera mindre mot myckett när nära nej nere ni nu och oksa om över på så sådan sin skall som till tillräckligt tillsammans trotsatt under uppe ut utan utom vad väl var varför vart varthän vem vems vi vid vilken /;
|
||||
}
|
||||
|
||||
croak("Error: language $language is not a supported") unless @stopList;
|
||||
|
||||
my $sl = $class->create_empty($dbh, $TABLE);
|
||||
|
||||
$sl->add_stop_word(\@stopList);
|
||||
return $sl;
|
||||
}
|
||||
|
||||
# Class method: creates the backing table "<name>_stoplist" and returns a
# new StopList object with an empty in-memory word list.
# Croaks when the CREATE TABLE statement fails.
sub create_empty {
    my ($class, $dbh, $name) = @_;

    my $table = $name . '_stoplist';

    my $create_sql = qq{
        CREATE TABLE $table
        (word VARCHAR(255) PRIMARY KEY)
    };
    unless ($dbh->do($create_sql)) {
        croak "Can't create table $table: " . $dbh->errstr;
    }

    # the object is a plain hash: handle, logical name, table name, word cache
    return bless {
        'dbh'      => $dbh,
        'name'     => $name,
        'table'    => $table,
        'stoplist' => {},
    }, $class;
}
|
||||
|
||||
# Class method: attaches to the existing table "<name>_stoplist" and loads
# every stored word into the in-memory hash. Words are used as hash keys
# exactly as stored. Croaks when the table cannot be read.
sub open {
    my ($class, $dbh, $name) = @_;

    my $table = $name . '_stoplist';
    my $self  = bless {
        'dbh'      => $dbh,
        'name'     => $name,
        'table'    => $table,
        'stoplist' => {},
    }, $class;

    # load stoplist into a hash
    my $select_sql = qq{
        SELECT word FROM $table
    };
    my $stored_words = $dbh->selectcol_arrayref($select_sql)
        or croak "Can't load stoplist from $table: " . $dbh->errstr;

    @{ $self->{'stoplist'} }{ @$stored_words } = (1) x @$stored_words;

    return $self;
}
|
||||
|
||||
# Drops the table behind this stop list and forgets all cached words.
# Croaks when the DROP statement fails (the cache is left untouched then).
sub drop {
    my $self = shift;
    my ($dbh, $table) = @{$self}{ 'dbh', 'table' };

    my $drop_sql = qq{
        DROP table $table
    };
    unless ($dbh->do($drop_sql)) {
        croak "Can't drop table $table: " . $dbh->errstr;
    }

    $self->{'stoplist'} = {};
}
|
||||
|
||||
# Removes every word from the stop list: deletes all rows of the backing
# table and resets the in-memory cache. Croaks when the DELETE fails.
sub empty {
    my $self = shift;
    my ($dbh, $table) = @{$self}{ 'dbh', 'table' };

    my $delete_sql = qq{
        DELETE FROM $table
    };
    unless ($dbh->do($delete_sql)) {
        croak "Can't empty table $table: " . $dbh->errstr;
    }

    $self->{'stoplist'} = {};
}
|
||||
|
||||
# Adds one word (plain scalar) or several words (array reference) to the
# stop list. Words already present (compared case-insensitively through
# is_stop_word) are skipped; the rest are remembered in the in-memory hash
# under their lower-cased form and inserted into the table, as given, with
# a single multi-row INSERT.
#
# Returns the result of the INSERT, or nothing when there was no new word.
sub add_stop_word {
    my ($self, $words) = @_;
    my $dbh = $self->{'dbh'};

    $words = [ $words ] unless ref($words) eq 'ARRAY';

    my @new_stop_words;

    for my $word (@$words){
        next if $self->is_stop_word($word);
        push @new_stop_words, $word;
        $self->{'stoplist'}->{lc($word)} = 1;
    }

    # Nothing new to store: an INSERT with an empty VALUES list is not
    # valid SQL, so do not send one.
    return unless @new_stop_words;

    my $SQL = "INSERT INTO $self->{'table'} (word) VALUES " . join(',', ('(?)') x @new_stop_words);
    return $dbh->do($SQL,{},@new_stop_words);
}
|
||||
|
||||
# Removes one word (plain scalar) or several words (array reference) from
# the stop list. Each word that is currently a stop word is deleted from
# the table (matching the word exactly as passed) and its lower-cased key
# is dropped from the in-memory hash; unknown words are ignored.
sub remove_stop_word {
    my ($self, $words) = @_;

    my @candidates = ref($words) eq 'ARRAY' ? @$words : ($words);

    my $delete_sql = qq{
        DELETE FROM $self->{'table'} WHERE word=?
    };
    my $delete_sth = $self->{'dbh'}->prepare($delete_sql);

    my $cache = $self->{'stoplist'};

    # membership is re-checked per word, so a repeated word is deleted once
    foreach my $candidate (@candidates) {
        if ($self->is_stop_word($candidate)) {
            $delete_sth->execute($candidate);
            delete $cache->{ lc($candidate) };
        }
    }
}
|
||||
|
||||
# Returns true when the lower-cased form of $word is a key of the
# in-memory stop list.
sub is_stop_word {
    my ($self, $word) = @_;
    return exists $self->{'stoplist'}->{ lc($word) };
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
DBIx::FullTextSearch::StopList - Stopwords for DBIx::FullTextSearch
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use DBIx::FullTextSearch::StopList;
|
||||
# connect to database (regular DBI)
|
||||
my $dbh = DBI->connect('dbi:mysql:database', 'user', 'passwd');
|
||||
|
||||
# create a new empty stop word list
|
||||
my $sl1 = DBIx::FullTextSearch::StopList->create_empty($dbh, 'sl_web_1');
|
||||
|
||||
# or create a new one with default stop words
|
||||
my $sl2 = DBIx::FullTextSearch::StopList->create_default($dbh, 'sl_web_2', 'english');
|
||||
|
||||
# or open an existing one
|
||||
my $sl3 = DBIx::FullTextSearch::StopList->open($dbh, 'sl_web_3');
|
||||
|
||||
# add stop words
|
||||
$sl1->add_stop_word(['a','in','on','the']);
|
||||
|
||||
# remove stop words
|
||||
$sl2->remove_stop_word(['be','because','been','but','by']);
|
||||
|
||||
# check if word is in stoplist
|
||||
$bool = $sl1->is_stop_word('in');
|
||||
|
||||
# empty stop words
|
||||
$sl3->empty;
|
||||
|
||||
# drop stop word table
|
||||
$sl2->drop;
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
DBIx::FullTextSearch::StopList provides stop lists that can be used with L<DBIx::FullTextSearch>.
|
||||
StopList objects can be reused across several FullTextSearch objects.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
=over 4
|
||||
|
||||
=head2 CONSTRUCTORS
|
||||
|
||||
=item create_empty
|
||||
|
||||
my $sl = DBIx::FullTextSearch::StopList->create_empty($dbh, $sl_name);
|
||||
|
||||
This class method creates a new StopList object.
|
||||
|
||||
=item create_default
|
||||
|
||||
my $sl = DBIx::FullTextSearch::StopList->create_default($dbh, $sl_name, $language);
|
||||
|
||||
This class method creates a new StopList object, with default words loaded in for the
|
||||
given language. Supported languages include Czech, Danish, Dutch, English, Finnish, French,
|
||||
German, Italian, Portuguese, Spanish, and Swedish.
|
||||
|
||||
=item open
|
||||
|
||||
my $sl = DBIx::FullTextSearch::StopList->open($dbh, $sl_name);
|
||||
|
||||
Opens and returns StopList object
|
||||
|
||||
=head2 OBJECT METHODS
|
||||
|
||||
=item add_stop_word
|
||||
|
||||
$sl->add_stop_word(\@stop_words);
|
||||
|
||||
Adds stop words to StopList object. Expects array reference as argument.
|
||||
|
||||
=item remove_stop_word
|
||||
|
||||
$sl->remove_stop_word(\@stop_words);
|
||||
|
||||
Remove stop words from StopList object.
|
||||
|
||||
=item is_stop_word
|
||||
|
||||
$bool = $sl->is_stop_word($stop_word);
|
||||
|
||||
Returns true iff stop_word is in the StopList object.
|
||||
|
||||
=item empty
|
||||
|
||||
$sl->empty;
|
||||
|
||||
Removes all stop words in StopList object.
|
||||
|
||||
=item drop
|
||||
|
||||
$sl->drop;
|
||||
|
||||
Removes table associated with the StopList object.
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
T.J. Mather, tjmather@tjmather.com,
|
||||
http://www.tjmather.com/
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
All rights reserved. This package is free software; you can
|
||||
redistribute it and/or modify it under the same terms as Perl itself.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<DBIx::FullTextSearch>
|
||||
78
lib/DBIx/FullTextSearch/String.pm
Normal file
78
lib/DBIx/FullTextSearch/String.pm
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
|
||||
package DBIx::FullTextSearch::String;
|
||||
use DBIx::FullTextSearch;
|
||||
use strict;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch !;
|
||||
|
||||
# Creates the conversion table that maps string names of documents to
# numeric ids. The table name defaults to "<table>_docid".
# Returns an error message on failure and nothing on success; the created
# table is recorded in $fts->{'created_tables'}.
sub _create_tables {
    my $fts = shift;

    if (not defined $fts->{'doc_id_table'}) {
        $fts->{'doc_id_table'} = $fts->{'table'} . '_docid';
    }

    return "The parameter name_length has to be specified."
        if not $fts->{'name_length'};

    # integer column type matching the configured width of document ids
    my $id_column_type = $DBIx::FullTextSearch::BITS_TO_INT{$fts->{'doc_id_bits'}};

    my $create_sql = <<"END_SQL";
create table $fts->{'doc_id_table'} (
name varchar($fts->{'name_length'}) binary not null,
id $id_column_type unsigned not null auto_increment,
primary key (id),
unique (name)
)
END_SQL

    my $dbh = $fts->{'dbh'};
    unless ($dbh->do($create_sql)) {
        return $dbh->errstr;
    }
    push @{ $fts->{'created_tables'} }, $fts->{'doc_id_table'};
    return;
}
|
||||
|
||||
# Maps a document name to its numeric id. When the name has no row in the
# doc-id table yet, a row is inserted and the freshly generated id (read
# from the statement's mysql_insertid attribute) is returned instead.
# Both statements are prepared once and cached on the object; a failed
# prepare or insert dies with the handle's error string.
sub get_id_for_name {
    my ($self, $string) = @_;
    my $dbh = $self->{'dbh'};
    my $doc_id_table = $self->{'doc_id_table'};

    my $lookup_sth = $self->{'name_to_id_sth'};
    if (not defined $lookup_sth) {
        $lookup_sth = $self->{'name_to_id_sth'}
            = $dbh->prepare("select id from $doc_id_table where name = ?");
    }
    $lookup_sth or die $dbh->errstr;

    my $id = $dbh->selectrow_array($lookup_sth, {}, $string);
    return $id if defined $id;

    # unknown name: allocate a new id for it
    my $insert_sth = $self->{'new_name_sth'};
    if (not defined $insert_sth) {
        $insert_sth = $self->{'new_name_sth'}
            = $dbh->prepare("insert into $doc_id_table values (?, null)");
    }
    $insert_sth or die $dbh->errstr;

    $insert_sth->execute($string) or die $insert_sth->errstr;
    $id = $insert_sth->{'mysql_insertid'};
    return $id;
}
|
||||
|
||||
# Indexes $data under a string document name: the name is translated to
# its numeric id first, then the parent class does the actual indexing.
sub index_document {
    my ($self, $name, $content) = @_;

    my $numeric_id = $self->get_id_for_name($name);
    return $self->SUPER::index_document($numeric_id, $content);
}
|
||||
|
||||
# Deletes the document known under the given string name by handing its
# numeric id to the parent class. The id comes from get_id_for_name, which
# inserts a row for a name it has not seen before.
sub delete_document {
    my ($self, $name) = @_;

    return $self->SUPER::delete_document( $self->get_id_for_name($name) );
}
|
||||
|
||||
# Runs the parent's contains_hashref and re-keys its result from numeric
# document ids to document names, using the doc-id table.
# Returns nothing when the parent found no documents.
sub contains_hashref {
    my ($self, @query) = @_;

    my $hits_by_id = $self->SUPER::contains_hashref(@query);
    my @ids = keys %$hits_by_id;
    return unless @ids;

    my $doc_id_table = $self->{'doc_id_table'};
    my $placeholders = join(',', ('?') x @ids);

    # each row is [ name, id ]
    my $rows = $self->{'dbh'}->selectall_arrayref(
        "select name, id from $doc_id_table where id in (" . $placeholders . ")",
        {}, @ids);

    my %hits_by_name = map { ( $_->[0], $hits_by_id->{ $_->[1] } ) } @$rows;
    return \%hits_by_name;
}
|
||||
|
||||
|
||||
1;
|
||||
|
||||
142
lib/DBIx/FullTextSearch/Table.pm
Normal file
142
lib/DBIx/FullTextSearch/Table.pm
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
|
||||
package DBIx::FullTextSearch::TableString;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch::String DBIx::FullTextSearch::Table !;
|
||||
|
||||
# Indexes the row identified by $id: the column data fetched from the
# table, plus the optional extra $data, is passed on as an array reference.
sub index_document {
    my ($self, $id, $data) = @_;

    my @parts = (
        $self->get_the_data_from_table($id),
        ( $data ? ($data) : () ),
    );
    return $self->SUPER::index_document($id, \@parts);
}
|
||||
|
||||
package DBIx::FullTextSearch::TableNum;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch::Table !;
|
||||
|
||||
# Indexes the row with numeric key $id: the column data fetched from the
# table, plus the optional $extra_data, is passed on as an array reference.
sub index_document {
    my ($self, $id, $extra_data) = @_;

    my @pieces = $self->get_the_data_from_table($id);
    if ($extra_data) {
        push @pieces, $extra_data;
    }
    return $self->SUPER::index_document($id, \@pieces);
}
|
||||
|
||||
|
||||
package DBIx::FullTextSearch::Table;
|
||||
use DBIx::FullTextSearch;
|
||||
use strict;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch !;
|
||||
|
||||
# Reblesses the object into the concrete table frontend: TableString when
# a doc-id table is configured (string keys), TableNum otherwise.
# The String module is loaded on demand; a load failure is not reported.
sub _open_tables {
    my $self = shift;

    my $has_name_table = defined $self->{'doc_id_table'};
    eval 'use DBIx::FullTextSearch::String' if $has_name_table;

    return bless $self, ( $has_name_table
        ? 'DBIx::FullTextSearch::TableString'
        : 'DBIx::FullTextSearch::TableNum' );
}
|
||||
|
||||
# we do not create any new tables, we just check that the parameters are
# OK (the table and columns exist, etc.)
#
# Reads table_name, column_name and column_id_name from $fts; a column_name
# of the form "table.column" supplies the table when table_name is missing.
# The type of the id column then decides the frontend: an integer type
# reblesses $fts into TableNum and sets doc_id_bits; anything else
# reblesses into TableString, sets name_length from the declared column
# length and lets DBIx::FullTextSearch::String create the name-to-id table.
#
# Returns an error message on failure, otherwise undef (or whatever
# DBIx::FullTextSearch::String::_create_tables reported).
sub _create_tables {
    my $fts = shift;
    my ($table, $column, $id) = @{$fts}{ qw! table_name column_name
        column_id_name ! };
    if (not defined $table and defined $column and $column =~ /\./) {
        ($table, $column) = ($column =~ /^(.*)\.(.*)$/s);
    }
    my $id_type;

    if (not defined $table) {
        return "The parameter table_name has to be specified with the table frontend.";
    }
    if (not defined $column) {
        return "The parameter column_name has to be specified with the table frontend.";
    }
    my $dbh = $fts->{'dbh'};
    my $sth = $dbh->prepare("show columns from $table");
    # a missing table is reported through the return value, not by DBI
    $sth->{'PrintError'} = 0;
    $sth->{'RaiseError'} = 0;
    $sth->execute or return "The table `$table' doesn't exist.";

    # each row describes one column: [0] name, [1] type, [3] key flag
    my $info = $dbh->selectall_arrayref($sth,
        { 'PrintError' => 0, 'RaiseError' => 0 });
    if (not defined $info) {
        return "The table `$table' doesn't exist.";
    }

    # use Data::Dumper; print Dumper $info;

    if (not defined $id) {
        # search for column with primary key
        my $pri_num = 0;
        for my $i (0 .. $#$info) {
            if ($info->[$i][3] eq 'PRI') {
                $pri_num++;
                $id = $info->[$i][0];
                $id_type = $info->[$i][1];
            }
        }
        if ($pri_num > 1) {
            return 'The primary key has to be one-column.';
        }
        if ($pri_num == 0) {
            return "No primary key found in the table `$table'.";
        }
    }
    else {
        # find '$id' column
        for my $i (0 .. $#$info) {
            if ($info->[$i][0] eq $id){
                $id_type = $info->[$i][1];
                last;
            }
        }
    }

    unless(defined $id_type){
        return "No key named '$id' found in the table '$table'";
    }

    # selecting zero rows is enough to find out whether the column exists
    my $testcol = $dbh->prepare("select $column from $table where 1 = 0");
    $testcol->execute or
        return "Column `$column' doesn't exist in table `$table'.";
    $testcol->finish;

    $fts->{'column_id_name'} = $id;

    my $errstr;

    if ($id_type =~ /([a-z]*int)/) {
        $fts->{'doc_id_bits'} = $DBIx::FullTextSearch::INT_TO_BITS{$1};
        bless $fts, 'DBIx::FullTextSearch::TableNum';
    }
    else {
        # Use the captured value, not $1: when the type carries no
        # "(length)" the match fails and $1 would still hold the capture
        # of some earlier, unrelated match.
        my ($length) = ($id_type =~ /^\w+\((\d+)\)$/);
        $fts->{'name_length'} = $length;
        eval 'use DBIx::FullTextSearch::String';
        bless $fts, 'DBIx::FullTextSearch::TableString';
        $errstr = $fts->DBIx::FullTextSearch::String::_create_tables($fts);
    }
    ### use Data::Dumper; print Dumper $fts;
    return $errstr;
}
|
||||
|
||||
# Fetches the indexed column(s) of the row whose id column equals $id.
# In list context the selected values are returned as a list; otherwise
# they are joined with single spaces. The statement is prepared once and
# cached on the object.
sub get_the_data_from_table {
    my ($self, $id) = @_;
    my $dbh = $self->{'dbh'};

    my $fetch_sth = $self->{'get_data_sth'};
    unless (defined $fetch_sth) {
        $fetch_sth = $self->{'get_data_sth'} = $dbh->prepare("
            select $self->{'column_name'} from $self->{'table_name'}
            where $self->{'column_id_name'} = ?
            ");
    }

    my @row = $dbh->selectrow_array($fetch_sth, {}, $id);
    return @row if wantarray;
    return join(" ", @row);
}
|
||||
|
||||
1;
|
||||
6
lib/DBIx/FullTextSearch/TestConfig.pm
Normal file
6
lib/DBIx/FullTextSearch/TestConfig.pm
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Package hash with three entries: 'password', 'dsn' and 'user'.
# Presumably these are the DBI connection settings used by the test
# scripts -- TODO confirm against the test suite.
%DBIx::FullTextSearch::TestConfig::Config = (
|
||||
'password' => undef,
|
||||
'dsn' => 'dbi:mysql:test',
|
||||
'user' => 'test'
|
||||
);
|
||||
1;
|
||||
30
lib/DBIx/FullTextSearch/URL.pm
Normal file
30
lib/DBIx/FullTextSearch/URL.pm
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
|
||||
package DBIx::FullTextSearch::URL;
|
||||
use DBIx::FullTextSearch::String;
|
||||
use strict;
|
||||
use vars qw! @ISA !;
|
||||
@ISA = qw! DBIx::FullTextSearch::String !;
|
||||
|
||||
use LWP::UserAgent;
|
||||
|
||||
# Fetches $uri with a single GET request (simple_request) and indexes the
# response body, followed by the optional $extra_data, under the URI as
# document name. The user agent is created on first use and cached.
# On an unsuccessful response the message is stored in $self->{'errstr'}
# and nothing is returned.
sub index_document {
    my ($self, $uri, $extra_data) = @_;

    my $agent = $self->{'user_agent'};
    unless (defined $agent) {
        $agent = $self->{'user_agent'} = LWP::UserAgent->new;
    }

    my $response = $agent->simple_request( HTTP::Request->new('GET', $uri) );

    unless ($response->is_success) {
        $self->{'errstr'} = $response->message;
        return;
    }

    my $content = $response->content;
    $content .= " $extra_data" if $extra_data;
    return $self->SUPER::index_document($uri, $content);
}
|
||||
|
||||
1;
|
||||
|
||||
395
lib/HTML/Highlight.pm
Normal file
395
lib/HTML/Highlight.pm
Normal file
|
|
@ -0,0 +1,395 @@
|
|||
|
||||
package HTML::Highlight;
|
||||
|
||||
use locale;
|
||||
|
||||
use strict;
|
||||
use Carp;
|
||||
|
||||
BEGIN {
|
||||
use vars qw ($VERSION @ISA);
|
||||
$VERSION = 0.20;
|
||||
@ISA = ();
|
||||
}
|
||||
|
||||
END { }
|
||||
|
||||
my $MIN_SECTION_LENGTH = 60;
|
||||
my $DEFAULT_SECTION_LENGTH = 80;
|
||||
|
||||
# Constructor. Accepts key => value pairs (keys are case-insensitive):
#   words          - array ref of words/patterns to highlight
#   wildcards      - array ref of wildcards ('%', '*' or undef), one per word
#   colors         - array ref of CSS colors (defaults provided)
#   czech_language - true to load CzFast for diacritics-insensitive matching
#   debug          - true to print debugging output
# Croaks on an odd number of arguments, on an unknown key, and when words
# or wildcards is not an array reference.
sub new {
    # keep the invocant in a lexical: assigning to the global $_ would
    # clobber the caller's $_ and die when it is aliased to a constant
    my $proto = shift;
    my $class = ref($proto) || $proto;

    croak ('HTML::Highlight - even number of parameters expected.')
        if (@_ % 2);

    # set the defaults
    my $self = {
        words => [],
        wildcards => [],
        colors => [
            '#ffff66',
            '#A0FFFF',
            '#99ff99',
            '#ff9999',
            '#ff66ff'
        ],
        czech_language => 0,
        debug => 0
    };

    bless ($self, $class);

    # get parameters, overriding the defaults
    for (my $i = 0; $i <= $#_; $i += 2) {
        exists ( $self->{lc($_[$i])} ) or
            croak ('HTML::Highlight - invalid parameter ' . $_[$i] . '.');
        $self->{lc($_[$i])} = $_[($i + 1)];
    }

    croak ('HTML::Highlight - "words" and "wildcards" parameters must be references to arrays')
        if (ref($self->{words}) ne 'ARRAY' or ref($self->{wildcards}) ne 'ARRAY');

    require CzFast if ($self->{czech_language});

    return $self;
}
|
||||
|
||||
|
||||
# Returns a copy of $document in which every configured word is wrapped in
# a highlighting <span>. The document is processed once per word; text
# between tags is passed to _highlight, tags themselves are copied
# through untouched. Colors are taken in order and start over from the
# first one when the list runs out.
# Croaks when no document is given; returns '' for an empty document.
sub highlight {
    my $self = shift;
    my $document = shift;

    croak ('HTML::Highlight - no document defined')
        if (not defined($document));
    return '' if (length($document) == 0);

    my $doc = $document;

    my $cindex = 0;
    for my $i (0 .. $#{ $self->{words} }) {
        # rotate back to the first color when this slot has none
        $cindex = 0 unless $self->{colors}->[$cindex];
        my $color = $self->{colors}->[$cindex];

        my $out = '';
        my $rest = $doc;
        # test the length, not the truth value: a remainder of "0" is
        # still text that has to be kept
        while (length $rest) {
            if ($rest =~ /(.*?)(<.*?>)(.*)/s) {
                my ($text, $tag, $tail) = ($1, $2, $3);
                $out .= $self->_highlight($text, $i, $color);
                $out .= $tag;
                $rest = $tail;
            }
            else {
                # no tag left: the remainder is plain text
                $out .= $self->_highlight($rest, $i, $color);
                last;
            }
        }
        $doc = $out;
        $cindex++;
    }

    return $doc;
}
|
||||
|
||||
# Returns a reference to an array of text sections, each showing the first
# occurrence of one of the configured words together with some
# surrounding text. $sectlen is the wanted section length; values that
# are missing or below the minimum fall back to the default length.
# Tags are stripped from the document first. A word is skipped when a
# section already collected for another word matches it.
# State is kept in $self->{context}, {sectlen} and {sections}.
sub preview_context {
    my $self = shift;
    my $document = shift;
    my $sectlen = shift;

    $self->{context} = {};
    # $sectlen is optional, so guard the numeric comparison
    $self->{sectlen} = (defined $sectlen and $sectlen >= $MIN_SECTION_LENGTH)
        ? $sectlen : $DEFAULT_SECTION_LENGTH;
    $self->{sections} = [];

    # /s lets a tag that spans several lines be removed as well, the same
    # way highlight() recognizes tags
    $document =~ s/<.*?>//gs;

    for (my $i = 0; $i < @{$self->{words}}; $i++) {
        my $pattern = $self->{czech_language} ?
            CzFast::czregexp($self->{words}->[$i]) :
            $self->{words}->[$i];

        # a missing wildcard means "match the word exactly"
        my $wildcard = $self->{wildcards}->[$i];
        $wildcard = '' unless defined $wildcard;
        my $regexp;

        if ($wildcard eq '%') {
            $regexp = "${pattern}\\w*";
        }
        elsif ($wildcard eq '*') {
            $regexp = "${pattern}s?";
        }
        else {
            $regexp = $pattern;
        }

        if (not $self->{context}->{$pattern}
            and not grep (/$regexp/i, values %{$self->{context}})) {
            # characters of context to keep on each side of the word
            my $chars = int(($self->{sectlen} - length($pattern)) / 2);
            print "Chars: $chars\n" if ($self->{debug});
            if ($document =~ /(?:^|\W)(.{0,$chars})(\W+|^)($regexp)(\W+|$)(.{0,$chars})(?:\W|$)/si) {
                my $section = $1.$2.$3.$4.$5;
                $self->{context}->{$pattern} = $section;
                push(@{$self->{sections}}, $section);
            }
        }
    }

    return $self->{sections};
}
|
||||
|
||||
#########################
|
||||
#### private methods ####
|
||||
#########################
|
||||
|
||||
# Wraps every occurrence of one configured word in $str in a colored
# <span>. $word is the index into the words/wildcards arrays, $color the
# CSS color to use. A match has to start at the beginning of the string
# or after a non-word character. The wildcard decides where it may end:
#   '%'   - the word followed by any further word characters
#   '*'   - the word, optionally followed by "s"
#   undef - exactly the word
# For '*' and undef the match must not be followed by a word character,
# so "car" does not highlight the start of "careful".
sub _highlight {
    my $self = shift;
    my $str = shift;
    my $word = shift;
    my $color = shift;

    my $pattern = $self->{words}->[$word];
    $pattern = CzFast::czregexp($pattern) if ($self->{czech_language});

    # a missing wildcard means "match the word exactly"
    my $wildcard = $self->{wildcards}->[$word];
    $wildcard = '' unless defined $wildcard;
    my $regexp;
    # kept in a variable so that the "!" of the look-ahead does not clash
    # with the delimiter of the substitution below
    my $boundary = '(?!\w)';

    if ($wildcard eq '%') {
        my $pat = $self->{czech_language} ? _cz_pattern() : '\w*';
        $regexp = "${pattern}$pat";
        # the pattern itself already extends over the rest of the word
        $boundary = '';
    }
    elsif ($wildcard eq '*') {
        $regexp = "${pattern}s?";
    }
    else {
        $regexp = $pattern;
    }

    print "$str: $pattern | $wildcard | $regexp | $color\n" if ($self->{debug});
    $str =~ s!(\W+|^)($regexp)$boundary!$1<span style="background-color: $color">$2</span>!sig;
    return $str;
}
|
||||
|
||||
# Builds the regexp fragment used for the '%' wildcard in Czech mode:
# a repeated group of alternatives, one CzFast::czregexp() expansion for
# each letter from 'a' to 'z'.
sub _cz_pattern {
    my @alternatives = map { CzFast::czregexp($_) } ('a' .. 'z');
    return '(' . join('|', @alternatives) . ')*';
}
|
||||
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
B<HTML::Highlight - A module to highlight words or patterns in HTML documents>
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use HTML::Highlight;
|
||||
|
||||
# create the highlighter object
|
||||
|
||||
my $hl = new HTML::Highlight (
|
||||
words => [
|
||||
'word',
|
||||
'any',
|
||||
'car',
|
||||
'some phrase'
|
||||
],
|
||||
wildcards => [
|
||||
undef,
|
||||
'%',
|
||||
'*',
|
||||
undef
|
||||
],
|
||||
colors => [
|
||||
'#FF0000',
|
||||
'red',
|
||||
'green',
|
||||
'rgb(255, 0, 0)'
|
||||
],
|
||||
czech_language => 0,
|
||||
debug => 0
|
||||
);
|
||||
|
||||
# Remember that you don't need to specify your own colors.
|
||||
# The default colors should be optimal.
|
||||
|
||||
# Now you can use the object to highlight patterns in a document
|
||||
# by passing content of the document to its highlight() method.
|
||||
# The highlighter object "remembers" its configuration.
|
||||
|
||||
my $highlighted_document = $hl->highlight($document);
|
||||
|
||||
|
||||
=head1 MOTIVATION
|
||||
|
||||
This module was originally created to work together with fulltext
|
||||
indexing module DBIx::TextIndex to highlight search results.
|
||||
|
||||
A need for a highlighter that takes wildcard matches and HTML tags into
|
||||
account and supports czech language (or other Slavic languages) was
|
||||
the motivation to create this module.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This module provides Google-like highlighting of words or patterns in HTML
|
||||
documents. This feature is typically used to highlight search results.
|
||||
|
||||
|
||||
=item The constructor:
|
||||
|
||||
my $hl = new HTML::Highlight (
|
||||
words => [],
|
||||
wildcards => [],
|
||||
colors => [],
|
||||
czech_language => 0,
|
||||
debug => 0
|
||||
);
|
||||
|
||||
This is a constructor of the highlighter object. It takes an array of
|
||||
even number of parameters.
|
||||
|
||||
|
||||
The B<words> parameter is a reference to an array of words to highlight.
|
||||
|
||||
The B<wildcards> parameter is a reference to an array of wildcards, that
|
||||
are applied to corresponding words in the B<words> array.
|
||||
|
||||
A wildcard can be either undef or one of '%' or '*'.
|
||||
|
||||
B<The "%" character> means "match any characters":
|
||||
|
||||
"%" applied to 'car' ==> matches "car", "cars", "careful", ...
|
||||
|
||||
|
||||
B<The "*" character> means "match also plural form of the word":
|
||||
|
||||
"*" applied to 'car' ==> matches only "car" or "cars"
|
||||
|
||||
|
||||
B<An undefined wildcard> means "match exactly the corresponding word":
|
||||
|
||||
undefined wildcard applied to 'car' ==> matches only "car"
|
||||
|
||||
|
||||
|
||||
The B<colors> parameter is a reference to an array of CSS color
|
||||
identifiers, that are used to highlight the corresponding words in
|
||||
the B<words> array.
|
||||
|
||||
Default Google-like colors are used if you don't specify your own
|
||||
colors. Number of colors can be lower than number of words - in this case
|
||||
the colors are rotated and some of the words are therefore
|
||||
highlighted using the same color.
|
||||
|
||||
The highlighter takes HTML tags into account and therefore does not
|
||||
"highlight" a word or a pattern inside a tag.
|
||||
|
||||
Support for diacritics-insensitive matching for ISO-8859-2 languages (for
|
||||
example, the Czech language) can be activated using the B<czech_language>
|
||||
option. This feature requires a module B<CzFast> that is available on CPAN in
|
||||
a directory of author TRIPIE or at http://geocities.com/tripiecz/.
|
||||
|
||||
B<Your system's locales must be set correctly to use the
|
||||
czech_language feature.>
|
||||
|
||||
|
||||
=item highlight
|
||||
|
||||
my $hl_document = $hl->highlight($document);
|
||||
|
||||
The only parameter is the document in which you want
|
||||
to highlight the words that were passed to the constructor of the
|
||||
highlighter object. The method returns a version of the document in which
|
||||
the words are highlighted.
|
||||
|
||||
|
||||
=item preview_context
|
||||
|
||||
my $sections = $hl->preview_context($document, $num);
|
||||
|
||||
|
||||
This method takes two parameters. The first one is the document you
|
||||
want to scan for the words that were passed to the constructor of the
|
||||
highlighter object. The second parameter is an optional integer
|
||||
that specifies maximum number of characters in each of the context
|
||||
sections (see below). This parameter defaults to 80
|
||||
characters if it's not specified. Minimum allowed value of this
|
||||
parameter is 60.
|
||||
|
||||
The method returns a reference to an array of sections of the document
|
||||
in which the words that were passed to the constructor appear.
|
||||
HTML tags are removed before the document is processed and are
|
||||
not present in the output.
|
||||
This feature is typically used in search engines to preview a context
|
||||
in which words from a search query appear in the resulting documents.
|
||||
The words are always in the middle of each of the sections. The
|
||||
number of sections this method returns is at most the number of words
|
||||
passed to the constructor of the highlighter object.
|
||||
That means only the first occurrence of each of the words is taken into
|
||||
account.
|
||||
|
||||
=head1 SUPPORT
|
||||
|
||||
No official support is provided, but I welcome any comments, patches
|
||||
and suggestions on my email.
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
I am aware of no bugs.
|
||||
|
||||
=head1 AVAILABILITY
|
||||
|
||||
http://geocities.com/tripiecz/
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
B<Tomas Styblo>, tripie@cpan.org, CPAN-ID TRIPIE
|
||||
|
||||
Prague, the Czech republic
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
HTML::Highlight - A module to highlight words or patterns in HTML documents
|
||||
|
||||
Copyright (C) 2000 Tomas Styblo (tripie@cpan.org)
|
||||
|
||||
This module is free software; you can redistribute it and/or modify it
|
||||
under the terms of either:
|
||||
|
||||
a) the GNU General Public License as published by the Free Software
|
||||
Foundation; either version 1, or (at your option) any later version,
|
||||
or
|
||||
|
||||
b) the "Artistic License" which comes with this module.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either
|
||||
the GNU General Public License or the Artistic License for more details.
|
||||
|
||||
You should have received a copy of the Artistic License with this
|
||||
module, in the file Artistic. If not, I'll be glad to provide one.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
USA
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
perl(1).
|
||||
|
||||
=cut
|
||||
3045
lib/Parse/RecDescent.pm
Normal file
3045
lib/Parse/RecDescent.pm
Normal file
File diff suppressed because it is too large
Load diff
2823
lib/Parse/RecDescent.pod
Normal file
2823
lib/Parse/RecDescent.pod
Normal file
File diff suppressed because it is too large
Load diff
539
lib/WebGUI/Wobject/IndexedSearch.pm
Normal file
539
lib/WebGUI/Wobject/IndexedSearch.pm
Normal file
|
|
@ -0,0 +1,539 @@
|
|||
package WebGUI::Wobject::IndexedSearch;
|
||||
$VERSION = "1.4";
|
||||
|
||||
#Test to see if Time::HiRes will load.
|
||||
my $hasTimeHiRes=1;
|
||||
eval "use Time::HiRes"; $hasTimeHiRes=0 if $@;
|
||||
|
||||
use strict;
|
||||
use WebGUI::Wobject::IndexedSearch::Search;
|
||||
use WebGUI::HTMLForm;
|
||||
use WebGUI::HTML;
|
||||
use WebGUI::Macro;
|
||||
use WebGUI::International;
|
||||
use WebGUI::Session;
|
||||
use WebGUI::SQL;
|
||||
use WebGUI::Wobject;
|
||||
use Tie::IxHash;
|
||||
use WebGUI::Utility;
|
||||
use WebGUI::Paginator;
|
||||
|
||||
our @ISA = qw(WebGUI::Wobject);
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns the display name of this wobject: international message 17
# looked up in the wobject's own namespace.
sub name {
    my ($self) = @_;
    return WebGUI::International::get(17, $self->get("namespace"));
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Constructor. Builds a WebGUI::Wobject with the extended property
# definitions of the indexed search and re-blesses it into $class.
#   $class    - class to bless the new object into
#   $property - property hash reference handed through to WebGUI::Wobject->new
sub new {
    my ($class, $property) = @_;

    # The five filter properties share one definition: a select list
    # that defaults to 'any'.
    my %extended = map {
        ( $_ => { fieldType => 'selectList', defaultValue => 'any' } )
    } qw(searchRoot users namespaces languages contentTypes);

    $extended{indexName}     = { defaultValue => 'default' };
    $extended{paginateAfter} = { defaultValue => 10 };
    $extended{highlight}     = { defaultValue => 1 };
    $extended{previewLength} = { defaultValue => 130 };

    # Default colors for highlight_1 .. highlight_5.
    my @colors = ('#ffff66', '#A0FFFF', '#99ff99', '#ff9999', '#ff66ff');
    for my $index (1 .. scalar @colors) {
        $extended{"highlight_$index"} = { defaultValue => $colors[$index - 1] };
    }

    my $self = WebGUI::Wobject->new(
        -useTemplate        => 1,
        -properties         => $property,
        -extendedProperties => \%extended
    );
    return bless $self, $class;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# UI level of this wobject; always 5.
sub uiLevel {
    my $level = 5;
    return $level;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Builds the edit form of the search wobject and hands the "properties",
# "layout" and "privileges" rows to the parent class' www_edit.
# Returns an HTML error message instead when the docInfo table cannot be
# read or when it contains no index at all.
sub www_edit {
    my $self = shift;

    # Shorthand for an international message in this wobject's namespace.
    my $i18n = sub { WebGUI::International::get($_[0], $self->get("namespace")) };

    tie my %searchRoot, 'Tie::IxHash';

    my $layout     = WebGUI::HTMLForm->new;
    my $properties = WebGUI::HTMLForm->new;
    my $privileges = WebGUI::HTMLForm->new;

    # Unconditional read to catch installation errors.
    my $sth = WebGUI::SQL->unconditionalRead("select distinct(indexName), indexName from IndexedSearch_docInfo");
    if ($sth->errorCode() >= 1) {
        return "<p><b>" . $i18n->(1) . $sth->errorMessage."</b></p>";
    }
    my %indexName;
    while (my @row = $sth->array) {
        $indexName{$row[0]} = $row[1];
    }
    $sth->finish;
    if (not %indexName) {
        return "<p><b>" . $i18n->(2) .
               "<p>" . $i18n->(3) . "</b></p>";
    }

    # Index to use
    $properties->radioList(
        -name     => 'indexName',
        -options  => \%indexName,
        -label    => $i18n->(5),
        -value    => $self->getValue("indexName"),
        -vertical => 1
    );

    # Page roots: "any", the current page, then the root pages from the database.
    %searchRoot = (
        'any' => $i18n->(15),
        $session{page}{pageId} => $i18n->(4),
        WebGUI::SQL->buildHash("select pageId,title from page where parentId=0 and (pageId=1 or pageId>999) order by title")
    );
    $properties->checkList(
        -name     => 'searchRoot',
        -options  => \%searchRoot,
        -label    => $i18n->(6),
        -value    => [ split("\n", $self->getValue("searchRoot")) ],
        -multiple => 1,
        -vertical => 1,
    );

    # Content of specific user
    $properties->selectList(
        -name     => 'users',
        -options  => $self->_getUsers(),
        -label    => $i18n->(7),
        -value    => [ split("\n", $self->getValue("users")) ],
        -multiple => 1,
        -size     => 5
    );

    # Content in specific namespaces
    $properties->selectList(
        -name     => 'namespaces',
        -options  => $self->_getNamespaces,
        -label    => $i18n->(8),
        -value    => [ split("\n", $self->getValue("namespaces")) ],
        -multiple => 1,
        -size     => 5
    );

    # Content in specific language
    $properties->checkList(
        -name     => 'languages',
        -options  => $self->_getLanguages(),
        -label    => $i18n->(9),
        -value    => [ split("\n", $self->getValue("languages")) ],
        -multiple => 1,
    );

    # Only specific content types; the "content" shortcut is not offered here.
    my $contentTypes = $self->_getContentTypes();
    delete $contentTypes->{content};
    $properties->checkList(
        -name     => 'contentTypes',
        -options  => $contentTypes,
        -label    => $i18n->(10),
        -value    => [ split("\n", $self->getValue("contentTypes")) ],
        -multiple => 1,
        -vertical => 1,
    );

    # Layout settings
    $layout->integer(
        -name  => 'paginateAfter',
        -label => $i18n->(11),
        -value => $self->getValue("paginateAfter"),
    );
    $layout->integer(
        -name  => 'previewLength',
        -label => $i18n->(12),
        -value => $self->getValue("previewLength"),
    );
    $layout->yesNo(
        -name  => 'highlight',
        -label => $i18n->(13),
        -value => $self->getValue("highlight"),
    );

    # Color picker for highlight colors
    $layout->raw( -value=>'
<SCRIPT LANGUAGE="Javascript" SRC="'.$session{config}{extrasURL}.'/wobject/IndexedSearch/ColorPicker2.js"></SCRIPT>
<SCRIPT LANGUAGE="JavaScript">
var cp = new ColorPicker("window");
</SCRIPT>'
    );
    foreach my $number (1..5) {
        my $highlight = "highlight_$number";
        $layout->text(
            -name    => $highlight,
            -label   => $i18n->(14) ." $number:",
            -size    => 7,
            -value   => $self->getValue($highlight),
            -subtext => qq{
<A HREF="#" onClick="cp.select($highlight,'$highlight');
return false;" NAME="$highlight" ID="$highlight">Pick</A>}
        );
    }

    return $self->SUPER::www_edit(
        -properties => $properties->printRowsOnly,
        -layout     => $layout->printRowsOnly,
        -privileges => $privileges->printRowsOnly,
        -heading    => "Edit Search",
        -helpId     => 1
    );
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Overrides the default editSave: after the parent class has saved the
# form, the list of searchable pages is computed once and stored in
# IndexedSearch.pageList for faster retrieval. Always returns ''.
sub www_editSave {
    return WebGUI::Privilege::insufficient() unless (WebGUI::Privilege::canEditPage());
    my ($self) = @_;
    $self->SUPER::www_editSave();

    my $searchRoot = $self->get("searchRoot");
    my $pageList;
    if ($searchRoot =~ /any/i) {
        $pageList = 'any';
    }
    else {
        # Each selected root contributes itself plus all pages below it.
        my %pages;
        for my $pageId (split(/\n+/, $searchRoot)) {
            %pages = (%pages, _getSearchablePages($pageId), $pageId => defined);
        }
        $pageList = join(" , ", keys %pages);
    }

    my $sql = "update IndexedSearch set pageList = ".quote($pageList)." where wobjectId = ".$self->get("wobjectId");
    WebGUI::SQL->write($sql);
    return '';
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Runs the search described by the submitted form fields and renders the
# result through the wobject's template.
#
# Template variables set here include the individual query fields, the
# combined "query", "duration", "numberOfResults", "startNr"/"endNr",
# "queryHighlighted", "resultsLoop" and one option loop each for
# namespaces, contentTypes, contentTypesSimple, users and languages.
sub www_view {
    my $self = shift;
    my %var;

    # Do some query handling: the advanced fields are folded into one query.
    $var{exactPhrase} = $session{form}{exactPhrase};
    $var{allWords} = $session{form}{allWords};
    $var{atLeastOne} = $session{form}{atLeastOne};
    $var{without} = $session{form}{without};
    $var{query} = $session{form}{query};
    $var{query} .= qq{ +"$var{exactPhrase}"} if ($var{exactPhrase});
    $var{query} .= " ".join(" ",map("+".$_,split(/\s+/,$var{allWords}))) if ($var{allWords});
    $var{query} .= qq{ $var{atLeastOne}} if ($var{atLeastOne});
    $var{query} .= " ".join(" ",map("-".$_,split(/\s+/,$var{without}))) if ($var{without});

    # Set some standard vars
    $var{submit} = WebGUI::Form::submit({value=>WebGUI::International::get(16, $self->get("namespace"))});
    $var{"int.search"} = WebGUI::International::get(16,$self->get("namespace"));
    $var{wid} = $self->get("wobjectId");
    $var{numberOfResults} = '0';
    $var{"select_".$self->getValue("paginateAfter")} = "selected";

    # Do the search
    my $startTime = ($hasTimeHiRes) ? Time::HiRes::time() : time();
    my $filter = $self->_buildFilter;
    my $search = WebGUI::Wobject::IndexedSearch::Search->new($self->getValue('indexName'));
    $search->open;
    my $results = $search->search($var{query},$filter);
    $var{duration} = (($hasTimeHiRes) ? Time::HiRes::time() : time()) - $startTime;
    $var{duration} = sprintf("%.3f", $var{duration}) if $hasTimeHiRes; # Duration rounded to 3 decimal places

    # Let's see if the search returned any results
    if (defined ($results)) {
        $var{numberOfResults} = scalar(@$results);

        # Deal with pagination: the page links must carry the query and
        # every submitted filter value.
        my $url = "wid=".$self->get("wobjectId")."&func=view&query=".WebGUI::URL::escape($var{query});
        foreach my $field (qw(users namespaces languages contentTypes)) {
            foreach my $value ($session{cgi}->param($field)) {
                $url .= "&$field=".WebGUI::URL::escape($value);
            }
        }
        $url .= "&paginateAfter=".$self->getValue("paginateAfter");
        my $p = WebGUI::Paginator->new(WebGUI::URL::page($url), $results, $self->getValue("paginateAfter"));
        $var{startNr} = 1;
        if ($session{form}{pn}) {
            $var{startNr} = (($session{form}{pn} - 1) * $self->getValue("paginateAfter")) + 1;
        }

        my @highlightColors = map { $self->getValue("highlight_$_") } (1..5);
        $var{queryHighlighted} = $search->highlight($var{query}, undef, \@highlightColors);

        # Get result details for this page
        if ($p->getPageNumber > $p->getNumberOfPages) {
            $var{numberOfResults} = 0;
            $var{resultsLoop} = [];
        } else {
            $var{resultsLoop} = $search->getDetails($p->getPageData,
                highlightColors => \@highlightColors,
                previewLength => $self->getValue('previewLength'),
                highlight => $self->getValue('highlight')
            );
            # Pagination variables
            $var{endNr} = $var{startNr}+(scalar(@{$var{resultsLoop}}))-1;
            $p->appendTemplateVars(\%var);
        }
    }

    # Create a loop with namespaces
    $var{namespaces} = [];
    my $namespaces = $self->_getNamespaces('restricted');
    foreach my $namespace (keys %$namespaces) {
        my $selected = _isSelectedFormValue('namespaces', $namespace);
        push(@{$var{namespaces}}, { value => $namespace, name => $namespaces->{$namespace}, selected => $selected });
    }

    # Create a loop with contentTypes
    #
    # And while we are busy we also create a loop with simplified contentTypes
    # This means: wobject, page, wobjectDetail are masked in one option: content
    $var{contentTypes} = [];
    $var{contentTypesSimple} = [];
    my $contentTypes = $self->_getContentTypes('restricted');
    foreach my $type (keys %$contentTypes) {
        my $selected = _isSelectedFormValue('contentTypes', $type);
        unless ($type =~ /^content$/) { # No shortcut in the detailed contentType list
            push(@{$var{contentTypes}}, {
                value => $type,
                name => $contentTypes->{$type},
                selected => $selected,
                'type_'.$type => 1 });
        }
        # The alternation is grouped so that both anchors apply to every
        # alternative, not only to the first and the last one.
        unless ($type =~ /^(?:page|wobject|wobjectDetail)$/) { # No details in the simple contentType list
            push(@{$var{contentTypesSimple}}, {
                value => $type,
                name => $contentTypes->{$type},
                selected => $selected,
                'type_'.$type => 1 });
        }
    }

    # Create a loop with users
    $var{users} = [];
    my $users = $self->_getUsers('restricted');
    foreach my $user (keys %$users) {
        my $selected = _isSelectedFormValue('users', $user);
        push(@{$var{users}}, { value => $user, name => $users->{$user}, selected => $selected });
    }

    # Create a loop with languages
    $var{languages} = [];
    my $languages = $self->_getLanguages('restricted');
    foreach my $language (keys %$languages) {
        my $selected = _isSelectedFormValue('languages', $language);
        push(@{$var{languages}}, { value => $language, name => $languages->{$language}, selected => $selected });
    }

    # close the search
    $search->close;

    return $self->processTemplate($self->get("templateId"),\%var);
}

# Tells whether $value is currently selected for the multi-valued form
# field $field. The CGI parameter list is consulted when the field was
# submitted; otherwise the value is matched literally (\Q...\E) against
# $session{form}{$field}. Always evaluated in scalar context so the result
# can be stored safely as a hash value.
sub _isSelectedFormValue {
    my ($field, $value) = @_;
    if (scalar $session{cgi}->param($field)) {
        return scalar(isIn($value, $session{cgi}->param($field)));
    }
    return scalar($session{form}{$field} =~ /\Q$value\E/);
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Builds the filter handed to the search as a hash reference. Possible
# keys are pageId, languageId, contentType, ownerId and namespace; each
# value is an array reference. For every filter the submitted form values
# take precedence over the wobject's stored setting, and the value 'any'
# means "no restriction" (the key is then left out).
sub _buildFilter {
    my $self = shift;
    my %filter = ();

    # Quotes every value of a list for use in SQL.
    my $quoteAll = sub { return [ map { quote($_) } @_ ] };

    # pages
    if ($self->getValue('pageList') ne 'any') {
        $filter{pageId} = [ split(/\n+/, $self->getValue('pageList')) ];
    }

    # languages
    if ($session{form}{languages} && ! isIn('any', $session{cgi}->param('languages'))) {
        $filter{languageId} = $quoteAll->($session{cgi}->param('languages'));
    } elsif ($self->getValue('languages') !~ /any/i) {
        $filter{languageId} = $quoteAll->(split(/\n/, $self->getValue('languages')));
    }
    # Some content (i.e. profiles) don't have a language. They must be
    # found as well.
    push(@{$filter{languageId}}, '0') if (exists $filter{languageId});

    # content-types
    if ($session{form}{contentTypes} && ! isIn('any', $session{cgi}->param('contentTypes'))) {
        $filter{contentType} = $quoteAll->($session{cgi}->param('contentTypes'));

        # contentType "content" is a shortcut for "page", "wobject" and "wobjectDetail"
        if (isIn('content', $session{cgi}->param('contentTypes'))) {
            push(@{$filter{contentType}}, map { quote($_) } qw/page wobject wobjectDetail/);
        }
    } elsif ($self->getValue('contentTypes') !~ /any/i) {
        $filter{contentType} = $quoteAll->(split(/\n/, $self->getValue('contentTypes')));
    }

    # users
    if ($session{form}{users} && ! isIn('any', $session{cgi}->param('users'))) {
        $filter{ownerId} = [];
        foreach my $user ($session{cgi}->param('users')) {
            my @userIds = ($user);
            if ($user =~ /\D/) {
                # Not a plain id: treat it as a username in which '*' is a
                # wildcard. Every matching user is kept, not only the
                # first row of the lookup.
                (my $pattern = $user) =~ s/\*/%/g;
                @userIds = WebGUI::SQL->buildArray("select userId from users where username like ".quote($pattern));
            }
            push(@{$filter{ownerId}}, map { quote($_) } grep { defined($_) && /^\d+$/ } @userIds);
        }
    } elsif ($self->getValue('users') !~ /any/i) {
        $filter{ownerId} = $quoteAll->(split(/\n/, $self->getValue('users')));
    }

    # namespaces
    if ($session{form}{namespaces} && ! isIn('any', $session{cgi}->param('namespaces'))) {
        $filter{namespace} = $quoteAll->($session{cgi}->param('namespaces'));
    } elsif ($self->getValue('namespaces') !~ /any/i) {
        $filter{namespace} = $quoteAll->(split(/\n/, $self->getValue('namespaces')));
    }

    # delete $filter{ownerId} if it is an empty array reference
    if (exists($filter{ownerId})) {
        delete $filter{ownerId} unless (scalar(@{$filter{ownerId}}));
    }
    return \%filter;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns a reference to an insertion-ordered hash (languageId => language
# name) that starts with the entry 'any'.
#   $restricted - when true and the wobject's "languages" setting does not
#                 contain "any", only the languages listed in that setting
#                 are returned; otherwise every language found in the
#                 docInfo table is returned.
sub _getLanguages {
    my ($self, $restricted) = @_;
    my $international = WebGUI::SQL->buildHashRef("select distinct(IndexedSearch_docInfo.languageId), language.language from IndexedSearch_docInfo, language
where language.languageId = IndexedSearch_docInfo.languageId");
    tie my %languages, 'Tie::IxHash';

    if (!$restricted or $self->get('languages') =~ /any/i) {
        %languages = ('any' => WebGUI::International::get(24,$self->get("namespace")) , %$international);
        return \%languages;
    }

    $languages{any} = WebGUI::International::get(24,$self->get("namespace"));
    for my $languageId (split(/\n/, $self->get('languages'))) {
        $languages{$languageId} = $international->{$languageId};
    }
    return \%languages;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns a reference to an insertion-ordered hash (namespace => display
# name) that starts with the entry 'any'. The display name is the name
# reported by the matching configured wobject, or the capitalised
# namespace when no such wobject is configured.
#   $restricted - when true and the wobject's "namespaces" setting does not
#                 contain "any", only the namespaces of that setting are
#                 listed; otherwise all namespaces found in the docInfo
#                 table are listed.
sub _getNamespaces {
    my ($self, $restricted) = @_;

    # Ask every configured wobject for its display name.
    my %international;
    for my $wobject (@{$session{config}{wobjects}}) {
        my $class = "WebGUI::Wobject::".$wobject;
        my $instance = $class->new({namespace=>$wobject, wobjectId=>'new'});
        $international{$wobject} = $instance->name;
    }

    tie my %namespaces, 'Tie::IxHash';
    my @listed = ($restricted and $self->get('namespaces') !~ /any/i)
        ? split(/\n/, $self->get('namespaces'))
        : ();
    my $useStored = ($restricted and $self->get('namespaces') !~ /any/i) ? 1 : 0;

    $namespaces{any} = WebGUI::International::get(18,$self->get("namespace"));
    unless ($useStored) {
        @listed = WebGUI::SQL->buildArray("select distinct(namespace) from IndexedSearch_docInfo order by namespace");
    }
    for my $namespace (@listed) {
        $namespaces{$namespace} = $international{$namespace} || ucfirst($namespace);
    }
    return \%namespaces;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns a reference to an insertion-ordered hash (contentType => label).
# The hash always starts with 'any' and 'content'; 'content' is a shortcut
# for page, wobject and wobjectDetail.
#   $restricted - when true and the wobject's "contentTypes" setting does
#                 not contain "any", only the types of that setting are
#                 added; otherwise all types found in the docInfo table
#                 are added.
# A type without an international label is shown capitalised. This now
# holds for the restricted list too, which used to yield an undefined
# label for such a type.
sub _getContentTypes {
    my ($self, $restricted) = @_;
    my %international = (
        'page'          => WebGUI::International::get(2),
        'wobject'       => WebGUI::International::get(19,$self->get("namespace")),
        'wobjectDetail' => WebGUI::International::get(20,$self->get("namespace")),
        'content'       => WebGUI::International::get(21,$self->get("namespace")),
        'discussion'    => WebGUI::International::get(892),
        'profile'       => WebGUI::International::get(22,$self->get("namespace")),
        'help'          => WebGUI::International::get(93),
        'any'           => WebGUI::International::get(23,$self->get("namespace")),
    );

    tie my %contentTypes, 'Tie::IxHash';
    $contentTypes{any}     = $international{any};
    $contentTypes{content} = $international{content}; # shortcut for page, wobject and wobjectDetail

    my @types = ($restricted and $self->get('contentTypes') !~ /any/i)
        ? split(/\n/, $self->get('contentTypes'))
        : WebGUI::SQL->buildArray("select distinct(contentType) from IndexedSearch_docInfo order by contentType");
    foreach my $type (@types) {
        $contentTypes{$type} = $international{$type} || ucfirst($type);
    }
    return \%contentTypes;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns a hash (as a flat list) whose keys are the ids of all pages
# below $searchRoot, at any depth. $searchRoot itself is not included.
# Only the keys are meaningful; every value is 1.
sub _getSearchablePages {
    my $searchRoot = shift;
    my %pages;

    # $searchRoot comes from a saved form value, so it is quoted rather
    # than interpolated into the statement as-is.
    my $sth = WebGUI::SQL->read("select pageId from page where parentId = ".quote($searchRoot));
    while (my %data = $sth->hash) {
        my $pageId = $data{pageId};
        $pages{$pageId} = 1;

        # Merge the sub tree in place instead of rebuilding the whole hash
        # for every row.
        my %descendants = _getSearchablePages($pageId);
        @pages{keys %descendants} = values %descendants;
    }
    $sth->finish;
    return %pages;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns a reference to an insertion-ordered hash of selectable users
# that starts with the entry 'any'.
#   $restricted - when true and the wobject's "users" setting does not
#                 contain "any", the entries of that setting are returned,
#                 each mapped to itself; otherwise all users are returned
#                 as userId => username, ordered by username.
sub _getUsers {
    my ($self, $restricted) = @_;
    tie my %users, 'Tie::IxHash';

    if (!$restricted or $self->get('users') =~ /any/i) {
        %users = (
            'any' => WebGUI::International::get(25,$self->get("namespace")),
            WebGUI::SQL->buildHash("select userId, username from users order by username")
        );
        return \%users;
    }

    $users{any} = WebGUI::International::get(25,$self->get("namespace"));
    for my $entry (split(/\n/, $self->get('users'))) {
        $users{$entry} = $entry;
    }
    return \%users;
}
|
||||
|
||||
1;
|
||||
682
lib/WebGUI/Wobject/IndexedSearch/Search.pm
Normal file
682
lib/WebGUI/Wobject/IndexedSearch/Search.pm
Normal file
|
|
@ -0,0 +1,682 @@
|
|||
package WebGUI::Wobject::IndexedSearch::Search;
|
||||
|
||||
=head1 LEGAL
|
||||
|
||||
-------------------------------------------------------------------
|
||||
WebGUI is Copyright 2001-2003 Plain Black LLC.
|
||||
-------------------------------------------------------------------
|
||||
Please read the legal notices (docs/legal.txt) and the license
|
||||
(docs/license.txt) that came with this distribution before using
|
||||
this software.
|
||||
-------------------------------------------------------------------
|
||||
http://www.plainblack.com info@plainblack.com
|
||||
-------------------------------------------------------------------
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
use DBIx::FullTextSearch;
|
||||
use WebGUI::SQL;
|
||||
use WebGUI::URL;
|
||||
use WebGUI::HTML;
|
||||
use WebGUI::ErrorHandler;
|
||||
use DBIx::FullTextSearch::StopList;
|
||||
use WebGUI::Utility;
|
||||
use WebGUI::Session;
|
||||
use WebGUI::Privilege;
|
||||
use HTML::Highlight;
|
||||
use WebGUI::Macro;
|
||||
|
||||
=head1 NAME
|
||||
|
||||
Package WebGUI::Wobject::IndexedSearch::Search
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Search implementation for WebGUI.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use WebGUI::Wobject::IndexedSearch::Search;
|
||||
my $search = WebGUI::Wobject::IndexedSearch::Search->new();
|
||||
$search->indexDocument( { text => 'Index this text',
|
||||
location => 'http://www.mysite.com/index.pl/faq#45',
|
||||
languageId => 3,
|
||||
namespace => 'FAQ'
|
||||
});
|
||||
my $hits = $search->search("+foo -bar koo",{ namespace => ['Article', 'FAQ']} );
|
||||
|
||||
$search->close;
|
||||
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
This package is an extension to DBIx::FullTextSearch and HTML::Highlight.
|
||||
See those packages for documentation of their methods.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
These methods are available from this package:
|
||||
|
||||
=cut
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 close ( )
|
||||
|
||||
Closes the DBIx::FullTextSearch session.
|
||||
|
||||
=cut
|
||||
|
||||
sub close {
    # Closing is delegated to the object's DESTROY method.
    my ($self) = @_;
    return $self->DESTROY();
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 create ( [ %options ] )
|
||||
|
||||
Creates a new DBIx::FullTextSearch index.
|
||||
|
||||
=over
|
||||
|
||||
=item %options
|
||||
|
||||
Options to pass to DBIx::FullTextSearch.
|
||||
The default options that are used are:
|
||||
|
||||
( backend => column, word_length => 20, stoplist => undef )
|
||||
|
||||
Please refer to the DBIx::FullTextSearch documentation for a complete list of options.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
sub create {
    my $self = shift;

    # Options given by the caller override the object's create options.
    my %options = (%{$self->{_createOptions}}, @_);

    # The stemmer is optional: without Lingua::Stem the option is dropped
    # and a warning is logged.
    if ($options{stemmer}) {
        eval "use Lingua::Stem";
        if ($@) {
            WebGUI::ErrorHandler::warn("IndexedSearch: Can't use stemmer: $@");
            delete $options{stemmer};
        }
    }

    # The stoplist is stored per index; the default list is created when
    # its table does not exist yet.
    if ($options{stoplist}) {
        my $stoplistName = $self->getIndexName."_".$options{stoplist};
        unless ($self->existsTable($stoplistName."_stoplist")) {
            DBIx::FullTextSearch::StopList->create_default($self->getDbh, $stoplistName, $options{stoplist});
        }
        $options{stoplist} = $stoplistName;
    }

    my $fts = DBIx::FullTextSearch->create($self->getDbh, $self->getIndexName, %options);
    $self->{_fts} = $fts;
    unless (defined $fts) {
        WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to create index.\n$DBIx::FullTextSearch::errstr");
        return undef;
    }

    # A fresh index starts numbering its documents at 1.
    $self->{_docId} = 1;
    return $fts;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 existsTable ( tableName )
|
||||
|
||||
Returns true if tableName exists in database.
|
||||
|
||||
=over
|
||||
|
||||
=item tableName
|
||||
|
||||
The name of table.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
sub existsTable {
    my $self  = shift;
    my $table = shift;

    # Compare against the table list reported by the database.
    my @tables = WebGUI::SQL->buildArray("show tables");
    return isIn($table, @tables);
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getDetails ( docIdList , [ %options ] )
|
||||
|
||||
Returns an array reference containing details for each docId.
|
||||
|
||||
=over
|
||||
|
||||
=item docIdList
|
||||
|
||||
An array reference containing docIds.
|
||||
|
||||
=item previewLength
|
||||
|
||||
The maximum number of characters in each of the context sections. Defaults to "80".
|
||||
|
||||
=item highlight
|
||||
|
||||
A boolean indicating whether or not to enable highlight. Defaults to "1".
|
||||
|
||||
=item highlightColors
|
||||
|
||||
A reference to an array of CSS color identifiers.
|
||||
|
||||
=item
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
sub getDetails {
    my ($self, $docIdList, %options) = @_;

    # Nothing to look up. Without this guard an empty list would produce
    # the invalid statement "... docId in () ...".
    return [] unless ($docIdList && @$docIdList);

    my $docIds = join(',',@$docIdList);
    my (@searchDetails, %namespace);

    # Display names of the configured wobjects, keyed by namespace.
    foreach my $wobject (@{$session{config}{wobjects}}) {
        my $cmd = "WebGUI::Wobject::".$wobject;
        my $w = $cmd->new({namespace=>$wobject, wobjectId=>'new'});
        $namespace{$wobject} = $w->name;
    }

    my $sql = "select * from IndexedSearch_docInfo where docId in ($docIds) and indexName = ".quote($self->getIndexName) ;
    $sql .= " ORDER BY FIELD(docId, $docIds)"; # Maintain $docIdList order
    my $sth = WebGUI::SQL->read($sql);
    while (my %data = $sth->hash) {
        $data{namespace} = $namespace{$data{namespace}} || ucfirst($data{namespace});
        if ($data{ownerId}) {
            ($data{username}) = WebGUI::SQL->quickArray("select username from users where userId = ".quote($data{ownerId}));
            $data{userProfile} = WebGUI::URL::page("op=viewProfile&uid=$data{ownerId}");
        }

        # A shortcut is either the text itself or a select statement that
        # returns the text as its first column.
        foreach my $part (qw(body header)) {
            my $shortcut = delete $data{$part."Shortcut"};
            if ($shortcut =~ /^\s*select /i) {
                $data{$part} = (WebGUI::SQL->quickArray($shortcut))[0];
            } else {
                $data{$part} = $shortcut;
            }
        }

        if ($data{body}) {
            $data{body} = WebGUI::Macro::filter($data{body});
            $data{body} = WebGUI::HTML::filter($data{body},'all');
            $data{body} = $self->preview($data{body}, $options{previewLength});
            $data{body} = $self->highlight($data{body},undef, $options{highlightColors}) if ($options{highlight});
        }
        if ($data{header}) {
            $data{header} = WebGUI::Macro::filter($data{header});
            $data{header} = WebGUI::HTML::filter($data{header},'all');
            $data{header} = $self->highlight($data{header},undef, $options{highlightColors}) if ($options{highlight});
            # NOTE(review): the location is only passed through the gateway
            # when the document has a header - confirm this is intended.
            $data{location} = WebGUI::URL::gateway($data{location});
        }
        # $data{crumbTrail} = WebGUI::Macro::C_crumbTrail::_recurseCrumbTrail($data{pageId}, ' > ');
        # $data{crumbTrail} =~ s/\s*>\s*$//;
        push(@searchDetails, \%data);
    }
    $sth->finish;
    return \@searchDetails;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getDbh ( )
|
||||
|
||||
Returns the object's database handler.
|
||||
|
||||
=cut
|
||||
|
||||
sub getDbh {
    # Plain accessor for the object's _dbh slot.
    my ($self) = @_;
    return $self->{_dbh};
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getDocId ( )
|
||||
|
||||
Returns the next docId for this object.
|
||||
|
||||
=cut
|
||||
|
||||
sub getDocId {
    # Plain accessor for the object's _docId slot.
    my ($self) = @_;
    return $self->{_docId};
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getIndexName ( )
|
||||
|
||||
Returns the full index name of this object.
|
||||
|
||||
=cut
|
||||
|
||||
sub getIndexName {
    # Plain accessor for the object's _indexName slot.
    my ($self) = @_;
    return $self->{_indexName};
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 _queryToWords ( [ query ] )
|
||||
|
||||
Converts a DBIx::FullTextSearch query to (\@Words, \@Wildcards) suitable to pass to HTML::Highlight
|
||||
|
||||
=cut
|
||||
|
||||
# Splits a query into the word list and the parallel wildcard list that
# HTML::Highlight expects, and returns (\@words, \@wildcards).
#   $query - query to convert; falls back to $self->{_query} when false.
#            (The fallback used to be applied unconditionally because the
#            variable was redeclared, which discarded the argument.)
# Wildcard per word: '%' when the term contained '*', '*' for a plain
# term, undef for a quoted phrase. Boolean operators, excluded terms
# (leading '-') and terms shorter than two characters are skipped.
# The result is cached in the object under "<query>words" and
# "<query>wildcards".
sub _queryToWords {
    my ($self, $query) = @_;
    $query ||= $self->{_query};

    # Return the processed words / wildcards from memory if it's cached.
    my $wordsKey     = $query."words";
    my $wildcardsKey = $query."wildcards";
    if ($self->{$wordsKey} && $self->{$wildcardsKey}) {
        return ($self->{$wordsKey}, $self->{$wildcardsKey});
    }

    # Splitting on the double quote yields alternating unquoted and quoted
    # segments, starting with an unquoted one.
    my $inQuote = 0;
    my (@words, @wildcards);
    foreach my $segment (split(/\"/, $query)) {
        if ($inQuote) {
            push(@words, $segment);
            push(@wildcards, undef);                    # Exact match
        } else {
            foreach my $term (split(/\s+/, $segment)) {
                next if ($term =~ /^(?:AND|OR|NOT)$/i); # boolean AND, OR, NOT
                next if ($term =~ /^\-/);               # exclude word
                next if ($term =~ /^.{0,1}$/);          # at least 2 characters
                if ($term =~ /\*/) {
                    push(@wildcards, '%');              # match any character
                } else {
                    push(@wildcards, '*');              # Also match plural of word
                }
                $term =~ s/['"()+*]+//g;                # remove query operators and quotes
                push(@words, $term);
            }
        }
        $inQuote = $inQuote ? 0 : 1;
    }

    # Store words / wildcards in memory
    $self->{$wordsKey}     = \@words;
    $self->{$wildcardsKey} = \@wildcards;

    return (\@words, \@wildcards);
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 highlight ( text [ , query , colors ] )
|
||||
|
||||
highlight words or patterns in HTML documents.
|
||||
|
||||
=over
|
||||
|
||||
=item text
|
||||
|
||||
The text to highlight
|
||||
|
||||
=item query
|
||||
|
||||
A query containing the words to highlight. Defaults to the last used $search->search query.
|
||||
Special case: When query contains only an asterisk '*', no highlighting is applied.
|
||||
|
||||
=item colors
|
||||
|
||||
A reference to an array of CSS color identifiers.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
# Highlights the words of a query in $text and returns the result.
#   $text   - the text to highlight
#   $query  - query holding the words; falls back to $self->{_query} when
#             false. (The fallback used to be applied unconditionally
#             because the variable was redeclared, which discarded the
#             argument.)
#   $colors - array reference handed to HTML::Highlight as "colors"
# A query that consists of a single '*' leaves the text untouched.
sub highlight {
    my ($self, $text, $query, $colors) = @_;
    $query ||= $self->{_query};
    return $text if ($query =~ /^\s*\*\s*$/); # query = '*', no highlight

    my ($words, $wildcards) = $self->_queryToWords($query);
    my $hl = HTML::Highlight->new(
        words     => $words,
        wildcards => $wildcards,
        colors    => $colors
    );
    return $hl->highlight($text);
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 indexDocument ( hashRef )
|
||||
|
||||
Adds a document to the index.
|
||||
|
||||
This method doesn't store the document itself. Instead, it stores information about words
|
||||
in the document in such a structured way that it is easy and fast to look up what
|
||||
documents contain certain words and return id's of the documents.
|
||||
|
||||
=over
|
||||
|
||||
=item text
|
||||
|
||||
The text to index.
|
||||
|
||||
=item location
|
||||
|
||||
The location of the document. Most likely a URL.
|
||||
|
||||
=item contentType
|
||||
|
||||
The content type of this document.
|
||||
|
||||
=item docId
|
||||
|
||||
The unique Id of this document. Defaults to the next empty docId.
|
||||
|
||||
=item pageId
|
||||
|
||||
The pageId of the page on which this document resides. Defaults to 0.
|
||||
|
||||
=item wobjectId
|
||||
|
||||
The wobjectID of the wobject that holds this document. Defaults to 0.
|
||||
|
||||
=item ownerId
|
||||
|
||||
The ownerId of the document. Defaults to 3.
|
||||
|
||||
=item languageId
|
||||
|
||||
The languageId of this document. Defaults to an empty string.
|
||||
|
||||
=item namespace
|
||||
|
||||
The namespace of this document. Defaults to 'WebGUI'.
|
||||
|
||||
=item page_groupIdView
|
||||
|
||||
Id of group authorized to view this page. Defaults to '7' (everyone)
|
||||
|
||||
=item wobject_groupIdView
|
||||
|
||||
Id of group authorized to view this wobject. Defaults to '7' (everyone)
|
||||
|
||||
=item wobject_special_groupIdView
|
||||
|
||||
Id of group authorized to view the details of this wobject.
|
||||
|
||||
=item headerShortcut
|
||||
|
||||
An sql statement that returns the header (title, question, subject, name, whatever)
|
||||
of this document.
|
||||
|
||||
=item bodyShortcut
|
||||
|
||||
An sql statement that returns the body (description, answer, message, whatever)
|
||||
of this document.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
# Adds one document to the full text index and records its metadata in the
# IndexedSearch_docInfo table.
#
#   document - hash reference; the keys read here are docId, text, pageId,
#              wobjectId, languageId, namespace, location, page_groupIdView,
#              wobject_groupIdView, wobject_special_groupIdView,
#              headerShortcut, bodyShortcut, contentType and ownerId.
#
# The internal document counter is advanced afterwards.
sub indexDocument {
    my ($self, $document) = @_;

    # An explicit docId wins; otherwise the next free id is used.
    my $docId = $document->{docId} || $self->{_docId};
    $self->{_fts}->index_document($docId, $document->{text});

    # Numeric values are interpolated as-is, text values go through quote().
    my $indexName    = quote($self->getIndexName);
    my $pageId       = $document->{pageId} || 0;
    my $wobjectId    = $document->{wobjectId} || 0;
    my $languageId   = $document->{languageId} || quote('');
    my $namespace    = quote($document->{namespace} || 'WebGUI');
    my $location     = quote($document->{location});
    my $pageGroup    = $document->{page_groupIdView} || 7;
    my $wobjectGroup = $document->{wobject_groupIdView} || 7;
    my $specialGroup = $document->{wobject_special_groupIdView} || 7;
    my $header       = quote($document->{headerShortcut});
    my $body         = quote($document->{bodyShortcut});
    my $contentType  = quote($document->{contentType});
    my $ownerId      = $document->{ownerId} || 3;

    WebGUI::SQL->write("insert into IndexedSearch_docInfo ( docId,
                                indexName,
                                pageId,
                                wobjectId,
                                languageId,
                                namespace,
                                location,
                                page_groupIdView,
                                wobject_groupIdView,
                                wobject_special_groupIdView,
                                headerShortcut,
                                bodyShortcut,
                                contentType,
                                ownerId )
                        values ( $docId, $indexName, $pageId, $wobjectId, $languageId, $namespace , $location, $pageGroup, $wobjectGroup, $specialGroup, $header ,$body ,$contentType ,$ownerId )"
    );
    $self->{_docId}++;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 new ( [ indexName , dbh ] )
|
||||
|
||||
Constructor.
|
||||
|
||||
=over
|
||||
|
||||
=item indexName
|
||||
|
||||
The name of the index to open. Defaults to 'default'.
|
||||
|
||||
=item $dbh
|
||||
|
||||
Database handle to use. Defaults to $WebGUI::Session::session{dbh}.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
# Constructor. Stores the index name (falling back to 'default'), the
# database handle (falling back to the session handle) and the default
# options used when an index is created.
sub new {
    my ($class, $indexName, $dbh) = @_;

    my %createOptions = (
        backend     => 'column',
        word_length => 20,
        filter      => 'map { lc $_ if ($_ !~ /\^.*;/) }',
    );

    my $self = bless {}, $class;
    $self->{_indexName}     = $indexName || 'default';
    $self->{_dbh}           = $dbh || $WebGUI::Session::session{dbh};
    $self->{_createOptions} = \%createOptions;
    return $self;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 open ( )
|
||||
|
||||
Opens an existing DBIx::FullTextSearch index.
|
||||
|
||||
=cut
|
||||
|
||||
# Opens the existing DBIx::FullTextSearch index for this object and primes
# the document counter with the next docId after the highest one recorded
# for this index. Returns the index object; on failure a fatal error is
# reported and undef is returned.
sub open {
    my $self = shift;

    my $fts = DBIx::FullTextSearch->open($self->getDbh, $self->getIndexName);
    $self->{_fts} = $fts;
    unless (defined $fts) {
        WebGUI::ErrorHandler::fatalError("IndexedSearch: Unable to open index.\n$DBIx::FullTextSearch::errstr");
        return undef;
    }

    my ($highestDocId) = WebGUI::SQL->quickArray(
        "select max(docId) from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName)
    );
    $self->{_docId} = $highestDocId;
    $self->{_docId}++;

    return $fts;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 preview ( text , [ previewLength , query ] )
|
||||
|
||||
Returns a context preview in which words from a search query appear in the resulting documents.
|
||||
The words are always in the middle of each of the sections.
|
||||
|
||||
=over
|
||||
|
||||
=item text
|
||||
|
||||
The text to preview
|
||||
|
||||
=item previewLength
|
||||
|
||||
The maximum number of characters in each of the context sections. Defaults to 80.
|
||||
A preview length of "0" means no preview,
|
||||
while a negative preview length returns the complete text.
|
||||
|
||||
=item query
|
||||
|
||||
A query containing the words to highlight. Defaults to the last used $search->search query.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
# Returns a short preview of a document for display in a result list.
#
#   text          - the text to preview.
#   previewLength - maximum length of each context section. Defaults to 80;
#                   0 yields an empty string, a negative value returns the
#                   complete text.
#   query         - query whose words are used to pick the context; when
#                   empty, the query stored in $self->{_query} is used.
#
# When there is a usable query, the context around its words is returned.
# Otherwise (no query, query '*', or no context found) the filtered text is
# cut at a word boundary within previewLength characters. "..." markers are
# added when the result does not start with a capital letter or does not end
# with a period.
sub preview {
    my ($self, $text, $previewLength, $query) = @_;
    $previewLength = 80 unless (defined $previewLength);
    return ''    unless ($previewLength);
    return $text if ($previewLength < 0);

    # Assign to the argument itself: declaring a second "my $query" here
    # would mask the argument and the caller's query would never be used.
    $query ||= $self->{_query};

    my $context;
    if ($query && $query !~ /^\s*\*\s*$/) {
        my ($words, $wildcards) = $self->_queryToWords($query);
        my $hl = HTML::Highlight->new(
            words     => $words,
            wildcards => $wildcards,
        );
        $context = join('... ', @{ $hl->preview_context($text, $previewLength) });
    }

    if ($context) {
        $text = $context;
    } else {
        $text = WebGUI::HTML::filter($text, 'all');
        # Only cut text that is actually too long; the substitution would
        # otherwise drop the last word of a text that already fits.
        if (length($text) > $previewLength) {
            $text =~ s/^(.{1,$previewLength})\s+.*$/$1/s;
        }
    }

    # Trim surrounding whitespace and non-breaking-space entities.
    $text =~ s/^(?:\s|&nbsp;)+//;
    $text =~ s/(?:\s|&nbsp;)+$//;

    if ($text ne '') {
        $text = '<STRONG>... </STRONG>'.$text if ($text !~ /^[A-Z]+/);  # ... broken up at the beginning
        $text .= '<STRONG> ...</STRONG>'      if ($text !~ /\.$/);      # broken up at the end ...
    }
    return $text;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 recreate ( [ %options ] )
|
||||
|
||||
Like create, but first drops the existing index. Useful when rebuilding the index.
|
||||
|
||||
=over
|
||||
|
||||
=item %options
|
||||
|
||||
Options to pass to WebGUI::IndexedSearch->create()
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
# Rebuilds the index from scratch: drops the existing index when it can be
# opened, creates a new one with the given options and removes the document
# information rows recorded for this index. Returns the new index object.
#
#   %options - options passed on to create().
sub recreate {
    my ($self, %options) = @_;

    my $existing = DBIx::FullTextSearch->open($self->getDbh, $self->getIndexName);
    $self->{_fts} = $existing;
    $existing->drop if (defined $existing);

    $self->{_fts} = $self->create($self->getIndexName, $self->getDbh, %options);
    WebGUI::SQL->write(
        "delete from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName)
    );
    return $self->{_fts};
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 search ( query, \%filter )
|
||||
|
||||
Returns an array reference of docId's of documents that match the query.
|
||||
If the search has no results, undef is returned.
|
||||
|
||||
=over
|
||||
|
||||
=item query
|
||||
|
||||
user input string. Will be parsed into can-include, must-include and must-not-include words and phrases.
|
||||
Special case: when query is an asterisk (*), then no full text search is done, and results are returned
|
||||
using \%filter.
|
||||
|
||||
Examples are:
|
||||
+"this is a phrase" -koo +bar foo
|
||||
(foo OR baz) AND (bar OR caz)
|
||||
|
||||
=item filter
|
||||
|
||||
A hash reference containing filter elements.
|
||||
|
||||
Example:
|
||||
{
|
||||
language => [ 1, 3 ],
|
||||
namespace => [ 'Article', 'USS' ]
|
||||
}
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
# Runs a search and returns an array reference of matching docIds, or undef
# when nothing matches.
#
#   query  - user query. A lone asterisk skips the full text search, so the
#            result is determined by the privilege checks and the filter.
#   filter - hash reference mapping a column name to an array reference of
#            allowed values.
#
# The query is remembered in $self->{_query}. Full text hits are narrowed
# down to documents of this index that the groups returned by _getGroups
# may view, and are returned in the order delivered by the full text search.
sub search {
    my ($self, $query, $filter) = @_;
    $self->{_query} = $query;
    my $noFtsSearch = (defined $query && $query =~ /^\s*\*\s*$/);     # query = '*', no full text search

    # Declare first, assign conditionally: a "my" combined with a statement
    # modifier has undefined behaviour.
    my @fts_docIds;
    @fts_docIds = $self->{_fts}->search($query) unless ($noFtsSearch);
    return undef unless (@fts_docIds || $noFtsSearch);

    # Without any group the "in (...)" lists below would be invalid SQL,
    # and nothing could be visible anyway.
    my $groups = join(',', @{$self->_getGroups});
    return undef if ($groups eq '');

    my $docIds = join(',', @fts_docIds);
    my $sql = "select docId from IndexedSearch_docInfo where indexName = ".quote($self->getIndexName);
    $sql .= " and docId in ($docIds)" unless ($noFtsSearch);
    $sql .= " and page_groupIdView in ($groups)";
    $sql .= " and wobject_special_groupIdView in ($groups)";
    if ($session{setting}{wobjectPrivileges}) {
        $sql .= " and wobject_groupIdView in ($groups)";
    }

    # NOTE(review): filter names and values are interpolated verbatim, so
    # text values presumably have to arrive already quoted -- confirm
    # against the callers before passing user input here.
    foreach my $filterElement (keys %{ $filter || {} }) {
        $sql .= " AND $filterElement in (".join(',', @{$filter->{$filterElement}}).")";
    }

    # No trash or other garbage
    $sql .= " AND (pageId > 999 or pageId < 0 or pageId = 1) ";

    # Keep @fts_docIds list order
    $sql .= " ORDER BY FIELD(docID,$docIds)" unless ($noFtsSearch);

    my $filteredDocIds = WebGUI::SQL->buildArrayRef($sql);
    return $filteredDocIds if (ref $filteredDocIds eq 'ARRAY' and @{$filteredDocIds});
    return undef;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 _getGroups ( )
|
||||
|
||||
Returns an array reference containing all groupIds of groups the user is in.
|
||||
|
||||
=cut
|
||||
|
||||
# Returns an array reference with the ids of all groups for which
# WebGUI::Privilege::isInGroup reports membership.
sub _getGroups {
    my @allGroupIds = WebGUI::SQL->buildArray("select groupId from groups");
    my @memberOf;
    for my $candidate (@allGroupIds) {
        next unless (WebGUI::Privilege::isInGroup($candidate));
        push(@memberOf, $candidate);
    }
    return \@memberOf;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Destructor: when an index object is attached, its own DESTROY method is
# invoked explicitly.
sub DESTROY {
    my ($self) = @_;
    my $fts = $self->{_fts};
    return unless (ref($fts));
    $fts->DESTROY();
}
|
||||
|
||||
1;
|
||||
228
sbin/Hourly/IndexedSearch_buildIndex.pm
Normal file
228
sbin/Hourly/IndexedSearch_buildIndex.pm
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
package Hourly::IndexedSearch_buildIndex;
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# WebGUI is Copyright 2001-2003 Plain Black LLC.
|
||||
#-------------------------------------------------------------------
|
||||
# Please read the legal notices (docs/legal.txt) and the license
|
||||
# (docs/license.txt) that came with this distribution before using
|
||||
# this software.
|
||||
#-------------------------------------------------------------------
|
||||
# http://www.plainblack.com info@plainblack.com
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
use DBI;
|
||||
use strict;
|
||||
use WebGUI::DateTime;
|
||||
use WebGUI::Session;
|
||||
use WebGUI::Utility;
|
||||
use WebGUI::SQL;
|
||||
use WebGUI::URL;
|
||||
use WebGUI::Wobject::IndexedSearch::Search;
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Rebuilds the 'IndexedSearch_default' index from scratch.
#
# For every namespace returned by getIndexerParams() the namespace's SQL is
# executed and each row is indexed: the configured fields (or the result of
# a per-row sub-select) form the text, and the url / headerShortcut /
# bodyShortcut templates are expanded against the row.
#
#   verbose - when true, progress and a summary are printed.
sub process {
    my $verbose = shift;
    print "\n";
    my $indexName  = 'IndexedSearch_default';
    my $htmlFilter = 'all';
    my $stopList   = 'none';
    undef $stopList if ($stopList eq 'none');
    my $stemmer    = 'none';
    undef $stemmer if ($stemmer eq 'none');
    my $backend    = 'phrase';
    my $indexInfo  = getIndexerParams();
    my $search     = WebGUI::Wobject::IndexedSearch::Search->new($indexName);

    # recreate() takes only an option hash; leading positional arguments
    # would end up in it as a bogus '' => '' option.
    $search->recreate(stemmer => $stemmer, stoplist => $stopList, backend => $backend);

    my $startTime = WebGUI::DateTime::time();
    foreach my $namespace (keys %{$indexInfo}) {
        my $sth    = WebGUI::SQL->read($indexInfo->{$namespace}{sql});
        my $total  = $sth->rows;
        my $actual = 1;

        # The templates below are expanded with string eval and refer to the
        # lexicals %data and $textToIndex by name, so these variables must
        # keep their names and the evals must stay inline in this scope.
        # NOTE(review): string eval is only acceptable because the templates
        # come from code (getIndexerParams), never from user input.
        while (my %data = $sth->hash) {
            if ($verbose) {
                # Guard against a driver that cannot report the row count.
                my $percent = ($total > 0) ? int(($actual/$total)*100) : 0;
                print "\r\t\tIndexing $namespace data ($total items) ...".
                      (" " x (30 - (length($namespace)) - length("$total"))).
                      $percent." %   ";
            }

            my $textToIndex = "";
            foreach my $field (@{$indexInfo->{$namespace}{fieldsToIndex}}) {
                if ($field =~ /^\s*select/i) {
                    # The field is a sub-select template, run once per row.
                    my $sql = eval 'sprintf("%s","'.$field.'")';
                    warn "IndexedSearch: cannot expand field query for $namespace: $@" if ($@);
                    $textToIndex .= join("\n", WebGUI::SQL->buildArray($sql));
                } else {
                    $textToIndex .= $data{$field}."\n";
                }
            }
            $textToIndex = WebGUI::HTML::filter($textToIndex,$htmlFilter);

            my $url = eval $indexInfo->{$namespace}{url};
            warn "IndexedSearch: cannot expand url for $namespace: $@" if ($@);
            my $headerShortcut = eval 'sprintf("%s","'.$indexInfo->{$namespace}{headerShortcut}.'")';
            warn "IndexedSearch: cannot expand headerShortcut for $namespace: $@" if ($@);
            my $bodyShortcut = eval 'sprintf("%s","'.$indexInfo->{$namespace}{bodyShortcut}.'")';
            warn "IndexedSearch: cannot expand bodyShortcut for $namespace: $@" if ($@);

            $search->indexDocument({
                text                        => $textToIndex,
                location                    => $url,
                pageId                      => $data{pageId},
                wobjectId                   => $data{wid},
                languageId                  => $data{languageId},
                namespace                   => $data{namespace},
                page_groupIdView            => $data{page_groupIdView},
                wobject_groupIdView         => $data{wobject_groupIdView},
                wobject_special_groupIdView => $data{wobject_special_groupIdView},
                headerShortcut              => $headerShortcut,
                bodyShortcut                => $bodyShortcut,
                contentType                 => $indexInfo->{$namespace}{contentType},
                ownerId                     => $data{ownerId}
            });
            $actual++;
        }
        print "\n" if ($verbose && $total);
    }

    # Measure the elapsed time with the same clock that took the start time.
    print "\t\t".(($search->getDocId - 1)." WebGUI items indexed in ".(WebGUI::DateTime::time() - $startTime)." seconds.\n\t") if ($verbose);
    $search->close;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns a hash reference describing what to index. Each key names a data
# source and maps to a hash reference with:
#
#   sql            - query yielding one row per document.
#   fieldsToIndex  - row fields whose contents are indexed; an entry that
#                    starts with "select" is a per-row sub-select template.
#   contentType    - content type stored with the document.
#   url            - Perl expression template yielding the document location.
#   headerShortcut - template yielding the document's header.
#   bodyShortcut   - template yielding the document's body.
#
# Single-quoted and q// templates deliberately keep $data{...} and
# $textToIndex unexpanded; the indexer expands them for every row.
# The parameters of every wobject listed in $session{config}{wobjects} are
# merged in and override built-in entries of the same name.
sub getIndexerParams {
    my $now = WebGUI::DateTime::time();
    my %params;

    $params{page} = {
        sql => "select pageId,
                        title,
                        urlizedTitle,
                        synopsis,
                        languageId,
                        ownerId,
                        'Page' as namespace,
                        groupIdView as page_groupIdView,
                        7 as wobject_groupIdView,
                        7 as wobject_special_groupIdView
                    from page
                    where startDate < $now and endDate > $now",
        fieldsToIndex  => ["synopsis" , "title"],
        contentType    => 'page',
        url            => '$data{urlizedTitle}',
        headerShortcut => 'select title from page where pageId = $data{pageId}',
        bodyShortcut   => 'select synopsis from page where pageId = $data{pageId}',
    };

    $params{wobject} = {
        sql => "select wobject.namespace as namespace,
                        wobject.title as title,
                        wobject.description as description,
                        wobject.wobjectId as wid,
                        wobject.addedBy as ownerId,
                        page.urlizedTitle as urlizedTitle,
                        page.languageId as languageId,
                        page.pageId as pageId,
                        page.groupIdView as page_groupIdView,
                        wobject.groupIdView as wobject_groupIdView,
                        7 as wobject_special_groupIdView
                    from wobject , page
                    where wobject.pageId = page.pageId
                        and wobject.startDate < $now
                        and wobject.endDate > $now
                        and page.startDate < $now
                        and page.endDate > $now",
        fieldsToIndex  => ["title", "description"],
        contentType    => 'wobject',
        url            => '$data{urlizedTitle}."#".$data{wid}',
        headerShortcut => 'select title from wobject where wobjectId = $data{wid}',
        bodyShortcut   => 'select description from wobject where wobjectId = $data{wid}',
    };

    $params{wobjectDiscussion} = {
        sql => "select forumPost.forumPostId,
                        forumPost.username,
                        forumPost.subject,
                        forumPost.message,
                        forumPost.userId as ownerId,
                        wobject.namespace as namespace,
                        wobject.wobjectId as wid,
                        forumThread.forumId as forumId,
                        page.urlizedTitle as urlizedTitle,
                        page.languageId as languageId,
                        page.pageId as pageId,
                        page.groupIdView as page_groupIdView,
                        wobject.groupIdView as wobject_groupIdView,
                        7 as wobject_special_groupIdView
                    from forumPost, forumThread, wobject, page
                    where forumPost.forumThreadId = forumThread.forumThreadId
                        and forumThread.forumId = wobject.forumId
                        and wobject.pageId = page.pageId
                        and wobject.startDate < $now
                        and wobject.endDate > $now
                        and page.startDate < $now
                        and page.endDate > $now",
        fieldsToIndex  => ["username", "subject", "message"],
        contentType    => 'discussion',
        url            => 'WebGUI::URL::append($data{urlizedTitle},"func=view&wid=$data{wid}&forumId=$data{forumId}&forumOp=viewThread&forumPostId=$data{forumPostId}")',
        headerShortcut => 'select subject from forumPost where forumPostId = $data{forumPostId}',
        bodyShortcut   => 'select message from forumPost where forumPostId = $data{forumPostId}',
    };

    $params{help} = {
        sql => "select distinct(page.languageId) as languageId,
                        title.message as title,
                        body.message as body,
                        help.helpId as hid,
                        help.titleId as titleId,
                        help.bodyId as bodyId,
                        help.namespace as namespace,
                        1 as pageId,
                        7 as page_groupIdView,
                        7 as wobject_groupIdView,
                        7 as wobject_special_groupIdView
                    from help, page
                    left join international body on bodyId = body.internationalId
                        and help.namespace = body.namespace
                        and page.languageId = body.languageId
                    left join international title on titleId = title.internationalId
                        and help.namespace = title.namespace
                        and page.languageId = title.languageId
                    where body.languageId = title.languageId",
        fieldsToIndex  => ["title", "body"],
        contentType    => 'help',
        url            => '"?op=viewHelp&hid=$data{hid}&namespace=$data{namespace}"',
        headerShortcut => q/select message from international where languageId=$data{languageId}
                            and namespace='$data{namespace}' and internationalId=$data{titleId}/,
        bodyShortcut   => q/select message from international where languageId=$data{languageId}
                            and namespace='$data{namespace}' and internationalId=$data{bodyId}/,
    };

    $params{userProfileData} = {
        sql => "select distinct(userProfileData.userId),
                        userProfileData.userId as ownerId,
                        '' as languageId,
                        b.fieldData as publicProfile,
                        'profile' as namespace,
                        1 as pageId,
                        7 as page_groupIdView,
                        7 as wobject_groupIdView,
                        7 as wobject_special_groupIdView
                    from userProfileData
                    LEFT join userProfileData b
                        on userProfileData.userId=b.userId
                        and b.fieldName='publicProfile'
                    where b.fieldData=1;",
        fieldsToIndex  => [ q/select concat(userProfileField.fieldName,' ',userProfileData.fieldData)
                            from userProfileField, userProfileCategory, userProfileData
                            where userProfileField.profileCategoryId=userProfileCategory.profileCategoryId
                                and userProfileCategory.visible=1
                                and userProfileField.visible=1
                                and userProfileData.fieldName = userProfileField.fieldName
                                and fieldData <> ''
                                and userProfileData.userId = $data{userId}
                            / ],
        url            => '"?op=viewProfile&uid=$data{userId}"',
        contentType    => 'profile',
        headerShortcut => 'select username from users where userId = $data{userId}',
        #bodyShortcut  => q/select concat(fieldName,': ',fieldData) from userProfileData where userId = $data{userId}/
        bodyShortcut   => '$textToIndex',
    };

    # Let every configured wobject contribute (or override) its own entries.
    foreach my $wobject (@{$session{config}{wobjects}}) {
        my $class    = "WebGUI::Wobject::".$wobject;
        my $instance = $class->new({wobjectId=>"new",namespace=>$wobject});
        my $extra    = $instance->getIndexerParams($now);
        @params{keys %{$extra}} = values %{$extra};
    }
    return \%params;
}
|
||||
|
||||
1;
|
||||
73
www/extras/wobject/IndexedSearch/ColorPicker2.js
Normal file
73
www/extras/wobject/IndexedSearch/ColorPicker2.js
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// ===================================================================
|
||||
// Author: Matt Kruse <matt@mattkruse.com>
|
||||
// WWW: http://www.mattkruse.com/
|
||||
//
|
||||
// NOTICE: You may use this code for any purpose, commercial or
|
||||
// private, without any further permission from the author. You may
|
||||
// remove this notice from your final code if you wish, however it is
|
||||
// appreciated by the author if at least my web site address is kept.
|
||||
//
|
||||
// You may *NOT* re-distribute this code in any way except through its
|
||||
// use. That means, you can include it in your product, or your web
|
||||
// site, or any other form where the code is actually being used. You
|
||||
// may not put the plain javascript up on your site for download or
|
||||
// include it in your javascript libraries for download.
|
||||
// If you wish to share this code with others, please just point them
|
||||
// to the URL instead.
|
||||
// Please DO NOT link directly to my .js files from your site. Copy
|
||||
// the files to your server and use them there. Thank you.
|
||||
// ===================================================================
|
||||
|
||||
/* SOURCE FILE: AnchorPosition.js */
|
||||
// Returns {x, y}: the page coordinates of the named anchor. Uses
// document.all / document.getElementById when available, falls back to the
// document.anchors collection for layer-based browsers, and yields 0/0 when
// the anchor cannot be located that way.
function getAnchorPosition(anchorname) {
    var position = { x: 0, y: 0 };
    var target;

    if (document.getElementById) {
        target = document.all ? document.all[anchorname] : document.getElementById(anchorname);
    } else if (document.all) {
        target = document.all[anchorname];
    } else if (document.layers) {
        for (var i = 0; i < document.anchors.length; i++) {
            var anchor = document.anchors[i];
            if (anchor.name == anchorname) {
                position.x = anchor.x;
                position.y = anchor.y;
                break;
            }
        }
        return position;
    } else {
        return position;
    }

    position.x = AnchorPosition_getPageOffsetLeft(target);
    position.y = AnchorPosition_getPageOffsetTop(target);
    return position;
}

// Returns {x, y}: the position of the named anchor relative to the screen,
// derived from its page position, the scroll offsets and the window origin.
function getAnchorWindowPosition(anchorname) {
    var coordinates = getAnchorPosition(anchorname);

    // Pick the formula: "screenLeft" style or "screenX" style.
    var formula = "none";
    if (document.getElementById) {
        formula = isNaN(window.screenX) ? "screenLeft" : "screenX";
    } else if (document.all) {
        formula = "screenLeft";
    } else if (document.layers) {
        formula = "screenX";
    }

    var left = 0;
    var top = 0;
    if (formula == "screenLeft") {
        left = coordinates.x - document.body.scrollLeft + window.screenLeft;
        top = coordinates.y - document.body.scrollTop + window.screenTop;
    } else if (formula == "screenX") {
        left = coordinates.x + window.screenX + (window.outerWidth - window.innerWidth) - window.pageXOffset;
        top = coordinates.y + window.screenY + (window.outerHeight - 24 - window.innerHeight) - window.pageYOffset;
    }

    coordinates.x = left;
    coordinates.y = top;
    return coordinates;
}

// Sums offsetLeft over the element and all of its offsetParent ancestors.
function AnchorPosition_getPageOffsetLeft(el) {
    var total = el.offsetLeft;
    for (var node = el.offsetParent; node != null; node = node.offsetParent) {
        total += node.offsetLeft;
    }
    return total;
}

// Horizontal page offset of the element minus the horizontal scroll offset.
function AnchorPosition_getWindowOffsetLeft(el) {
    var pageLeft = AnchorPosition_getPageOffsetLeft(el);
    return pageLeft - document.body.scrollLeft;
}

// Sums offsetTop over the element and all of its offsetParent ancestors.
function AnchorPosition_getPageOffsetTop(el) {
    var total = el.offsetTop;
    for (var node = el.offsetParent; node != null; node = node.offsetParent) {
        total += node.offsetTop;
    }
    return total;
}

// Vertical page offset of the element minus the vertical scroll offset.
function AnchorPosition_getWindowOffsetTop(el) {
    var pageTop = AnchorPosition_getPageOffsetTop(el);
    return pageTop - document.body.scrollTop;
}
|
||||
|
||||
/* SOURCE FILE: PopupWindow.js */
|
||||
// Stores the position of the named anchor in this.x / this.y: screen based
// for real popup windows, page based for DIV popups.
function PopupWindow_getXYPosition(anchorname) {
    var coordinates;
    if (this.type == "WINDOW") {
        coordinates = getAnchorWindowPosition(anchorname);
    } else {
        coordinates = getAnchorPosition(anchorname);
    }
    this.x = coordinates.x;
    this.y = coordinates.y;
}

// Sets the size used when the popup is opened as a real window.
function PopupWindow_setSize(width, height) {
    this.width = width;
    this.height = height;
}

// Sets the popup contents; they are written out on the next show.
function PopupWindow_populate(contents) {
    this.contents = contents;
    this.populated = false;
}

// Writes the current contents into the popup DIV, or into the popup window
// when that window is open.
function PopupWindow_refresh() {
    if (this.divName != null) {
        if (this.use_gebi) {
            document.getElementById(this.divName).innerHTML = this.contents;
        } else if (this.use_css) {
            document.all[this.divName].innerHTML = this.contents;
        } else if (this.use_layers) {
            var d = document.layers[this.divName];
            d.document.open();
            d.document.writeln(this.contents);
            d.document.close();
        }
    } else {
        if (this.popupWindow != null && !this.popupWindow.closed) {
            this.popupWindow.document.open();
            this.popupWindow.document.writeln(this.contents);
            this.popupWindow.document.close();
            this.popupWindow.focus();
        }
    }
}

// Positions the popup at the named anchor (plus the configured offsets) and
// makes it visible; a window popup is opened first if necessary and kept
// within the available screen area.
function PopupWindow_showPopup(anchorname) {
    this.getXYPosition(anchorname);
    this.x += this.offsetX;
    this.y += this.offsetY;
    if (!this.populated && (this.contents != "")) {
        this.populated = true;
        this.refresh();
    }
    if (this.divName != null) {
        if (this.use_gebi) {
            var style = document.getElementById(this.divName).style;
            style.left = this.x;
            style.top = this.y;
            style.visibility = "visible";
        } else if (this.use_css) {
            document.all[this.divName].style.left = this.x;
            document.all[this.divName].style.top = this.y;
            document.all[this.divName].style.visibility = "visible";
        } else if (this.use_layers) {
            document.layers[this.divName].left = this.x;
            document.layers[this.divName].top = this.y;
            document.layers[this.divName].visibility = "visible";
        }
    } else {
        if (this.popupWindow == null || this.popupWindow.closed) {
            if (screen && screen.availHeight) {
                if ((this.y + this.height) > screen.availHeight) {
                    this.y = screen.availHeight - this.height;
                }
            }
            if (screen && screen.availWidth) {
                if ((this.x + this.width) > screen.availWidth) {
                    this.x = screen.availWidth - this.width;
                }
            }
            this.popupWindow = window.open("about:blank", "window_" + anchorname, "toolbar=no,location=no,status=no,menubar=no,scrollbars=auto,resizable,alwaysRaised,dependent,titlebar=no,width=" + this.width + ",height=" + this.height + ",screenX=" + this.x + ",left=" + this.x + ",screenY=" + this.y + ",top=" + this.y + "");
        }
        this.refresh();
    }
}

// Hides the popup DIV, or closes the popup window.
function PopupWindow_hidePopup() {
    if (this.divName != null) {
        if (this.use_gebi) {
            document.getElementById(this.divName).style.visibility = "hidden";
        } else if (this.use_css) {
            document.all[this.divName].style.visibility = "hidden";
        } else if (this.use_layers) {
            document.layers[this.divName].visibility = "hidden";
        }
    } else {
        if (this.popupWindow && !this.popupWindow.closed) {
            this.popupWindow.close();
            this.popupWindow = null;
        }
    }
}

// Returns true when the mouse event happened inside this popup's DIV.
// Window popups always report false.
function PopupWindow_isClicked(e) {
    if (this.divName != null) {
        if (this.use_layers) {
            var clickX = e.pageX;
            var clickY = e.pageY;
            var layer = document.layers[this.divName];
            return (clickX > layer.left) && (clickX < layer.left + layer.clip.width) &&
                   (clickY > layer.top) && (clickY < layer.top + layer.clip.height);
        } else if (document.all) {
            var source = window.event.srcElement;
            while (source.parentElement != null) {
                if (source.id == this.divName) { return true; }
                source = source.parentElement;
            }
            return false;
        } else if (this.use_gebi) {
            // originalTarget only exists in Mozilla; fall back to the
            // standard target, and treat a missing event as "not clicked".
            var node = e ? (e.originalTarget || e.target) : null;
            while (node && node.parentNode != null) {
                if (node.id == this.divName) { return true; }
                node = node.parentNode;
            }
            return false;
        }
        return false;
    }
    return false;
}

// Hides the popup when auto-hide is enabled and the click was outside it.
function PopupWindow_hideIfNotClicked(e) {
    if (this.autoHideEnabled && !this.isClicked(e)) {
        this.hidePopup();
    }
}

// Enables hiding the popup on any click outside of it.
function PopupWindow_autoHide() {
    this.autoHideEnabled = true;
}

// Document-level mouseup handler: gives every registered popup the chance
// to hide itself.
function PopupWindow_hidePopupWindows(e) {
    for (var i = 0; i < popupWindowObjects.length; i++) {
        if (popupWindowObjects[i] != null) {
            var p = popupWindowObjects[i];
            p.hideIfNotClicked(e);
        }
    }
}

// Installs PopupWindow_hidePopupWindows as the document's mouseup handler,
// chaining any handler that was installed before. The event object is
// forwarded to both handlers; without it the click test cannot work in
// browsers that pass the event as an argument.
function PopupWindow_attachListener() {
    if (document.layers) {
        document.captureEvents(Event.MOUSEUP);
    }
    window.popupWindowOldEventListener = document.onmouseup;
    if (window.popupWindowOldEventListener != null) {
        document.onmouseup = function (e) {
            window.popupWindowOldEventListener(e);
            PopupWindow_hidePopupWindows(e);
        };
    } else {
        document.onmouseup = PopupWindow_hidePopupWindows;
    }
}

// Constructor. With an argument the popup lives in the DIV of that id,
// without one it is a separate browser window. Every instance registers
// itself in window.popupWindowObjects, and the first one installs the
// document mouseup listener.
function PopupWindow() {
    if (!window.popupWindowIndex) { window.popupWindowIndex = 0; }
    if (!window.popupWindowObjects) { window.popupWindowObjects = new Array(); }
    if (!window.listenerAttached) {
        window.listenerAttached = true;
        PopupWindow_attachListener();
    }
    this.index = popupWindowIndex++;
    popupWindowObjects[this.index] = this;
    this.divName = null;
    this.popupWindow = null;
    this.width = 0;
    this.height = 0;
    this.populated = false;
    this.visible = false;
    this.autoHideEnabled = false;
    this.contents = "";
    if (arguments.length > 0) {
        this.type = "DIV";
        this.divName = arguments[0];
    } else {
        this.type = "WINDOW";
    }
    // Feature detection; with none of the three available only a window
    // popup is possible.
    this.use_gebi = false;
    this.use_css = false;
    this.use_layers = false;
    if (document.getElementById) {
        this.use_gebi = true;
    } else if (document.all) {
        this.use_css = true;
    } else if (document.layers) {
        this.use_layers = true;
    } else {
        this.type = "WINDOW";
    }
    this.offsetX = 0;
    this.offsetY = 0;
    this.getXYPosition = PopupWindow_getXYPosition;
    this.populate = PopupWindow_populate;
    this.refresh = PopupWindow_refresh;
    this.showPopup = PopupWindow_showPopup;
    this.hidePopup = PopupWindow_hidePopup;
    this.setSize = PopupWindow_setSize;
    this.isClicked = PopupWindow_isClicked;
    this.autoHide = PopupWindow_autoHide;
    this.hideIfNotClicked = PopupWindow_hideIfNotClicked;
}
|
||||
|
||||
|
||||
/* SOURCE FILE: ColorPicker2.js */
|
||||
|
||||
ColorPicker_targetInput = null;
|
||||
function ColorPicker_writeDiv(){document.writeln("<DIV ID=\"colorPickerDiv\" STYLE=\"position:absolute;visibility:hidden;\"> </DIV>");}
|
||||
function ColorPicker_show(anchorname){this.showPopup(anchorname);}
|
||||
function ColorPicker_pickColor(color,obj){obj.hidePopup();pickColor(color);}
|
||||
function pickColor(color){if(ColorPicker_targetInput==null){alert("Target Input is null, which means you either didn't use the 'select' function or you have no defined your own 'pickColor' function to handle the picked color!");return;}ColorPicker_targetInput.value = color;}
|
||||
function ColorPicker_select(inputobj,linkname){if(inputobj.type!="text" && inputobj.type!="hidden" && inputobj.type!="textarea"){alert("colorpicker.select: Input object passed is not a valid form input object");window.ColorPicker_targetInput=null;return;}window.ColorPicker_targetInput = inputobj;this.show(linkname);}
|
||||
function ColorPicker_highlightColor(c){var thedoc =(arguments.length>1)?arguments[1]:window.document;var d = thedoc.getElementById("colorPickerSelectedColor");d.style.backgroundColor = c;d = thedoc.getElementById("colorPickerSelectedColorValue");d.innerHTML = c;}
|
||||
/**
 * Factory for a color picker built on top of PopupWindow.
 *
 * Usage:
 *   ColorPicker()           - PopupWindow created with the name "colorPickerDiv"
 *   ColorPicker("window")   - PopupWindow created with no arguments and
 *                             sized 250x225; generated handlers are prefixed
 *                             with "window.opener."
 *   ColorPicker(divName)    - PopupWindow created with the given name
 *
 * The returned PopupWindow is extended with writeDiv / highlightColor /
 * show / select and populated with an HTML table holding the 216 web-safe
 * colors, 18 swatches per row. When document.getElementById is available, a
 * final row previews the hovered color.
 *
 * @return the configured PopupWindow instance
 */
function ColorPicker() {
    // Number of swatches per table row.
    var width = 18;

    // Decide between div mode and window mode from the first argument.
    var windowMode = false;
    var divname;
    if (arguments.length == 0) {
        divname = "colorPickerDiv";
    } else if (arguments[0] == "window") {
        divname = "";
        windowMode = true;
    } else {
        divname = arguments[0];
    }

    var cp;
    if (divname != "") {
        cp = new PopupWindow(divname);
    } else {
        cp = new PopupWindow();
        cp.setSize(250, 225);
    }

    cp.currentValue = "#FFFFFF";
    cp.writeDiv = ColorPicker_writeDiv;
    cp.highlightColor = ColorPicker_highlightColor;
    cp.show = ColorPicker_show;
    cp.select = ColorPicker_select;

    // Web-safe palette: each channel takes one of six values. Blue varies
    // fastest, then red, then green, giving "#000000", "#000033", ...,
    // "#0000FF", "#330000", ..., "#FF00FF", "#003300", ..., "#FFFFFF".
    var steps = ["00", "33", "66", "99", "CC", "FF"];
    var colors = [];
    for (var g = 0; g < steps.length; g++) {
        for (var r = 0; r < steps.length; r++) {
            for (var b = 0; b < steps.length; b++) {
                colors.push("#" + steps[r] + steps[g] + steps[b]);
            }
        }
    }
    var total = colors.length;

    // In window mode the handlers live in the opener, so prefix them.
    var windowRef = windowMode ? "window.opener." : "";
    var use_highlight = (document.getElementById || document.all) ? true : false;

    var cp_contents = "";
    if (windowMode) {
        cp_contents += "<HTML><HEAD><TITLE>Select Color</TITLE></HEAD>";
        cp_contents += "<BODY MARGINWIDTH=0 MARGINHEIGHT=0 LEFTMARGIN=0 TOPMARGIN=0><CENTER>";
    }
    cp_contents += "<TABLE BORDER=1 CELLSPACING=1 CELLPADDING=0>";

    for (var i = 0; i < total; i++) {
        var color = colors[i];

        if ((i % width) == 0) {
            cp_contents += "<TR>";
        }

        var mo = "";
        if (use_highlight) {
            mo = 'onMouseOver="' + windowRef + 'ColorPicker_highlightColor(\'' + color + '\',window.document)"';
        }

        // &nbsp; keeps the link (and therefore the cell) from collapsing to
        // nothing, independent of the file's character encoding.
        cp_contents += '<TD BGCOLOR="' + color + '"><FONT SIZE="-3"><A HREF="#" onClick="' +
            windowRef + 'ColorPicker_pickColor(\'' + color + '\',' +
            windowRef + 'window.popupWindowObjects[' + cp.index + ']);return false;" ' +
            mo + ' STYLE="text-decoration:none;">&nbsp;</A></FONT></TD>';

        // Close the row after its last cell or after the very last color.
        if (((i + 1) >= total) || (((i + 1) % width) == 0)) {
            cp_contents += "</TR>";
        }
    }

    if (document.getElementById) {
        // Preview row: swatch on the left, color text on the right. The two
        // colspans always add up to the full row width.
        var width1 = Math.floor(width / 2);
        var width2 = width - width1;
        cp_contents += "<TR><TD COLSPAN='" + width1 + "' BGCOLOR='#ffffff' ID='colorPickerSelectedColor'>&nbsp;</TD>" +
            "<TD COLSPAN='" + width2 + "' ALIGN='CENTER' ID='colorPickerSelectedColorValue'>#FFFFFF</TD></TR>";
    }

    cp_contents += "</TABLE>";
    if (windowMode) {
        cp_contents += "</CENTER></BODY></HTML>";
    }

    cp.populate(cp_contents + "\n");
    cp.offsetY = 25;
    cp.autoHide();
    return cp;
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue