added code and test suite for a series of functions that

identify non-human web clients so that advertising can
eliminate ghost impressions and ghost clicks
This commit is contained in:
David Delikat 2009-02-23 00:20:46 +00:00
parent fdeece9a64
commit af5ad84fc8
5 changed files with 305 additions and 2 deletions

View file

@ -30,6 +30,8 @@ $env = WebGUI::Session::Env->new;
$value = $env->get('REMOTE_ADDR');
return 'not gonna see it' if $env->requestNotViewed() ;
=head1 METHODS
These methods are available from this package:
@ -37,6 +39,66 @@ These methods are available from this package:
=cut
#-------------------------------------------------------------------
=head2 callerIsSearchSite ( )
returns true if the remote address matches a site which is a known indexer or spider.
=cut
sub callerIsSearchSite {
    my $self          = shift;
    my $remoteAddress = $self->get('REMOTE_ADDR');

    # A missing or empty address is treated as a non-human caller.  This must
    # be a string test: a numeric comparison against '' is true for every
    # address that numifies to zero (e.g. "::1" or "0.0.0.0").
    return 1 if !defined $remoteAddress || $remoteAddress eq '';

    # Known indexer address blocks; the final octet is three digits starting
    # with 1 (first range) or 2 (second range).
    return 1 if $remoteAddress =~ /\b203\.87\.123\.1\d\d\b/     # Blaiz Enterprise Rawgrunt search
             || $remoteAddress =~ /\b123\.113\.184\.2\d\d\b/;   # Unknown Yahoo Robot

    return 0;
}
#-------------------------------------------------------------------
=head2 clientIsSpider ( )
returns true if the client/agent is a spider/indexer or some other non-human interface
=cut
sub clientIsSpider {
    my $self      = shift;
    my $userAgent = $self->get('HTTP_USER_AGENT');

    # A request with no agent string at all is treated as non-human.  Checking
    # definedness first avoids uninitialized-value warnings further down.
    return 1 if !defined $userAgent || $userAgent eq '';

    # Case-insensitive markers of automated clients.  Only the first two
    # alternatives are anchored to the start of the agent string; the rest
    # match anywhere inside it.
    return 1 if $userAgent =~ m{
          \A wre/       # the WRE wget's http://localhost/ every 2-3 minutes 24 hours a day...
        | \A morpheus
        | libwww
        | s[pb]ider
        | bot
        | robo
        | sco[ou]t
        | crawl
        | miner
        | reaper
        | finder
        | search
        | engine
        | download
        | fetch
        | scan
        | slurp
    }ix;

    return 0;
}
#-------------------------------------------------------------------
=head2 DESTROY ( )
@ -100,5 +162,22 @@ sub new {
bless {_env=>\%ENV}, $class;
}
#-------------------------------------------------------------------
=head2 requestNotViewed ( )
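returns true if the request was not viewed by a human: either the client/agent is a spider/indexer (see clientIsSpider) or the remote address belongs to a known search site (see callerIsSearchSite)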
=cut
sub requestNotViewed {
    my ($self) = @_;

    # Ask about the user agent first; when it already identifies a spider,
    # hand that answer back without consulting the address check.
    my $spiderVerdict = $self->clientIsSpider();
    return $spiderVerdict if $spiderVerdict;

    # Otherwise the verdict is whatever the remote-address check reports.
    return $self->callerIsSearchSite();
}
1;