added code and test suite for a series of functions that
identify non-human web clients so that advertising can eliminate ghost impressions and ghost clicks
This commit is contained in:
parent
fdeece9a64
commit
af5ad84fc8
5 changed files with 305 additions and 2 deletions
|
|
@ -30,6 +30,8 @@ $env = WebGUI::Session::Env->new;
|
|||
|
||||
$value = $env->get('REMOTE_ADDR');
|
||||
|
||||
return 'not gonna see it' if $env->requestNotViewed() ;
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
These methods are available from this package:
|
||||
|
|
@ -37,6 +39,66 @@ These methods are available from this package:
|
|||
=cut
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 callerIsSearchSite ( )

Returns 1 if the remote address (REMOTE_ADDR) is missing or empty, or if it
falls inside one of the address ranges listed in this method as belonging to a
known indexer or spider. Returns 0 otherwise.

=cut

sub callerIsSearchSite {
    my $self          = shift;
    my $remoteAddress = $self->get('REMOTE_ADDR');

    # A request with no address at all is counted as a robot. The comparison
    # must be a string comparison: a numeric == would treat every address that
    # numifies to 0 (for example '0.0.0.0' or '::1') as if it were empty.
    return 1 if !defined($remoteAddress) || $remoteAddress eq '';

    # Each range covers the last octet from x00 to x99 for the given leading
    # digit. \b and \d keep the match from spilling into longer digit runs.
    return 1 if $remoteAddress =~ /\b203\.87\.123\.1\d\d\b/     # Blaiz Enterprise Rawgrunt search
             || $remoteAddress =~ /\b123\.113\.184\.2\d\d\b/;   # Unknown Yahoo Robot

    return 0;
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 clientIsSpider ( )

Returns 1 if the client's user agent (HTTP_USER_AGENT) is missing or empty, or
if it contains one of the substrings listed in this method that are typical of
spiders, indexers and other non-human clients. The match is case-insensitive.
Returns 0 otherwise.

=cut

sub clientIsSpider {
    my $self      = shift;
    my $userAgent = $self->get('HTTP_USER_AGENT');

    # A request that sends no user agent is counted as non-human. Checking
    # definedness first avoids an "uninitialized value" warning in that case.
    return 1 if !defined($userAgent) || $userAgent eq '';

    return 1 if $userAgent =~ m{
        (?:
            \A wre/        # the WRE wget's http://localhost/ every 2-3 minutes 24 hours a day...
          | \A morpheus
          | libwww
          | s[pb]ider
          | bot
          | robo
          | sco[ou]t
          | crawl
          | miner
          | reaper
          | finder
          | search
          | engine
          | download
          | fetch
          | scan
          | slurp
        )
    }ix;

    return 0;
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 DESTROY ( )
|
||||
|
|
@ -100,5 +162,22 @@ sub new {
|
|||
bless {_env=>\%ENV}, $class;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 requestNotViewed ( )

Returns a true value if either clientIsSpider() or callerIsSearchSite() reports
true for this request; otherwise returns the false value given by
callerIsSearchSite(). The address check is only made when the user agent check
comes back false.

=cut

sub requestNotViewed {
    my ($self) = @_;

    # The user agent check runs first; a true answer there settles it.
    my $spider = $self->clientIsSpider();
    return $spider if $spider;

    return $self->callerIsSearchSite();
}
|
||||
|
||||
|
||||
1;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue