added code and test suite for a series of functions that
identify non-human web clients so that advertising can eliminate ghost impressions and ghost clicks
This commit is contained in:
parent
fdeece9a64
commit
af5ad84fc8
5 changed files with 305 additions and 2 deletions
|
|
@ -1,4 +1,5 @@
|
|||
7.6.13
|
||||
- fixed AdSpace bug: impressions and clicks for most non-human web clients will not be counted.
|
||||
- fixed #9760: DataForm not working in demo.plainblack.com
|
||||
- fixed #9759: Delete Entry Button missing in Data Form
|
||||
- fixed #9767: FileAsset breaks 'null' rule for FileAsset table
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ sub countClick {
|
|||
my $session = shift;
|
||||
my $id = shift;
|
||||
my ($url) = $session->db->quickArray("select url from advertisement where adId=?",[$id]);
|
||||
return $url if $self->session->env->requestNotViewed();
|
||||
$session->db->write("update advertisement set clicks=clicks+1 where adId=?",[$id]);
|
||||
return $url;
|
||||
}
|
||||
|
|
@ -131,6 +132,7 @@ A boolean that tells the ad system not to count this impression if true.
|
|||
sub displayImpression {
|
||||
my $self = shift;
|
||||
my $dontCount = shift;
|
||||
return '' if $self->session->env->requestNotViewed();
|
||||
my ($id, $ad, $priority, $clicks, $clicksBought, $impressions, $impressionsBought) = $self->session->db->quickArray("select adId, renderedAd, priority, clicks, clicksBought, impressions, impressionsBought from advertisement where adSpaceId=? and isActive=1 order by nextInPriority asc limit 1",[$self->getId]);
|
||||
unless ($dontCount) {
|
||||
my $isActive = 1;
|
||||
|
|
|
|||
|
|
@ -262,10 +262,25 @@ sub set {
|
|||
# prerender the ad for faster display
|
||||
my $adSpace = WebGUI::AdSpace->new($self->session, $self->get("adSpaceId"));
|
||||
if ($self->get("type") eq "text") {
|
||||
$self->{_properties}{renderedAd} = '<div style="position:relative; width:'.($adSpace->get("width")-2).'px; height:'.($adSpace->get("height")-2).'px; margin:0px; overflow:hidden; border:solid '.$self->get("borderColor").' 1px;"><a href="'.$self->session->url->gateway(undef, "op=clickAd;id=".$self->getId).'" style="position:absolute; padding: 3px; top:0px; left:0px; width:100%; height:100%; z-index:10; display:block; text-decoration:none; vertical-align:top; background-color:'.$self->get("backgroundColor").'; font-size: 13px; font-weight: normal;"><b><span style="color:'.$self->get("textColor").';">'.$self->get("title").'</span></b><br /><span style="color:'.$self->get("textColor").';">'.$self->get("adText").'</span></a></div>';
|
||||
$self->{_properties}{renderedAd} = '<div style="position:relative; width:' . ($adSpace->get("width")-2) . 'px; height:' .
|
||||
($adSpace->get("height")-2) . 'px; margin:0px; overflow:hidden; border:solid ' . $self->get("borderColor") .
|
||||
q{ 1px;"><a href='#' OnClick="window.location.assign('} .
|
||||
$self->session->url->gateway(undef, "op=clickAd;id=".$self->getId) .
|
||||
q{')" style="position:absolute; padding: 3px; top:0px; left:0px; width:100%; height:100%; z-index:10;} .
|
||||
' display:block; text-decoration:none; vertical-align:top; background-color:' . $self->get("backgroundColor") .
|
||||
'; font-size: 13px; font-weight: normal;"><b><span style="color:' . $self->get("textColor") . ';">' .
|
||||
$self->get("title") . '</span></b><br /><span style="color:' . $self->get("textColor") . ';">' .
|
||||
$self->get("adText") . '</span></a></div>';
|
||||
} elsif ($self->get("type") eq "image") {
|
||||
my $storage = WebGUI::Storage->get($self->session, $self->get("storageId"));
|
||||
$self->{_properties}{renderedAd} = '<div style="position:relative; width:'.$adSpace->get("width").'px; height:'.$adSpace->get("height").'px; margin:0px; overflow:hidden; border:0px;"><a href="'.$self->session->url->gateway(undef, "op=clickAd;id=".$self->getId).'" style="position:absolute; padding: 3px; top:0px; left:0px; width:100%; height:100%; z-index:10; display:block; text-decoration:none; vertical-align:top;"><img src="'.$storage->getUrl($storage->getFiles->[0]).'" alt="'.$self->get("title").'" style="z-index:0;position:relative;border-style:none;border: 0px;" /></a></div>';
|
||||
$self->{_properties}{renderedAd} = '<div style="position:relative; width:' . $adSpace->get("width") . 'px; height:' .
|
||||
$adSpace->get("height") . 'px; margin:0px; overflow:hidden; border:0px;"><a href="#" ' .
|
||||
q{onClick="window.location.assign('} .
|
||||
$self->session->url->gateway(undef, "op=clickAd;id=".$self->getId) . q{')" style="position:absolute; padding: } .
|
||||
'3px; top:0px; left:0px; width:100%; height:100%; z-index:10; display:block; text-decoration:none; ' .
|
||||
'vertical-align:top;"><img ' .
|
||||
'src="' . $storage->getUrl($storage->getFiles->[0]) . '" alt="' . $self->get("title") .
|
||||
'" style="z-index:0;position:relative;border-style:none;border: 0px;" /></a></div>';
|
||||
} elsif ($self->get("type") eq "rich") {
|
||||
my $ad = $self->get("richMedia");
|
||||
WebGUI::Macro::process($self->session, \$ad);
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ $env = WebGUI::Session::Env->new;
|
|||
|
||||
$value = $env->get('REMOTE_ADDR');
|
||||
|
||||
return 'not gonna see it' if $env->requestNotViewed() ;
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
These methods are available from this package:
|
||||
|
|
@ -37,6 +39,66 @@ These methods are available from this package:
|
|||
=cut
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 callerIsSearchSite ( )
|
||||
|
||||
returns true if the remote address matches a site which is a known indexer or spider.
|
||||
|
||||
=cut
|
||||
|
||||
sub callerIsSearchSite {

    # Decides, from the REMOTE_ADDR value alone, whether the request comes
    # from an address on the list of known indexers/spiders below.
    # Returns 1 for a listed (or missing) address, 0 otherwise.
    my $self = shift;
    my $remoteAddress = $self->get('REMOTE_ADDR');

    # A missing or empty address is treated as a non-human caller.  This has
    # to be a string test: a numeric == against '' would also be true for any
    # address that numifies to zero, such as 0.0.0.0.
    return 1 if !defined $remoteAddress || $remoteAddress eq '';

    # Each pattern covers the .100-.199 / .200-.299 range of one /24 network.
    # The look-arounds keep the pattern from matching in the middle of a
    # longer number (e.g. 1203.87.123.150) while still allowing a prefix
    # such as the IPv4-mapped "::ffff:" form.
    return 1 if $remoteAddress =~ /(?<![\d.])203\.87\.123\.1\d\d(?!\d)/     # Blaiz Enterprise Rawgrunt search
             || $remoteAddress =~ /(?<![\d.])123\.113\.184\.2\d\d(?!\d)/;   # Unknown Yahoo Robot

    return 0;

}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 clientIsSpider ( )
|
||||
|
||||
returns true if the client/agent is a spider/indexer or some other non-human interface
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
sub clientIsSpider {

    # Classifies the request by its HTTP_USER_AGENT string.
    # Returns 1 when the agent is missing/empty or contains one of the
    # keywords below (case-insensitive), 0 otherwise.
    my $self = shift;
    my $userAgent = $self->get('HTTP_USER_AGENT');

    # No user agent at all counts as non-human; checking defined-ness first
    # avoids an "uninitialized value" warning when the header is absent.
    return 1 if !defined $userAgent || $userAgent eq '';

    # Only the first two alternatives are anchored to the start of the
    # string; every other keyword may appear anywhere in the agent.
    return 1 if $userAgent =~ m{
          ^wre/         # the WRE wget's http://localhost/ every 2-3 minutes 24 hours a day...
        | ^morpheus
        | libwww
        | s[pb]ider
        | bot
        | robo
        | sco[ou]t
        | crawl
        | miner
        | reaper
        | finder
        | search
        | engine
        | download
        | fetch
        | scan
        | slurp
    }ix;

    return 0;

}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 DESTROY ( )
|
||||
|
|
@ -100,5 +162,22 @@ sub new {
|
|||
bless {_env=>\%ENV}, $class;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 requestNotViewed ( )
|
||||
|
||||
returns true if the request comes from a spider/indexer or other non-human client, or from a known search site address, i.e. the response will not be viewed by a person
|
||||
|
||||
=cut
|
||||
|
||||
sub requestNotViewed {

    # True when either check classifies the request as non-human:
    # the user-agent test runs first, and the remote-address test is only
    # consulted when the user agent looks like a regular browser.
    my ($self) = @_;

    my $agentVerdict = $self->clientIsSpider();
    return $agentVerdict if $agentVerdict;

    return $self->callerIsSearchSite();

}
|
||||
|
||||
|
||||
1;
|
||||
|
||||
|
|
|
|||
206
t/Session/CheckClient.t
Normal file
206
t/Session/CheckClient.t
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
#-------------------------------------------------------------------
|
||||
# WebGUI is Copyright 2001-2009 Plain Black Corporation.
|
||||
#-------------------------------------------------------------------
|
||||
# Please read the legal notices (docs/legal.txt) and the license
|
||||
# (docs/license.txt) that came with this distribution before using
|
||||
# this software.
|
||||
#-------------------------------------------------------------------
|
||||
# http://www.plainblack.com info@plainblack.com
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
# this test can take two parameters
|
||||
# first is an xml file, second indicates
|
||||
# the percentage of items to test.
|
||||
# the xml file can be downloaded from
|
||||
# http://www.user-agents.org/
|
||||
# the percent will default to 25 and
|
||||
# should be passed as a whole number
|
||||
# so 100 will test all items, 75 will
|
||||
# test 75% or 3 out of four items
|
||||
|
||||
use FindBin;
|
||||
use strict;
|
||||
use lib "$FindBin::Bin/lib";
|
||||
use lib '/data/WebGUI/t/lib';
|
||||
|
||||
use WebGUI::Test;
|
||||
use WebGUI::Session;
|
||||
|
||||
use Test::More;
|
||||
|
||||
my $session = WebGUI::Test->session;
|
||||
|
||||
# this test is for code in the WebGUI::Session::Env Module
|
||||
|
||||
# Built-in sample of user agents, used when no XML file is given on the
# command line.  Each entry holds:
#   comment - the test description
#   agent   - the HTTP_USER_AGENT string to test
#   address - optional REMOTE_ADDR (the test loop supplies a default)
#   output  - the expected result of requestNotViewed()
my @testArray = (
    { comment => "blank user agent",
      agent   => "",
      output  => 1 },
    { comment => "UnChaos hybrid search engine",
      agent   => "<a href='http://www.unchaos.com/'> UnChaos </a> From Chaos To Order Hybrid Web Search Engine.(vadim_goncharunchaos.com)",
      output  => 1 },
    { comment => "DreamCast DreamPassport browser",
      agent   => "(DreamPassport/3.0; isao/MyDiGiRabi)",
      output  => 0 },
    # proxies are expected to be treated like browsers
    { comment => "s.also Privoxy/3.0 (Anonymous)",
      agent   => "Privoxy web proxy",
      output  => 0 },
    # the only entry that is expected to be caught by its address
    { comment => "Unknown Yahoo robot",
      agent   => "*/Nutch-0.9-dev",
      address => "123.113.184.232",
      output  => 1 },
    { comment => "123spider.de (Germany) web directory link checking",
      agent   => "123spider-Bot (Version: 1.02, powered by www.123spider.de",
      output  => 1 },
    { comment => "1st ZipCommander Net - IE based browser",
      agent   => "1st ZipCommander (Net) - http://www.zipcommander.com/",
      output  => 0 },
    { comment => "2Bone online link checker",
      agent   => "2Bone_LinkChecker/1.0 libwww-perl/5.64",
      output  => 1 },
    { comment => "A-Online.at robot - now Jet2Web Search",
      agent   => "A-Online Search",
      output  => 1 },
    { comment => "Avant Browser - IE based browser",
      agent   => "Advanced Browser (http://www.avantbrowser.com)",
      output  => 0 },
    { comment => "Aesop robot",
      agent   => "AESOP_com_SpiderMan",
      output  => 1 },
    { comment => "Spurl.net bookmark service & search engine (84.40.30.xxx)",
      agent   => "Mozilla/5.0 (compatible; SpurlBot/0.2)",
      output  => 1 },
    { comment => "Mozilla/5.0 (compatible;MAINSEEK_BOT)",
      agent   => "Mozilla/5.0 (compatible;MAINSEEK_BOT)",
      output  => 1 },
    { comment => "Chimera browser (Mozilla/Gecko engine) - now Camino Mac PowerPC",
      agent   => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021219 Chimera/0.6",
      output  => 0 },
    { comment => "OmniWeb 5.x.x Mac OS X browser",
      agent   => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/xx (KHTML like Gecko) OmniWeb/v5xx.xx",
      output  => 0 },
    { comment => "Firefox browser (Mozilla/Gecko engine) - ex Firebird WinXP",
      agent   => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.x.x) Gecko/20041107 Firefox/x.x",
      output  => 0 },
    { comment => "Voila.fr robot",
      agent   => "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabotorange-ftgroup.com)",
      output  => 1 },
    { comment => "Healthline health related search robot (72.5.115.xx)",
      agent   => "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawlerhealth",
      output  => 1 },
    { comment => "SeaMonkey browser suite (ex Mozilla) on Linux",
      agent   => "Mozilla/5.0 (X11; U; Linux i686; de-AT; rv:1.8.0.2) Gecko/20060309 SeaMonkey/1.0",
      output  => 0 },
    { comment => "Yuntis : Collaborative Web Resource Categorization and Ranking Project robot",
      agent   => "Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
      output  => 1 },
);
|
||||
|
||||
sub transType {
    # Maps a type code from the XML file to the expected result of
    # requestNotViewed(): a code containing B (browser) or P (proxy) is
    # expected to be counted as viewed (0), anything else is not (1).
    my ($typeCode) = @_;
    return ( $typeCode =~ /[BP]/ ) ? 0 : 1;
}
|
||||
|
||||
sub getAddress {
    # Supplies the REMOTE_ADDR for a test case built from the XML file.
    # Very few records carry an IP address that could be extracted, so the
    # record passed in is ignored and one fixed address is always returned.
    #
    # Disabled extraction attempt, kept for reference:
    #if( $_[0]{Comment} =~ /\d\.\d\.\d/ ) {
    #    print $_[0]{Comment},"\t|\t",$_[0]{Description},"\n";
    #    $x = $_[0]{Comment};
    #    $x =~ s/x/2/;
    #}
    return '69.42.78.32';
}
|
||||
|
||||
sub testCount {

    # Returns the number of test cases to plan for.
    #
    # With no usable command line arguments the built-in @testArray is left
    # untouched.  When the first argument names a readable *.xml file,
    # @testArray is rebuilt from the file's 'user-agent' records; an optional
    # second argument gives the percentage of records to keep (default 25).
    if ( @ARGV && $ARGV[0] =~ /\.xml$/ && -r $ARGV[0] ) {
        my $infile  = shift @ARGV;
        my $percent = ( shift @ARGV ) || 25;

        # Loaded at run time so the test still compiles and runs its built-in
        # cases on machines where XML::Simple is not installed.
        require XML::Simple;
        my $data = XML::Simple->new->XMLin(
            $infile,
            ForceArray    => ['user-agent'],   # a single record must still be a list
            KeyAttr       => [],               # never fold the list into a hash
            SuppressEmpty => '',               # empty elements become '' rather than {}
        );

        @testArray = ();

        # Sampling: records are taken in runs of $div; out of every run the
        # first $n are kept, which yields roughly $percent percent overall.
        my $c   = 1;
        my $div = 20;
        my $n   = $div * $percent / 100;
        foreach my $set ( @{ $data->{'user-agent'} || [] } ) {
            $c = 1 if $c > $div;
            if ( $c <= $n ) {
                push @testArray, {
                    agent   => $set->{String},
                    output  => transType( $set->{Type} ),
                    type    => $set->{Type},
                    comment => $set->{Description},
                    # comment => $set->{String}, # handy for fine tuning the code: it shows the string that failed...
                    address => getAddress($set),
                };
            }
            $c++;
        }
    }

    return scalar(@testArray);
}
|
||||
|
||||
|
||||
# One test per entry in @testArray; testCount() may first replace the
# built-in entries with ones read from an XML file.
plan tests => testCount();

foreach my $testSet (@testArray) {
    # Build a fake environment holding only the two values the checks read.
    # Entries without an address of their own get a default one.
    my $env = FAKE_ENV->new(
        $testSet->{agent},
        $testSet->{address} || '69.42.78.32',
    );
    my $output = $env->requestNotViewed();
    is($output, $testSet->{output}, $testSet->{comment});
}
|
||||
|
||||
{ # local stand-in for the session environment object, used for testing only
    package FAKE_ENV;
    use base 'WebGUI::Session::Env';

    # new( userAgent, remoteAddress )
    # Builds an object whose _env hash holds just the two given values.
    # The invocant is ignored; the object is always blessed into FAKE_ENV.
    sub new {
        my ( $invocant, $userAgent, $remoteAddress ) = @_;
        my %fakeEnv = (
            HTTP_USER_AGENT => $userAgent,
            REMOTE_ADDR     => $remoteAddress,
        );
        return bless { _env => \%fakeEnv }, __PACKAGE__;
    }
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue