diff --git a/docs/changelog/7.x.x.txt b/docs/changelog/7.x.x.txt
index f7f8046ee..65b662f0a 100644
--- a/docs/changelog/7.x.x.txt
+++ b/docs/changelog/7.x.x.txt
@@ -1,4 +1,5 @@
7.6.13
+ - fixed AdSpace bug: impressions and clicks from most non-human web clients are no longer counted.
- fixed #9760: DataForm not working in demo.plainblack.com
- fixed #9759: Delete Entry Button missing in Data Form
- fixed #9767: FileAsset breaks 'null' rule for FileAsset table
diff --git a/lib/WebGUI/AdSpace.pm b/lib/WebGUI/AdSpace.pm
index 27f937d67..096abbc77 100644
--- a/lib/WebGUI/AdSpace.pm
+++ b/lib/WebGUI/AdSpace.pm
@@ -52,6 +52,7 @@ sub countClick {
my $session = shift;
my $id = shift;
my ($url) = $session->db->quickArray("select url from advertisement where adId=?",[$id]);
+ # Do not count clicks from non-human clients. Only $session is in scope here,
+ # so the environment is reached through it rather than through an object.
+ return $url if $session->env->requestNotViewed();
$session->db->write("update advertisement set clicks=clicks+1 where adId=?",[$id]);
return $url;
}
@@ -131,6 +132,7 @@ A boolean that tells the ad system not to count this impression if true.
sub displayImpression {
my $self = shift;
my $dontCount = shift;
+ return '' if $self->session->env->requestNotViewed();
my ($id, $ad, $priority, $clicks, $clicksBought, $impressions, $impressionsBought) = $self->session->db->quickArray("select adId, renderedAd, priority, clicks, clicksBought, impressions, impressionsBought from advertisement where adSpaceId=? and isActive=1 order by nextInPriority asc limit 1",[$self->getId]);
unless ($dontCount) {
my $isActive = 1;
diff --git a/lib/WebGUI/AdSpace/Ad.pm b/lib/WebGUI/AdSpace/Ad.pm
index 6309c218d..dc0dc2fd6 100644
--- a/lib/WebGUI/AdSpace/Ad.pm
+++ b/lib/WebGUI/AdSpace/Ad.pm
@@ -262,10 +262,25 @@ sub set {
# prerender the ad for faster display
my $adSpace = WebGUI::AdSpace->new($self->session, $self->get("adSpaceId"));
if ($self->get("type") eq "text") {
- $self->{_properties}{renderedAd} = '
';
+ $self->{_properties}{renderedAd} = '';
} elsif ($self->get("type") eq "image") {
my $storage = WebGUI::Storage->get($self->session, $self->get("storageId"));
- $self->{_properties}{renderedAd} = '';
+ $self->{_properties}{renderedAd} = '';
} elsif ($self->get("type") eq "rich") {
my $ad = $self->get("richMedia");
WebGUI::Macro::process($self->session, \$ad);
diff --git a/lib/WebGUI/Session/Env.pm b/lib/WebGUI/Session/Env.pm
index 16e66b690..491767c0d 100644
--- a/lib/WebGUI/Session/Env.pm
+++ b/lib/WebGUI/Session/Env.pm
@@ -30,6 +30,8 @@ $env = WebGUI::Session::Env->new;
$value = $env->get('REMOTE_ADDR');
+return 'not gonna see it' if $env->requestNotViewed() ;
+
=head1 METHODS
These methods are available from this package:
@@ -37,6 +39,66 @@ These methods are available from this package:
=cut
+#-------------------------------------------------------------------
+
+=head2 callerIsSearchSite ( )
+
+Returns 1 if the REMOTE_ADDR of the request is undefined, empty, or inside one of the
+address ranges listed below as belonging to an indexer or spider. Returns 0 otherwise.
+
+=cut
+
+sub callerIsSearchSite {
+    my $self          = shift;
+    my $remoteAddress = $self->get('REMOTE_ADDR');
+
+    # A request without an address is treated as not coming from a person.
+    # This must be a string test: a numeric == against '' is also true for any
+    # address that numifies to 0, such as the IPv6 loopback "::1".
+    return 1 if !defined $remoteAddress || $remoteAddress eq '';
+
+    # The final octet must be exactly three digits (1xx / 2xx) and end the string.
+    return 1 if $remoteAddress =~ /\b203\.87\.123\.1\d\d\z/      # Blaiz Enterprise Rawgrunt search
+             || $remoteAddress =~ /\b123\.113\.184\.2\d\d\z/;    # Unknown Yahoo Robot
+
+    return 0;
+}
+
+
+#-------------------------------------------------------------------
+
+=head2 clientIsSpider ( )
+
+Returns 1 if the HTTP_USER_AGENT of the request is empty or contains one of the
+keywords associated with spiders, indexers and other non-human clients. Returns 0 otherwise.
+
+=cut
+
+sub clientIsSpider {
+    my ($self) = @_;
+    my $agent = $self->get('HTTP_USER_AGENT');
+
+    # No agent string at all: treat the client as automated.
+    if ( $agent eq '' ) {
+        return 1;
+    }
+
+    # Case-insensitive keyword search anywhere in the agent string
+    # (the first two alternatives must start the string).
+    my $looksAutomated = $agent =~ m<(^wre\/|     # the WRE fetches http://localhost/ every 2-3 minutes, around the clock
+                                     ^morpheus|
+                                     libwww|
+                                     s[pb]ider|
+                                     bot|
+                                     robo|
+                                     sco[ou]t|
+                                     crawl|
+                                     miner|
+                                     reaper|
+                                     finder|
+                                     search|
+                                     engine|
+                                     download|
+                                     fetch|
+                                     scan|
+                                     slurp)>ix;
+
+    return $looksAutomated ? 1 : 0;
+}
+
+
+
+
#-------------------------------------------------------------------
=head2 DESTROY ( )
@@ -100,5 +162,22 @@ sub new {
bless {_env=>\%ENV}, $class;
}
+#-------------------------------------------------------------------
+
+=head2 requestNotViewed ( )
+
+Returns true when the request is unlikely to be seen by a person: either clientIsSpider()
+recognises the user agent or callerIsSearchSite() recognises the remote address.
+
+=cut
+
+sub requestNotViewed {
+    my ($self) = @_;
+
+    # The agent check comes first; the address check runs only when it is false.
+    my $spider = $self->clientIsSpider();
+    return $spider if $spider;
+
+    return $self->callerIsSearchSite();
+}
+
+
1;
+
diff --git a/t/Session/CheckClient.t b/t/Session/CheckClient.t
new file mode 100644
index 000000000..e75598a97
--- /dev/null
+++ b/t/Session/CheckClient.t
@@ -0,0 +1,206 @@
+#-------------------------------------------------------------------
+# WebGUI is Copyright 2001-2009 Plain Black Corporation.
+#-------------------------------------------------------------------
+# Please read the legal notices (docs/legal.txt) and the license
+# (docs/license.txt) that came with this distribution before using
+# this software.
+#-------------------------------------------------------------------
+# http://www.plainblack.com info@plainblack.com
+#-------------------------------------------------------------------
+
+# this test can take two parameters
+# first is an xml file, second indicates
+# the percentage of items to test.
+# the xml file can be downloaded from
+# http://www.user-agents.org/
+# the percent will default to 25 and
+# should be passed as a whole number
+# so 100 will test all items, 75 will
+# test 75% or 3 out of four items
+
+use FindBin;
+use strict;
+use lib "$FindBin::Bin/lib";
+use lib '/data/WebGUI/t/lib';
+
+use WebGUI::Test;
+use WebGUI::Session;
+
+use Test::More;
+
+# Fetch the session object provided by WebGUI::Test.
+my $session = WebGUI::Test->session;
+
+# This test exercises requestNotViewed() from the WebGUI::Session::Env module.
+#
+# Built-in test cases. Each entry has:
+#   agent   - the HTTP_USER_AGENT string presented to the check
+#   address - (optional) the REMOTE_ADDR presented; a fixed default is used when absent
+#   output  - the value requestNotViewed() is expected to return (1 or 0)
+#   comment - the description used as the test name
+my @testArray = (
+ {
+ agent => "",
+ output => 1,
+ comment => "blank user agent"
+ },
+ {
+ agent => "<a href='http://www.unchaos.com/'> UnChaos </a> From Chaos To Order Hybrid Web Search Engine.(vadim_goncharunchaos.com)",
+ output => 1,
+ comment => "UnChaos hybrid search engine"
+ },
+ {
+ agent => "(DreamPassport/3.0; isao/MyDiGiRabi)",
+ output => 0,
+ comment => "DreamCast DreamPassport browser"
+ },
+ {
+ agent => "Privoxy web proxy", # proxies are presumably to be treated like browsers
+ output => 0,
+ comment => "s.also Privoxy/3.0 (Anonymous)"
+ },
+ {
+ agent => "*/Nutch-0.9-dev",
+ address => "123.113.184.232",
+ output => 1,
+ comment => "Unknown Yahoo robot"
+ },
+ {
+ agent => "123spider-Bot (Version: 1.02, powered by www.123spider.de",
+ output => 1,
+ comment => "123spider.de (Germany) web directory link checking"
+ },
+ {
+ agent => "1st ZipCommander (Net) - http://www.zipcommander.com/",
+ output => 0,
+ comment => "1st ZipCommander Net - IE based browser"
+ },
+ {
+ agent => "2Bone_LinkChecker/1.0 libwww-perl/5.64",
+ output => 1,
+ comment => "2Bone online link checker"
+ },
+ {
+ agent => "A-Online Search",
+ output => 1,
+ comment => "A-Online.at robot - now Jet2Web Search"
+ },
+ {
+ agent => "Advanced Browser (http://www.avantbrowser.com)",
+ output => 0,
+ comment => "Avant Browser - IE based browser"
+ },
+ {
+ agent => "AESOP_com_SpiderMan",
+ output => 1,
+ comment => "Aesop robot"
+ },
+ {
+ agent => "Mozilla/5.0 (compatible; SpurlBot/0.2)",
+ output => 1,
+ comment => "Spurl.net bookmark service & search engine (84.40.30.xxx)"
+ },
+ {
+ agent => "Mozilla/5.0 (compatible;MAINSEEK_BOT)",
+ output => 1,
+ comment => "Mozilla/5.0 (compatible;MAINSEEK_BOT)"
+ },
+ {
+ agent => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021219 Chimera/0.6",
+ output => 0,
+ comment => "Chimera browser (Mozilla/Gecko engine) - now Camino Mac PowerPC"
+ },
+ {
+ agent => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/xx (KHTML like Gecko) OmniWeb/v5xx.xx",
+ output => 0,
+ comment => "OmniWeb 5.x.x Mac OS X browser"
+ },
+ {
+ agent => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.x.x) Gecko/20041107 Firefox/x.x",
+ output => 0,
+ comment => "Firefox browser (Mozilla/Gecko engine) - ex Firebird WinXP"
+ },
+ {
+ agent => "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabotorange-ftgroup.com)",
+ output => 1,
+ comment => "Voila.fr robot"
+ },
+ {
+ agent => "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawlerhealth",
+ output => 1,
+ comment => "Healthline health related search robot (72.5.115.xx)"
+ },
+ {
+ agent => "Mozilla/5.0 (X11; U; Linux i686; de-AT; rv:1.8.0.2) Gecko/20060309 SeaMonkey/1.0",
+ output => 0,
+ comment => "SeaMonkey browser suite (ex Mozilla) on Linux"
+ },
+ {
+ agent => "Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
+ output => 1,
+ comment => "Yuntis : Collaborative Web Resource Categorization and Ranking Project robot"
+ },
+);
+
+# Translate an XML entry's Type value into the expected result of the check:
+# 0 when the type contains B or P (browser or proxy), 1 for anything else.
+sub transType {
+    my ($type) = @_;
+    return $type =~ /(B|P)/ ? 0 : 1;
+}
+
+# Return the remote address to pair with an XML-derived test case.
+# Very few XML entries carry an IP that can be extracted, so the entry passed in
+# is ignored and every case gets the same fixed address. (An abandoned attempt
+# took the address from the entry's Comment field, replacing an 'x' with '2'.)
+sub getAddress {
+    return '69.42.78.32';
+}
+
+# Decide which cases to run and return how many there are.
+#
+# Without usable arguments the built-in @testArray is kept. When the first
+# command-line argument names a readable .xml file, @testArray is rebuilt from
+# its 'user-agent' entries; the optional second argument is the percentage of
+# entries to sample (default 25). Both arguments are removed from @ARGV.
+sub testCount {
+    my $infile = $ARGV[0];
+
+    if ( defined $infile && $infile =~ /\.xml$/ && -r $infile ) {
+        shift @ARGV;
+        my $percent = shift @ARGV || 25;
+
+        # Load XML::Simple only when a file is really supplied, so the built-in
+        # cases still run where the module is not installed ('use' would load
+        # it at compile time regardless of this branch).
+        require XML::Simple;
+
+        # ForceArray keeps 'user-agent' an array reference even when the file
+        # holds a single entry.
+        my $data = XML::Simple->new->XMLin( $infile, ForceArray => ['user-agent'] );
+
+        @testArray = ();
+
+        # Sampling: entries are walked in windows of $div; the first $n of each
+        # window are kept.
+        my $c   = 1;
+        my $div = 20;
+        my $n   = $div * $percent / 100;
+        foreach my $set ( @{ $data->{'user-agent'} || [] } ) {
+            $c = 1 if $c > $div;
+            if ( $c <= $n ) {
+                push @testArray, {
+                    agent   => $set->{String},
+                    output  => transType( $set->{Type} ),
+                    type    => $set->{Type},
+                    comment => $set->{Description},    # use $set->{String} here to see the failing agent while tuning
+                    address => getAddress($set),
+                };
+            }
+            $c++;
+        }
+    }
+
+    return scalar(@testArray);
+}
+
+
+# One test per case: the plan is the number of cases testCount() settled on.
+plan tests => testCount();
+
+foreach my $testSet (@testArray) {
+    # Build a throw-away environment carrying only the agent and address under
+    # test; cases without an address get the fixed default.
+    my $output = FAKE_ENV->new(
+        $testSet->{agent},
+        $testSet->{address} || '69.42.78.32',
+    )->requestNotViewed();
+    is( $output, $testSet->{output}, $testSet->{comment} );
+}
+
+{    # Minimal stand-in for the session environment, used by this test only.
+package FAKE_ENV;
+use base 'WebGUI::Session::Env';
+
+# new( $agent, $address ): build an object whose _env hash holds just the user
+# agent and remote address. The invoking class is ignored; the object is always
+# blessed into this package.
+sub new {
+    my ( undef, $agent, $address ) = @_;
+    my %fakeEnv = (
+        HTTP_USER_AGENT => $agent,
+        REMOTE_ADDR     => $address,
+    );
+    return bless { _env => \%fakeEnv }, __PACKAGE__;
+}
+}
+