diff --git a/docs/changelog/7.x.x.txt b/docs/changelog/7.x.x.txt index f7f8046ee..65b662f0a 100644 --- a/docs/changelog/7.x.x.txt +++ b/docs/changelog/7.x.x.txt @@ -1,4 +1,5 @@ 7.6.13 + - fixed AdSpace bug: impressions and clicks for most non-human web clients will not be counted. - fixed #9760: DataForm not working in demo.plainblack.com - fixed #9759: Delete Entry Button missing in Data Form - fixed #9767: FileAsset breaks 'null' rule for FileAsset table diff --git a/lib/WebGUI/AdSpace.pm b/lib/WebGUI/AdSpace.pm index 27f937d67..096abbc77 100644 --- a/lib/WebGUI/AdSpace.pm +++ b/lib/WebGUI/AdSpace.pm @@ -52,6 +52,7 @@ sub countClick { my $session = shift; my $id = shift; my ($url) = $session->db->quickArray("select url from advertisement where adId=?",[$id]); + return $url if $session->env->requestNotViewed(); $session->db->write("update advertisement set clicks=clicks+1 where adId=?",[$id]); return $url; } @@ -131,6 +132,7 @@ A boolean that tells the ad system not to count this impression if true. sub displayImpression { my $self = shift; my $dontCount = shift; + return '' if $self->session->env->requestNotViewed(); my ($id, $ad, $priority, $clicks, $clicksBought, $impressions, $impressionsBought) = $self->session->db->quickArray("select adId, renderedAd, priority, clicks, clicksBought, impressions, impressionsBought from advertisement where adSpaceId=? and isActive=1 order by nextInPriority asc limit 1",[$self->getId]); unless ($dontCount) { my $isActive = 1; diff --git a/lib/WebGUI/AdSpace/Ad.pm b/lib/WebGUI/AdSpace/Ad.pm index 6309c218d..dc0dc2fd6 100644 --- a/lib/WebGUI/AdSpace/Ad.pm +++ b/lib/WebGUI/AdSpace/Ad.pm @@ -262,10 +262,25 @@ sub set { # prerender the ad for faster display my $adSpace = WebGUI::AdSpace->new($self->session, $self->get("adSpaceId")); if ($self->get("type") eq "text") { - $self->{_properties}{renderedAd} = '
get("height")-2).'px; margin:0px; overflow:hidden; border:solid '.$self->get("borderColor").' 1px;">getId).'" style="position:absolute; padding: 3px; top:0px; left:0px; width:100%; height:100%; z-index:10; display:block; text-decoration:none; vertical-align:top; background-color:'.$self->get("backgroundColor").'; font-size: 13px; font-weight: normal;">'.$self->get("title").'
'.$self->get("adText").'
'; + $self->{_properties}{renderedAd} = '
get("height")-2) . 'px; margin:0px; overflow:hidden; border:solid ' . $self->get("borderColor") . + q{ 1px;">getId) . + q{')" style="position:absolute; padding: 3px; top:0px; left:0px; width:100%; height:100%; z-index:10;} . + ' display:block; text-decoration:none; vertical-align:top; background-color:' . $self->get("backgroundColor") . + '; font-size: 13px; font-weight: normal;">' . + $self->get("title") . '
' . + $self->get("adText") . '
'; } elsif ($self->get("type") eq "image") { my $storage = WebGUI::Storage->get($self->session, $self->get("storageId")); - $self->{_properties}{renderedAd} = '
get("height").'px; margin:0px; overflow:hidden; border:0px;">getId).'" style="position:absolute; padding: 3px; top:0px; left:0px; width:100%; height:100%; z-index:10; display:block; text-decoration:none; vertical-align:top;">'.$self->get(
'; + $self->{_properties}{renderedAd} = '
get("height") . 'px; margin:0px; overflow:hidden; border:0px;">getId) . q{')" style="position:absolute; padding: } . + '3px; top:0px; left:0px; width:100%; height:100%; z-index:10; display:block; text-decoration:none; ' . + 'vertical-align:top;">' . $self->get(
'; } elsif ($self->get("type") eq "rich") { my $ad = $self->get("richMedia"); WebGUI::Macro::process($self->session, \$ad); diff --git a/lib/WebGUI/Session/Env.pm b/lib/WebGUI/Session/Env.pm index 16e66b690..491767c0d 100644 --- a/lib/WebGUI/Session/Env.pm +++ b/lib/WebGUI/Session/Env.pm @@ -30,6 +30,8 @@ $env = WebGUI::Session::Env->new; $value = $env->get('REMOTE_ADDR'); +return 'not gonna see it' if $env->requestNotViewed() ; + =head1 METHODS These methods are available from this package: @@ -37,6 +39,66 @@ These methods are available from this package: =cut +#------------------------------------------------------------------- + +=head2 callerIsSearchSite ( ) + +returns true if the remote address matches a site which is a known indexer or spider. + +=cut + +sub callerIsSearchSite { + + my $self = shift; + my $remoteAddress = $self->get('REMOTE_ADDR'); + + return 1 if $remoteAddress =~ /203\.87\.123\.1../ # Blaiz Enterprise Rawgrunt search + || $remoteAddress =~ /123\.113\.184\.2../ # Unknown Yahoo Robot + || $remoteAddress eq ''; + + return 0; + +} + + +#------------------------------------------------------------------- + +=head2 clientIsSpider ( ) + +returns true if the client/agent is a spider/indexer or some other non-human interface + +=cut + + +sub clientIsSpider { + + my $self = shift; + my $userAgent = $self->get('HTTP_USER_AGENT'); + + return 1 if $userAgent eq '' + || $userAgent =~ m<(^wre\/| # the WRE wget's http://localhost/ every 2-3 minutes 24 hours a day... 
+ ^morpheus| + libwww| + s[pb]ider| + bot| + robo| + sco[ou]t| + crawl| + miner| + reaper| + finder| + search| + engine| + download| + fetch| + scan| + slurp)>ix; + + return 0; + +} + + #------------------------------------------------------------------- =head2 DESTROY ( ) @@ -100,5 +162,22 @@ sub new { bless {_env=>\%ENV}, $class; } +#------------------------------------------------------------------- + +=head2 requestNotViewed ( ) + +returns true if the client/agent is a spider/indexer or some other non-human interface, or if the remote address belongs to a known search site + +=cut + +sub requestNotViewed { + + my $self = shift; + return $self->clientIsSpider() + || $self->callerIsSearchSite(); + +} + 1; + diff --git a/t/Session/CheckClient.t b/t/Session/CheckClient.t new file mode 100644 index 000000000..e75598a97 --- /dev/null +++ b/t/Session/CheckClient.t @@ -0,0 +1,206 @@ +#------------------------------------------------------------------- +# WebGUI is Copyright 2001-2009 Plain Black Corporation. +#------------------------------------------------------------------- +# Please read the legal notices (docs/legal.txt) and the license +# (docs/license.txt) that came with this distribution before using +# this software. +#------------------------------------------------------------------- +# http://www.plainblack.com info@plainblack.com +#------------------------------------------------------------------- + +# this test can take two parameters +# first is an xml file, second indicates +# the percentage of items to test. 
+# the xml file can be downloaded from +# http://www.user-agents.org/ +# the percent will default to 25 and +# should be passed as a whole number +# so 100 will test all items, 75 will +# test 75% or 3 out of four items + +use FindBin; +use strict; +use lib "$FindBin::Bin/lib"; +use lib '/data/WebGUI/t/lib'; + +use WebGUI::Test; +use WebGUI::Session; + +use Test::More; + +my $session = WebGUI::Test->session; + +# this test is for code in the WebGUI::Session::Env Module + +my @testArray = ( + { + agent => "", + output => 1, + comment => "blank user agent" + }, + { + agent => "<a href='http://www.unchaos.com/'> UnChaos </a> From Chaos To Order Hybrid Web Search Engine.(vadim_goncharunchaos.com)", + output => 1, + comment => "UnChaos hybrid search engine" + }, + { + agent => "(DreamPassport/3.0; isao/MyDiGiRabi)", + output => 0, + comment => "DreamCast DreamPassport browser" + }, + { + agent => "Privoxy web proxy", # I think proxies should be considered browsers? + output => 0, + comment => "s.also Privoxy/3.0 (Anonymous)" + }, + { + agent => "*/Nutch-0.9-dev", + address => "123.113.184.232", + output => 1, + comment => "Unknown Yahoo robot" + }, + { + agent => "123spider-Bot (Version: 1.02, powered by www.123spider.de", + output => 1, + comment => "123spider.de (Germany) web directory link checking" + }, + { + agent => "1st ZipCommander (Net) - http://www.zipcommander.com/", + output => 0, + comment => "1st ZipCommander Net - IE based browser" + }, + { + agent => "2Bone_LinkChecker/1.0 libwww-perl/5.64", + output => 1, + comment => "2Bone online link checker" + }, + { + agent => "A-Online Search", + output => 1, + comment => "A-Online.at robot - now Jet2Web Search" + }, + { + agent => "Advanced Browser (http://www.avantbrowser.com)", + output => 0, + comment => "Avant Browser - IE based browser" + }, + { + agent => "AESOP_com_SpiderMan", + output => 1, + comment => "Aesop robot" + }, + { + agent => "Mozilla/5.0 (compatible; SpurlBot/0.2)", + output => 1, + comment => 
"Spurl.net bookmark service & search engine (84.40.30.xxx)" + }, + { + agent => "Mozilla/5.0 (compatible;MAINSEEK_BOT)", + output => 1, + comment => "Mozilla/5.0 (compatible;MAINSEEK_BOT)" + }, + { + agent => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021219 Chimera/0.6", + output => 0, + comment => "Chimera browser (Mozilla/Gecko engine) - now Camino Mac PowerPC" + }, + { + agent => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/xx (KHTML like Gecko) OmniWeb/v5xx.xx", + output => 0, + comment => "OmniWeb 5.x.x Mac OS X browser" + }, + { + agent => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.x.x) Gecko/20041107 Firefox/x.x", + output => 0, + comment => "Firefox browser (Mozilla/Gecko engine) - ex Firebird WinXP" + }, + { + agent => "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabotorange-ftgroup.com)", + output => 1, + comment => "Voila.fr robot" + }, + { + agent => "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawlerhealth", + output => 1, + comment => "Healthline health related search robot (72.5.115.xx)" + }, + { + agent => "Mozilla/5.0 (X11; U; Linux i686; de-AT; rv:1.8.0.2) Gecko/20060309 SeaMonkey/1.0", + output => 0, + comment => "SeaMonkey browser suite (ex Mozilla) on Linux" + }, + { + agent => "Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)", + output => 1, + comment => "Yuntis : Collaborative Web Resource Categorization and Ranking Project robot" + }, +); + +sub transType { + return 0 if $_[0] =~ /(B|P)/; # browser or proxy + return 1; +} + +sub getAddress { # There are precious few that have an IP that can be gotten out of the XML so I decided to skip this. 
+ my $x = '69.42.78.32'; + #if( $_[0]{Comment} =~ /\d\.\d\.\d/ ) { + # print $_[0]{Comment},"\t|\t",$_[0]{Description},"\n"; + # $x = $_[0]{Comment}; + # $x =~ s/x/2/; + #} + return $x; +} + +sub testCount { + + if( @ARGV ) { + if( $ARGV[0] =~ /\.xml$/ && -r $ARGV[0] ) { + my $infile = shift @ARGV ; + my $percent = shift @ARGV || 25; + require XML::Simple; + my $xml = XML::Simple->new; + my $data = $xml->XMLin($infile); + # use Data::Dumper; + # print Dumper $data; + @testArray = (); + my $c = 1; + my $div = 20; + my $n = $div * $percent / 100; + foreach my $set (@{$data->{'user-agent'}}) { + $c = 1 if $c > $div; + if( $c <= $n ) { + push @testArray, { + agent => $set->{String}, + output => transType($set->{Type}), + type => $set->{Type}, + comment => $set->{Description}, + # comment => $set->{String}, # this is handy for fine tuning the code: it shows the string that failed... + address => getAddress($set), + }; + } + $c ++; + } + # use Data::Dumper; + # print Dumper \@testArray; + } + } + return scalar(@testArray); +} + + +plan tests => testCount() ; + +my $output; +foreach my $testSet (@testArray) { + $output = FAKE_ENV->new( $testSet->{agent}, + $testSet->{address} || '69.42.78.32') + ->requestNotViewed(); + is($output, $testSet->{output}, $testSet->{comment}); +} + +{ # this is a local fake of the session, used for testing only +package FAKE_ENV; +use base 'WebGUI::Session::Env'; +sub new { shift; return bless { _env => { HTTP_USER_AGENT => $_[0], REMOTE_ADDR => $_[1] } }, __PACKAGE__; } +} +