Merge branch 'passive_analytics_without_flux' into master

* passive_analytics_without_flux:
  Added Passive Analytics to upgrade script
  Removed DataTable edits from this topic branch
  Merge branch 'master' into passive_analytics_without_flux
  Added Passive Analytics icon
  Added PASSIVE_ANALYTICS_README.txt
  Added some screenshots
  Optimize the Bucket analysis for speed.
  Test for running benchmarking.
  If userId=0, then do not send emails when the workflow is done.
  And add the logging class.
  Add an enable button, to disable Logging.
  Provide CVS data for all logs.
  Generalized the CSV creation routine, then made wrappers for all log types.
  Remove 45 second sleep used for testing.
  Real PA status, as described in the RFE.
  Tweak the display a little more to make it nice.
  Passive Analytics Workflow status checks.
  Label for regular expression errors on rule save.
  Basic regexp checking for submitted rules.
  Patch HTMLForm to make dynamically generated forms sticky.
  Bucket activity now clears deltaLog based on settings.
  Add admin settings for interval and table cleanup.
  Bug fixes: sticky delta time and dropped 1st entry.
  Must use stringy GUIDs to compare in Summarizer.
  passiveLog entry should land in the first bucket that matches, not all buckets that match.
  Perl code for user selectable columns in the data table.
  Let the user choose which columns get sorted in a DataTable, js changes.
  Log the complete URI, with query fragment.
  Fix the name of the SummarizePassizeAnalytics workflow.
  Minor changes to sbin/installPassiveAnalytics.pl so that script would run.
  Add Passive Analytics modules, Workflow Activites, i18n and content handler.
  Add logging to the Asset Content handler.
  Add file to install Passive Analytics in the db and config file.
This commit is contained in:
Patrick Donelan 2009-03-11 08:02:54 +00:00 committed by Graham Knop
commit e93f02c16b
12 changed files with 1390 additions and 1 deletions

View file

@ -0,0 +1,153 @@
package WebGUI::Workflow::Activity::BucketPassiveAnalytics;
use strict;
use base 'WebGUI::Workflow::Activity';
use WebGUI::PassiveAnalytics::Rule;
use WebGUI::Inbox;
=head1 NAME
Package WebGUI::Workflow::Activity::BucketPassiveAnalytics
=head1 DESCRIPTION
Run through a set of rules to figure out how to classify log file entries.
=head1 SYNOPSIS
See WebGUI::Workflow::Activity for details on how to use any activity.
=head1 METHODS
These methods are available from this class:
=cut
#-------------------------------------------------------------------
=head2 definition ( session, definition )
See WebGUI::Workflow::Activity::defintion() for details.
=cut
sub definition {
my $class = shift;
my $session = shift;
my $definition = shift;
my $i18n = WebGUI::International->new($session, "PassiveAnalytics");
push( @{$definition}, {
name=>$i18n->get("Bucket Passive Analytics"),
properties=> {
notifyUser => {
fieldType => 'user',
label => $i18n->get('User'),
hoverHelp => $i18n->get('User help'),
defaultValue => $session->user->userId,
},
},
});
return $class->SUPER::definition($session,$definition);
}
#-------------------------------------------------------------------
=head2 execute ( [ object ] )
Analyze the deltaLog table, and generate the bucketLog table.
=head3 notes
=cut
sub execute {
my ($self, undef, $instance) = @_;
my $session = $self->session;
my $endTime = time() + $self->getTTL;
my $expired = 0;
##Load all the rules into an array
my @rules = ();
my $getARule = WebGUI::PassiveAnalytics::Rule->getAllIterator($session);
while (my $rule = $getARule->()) {
my $regexp = $rule->get('regexp');
push @rules, [ $rule->get('bucketName'), qr/$regexp/];
}
##Get the index stored from the last invocation of the Activity. If this is
##the first run, then clear out the table.
my $logIndex = $instance->getScratch('lastPassiveLogIndex') || 0;
if ($logIndex == 0) {
$session->db->write('delete from bucketLog');
}
my %bucketCache = ();
##Configure all the SQL
my $deltaSql = <<"EOSQL1";
select userId, assetId, url, delta, from_unixtime(timeStamp) as stamp
from deltaLog order by timestamp limit $logIndex, 1234567890
EOSQL1
my $deltaSth = $session->db->read($deltaSql);
my $bucketSth = $session->db->prepare('insert into bucketLog (userId, Bucket, duration, timeStamp) VALUES (?,?,?,?)');
##Walk through the log file entries, one by one. Run each entry against
##all the rules until 1 matches. If it doesn't match any rule, then bin it
##into the "Other" bucket.
DELTA_ENTRY: while (my $entry = $deltaSth->hashRef()) {
++$logIndex;
my $bucketFound = 0;
my $url = $entry->{url};
if (exists $bucketCache{$url}) {
$bucketSth->execute([$entry->{userId}, $bucketCache{$url}, $entry->{delta}, $entry->{stamp}]);
}
else {
RULE: foreach my $rule (@rules) {
next RULE unless $url =~ $rule->[1];
# Into the bucket she goes..
$bucketCache{$url} = $rule->[0];
$bucketSth->execute([$entry->{userId}, $rule->[0], $entry->{delta}, $entry->{stamp}]);
$bucketFound = 1;
last RULE;
}
if (!$bucketFound) {
$bucketCache{$url} = 'Other';
$bucketSth->execute([$entry->{userId}, 'Other', $entry->{delta}, $entry->{stamp}]);
}
}
if (time() > $endTime) {
$expired = 1;
last DELTA_ENTRY;
}
}
if ($expired) {
$instance->setScratch('logIndex', $logIndex);
return $self->WAITING(1);
}
my $message = 'Passive analytics is done.';
if ($session->setting->get('passiveAnalyticsDeleteDelta')) {
$session->log->info('Clearing Passive Analytics delta log');
$session->db->write('delete from deltaLog');
$message .= ' The delta log has been cleaned up.';
}
##If userId was set to 0, do not send any emails.
if ($self->get('userId')) {
my $inbox = WebGUI::Inbox->new($self->session);
$inbox->addMessage({
status => 'unread',
subject => 'Passive analytics is done',
userId => $self->get('userId'),
message => $message,
});
}
$session->db->write('update passiveAnalyticsStatus set endDate=NOW(), running=0');
return $self->COMPLETE;
}
1;
#vim:ft=perl

View file

@ -0,0 +1,148 @@
package WebGUI::Workflow::Activity::SummarizePassiveAnalytics;
use strict;
use base 'WebGUI::Workflow::Activity';
=head1 NAME
Package WebGUI::Workflow::Activity::SummarizePassiveAnalytics
=head1 DESCRIPTION
Summarize how long a user stayed on a page, using a user supplied interval.
=head1 SYNOPSIS
See WebGUI::Workflow::Activity for details on how to use any activity.
=head1 METHODS
These methods are available from this class:
=cut
#-------------------------------------------------------------------
=head2 definition ( session, definition )
See WebGUI::Workflow::Activity::defintion() for details.
=cut
sub definition {
my $class = shift;
my $session = shift;
my $definition = shift;
my $i18n = WebGUI::International->new($session, 'PassiveAnalytics');
push(@{$definition}, {
name=>$i18n->get('Summarize Passive Analytics'),
properties=> {
deltaInterval => {
fieldType => 'interval',
label => $i18n->get('pause interval'),
defaultValue => 15,
hoverHelp => $i18n->get('pause interval help'),
},
}
});
return $class->SUPER::definition($session,$definition);
}
#-------------------------------------------------------------------
=head2 execute ( [ object ] )
Analyze the passiveLog table, and generate the deltaLog table.
=head3 notes
If there is only 1 line in the table for a particular sessionId or
userId, no conclusions as to how long the user viewed a page can be
drawn from that. Similarly, the last entry in their browsing log
yields no data, since we require another entry in the passiveLog to
determine a delta.
=cut
sub execute {
my ($self, undef, $instance) = @_;
my $session = $self->session;
my $endTime = time() + $self->getTTL;
my $deltaInterval = $self->get('deltaInterval');
my $passive = q{select * from passiveLog where userId <> '1' order by userId, sessionId, timeStamp};
my $sth;
my $lastUserId;
my $lastSessionId;
my $lastTimeStamp;
my $lastAssetId;
my $lastUrl;
my $counter = $instance->getScratch('counter');
if ($counter) {
$passive .= ' limit '. $counter .', 1234567890';
$sth = $session->db->read($passive);
$lastUserId = $instance->getScratch('lastUserId');
$lastSessionId = $instance->getScratch('lastSessionId');
$lastTimeStamp = $instance->getScratch('lastTimeStamp');
$lastAssetId = $instance->getScratch('lastAssetId');
$lastUrl = $instance->getScratch('lastUrl');
}
else {
$sth = $session->db->read($passive);
my $logLine = $sth->hashRef();
$lastUserId = $logLine->{userId};
$lastSessionId = $logLine->{sessionId};
$lastTimeStamp = $logLine->{timeStamp};
$lastAssetId = $logLine->{assetId};
$lastUrl = $logLine->{url};
}
$session->db->write('delete from deltaLog'); ##Only if we're starting out
my $deltaLog = $session->db->prepare('insert into deltaLog (userId, assetId, delta, timeStamp, url) VALUES (?,?,?,?,?)');
my $expired = 0;
LOG_ENTRY: while (my $logLine = $sth->hashRef()) {
$counter++;
my $delta = $logLine->{timeStamp} - $lastTimeStamp;
if ( $logLine->{userId} eq $lastUserId
&& $logLine->{sessionId} eq $lastSessionId
&& $delta < $deltaInterval ) {
$deltaLog->execute([$lastUserId, $lastAssetId, $delta, $lastTimeStamp, $lastUrl]);
}
$lastUserId = $logLine->{userId};
$lastSessionId = $logLine->{sessionId};
$lastTimeStamp = $logLine->{timeStamp};
$lastAssetId = $logLine->{assetId};
$lastUrl = $logLine->{url};
if (time() > $endTime) {
$instance->setScratch('lastUserId', $lastUserId);
$instance->setScratch('lastSessionId', $lastSessionId);
$instance->setScratch('lastTimeStamp', $lastTimeStamp);
$instance->setScratch('lastAssetId', $lastAssetId);
$instance->setScratch('lastUrl', $lastUrl);
$instance->setScratch('counter', $counter);
$expired = 1;
last LOG_ENTRY;
}
}
if ($expired) {
return $self->WAITING(1);
}
$instance->deleteScratch('lastUserId');
$instance->deleteScratch('lastSessionId');
$instance->deleteScratch('lastTimeStamp');
$instance->deleteScratch('lastAssetId');
$instance->deleteScratch('lastUrl');
$instance->deleteScratch('counter');
return $self->COMPLETE;
}
1;
#vim:ft=perl