Allow Passive Analytics to scale to analyzing millions of log entries without nuking MySQL in the process. More thorough test cleanup.

This commit is contained in:
Colin Kuskie 2011-09-25 18:45:19 -07:00
parent ec00e867a9
commit 45b5370469
3 changed files with 101 additions and 66 deletions

View file

@ -49,6 +49,20 @@ sub definition {
return $class->SUPER::definition($session,$definition);
}
#-------------------------------------------------------------------
=head2 get_statement( session, counter )
Return a statement handle at the desired offset.
=cut
sub get_statement {
my ($session, $counter) = @_;
my $passive = q{select SQL_CALC_FOUND_ROWS * from passiveLog where userId <> '1' order by userId, sessionId, timeStamp limit ?, 500000};
my $sth = $session->db->read($passive, [$counter+0]);
return $sth;
}
#-------------------------------------------------------------------
@ -72,17 +86,14 @@ sub execute {
my $endTime = time() + $self->getTTL;
my $deltaInterval = $self->get('deltaInterval');
my $passive = q{select * from passiveLog where userId <> '1' order by userId, sessionId, timeStamp};
my $sth;
my $lastUserId;
my $lastSessionId;
my $lastTimeStamp;
my $lastAssetId;
my $lastUrl;
my $counter = $instance->getScratch('counter');
my $sth = get_statement($session, $counter);
if ($counter) {
$passive .= ' limit '. $counter .', 1234567890';
$sth = $session->db->read($passive);
$lastUserId = $instance->getScratch('lastUserId');
$lastSessionId = $instance->getScratch('lastSessionId');
$lastTimeStamp = $instance->getScratch('lastTimeStamp');
@ -90,46 +101,52 @@ sub execute {
$lastUrl = $instance->getScratch('lastUrl');
}
else {
$sth = $session->db->read($passive);
my $logLine = $sth->hashRef();
$lastUserId = $logLine->{userId};
$lastSessionId = $logLine->{sessionId};
$lastTimeStamp = $logLine->{timeStamp};
$lastAssetId = $logLine->{assetId};
$lastUrl = $logLine->{url};
$session->db->write('delete from deltaLog'); ##Only if we're starting out
}
$session->db->write('delete from deltaLog'); ##Only if we're starting out
my $total_rows = $session->db->quickScalar('select found_rows()');
my $deltaLog = $session->db->prepare('insert into deltaLog (userId, assetId, delta, timeStamp, url) VALUES (?,?,?,?,?)');
my $expired = 0;
LOG_ENTRY: while (my $logLine = $sth->hashRef()) {
$counter++;
my $delta = $logLine->{timeStamp} - $lastTimeStamp;
if ( $logLine->{userId} eq $lastUserId
&& $logLine->{sessionId} eq $lastSessionId
&& $delta < $deltaInterval ) {
$deltaLog->execute([$lastUserId, $lastAssetId, $delta, $lastTimeStamp, $lastUrl]);
LOG_CHUNK: while (1) {
LOG_ENTRY: while (my $logLine = $sth->hashRef()) {
$counter++;
my $delta = $logLine->{timeStamp} - $lastTimeStamp;
if ( $logLine->{userId} eq $lastUserId
&& $logLine->{sessionId} eq $lastSessionId
&& $delta < $deltaInterval ) {
$deltaLog->execute([$lastUserId, $lastAssetId, $delta, $lastTimeStamp, $lastUrl]);
}
$lastUserId = $logLine->{userId};
$lastSessionId = $logLine->{sessionId};
$lastTimeStamp = $logLine->{timeStamp};
$lastAssetId = $logLine->{assetId};
$lastUrl = $logLine->{url};
if (time() > $endTime) {
$instance->setScratch('lastUserId', $lastUserId);
$instance->setScratch('lastSessionId', $lastSessionId);
$instance->setScratch('lastTimeStamp', $lastTimeStamp);
$instance->setScratch('lastAssetId', $lastAssetId);
$instance->setScratch('lastUrl', $lastUrl);
$instance->setScratch('counter', $counter);
$expired = 1;
last LOG_ENTRY;
}
}
$lastUserId = $logLine->{userId};
$lastSessionId = $logLine->{sessionId};
$lastTimeStamp = $logLine->{timeStamp};
$lastAssetId = $logLine->{assetId};
$lastUrl = $logLine->{url};
if (time() > $endTime) {
$instance->setScratch('lastUserId', $lastUserId);
$instance->setScratch('lastSessionId', $lastSessionId);
$instance->setScratch('lastTimeStamp', $lastTimeStamp);
$instance->setScratch('lastAssetId', $lastAssetId);
$instance->setScratch('lastUrl', $lastUrl);
$instance->setScratch('counter', $counter);
$expired = 1;
last LOG_ENTRY;
$sth->finish;
if ($expired) {
return $self->WAITING(1);
}
}
if ($expired) {
return $self->WAITING(1);
last LOG_CHUNK if $counter >= $total_rows;
$sth = get_statement($session, $counter);
}
$instance->deleteScratch('lastUserId');