Optimize the Bucket analysis for speed.

The analysis was optimized in three ways:
1) Local caching and compiling of all regexps.
2) URL-level bucket caching.
3) Tweaked canShowDebug to speed it up. This was ported back
   to the core.
This commit is contained in:
Colin Kuskie 2009-02-16 13:04:33 -08:00 committed by Patrick Donelan
parent 3bafce6e3e
commit 30d171515a
2 changed files with 23 additions and 11 deletions

View file

@ -81,7 +81,8 @@ sub execute {
my @rules = ();
my $getARule = WebGUI::PassiveAnalytics::Rule->getAllIterator($session);
while (my $rule = $getARule->()) {
push @rules, $rule;
my $regexp = $rule->get('regexp');
push @rules, [ $rule->get('bucketName'), qr/$regexp/];
}
##Get the index stored from the last invocation of the Activity. If this is
@ -90,6 +91,7 @@ sub execute {
if ($logIndex == 0) {
$session->db->write('delete from bucketLog');
}
my %bucketCache = ();
##Configure all the SQL
my $deltaSql = <<"EOSQL1";
@ -105,16 +107,24 @@ EOSQL1
DELTA_ENTRY: while (my $entry = $deltaSth->hashRef()) {
++$logIndex;
my $bucketFound = 0;
RULE: foreach my $rule (@rules) {
next RULE unless $rule->matchesBucket($entry);
# Into the bucket she goes..
$bucketSth->execute([$entry->{userId}, $rule->get('bucketName'), $entry->{delta}, $entry->{stamp}]);
$bucketFound = 1;
last RULE;
my $url = $entry->{url};
if (exists $bucketCache{$url}) {
$bucketSth->execute([$entry->{userId}, $bucketCache{$url}, $entry->{delta}, $entry->{stamp}]);
}
if (!$bucketFound) {
$bucketSth->execute([$entry->{userId}, 'Other', $entry->{delta}, $entry->{stamp}]);
else {
RULE: foreach my $rule (@rules) {
next RULE unless $url =~ $rule->[1];
# Into the bucket she goes..
$bucketCache{$url} = $rule->[0];
$bucketSth->execute([$entry->{userId}, $rule->[0], $entry->{delta}, $entry->{stamp}]);
$bucketFound = 1;
last RULE;
}
if (!$bucketFound) {
$bucketCache{$url} = 'Other';
$bucketSth->execute([$entry->{userId}, 'Other', $entry->{delta}, $entry->{stamp}]);
}
}
if (time() > $endTime) {
$expired = 1;

View file

@ -72,11 +72,13 @@ diag "Data logged";
##Now, run it and wait for it to finish
my $counter = 0;
diag time();
#DB::enable_profile();
PAUSE: while (my $retval = $instance->run()) {
diag $retval;
last PAUSE if $retval eq 'done';
last PAUSE if $counter++ >= 16;
}
#DB::disable_profile();
diag time();
ok(1, 'One test');
@ -93,7 +95,7 @@ sub loadLogData {
my $insert = $session->db->prepare(
q!insert into passiveLog (userId, sessionId, timeStamp, url, assetId) VALUES (?,?,?,?,'assetId')!
);
my $logCount = 5000;
my $logCount = 15000;
my $counter;
my $startTime = 1000;
my $numUrls = scalar @urls;