SurveyJSON size reduction/optimization

SurveyJSON was storing a lot of redundant information (every setting on
every single section/question/answer, which, in most cases, will simply
take on the default values). This was bloating the surveyJSON property in
the db, and equally as importantly, slowing down Survey because it had to
do a lot of JSON parsing on the serialised surveyJSON object.

We now factor out and store the current section/question/answer defaults
along with the surveyJSON data itself, which means that we only needs to
store properties that differ from the defaults. This results is a massive
reduction in the size of the serialized surveyJSON stored in the database,
as well as a speed-up in json parsing time.

The compression/uncompression happens transparently to the rest of Survey.
This commit is contained in:
Patrick Donelan 2009-06-19 08:07:18 +00:00
parent d26ce5b447
commit cbc308c55a
4 changed files with 192 additions and 21 deletions

View file

@ -17,6 +17,7 @@
- fixed #10551: paypal (link to section of paypal website to enter in WebGUI information)
- fixed #10550: shipping plugins have no privileges
- fixed: Add progress bars for paste and edit branch.
- fixed: SurveyJSON database bloating
7.7.10
- Made a change to LDAP auth that adds an OR to that query so that it also searches for a row with fieldData REGEXP '^uid=(value-from-ldap-directory-server),'. (Wes Morgan)

View file

@ -36,6 +36,7 @@ my $session = start(); # this line required
setDefaultIcalInterval($session);
makeSurveyResponsesVersionAware($session);
addShipperGroupToUse($session);
shrinkSurveyJSON($session);
finish($session); # this line required
@ -95,6 +96,23 @@ END_SQL
print "DONE!\n" unless $quiet;
}
#----------------------------------------------------------------------------
sub shrinkSurveyJSON {
my $session = shift;
print "\tCompressing surveyJSON column in Survey table (this may take some time)... " unless $quiet;
my $sth = $session->db->read('select assetId, revisionDate from Survey');
use WebGUI::Asset::Wobject::Survey;
while (my ($assetId, $revision) = $sth->array) {
my $survey = WebGUI::Asset->new($session, $assetId, 'WebGUI::Asset::Wobject::Survey', $revision);
$survey->persistSurveyJSON;
}
print "DONE!\n" unless $quiet;
print "\tOptimizing Survey table... " unless $quiet;
$session->db->write('optimize table Survey');
print "DONE!\n" unless $quiet;
}
# -------------- DO NOT EDIT BELOW THIS LINE --------------------------------

View file

@ -50,7 +50,6 @@ use strict;
use JSON;
use Params::Validate qw(:all);
Params::Validate::validation_options( on_fail => sub { WebGUI::Error::InvalidParam->throw( error => shift ) } );
use Clone qw/clone/;
use WebGUI::Asset::Wobject::Survey::ExpressionEngine;
@ -67,8 +66,10 @@ WebGUI::Session object
=head3 $json (optional)
A JSON string used to construct a new Perl object. The string should represent
a JSON hash made up of "survey" and "sections" keys.
A json-encoded serialized surveyJSON object. Typically this will have just been retrieved from the
database, have been previously generated from another surveyJSON object using L<freeze>.
See L<freeze> for more information on what the serialized object should look like.
=cut
@ -78,16 +79,28 @@ sub new {
# Load json object if given..
my $jsonData = $json ? from_json($json) : {};
# Create skeleton object..
# When the a new empty surveyJSON object is created (as opposed to being re-initialised from existing json),
# we create a snapshot of the *current* default values for new Sections, Questions and Answers.
# This snapshot (the mold) is used by L<compress> and L<uncompress> to reduce redundancy in the json-encoded
# surveyJSON object that gets serialized to the database. This compression is mostly transparent since it
# happens only at serialisation/instantiation time.
my $sections = $jsonData->{sections} || [];
my $self = {
_session => $session,
_sections => $jsonData->{sections} || [],
_survey => $jsonData->{survey} || {},
_mold => $jsonData->{mold}
|| {
answer => $class->newAnswer,
question => $class->newQuestion,
section => $class->newSection,
},
};
bless $self, $class;
# Uncompress the survey configuration and store it in the new object
$self->{_sections} = $self->uncompress($sections);
#Load question types
$self->loadTypes();
@ -177,17 +190,18 @@ sub multipleChoiceTypes {
=head2 freeze
Serialize this Perl object into a JSON string. The serialized object is made up of the survey and sections
components of this object.
components of this object, as well as the "mold" which is used .
=cut
sub freeze {
my $self = shift;
return to_json(
{ sections => $self->sections,
survey => $self->{_survey},
{ sections => $self->compress(),
mold => $self->{_mold},
}
);
}
=head2 newObject ( $address )
@ -587,6 +601,99 @@ sub getAnswerEditVars {
return \%var;
}
=head2 compress
Returns a copy of L<sections> with all redundancy (relative to L<mold>) removed.
That is, any section/question/answer property that matches the mold is removed from
the array object that is returned.
This is handy for shinking down the L<sections> hash as much as possible prior
to json-encoding and storing in the db (see L<freeze>).
=cut
sub compress {
my $self = shift;
# Get the Section, Question and Answer molds that will be used to remove redundancy
my ($smold, $qmold, $amold) = @{$self->mold}{'section', 'question', 'answer'};
# Iterate over all objects, only adding them to our new object if they differ from the mold
# Properties are assumed to be simple scalars only (strings and numbers). The only except to
# this is the 'questions' arrayref in sections and the 'answers' arrayref in questions.
my @sections;
for my $s (@{$self->sections}) {
my $newS = {};
for my $q (@{$s->{questions} || []}) {
my $newQ = {};
for my $a (@{$q->{answers} || []}) {
my $newA = {};
while (my($key, $value) = each %$a) {
next if ref $value;
$newA->{$key} = $value unless WebGUI::Utility::scalarEquals($value, $amold->{$key});
}
push @{$newQ->{answers}}, $newA;
}
while (my($key, $value) = each %$q) {
next if ref $value;
$newQ->{$key} = $value unless WebGUI::Utility::scalarEquals($value, $qmold->{$key});
}
push @{$newS->{questions}}, $newQ;
}
while (my($key, $value) = each %$s) {
next if ref $value;
$newS->{$key} = $value unless WebGUI::Utility::scalarEquals($value, $smold->{$key});
}
push @sections, $newS;
}
return \@sections;
}
=head2 uncompress ($sections)
Modifies the supplied arrayref of sections, adding back in all redundancy that has been
removed by a previous call to L<compress>. Typically this is done immediately after
retrieving serialised surveyJSON from the db (see L<db>).
Any L<mold> property missing from the supplied list of section/question/answers is added,
and then the modified arrayref is returned.
=head3 sections
An arrayref of sections that you want to uncompress. Typically retrieved from the database.
=cut
sub uncompress {
my $self = shift;
my $sections = shift;
return if !$sections;
# Get the Section, Question and Answer molds
my ($smold, $qmold, $amold) = @{$self->mold}{'section', 'question', 'answer'};
# Iterate over all objects, adding back in the missing properties
for my $s (@{$sections || []}) {
for my $q (@{$s->{questions} || []}) {
for my $a (@{$q->{answers} || []}) {
while (my($key, $value) = each %$amold) {
$a->{$key} = $value unless exists $a->{$key};
}
}
while (my($key, $value) = each %$qmold) {
$q->{$key} = $value unless exists $q->{$key};
}
}
while (my($key, $value) = each %$smold) {
$s->{$key} = $value unless exists $s->{$key};
}
}
# Return the modified arrayref
return $sections;
}
=head2 update ( $address, $properties )
Update a section/question/answer with $properties, or add new ones.
@ -1086,6 +1193,18 @@ sub sections {
return $self->{_sections};
}
=head2 mold
Accessor for the mold property. See L<compress> and L<uncompress> for more info on
what this property is used for.
=cut
sub mold {
my $self = shift;
return $self->{_mold};
}
=head2 lastSectionIndex
Convenience method to return the index of the last Section. Frequently used to

View file

@ -21,7 +21,7 @@ my $session = WebGUI::Test->session;
#----------------------------------------------------------------------------
# Tests
my $tests = 137;
my $tests = 140;
plan tests => $tests + 1 + 3;
#----------------------------------------------------------------------------
@ -158,6 +158,46 @@ cmp_deeply(
'newAnswer data structure is okay'
);
####################################################
#
# freeze, compress, uncompress
#
####################################################
{
my $sJSON = WebGUI::Asset::Wobject::Survey::SurveyJSON->new($session);
my $mold = {
answer => $sJSON->newAnswer,
question => $sJSON->newQuestion,
section => $sJSON->newSection,
};
cmp_deeply(from_json($sJSON->freeze), {
sections => [ {} ],
mold => $mold,
}, 'got back appropriate frozen object for empty survey');
# Set a few non-standard properties on the (default) 0th Section
my $nonStandardSProps = { variable => 'S0', logical => '0 but true' };
$sJSON->update( [0], $nonStandardSProps );
# Create a question, and set some other non-standard properties
$sJSON->newObject( [0] );
my $nonStandardQProps = { randomizeAnswers => 1, textInButton => '1', text => 'blah' };
$sJSON->update( [0, 0], $nonStandardQProps );
# And create an answer
$sJSON->updateQuestionAnswers( [0], 'Country' );
$nonStandardQProps->{questionType} = 'Country';
my $nonStandardAProps = { value => 0, terminal => '' };
$sJSON->update( [0, 0, 0], $nonStandardAProps );
$nonStandardSProps->{questions} = [$nonStandardQProps];
$nonStandardQProps->{answers} = [$nonStandardAProps];
cmp_deeply(from_json($sJSON->freeze)->{sections}, $sJSON->compress, 'freeze returns sections via compress');
cmp_deeply($sJSON->compress, [$nonStandardSProps], 'molded data only contains non-standard properties');
cmp_deeply($sJSON->uncompress($sJSON->compress), $sJSON->{_sections}, 'uncompress completes the round-trip');
}
####################################################
#
# new, part 2
@ -199,23 +239,16 @@ $sJSON2 = WebGUI::Asset::Wobject::Survey::SurveyJSON->new($session,
cmp_deeply(
$sJSON2->sections,
[
{
superhashof {
type => 'section',
logical => 0, # this is added from the default-created mold
},
],
'new: If the JSON has a section, a new one will not be added',
'new: If the JSON has a section, a new one will not be added (but mold defaults will be)',
);
undef $sJSON2;
####################################################
#
# freeze
#
####################################################
like( $surveyJSON->freeze, qr/"survey":\{\}/, 'freeze: got back something that looks like JSON, not a thorough check');
####################################################
#
# newObject