a start on a new memcached based cache system

This commit is contained in:
JT Smith 2009-09-24 16:49:32 -05:00
parent 4344007a88
commit 1f41a72140
4 changed files with 271 additions and 578 deletions

View file

@ -15,11 +15,13 @@ package WebGUI::Cache;
=cut
use strict;
use File::Path ();
use Digest::MD5;
use HTTP::Headers;
use HTTP::Request;
use LWP::UserAgent;
use Digest::MD5;
use Memcached::libmemcached;
use Storable ();
=head1 NAME
@ -27,19 +29,19 @@ Package WebGUI::Cache
=head1 DESCRIPTION
A base class for all Cache modules to extend.
An API that allows you to cache items to a memcached server.
=head1 SYNOPSIS
use WebGUI::Cache;
my $cache = WebGUI::Cache->new($session, "my app cache");
my $cache = WebGUI::Cache->new($session, [ "my app", $assetId, $version ]);
my $cache = WebGUI::Cache->new($session);
$cache->set($value);
$cache->setByHTTP("http://www.google.com/");
$cache->set($name, $value);
$cache->set(\@nameSegments, $value);
$cache->setByHttp($name, "http://www.google.com/");
my $value = $cache->get;
my $value = $cache->get($name);
$cache->delete;
$cache->deleteChunk("my app cache");
@ -54,62 +56,98 @@ These methods are available from this class:
#-------------------------------------------------------------------
=head2 delete ( )
=head2 delete ( name )
Delete a key from the cache. Must be overridden.
Delete a key from the cache.
=head3 name
The key to delete.
=cut
sub delete {
}
#-------------------------------------------------------------------
=head2 deleteChunk ( key )
Deletes a bunch of keys from the cache based upon a partial composite key. Unless overridden by the cache subclass this will just flush the whole cache.
=head3 key
An array reference representing the portion of the key to delete. So if you have a key like ["asset","abc","def"] and you want to delete all items that match abc, you'd specify ["asset","abc"].
=cut
sub deleteChunk {
my $self = shift;
$self->flush;
my ($self, $name) = @_;
Memcached::libmemcached::memcached_delete($self->getMemcached, $self->parseKey($name));
}
#-------------------------------------------------------------------
=head2 flush ( )
Flushes the caching system. Must be overridden.
Empties the caching system.
=cut
sub flush {
my $self = shift;
File::Path::rmtree($self->session->config->get("uploadsPath")."/temp");
my ($self) = @_;
Memcached::libmemcached::memcached_flush($self->getMemcached);
}
#-------------------------------------------------------------------
=head2 get ( )
=head2 get ( name )
Retrieves a key value from the cache. Must be overridden.
Retrieves a key value from the cache.
=head3 name
The key to retrieve.
=cut
sub get {
my ($self, $name) = @_;
my $content = Memcached::libmemcached::memcached_get($self->getMemcached, $self->parseKey($name));
$content = Storable::thaw($content);
return undef unless $content && ref $content;
return ${$content};
}
#-------------------------------------------------------------------
=head2 getMemcached ( )
Returns a reference to the Memcached::libmemcached object.
=cut
sub getMemcached {
my ($self) = @_;
return $self->{_memcached};
}
#-------------------------------------------------------------------
=head2 new ( session, key, [ namespace ] )
=head2 mget ( names )
Retrieves multiple values from cache at once, which is much faster than retrieving one at a time. Returns an array reference containing the values in the order they were requested.
=head3 names
An array reference of keys to retrieve.
=cut
sub mget {
my ($self, $names) = @_;
my @parsedNames = ();
foreach my $name (@{$names}) {
push @parsedNames, $self->parseKey($name);
}
$self->getMemcached->mget_into_hashref($self->getMemcached, \@parsedNames, my $result);
my @values = ();
foreach my $name (@{$names}) {
my $parsedName = shift @parsedNames;
push @values, ${$result->{$parsedName}};
}
return \@values;
}
#-------------------------------------------------------------------
=head2 new ( session, [ namespace ] )
The new method will return a handler for the configured caching mechanism. Defaults to WebGUI::Cache::FileCache. You must override this method when building your own cache plug-in.
@ -117,12 +155,6 @@ The new method will return a handler for the configured caching mechanism. Defa
A reference to the current session.
=head3 key
A key to store the value under or retrieve it from. Can either be a scalar or an array reference of pieces (called
a composite key). Composite keys are useful for deleting a chunk (see deleteChunk()) of cache data all at once, and
for using multi-level identifiers like assetId/revisionDate.
=head3 namespace
A subdivider to store this cache under. When building your own cache plug-in default this to the WebGUI config file.
@ -130,49 +162,52 @@ A subdivider to store this cache under. When building your own cache plug-in def
=cut
sub new {
my $class = shift;
my $session = shift;
if ($session->config->get("cacheType") eq "WebGUI::Cache::Database") {
require WebGUI::Cache::Database;
return WebGUI::Cache::Database->new($session,@_);
} else {
require WebGUI::Cache::FileCache;
return WebGUI::Cache::FileCache->new($session,@_);
}
my ($class, $session, $namespace) = @_;
my $config = $session->config;
$namespace ||= $config->getFilename;
my $memcached = Memcached::libmemcached::memcached_create();
foreach my $server (@{$config->get('cacheServers')}) {
if (exists $server->{socket}) {
Memcached::libmemcached::memcached_server_add_unix_socket($memcached, $server->{socket});
}
else {
Memcached::libmemcached::memcached_server_add($memcached, $server->{host}, $server->{port});
}
}
bless {_memcached => $memcached, _namespace => $namespace, _sesssion => $session}, $class;
}
#-------------------------------------------------------------------
=head2 parseKey ( key )
=head2 parseKey ( name )
Returns a formatted string version of the key. A class method.
Returns a formatted string version of the key.
=head3 key
=head3 name
Can either be a text key, or a composite key. If it's a composite key, it will be an array reference of strings that can be joined together to create a key. You might want to use a composite key in order to be able to delete large portions of cache all at once. For instance, if you have a key of ["asset","abc","def"] you can delete all cache matching ["asset","abc"].
=cut
sub parseKey {
my $class = shift;
my ($self, $name) = @_;
# prepend namespace to the key
my @key = ($self->{_namespace});
# check for composite or simple key, make array from either
my @key;
if (! $_[0]) {
return;
if (! $name) {
# throw exception because no key was specified
}
elsif (ref $_[0] eq 'ARRAY') {
@key = @{ +shift };
elsif (ref $name eq 'ARRAY') {
@key = @{ $name };
}
else {
@key = shift;
@key = $name;
}
foreach my $part (@key) {
# convert to octets, then md5 them
utf8::encode($part);
$part = Digest::MD5::md5_base64($part);
$part =~ tr{/}{-};
}
return join('/', @key);
# merge key parts
return join(':', @key);
}
#-------------------------------------------------------------------
@ -184,18 +219,23 @@ Returns a reference to the current session.
=cut
sub session {
$_[0]->{_session};
my ($self) = @_;
$self->{_session};
}
#-------------------------------------------------------------------
=head2 set ( value [, ttl] )
=head2 set ( name, value [, ttl] )
Sets a key value to the cache. Must be overridden.
Sets a key value to the cache.
=head3 name
The name of the key to set.
=head3 value
A scalar value to store.
A scalar value to store. You can also pass a hash reference or an array reference.
=head3 ttl
@ -204,16 +244,24 @@ A time in seconds for the cache to exist. When you override default it to 60 sec
=cut
sub set {
my ($self, $name, $value, $ttl) = @_;
$ttl ||= 60;
$value = Storable::nfreeze(\(scalar $value)); # Storable doesn't like non-reference arguments, so we wrap it in a scalar ref.
Memcached::libmemcached::memcached_set($self->getMemcached, $self->parseKey($name), $value, $ttl);
return $value;
}
#-------------------------------------------------------------------
=head2 setByHTTP ( url [, ttl ] )
=head2 setByHttp ( name, url [, ttl ] )
Retrieves a document via HTTP and stores it in the cache and returns the content as a string. No need to override.
=head3 name
The name of the key to store the request under.
=head3 url
The URL of the document to retrieve. It must begin with the standard "http://".
@ -224,27 +272,20 @@ The time to live for this content. This is the amount of time (in seconds) that
=cut
sub setByHTTP {
my $self = shift;
my $url = shift;
my $ttl = shift;
my $userAgent = new LWP::UserAgent;
sub setByHttp {
my ($self, $name, $url, $ttl) = @_;
my $userAgent = new LWP::UserAgent;
$userAgent->env_proxy;
$userAgent->agent("WebGUI/".$WebGUI::VERSION);
$userAgent->timeout(30);
my $header = new HTTP::Headers;
my $referer = "http://webgui.http.request/".$self->session->env->get("SERVER_NAME").$self->session->env->get("REQUEST_URI");
chomp $referer;
$header->referer($referer);
my $request = HTTP::Request->new(GET => $url, $header);
$userAgent->agent("WebGUI/".$WebGUI::VERSION);
$userAgent->timeout(30);
my $request = HTTP::Request->new(GET => $url);
my $response = $userAgent->request($request);
if ($response->is_error) {
$self->session->errorHandler->error($url." could not be retrieved.");
$self->session->log->error($url." could not be retrieved.");
# show throw exception
return undef;
}
else {
$self->set($response->decoded_content,$ttl);
}
return $response->decoded_content;
return $self->set($response->decoded_content, $ttl);
}
#-------------------------------------------------------------------

View file

@ -1,205 +0,0 @@
package WebGUI::Cache::Database;
=head1 LEGAL
-------------------------------------------------------------------
WebGUI is Copyright 2001-2009 Plain Black Corporation.
-------------------------------------------------------------------
Please read the legal notices (docs/legal.txt) and the license
(docs/license.txt) that came with this distribution before using
this software.
-------------------------------------------------------------------
http://www.plainblack.com info@plainblack.com
-------------------------------------------------------------------
=cut
use strict;
use base "WebGUI::Cache";
use Storable ();
=head1 NAME
Package WebGUI::Cache::Database
=head1 DESCRIPTION
This package provides a means for WebGUI to cache data to the database.
=head1 SYNOPSIS
use WebGUI::Cache::Database;
=head1 METHODS
These methods are available from this class:
=cut
#-------------------------------------------------------------------
=head2 delete ( )
Remove content from the filesystem cache.
=cut
sub delete {
my $self = shift;
$self->session->db->write("delete from cache where namespace=? and cachekey=?",[$self->{_namespace}, $self->{_key}]);
}
#-------------------------------------------------------------------
=head2 deleteChunk ( key )
Remove a partial composite key from the cache.
=head3 key
A partial composite key to remove.
=cut
sub deleteChunk {
my $self = shift;
my $key = $self->parseKey(shift);
$self->session->db->write("delete from cache where namespace=? and cachekey like ?",[$self->{_namespace}, $key.'%']);
}
#-------------------------------------------------------------------
=head2 flush ( )
Remove all objects from the filecache system.
=cut
sub flush {
my $self = shift;
$self->SUPER::flush();
$self->session->db->write("delete from cache where namespace=?",[$self->{_namespace}]);
}
#-------------------------------------------------------------------
=head2 get ( )
Retrieve content from the database cache.
=cut
sub get {
my $self = shift;
my $session = $self->session;
return undef if ($session->config->get("disableCache"));
my $sth = $session->db->dbh->prepare("select content from cache where namespace=? and cachekey=? and expires>?");
$sth->execute($self->{_namespace},$self->{_key},time());
my $data = $sth->fetchrow_arrayref;
$sth->finish;
my $content = $data->[0];
return undef unless ($content);
# Storable doesn't like non-reference arguments, so we wrap it in a scalar ref.
eval {
$content = Storable::thaw($content);
};
return undef unless $content && ref $content;
return $$content;
}
#-------------------------------------------------------------------
=head2 getNamespaceSize ( )
Returns the size (in bytes) of the current cache under this namespace. Consequently it also cleans up expired cache items.
=cut
sub getNamespaceSize {
my $self = shift;
my $expiresModifier = shift || 0;
$self->session->db->write("delete from cache where expires < ?",[time()+$expiresModifier]);
my ($size) = $self->session->db->quickArray("select sum(size) from cache where namespace=?",[$self->{_namespace}]);
return $size;
}
#-------------------------------------------------------------------
=head2 new ( session, key [, namespace ] )
Constructor.
=head3 session
A reference to the current session.
=head3 key
A key unique to this namespace. It is used to uniquely identify the cached content.
=head3 namespace
Defaults to the config filename for the current site. The only reason to override the default is if you want the cached content to be shared among all WebGUI instances on this machine. A common alternative namespace is "URL", which is typically used when caching content using the setByHTTP method.
=cut
sub new {
my $cache;
my $class = shift;
my $session = shift;
my $key = $class->parseKey(shift);
my $namespace = shift || $session->config->getFilename;
bless {_session=>$session, _key=>$key, _namespace=>$namespace}, $class;
}
#-------------------------------------------------------------------
=head2 set ( content [, ttl ] )
Save content to the filesystem cache.
=head3 content
A scalar variable containing the content to be set.
=head3 ttl
The time to live for this content. This is the amount of time (in seconds) that the content will remain in the cache. Defaults to "60".
=cut
sub set {
my $self = shift;
# Storable doesn't like non-reference arguments, so we wrap it in a scalar ref.
my $content = Storable::nfreeze(\(scalar shift));
my $ttl = shift || 60;
my $size = length($content);
# getting better performance using native dbi than webgui sql
my $dbh = $self->session->db->dbh;
my $sth = $dbh->prepare("replace into cache (namespace,cachekey,expires,size,content) values (?,?,?,?,?)");
$sth->execute($self->{_namespace}, $self->{_key}, time()+$ttl, $size, $content);
$sth->finish;
}
#-------------------------------------------------------------------
=head2 stats ( )
Returns statistic information about the caching system.
=cut
sub stats {
my $self = shift;
my ($size) = $self->session->db->quickArray("select sum(size) from cache where namespace=?",[$self->{_namespace}]);
return $size." bytes";
}
1;

View file

@ -1,284 +0,0 @@
package WebGUI::Cache::FileCache;
=head1 LEGAL
-------------------------------------------------------------------
WebGUI is Copyright 2001-2009 Plain Black Corporation.
-------------------------------------------------------------------
Please read the legal notices (docs/legal.txt) and the license
(docs/license.txt) that came with this distribution before using
this software.
-------------------------------------------------------------------
http://www.plainblack.com info@plainblack.com
-------------------------------------------------------------------
=cut
use strict;
use Storable ();
use File::Path ();
use File::Find ();
our @ISA = qw(WebGUI::Cache);
=head1 NAME
Package WebGUI::Cache::FileCache
=head1 DESCRIPTION
This package provides a means for WebGUI to cache data to the filesystem.
=head1 SYNOPSIS
use WebGUI::Cache::FileCache;
=head1 METHODS
These methods are available from this class:
=cut
#-------------------------------------------------------------------
=head2 delete ( )
Remove content from the filesystem cache.
=cut
sub delete {
my $self = shift;
my $folder = $self->getFolder;
if (-e $folder) {
File::Path::rmtree($folder);
}
}
#-------------------------------------------------------------------
=head2 deleteChunk ( key )
Remove a partial composite key from the cache.
=head3 key
A partial composite key to remove.
=cut
sub deleteChunk {
my $self = shift;
my $folder = $self->getNamespaceRoot."/".$self->parseKey(shift);
if (-e $folder) {
File::Path::rmtree($folder);
}
}
#-------------------------------------------------------------------
=head2 flush ( )
Remove all objects from the filecache system.
=cut
sub flush {
my $self = shift;
$self->SUPER::flush();
my $folder = $self->getNamespaceRoot;
if (-e $folder) {
File::Path::rmtree($folder);
}
}
#-------------------------------------------------------------------
=head2 get ( )
Retrieve content from the filesystem cache.
=cut
sub get {
my $self = shift;
return undef if ($self->session->config->get("disableCache"));
my $folder = $self->getFolder;
if (-e $folder."/expires" && -e $folder."/cache" && open(my $FILE,"<",$folder."/expires")) {
my $expires = <$FILE>;
close($FILE);
return undef if ($expires < time);
my $value;
eval {$value = Storable::retrieve($folder."/cache")};
if (ref $value eq "SCALAR") {
return $$value;
} else {
return $value;
}
}
return undef;
}
#-------------------------------------------------------------------
=head2 getFolder ( )
Returns the path to the cache folder for this key.
=cut
sub getFolder {
my $self = shift;
return $self->getNamespaceRoot()."/".$self->{_key};
}
#-------------------------------------------------------------------
=head2 getNamespaceRoot ( )
Figures out what the cache root for this namespace should be. A class method.
=cut
sub getNamespaceRoot {
my $self = shift;
my $root = $self->session->config->get("fileCacheRoot");
unless ($root) {
if ($self->session->os->get("windowsish")) {
$root = $self->session->env->get("TEMP") || $self->session->env->get("TMP") || "/temp";
} else {
$root = "/tmp";
}
$root .= "/WebGUICache";
}
$root .= "/".$self->{_namespace};
return $root;
}
#-------------------------------------------------------------------
=head2 getNamespaceSize ( )
Returns the size (in bytes) of the current cache under this namespace. Consequently it also cleans up expired cache items.
=cut
sub getNamespaceSize {
my $self = shift;
my $expiresModifier = shift || 0;
my $cacheSize = 0;
File::Find::find({
no_chdir => 1,
wanted => sub {
return
unless $File::Find::name =~ m/expires$/;
if ( open my $FILE, "<", $File::Find::name ) {
my $expires = <$FILE>;
close $FILE;
if ($expires < time + $expiresModifier) {
File::Path::rmtree($File::Find::dir);
$File::Find::prune = 1;
return
}
else {
$cacheSize += -s $File::Find::dir.'/cache';
}
}
},
}, $self->getNamespaceRoot);
return $cacheSize;
}
#-------------------------------------------------------------------
=head2 new ( session, key [, namespace ] )
Constructor.
=head3 session
A reference to the current session.
=head3 key
A key unique to this namespace. It is used to uniquely identify the cached content.
=head3 namespace
Defaults to the config filename for the current site. The only reason to override the default is if you want the cached content to be shared among all WebGUI instances on this machine. A common alternative namespace is "URL", which is typically used when caching content using the setByHTTP method.
=cut
sub new {
my $cache;
my $class = shift;
my $session = shift;
my $key = $class->parseKey(shift);
my $namespace = shift || $session->config->getFilename;
bless {_session=>$session, _key=>$key, _namespace=>$namespace}, $class;
}
#-------------------------------------------------------------------
=head2 set ( content [, ttl ] )
Save content to the filesystem cache.
=head3 content
A scalar variable containing the content to be set.
=head3 ttl
The time to live for this content. This is the amount of time (in seconds) that the content will remain in the cache. Defaults to "60".
=cut
sub set {
my $self = shift;
my $content = shift;
my $ttl = shift || 60;
my $oldumask = umask();
umask(0000);
my $path = $self->getFolder();
unless (-e $path) {
eval {File::Path::mkpath($path,0)};
if ($@) {
$self->session->errorHandler->error("Couldn't create cache folder: ".$path." : ".$@);
return undef;
}
}
my $value;
unless (ref $content) {
$value = \$content;
} else {
$value = $content;
}
Storable::nstore($value, $path."/cache");
open my $FILE, ">", $path."/expires";
print $FILE time + $ttl;
close $FILE;
umask($oldumask);
}
#-------------------------------------------------------------------
=head2 stats ( )
Returns statistic information about the caching system.
=cut
sub stats {
my $self = shift;
return $self->getNamespaceSize." bytes";
}
1;

141
sbin/testmc.pl Normal file
View file

@ -0,0 +1,141 @@
#!/usr/bin/env perl
#-------------------------------------------------------------------
# WebGUI is Copyright 2001-2009 Plain Black Corporation.
#-------------------------------------------------------------------
# Please read the legal notices (docs/legal.txt) and the license
# (docs/license.txt) that came with this distribution before using
# this software.
#-------------------------------------------------------------------
# http://www.plainblack.com info@plainblack.com
#-------------------------------------------------------------------
$|++; # disable output buffering
our ($webguiRoot, $configFile, $help, $man);
BEGIN {
$webguiRoot = "..";
unshift (@INC, $webguiRoot."/lib");
}
use strict;
use Pod::Usage;
use Getopt::Long;
use WebGUI::Session;
use Config::JSON;
use WebGUI::Cache;
use Time::HiRes;
# Get parameters here, including $help
GetOptions(
'configFile=s' => \$configFile,
'help' => \$help,
'man' => \$man,
);
pod2usage( verbose => 1 ) if $help;
pod2usage( verbose => 2 ) if $man;
pod2usage( msg => "Must specify a config file!" ) unless $configFile;
my $session = start( $webguiRoot, $configFile );
print "creating cache object\n";
my $cache = WebGUI::Cache->new($session);
print "setting cache\n";
my $t = [Time::HiRes::gettimeofday];
my @keys;
my $sth = $session->db->read("select assetId, revisionDate, title from assetData");
while (my ($id, $rev, $title) = $sth->array) {
push @keys, [$id, $rev];
$cache->set([$id,$rev],$title);
}
print "Took ".Time::HiRes::tv_interval($t)." seconds to set ".scalar(@keys)." cache objects.\n";
print "fetching cache\n";
my $t = [Time::HiRes::gettimeofday];
foreach my $key (@keys) {
my $value = $cache->get($key);
}
print "Took ".Time::HiRes::tv_interval($t)." seconds to get ".scalar(@keys)." cache objects.\n";
print "done\n";
finish($session);
#----------------------------------------------------------------------------
# Your sub here
#----------------------------------------------------------------------------
sub start {
my $webguiRoot = shift;
my $configFile = shift;
my $session = WebGUI::Session->open($webguiRoot,$configFile);
$session->user({userId=>3});
## If your script is adding or changing content you need these lines, otherwise leave them commented
#
# my $versionTag = WebGUI::VersionTag->getWorking($session);
# $versionTag->set({name => 'Name Your Tag'});
#
##
return $session;
}
#----------------------------------------------------------------------------
sub finish {
my $session = shift;
## If your script is adding or changing content you need these lines, otherwise leave them commented
#
# my $versionTag = WebGUI::VersionTag->getWorking($session);
# $versionTag->commit;
##
$session->var->end;
$session->close;
}
__END__
=head1 NAME
utility - A template for WebGUI utility scripts
=head1 SYNOPSIS
utility --configFile config.conf ...
utility --help
=head1 DESCRIPTION
This WebGUI utility script helps you...
=head1 ARGUMENTS
=head1 OPTIONS
=over
=item B<--configFile config.conf>
The WebGUI config file to use. Only the file name needs to be specified,
since it will be looked up inside WebGUI's configuration directory.
This parameter is required.
=item B<--help>
Shows a short summary and usage
=item B<--man>
Shows this document
=back
=head1 AUTHOR
Copyright 2001-2009 Plain Black Corporation.
=cut
#vim:ft=perl