webgui/lib/WebGUI/BackgroundProcess.pm

590 lines
16 KiB
Perl

package WebGUI::BackgroundProcess;
use warnings;
use strict;
use Config;
use POSIX;
use WebGUI::Session;
use WebGUI::Pluggable;
use JSON;
use Getopt::Long qw(GetOptionsFromArray);
use Time::HiRes qw(sleep);
=head1 NAME
WebGUI::BackgroundProcess
=head1 DESCRIPTION
Safely and portably spawn a long running process that you can check the
status of.
=head1 SYNOPSIS
package WebGUI::Some::Class;
sub doWork {
my ($process, $data) = @_;
$process->update("Starting...");
...
$process->update("About half way done...");
...
$process->update("Finished!");
}
sub www_doWork {
my $self = shift;
my $session = $self->session;
my $process = WebGUI::BackgroundProcess->start(
$session, 'WebGUI::Some::Class', 'doWork', { some => 'data' }
);
# See WebGUI::Content::BackgroundProcess
my $pairs = $process->contentPairs('DoWork');
$session->http->setRedirect($self->getUrl($pairs));
return 'redirect';
}
package WebGUI::Content::BackgroundProcess::DoWork;
sub handler {
my $process = shift;
my $session = $process->session;
return $session->style->userStyle($process->status);
# or better yet, an ajaxy page that polls.
}
=head1 LEGAL
-------------------------------------------------------------------
WebGUI is Copyright 2001-2009 Plain Black Corporation.
-------------------------------------------------------------------
Please read the legal notices (docs/legal.txt) and the license
(docs/license.txt) that came with this distribution before using
this software.
-------------------------------------------------------------------
http://www.plainblack.com info@plainblack.com
-------------------------------------------------------------------
=head1 METHODS
=cut
#-----------------------------------------------------------------
=head2 argv ($module, $subname, $data)
Produces an argv suitable for passing to exec (after the initial executable
name and perl switches) for running the given user routine with the supplied
data.
=cut
sub argv {
my ( $self, $module, $subname, $data ) = @_;
my $class = ref $self;
my $session = $self->session;
my $config = $session->config;
my $id = $self->getId;
return (
'--webguiRoot' => $config->getWebguiRoot,
'--configFile' => $config->getFilename,
'--sessionId' => $session->getId,
'--module' => $module,
'--subname' => $subname,
'--id' => $self->getId,
'--data' => JSON::encode_json($data),
);
} ## end sub argv
#-----------------------------------------------------------------
=head2 argvToHash ($argv)
Class method. Processes the passed array with GetOptions -- intended for use
from the exec() in start. Don't call unless you know what you're doing.
=cut
sub argvToHash {
my ( $class, $argv ) = @_;
my $hash = {};
GetOptionsFromArray( $argv, $hash,
'webguiRoot=s',
'configFile=s',
'sessionId=s',
'module=s',
'subname=s',
'id=s',
'data=s'
);
$hash->{data} = JSON::decode_json( $hash->{data} );
return $hash;
}
#-----------------------------------------------------------------
=head2 canView ($user?)
Returns whether the current user (or the user passed in, if there is one) has
permission to view the status of the background process. By default, only
admins can view, but see setGroup.
=cut
sub canView {
my $self = shift;
my $session = $self->session;
my $user = shift || $session->user;
$user = WebGUI::User->new( $session, $user )
unless eval { $user->isa('WebGUI::User') };
return 1 if $user->isAdmin;
my $group = $self->get('groupId');
return $group && $user->isInGroup($group);
}
#-------------------------------------------------------------------
=head2 contentPairs ($module, $pid)
Returns a bit of query string useful for redirecting to a
WebGUI::Content::BackgroundProcess plugin. $module should be the bit that
comes after WebGUI::Content::BackgroundProcess, e.g.
$process->contentPairs('Foo') should return something like
"op=background;module=Foo;pid=adlfjafo87ad9f78a7", which will get dispatched
to WebGUI::Content::BackgroundProcess::Foo::handler($process)
=cut
sub contentPairs {
my ( $self, $module ) = @_;
my $pid = $self->getId;
return "op=background;module=$module;pid=$pid";
}
#-----------------------------------------------------------------
=head2 create ( )
Creates a new BackgroundProcess object and inserts a blank row of data into
the db. You probably shouldn't call this -- see start().
=cut
sub create {
my ( $class, $session ) = @_;
my $id = $session->id->generate;
$session->db->setRow( $class->tableName, 'id', {}, $id );
bless { session => $session, id => $id };
}
#-----------------------------------------------------------------
=head2 delete ( )
Clean up the information for this process from the database.
=cut
sub delete {
my $self = shift;
my $db = $self->session->db;
my $tbl = $db->dbh->quote_identifier( $self->tableName );
$db->write( "DELETE FROM $tbl WHERE id = ?", [ $self->getId ] );
}
#-----------------------------------------------------------------
=head2 endTime ( )
Returns the epoch time indicating when the subroutine passed to run() finished
executing, or undef if it hasn't finished. Note that even if the sub passed
to run dies, an endTime will be recorded.
=cut
sub endTime { $_[0]->get('endTime') }
#-----------------------------------------------------------------
=head2 error ( $msg )
Call this to record an error status. You probably shouldn't, though -- just
dying from your subroutine will cause this to be set.
=cut
sub error { $_[0]->set( { error => $_[1] } ) }
#-----------------------------------------------------------------
=head2 finish ( )
Mark the process as being finished. This is called for you when your
subroutine is finished. If update() wasn't computed on the last call, it will
be computed now.
=cut
sub finish {
my $self = shift;
my %props = ( finished => 1 );
if ( my $calc = delete $self->{delay} ) {
$props{status} = $calc->();
$props{latch} = 0;
}
$props{endTime} = time();
$self->set( \%props );
}
#-----------------------------------------------------------------
=head2 get ( @keys )
Get data from the database record for this process (returned as a simple list,
not an arrayref). Valid keys are: id, status, error, startTime, endTime,
finished, groupId. They all have more specific accessors, but you can use
this to get several at once.
=cut
sub get {
my ( $self, @keys ) = @_;
my $db = $self->session->db;
my $dbh = $db->dbh;
my $tbl = $dbh->quote_identifier( $self->tableName );
my $key
= @keys
? join( ',', map { $dbh->quote_identifier($_) } @keys )
: '*';
my $id = $dbh->quote( $self->getId );
my @values = $db->quickArray("SELECT $key FROM $tbl WHERE id = $id");
return wantarray ? @values : $values[0];
}
#-----------------------------------------------------------------
=head2 getError ( )
If the process died, this will be set to stringified $@.
=cut
sub getError { $_[0]->get('error') }
#-----------------------------------------------------------------
=head2 getGroupId
Returns the group ID (not the actual WebGUI::Group) of users who are allowed
to view this process.
=cut
sub getGroupId {
my $id = $_[0]->get('groupId');
return $id || 3;
}
#-----------------------------------------------------------------
=head2 getId ( )
The unique id for this background process. Note: this is NOT the pid, but a
WebGUI guid.
=cut
sub getId { shift->{id} }
#-----------------------------------------------------------------
=head2 getStatus()
Signals the background process that it should report its next status, then
polls at $interval (can be fractional) seconds (default: .1) waiting for the
background process to claim that its status has been updated. Returns the
updated status. See setWait() for a way to change the interval (or disable
the waiting procedure entirely).
=cut
sub getStatus {
my $self = shift;
my $interval = $self->{interval};
if ($interval) {
$self->set( { latch => 1 } );
while (1) {
sleep $interval;
my ( $finished, $latch ) = $self->get( 'finished', 'latch' );
last if $finished || !$latch;
}
}
return $self->get('status');
}
#-----------------------------------------------------------------
=head2 isFinished ( )
A simple flag indicating that background process is no longer running.
=cut
sub isFinished { $_[0]->get('finished') }
#-----------------------------------------------------------------
=head2 new ( $session, $id )
Returns an object capable of checking on the status of the background process
indicated by $id. Returns undef if there is no such process.
=cut
sub new {
my ( $class, $session, $id ) = @_;
my $db = $session->db;
my $tbl = $db->dbh->quote_identifier( $class->tableName );
my $sql = "SELECT COUNT(*) FROM $tbl WHERE id = ?";
my $exists = $db->quickScalar( $sql, [$id] );
return $exists
? bless( { session => $session, id => $id, interval => .1 }, $class )
: undef;
}
#-----------------------------------------------------------------
=head2 session ()
Get the WebGUI::Session this process was created with. Note: this is safe to
call in the child process, as it is a duplicated session (same session id) and
doesn't share any handles with the parent process.
=cut
sub session { $_[0]->{session} }
#-----------------------------------------------------------------
=head2 set ($properties)
Updates the database row with the properties given by the $properties hashref.
See get() for a list of valid keys.
=cut
sub set {
my ( $self, $values ) = @_;
my @keys = keys %$values;
return unless @keys;
my $db = $self->session->db;
my $dbh = $db->dbh;
my $tbl = $dbh->quote_identifier( $self->tableName );
my $sets = join(
',',
map {
my $ident = $dbh->quote_identifier($_);
my $value = $dbh->quote( $values->{$_} );
"$ident = $value";
} @keys
);
my $id = $dbh->quote( $self->getId );
$db->write("UPDATE $tbl SET $sets WHERE id = $id");
} ## end sub set
#-----------------------------------------------------------------
=head2 setGroup($groupId)
Allow the given group (in addition to admins) the ability to check on the
status of this process
=cut
sub setGroup {
my ( $self, $groupId ) = @_;
$groupId = eval { $groupId->getId } || $groupId;
$self->set( { groupId => $groupId } );
}
#-----------------------------------------------------------------
=head2 runCmd ($hashref)
Class method. Processes ARGV and passes it to runFromHash. Don't call this
unless you're the start() method.
=cut
sub runCmd {
my $class = shift;
$class->runFromHash( $class->argvToHash( \@ARGV ) );
}
#-----------------------------------------------------------------
=head2 runFromHash ($hashref)
Class method. Expects a hash of arguments describing what to run. Don't call
this unless you know what you're doing.
=cut
sub runFromHash {
my ( $class, $args ) = @_;
my $module = $args->{module};
WebGUI::Pluggable::load($module);
my $code = $module->can( $args->{subname} );
my $session = WebGUI::Session->open( $args->{webguiRoot}, $args->{configFile}, undef, undef, $args->{sessionId} );
my $self = $class->new( $session, $args->{id} );
$self->set( { startTime => time } );
eval { $self->$code( $args->{data} ) };
$self->error($@) if $@;
$self->finish();
}
#-----------------------------------------------------------------
=head2 setWait ( $interval )
Use this to control the pace at which getStatus will poll for updated
statuses. By default, this is a tenth of a second. If you set it to 0,
getStatus will still signal the background process for an update, but will
take whatever is currently recorded as the status and return immediately.
=cut
sub setWait { $_[0]->{interval} = $_[1] }
#-----------------------------------------------------------------
=head2 start ( $session, $module, $subname, $data )
Class method. The first thing this method does is daemonize (double-fork,
setsid, chdir /, umask 0, all that good stuff). It then executes
$module::subname in a fresh perl interpreter (exec'd $^X) with ($process,
$data) as its arguments. The only restriction on $data is that it be
serializable by JSON.
=head3 $0
The process name (as it appears in ps) will be set to webgui-background-$id,
where $id is the value returned by $process->getId. It thus won't look like a
modperl process to anyone monitoring the process table (wremonitor.pl, for
example).
=cut
sub start {
my ( $class, $session, $module, $subname, $data ) = @_;
my $self = $class->create($session);
my $id = $self->getId;
my $pid = fork();
die "Cannot fork: $!" unless defined $pid;
if ($pid) {
# The child process will fork again and exit immediately, so we can
# wait for it (and thus not have zombie processes).
waitpid( $pid, 0 );
return $self;
}
# We don't want destructors called, so POSIX exit on errors.
eval {
# detach from controlling terminal, get us into a new process group
die "Cannot become session leader: $!" if POSIX::setsid() < 0;
# Fork again so we never get a controlling terminal
$pid = fork();
die "Child cannot fork: $!" unless defined $pid;
# We don't want to call any destructors, as it would mess with the
# parent's mysql connection, etc.
POSIX::_exit(0) if $pid;
# We're now in the final target process. Standard daemon-y things...
$SIG{HUP} = 'IGNORE';
chdir '/';
umask 0;
# Forcibly close any open file descriptors that remain
my $max = POSIX::sysconf(&POSIX::_SC_OPEN_MAX) || 1024;
POSIX::close($_) for ( 0 .. $max );
# Get us some reasonable STD handles
my $null = '/dev/null';
open STDIN, '<', $null or die "Cannot read $null: $!";
open STDOUT, '>', $null or die "Cannot write $null: $!";
open STDERR, '>', $null or die "Cannot write $null: $!";
# Now we're ready to run the user's code.
my $perl = $Config{perlpath};
exec {$perl} (
"webgui-background-$id",
( map {"-I$_"} @INC ),
"-M$class", "-e$class->runCmd();",
'--', $self->argv( $module, $subname, $data )
) or POSIX::_exit(-1);
};
POSIX::_exit(-1) if ($@);
} ## end sub start
#-----------------------------------------------------------------
=head2 startTime ( )
Returns the time this process started running in epoch format.
=cut
sub startTime { $_[0]->get('startTime') }
#-----------------------------------------------------------------
=head2 tableName ( )
Class method: a constant, for convenience. The name of the table that process
data is stored in.
=cut
sub tableName {'BackgroundProcess'}
#-----------------------------------------------------------------
=head2 update ( $msg )
Set a new status for the background process. This can be anything, and will
overwrite the old status. JSON is recommended for complex statuses.
Optionally, $msg can be a subroutine that returns the new status -- if your
status may take a long time to compute, you should use this, as you may be
able to avoid computing some (or all) of your status updates, depending on how
often they're being asked for. See the getStatus method for details.
=cut
sub update {
my ( $self, $msg ) = @_;
if ( ref $msg eq 'CODE' ) {
if ( $self->get('latch') ) {
$msg = $msg->();
}
else {
$self->{delay} = $msg;
return;
}
}
delete $self->{delay};
$self->set( { latch => 0, status => $msg } );
}
1;