diff --git a/docs/upgrades/upgrade_6.8.7-6.99.0.pl b/docs/upgrades/upgrade_6.8.7-6.99.0.pl index 93fe3054f..cb6466739 100644 --- a/docs/upgrades/upgrade_6.8.7-6.99.0.pl +++ b/docs/upgrades/upgrade_6.8.7-6.99.0.pl @@ -130,7 +130,8 @@ sub addWorkflow { "WebGUI::Workflow::Activity::CleanFileCache", "WebGUI::Workflow::Activity::CleanLoginHistory", "WebGUI::Workflow::Activity::ArchiveOldThreads", "WebGUI::Workflow::Activity::TrashExpiredEvents", "WebGUI::Workflow::Activity::CreateCronJob", "WebGUI::Workflow::Activity::DeleteExpiredSessions", "WebGUI::Workflow::Activity::DeleteExpiredGroupings", "WebGUI::Workflow::Activity::PurgeOldAssetRevisions", - "WebGUI::Workflow::Activity::ExpireSubscriptionCodes", "WebGUI::Workflow::Activity::PurgeOldTrash"], + "WebGUI::Workflow::Activity::ExpireSubscriptionCodes", "WebGUI::Workflow::Activity::PurgeOldTrash", + "WebGUI::Workflow::Activity::GetSyndicatedContent"], "WebGUI::User"=>["WebGUI::Workflow::Activity::CreateCronJob"], "WebGUI::VersionTag"=>["WebGUI::Workflow::Activity::CommitVersionTag", "WebGUI::Workflow::Activity::RollbackVersionTag", "WebGUI::Workflow::Activity::TrashVersionTag", "WebGUI::Workflow::Activity::CreateCronJob"] @@ -203,6 +204,8 @@ sub addWorkflow { }, "pbworkflow000000000004"); $activity = $workflow->addActivity("WebGUI::Workflow::Activity::DeleteExpiredSessions", "pbwfactivity0000000009"); $activity->set("title", "delete expired sessions"); + $activity = $workflow->addActivity("WebGUI::Workflow::Activity::GetSyndicatedContent", "pbwfactivity0000000012"); + $activity->set("title", "Get syndicated content"); WebGUI::Workflow::Cron->create($session, { title=>'Hourly Maintenance', enabled=>1, diff --git a/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm b/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm index e07690eec..cd495c374 100644 --- a/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm +++ b/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm @@ -231,10 +231,10 @@ sub _normalize_items { 
#------------------------------------------------------------------- sub _get_rss_data { - my $self = shift; + my $session = shift; my $url = shift; - my $cache = WebGUI::Cache->new($self->session,'url:' . $url, 'RSS'); + my $cache = WebGUI::Cache->new($session,'url:' . $url, 'RSS'); my $rss_serial = $cache->get; my $rss = {}; if ($rss_serial) { @@ -243,7 +243,7 @@ sub _get_rss_data { my $ua = LWP::UserAgent->new(timeout => 5); my $response = $ua->get($url); if (!$response->is_success()) { - $self->session->errorHandler->warn("Error retrieving url '$url': " . + $session->errorHandler->warn("Error retrieving url '$url': " . $response->status_line()); return undef; } @@ -256,7 +256,7 @@ sub _get_rss_data { my $encoding = 'utf8'; if (lc($xmlEncoding) ne lc($encoding)) { eval { from_to($xml, $xmlEncoding, $encoding) }; - $self->session->errorHandler->warn($@) if ($@); + $session->errorHandler->warn($@) if ($@); } } @@ -266,7 +266,7 @@ sub _get_rss_data { XML::RSSLite::parseXML($rss_lite, \$xml); }; if ($@) { - $self->session->errorHandler->warn("error parsing rss for url $url :".$@); + $session->errorHandler->warn("error parsing rss for url $url :".$@); #Returning undef on a parse failure is a change from previous behaviour, #but it SHOULDN'T have a major effect. return undef; @@ -281,10 +281,10 @@ sub _get_rss_data { $rss_lite = {channel => $rss_lite}; if (!($rss->{channel} = _find_record($rss_lite, qr/^channel$/))) { - $self->session->errorHandler->warn("unable to find channel info for url $url"); + $session->errorHandler->warn("unable to find channel info for url $url"); } if (!($rss->{items} = _find_record($rss_lite, qr/^items?$/))) { - $self->session->errorHandler->warn("unable to find item info for url $url"); + $session->errorHandler->warn("unable to find item info for url $url"); $rss->{items} = []; } @@ -296,7 +296,7 @@ sub _get_rss_data { #Assign dates "globally" rather than when seen in a viewed feed. 
#This is important because we can "filter" now and want to ensure we keep order #correctly as new items appear. - $self->_assign_rss_dates($rss->{items}); + _assign_rss_dates($session, $rss->{items}); #Default to an hour timeout $cache->set(Storable::freeze($rss), 3600); @@ -311,17 +311,17 @@ sub _get_rss_data { # whole database to keep the thing from growing too large sub _assign_rss_dates { - my $self = shift; + my $session = shift; my ($items) = @_; for my $item (@{$items}) { my $key = 'dates:' . ($item->{guid} || $item->{title} || $item->{description} || $item->{link}); - my $cache = WebGUI::Cache->new($self->session,$key, 'RSS'); + my $cache = WebGUI::Cache->new($session,$key, 'RSS'); if (my $date = $cache->get()) { $item->{date} = $date; } else { - $item->{date} =$self->session->datetime->time(); + $item->{date} =$session->datetime->time(); $cache->set($item->{date}, '1 year'); } } @@ -444,7 +444,7 @@ sub _get_items { $items = []; for my $url (@{$urls}) { - my $rss_info=$self->_get_rss_data($url); + my $rss_info=_get_rss_data($self->session,$url); push(@rss_feeds, $rss_info) if($rss_info); } diff --git a/lib/WebGUI/Workflow/Activity/GetSyndicatedContent.pm b/lib/WebGUI/Workflow/Activity/GetSyndicatedContent.pm new file mode 100644 index 000000000..b3be986f2 --- /dev/null +++ b/lib/WebGUI/Workflow/Activity/GetSyndicatedContent.pm @@ -0,0 +1,90 @@ +package WebGUI::Workflow::Activity::GetSyndicatedContent; + + +=head1 LEGAL + + ------------------------------------------------------------------- + WebGUI is Copyright 2001-2006 Plain Black Corporation. + ------------------------------------------------------------------- + Please read the legal notices (docs/legal.txt) and the license + (docs/license.txt) that came with this distribution before using + this software. 
+ ------------------------------------------------------------------- + http://www.plainblack.com info@plainblack.com + ------------------------------------------------------------------- + +=cut + +use strict; +use base 'WebGUI::Workflow::Activity'; +use WebGUI::Asset::Wobject::SyndicatedContent; + +=head1 NAME + +Package WebGUI::Workflow::Activity::GetSyndicatedContent; + +=head1 DESCRIPTION + +Prefetches syndicated content URLs so that the pages can be served up more quickly. + +=head1 SYNOPSIS + +See WebGUI::Workflow::Activity for details on how to use any activity. + +=head1 METHODS + +These methods are available from this class: + +=cut + + +#------------------------------------------------------------------- + +=head2 definition ( session, definition ) + +See WebGUI::Workflow::Activity::definition() for details. + +=cut + +sub definition { + my $class = shift; + my $session = shift; + my $definition = shift; + my $i18n = WebGUI::International->new($session, "Asset_SyndicatedContent"); + push(@{$definition}, { + name=>$i18n->get("get syndicated content"), + properties=> { } + }); + return $class->SUPER::definition($session,$definition); +} + + +#------------------------------------------------------------------- + +=head2 execute ( ) + +See WebGUI::Workflow::Activity::execute() for details. + +=cut + +sub execute { + my $self = shift; + #In the new Wobject, "rssURL" actually can refer to more than one URL. + my @syndicatedWobjectURLs = $self->session->db->buildArray("select distinct SyndicatedContent.rssUrl from SyndicatedContent left join asset on SyndicatedContent.assetId=asset.assetId where asset.state='published'"); + foreach my $url(@syndicatedWobjectURLs) { + #Loop through the SyndicatedWobjects and split all the URLs they are syndicating off into + #a separate array. 
+ my @urlsToSyndicate = split(/\s+/,$url); + foreach ((@urlsToSyndicate)) { + WebGUI::Asset::Wobject::SyndicatedContent::_get_rss_data($self->session,$_); + } + } + return 1; +} + + + + +1; + + diff --git a/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm b/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm index 36d7b45d4..cf87a6414 100644 --- a/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm +++ b/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm @@ -1,6 +1,12 @@ package WebGUI::i18n::English::Asset_SyndicatedContent; our $I18N = { + 'get syndicated content' => { + 'lastUpdated' => 0, + 'message' => 'Get Syndicated Content', + 'context' => 'the title of the get syndicated content workflow activity' + }, + '1' => { 'lastUpdated' => 1031514049, 'message' => 'URL to RSS File' diff --git a/sbin/Hourly/GetSyndicatedContent.pm b/sbin/Hourly/GetSyndicatedContent.pm deleted file mode 100644 index 2a419f666..000000000 --- a/sbin/Hourly/GetSyndicatedContent.pm +++ /dev/null @@ -1,34 +0,0 @@ -package Hourly::GetSyndicatedContent; - -use strict; -use warnings; -use WebGUI::SQL; -use WebGUI::Asset::Wobject::SyndicatedContent; - -=head2 Hourly::GetSyndicatedContent - -Loops through all the URLs in the SyndicatedWobjects and puts them into WebGUI::Cache if they haven't been spidered or if they have expired from the cache. This should reduce HTTP traffic a little, and allow for more granular scheduling of feed downloads in the future. - -=cut - - -#------------------------------------------------------------------- -sub process{ - - #In the new Wobject, "rssURL" actually can refer to more than one URL. - my @syndicatedWobjectURLs = WebGUI::SQL->buildArray("select distinct SyndicatedContent.rssUrl from SyndicatedContent left join asset on SyndicatedContent.assetId=asset.assetId where asset.state='published'"); - foreach my $url(@syndicatedWobjectURLs) { - - #Loop through the SyndicatedWobjects and split all the URLs they are syndicating off into - #a separate array. 
- - my @urlsToSyndicate = split(/\s+/,$url); - foreach ((@urlsToSyndicate)) { - WebGUI::Asset::Wobject::SyndicatedContent::_get_rss_data($_); - } - } -} - - - -1;