- The Syndicated Content asset was rewritten, and now uses 35% less memory and is 400% faster.
This commit is contained in:
parent
d34eadc40e
commit
ef6aedc862
12 changed files with 393 additions and 2180 deletions
|
|
@ -1,5 +1,8 @@
|
|||
7.6.3
|
||||
- fixed #8989: Missing profile field "showOnline" for the UsersOnline macro.
|
||||
- Added DataTable to WebGUI.conf.original
|
||||
- The Syndicated Content asset was rewritten, and now uses 35% less memory
|
||||
and is 400% faster.
|
||||
- fixed #9025: Testing function of UsersOnline macro fails.
|
||||
|
||||
7.6.2
|
||||
|
|
|
|||
|
|
@ -7,6 +7,17 @@ upgrading from one version to the next, or even between multiple
|
|||
versions. Be sure to heed the warnings contained herein as they will
|
||||
save you many hours of grief.
|
||||
|
||||
7.6.3
|
||||
--------------------------------------------------------------------
|
||||
* WebGUI now requires XML::FeedPP version 0.36 or greater.
|
||||
|
||||
* The Syndicated Content asset has been heavily modified. Your templates
|
||||
and settings should automatically migrate, but in less than 1% of cases
|
||||
there will be problems that the migration cannot handle. Check your
|
||||
Syndicated Content assets after upgrade to ensure they are still
|
||||
working as expected.
|
||||
|
||||
|
||||
7.6.1
|
||||
--------------------------------------------------------------------
|
||||
* WebGUI now requires Class::C3 version 0.19 or greater.
|
||||
|
|
|
|||
BIN
docs/upgrades/packages-7.6.3/default_syndicated_content.wgpkg
Normal file
BIN
docs/upgrades/packages-7.6.3/default_syndicated_content.wgpkg
Normal file
Binary file not shown.
BIN
docs/upgrades/packages-7.6.3/syndicated_articles.wgpkg
Normal file
BIN
docs/upgrades/packages-7.6.3/syndicated_articles.wgpkg
Normal file
Binary file not shown.
|
|
@ -20,6 +20,7 @@ use Getopt::Long;
|
|||
use WebGUI::Session;
|
||||
use WebGUI::Storage;
|
||||
use WebGUI::Asset;
|
||||
use WebGUI::Asset::Template;
|
||||
|
||||
|
||||
my $toVersion = "7.6.3"; # make this match what version you're going to
|
||||
|
|
@ -28,20 +29,47 @@ my $quiet; # this line required
|
|||
|
||||
my $session = start(); # this line required
|
||||
# upgrade functions go here
|
||||
|
||||
createFieldShowOnline($session);
|
||||
upgradeSyndicatedContentTemplates($session);
|
||||
|
||||
finish($session); # this line required
|
||||
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
# Describe what our function does
|
||||
#sub exampleFunction {
|
||||
# my $session = shift;
|
||||
# print "\tWe're doing some stuff here that you should know about... " unless $quiet;
|
||||
# # and here's our code
|
||||
# print "DONE!\n" unless $quiet;
|
||||
#}
|
||||
# Migrates the templates in the SyndicatedContent namespace to the variable
# names used by the rewritten asset.
#
#   * Template DPUROtmpl0000000000001 no longer applies and is trashed; assets
#     that referenced it are re-pointed at PBtmpl0000000000000065.
#   * Every other template gets a new revision with its variables renamed, but
#     only when the rename actually changed the template text.
#   * The displayMode column is dropped from the SyndicatedContent table.
#
# Takes the upgrade session as its only argument.
sub upgradeSyndicatedContentTemplates {
    my $session = shift;
    print "\tUpgrading syndicated content assets... " unless $quiet;
    my $db = $session->db;
    my $templates = $db->read("select distinct assetId from template where namespace='SyndicatedContent'");
    TEMPLATE: while (my ($id) = $templates->array) {
        my $asset = WebGUI::Asset::Template->new($session, $id);
        next TEMPLATE unless defined $asset;
        if ($asset->getId eq "DPUROtmpl0000000000001") { # this one no longer applies
            $asset->trash;
            next TEMPLATE;
        }
        my $original = $asset->get('template');
        next TEMPLATE unless defined $original;
        my $migrated = _renameSyndicatedContentTemplateVars($original);
        # Avoid piling up identical revisions for templates that never used
        # any of the old variable names.
        $asset->addRevision({template=>$migrated}) if $migrated ne $original;
    }
    $db->write("update SyndicatedContent set templateId='PBtmpl0000000000000065' where templateId='DPUROtmpl0000000000001'");
    $db->write("alter table SyndicatedContent drop column displayMode");
    print "DONE!\n" unless $quiet;
}

# Returns a copy of the template text with every old Syndicated Content
# variable name replaced by its new name.
sub _renameSyndicatedContentTemplateVars {
    my $template = shift;
    # Dots are escaped so only the real old variable names match, and the
    # longer rss.url.* names come before their prefixes: otherwise
    # "rss.url.0.91" would be rewritten to "rss_url1" by the "rss.url.0.9" rule.
    my @renames = (
        [ qr{channel\.title}i       => 'channel_title'       ],
        [ qr{channel\.description}i => 'channel_description' ],
        [ qr{channel\.link}i        => 'channel_link'        ],
        [ qr{site_link}i            => 'channel_link'        ],
        [ qr{site_title}i           => 'channel_title'       ],
        [ qr{descriptionFull}i      => 'description'         ],
        [ qr{rss\.url\.0\.91}i      => 'rss_url'             ],
        [ qr{rss\.url\.0\.9}i       => 'rss_url'             ],
        [ qr{rss\.url\.1\.0}i       => 'rdf_url'             ],
        [ qr{rss\.url\.2\.0}i       => 'rss_url'             ],
        [ qr{rss\.url}i             => 'rss_url'             ],
    );
    foreach my $rename (@renames) {
        my ($pattern, $replacement) = @{$rename};
        # /g: a template usually references the same variable more than once.
        $template =~ s{$pattern}{$replacement}g;
    }
    return $template;
}
|
||||
|
||||
#----------------------------------------------------------------------------
|
||||
sub createFieldShowOnline {
|
||||
my $session = shift;
|
||||
print "\tCreating an additional profile field 'showOnline' for the UsersOnline macro... " unless $quiet;
|
||||
|
|
|
|||
|
|
@ -14,16 +14,13 @@ use strict;
|
|||
use HTML::Entities;
|
||||
use Tie::IxHash;
|
||||
use WebGUI::Cache;
|
||||
use WebGUI::Exception;
|
||||
use WebGUI::HTML;
|
||||
use WebGUI::International;
|
||||
use WebGUI::Asset::Wobject;
|
||||
use base 'WebGUI::Asset::Wobject';
|
||||
use WebGUI::Macro;
|
||||
use XML::RSSLite;
|
||||
use XML::RSS::Creator;
|
||||
use LWP::UserAgent;
|
||||
use Encode;
|
||||
use XML::FeedPP;
|
||||
|
||||
our @ISA = qw(WebGUI::Asset::Wobject);
|
||||
|
||||
=head1 NAME
|
||||
|
||||
|
|
@ -31,7 +28,7 @@ Package WebGUI::Asset::Wobject::SyndicatedContent
|
|||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Displays items and channels from RSS feeds.
|
||||
Displays items and channels from RSS/Atom/RDF feeds.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
|
|
@ -43,55 +40,6 @@ These methods are available from this class:
|
|||
|
||||
=cut
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Fills $rssObject with the channel and items held in the template variable
# hash $var, for emitting the aggregate as an RSS feed.
#
# The channel title falls back to the asset title and the description to an
# empty string. Only plain (non-reference) item attributes are passed on,
# because the loose XML::RSSLite parse sometimes yields nested structures.
sub _constructRSS {
    my ($self, $rssObject, $var) = @_;
    $rssObject->channel(
        title       => $var->{'channel.title'} || $self->get('title'),
        link        => $self->session->url->page('',1),
        description => $var->{'channel.description'} || '',
    );
    foreach my $entry (@{ $var->{item_loop} }) {
        my %plain = map  { $_ => $entry->{$_} }
                    grep { !ref($entry->{$_}) }
                    keys %{$entry};
        $rssObject->add_item(%plain);
    }
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Adds one 'rss.url.<version>' entry per supported RSS flavour to the template
# variable hash $var, plus a plain 'rss.url' entry that points at the 2.0 feed.
sub _createRSSURLs {
    my ($self, $var) = @_;
    # [ version used in the variable name => suffix of the www_viewRSS* method ]
    my @flavors = (
        [ '1.0'  => '10'  ],
        [ '0.9'  => '090' ],
        [ '0.91' => '091' ],
        [ '2.0'  => '20'  ],
    );
    for my $flavor (@flavors) {
        my ($version, $suffix) = @{$flavor};
        $var->{'rss.url.' . $version} = $self->getUrl('func=viewRSS' . $suffix);
    }
    $var->{'rss.url'} = $self->getUrl('func=viewRSS20');
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns the asset's maxHeadlines property, or 1000000 when the property is
# unset, empty or zero.
sub _getMaxHeadlines {
    my ($self) = @_;
    my $configured = $self->get('maxHeadlines');
    return $configured ? $configured : 1000000;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Splits the (macro-processed) RSS URL property on whitespace and returns the
# entries that start with "http", in their original order.
sub _getValidatedUrls {
    my ($self) = @_;
    my @candidates = split(/\s+/, $self->getRssUrl);
    my @accepted   = grep { $_ =~ m/^http/ } @candidates;
    return @accepted;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
|
|
@ -129,6 +77,7 @@ sub appendChoppedDescriptionTemplateVars {
|
|||
$item->{"descriptionFirstSentence"} =~ s/^(.*?\.).*/$1/s;
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 definition ( definition )
|
||||
|
|
@ -179,26 +128,13 @@ sub definition {
|
|||
label=>$i18n->get('process macros in rss url'),
|
||||
hoverHelp=>$i18n->get('process macros in rss url description'),
|
||||
},
|
||||
maxHeadlines=>{
|
||||
tab=>"properties",
|
||||
maxHeadlines=>{
|
||||
tab=>"display",
|
||||
fieldType=>'integer',
|
||||
defaultValue=>10,
|
||||
label=>$i18n->get(3),
|
||||
hoverHelp=>$i18n->get('3 description')
|
||||
},
|
||||
displayMode=>{
|
||||
tab=>"display",
|
||||
fieldType=>'selectBox',
|
||||
defaultValue=>'interleaved',
|
||||
options=>{
|
||||
'interleaved'=>$i18n->get('interleaved'),
|
||||
'grouped'=>$i18n->get('grouped'),
|
||||
},
|
||||
sortByValue=>1,
|
||||
label=>$i18n->get('displayModeLabel'),
|
||||
hoverHelp=>$i18n->get('displayModeLabel description'),
|
||||
subtext=>$i18n->get('displayModeSubtext')
|
||||
},
|
||||
hasTerms=>{
|
||||
tab=>"properties",
|
||||
fieldType=>'text',
|
||||
|
|
@ -221,363 +157,115 @@ sub definition {
|
|||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# strip all html tags from the given data structure. This is important to
|
||||
# prevent cross site scripting attacks
|
||||
|
||||
# Recursively filters all HTML out of a data structure, in place, to prevent
# cross site scripting.
#
# A plain scalar argument is overwritten through the @_ alias with the result
# of WebGUI::HTML::filter(..., 'all'), which is also returned. For a hash
# reference the 'description' value (when the key exists) is entity-decoded
# first, then every value is processed; for an array reference every element is
# processed. References are returned unchanged apart from the in-place edits.
sub _strip_html {
    # Work on $_[0] directly: it aliases the caller's variable (or the
    # hash/array slot), which is what makes the in-place update possible.
    if (!ref $_[0]) {
        return $_[0] = WebGUI::HTML::filter($_[0], 'all');
    }

    my $node = shift;
    my $kind = ref $node;
    if ($kind eq 'HASH') {
        $node->{description} = HTML::Entities::decode_entities($node->{description})
            if exists $node->{description};
        # values() yields aliases, so the recursive call edits the hash itself
        foreach my $child (values %{$node}) {
            _strip_html($child);
        }
    }
    elsif ($kind eq 'ARRAY') {
        foreach my $child (@{$node}) {
            _strip_html($child);
        }
    }
    return $node;
}
|
||||
=head2 generateFeed ()
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# horrible kludge to find the channel or item record
|
||||
# in the varying kinds of rss structures returned by RSSLite
|
||||
Combines all feeds into a single XML::FeedPP object.
|
||||
|
||||
sub _find_record {
|
||||
my ($data, $regex) = @_;
|
||||
=cut
|
||||
|
||||
if (ref($data) eq 'HASH') {
|
||||
# reset the hash before calling each()
|
||||
keys(%{$data});
|
||||
while (my ($name, $val) = each(%{$data})) {
|
||||
if ($name =~ $_[1]) {
|
||||
if ((((ref($val) eq 'HASH') &&
|
||||
($val->{link} || $val->{title} ||
|
||||
$val->{description})) ||
|
||||
((ref($val) eq 'ARRAY') && @{$val} &&
|
||||
(ref($val->[0]) eq 'HASH') &&
|
||||
($val->[0]->{link} ||
|
||||
$val->[0]->{title} ||
|
||||
$val->[0]->{description})))) {
|
||||
return $val;
|
||||
}
|
||||
}
|
||||
if (my $record = _find_record($val, $regex)) {
|
||||
return $record;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return undef;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# First, get rid of things we don't want.
|
||||
# Copy the guid field to the link field if the guid looks like a link.
|
||||
# This is a kludge that gets around the fact that some folks use the link
|
||||
# field as the link to the story while others use it as the link
|
||||
# to the story about which the story is written. The WebGUI templates seem
|
||||
# to assume the former, so we should use the guid instead of the link, b/c
|
||||
# the guid, if it is a link, always means the former.
|
||||
# Also copy the first few words of the description into the title if
|
||||
# there is no title
|
||||
|
||||
# Normalizes, in place, every item of the array reference passed as the first
# argument:
#   * keeps only the title, link, description and pubDate keys;
#   * derives a missing title from the first words of the description;
#   * wraps the category in an array reference;
#   * appends the chopped-description template variables.
sub _normalize_items {
    my ($items) = @_;

    # max number of description words used to fill in an empty title
    my $titleWordLimit = 10;
    my @keep = qw( title link description pubDate );

    foreach my $entry (@{$items}) {
        # Get rid of any keys in the item that we do not want
        my %kept;
        @kept{@keep} = @{$entry}{@keep};
        %{$entry} = %kept;

        # NOTE(review): 'guid' is not among the kept keys, so as written this
        # branch can never fire -- confirm whether guid should be captured
        # before the filtering above.
        if ($entry->{guid} && ($entry->{guid} =~ /^http:\/\//i)) {
            $entry->{link} = $entry->{guid};
        }

        if (!$entry->{title}) {
            my @words = split(/\s/, $entry->{description});
            $entry->{title} = (@words <= $titleWordLimit)
                ? $entry->{description}
                : join(' ', @words[0..$titleWordLimit-1]) . ' ...';
        }

        # NOTE(review): as written these replace an apostrophe with itself;
        # presumably the pattern was an HTML entity that got decoded when this
        # text was extracted -- confirm against the repository.
        $entry->{title} =~ s/'/\'/g;
        $entry->{description} =~ s/'/\'/g;

        # NOTE(review): 'category' was dropped by the filtering above too, so
        # this always stores [undef] -- confirm intent.
        if (ref($entry->{category}) ne 'ARRAY') {
            $entry->{category} = [$entry->{category}];
        }
        appendChoppedDescriptionTemplateVars($entry);
    }
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Returns the parsed feed for $url as a hash reference with the keys channel,
# items and last_modified, or undef when the feed cannot be fetched or parsed.
#
# Results are cached for an hour under the key 'url2:<url>' in the 'RSS'
# namespace. A failed HTTP request is cached as an error marker for the same
# period, so a broken feed is not re-requested on every call; a parse failure
# is logged but not cached.
sub _get_rss_data {
    my ($session, $url) = @_;

    # format of cache was changed, differentiate
    my $cache = WebGUI::Cache->new($session,'url2:' . $url, 'RSS');
    my $rss = $cache->get;
    if ($rss) {
        return $rss->{error} ? undef : $rss;
    }

    my $ua = LWP::UserAgent->new(timeout => 5);
    $ua->env_proxy;
    my $response = $ua->get($url);
    unless ($response->is_success()) {
        $session->errorHandler->warn("Error retrieving url '$url': " .
            $response->status_line());
        $cache->set({'error' => 1, 'error_status' => $response->status_line}, 3600);
        return undef;
    }

    # Honour the encoding named in the XML declaration, if there is one.
    my ($xmlEncoding) = $response->content =~ /<\?xml.*?encoding=['"](\S+)['"]/i;
    my $xml = $response->decoded_content($xmlEncoding ? (charset => $xmlEncoding) : ());

    # Approximate with current time if we don't have a Last-Modified
    # header coming from the RSS source.
    my $http_lm = $response->last_modified;
    my $last_modified = defined($http_lm) ? $http_lm : time;

    # XML::RSSLite does not handle <![CDATA[ ]]> so:
    $xml =~ s/<!\[CDATA\[(.*?)\]\]>/HTML::Entities::encode_entities($1)/esg;

    my $rss_lite = {};
    eval {
        XML::RSSLite::parseXML($rss_lite, \$xml);
    };
    if ($@) {
        $session->errorHandler->warn("error parsing rss for url $url :".$@);
        return undef;
    }

    # Different versions of rss put the channel description and the item list
    # in different places of the parsed structure, so search for both.
    $rss_lite = {channel => $rss_lite};
    $rss = {};
    $rss->{channel} = _find_record($rss_lite, qr/^channel$/);
    if (!$rss->{channel}) {
        $session->errorHandler->warn("unable to find channel info for url $url");
    }
    $rss->{items} = _find_record($rss_lite, qr/^items?$/);
    if (!$rss->{items}) {
        $session->errorHandler->warn("unable to find item info for url $url");
        $rss->{items} = [];
    }

    _strip_html($rss);
    if (ref $rss->{items} ne 'ARRAY') {
        $rss->{items} = [ $rss->{items} ];
    }
    _normalize_items($rss->{items});

    # Assign dates "globally" rather than when seen in a viewed feed, so the
    # order stays correct as new items appear and filters are applied.
    _assign_rss_dates($session, $rss->{items});

    # Store last-modified date as well.
    $rss->{last_modified} = $last_modified;

    # Default to an hour timeout
    $cache->set($rss, 3600);

    return $rss;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# rss items don't have a standard date, so timestamp them the first time
|
||||
# we see them and use that timestamp as the date. Periodically nuke the
|
||||
# whole database to keep the thing from growing too large
|
||||
|
||||
# Gives every item in the array reference $items a 'date' and then sorts the
# array in place, newest first.
#
# The date is remembered for a year in the 'RSS' cache, keyed on the item's
# guid, title, description or link (first true value). An item seen for the
# first time gets its pubDate converted to an epoch; an item without a usable
# pubDate is dated one year in the past.
sub _assign_rss_dates {
    my ($session, $items) = @_;

    foreach my $entry (@{$items}) {
        my $identity = $entry->{guid} || $entry->{title} ||
            $entry->{description} || $entry->{link};
        my $cache = WebGUI::Cache->new($session, 'dates:' . $identity, 'RSS');

        my $remembered = $cache->get();
        if ($remembered) {
            $entry->{date} = $remembered;
            next;
        }

        my $pubDate;
        if ($entry->{pubDate}) {
            $pubDate = $session->datetime->mailToEpoch($entry->{pubDate});
        }
        # handicap the undated
        $entry->{date} = $pubDate || ($session->datetime->time() - (60 * 60 * 24 * 365));
        $cache->set($entry->{date}, '1 year');
    }
    @{$items} = sort { $b->{date} <=> $a->{date} } @{$items};
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# $items is the hashref to put items into.
|
||||
# $rss_feeds is an arrayref of all the feeds in this wobject
|
||||
# The only difference between an "interleaved" feed and a grouped feed
|
||||
# is the order the items are output.
|
||||
|
||||
# Builds the item list for "grouped" display mode.
#
# $items is the array reference to put items into and $rss_feeds is an array
# reference of all the feeds in this wobject. The list is built exactly as in
# interleaved mode, then re-sorted by site_title; the first item of each run of
# equal site titles is flagged with new_rss_site.
sub _create_grouped_items{
    my ($items, $rss_feeds, $maxHeadlines, $hasTermsRegex) = @_;

    _create_interleaved_items($items, $rss_feeds, $maxHeadlines, $hasTermsRegex);

    @{$items} = sort { $a->{'site_title'} cmp $b->{'site_title'} } @{$items};

    # Walk the sorted items and mark every place where the site title changes.
    my $previousTitle;
    foreach my $entry (@{$items}) {
        $entry->{new_rss_site} = 1 if $previousTitle ne $entry->{site_title};
        $previousTitle = $entry->{site_title};
    }
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Loop through the feeds for this wobject
|
||||
# and push in the items in "interleaved mode"
|
||||
# No need to return because we're doing everything by reference.
|
||||
|
||||
# Builds the item list for "interleaved" display mode.
#
# Moves the items of every feed in $rss_feeds into the array behind $items
# (emptying each feed's own item list), tagging each one with the title and
# link of its channel. Items that fail the optional $hasTermsRegex are dropped.
# The combined list is sorted newest first and cut to $maxHeadlines entries.
# Nothing useful is returned: everything is done through the references.
sub _create_interleaved_items {
    my ($items, $rss_feeds, $maxHeadlines, $hasTermsRegex) = @_;

    # put all items together into a single list
    for my $feed (@{$rss_feeds}) {
        ITEM: while (my $entry = shift @{ $feed->{items} }) {
            next ITEM if $hasTermsRegex && !_check_hasTerms($entry, $hasTermsRegex);
            $entry->{site_title} = $feed->{channel}->{title};
            $entry->{site_link}  = $feed->{channel}->{link};
            push @{$items}, $entry;
        }
    }

    @{$items} = sort { $b->{date} <=> $a->{date} } @{$items};

    # limit to $maxHeadlines
    splice(@{$items}, $maxHeadlines) if @{$items} > $maxHeadlines;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Uses the regex constructed in _get_items (with the terms defaulting to OR)
|
||||
# to see if the title or description associated with this item match the kinds
|
||||
# of items we're looking for.
|
||||
#
|
||||
|
||||
# Returns 1 when the item's title followed by its description matches
# $hasTermsRegex (case-insensitively, across lines), otherwise 0.
sub _check_hasTerms{
    my ($item, $hasTermsRegex) = @_;
    my $haystack = $item->{title} . $item->{description};
    return ($haystack =~ /$hasTermsRegex/gism) ? 1 : 0;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Turns a comma separated list of search terms into a regex source string in
# which the terms are alternatives ("a|b|c").
#
# Each term is trimmed and escaped with quotemeta, so that terms such as "c++"
# are matched literally instead of producing an invalid or unintended pattern.
# Empty terms are dropped, because an empty alternative would match every item.
# Returns an empty string when there are no terms.
sub _make_regex{
    my $terms = shift;
    return '' unless defined $terms;
    my @terms;
    foreach my $term (split(/,/, $terms)) {
        $term =~ s/^\s+//;
        $term =~ s/\s+$//;
        push @terms, quotemeta($term) if length $term;
    }
    return join('|', @terms);
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# So- We're going to manage an "aggregate cache" that represents
|
||||
# the rendering of the cumulative feeds in a Syndicated Wobject,
|
||||
# but let each feed "fend for itself" based on URL in the cache.
|
||||
#
|
||||
# This means we can set up the hourly task to get and cache each
|
||||
# individual feed WITHOUT having to re-request (undoubtedly the slowest
|
||||
# part of every RSS parsing action is the network traffic) each feed
|
||||
# when we re-render each aggregate representation.
|
||||
#
|
||||
# If, however, a feed expires between hourly tasks, it will be re-requested and
|
||||
# parsed per the usual. BUT, if a feed ever goes un-requested for more than an hour,
|
||||
# then its retrieval schedule will be taken over by the hourly task, and we'll
|
||||
# be pre-seeding the RSS object cache automatically.
|
||||
#
|
||||
# Having the caching set up this way means we can re-use the same raw feed all over the site without
|
||||
# having each wobject request it separately, ASSUMING the URL is the same.
|
||||
#
|
||||
# All the values that may have an effect on the composition of items
|
||||
# are included in the cache key for the aggregate representation.
|
||||
|
||||
sub _get_items {
|
||||
sub generateFeed {
|
||||
my $self = shift;
|
||||
my $urls = shift;
|
||||
my $maxHeadlines = shift || $self->getValue('maxHeadlines');
|
||||
my $displayMode=$self->getValue('displayMode');
|
||||
|
||||
my $hasTermsRegex=_make_regex($self->getValue('hasTerms'));
|
||||
|
||||
# Format of cache has changed several times
|
||||
my $key=join(':', 'aggregate3', $displayMode,$hasTermsRegex,$maxHeadlines,$self->getRssUrl);
|
||||
my $cache = WebGUI::Cache->new($self->session,$key, 'RSS');
|
||||
my $cached = $cache->get;
|
||||
my ($items, @rss_feeds);
|
||||
|
||||
if ($cached) {
|
||||
$items = $cached->[0];
|
||||
@rss_feeds = @{$cached->[1]};
|
||||
} else {
|
||||
$items = [];
|
||||
for my $url (@{$urls}) {
|
||||
my $rss_info=_get_rss_data($self->session,$url);
|
||||
push(@rss_feeds, $rss_info) if(defined $rss_info);
|
||||
}
|
||||
|
||||
# deal with the fact that we may never get valid data
|
||||
if (scalar(@rss_feeds) < 1) {
|
||||
return ({}, []);
|
||||
my $feed = XML::FeedPP::Atom->new();
|
||||
my $log = $self->session->log;
|
||||
|
||||
# build one feed out of many
|
||||
foreach my $url (split("\n", $self->get('rssUrl'))) {
|
||||
$log->info("Processing FEED: ".$url);
|
||||
$url =~ s/^feed:/http:/;
|
||||
if ($self->get('processMacroInRssUrl')) {
|
||||
WebGUI::Macro::process($self->session, \$url);
|
||||
}
|
||||
|
||||
#Sort feeds in order by channel title.
|
||||
#@rss_feeds=sort{$a->{channel}->{title} cmp $b->{channel}->{title}} @rss_feeds;
|
||||
|
||||
if ($displayMode eq 'grouped') {
|
||||
_create_grouped_items($items,\@rss_feeds,$maxHeadlines,$hasTermsRegex);
|
||||
} else {
|
||||
_create_interleaved_items($items,\@rss_feeds,$maxHeadlines,$hasTermsRegex);
|
||||
my $cache = WebGUI::Cache->new($self->session, $url, "RSS");
|
||||
my $value = $cache->setByHTTP($url, $self->get("cacheTimeout"));
|
||||
eval { $feed->merge($value) };
|
||||
if (my $e = WebGUI::Error->caught()) {
|
||||
$log->error("Syndicated Content asset (".$self->getId.") has a bad feed URL (".$url."). Failed with ".$e->message);
|
||||
}
|
||||
|
||||
#@{$items} = sort { $b->{date} <=> $a->{date} } @{$items};
|
||||
|
||||
$cache->set([$items, \@rss_feeds], 3600);
|
||||
}
|
||||
|
||||
#So return the item loop and the first RSS feed, because
|
||||
#when we're parsing a single feed we can use that feed's title and
|
||||
#description for channel.title, channel.link, and channel.description
|
||||
return ($items,\@rss_feeds);
|
||||
|
||||
# build a new feed that matches the term the user is interested in
|
||||
if ($self->get('hasTerms') ne '') {
|
||||
my @terms = split /,\s*/, $self->get('hasTerms'); # get the list of terms
|
||||
my $termRegex = join("|", map quotemeta($_), @terms); # turn the terms into a regex string
|
||||
my @items = $feed->match_item(title=>qr/$termRegex/msi, description=>qr/$termRegex/msi);
|
||||
$feed->clear_item;
|
||||
foreach my $item (@items) {
|
||||
$feed->add_item($item);
|
||||
}
|
||||
}
|
||||
|
||||
# sort them by date
|
||||
$feed->sort_item();
|
||||
|
||||
# limit the feed to the maximum number of headlines
|
||||
$feed->limit_item($self->get('maxHeadlines'));
|
||||
return $feed;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getTemplateVariables
|
||||
|
||||
Returns a hash reference of template variables.
|
||||
|
||||
=head3 feed
|
||||
|
||||
A reference to an XML::FeedPP object.
|
||||
|
||||
=cut
|
||||
|
||||
# Builds the template variables for $feed (an XML::FeedPP object) and returns
# them as a hash reference:
#
#   channel_*         title, description, date, copyright, link and the
#                     channel_image_* values of the feed
#   item_loop         one hash per item with title, date, category, author,
#                     guid, link, description and the descriptionFirst*
#                     shortened descriptions; always an array reference, empty
#                     when the feed has no items
#
# Every value taken from the feed is passed through
# WebGUI::HTML::filter(..., 'javascript').
sub getTemplateVariables {
    my ($self, $feed) = @_;
    my @items = $feed->get_item;
    my %var = ( item_loop => [] );

    # Accessors are called in scalar context on purpose: called directly in the
    # filter() argument list, an accessor returning an empty list would vanish
    # and 'javascript' would become the text to filter.
    my @channelFields = (
        [ channel_title       => 'title'       ],
        [ channel_description => 'description' ],
        [ channel_date        => 'pubDate'     ],
        [ channel_copyright   => 'copyright'   ],
        [ channel_link        => 'link'        ],
    );
    foreach my $field (@channelFields) {
        my ($name, $method) = @{$field};
        $var{$name} = WebGUI::HTML::filter(scalar($feed->$method()), 'javascript');
    }

    # image() is used as a list here; the order below is the order in which
    # the original code indexed that list.
    my @image = $feed->image;
    my @imageFields = qw(url title link description width height);
    foreach my $index (0 .. $#imageFields) {
        $var{'channel_image_' . $imageFields[$index]} = WebGUI::HTML::filter($image[$index], 'javascript');
    }

    my @itemFields = (
        [ title       => 'title'       ],
        [ date        => 'pubDate'     ],
        [ category    => 'category'    ],
        [ author      => 'author'      ],
        [ guid        => 'guid'        ],
        [ link        => 'link'        ],
        [ description => 'description' ],
    );
    foreach my $object (@items) {
        my %item;
        foreach my $field (@itemFields) {
            my ($name, $method) = @{$field};
            my $value = $object->$method();    # scalar assignment, see above
            # NOTE(review): XML::FeedPP appears to return an array reference
            # for multi-valued elements such as category -- confirm. Joined so
            # the template never shows a stringified reference.
            $value = join(', ', @{$value}) if ref($value) eq 'ARRAY';
            $item{$name} = WebGUI::HTML::filter($value, 'javascript');
        }

        # Each shorter excerpt is cut from the next longer one.
        my $words = $item{description};
        foreach my $count (100, 75, 50, 25, 10) {
            $words =~ s/(((\S+)\s+){$count}).*/$1/s;
            $item{'descriptionFirst' . $count . 'words'} = $words;
        }

        my $paragraphs = $item{description};
        $paragraphs =~ s/^((.*?\n){2}).*/$1/s;
        $item{descriptionFirst2paragraphs} = $paragraphs;
        $paragraphs =~ s/^(.*?\n).*/$1/s;
        $item{descriptionFirstParagraph} = $paragraphs;

        my $sentences = $item{description};
        foreach my $count (4, 3, 2) {
            $sentences =~ s/^((.*?\.){$count}).*/$1/s;
            $item{'descriptionFirst' . $count . 'sentences'} = $sentences;
        }
        $sentences =~ s/^(.*?\.).*/$1/s;
        $item{descriptionFirstSentence} = $sentences;

        push @{$var{item_loop}}, \%item;
    }
    return \%var;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
|
|
@ -593,11 +281,12 @@ sub prepareView {
|
|||
my $template = WebGUI::Asset::Template->new($self->session, $self->get("templateId"));
|
||||
$template->prepare($self->getMetaDataAsTemplateVariables);
|
||||
$self->{_viewTemplate} = $template;
|
||||
my $i18n = WebGUI::International->new($self->session,'Asset_SyndicatedContent');
|
||||
my $rssFeedSuffix=$i18n->get('RSS Feed Title Suffix');
|
||||
my $title = $self->get("title")." ".$rssFeedSuffix;
|
||||
my $title = $self->get("title");
|
||||
$title =~ s/\"/"/g;
|
||||
$self->session->style->setLink($self->getUrl("func=viewRSS20"), { rel=>'alternate', type=>'application/rss+xml', title=>$title });
|
||||
my $style = $self->session->style;
|
||||
$style->setLink($self->getUrl("func=viewRss"), { rel=>'alternate', type=>'application/rss+xml', title=>$title.' (RSS)' });
|
||||
$style->setLink($self->getUrl("func=viewRdf"), { rel=>'alternate', type=>'application/rdf+xml', title=>$title.' (RDF)' });
|
||||
$style->setLink($self->getUrl("func=viewAtom"), { rel=>'alternate', type=>'application/atom+xml', title=>$title.' (Atom)' });
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -625,6 +314,22 @@ Returns the rendered output of the wobject.
|
|||
|
||||
sub view {
|
||||
my $self = shift;
|
||||
|
||||
# try the cached version
|
||||
my $cache = WebGUI::Cache->new($self->session,"view_".$self->getId);
|
||||
my $out = $cache->get;
|
||||
return $out if ($out ne "");
|
||||
|
||||
# generate from scratch
|
||||
my $feed = $self->generateFeed;
|
||||
$out = $self->processTemplate($self->getTemplateVariables($feed),undef,$self->{_viewTemplate});
|
||||
if (!$self->session->var->isAdminOn && $self->get("cacheTimeout") > 10) {
|
||||
$cache->set($out,$self->get("cacheTimeout"));
|
||||
}
|
||||
return $out;
|
||||
|
||||
|
||||
|
||||
my $rssFlavor = shift;
|
||||
if ($rssFlavor eq "" && !$self->session->var->isAdminOn && $self->get("cacheTimeout") > 10) {
|
||||
my $out = WebGUI::Cache->new($self->session,"view_".$self->getId)->get;
|
||||
|
|
@ -688,48 +393,6 @@ sub view {
|
|||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getRssUrl
|
||||
|
||||
Get the RSS URL and process macros if we're supposed to.
|
||||
|
||||
=cut
|
||||
|
||||
# Returns the asset's rssUrl property, with macros expanded when the
# processMacroInRssUrl property is set.
sub getRssUrl {
    my ($self) = @_;
    my $rssUrl = $self->get("rssUrl");
    if ($self->get("processMacroInRssUrl")) {
        WebGUI::Macro::process($self->session, \$rssUrl);
    }
    return $rssUrl;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getContentLastModified ( )
|
||||
|
||||
Derive the last-modified date from the revisionDate of the object and from the dates of the RSS feeds.
|
||||
|
||||
=cut
|
||||
|
||||
# Returns the most recent of the asset's revisionDate and the last_modified
# values of its RSS feeds.
#
# Buggo: this fetches the items on every call, which may be too expensive, but
# there is no other obvious way to get a reasonable last-modified date.
sub getContentLastModified {
    my ($self) = @_;

    my $maxHeadlines = $self->_getMaxHeadlines;
    my @feedUrls     = $self->_getValidatedUrls;
    my (undef, $feeds) = $self->_get_items(\@feedUrls, $maxHeadlines);

    my $newest = $self->get("revisionDate");
    for my $stamp (grep { defined } map { $_->{last_modified} } @{$feeds}) {
        $newest = $stamp if $stamp > $newest;
    }
    return $newest;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_view ( )
|
||||
|
||||
See WebGUI::Asset::Wobject::www_view() for details.
|
||||
|
|
@ -742,61 +405,113 @@ sub www_view {
|
|||
$self->SUPER::www_view(@_);
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS090 ( )
|
||||
=head2 www_viewAtom ( )
|
||||
|
||||
Emit an RSS 0.9 feed.
|
||||
Emit an Atom 0.3 feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS090 {
|
||||
my $self=shift;
|
||||
return $self->view('0.9');
|
||||
sub www_viewAtom {
    my ($self) = @_;

    # Build the aggregate feed first, then copy it into an Atom container so
    # it is serialized in Atom syntax.
    my $source = $self->generateFeed;
    my $output = XML::FeedPP::Atom->new;
    $output->merge($source);

    # Tell the client what it is receiving before handing back the XML.
    $self->session->http->setMimeType('application/atom+xml');
    return $output->to_string;
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS091 ( )
|
||||
=head2 www_viewRdf ( )
|
||||
|
||||
Emit an RSS 0.91 feed.
|
||||
Emit an RSS 1.0 / RDF feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS091 {
|
||||
my $self=shift;
|
||||
return $self->view('0.91');
|
||||
sub www_viewRdf {
    my ($self) = @_;

    # Build the aggregate feed first, then copy it into an RDF container so
    # it is serialized as RSS 1.0 / RDF.
    my $source = $self->generateFeed;
    my $output = XML::FeedPP::RDF->new;
    $output->merge($source);

    # Tell the client what it is receiving before handing back the XML.
    $self->session->http->setMimeType('application/rdf+xml');
    return $output->to_string;
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS10 ( )
|
||||
|
||||
Emit an RSS 1.0 feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS10 {
|
||||
my $self=shift;
|
||||
return $self->view('1.0');
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS20 ( )
|
||||
=head2 www_viewRss ( )
|
||||
|
||||
Emit an RSS 2.0 feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS20 {
|
||||
my $self=shift;
|
||||
return $self->view('2.0');
|
||||
sub www_viewRss {
    my ($self) = @_;

    # Build the aggregate feed first, then copy it into an RSS container so
    # it is serialized as RSS 2.0.
    my $source = $self->generateFeed;
    my $output = XML::FeedPP::RSS->new;
    $output->merge($source);

    # Tell the client what it is receiving before handing back the XML.
    $self->session->http->setMimeType('application/rss+xml');
    return $output->to_string;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS090 ( )
|
||||
|
||||
Deprecated. Use www_viewRss() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS090 {
    my $self = shift;
    # Deprecated RSS 0.90 entry point, kept so existing URLs keep working.
    # Version-specific output is gone; hand off to the single RSS emitter.
    return $self->www_viewRss;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS091 ( )
|
||||
|
||||
Deprecated. Use www_viewRss() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS091 {
    my $self = shift;
    # Deprecated RSS 0.91 entry point, kept so existing URLs keep working.
    # Version-specific output is gone; hand off to the single RSS emitter.
    return $self->www_viewRss;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS10 ( )
|
||||
|
||||
Deprecated. Use www_viewRdf() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS10 {
    my ($self) = @_;
    # Deprecated RSS 1.0 entry point; RSS 1.0 is served by the RDF emitter.
    return $self->www_viewRdf;
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS20 ( )
|
||||
|
||||
Deprecated. Use www_viewRss() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS20 {
    my $self = shift;
    # Deprecated RSS 2.0 entry point, kept so existing URLs keep working.
    # www_viewRss is the current name for the same RSS 2.0 output.
    return $self->www_viewRss;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
1;
|
||||
|
||||
|
|
|
|||
|
|
@ -17,24 +17,27 @@ our $HELP = {
|
|||
},
|
||||
],
|
||||
variables => [
|
||||
{ 'name' => 'channel.title' },
|
||||
{ 'name' => 'channel.description' },
|
||||
{ 'name' => 'channel.link' },
|
||||
{ 'name' => 'rss.url',
|
||||
'variables' => [
|
||||
{ 'name' => 'rss.url.0.9' },
|
||||
{ 'name' => 'rss.url.0.91' },
|
||||
{ 'name' => 'rss.url.1.0' },
|
||||
{ 'name' => 'rss.url.2.0' }
|
||||
]
|
||||
},
|
||||
{ 'name' => 'channel_title' },
|
||||
{ 'name' => 'channel_description' },
|
||||
{ 'name' => 'channel_link' },
|
||||
{ 'name' => 'channel_date' },
|
||||
{ 'name' => 'channel_copyright' },
|
||||
{ 'name' => 'channel_image_url' },
|
||||
{ 'name' => 'channel_image_title' },
|
||||
{ 'name' => 'channel_image_link' },
|
||||
{ 'name' => 'channel_image_description' },
|
||||
{ 'name' => 'channel_image_width' },
|
||||
{ 'name' => 'channel_image_height' },
|
||||
{ 'name' => 'rss_url' },
|
||||
{ 'name' => 'rdf_url' },
|
||||
{ 'name' => 'atom_url' },
|
||||
{ 'name' => 'item_loop',
|
||||
'variables' => [
|
||||
{ 'name' => 'site_title' },
|
||||
{ 'name' => 'site_link' },
|
||||
{ 'name' => 'new_rss_site' },
|
||||
{ 'name' => 'title' },
|
||||
{ 'name' => 'link' },
|
||||
{ 'name' => 'category' },
|
||||
{ 'name' => 'author' },
|
||||
{ 'name' => 'guid' },
|
||||
{ 'name' => 'description' },
|
||||
{ 'name' => 'descriptionFirst100words' },
|
||||
{ 'name' => 'descriptionFirst75words' },
|
||||
|
|
@ -73,7 +76,6 @@ our $HELP = {
|
|||
{ 'name' => 'rssUrl' },
|
||||
{ 'name' => 'processMacrosInRssUrl' },
|
||||
{ 'name' => 'maxHeadlines' },
|
||||
{ 'name' => 'displayMode' },
|
||||
{ 'name' => 'hasTerms' },
|
||||
],
|
||||
related => [],
|
||||
|
|
|
|||
|
|
@ -76,65 +76,42 @@ sub execute {
|
|||
$self->session->errorHandler->error("Could not instanciate Workflow Instance in GetSyndicatedContent Activity");
|
||||
return $self->ERROR;
|
||||
}
|
||||
|
||||
my $log = $self->session->log;
|
||||
# start time to check for timeouts
|
||||
my $time = time();
|
||||
my $ttl = $self->getTTL;
|
||||
|
||||
my @syndicatedUrls = @{$self->getSyndicatedUrls($instance)};
|
||||
while (my $url = shift(@syndicatedUrls)) {
|
||||
my $assets = JSON->new->decode($instance->getScratch("syndicatedassets") || '[]');
|
||||
if (scalar @$assets < 1) {
|
||||
$assets = $self->session->db->buildArrayRef("select assetId from asset where className like 'WebGUI::Asset::Wobject::SyndicatedContent'");
|
||||
}
|
||||
while (my $id = shift(@{$assets})) {
|
||||
# Get RSS data, which will be stored in the cache
|
||||
$self->session->errorHandler->info("GetSyndicatedContent workflow: Caching $url");
|
||||
my $returnValue = WebGUI::Asset::Wobject::SyndicatedContent::_get_rss_data($self->session, $url);
|
||||
if (!defined $returnValue) {
|
||||
$self->session->errorHandler->warn("GetSyndicatedContent Workflow Activity: _get_rss_data returned undef while trying to process syndicated content url $url, which usually indicates an improper URL, or a malformed document");
|
||||
next;
|
||||
}
|
||||
$log->info("GetSyndicatedContent: Caching for $id");
|
||||
my $asset = WebGUI::Asset::Wobject::SyndicatedContent->new($self->session, $id);
|
||||
if (defined $asset) {
|
||||
my $feed = $asset->generateFeed;
|
||||
unless ($feed->isa('XML::FeedPP')) {
|
||||
$log->error("GetSyndicatedContent: Syndicated Content Asset $id returned an invalid feed");
|
||||
}
|
||||
}
|
||||
else {
|
||||
$log->error("GetSyndicatedContent: Couldn't instanciate $id")
|
||||
}
|
||||
# Check for timeout
|
||||
last
|
||||
if (time() - $time > $ttl);
|
||||
last if (time() - $time > $ttl);
|
||||
}
|
||||
|
||||
# if there are urls left, we need to process again
|
||||
if (scalar(@syndicatedUrls) > 0) {
|
||||
$instance->setScratch("syndicatedUrls", JSON::encode_json(\@syndicatedUrls));
|
||||
if (scalar(@$assets) > 0) {
|
||||
$instance->setScratch("syndicatedassets", JSON->new->encode($assets));
|
||||
return $self->WAITING;
|
||||
}
|
||||
$instance->deleteScratch("syndicatedUrls");
|
||||
$instance->deleteScratch("syndicatedassets");
|
||||
return $self->COMPLETE;
|
||||
}
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
=head2 getSyndicatedUrls ( instance )
|
||||
|
||||
Returns URLs from all of the Syndicated Content Wobjects from scratch or fetches them from the db if needed
|
||||
|
||||
=head3 instance

The workflow instance whose scratch space is used to cache the list of URLs
|
||||
|
||||
=cut
|
||||
|
||||
sub getSyndicatedUrls {
    my ($self, $instance) = @_;

    # A previous pass may have left the outstanding URL list in the
    # instance's scratch space; if so, use that and skip the asset walk.
    my $cached = $instance->getScratch("syndicatedUrls");
    return JSON::decode_json($cached) if $cached;

    # Otherwise collect every Syndicated Content asset below the root.
    my $assets = WebGUI::Asset->getRoot($self->session)->getLineage(['descendants'], {
        includeOnlyClasses => ['WebGUI::Asset::Wobject::SyndicatedContent'],
        returnObjects      => 1,
    });

    # Each asset's URL property is split on whitespace, one entry per piece.
    my @urls = map { split(/\s+/, $_->getRssUrl) } @{$assets};

    # Remember the list so a later call can resume from scratch.
    $instance->setScratch("syndicatedUrls", JSON::encode_json(\@urls));
    return \@urls;
}
|
||||
|
||||
|
||||
1;
|
||||
|
|
|
|||
|
|
@ -46,44 +46,89 @@ our $I18N = {
|
|||
message => q|Edit Syndicated Content|
|
||||
},
|
||||
|
||||
'channel.title' => {
|
||||
message => q|The title of this piece of syndicated content. This will be the same as the title of the Syndicated Content object when you're creating an aggregate feed.|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_title' => {
|
||||
message => q|The title of this piece of syndicated content. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'channel.description' => {
|
||||
message => q|A description of the content available through this channel. This will be the same as the description of the Syndicated Content object when you're creating an aggregate feed.|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_description' => {
|
||||
message => q|A description of the content available through this channel. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'channel.link' => {
|
||||
message => q|A URL back to the originating site of this channel. This variable *will not* exist when you're creating an aggregate feed, because there's no single channel to link to.|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_link' => {
|
||||
message => q|A URL back to the originating site of this channel. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'rss.url' => {
|
||||
message => q|This is the URL to use to get the contents of this Syndicated Content wobject as an RSS 2.0 feed. Additionally, you can specify RSS versions via the following template variables:|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_date' => {
|
||||
message => q|The date this channel was updated. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'rss.url.0.9' => {
|
||||
message => q|The contents of this wobject as an RSS 0.9 feed.|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_copyright' => {
|
||||
message => q|Copyright holder information. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'rss.url.0.91' => {
|
||||
message => q|The contents of this wobject as an RSS 0.91 feed.|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_image_url' => {
|
||||
message => q|The URL of the image attached to this feed. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'rss.url.1.0' => {
|
||||
message => q|The contents of this wobject as an RSS 1.0 feed.|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_image_title' => {
|
||||
message => q|The title of the image attached to this feed. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'rss.url.2.0' => {
|
||||
message => q|The contents of this wobject as an RSS 2.0 feed.|,
|
||||
lastUpdated => 1149567508,
|
||||
'channel_image_description' => {
|
||||
message => q|The description of the image attached to this feed. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'channel_image_link' => {
|
||||
message => q|The URL of the link that should wrap this feed's image. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'channel_image_width' => {
|
||||
message => q|The width in pixels of this feed's image. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'channel_image_height' => {
|
||||
message => q|The height in pixels of this feed's image. This variable will be populated by the first feed in a multi-feed list.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'rss_url' => {
|
||||
message => q|This is the URL to use to get the contents of this Syndicated Content asset as an RSS 2.0 feed. Additionally, you can specify RSS versions via the following template variables:|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'rdf_url' => {
|
||||
message => q|The contents of this asset as an RDF/RSS 1.0 feed.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'atom_url' => {
|
||||
message => q|The contents of this asset as an Atom 0.3 feed.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'category' => {
|
||||
message => q|A category this item belongs to.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'author' => {
|
||||
message => q|The publisher of this item.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'guid' => {
|
||||
message => q|A unique id for this item.|,
|
||||
lastUpdated => 0,
|
||||
},
|
||||
|
||||
'item_loop' => {
|
||||
|
|
@ -91,21 +136,6 @@ our $I18N = {
|
|||
lastUpdated => 1149567508,
|
||||
},
|
||||
|
||||
'site_title' => {
|
||||
message => q|The title of the RSS feed this item comes from|,
|
||||
lastUpdated => 1149567508,
|
||||
},
|
||||
|
||||
'site_link' => {
|
||||
message => q|Link to the source RSS feed.|,
|
||||
lastUpdated => 1149567508,
|
||||
},
|
||||
|
||||
'new_rss_site' => {
|
||||
message => q|A "boolean" variable (suitable for using in a <tmpl_if> tag) that indicates we've started outputting items from a source RSS feed different than the previous item. This is most useful when you're viewing feeds in "grouped" mode- it gives you a hook to output <b>site_title</b> and <b>site_link</b> at the right time.|,
|
||||
lastUpdated => 1149567508,
|
||||
},
|
||||
|
||||
'title' => {
|
||||
message => q|The title of a piece of content. If you're filtering on terms, this field will be inspected.|,
|
||||
lastUpdated => 1149567508,
|
||||
|
|
@ -126,51 +156,21 @@ our $I18N = {
|
|||
message => q|Syndicated Content Template|
|
||||
},
|
||||
|
||||
'displayModeLabel' => {
|
||||
lastUpdated => 1047855526,
|
||||
message => q|Display Mode|
|
||||
},
|
||||
|
||||
'displayModeSubtext' => {
|
||||
lastUpdated => 1047855526,
|
||||
message => q|<p>"Interleaved" means items from all feeds are lumped together, "Grouped by Feed" means items are grouped by the feed they came from. Either setting is fine if you're only bringing in a single feed.</p>|
|
||||
},
|
||||
|
||||
'grouped' => {
|
||||
lastUpdated => 1047855526,
|
||||
message => q|Grouped by Feed|
|
||||
},
|
||||
|
||||
'hasTermsLabel' => {
|
||||
lastUpdated => 1047855526,
|
||||
message => q|With any of these terms|
|
||||
},
|
||||
|
||||
'interleaved' => {
|
||||
lastUpdated => 1047855526,
|
||||
message => q|Interleaved|
|
||||
},
|
||||
|
||||
'rssTabName' => {
|
||||
lastUpdated => 1118417024,
|
||||
message => q|RSS|
|
||||
},
|
||||
|
||||
'RSS Feed Title Suffix' => {
|
||||
lastUpdated => 1118417024,
|
||||
message => q|RSS 2.0 Feed|
|
||||
},
|
||||
|
||||
'72 description' => {
|
||||
message => q|Select a template for this content.|,
|
||||
lastUpdated => 1119977659,
|
||||
},
|
||||
|
||||
'displayModeLabel description' => {
|
||||
message => q|<p>If you're aggregating feeds, you can change the mode in which the items are displayed. "Grouped by Feed" means the items will be grouped together by the feeds they come from. "Interleaved" means the items will be mixed together in a "round-robin" fashion from all the feeds. If you're grouping your feeds, please look at <b>new_rss_site</b> "item_loop" template variables, it gives you a hook allowing you to output the feed title</p>|,
|
||||
lastUpdated => 1146799950,
|
||||
},
|
||||
|
||||
'hasTermsLabel description' => {
|
||||
message => q|<p>Enter terms (separated by commas) that you'd like to filter the feeds on. For instance, if you enter:</p>
|
||||
<div class="helpIndent"><b>linux, windows development, blogs</b></div>
|
||||
|
|
@ -192,10 +192,10 @@ our $I18N = {
|
|||
<li><a href="http://w.moreover.com/">http://w.moreover.com/</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<p>Currently, WebGUI can handle RSS versions .90, .91, 1.0, and 2.0. Atom feeds aren't supported for now. Probably other RSS-ish files would work too.
|
||||
<p>Currently, WebGUI can handle RSS versions .90, .91, 1.0, and 2.0; Atom .3 and 1.0. Probably other RSS-ish files would work too.
|
||||
</p>
|
||||
<p>To create an aggregate RSS feed (one that pulls information from multiple RSS feeds), include a list of URLs, one on each line, instead of a single URL. Items will be sorted by the date WebGUI first received the story.</p>|,
|
||||
lastUpdated => 1168228049,
|
||||
lastUpdated => 1225928949,
|
||||
},
|
||||
|
||||
'3 description' => {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -90,7 +90,7 @@ checkModule("HTML::Highlight", 0.20 );
|
|||
checkModule("HTML::TagFilter", 0.07 );
|
||||
checkModule("HTML::Template", 2.9 );
|
||||
checkModule("HTML::Template::Expr", 0.05, 2 );
|
||||
checkModule("XML::RSSLite", 0.11 );
|
||||
checkModule("XML::FeedPP", 0.36 );
|
||||
checkModule("JSON", 2.04 );
|
||||
checkModule("Config::JSON", "1.1.2" );
|
||||
checkModule("Text::CSV_XS", "0.52" );
|
||||
|
|
|
|||
|
|
@ -20,8 +20,9 @@ use Data::Dumper;
|
|||
|
||||
use WebGUI::Test;
|
||||
use WebGUI::Session;
|
||||
use Test::More tests => 20; # increment this value for each test you create
|
||||
use Test::More tests => 19; # increment this value for each test you create
|
||||
use WebGUI::Asset::Wobject::SyndicatedContent;
|
||||
use XML::FeedPP;
|
||||
|
||||
my $session = WebGUI::Test->session;
|
||||
my %var;
|
||||
|
|
@ -49,9 +50,7 @@ isa_ok($syndicated_content, 'WebGUI::Asset::Wobject::SyndicatedContent');
|
|||
my $newSyndicatedContentSettings = {
|
||||
cacheTimeout => 124,
|
||||
templateId => "PBtmpl0000000000000065",
|
||||
#rssUrl => "http://morningmonologue.wordpress.com/feed/", # broken
|
||||
#rssUrl => "http://motivationalmuse.wordpress.com/feed/", #working feed
|
||||
rssUrl => 'https://svn.webgui.org/svnweb/plainblack/rss/WebGUI/',
|
||||
rssUrl => 'http://svn.webgui.org/svnweb/plainblack/rss/WebGUI/',
|
||||
};
|
||||
|
||||
# update the new values for this instance
|
||||
|
|
@ -62,21 +61,9 @@ foreach my $newSetting (keys %{$newSyndicatedContentSettings}) {
|
|||
is ($syndicated_content->get($newSetting), $newSyndicatedContentSettings->{$newSetting}, "updated $newSetting is ".$newSyndicatedContentSettings->{$newSetting});
|
||||
}
|
||||
|
||||
# Can we get the rss url?
|
||||
ok($syndicated_content->getRssUrl, 'getRSSUrl returns something.');
|
||||
|
||||
# test getContentLastModified
|
||||
ok($syndicated_content->getContentLastModified, 'getContentLastModified returns something.');
|
||||
|
||||
# Test max headlines parsed from feed
|
||||
my $max_headlines = $syndicated_content->_getMaxHeadlines;
|
||||
ok($syndicated_content->_getMaxHeadlines, "Max Headlines returned a value [$max_headlines]");
|
||||
|
||||
# Limit the headlines so the test will complete in a reasonable amount of time.
|
||||
# default is 100K titles, which is way too much for a test
|
||||
$syndicated_content->{maxHeadlines} = "3";
|
||||
my @validated_urls = $syndicated_content->_getValidatedUrls;
|
||||
ok($syndicated_content->_getValidatedUrls, "Validated Urls returned a value [@validated_urls]");
|
||||
my $feed = $syndicated_content->generateFeed;
|
||||
isa_ok($feed, 'XML::FeedPP', 'Got an XML::FeedPP object');
|
||||
isnt($feed->title,'', 'the feed has data');
|
||||
|
||||
# Lets make sure the view method returns something.
|
||||
is ($syndicated_content->{_viewTemplate}, undef, 'internal template cache unset until prepareView is called');
|
||||
|
|
@ -85,33 +72,19 @@ $syndicated_content->prepareView;
|
|||
isnt ($syndicated_content->{_viewTemplate}, undef, 'internal template cache set by prepare view');
|
||||
isa_ok ($syndicated_content->{_viewTemplate}, 'WebGUI::Asset::Template', 'internal template cache');
|
||||
|
||||
my $output = $syndicated_content->view('2.0');
|
||||
isnt ($output, "", 'Default view method returns something for RSS 2.0 format');
|
||||
ok($syndicated_content->view(), 'it generates some output');
|
||||
|
||||
my $output = $syndicated_content->view('1.0');
|
||||
isnt ($output, "", 'Default view method returns something for RSS 1.0 format');
|
||||
my $output = $syndicated_content->www_viewRss;
|
||||
my $feed = XML::FeedPP->new($output);
|
||||
cmp_ok($feed->get_item, ">", 0, 'RSS has items');
|
||||
|
||||
# Not really sure what this does...
|
||||
#my $hasTerms = $syndicated_content->getValue('hasTerms');
|
||||
#ok($hasTerms, "hasTerms contains a value [$hasTerms]");
|
||||
my $output = $syndicated_content->www_viewRdf;
|
||||
my $feed = XML::FeedPP->new($output);
|
||||
cmp_ok($feed->get_item, ">", 0, 'RDF has items');
|
||||
|
||||
my $hasTermsRegex = $syndicated_content->_make_regex( $syndicated_content->getValue('hasTerms') );
|
||||
|
||||
my $rss_info = WebGUI::Asset::Wobject::SyndicatedContent::_get_rss_data($session,$newSyndicatedContentSettings->{'rssUrl'});
|
||||
ok(ref($rss_info) eq 'HASH', "Hashref returned from _get_rss_data");
|
||||
push(@rss_feeds, $rss_info);
|
||||
|
||||
|
||||
my $items = [];
|
||||
WebGUI::Asset::Wobject::SyndicatedContent::_create_interleaved_items($items, \@rss_feeds , $max_headlines, $hasTermsRegex);
|
||||
ok($items , "Got results back from XML" );
|
||||
|
||||
my($item_loop,$rss_feeds) = $syndicated_content->_get_items(\@validated_urls, $max_headlines);
|
||||
ok(ref($item_loop) eq 'ARRAY',"Arrayref of items returned from _get_items" );
|
||||
ok(ref($rss_feeds) eq 'ARRAY',"Arrayref of feeds returned from _get_items" );
|
||||
|
||||
# update var with item_loop for the upcoming template processing
|
||||
$var{item_loop} = $item_loop;
|
||||
my $output = $syndicated_content->www_viewAtom;
|
||||
my $feed = XML::FeedPP->new($output);
|
||||
cmp_ok($feed->get_item, ">", 0, 'Atom has items');
|
||||
|
||||
# create a new template object in preparation for rendering
|
||||
my $template = WebGUI::Asset::Template->new($session, $syndicated_content->get("templateId"));
|
||||
|
|
@ -120,12 +93,17 @@ isa_ok($template, 'WebGUI::Asset::Template');
|
|||
|
||||
$syndicated_content->{_viewTemplate} = $template;
|
||||
|
||||
# Is a WebGUI URL created for the RSS feed?
|
||||
my $url = $syndicated_content->_createRSSURLs(\%var);
|
||||
ok($url,"A URL was created for RSS feed");
|
||||
# check out the template vars
|
||||
|
||||
my $var = $syndicated_content->getTemplateVariables($feed);
|
||||
|
||||
isnt($var->{channel_description}, '', 'got a channel description');
|
||||
isnt($var->{channel_title}, '', 'got a channel title');
|
||||
isnt($var->{channel_link}, '', 'got a channel link');
|
||||
cmp_ok(scalar(@{$var->{item_loop}}), '>', 0, 'the item loop has items');
|
||||
|
||||
# processTemplate, this is where we run into trouble...
|
||||
my $processed_template = eval {$syndicated_content->processTemplate(\%var,undef,$template) };
|
||||
my $processed_template = eval {$syndicated_content->processTemplate($var,undef,$template) };
|
||||
ok($processed_template, "A response was received from processTemplate.");
|
||||
|
||||
END {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue