- The Syndicated Content asset was rewritten, and now uses 35% less memory and is 400% faster.
This commit is contained in:
parent
d34eadc40e
commit
ef6aedc862
12 changed files with 393 additions and 2180 deletions
|
|
@ -14,16 +14,13 @@ use strict;
|
|||
use HTML::Entities;
|
||||
use Tie::IxHash;
|
||||
use WebGUI::Cache;
|
||||
use WebGUI::Exception;
|
||||
use WebGUI::HTML;
|
||||
use WebGUI::International;
|
||||
use WebGUI::Asset::Wobject;
|
||||
use base 'WebGUI::Asset::Wobject';
|
||||
use WebGUI::Macro;
|
||||
use XML::RSSLite;
|
||||
use XML::RSS::Creator;
|
||||
use LWP::UserAgent;
|
||||
use Encode;
|
||||
use XML::FeedPP;
|
||||
|
||||
our @ISA = qw(WebGUI::Asset::Wobject);
|
||||
|
||||
=head1 NAME
|
||||
|
||||
|
|
@ -31,7 +28,7 @@ Package WebGUI::Asset::Wobject::SyndicatedContent
|
|||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Displays items and channels from RSS feeds.
|
||||
Displays items and channels from RSS/Atom/RDF feeds.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
|
|
@ -43,55 +40,6 @@ These methods are available from this class:
|
|||
|
||||
=cut
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
sub _constructRSS {
|
||||
my($self,$rssObject,$var)=@_;
|
||||
#They've chosen to emit this as an RSS feed, in one of the four flavors we support.
|
||||
$rssObject->channel(
|
||||
title=>$var->{'channel.title'} || $self->get('title'),
|
||||
link=>$self->session->url->page('',1),
|
||||
description=>$var->{'channel.description'} || ''
|
||||
);
|
||||
foreach my $item (@{$var->{item_loop}}) {
|
||||
# I know this seems kludgy, but because XML::RSSLite parses
|
||||
# feeds loosely, sometimes it returns a data structure when it shouldn't.
|
||||
# So we're only pushing in attributes when they AREN'T a reference to
|
||||
# a data structure.
|
||||
my %attributes;
|
||||
foreach my $attribute(keys %$item){
|
||||
$attributes{$attribute}=$item->{$attribute} if (! ref($item->{$attribute}));
|
||||
}
|
||||
$rssObject->add_item(%attributes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
sub _createRSSURLs {
|
||||
my $self=shift;
|
||||
my $var=shift;
|
||||
foreach({ver=>'1.0',param=>'10'},{ver=>'0.9',param=>'090'},{ver=>'0.91',param=>'091'},{ver=>'2.0',param=>'20'}){
|
||||
$var->{'rss.url.'.$_->{ver}}=$self->getUrl('func=viewRSS'.$_->{param});
|
||||
}
|
||||
$var->{'rss.url'}=$self->getUrl('func=viewRSS20');
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
sub _getMaxHeadlines {
|
||||
my $self = shift;
|
||||
return $self->get('maxHeadlines') || 1000000;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
sub _getValidatedUrls {
|
||||
my $self = shift;
|
||||
my @urls = split(/\s+/,$self->getRssUrl);
|
||||
my @validatedUrls = ();
|
||||
foreach my $url (@urls) {
|
||||
push(@validatedUrls, $url) if ($url =~ m/^http/);
|
||||
}
|
||||
return @validatedUrls
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
|
|
@ -129,6 +77,7 @@ sub appendChoppedDescriptionTemplateVars {
|
|||
$item->{"descriptionFirstSentence"} =~ s/^(.*?\.).*/$1/s;
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 definition ( definition )
|
||||
|
|
@ -179,26 +128,13 @@ sub definition {
|
|||
label=>$i18n->get('process macros in rss url'),
|
||||
hoverHelp=>$i18n->get('process macros in rss url description'),
|
||||
},
|
||||
maxHeadlines=>{
|
||||
tab=>"properties",
|
||||
maxHeadlines=>{
|
||||
tab=>"display",
|
||||
fieldType=>'integer',
|
||||
defaultValue=>10,
|
||||
label=>$i18n->get(3),
|
||||
hoverHelp=>$i18n->get('3 description')
|
||||
},
|
||||
displayMode=>{
|
||||
tab=>"display",
|
||||
fieldType=>'selectBox',
|
||||
defaultValue=>'interleaved',
|
||||
options=>{
|
||||
'interleaved'=>$i18n->get('interleaved'),
|
||||
'grouped'=>$i18n->get('grouped'),
|
||||
},
|
||||
sortByValue=>1,
|
||||
label=>$i18n->get('displayModeLabel'),
|
||||
hoverHelp=>$i18n->get('displayModeLabel description'),
|
||||
subtext=>$i18n->get('displayModeSubtext')
|
||||
},
|
||||
hasTerms=>{
|
||||
tab=>"properties",
|
||||
fieldType=>'text',
|
||||
|
|
@ -221,363 +157,115 @@ sub definition {
|
|||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# strip all html tags from the given data structure. This is important to
|
||||
# prevent cross site scripting attacks
|
||||
|
||||
sub _strip_html {
|
||||
unless (ref $_[0]) {
|
||||
return $_[0] = WebGUI::HTML::filter($_[0], 'all');
|
||||
}
|
||||
my $ref = shift;
|
||||
if (ref $ref eq 'HASH') {
|
||||
if (exists $ref->{description}) {
|
||||
$ref->{description} = HTML::Entities::decode_entities($ref->{description});
|
||||
}
|
||||
foreach my $value (values %$ref) {
|
||||
_strip_html($value);
|
||||
}
|
||||
}
|
||||
elsif (ref $ref eq 'ARRAY') {
|
||||
foreach my $value (@$ref) {
|
||||
_strip_html($value);
|
||||
}
|
||||
}
|
||||
return $ref;
|
||||
}
|
||||
=head2 generateFeed ()
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# horrible kludge to find the channel or item record
|
||||
# in the varying kinds of rss structures returned by RSSLite
|
||||
Combines all feeds into a single XML::FeedPP object.
|
||||
|
||||
sub _find_record {
|
||||
my ($data, $regex) = @_;
|
||||
=cut
|
||||
|
||||
if (ref($data) eq 'HASH') {
|
||||
# reset the hash before calling each()
|
||||
keys(%{$data});
|
||||
while (my ($name, $val) = each(%{$data})) {
|
||||
if ($name =~ $_[1]) {
|
||||
if ((((ref($val) eq 'HASH') &&
|
||||
($val->{link} || $val->{title} ||
|
||||
$val->{description})) ||
|
||||
((ref($val) eq 'ARRAY') && @{$val} &&
|
||||
(ref($val->[0]) eq 'HASH') &&
|
||||
($val->[0]->{link} ||
|
||||
$val->[0]->{title} ||
|
||||
$val->[0]->{description})))) {
|
||||
return $val;
|
||||
}
|
||||
}
|
||||
if (my $record = _find_record($val, $regex)) {
|
||||
return $record;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return undef;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# First, get rid of things we don't want.
|
||||
# Copy the guid field to the link field if the guid looks like a link.
|
||||
# This is a kludge that gets around the fact that some folks use the link
|
||||
# field as the link to the story while others use it as the link
|
||||
# to the story about which the story is written. The webuig templates seem
|
||||
# to assume the former, so we should use the guid instead of the link, b/c
|
||||
# the guid, if it is a link, always means the former.
|
||||
# Also copy the first few words of the description into the title if
|
||||
# there is no title
|
||||
|
||||
sub _normalize_items {
|
||||
#my ($items) = @_;
|
||||
|
||||
# max number of words to take from description to fill in an empty
|
||||
# title
|
||||
my $max_words = 10;
|
||||
|
||||
for my $item (@{$_[0]}) {
|
||||
# Get rid of any keys in the items that we do not want
|
||||
my @wantedKeys = qw( title link description pubDate );
|
||||
%{ $item } = map { $_ => $item->{ $_ } } @wantedKeys;
|
||||
|
||||
if ($item->{guid} && ($item->{guid} =~ /^http:\/\//i)) {
|
||||
$item->{link} = $item->{guid};
|
||||
}
|
||||
if (!$item->{title}) {
|
||||
my @description_words = split(/\s/, $item->{description});
|
||||
if (@description_words <= $max_words) {
|
||||
$item->{title} = $item->{description};
|
||||
} else {
|
||||
$item->{title} = join(' ', @description_words[0..$max_words-1]) .
|
||||
' ...';
|
||||
}
|
||||
}
|
||||
|
||||
# IE doesn't recognize '
|
||||
$item->{title} =~ s/'/\'/g;
|
||||
$item->{description} =~ s/'/\'/g;
|
||||
$item->{category} = [$item->{category}]
|
||||
if ref $item->{category} ne 'ARRAY';
|
||||
appendChoppedDescriptionTemplateVars($item);
|
||||
}
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
sub _get_rss_data {
|
||||
my $session = shift;
|
||||
my $url = shift;
|
||||
# format of cache was changed, differentiate
|
||||
my $cache = WebGUI::Cache->new($session,'url2:' . $url, 'RSS');
|
||||
my $rss = $cache->get;
|
||||
if ($rss) {
|
||||
if ($rss->{error}) {
|
||||
return undef;
|
||||
}
|
||||
return $rss;
|
||||
}
|
||||
else {
|
||||
my $ua = LWP::UserAgent->new(timeout => 5);
|
||||
$ua->env_proxy;
|
||||
my $response = $ua->get($url);
|
||||
if (!$response->is_success()) {
|
||||
$session->errorHandler->warn("Error retrieving url '$url': " .
|
||||
$response->status_line());
|
||||
$cache->set({'error' => 1, 'error_status' => $response->status_line}, 3600);
|
||||
return undef;
|
||||
}
|
||||
my $xmlEncoding;
|
||||
if ($response->content =~ /<\?xml.*?encoding=['"](\S+)['"]/i) {
|
||||
$xmlEncoding = $1;
|
||||
}
|
||||
|
||||
my $xml = $response->decoded_content($xmlEncoding ? (charset => $xmlEncoding) : ());
|
||||
|
||||
# Approximate with current time if we don't have a Last-Modified
|
||||
# header coming from the RSS source.
|
||||
my $http_lm = $response->last_modified;
|
||||
my $last_modified = defined($http_lm)? $http_lm : time;
|
||||
|
||||
# XML::RSSLite does not handle <![CDATA[ ]]> so:
|
||||
$xml =~ s/<!\[CDATA\[(.*?)\]\]>/HTML::Entities::encode_entities($1)/esg;
|
||||
|
||||
my $rss_lite = {};
|
||||
eval {
|
||||
XML::RSSLite::parseXML($rss_lite, \$xml);
|
||||
};
|
||||
if ($@) {
|
||||
$session->errorHandler->warn("error parsing rss for url $url :".$@);
|
||||
#Returning undef on a parse failure is a change from previous behaviour,
|
||||
#but it SHOULDN'T have a major effect.
|
||||
return undef;
|
||||
}
|
||||
|
||||
# make sure that the {channel} points to the channel
|
||||
# description record and that {items} points to the list
|
||||
# of items. without this voodoo, different versions of
|
||||
# rss return the data in different places in the data
|
||||
# structure.
|
||||
|
||||
$rss_lite = {channel => $rss_lite};
|
||||
$rss = {};
|
||||
if (!($rss->{channel} =
|
||||
_find_record($rss_lite, qr/^channel$/))) {
|
||||
$session->errorHandler->warn("unable to find channel info for url $url");
|
||||
}
|
||||
if (!($rss->{items} = _find_record($rss_lite, qr/^items?$/))) {
|
||||
$session->errorHandler->warn("unable to find item info for url $url");
|
||||
$rss->{items} = [];
|
||||
}
|
||||
|
||||
_strip_html($rss);
|
||||
$rss->{items} = [ $rss->{items} ] unless (ref $rss->{items} eq 'ARRAY');
|
||||
|
||||
_normalize_items($rss->{items});
|
||||
#Assign dates "globally" rather than when seen in a viewed feed.
|
||||
#This is important because we can "filter" now and want to ensure we keep order
|
||||
#correctly as new items appear.
|
||||
_assign_rss_dates($session, $rss->{items});
|
||||
|
||||
# Store last-modified date as well.
|
||||
$rss->{last_modified} = $last_modified;
|
||||
|
||||
#Default to an hour timeout
|
||||
$cache->set($rss, 3600);
|
||||
}
|
||||
|
||||
return $rss;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# rss items don't have a standard date, so timestamp them the first time
|
||||
# we see them and use that timestamp as the date. Periodically nuke the
|
||||
# whole database to keep the thing from growing too large
|
||||
|
||||
sub _assign_rss_dates {
|
||||
my $session = shift;
|
||||
my ($items) = @_;
|
||||
|
||||
for my $item (@{$items}) {
|
||||
my $key = 'dates:' . ($item->{guid} || $item->{title} ||
|
||||
$item->{description} || $item->{link});
|
||||
my $cache = WebGUI::Cache->new($session,$key, 'RSS');
|
||||
if (my $date = $cache->get()) {
|
||||
$item->{date} = $date;
|
||||
}
|
||||
else {
|
||||
my $pubDate;
|
||||
if ($item->{pubDate}) {
|
||||
$pubDate = $session->datetime->mailToEpoch($item->{pubDate});
|
||||
}
|
||||
$item->{date} = $pubDate || $session->datetime->time() - (60 * 60 * 24 * 365); # handicap the undated
|
||||
$cache->set($item->{date}, '1 year');
|
||||
}
|
||||
}
|
||||
@{$items} = sort { $b->{date} <=> $a->{date} } @{$items};
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# $items is the hashref to put items into.
|
||||
# $rss_feeds is an arrayref of all the feeds in this wobject
|
||||
# The only difference between an "interleaved" feed and a grouped feed
|
||||
# is the order the items are output.
|
||||
|
||||
sub _create_grouped_items{
|
||||
my($items,$rss_feeds,$maxHeadlines,$hasTermsRegex)=@_;
|
||||
|
||||
_create_interleaved_items($items,$rss_feeds,$maxHeadlines,$hasTermsRegex);
|
||||
|
||||
@$items=sort{$a->{'site_title'} cmp $b->{'site_title'}} @$items;
|
||||
|
||||
#Loop through the items and output the "site_
|
||||
my $siteTitleTracker;
|
||||
foreach (@$items) {
|
||||
if ($siteTitleTracker ne $_->{site_title}) {
|
||||
$_->{new_rss_site} = 1;
|
||||
}
|
||||
$siteTitleTracker = $_->{site_title};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Loop through the feeds for this wobject
|
||||
# and push in the items in "interleaved mode"
|
||||
# No need to return because we're doing everything by reference.
|
||||
|
||||
sub _create_interleaved_items {
|
||||
my ($items, $rss_feeds, $maxHeadlines, $hasTermsRegex) = @_;
|
||||
# put all items together into a single list
|
||||
foreach my $rss (@$rss_feeds) {
|
||||
while (my $item = shift @{$rss->{items}}) {
|
||||
if ($hasTermsRegex && ! _check_hasTerms($item, $hasTermsRegex)) {
|
||||
next;
|
||||
}
|
||||
$item->{site_title} = $rss->{channel}->{title};
|
||||
$item->{site_link} = $rss->{channel}->{link};
|
||||
push @$items, $item;
|
||||
}
|
||||
}
|
||||
@$items = sort { $b->{date} <=> $a->{date} } @$items;
|
||||
# limit to $maxHeadlines
|
||||
if (@$items > $maxHeadlines) {
|
||||
splice @$items, $maxHeadlines;
|
||||
}
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# Uses the regex constructed in _get_items (with the terms defaulting to OR)
|
||||
# to see if the title or description associated with this item match the kinds
|
||||
# of items we're looking for.
|
||||
#
|
||||
|
||||
sub _check_hasTerms{
|
||||
my($item,$hasTermsRegex)=@_;
|
||||
my $to_check=$item->{title}.$item->{description};
|
||||
if ($to_check =~ /$hasTermsRegex/gism) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
sub _make_regex{
|
||||
my $terms = shift;
|
||||
my @terms = split(/,/,$terms);
|
||||
return join('|',@terms);
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
# So- We're going to manage an "aggregate cache" that represents
|
||||
# the rendering of the cumulative feeds in a Syndicated Wobject,
|
||||
# but let each feed "fend for itself" based on URL in the cache.
|
||||
#
|
||||
# This means we can set up the hourly task to get and cache each
|
||||
# individual feed WITHOUT having to re-request (undoubtedly the slowest
|
||||
# part of every RSS parsing action is the network traffic) each feed
|
||||
# when we re-render each aggregrate representation.
|
||||
#
|
||||
# If, however, a feed expires between hourly tasks, it will be re-requested and
|
||||
# parsed per the usual. BUT, if a feed ever goes un-requested for more than an hour,
|
||||
# then it's retrieval schedule will be taken over by the hourly task, and we'll
|
||||
# be pre-seeding the RSS object cache automatically.
|
||||
#
|
||||
# Having the caching set up this way means we can re-use the same raw feed all over the site without
|
||||
# having each wobject request it separately, ASSUMING the URL is the same.
|
||||
#
|
||||
# All the values that may have an effect on the composition of items
|
||||
# are included in the cache key for the aggregate representation.
|
||||
|
||||
sub _get_items {
|
||||
sub generateFeed {
|
||||
my $self = shift;
|
||||
my $urls = shift;
|
||||
my $maxHeadlines = shift || $self->getValue('maxHeadlines');
|
||||
my $displayMode=$self->getValue('displayMode');
|
||||
|
||||
my $hasTermsRegex=_make_regex($self->getValue('hasTerms'));
|
||||
|
||||
# Format of cache has changed several times
|
||||
my $key=join(':', 'aggregate3', $displayMode,$hasTermsRegex,$maxHeadlines,$self->getRssUrl);
|
||||
my $cache = WebGUI::Cache->new($self->session,$key, 'RSS');
|
||||
my $cached = $cache->get;
|
||||
my ($items, @rss_feeds);
|
||||
|
||||
if ($cached) {
|
||||
$items = $cached->[0];
|
||||
@rss_feeds = @{$cached->[1]};
|
||||
} else {
|
||||
$items = [];
|
||||
for my $url (@{$urls}) {
|
||||
my $rss_info=_get_rss_data($self->session,$url);
|
||||
push(@rss_feeds, $rss_info) if(defined $rss_info);
|
||||
}
|
||||
|
||||
# deal with the fact that we may never get valid data
|
||||
if (scalar(@rss_feeds) < 1) {
|
||||
return ({}, []);
|
||||
my $feed = XML::FeedPP::Atom->new();
|
||||
my $log = $self->session->log;
|
||||
|
||||
# build one feed out of many
|
||||
foreach my $url (split("\n", $self->get('rssUrl'))) {
|
||||
$log->info("Processing FEED: ".$url);
|
||||
$url =~ s/^feed:/http:/;
|
||||
if ($self->get('processMacroInRssUrl')) {
|
||||
WebGUI::Macro::process($self->session, \$url);
|
||||
}
|
||||
|
||||
#Sort feeds in order by channel title.
|
||||
#@rss_feeds=sort{$a->{channel}->{title} cmp $b->{channel}->{title}} @rss_feeds;
|
||||
|
||||
if ($displayMode eq 'grouped') {
|
||||
_create_grouped_items($items,\@rss_feeds,$maxHeadlines,$hasTermsRegex);
|
||||
} else {
|
||||
_create_interleaved_items($items,\@rss_feeds,$maxHeadlines,$hasTermsRegex);
|
||||
my $cache = WebGUI::Cache->new($self->session, $url, "RSS");
|
||||
my $value = $cache->setByHTTP($url, $self->get("cacheTimeout"));
|
||||
eval { $feed->merge($value) };
|
||||
if (my $e = WebGUI::Error->caught()) {
|
||||
$log->error("Syndicated Content asset (".$self->getId.") has a bad feed URL (".$url."). Failed with ".$e->message);
|
||||
}
|
||||
|
||||
#@{$items} = sort { $b->{date} <=> $a->{date} } @{$items};
|
||||
|
||||
$cache->set([$items, \@rss_feeds], 3600);
|
||||
}
|
||||
|
||||
#So return the item loop and the first RSS feed, because
|
||||
#when we're parsing a single feed we can use that feed's title and
|
||||
#description for channel.title, channel.link, and channel.description
|
||||
return ($items,\@rss_feeds);
|
||||
|
||||
# build a new feed that matches the term the user is interested in
|
||||
if ($self->get('hasTerms') ne '') {
|
||||
my @terms = split /,\s*/, $self->get('hasTerms'); # get the list of terms
|
||||
my $termRegex = join("|", map quotemeta($_), @terms); # turn the terms into a regex string
|
||||
my @items = $feed->match_item(title=>qr/$termRegex/msi, description=>qr/$termRegex/msi);
|
||||
$feed->clear_item;
|
||||
foreach my $item (@items) {
|
||||
$feed->add_item($item);
|
||||
}
|
||||
}
|
||||
|
||||
# sort them by date
|
||||
$feed->sort_item();
|
||||
|
||||
# limit the feed to the maxium number of headlines
|
||||
$feed->limit_item($self->get('maxHeadlines'));
|
||||
return $feed;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getTemplateVariables
|
||||
|
||||
Returns a hash reference of template variables.
|
||||
|
||||
=head3 feed
|
||||
|
||||
A reference to an XML::FeedPP object.
|
||||
|
||||
=cut
|
||||
|
||||
sub getTemplateVariables {
|
||||
my ($self, $feed) = @_;
|
||||
my @items = $feed->get_item;
|
||||
my %var;
|
||||
$var{channel_title} = WebGUI::HTML::filter($feed->title, 'javascript');
|
||||
$var{channel_description} = WebGUI::HTML::filter($feed->description, 'javascript');
|
||||
$var{channel_date} = WebGUI::HTML::filter($feed->pubDate, 'javascript');
|
||||
$var{channel_copyright} = WebGUI::HTML::filter($feed->copyright, 'javascript');
|
||||
$var{channel_link} = WebGUI::HTML::filter($feed->link, 'javascript');
|
||||
my @image = $feed->image;
|
||||
$var{channel_image_url} = WebGUI::HTML::filter($image[0], 'javascript');
|
||||
$var{channel_image_title} = WebGUI::HTML::filter($image[1], 'javascript');
|
||||
$var{channel_image_link} = WebGUI::HTML::filter($image[2], 'javascript');
|
||||
$var{channel_image_description} = WebGUI::HTML::filter($image[3], 'javascript');
|
||||
$var{channel_image_width} = WebGUI::HTML::filter($image[4], 'javascript');
|
||||
$var{channel_image_height} = WebGUI::HTML::filter($image[5], 'javascript');
|
||||
foreach my $object (@items) {
|
||||
my %item;
|
||||
$item{title} = WebGUI::HTML::filter($object->title, 'javascript');
|
||||
$item{date} = WebGUI::HTML::filter($object->pubDate, 'javascript');
|
||||
$item{category} = WebGUI::HTML::filter($object->category, 'javascript');
|
||||
$item{author} = WebGUI::HTML::filter($object->author, 'javascript');
|
||||
$item{guid} = WebGUI::HTML::filter($object->guid, 'javascript');
|
||||
$item{link} = WebGUI::HTML::filter($object->link, 'javascript');
|
||||
$item{description} = WebGUI::HTML::filter($object->description, 'javascript');
|
||||
$item{descriptionFirst100words} = $item{description};
|
||||
$item{descriptionFirst100words} =~ s/(((\S+)\s+){100}).*/$1/s;
|
||||
$item{descriptionFirst75words} = $item{descriptionFirst100words};
|
||||
$item{descriptionFirst75words} =~ s/(((\S+)\s+){75}).*/$1/s;
|
||||
$item{descriptionFirst50words} = $item{descriptionFirst75words};
|
||||
$item{descriptionFirst50words} =~ s/(((\S+)\s+){50}).*/$1/s;
|
||||
$item{descriptionFirst25words} = $item{descriptionFirst50words};
|
||||
$item{descriptionFirst25words} =~ s/(((\S+)\s+){25}).*/$1/s;
|
||||
$item{descriptionFirst10words} = $item{descriptionFirst25words};
|
||||
$item{descriptionFirst10words} =~ s/(((\S+)\s+){10}).*/$1/s;
|
||||
$item{descriptionFirst2paragraphs} = $item{description};
|
||||
$item{descriptionFirst2paragraphs} =~ s/^((.*?\n){2}).*/$1/s;
|
||||
$item{descriptionFirstParagraph} = $item{descriptionFirst2paragraphs};
|
||||
$item{descriptionFirstParagraph} =~ s/^(.*?\n).*/$1/s;
|
||||
$item{descriptionFirst4sentences} = $item{description};
|
||||
$item{descriptionFirst4sentences} =~ s/^((.*?\.){4}).*/$1/s;
|
||||
$item{descriptionFirst3sentences} = $item{descriptionFirst4sentences};
|
||||
$item{descriptionFirst3sentences} =~ s/^((.*?\.){3}).*/$1/s;
|
||||
$item{descriptionFirst2sentences} = $item{descriptionFirst3sentences};
|
||||
$item{descriptionFirst2sentences} =~ s/^((.*?\.){2}).*/$1/s;
|
||||
$item{descriptionFirstSentence} = $item{descriptionFirst2sentences};
|
||||
$item{descriptionFirstSentence} =~ s/^(.*?\.).*/$1/s;
|
||||
push @{$var{item_loop}}, \%item;
|
||||
}
|
||||
return \%var;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
|
|
@ -593,11 +281,12 @@ sub prepareView {
|
|||
my $template = WebGUI::Asset::Template->new($self->session, $self->get("templateId"));
|
||||
$template->prepare($self->getMetaDataAsTemplateVariables);
|
||||
$self->{_viewTemplate} = $template;
|
||||
my $i18n = WebGUI::International->new($self->session,'Asset_SyndicatedContent');
|
||||
my $rssFeedSuffix=$i18n->get('RSS Feed Title Suffix');
|
||||
my $title = $self->get("title")." ".$rssFeedSuffix;
|
||||
my $title = $self->get("title");
|
||||
$title =~ s/\"/"/g;
|
||||
$self->session->style->setLink($self->getUrl("func=viewRSS20"), { rel=>'alternate', type=>'application/rss+xml', title=>$title });
|
||||
my $style = $self->session->style;
|
||||
$style->setLink($self->getUrl("func=viewRss"), { rel=>'alternate', type=>'application/rss+xml', title=>$title.' (RSS)' });
|
||||
$style->setLink($self->getUrl("func=viewRdf"), { rel=>'alternate', type=>'application/rdf+xml', title=>$title.' (RDF)' });
|
||||
$style->setLink($self->getUrl("func=viewAtom"), { rel=>'alternate', type=>'application/atom+xml', title=>$title.' (Atom)' });
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -625,6 +314,22 @@ Returns the rendered output of the wobject.
|
|||
|
||||
sub view {
|
||||
my $self = shift;
|
||||
|
||||
# try the cached version
|
||||
my $cache = WebGUI::Cache->new($self->session,"view_".$self->getId);
|
||||
my $out = $cache->get;
|
||||
return $out if ($out ne "");
|
||||
|
||||
# generate from scratch
|
||||
my $feed = $self->generateFeed;
|
||||
$out = $self->processTemplate($self->getTemplateVariables($feed),undef,$self->{_viewTemplate});
|
||||
if (!$self->session->var->isAdminOn && $self->get("cacheTimeout") > 10) {
|
||||
$cache->set($out,$self->get("cacheTimeout"));
|
||||
}
|
||||
return $out;
|
||||
|
||||
|
||||
|
||||
my $rssFlavor = shift;
|
||||
if ($rssFlavor eq "" && !$self->session->var->isAdminOn && $self->get("cacheTimeout") > 10) {
|
||||
my $out = WebGUI::Cache->new($self->session,"view_".$self->getId)->get;
|
||||
|
|
@ -688,48 +393,6 @@ sub view {
|
|||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getRssUrl
|
||||
|
||||
Get the RSS URL and process macros if we're supposed to.
|
||||
|
||||
=cut
|
||||
|
||||
sub getRssUrl {
|
||||
my $self = shift;
|
||||
my $value = $self->get("rssUrl");
|
||||
WebGUI::Macro::process($self->session,\$value) if $self->get("processMacroInRssUrl");
|
||||
return $value;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 getContentLastModified ( )
|
||||
|
||||
Derive the last-modified date from the revisionDate of the object and from the dates of the RSS feeds.
|
||||
|
||||
=cut
|
||||
|
||||
sub getContentLastModified {
|
||||
# Buggo, is this too expensive? Do we really want to do this every time?
|
||||
# But how else are we supposed to get a reasonable last-modified date?
|
||||
# Maybe just approximate... ?
|
||||
my $self = shift;
|
||||
|
||||
my $maxHeadlines = $self->_getMaxHeadlines;
|
||||
my @validatedUrls = $self->_getValidatedUrls;
|
||||
my ($item_loop, $rss_feeds) = $self->_get_items(\@validatedUrls, $maxHeadlines);
|
||||
my $mtime = $self->get("revisionDate");
|
||||
|
||||
foreach my $rss (@$rss_feeds) {
|
||||
next unless defined $rss->{last_modified};
|
||||
$mtime = $rss->{last_modified} if $rss->{last_modified} > $mtime;
|
||||
}
|
||||
|
||||
return $mtime;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_view ( )
|
||||
|
||||
See WebGUI::Asset::Wobject::www_view() for details.
|
||||
|
|
@ -742,61 +405,113 @@ sub www_view {
|
|||
$self->SUPER::www_view(@_);
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS090 ( )
|
||||
=head2 www_viewAtom ( )
|
||||
|
||||
Emit an RSS 0.9 feed.
|
||||
Emit an Atom 0.3 feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS090 {
|
||||
my $self=shift;
|
||||
return $self->view('0.9');
|
||||
sub www_viewAtom {
|
||||
my $self = shift;
|
||||
my $feed = $self->generateFeed;
|
||||
my $atom = XML::FeedPP::Atom->new;
|
||||
$atom->merge($feed);
|
||||
$self->session->http->setMimeType('application/atom+xml');
|
||||
return $atom->to_string;
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS091 ( )
|
||||
=head2 www_viewRdf ( )
|
||||
|
||||
Emit an RSS 0.91 feed.
|
||||
Emit an RSS 1.0 / RDF feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS091 {
|
||||
my $self=shift;
|
||||
return $self->view('0.91');
|
||||
sub www_viewRdf {
|
||||
my $self = shift;
|
||||
my $feed = $self->generateFeed;
|
||||
my $rdf = XML::FeedPP::RDF->new;
|
||||
$rdf->merge($feed);
|
||||
$self->session->http->setMimeType('application/rdf+xml');
|
||||
return $rdf->to_string;
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS10 ( )
|
||||
|
||||
Emit an RSS 1.0 feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS10 {
|
||||
my $self=shift;
|
||||
return $self->view('1.0');
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS20 ( )
|
||||
=head2 www_viewRss ( )
|
||||
|
||||
Emit an RSS 2.0 feed.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS20 {
|
||||
my $self=shift;
|
||||
return $self->view('2.0');
|
||||
sub www_viewRss {
|
||||
my $self = shift;
|
||||
my $feed = $self->generateFeed;
|
||||
my $rss = XML::FeedPP::RSS->new;
|
||||
$rss->merge($feed);
|
||||
$self->session->http->setMimeType('application/rss+xml');
|
||||
return $rss->to_string;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS090 ( )
|
||||
|
||||
Deprecated. Use www_viewRss() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS10 {
|
||||
my $self = shift;
|
||||
return $self->www_viewRdf;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS091 ( )
|
||||
|
||||
Deprecated. Use www_viewRss() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS10 {
|
||||
my $self = shift;
|
||||
return $self->www_viewRdf;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS10 ( )
|
||||
|
||||
Deprecated. Use www_viewRdf() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS10 {
|
||||
my $self = shift;
|
||||
return $self->www_viewRdf;
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------
|
||||
|
||||
=head2 www_viewRSS20 ( )
|
||||
|
||||
Deprecated. Use www_viewRss() instead.
|
||||
|
||||
=cut
|
||||
|
||||
sub www_viewRSS10 {
|
||||
my $self = shift;
|
||||
return $self->www_viewRdf;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
1;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue