From ef6aedc86295cc6282c3fb79ae9eed41cdda8180 Mon Sep 17 00:00:00 2001 From: JT Smith Date: Thu, 6 Nov 2008 02:32:10 +0000 Subject: [PATCH] - The Syndicated Content asset was rewritten, and now uses 35% less memory and is 400% faster. --- docs/changelog/7.x.x.txt | 3 + docs/gotcha.txt | 11 + .../default_syndicated_content.wgpkg | Bin 0 -> 1007 bytes .../packages-7.6.3/syndicated_articles.wgpkg | Bin 0 -> 1039 bytes docs/upgrades/upgrade_7.6.2-7.6.3.pl | 42 +- lib/WebGUI/Asset/Wobject/SyndicatedContent.pm | 703 +++----- lib/WebGUI/Help/Asset_SyndicatedContent.pm | 32 +- .../Workflow/Activity/GetSyndicatedContent.pm | 65 +- .../i18n/English/Asset_SyndicatedContent.pm | 142 +- lib/XML/RSS/Creator.pm | 1501 ----------------- sbin/testEnvironment.pl | 2 +- t/Asset/Wobject/SyndicatedContent.t | 72 +- 12 files changed, 393 insertions(+), 2180 deletions(-) create mode 100644 docs/upgrades/packages-7.6.3/default_syndicated_content.wgpkg create mode 100644 docs/upgrades/packages-7.6.3/syndicated_articles.wgpkg delete mode 100644 lib/XML/RSS/Creator.pm diff --git a/docs/changelog/7.x.x.txt b/docs/changelog/7.x.x.txt index 65e7d0875..586d52759 100644 --- a/docs/changelog/7.x.x.txt +++ b/docs/changelog/7.x.x.txt @@ -1,5 +1,8 @@ 7.6.3 - fixed #8989: Missing profile field "showOnline" for the UsersOnline macro. + - Added DataTable to WebGUI.conf.original + - The Syndicated Content asset was rewritten, and now uses 35% less memory + and is 400% faster. - fixed #9025: Testing function of UsersOnline macro fails. 7.6.2 diff --git a/docs/gotcha.txt b/docs/gotcha.txt index 5f6061fe9..df780f6ab 100644 --- a/docs/gotcha.txt +++ b/docs/gotcha.txt @@ -7,6 +7,17 @@ upgrading from one version to the next, or even between multiple versions. Be sure to heed the warnings contained herein as they will save you many hours of grief. +7.6.3 +-------------------------------------------------------------------- + * WebGUI now requires XML::FeedPP version 0.36 or greater. 
+ + * The Syndicated Content asset has been heavily modified. Your templates + and settings should automatically migrate, but in less than 1% of cases + there will be problems that the migration cannot handle. Check your + Syndicated Content assets after upgrade to ensure they are still + working as expected. + + 7.6.1 -------------------------------------------------------------------- * WebGUI now requires Class::C3 version 0.19 or greater. diff --git a/docs/upgrades/packages-7.6.3/default_syndicated_content.wgpkg b/docs/upgrades/packages-7.6.3/default_syndicated_content.wgpkg new file mode 100644 index 0000000000000000000000000000000000000000..18edf8ff0bbd1e34071ea5767031d6c87950285f GIT binary patch literal 1007 zcmVN!csO7U8f?JY2Yc<6|ebxH~*Z-3Va&}#> z`V7oh*Xw+hjJ{H$0ND?q-|*ho|G*CiJN*xWo^RQ}=HZ`1^}loY)L&zV?{DA`I#Mx# z7TB>z_P<|#GfP}Bj)fvv>Q&>WJtKjU!tvQguR1-)IBP?Sq`G+GOn~*&#Lz5Fw`Mp# z%HTUZyHAN+;<$>X`0*2=F?%$iZe@@g%D5zLIQB@fxRY4i#Jh>$CneA|M)4Fa+JQ() ztcsw?nOgHSANcaKA6C0(A~oPKPEeMrIb;))NS8m47*&|6%9qG%633XD0I$A25lk_) z;d%A7;ueBs{3gDLiOQq=DokO>tYmDAY>L+SkC{W_u}*pZfP`ICax!yl5qwV ztfgNAQ^qpx!lKFL1t=^dLAZhpOQdbpaofi2?sJIcUPab+v_zERbe_?C}Gf6Aym)wYoL_`JYKmgZa;n3OSY6PUHR z{+1h3QW8^=RwHYsU=~|+UbUpp+U7N)v+yG}Oi&i?gaTO8y4wg%9cT5GTyCs|JtLTZb4Tpo_ zFdX=`p%D0iXjMHgb=&j(aM*LbK~30+oS&3a>*WV=p_^DoTpkpwD?PoqMV?r z-K}n88i@^8lRfYXEs9%D{?|+X$y)v9 zhe0y;{=QK%KNlLDcz)0uhW%l`+7X%zXRJt%T_sQWTPRm@b2gtu=u}>;=N}#Yo8-rv dAFuI~=ZJA+;K;y{fg=OJz)$g6NUH!6008Ft`#S&t literal 0 HcmV?d00001 diff --git a/docs/upgrades/packages-7.6.3/syndicated_articles.wgpkg b/docs/upgrades/packages-7.6.3/syndicated_articles.wgpkg new file mode 100644 index 0000000000000000000000000000000000000000..9135203a62dd0720393300ebe9e853d817f6e64a GIT binary patch literal 1039 zcmV+q1n~PGiwFP!000001MOB@Z{j!<=6QZal;>TVga8A}BGFDSjM^@p(ydm@3Yo+d z7h^}ZDKM+~?{nhZ41w-Tk@kT|G}Pz#`1`(dG1bcPAIEW;^?IcezZ=Qh@%&1~${(lV zx~}84+07y8$RS4-uv?Jdv$Lo|Gr;yDi)}D{O4HtuO1)zN1X8QCul-jDlX6q 
zwyn1H_dmbrrAY7y3q`PuSG8O083>FN4$n97s$H|Sb8MhUs>^l61z3A}4_i{2o1uM_ zj&DcH2_L81u1^#_xldz+}Fn&YAN-1;)sAVB{-E3B+~ZInLeIO%OOJ| z0r6%?6vbJrnP&u3+R{0LMUCa1P*?_nL<$gQk#g3dVe3TJ*`Zk3DJfnnm?6e69aF;Q zSx{un1fGI)2IDO^+kl@1$Y><0COa{eIp-Yd_J%U$T#2pBWV=kBvBOA}L7G1AX`vxe z&LfLJU5(>!nFJGqVa9Bd88hw7a}q3oW+GDYN6N%j_T8zOlm`@{k&05hJ*i3DZA+>R zjeB7V#rdlw-*MY&MeE;2cJ~`}yLHC}u2!pg*{jm|2J(VM>XmC#v?5_MEQop%^h)r> z013>Jel=Pdu;FxY+QlWBT&USqT0{(BNXmQYXx;YDcyj&JZ?}8#NNTr-S?A}YMbFF& zD9o3A16OBF2YSrslhrHDr*zo-NpLbIc%c{E|oWP@as|b2&vR@T&K}0&_zhVeAk8pO10ZJ!@JwEZZjx{ zbIOBxym{*uBJviNGc|WiU+@$yX(`2K7BrextJ!MRo1USEz;7g;t6#Fo?Rs9lRdYP| zb3us60Se}MwG;+9db; + my $templates = $db->read("select distinct assetId from template where namespace='SyndicatedContent'"); + while (my ($id) = $templates->array) { + my $asset = WebGUI::Asset::Template->new($session, $id); + if (defined $asset) { + if ($asset->getId eq "DPUROtmpl0000000000001") { # this one no longer applies + $asset->trash; + next; + } + my $template = $asset->get('template'); + $template =~ s{channel.title}{channel_title}xmsi; + $template =~ s{channel.description}{channel_description}xmsi; + $template =~ s{channel.link}{channel_link}xmsi; + $template =~ s{site_link}{channel_link}xmsi; + $template =~ s{site_title}{channel_title}xmsi; + $template =~ s{descriptionFull}{description}xmsi; + $template =~ s{rss.url.0.9}{rss_url}xmsi; + $template =~ s{rss.url}{rss_url}xmsi; + $template =~ s{rss.url.0.91}{rss_url}xmsi; + $template =~ s{rss.url.1.0}{rdf_url}xmsi; + $template =~ s{rss.url.2.0}{rss_url}xmsi; + $asset->addRevision({template=>$template}); + } + } + $db->write("update SyndicatedContent set templateId='PBtmpl0000000000000065' where templateId='DPUROtmpl0000000000001'"); + $db->write("alter table SyndicatedContent drop column displayMode"); + print "DONE!\n" unless $quiet; +} +#---------------------------------------------------------------------------- sub createFieldShowOnline { my $session = shift; print "\tCreating an additional profile field 
'showOnline' for the UsersOnline macro... " unless $quiet; diff --git a/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm b/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm index 0420c7c4e..68450700b 100644 --- a/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm +++ b/lib/WebGUI/Asset/Wobject/SyndicatedContent.pm @@ -14,16 +14,13 @@ use strict; use HTML::Entities; use Tie::IxHash; use WebGUI::Cache; +use WebGUI::Exception; use WebGUI::HTML; use WebGUI::International; -use WebGUI::Asset::Wobject; +use base 'WebGUI::Asset::Wobject'; use WebGUI::Macro; -use XML::RSSLite; -use XML::RSS::Creator; -use LWP::UserAgent; -use Encode; +use XML::FeedPP; -our @ISA = qw(WebGUI::Asset::Wobject); =head1 NAME @@ -31,7 +28,7 @@ Package WebGUI::Asset::Wobject::SyndicatedContent =head1 DESCRIPTION -Displays items and channels from RSS feeds. +Displays items and channels from RSS/Atom/RDF feeds. =head1 SYNOPSIS @@ -43,55 +40,6 @@ These methods are available from this class: =cut -#------------------------------------------------------------------- -sub _constructRSS { - my($self,$rssObject,$var)=@_; - #They've chosen to emit this as an RSS feed, in one of the four flavors we support. - $rssObject->channel( - title=>$var->{'channel.title'} || $self->get('title'), - link=>$self->session->url->page('',1), - description=>$var->{'channel.description'} || '' - ); - foreach my $item (@{$var->{item_loop}}) { - # I know this seems kludgy, but because XML::RSSLite parses - # feeds loosely, sometimes it returns a data structure when it shouldn't. - # So we're only pushing in attributes when they AREN'T a reference to - # a data structure. - my %attributes; - foreach my $attribute(keys %$item){ - $attributes{$attribute}=$item->{$attribute} if (! 
ref($item->{$attribute})); - } - $rssObject->add_item(%attributes); - } -} - - -#------------------------------------------------------------------- -sub _createRSSURLs { - my $self=shift; - my $var=shift; - foreach({ver=>'1.0',param=>'10'},{ver=>'0.9',param=>'090'},{ver=>'0.91',param=>'091'},{ver=>'2.0',param=>'20'}){ - $var->{'rss.url.'.$_->{ver}}=$self->getUrl('func=viewRSS'.$_->{param}); - } - $var->{'rss.url'}=$self->getUrl('func=viewRSS20'); -} - -#------------------------------------------------------------------- -sub _getMaxHeadlines { - my $self = shift; - return $self->get('maxHeadlines') || 1000000; -} - -#------------------------------------------------------------------- -sub _getValidatedUrls { - my $self = shift; - my @urls = split(/\s+/,$self->getRssUrl); - my @validatedUrls = (); - foreach my $url (@urls) { - push(@validatedUrls, $url) if ($url =~ m/^http/); - } - return @validatedUrls -} #------------------------------------------------------------------- @@ -129,6 +77,7 @@ sub appendChoppedDescriptionTemplateVars { $item->{"descriptionFirstSentence"} =~ s/^(.*?\.).*/$1/s; } + #------------------------------------------------------------------- =head2 definition ( definition ) @@ -179,26 +128,13 @@ sub definition { label=>$i18n->get('process macros in rss url'), hoverHelp=>$i18n->get('process macros in rss url description'), }, - maxHeadlines=>{ - tab=>"properties", + maxHeadlines=>{ + tab=>"display", fieldType=>'integer', defaultValue=>10, label=>$i18n->get(3), hoverHelp=>$i18n->get('3 description') }, - displayMode=>{ - tab=>"display", - fieldType=>'selectBox', - defaultValue=>'interleaved', - options=>{ - 'interleaved'=>$i18n->get('interleaved'), - 'grouped'=>$i18n->get('grouped'), - }, - sortByValue=>1, - label=>$i18n->get('displayModeLabel'), - hoverHelp=>$i18n->get('displayModeLabel description'), - subtext=>$i18n->get('displayModeSubtext') - }, hasTerms=>{ tab=>"properties", fieldType=>'text', @@ -221,363 +157,115 @@ sub definition { } 
#------------------------------------------------------------------- -# strip all html tags from the given data structure. This is important to -# prevent cross site scripting attacks -sub _strip_html { - unless (ref $_[0]) { - return $_[0] = WebGUI::HTML::filter($_[0], 'all'); - } - my $ref = shift; - if (ref $ref eq 'HASH') { - if (exists $ref->{description}) { - $ref->{description} = HTML::Entities::decode_entities($ref->{description}); - } - foreach my $value (values %$ref) { - _strip_html($value); - } - } - elsif (ref $ref eq 'ARRAY') { - foreach my $value (@$ref) { - _strip_html($value); - } - } - return $ref; -} +=head2 generateFeed () -#------------------------------------------------------------------- -# horrible kludge to find the channel or item record -# in the varying kinds of rss structures returned by RSSLite +Combines all feeds into a single XML::FeedPP object. -sub _find_record { - my ($data, $regex) = @_; +=cut - if (ref($data) eq 'HASH') { - # reset the hash before calling each() - keys(%{$data}); - while (my ($name, $val) = each(%{$data})) { - if ($name =~ $_[1]) { - if ((((ref($val) eq 'HASH') && - ($val->{link} || $val->{title} || - $val->{description})) || - ((ref($val) eq 'ARRAY') && @{$val} && - (ref($val->[0]) eq 'HASH') && - ($val->[0]->{link} || - $val->[0]->{title} || - $val->[0]->{description})))) { - return $val; - } - } - if (my $record = _find_record($val, $regex)) { - return $record; - } - } - } - - return undef; -} - -#------------------------------------------------------------------- -# First, get rid of things we don't want. -# Copy the guid field to the link field if the guid looks like a link. -# This is a kludge that gets around the fact that some folks use the link -# field as the link to the story while others use it as the link -# to the story about which the story is written. 
The webuig templates seem -# to assume the former, so we should use the guid instead of the link, b/c -# the guid, if it is a link, always means the former. -# Also copy the first few words of the description into the title if -# there is no title - -sub _normalize_items { - #my ($items) = @_; - - # max number of words to take from description to fill in an empty - # title - my $max_words = 10; - - for my $item (@{$_[0]}) { - # Get rid of any keys in the items that we do not want - my @wantedKeys = qw( title link description pubDate ); - %{ $item } = map { $_ => $item->{ $_ } } @wantedKeys; - - if ($item->{guid} && ($item->{guid} =~ /^http:\/\//i)) { - $item->{link} = $item->{guid}; - } - if (!$item->{title}) { - my @description_words = split(/\s/, $item->{description}); - if (@description_words <= $max_words) { - $item->{title} = $item->{description}; - } else { - $item->{title} = join(' ', @description_words[0..$max_words-1]) . - ' ...'; - } - } - - # IE doesn't recognize ' - $item->{title} =~ s/'/\'/g; - $item->{description} =~ s/'/\'/g; - $item->{category} = [$item->{category}] - if ref $item->{category} ne 'ARRAY'; - appendChoppedDescriptionTemplateVars($item); - } -} - -#------------------------------------------------------------------- -sub _get_rss_data { - my $session = shift; - my $url = shift; - # format of cache was changed, differentiate - my $cache = WebGUI::Cache->new($session,'url2:' . $url, 'RSS'); - my $rss = $cache->get; - if ($rss) { - if ($rss->{error}) { - return undef; - } - return $rss; - } - else { - my $ua = LWP::UserAgent->new(timeout => 5); - $ua->env_proxy; - my $response = $ua->get($url); - if (!$response->is_success()) { - $session->errorHandler->warn("Error retrieving url '$url': " . 
- $response->status_line()); - $cache->set({'error' => 1, 'error_status' => $response->status_line}, 3600); - return undef; - } - my $xmlEncoding; - if ($response->content =~ /<\?xml.*?encoding=['"](\S+)['"]/i) { - $xmlEncoding = $1; - } - - my $xml = $response->decoded_content($xmlEncoding ? (charset => $xmlEncoding) : ()); - - # Approximate with current time if we don't have a Last-Modified - # header coming from the RSS source. - my $http_lm = $response->last_modified; - my $last_modified = defined($http_lm)? $http_lm : time; - - # XML::RSSLite does not handle so: - $xml =~ s//HTML::Entities::encode_entities($1)/esg; - - my $rss_lite = {}; - eval { - XML::RSSLite::parseXML($rss_lite, \$xml); - }; - if ($@) { - $session->errorHandler->warn("error parsing rss for url $url :".$@); - #Returning undef on a parse failure is a change from previous behaviour, - #but it SHOULDN'T have a major effect. - return undef; - } - - # make sure that the {channel} points to the channel - # description record and that {items} points to the list - # of items. without this voodoo, different versions of - # rss return the data in different places in the data - # structure. - - $rss_lite = {channel => $rss_lite}; - $rss = {}; - if (!($rss->{channel} = - _find_record($rss_lite, qr/^channel$/))) { - $session->errorHandler->warn("unable to find channel info for url $url"); - } - if (!($rss->{items} = _find_record($rss_lite, qr/^items?$/))) { - $session->errorHandler->warn("unable to find item info for url $url"); - $rss->{items} = []; - } - - _strip_html($rss); - $rss->{items} = [ $rss->{items} ] unless (ref $rss->{items} eq 'ARRAY'); - - _normalize_items($rss->{items}); - #Assign dates "globally" rather than when seen in a viewed feed. - #This is important because we can "filter" now and want to ensure we keep order - #correctly as new items appear. - _assign_rss_dates($session, $rss->{items}); - - # Store last-modified date as well. 
- $rss->{last_modified} = $last_modified; - - #Default to an hour timeout - $cache->set($rss, 3600); - } - - return $rss; -} - -#------------------------------------------------------------------- -# rss items don't have a standard date, so timestamp them the first time -# we see them and use that timestamp as the date. Periodically nuke the -# whole database to keep the thing from growing too large - -sub _assign_rss_dates { - my $session = shift; - my ($items) = @_; - - for my $item (@{$items}) { - my $key = 'dates:' . ($item->{guid} || $item->{title} || - $item->{description} || $item->{link}); - my $cache = WebGUI::Cache->new($session,$key, 'RSS'); - if (my $date = $cache->get()) { - $item->{date} = $date; - } - else { - my $pubDate; - if ($item->{pubDate}) { - $pubDate = $session->datetime->mailToEpoch($item->{pubDate}); - } - $item->{date} = $pubDate || $session->datetime->time() - (60 * 60 * 24 * 365); # handicap the undated - $cache->set($item->{date}, '1 year'); - } - } - @{$items} = sort { $b->{date} <=> $a->{date} } @{$items}; -} - -#------------------------------------------------------------------- -# $items is the hashref to put items into. -# $rss_feeds is an arrayref of all the feeds in this wobject -# The only difference between an "interleaved" feed and a grouped feed -# is the order the items are output. 
- -sub _create_grouped_items{ - my($items,$rss_feeds,$maxHeadlines,$hasTermsRegex)=@_; - - _create_interleaved_items($items,$rss_feeds,$maxHeadlines,$hasTermsRegex); - - @$items=sort{$a->{'site_title'} cmp $b->{'site_title'}} @$items; - - #Loop through the items and output the "site_ - my $siteTitleTracker; - foreach (@$items) { - if ($siteTitleTracker ne $_->{site_title}) { - $_->{new_rss_site} = 1; - } - $siteTitleTracker = $_->{site_title}; - } -} - - -#------------------------------------------------------------------- -# Loop through the feeds for this wobject -# and push in the items in "interleaved mode" -# No need to return because we're doing everything by reference. - -sub _create_interleaved_items { - my ($items, $rss_feeds, $maxHeadlines, $hasTermsRegex) = @_; - # put all items together into a single list - foreach my $rss (@$rss_feeds) { - while (my $item = shift @{$rss->{items}}) { - if ($hasTermsRegex && ! _check_hasTerms($item, $hasTermsRegex)) { - next; - } - $item->{site_title} = $rss->{channel}->{title}; - $item->{site_link} = $rss->{channel}->{link}; - push @$items, $item; - } - } - @$items = sort { $b->{date} <=> $a->{date} } @$items; - # limit to $maxHeadlines - if (@$items > $maxHeadlines) { - splice @$items, $maxHeadlines; - } -} - -#------------------------------------------------------------------- -# Uses the regex constructed in _get_items (with the terms defaulting to OR) -# to see if the title or description associated with this item match the kinds -# of items we're looking for. 
-# - -sub _check_hasTerms{ - my($item,$hasTermsRegex)=@_; - my $to_check=$item->{title}.$item->{description}; - if ($to_check =~ /$hasTermsRegex/gism) { - return 1; - } else { - return 0; - } -} - -#------------------------------------------------------------------- -sub _make_regex{ - my $terms = shift; - my @terms = split(/,/,$terms); - return join('|',@terms); -} - - -#------------------------------------------------------------------- -# So- We're going to manage an "aggregate cache" that represents -# the rendering of the cumulative feeds in a Syndicated Wobject, -# but let each feed "fend for itself" based on URL in the cache. -# -# This means we can set up the hourly task to get and cache each -# individual feed WITHOUT having to re-request (undoubtedly the slowest -# part of every RSS parsing action is the network traffic) each feed -# when we re-render each aggregrate representation. -# -# If, however, a feed expires between hourly tasks, it will be re-requested and -# parsed per the usual. BUT, if a feed ever goes un-requested for more than an hour, -# then it's retrieval schedule will be taken over by the hourly task, and we'll -# be pre-seeding the RSS object cache automatically. -# -# Having the caching set up this way means we can re-use the same raw feed all over the site without -# having each wobject request it separately, ASSUMING the URL is the same. -# -# All the values that may have an effect on the composition of items -# are included in the cache key for the aggregate representation. 
- -sub _get_items { +sub generateFeed { my $self = shift; - my $urls = shift; - my $maxHeadlines = shift || $self->getValue('maxHeadlines'); - my $displayMode=$self->getValue('displayMode'); - - my $hasTermsRegex=_make_regex($self->getValue('hasTerms')); - - # Format of cache has changed several times - my $key=join(':', 'aggregate3', $displayMode,$hasTermsRegex,$maxHeadlines,$self->getRssUrl); - my $cache = WebGUI::Cache->new($self->session,$key, 'RSS'); - my $cached = $cache->get; - my ($items, @rss_feeds); - - if ($cached) { - $items = $cached->[0]; - @rss_feeds = @{$cached->[1]}; - } else { - $items = []; - for my $url (@{$urls}) { - my $rss_info=_get_rss_data($self->session,$url); - push(@rss_feeds, $rss_info) if(defined $rss_info); - } - - # deal with the fact that we may never get valid data - if (scalar(@rss_feeds) < 1) { - return ({}, []); + my $feed = XML::FeedPP::Atom->new(); + my $log = $self->session->log; + + # build one feed out of many + foreach my $url (split("\n", $self->get('rssUrl'))) { + $log->info("Processing FEED: ".$url); + $url =~ s/^feed:/http:/; + if ($self->get('processMacroInRssUrl')) { + WebGUI::Macro::process($self->session, \$url); } - - #Sort feeds in order by channel title. - #@rss_feeds=sort{$a->{channel}->{title} cmp $b->{channel}->{title}} @rss_feeds; - - if ($displayMode eq 'grouped') { - _create_grouped_items($items,\@rss_feeds,$maxHeadlines,$hasTermsRegex); - } else { - _create_interleaved_items($items,\@rss_feeds,$maxHeadlines,$hasTermsRegex); + my $cache = WebGUI::Cache->new($self->session, $url, "RSS"); + my $value = $cache->setByHTTP($url, $self->get("cacheTimeout")); + eval { $feed->merge($value) }; + if (my $e = WebGUI::Error->caught()) { + $log->error("Syndicated Content asset (".$self->getId.") has a bad feed URL (".$url."). 
Failed with ".$e->message); } - - #@{$items} = sort { $b->{date} <=> $a->{date} } @{$items}; - - $cache->set([$items, \@rss_feeds], 3600); } - - #So return the item loop and the first RSS feed, because - #when we're parsing a single feed we can use that feed's title and - #description for channel.title, channel.link, and channel.description - return ($items,\@rss_feeds); + + # build a new feed that matches the term the user is interested in + if ($self->get('hasTerms') ne '') { + my @terms = split /,\s*/, $self->get('hasTerms'); # get the list of terms + my $termRegex = join("|", map quotemeta($_), @terms); # turn the terms into a regex string + my @items = $feed->match_item(title=>qr/$termRegex/msi, description=>qr/$termRegex/msi); + $feed->clear_item; + foreach my $item (@items) { + $feed->add_item($item); + } + } + + # sort them by date + $feed->sort_item(); + + # limit the feed to the maxium number of headlines + $feed->limit_item($self->get('maxHeadlines')); + return $feed; } +#------------------------------------------------------------------- + +=head2 getTemplateVariables + +Returns a hash reference of template variables. + +=head3 feed + +A reference to an XML::FeedPP object. 
+ +=cut + +sub getTemplateVariables { + my ($self, $feed) = @_; + my @items = $feed->get_item; + my %var; + $var{channel_title} = WebGUI::HTML::filter($feed->title, 'javascript'); + $var{channel_description} = WebGUI::HTML::filter($feed->description, 'javascript'); + $var{channel_date} = WebGUI::HTML::filter($feed->pubDate, 'javascript'); + $var{channel_copyright} = WebGUI::HTML::filter($feed->copyright, 'javascript'); + $var{channel_link} = WebGUI::HTML::filter($feed->link, 'javascript'); + my @image = $feed->image; + $var{channel_image_url} = WebGUI::HTML::filter($image[0], 'javascript'); + $var{channel_image_title} = WebGUI::HTML::filter($image[1], 'javascript'); + $var{channel_image_link} = WebGUI::HTML::filter($image[2], 'javascript'); + $var{channel_image_description} = WebGUI::HTML::filter($image[3], 'javascript'); + $var{channel_image_width} = WebGUI::HTML::filter($image[4], 'javascript'); + $var{channel_image_height} = WebGUI::HTML::filter($image[5], 'javascript'); + foreach my $object (@items) { + my %item; + $item{title} = WebGUI::HTML::filter($object->title, 'javascript'); + $item{date} = WebGUI::HTML::filter($object->pubDate, 'javascript'); + $item{category} = WebGUI::HTML::filter($object->category, 'javascript'); + $item{author} = WebGUI::HTML::filter($object->author, 'javascript'); + $item{guid} = WebGUI::HTML::filter($object->guid, 'javascript'); + $item{link} = WebGUI::HTML::filter($object->link, 'javascript'); + $item{description} = WebGUI::HTML::filter($object->description, 'javascript'); + $item{descriptionFirst100words} = $item{description}; + $item{descriptionFirst100words} =~ s/(((\S+)\s+){100}).*/$1/s; + $item{descriptionFirst75words} = $item{descriptionFirst100words}; + $item{descriptionFirst75words} =~ s/(((\S+)\s+){75}).*/$1/s; + $item{descriptionFirst50words} = $item{descriptionFirst75words}; + $item{descriptionFirst50words} =~ s/(((\S+)\s+){50}).*/$1/s; + $item{descriptionFirst25words} = $item{descriptionFirst50words}; + 
$item{descriptionFirst25words} =~ s/(((\S+)\s+){25}).*/$1/s; + $item{descriptionFirst10words} = $item{descriptionFirst25words}; + $item{descriptionFirst10words} =~ s/(((\S+)\s+){10}).*/$1/s; + $item{descriptionFirst2paragraphs} = $item{description}; + $item{descriptionFirst2paragraphs} =~ s/^((.*?\n){2}).*/$1/s; + $item{descriptionFirstParagraph} = $item{descriptionFirst2paragraphs}; + $item{descriptionFirstParagraph} =~ s/^(.*?\n).*/$1/s; + $item{descriptionFirst4sentences} = $item{description}; + $item{descriptionFirst4sentences} =~ s/^((.*?\.){4}).*/$1/s; + $item{descriptionFirst3sentences} = $item{descriptionFirst4sentences}; + $item{descriptionFirst3sentences} =~ s/^((.*?\.){3}).*/$1/s; + $item{descriptionFirst2sentences} = $item{descriptionFirst3sentences}; + $item{descriptionFirst2sentences} =~ s/^((.*?\.){2}).*/$1/s; + $item{descriptionFirstSentence} = $item{descriptionFirst2sentences}; + $item{descriptionFirstSentence} =~ s/^(.*?\.).*/$1/s; + push @{$var{item_loop}}, \%item; + } + return \%var; +} #------------------------------------------------------------------- @@ -593,11 +281,12 @@ sub prepareView { my $template = WebGUI::Asset::Template->new($self->session, $self->get("templateId")); $template->prepare($self->getMetaDataAsTemplateVariables); $self->{_viewTemplate} = $template; - my $i18n = WebGUI::International->new($self->session,'Asset_SyndicatedContent'); - my $rssFeedSuffix=$i18n->get('RSS Feed Title Suffix'); - my $title = $self->get("title")." ".$rssFeedSuffix; + my $title = $self->get("title"); $title =~ s/\"/"/g; - $self->session->style->setLink($self->getUrl("func=viewRSS20"), { rel=>'alternate', type=>'application/rss+xml', title=>$title }); + my $style = $self->session->style; + $style->setLink($self->getUrl("func=viewRss"), { rel=>'alternate', type=>'application/rss+xml', title=>$title.' (RSS)' }); + $style->setLink($self->getUrl("func=viewRdf"), { rel=>'alternate', type=>'application/rdf+xml', title=>$title.' 
(RDF)' }); + $style->setLink($self->getUrl("func=viewAtom"), { rel=>'alternate', type=>'application/atom+xml', title=>$title.' (Atom)' }); } @@ -625,6 +314,22 @@ Returns the rendered output of the wobject. sub view { my $self = shift; + + # try the cached version + my $cache = WebGUI::Cache->new($self->session,"view_".$self->getId); + my $out = $cache->get; + return $out if ($out ne ""); + + # generate from scratch + my $feed = $self->generateFeed; + $out = $self->processTemplate($self->getTemplateVariables($feed),undef,$self->{_viewTemplate}); + if (!$self->session->var->isAdminOn && $self->get("cacheTimeout") > 10) { + $cache->set($out,$self->get("cacheTimeout")); + } + return $out; + + + my $rssFlavor = shift; if ($rssFlavor eq "" && !$self->session->var->isAdminOn && $self->get("cacheTimeout") > 10) { my $out = WebGUI::Cache->new($self->session,"view_".$self->getId)->get; @@ -688,48 +393,6 @@ sub view { #------------------------------------------------------------------- -=head2 getRssUrl - -Get the RSS URL and process macros if we're supposed to. - -=cut - -sub getRssUrl { - my $self = shift; - my $value = $self->get("rssUrl"); - WebGUI::Macro::process($self->session,\$value) if $self->get("processMacroInRssUrl"); - return $value; -} - -#------------------------------------------------------------------- - -=head2 getContentLastModified ( ) - -Derive the last-modified date from the revisionDate of the object and from the dates of the RSS feeds. - -=cut - -sub getContentLastModified { - # Buggo, is this too expensive? Do we really want to do this every time? - # But how else are we supposed to get a reasonable last-modified date? - # Maybe just approximate... ? 
- my $self = shift; - - my $maxHeadlines = $self->_getMaxHeadlines; - my @validatedUrls = $self->_getValidatedUrls; - my ($item_loop, $rss_feeds) = $self->_get_items(\@validatedUrls, $maxHeadlines); - my $mtime = $self->get("revisionDate"); - - foreach my $rss (@$rss_feeds) { - next unless defined $rss->{last_modified}; - $mtime = $rss->{last_modified} if $rss->{last_modified} > $mtime; - } - - return $mtime; -} - -#------------------------------------------------------------------- - =head2 www_view ( ) See WebGUI::Asset::Wobject::www_view() for details. @@ -742,61 +405,113 @@ sub www_view { $self->SUPER::www_view(@_); } + #------------------------------------------------------------------- -=head2 www_viewRSS090 ( ) +=head2 www_viewAtom ( ) -Emit an RSS 0.9 feed. +Emit an Atom 0.3 feed. =cut -sub www_viewRSS090 { - my $self=shift; - return $self->view('0.9'); +sub www_viewAtom { + my $self = shift; + my $feed = $self->generateFeed; + my $atom = XML::FeedPP::Atom->new; + $atom->merge($feed); + $self->session->http->setMimeType('application/atom+xml'); + return $atom->to_string; } - #------------------------------------------------------------------- -=head2 www_viewRSS091 ( ) +=head2 www_viewRdf ( ) -Emit an RSS 0.91 feed. +Emit an RSS 1.0 / RDF feed. =cut -sub www_viewRSS091 { - my $self=shift; - return $self->view('0.91'); +sub www_viewRdf { + my $self = shift; + my $feed = $self->generateFeed; + my $rdf = XML::FeedPP::RDF->new; + $rdf->merge($feed); + $self->session->http->setMimeType('application/rdf+xml'); + return $rdf->to_string; } - #------------------------------------------------------------------- -=head2 www_viewRSS10 ( ) - -Emit an RSS 1.0 feed. - -=cut - -sub www_viewRSS10 { - my $self=shift; - return $self->view('1.0'); -} - - -#------------------------------------------------------------------- - -=head2 www_viewRSS20 ( ) +=head2 www_viewRss ( ) Emit an RSS 2.0 feed. 
=cut -sub www_viewRSS20 { - my $self=shift; - return $self->view('2.0'); +sub www_viewRss { + my $self = shift; + my $feed = $self->generateFeed; + my $rss = XML::FeedPP::RSS->new; + $rss->merge($feed); + $self->session->http->setMimeType('application/rss+xml'); + return $rss->to_string; } +#------------------------------------------------------------------- + +=head2 www_viewRSS090 ( ) + +Deprecated. Use www_viewRss() instead. + +=cut + +sub www_viewRSS090 { + my $self = shift; + return $self->www_viewRss; +} + +#------------------------------------------------------------------- + +=head2 www_viewRSS091 ( ) + +Deprecated. Use www_viewRss() instead. + +=cut + +sub www_viewRSS091 { + my $self = shift; + return $self->www_viewRss; +} + +#------------------------------------------------------------------- + +=head2 www_viewRSS10 ( ) + +Deprecated. Use www_viewRdf() instead. + +=cut + +sub www_viewRSS10 { + my $self = shift; + return $self->www_viewRdf; +} + +#------------------------------------------------------------------- + +=head2 www_viewRSS20 ( ) + +Deprecated. Use www_viewRss() instead. 
+ +=cut + +sub www_viewRSS20 { + my $self = shift; + return $self->www_viewRss; +} + + + + 1; diff --git a/lib/WebGUI/Help/Asset_SyndicatedContent.pm b/lib/WebGUI/Help/Asset_SyndicatedContent.pm index 6e65394fb..837f80846 100644 --- a/lib/WebGUI/Help/Asset_SyndicatedContent.pm +++ b/lib/WebGUI/Help/Asset_SyndicatedContent.pm @@ -17,24 +17,27 @@ our $HELP = { }, ], variables => [ - { 'name' => 'channel.title' }, - { 'name' => 'channel.description' }, - { 'name' => 'channel.link' }, - { 'name' => 'rss.url', - 'variables' => [ - { 'name' => 'rss.url.0.9' }, - { 'name' => 'rss.url.0.91' }, - { 'name' => 'rss.url.1.0' }, - { 'name' => 'rss.url.2.0' } - ] - }, + { 'name' => 'channel_title' }, + { 'name' => 'channel_description' }, + { 'name' => 'channel_link' }, + { 'name' => 'channel_date' }, + { 'name' => 'channel_copyright' }, + { 'name' => 'channel_image_url' }, + { 'name' => 'channel_image_title' }, + { 'name' => 'channel_image_link' }, + { 'name' => 'channel_image_description' }, + { 'name' => 'channel_image_width' }, + { 'name' => 'channel_image_height' }, + { 'name' => 'rss_url' }, + { 'name' => 'rdf_url' }, + { 'name' => 'atom_url' }, { 'name' => 'item_loop', 'variables' => [ - { 'name' => 'site_title' }, - { 'name' => 'site_link' }, - { 'name' => 'new_rss_site' }, { 'name' => 'title' }, { 'name' => 'link' }, + { 'name' => 'category' }, + { 'name' => 'author' }, + { 'name' => 'guid' }, { 'name' => 'description' }, { 'name' => 'descriptionFirst100words' }, { 'name' => 'descriptionFirst75words' }, @@ -73,7 +76,6 @@ our $HELP = { { 'name' => 'rssUrl' }, { 'name' => 'processMacrosInRssUrl' }, { 'name' => 'maxHeadlines' }, - { 'name' => 'displayMode' }, { 'name' => 'hasTerms' }, ], related => [], diff --git a/lib/WebGUI/Workflow/Activity/GetSyndicatedContent.pm b/lib/WebGUI/Workflow/Activity/GetSyndicatedContent.pm index fd44e9535..adea6acb3 100644 --- a/lib/WebGUI/Workflow/Activity/GetSyndicatedContent.pm +++ b/lib/WebGUI/Workflow/Activity/GetSyndicatedContent.pm @@ 
-76,65 +76,42 @@ sub execute { $self->session->errorHandler->error("Could not instanciate Workflow Instance in GetSyndicatedContent Activity"); return $self->ERROR; } - + my $log = $self->session->log; # start time to check for timeouts my $time = time(); my $ttl = $self->getTTL; - my @syndicatedUrls = @{$self->getSyndicatedUrls($instance)}; - while (my $url = shift(@syndicatedUrls)) { + my $assets = JSON->new->decode($instance->getScratch("syndicatedassets") || '[]'); + if (scalar @$assets < 1) { + $assets = $self->session->db->buildArrayRef("select assetId from asset where className like 'WebGUI::Asset::Wobject::SyndicatedContent'"); + } + while (my $id = shift(@{$assets})) { # Get RSS data, which will be stored in the cache - $self->session->errorHandler->info("GetSyndicatedContent workflow: Caching $url"); - my $returnValue = WebGUI::Asset::Wobject::SyndicatedContent::_get_rss_data($self->session, $url); - if (!defined $returnValue) { - $self->session->errorHandler->warn("GetSyndicatedContent Workflow Activity: _get_rss_data returned undef while trying to process syndicated content url $url, which usually indicates an improper URL, or a malformed document"); - next; - } + $log->info("GetSyndicatedContent: Caching for $id"); + my $asset = WebGUI::Asset::Wobject::SyndicatedContent->new($self->session, $id); + if (defined $asset) { + my $feed = $asset->generateFeed; + unless ($feed->isa('XML::FeedPP')) { + $log->error("GetSyndicatedContent: Syndicated Content Asset $id returned an invalid feed"); + } + } + else { + $log->error("GetSyndicatedContent: Couldn't instanciate $id") + } # Check for timeout - last - if (time() - $time > $ttl); + last if (time() - $time > $ttl); } # if there are urls left, we need to process again - if (scalar(@syndicatedUrls) > 0) { - $instance->setScratch("syndicatedUrls", JSON::encode_json(\@syndicatedUrls)); + if (scalar(@$assets) > 0) { + $instance->setScratch("syndicatedassets", JSON->new->encode($assets)); return $self->WAITING; } - 
$instance->deleteScratch("syndicatedUrls"); + $instance->deleteScratch("syndicatedassets"); return $self->COMPLETE; } -#--------------------------------------------------------------------- -=head2 getWobjectUrls ( ) - -Returns URLs from all of the Syndicated Content Wobjects from scratch or fetches them from the db if needed - -=head3 session - -A reference to the current webgui session - -=cut - -sub getSyndicatedUrls { - my $self = shift; - my $instance = shift; - my $syndicatedUrls = $instance->getScratch("syndicatedUrls"); - if ($syndicatedUrls) { - return JSON::decode_json($syndicatedUrls); - } - - my $urls = []; - my $assets = WebGUI::Asset->getRoot($self->session)->getLineage(['descendants'], { - includeOnlyClasses => ['WebGUI::Asset::Wobject::SyndicatedContent'], - returnObjects => 1, - }); - foreach my $asset (@$assets) { - push @$urls, split(/\s+/, $asset->getRssUrl); - } - $instance->setScratch("syndicatedUrls", JSON::encode_json($urls)); - return $urls; -} 1; diff --git a/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm b/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm index f7045d714..0e33cc352 100644 --- a/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm +++ b/lib/WebGUI/i18n/English/Asset_SyndicatedContent.pm @@ -46,44 +46,89 @@ our $I18N = { message => q|Edit Syndicated Content| }, - 'channel.title' => { - message => q|The title of this piece of syndicated content. This will be the same as the title of the Syndicated Content object when you're creating an aggregate feed.|, - lastUpdated => 1149567508, + 'channel_title' => { + message => q|The title of this piece of syndicated content. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, }, - 'channel.description' => { - message => q|A description of the content available through this channel. 
This will be the same as the description of the Syndicated Content object when you're creating an aggregate feed.|, - lastUpdated => 1149567508, + 'channel_description' => { + message => q|A description of the content available through this channel. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, }, - 'channel.link' => { - message => q|A URL back to the originating site of this channel. This variable *will not* exist when you're creating an aggregate feed, because there's no single channel to link to.|, - lastUpdated => 1149567508, + 'channel_link' => { + message => q|A URL back to the originating site of this channel. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, }, - 'rss.url' => { - message => q|This is the URL to use to get the contents of this Syndicated Content wobject as an RSS 2.0 feed. Additionally, you can specify RSS versions via the following template variables:|, - lastUpdated => 1149567508, + 'channel_date' => { + message => q|The date this channel was updated. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, }, - 'rss.url.0.9' => { - message => q|The contents of this wobject as an RSS 0.9 feed.|, - lastUpdated => 1149567508, + 'channel_copyright' => { + message => q|Copyright holder information. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, }, - 'rss.url.0.91' => { - message => q|The contents of this wobject as an RSS 0.91 feed.|, - lastUpdated => 1149567508, + 'channel_image_url' => { + message => q|The URL of the image attached to this feed. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, }, - 'rss.url.1.0' => { - message => q|The contents of this wobject as an RSS 1.0 feed.|, - lastUpdated => 1149567508, + 'channel_image_title' => { + message => q|The title of the image attached to this feed. 
This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, }, - 'rss.url.2.0' => { - message => q|The contents of this wobject as an RSS 2.0 feed.|, - lastUpdated => 1149567508, + 'channel_image_description' => { + message => q|The description of the image attached to this feed. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, + }, + + 'channel_image_link' => { + message => q|The URL of the link that should wrap this feed's image. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, + }, + + 'channel_image_width' => { + message => q|The width in pixels of this feed's image. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, + }, + + 'channel_image_height' => { + message => q|The height in pixels of this feed's image. This variable will be populated by the first feed in a multi-feed list.|, + lastUpdated => 0, + }, + + 'rss_url' => { + message => q|This is the URL to use to get the contents of this Syndicated Content asset as an RSS 2.0 feed. 
Additionally, you can specify RSS versions via the following template variables:|, + lastUpdated => 0, + }, + + 'rdf_url' => { + message => q|The contents of this asset as an RDF/RSS 1.0 feed.|, + lastUpdated => 0, + }, + + 'atom_url' => { + message => q|The contents of this asset as an Atom 0.3 feed.|, + lastUpdated => 0, + }, + + 'category' => { + message => q|A category this item belongs to.|, + lastUpdated => 0, + }, + + 'author' => { + message => q|The publisher of this item.|, + lastUpdated => 0, + }, + + 'guid' => { + message => q|A unique id for this item.|, + lastUpdated => 0, }, 'item_loop' => { @@ -91,21 +136,6 @@ our $I18N = { lastUpdated => 1149567508, }, - 'site_title' => { - message => q|The title of the RSS feed this item comes from|, - lastUpdated => 1149567508, - }, - - 'site_link' => { - message => q|Link to the source RSS feed.|, - lastUpdated => 1149567508, - }, - - 'new_rss_site' => { - message => q|A "boolean" variable (suitable for using in a <tmpl_if> tag) that indicates we've started outputting items from a source RSS feed different than the previous item. This is most useful when you're viewing feeds in "grouped" mode- it gives you a hook to output site_title and site_link at the right time.|, - lastUpdated => 1149567508, - }, - 'title' => { message => q|The title of a piece of content. If you're filtering on terms, this field will be inspected.|, lastUpdated => 1149567508, @@ -126,51 +156,21 @@ our $I18N = { message => q|Syndicated Content Template| }, - 'displayModeLabel' => { - lastUpdated => 1047855526, - message => q|Display Mode| - }, - - 'displayModeSubtext' => { - lastUpdated => 1047855526, - message => q|

"Interleaved" means items from all feeds are lumped together, "Grouped by Feed" means items are grouped by the feed they came from. Either setting is fine if you're only bringing in a single feed.

| - }, - - 'grouped' => { - lastUpdated => 1047855526, - message => q|Grouped by Feed| - }, - 'hasTermsLabel' => { lastUpdated => 1047855526, message => q|With any of these terms| }, - 'interleaved' => { - lastUpdated => 1047855526, - message => q|Interleaved| - }, - 'rssTabName' => { lastUpdated => 1118417024, message => q|RSS| }, - 'RSS Feed Title Suffix' => { - lastUpdated => 1118417024, - message => q|RSS 2.0 Feed| - }, - '72 description' => { message => q|Select a template for this content.|, lastUpdated => 1119977659, }, - 'displayModeLabel description' => { - message => q|

If you're aggregating feeds, you can change the mode in which the items are displayed. "Grouped by Feed" means the items will be grouped together by the feeds they come from. "Interleaved" means the items will be mixed together in a "round-robin" fashion from all the feeds. If you're grouping your feeds, please look at new_rss_site "item_loop" template variables, it gives you a hook allowing you to output the feed title

|, - lastUpdated => 1146799950, - }, - 'hasTermsLabel description' => { message => q|

Enter terms (separated by commas) that you'd like to filter the feeds on. For instance, if you enter:

linux, windows development, blogs
@@ -192,10 +192,10 @@ our $I18N = {
  • http://w.moreover.com/
  • -

    Currently, WebGUI can handle RSS versions .90, .91, 1.0, and 2.0. Atom feeds aren't supported for now. Probably other RSS-ish files would work too. +

    Currently, WebGUI can handle RSS versions .90, .91, 1.0, and 2.0; Atom .3 and 1.0. Probably other RSS-ish files would work too.

    To create an aggregate RSS feed (one that pulls information from multiple RSS feeds), include a list of URLs, one on each line, instead of a single URL. Items will be sorted by the date WebGUI first received the story.

    |, - lastUpdated => 1168228049, + lastUpdated => 1225928949, }, '3 description' => { diff --git a/lib/XML/RSS/Creator.pm b/lib/XML/RSS/Creator.pm deleted file mode 100644 index 7301a9d01..000000000 --- a/lib/XML/RSS/Creator.pm +++ /dev/null @@ -1,1501 +0,0 @@ -package XML::RSS::Creator; - -use strict; -use Carp; -use vars qw($VERSION $AUTOLOAD @ISA); - -$VERSION = '1.06'; - -my %v0_9_ok_fields = ( - channel => { - title => '', - description => '', - link => '', - }, - image => { - title => '', - url => '', - link => '' - }, - textinput => { - title => '', - description => '', - name => '', - link => '' - }, - items => [], - num_items => 0, - version => '', - encoding => '' - ); - -my %v0_9_1_ok_fields = ( - channel => { - title => '', - copyright => '', - description => '', - docs => '', - language => '', - lastBuildDate => '', - link => '', - managingEditor => '', - pubDate => '', - rating => '', - webMaster => '' - }, - image => { - title => '', - url => '', - link => '', - width => '', - height => '', - description => '' - }, - skipDays => { - day => '' - }, - skipHours => { - hour => '' - }, - textinput => { - title => '', - description => '', - name => '', - link => '' - }, - items => [], - num_items => 0, - version => '', - encoding => '', - category => '' - ); - -my %v1_0_ok_fields = ( - channel => { - title => '', - description => '', - link => '', - }, - image => { - title => '', - url => '', - link => '' - }, - textinput => { - title => '', - description => '', - name => '', - link => '' - }, - skipDays => { - day => '' - }, - skipHours => { - hour => '' - }, - items => [], - num_items => 0, - version => '', - encoding => '', - output => '', - ); - -my %v2_0_ok_fields = ( - channel => { - title => '', - link => '', - description => '', - language => '', - copyright => '', - managingEditor => '', - webMaster => '', - pubDate => '', - lastBuildDate => '', - category => '', - generator => '', - docs => '', - cloud => '', - ttl => '', - image => '', - 
textinput => '', - skipHours => '', - skipDays => '', - }, - image => { - title => '', - url => '', - link => '', - width => '', - height => '', - description => '' - }, - skipDays => { - day => '' - }, - skipHours => { - hour => '' - }, - textinput => { - title => '', - description => '', - name => '', - link => '' - }, - items => [], - num_items => 0, - version => '', - encoding => '', - category => '', - cloud => '', - ttl => '' - ); - -my %languages = ( - 'af' => 'Afrikaans', - 'sq' => 'Albanian', - 'eu' => 'Basque', - 'be' => 'Belarusian', - 'bg' => 'Bulgarian', - 'ca' => 'Catalan', - 'zh-cn' => 'Chinese (Simplified)', - 'zh-tw' => 'Chinese (Traditional)', - 'hr' => 'Croatian', - 'cs' => 'Czech', - 'da' => 'Danish', - 'nl' => 'Dutch', - 'nl-be' => 'Dutch (Belgium)', - 'nl-nl' => 'Dutch (Netherlands)', - 'en' => 'English', - 'en-au' => 'English (Australia)', - 'en-bz' => 'English (Belize)', - 'en-ca' => 'English (Canada)', - 'en-ie' => 'English (Ireland)', - 'en-jm' => 'English (Jamaica)', - 'en-nz' => 'English (New Zealand)', - 'en-ph' => 'English (Phillipines)', - 'en-za' => 'English (South Africa)', - 'en-tt' => 'English (Trinidad)', - 'en-gb' => 'English (United Kingdom)', - 'en-us' => 'English (United States)', - 'en-zw' => 'English (Zimbabwe)', - 'fo' => 'Faeroese', - 'fi' => 'Finnish', - 'fr' => 'French', - 'fr-be' => 'French (Belgium)', - 'fr-ca' => 'French (Canada)', - 'fr-fr' => 'French (France)', - 'fr-lu' => 'French (Luxembourg)', - 'fr-mc' => 'French (Monaco)', - 'fr-ch' => 'French (Switzerland)', - 'gl' => 'Galician', - 'gd' => 'Gaelic', - 'de' => 'German', - 'de-at' => 'German (Austria)', - 'de-de' => 'German (Germany)', - 'de-li' => 'German (Liechtenstein)', - 'de-lu' => 'German (Luxembourg)', - 'el' => 'Greek', - 'hu' => 'Hungarian', - 'is' => 'Icelandic', - 'in' => 'Indonesian', - 'ga' => 'Irish', - 'it' => 'Italian', - 'it-it' => 'Italian (Italy)', - 'it-ch' => 'Italian (Switzerland)', - 'ja' => 'Japanese', - 'ko' => 'Korean', - 'mk' => 
'Macedonian', - 'no' => 'Norwegian', - 'pl' => 'Polish', - 'pt' => 'Portuguese', - 'pt-br' => 'Portuguese (Brazil)', - 'pt-pt' => 'Portuguese (Portugal)', - 'ro' => 'Romanian', - 'ro-mo' => 'Romanian (Moldova)', - 'ro-ro' => 'Romanian (Romania)', - 'ru' => 'Russian', - 'ru-mo' => 'Russian (Moldova)', - 'ru-ru' => 'Russian (Russia)', - 'sr' => 'Serbian', - 'sk' => 'Slovak', - 'sl' => 'Slovenian', - 'es' => 'Spanish', - 'es-ar' => 'Spanish (Argentina)', - 'es-bo' => 'Spanish (Bolivia)', - 'es-cl' => 'Spanish (Chile)', - 'es-co' => 'Spanish (Colombia)', - 'es-cr' => 'Spanish (Costa Rica)', - 'es-do' => 'Spanish (Dominican Republic)', - 'es-ec' => 'Spanish (Ecuador)', - 'es-sv' => 'Spanish (El Salvador)', - 'es-gt' => 'Spanish (Guatemala)', - 'es-hn' => 'Spanish (Honduras)', - 'es-mx' => 'Spanish (Mexico)', - 'es-ni' => 'Spanish (Nicaragua)', - 'es-pa' => 'Spanish (Panama)', - 'es-py' => 'Spanish (Paraguay)', - 'es-pe' => 'Spanish (Peru)', - 'es-pr' => 'Spanish (Puerto Rico)', - 'es-es' => 'Spanish (Spain)', - 'es-uy' => 'Spanish (Uruguay)', - 'es-ve' => 'Spanish (Venezuela)', - 'sv' => 'Swedish', - 'sv-fi' => 'Swedish (Finland)', - 'sv-se' => 'Swedish (Sweden)', - 'tr' => 'Turkish', - 'uk' => 'Ukranian' - ); - -# define required elements for RSS 0.9 -my $_REQ_v0_9 = { - channel => { - title => [1,40], - description => [1,500], - link => [1,500] - }, - image => { - title => [1,40], - url => [1,500], - link => [1,500] - }, - item => { - title => [1,100], - link => [1,500] - }, - textinput => { - title => [1,40], - description => [1,100], - name => [1,500], - link => [1,500] - } - }; - -# define required elements for RSS 0.91 -my $_REQ_v0_9_1 = { - channel => { - title => [1,100], - description => [1,500], - link => [1,500], - language => [1,5], - rating => [0,500], - copyright => [0,100], - pubDate => [0,100], - lastBuildDate => [0,100], - docs => [0,500], - managingEditor => [0,100], - webMaster => [0,100], - }, - image => { - title => [1,100], - url => [1,500], - link 
=> [0,500], - width => [0,144], - height => [0,400], - description => [0,500] - }, - item => { - title => [1,100], - link => [1,500], - description => [0,500] - }, - textinput => { - title => [1,100], - description => [1,500], - name => [1,20], - link => [1,500] - }, - skipHours => { - hour => [1,23] - }, - skipDays => { - day => [1,10] - } - }; - -# define required elements for RSS 2.0 -my $_REQ_v2_0 = { - channel => { - title => [1,100], - description => [1,500], - link => [1,500], - language => [0,5], - rating => [0,500], - copyright => [0,100], - pubDate => [0,100], - lastBuildDate => [0,100], - docs => [0,500], - managingEditor => [0,100], - webMaster => [0,100], - }, - image => { - title => [1,100], - url => [1,500], - link => [0,500], - width => [0,144], - height => [0,400], - description => [0,500] - }, - item => { - title => [1,100], - link => [1,500], - description => [0,500] - }, - textinput => { - title => [1,100], - description => [1,500], - name => [1,20], - link => [1,500] - }, - skipHours => { - hour => [1,23] - }, - skipDays => { - day => [1,10] - } - }; - -my $modules = { - 'http://purl.org/rss/1.0/modules/syndication/' => 'syn', - 'http://purl.org/dc/elements/1.1/' => 'dc', - 'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo', - 'http://webns.net/mvcb/' => 'admin' - }; - -my %syn_ok_fields = ( - updateBase => '', - updateFrequency => '', - updatePeriod => '', - ); - -my %dc_ok_fields = ( - title => '', - creator => '', - subject => '', - description => '', - publisher => '', - contributor => '', - date => '', - type => '', - format => '', - identifier => '', - source => '', - language => '', - relation => '', - coverage => '', - rights => '', - ); - -my %rdf_resource_fields = ( - 'http://webns.net/mvcb/' => { - generatorAgent => 1, - errorReportsTo => 1 - }, - 'http://purl.org/rss/1.0/modules/annotate/' => { - reference => 1 - }, - 'http://my.theinfo.org/changed/1.0/rss/' => { - server => 1 - } - ); - -sub new { - my $class = shift; - - my 
$self={}; - bless $self, $class; - - $self->_initialize(@_); - - return $self; -} - - -sub _initialize { - my $self = shift; - my %hash = @_; - - # internal hash - $self->{_internal} = {}; - - # init num of items to 0 - $self->{num_items} = 0; - - # adhere to Netscape limits; no by default - $self->{'strict'} = 0; - - # initialize items - $self->{items} = []; - - # namespaces - $self->{namespaces} = {}; - $self->{rss_namespace} = ''; - - # modules - $self->{modules} = $modules; - - # encode output from as_string? - (exists($hash{encode_output})) - ? ($self->{encode_output} = $hash{encode_output}) - : ($self->{encode_output} = 1); - - #get version info - (exists($hash{version})) - ? ($self->{version} = $hash{version}) - : ($self->{version} = '1.0'); - - # set default output - (exists($hash{output})) - ? ($self->{output} = $hash{output}) - : ($self->{output} = ""); - - # encoding - (exists($hash{encoding})) - ? ($self->{encoding} = $hash{encoding}) - : ($self->{encoding} = 'UTF-8'); - - # initialize RSS data structure - # RSS version 0.9 - if ($self->{version} eq '0.9') { - # Copy the hashes instead of using them directly to avoid - # problems with multiple XML::RSS objects being used concurrently - foreach my $i (qw(channel image textinput)) { - my %template=%{$v0_9_ok_fields{$i}}; - $self->{$i} = \%template; - } - - # RSS version 0.91 - } elsif ($self->{version} eq '0.91') { - foreach my $i (qw(channel image textinput skipDays skipHours)) { - my %template=%{$v0_9_1_ok_fields{$i}}; - $self->{$i} = \%template; - } - - # RSS version 2.0 - } elsif ($self->{version} eq '2.0') { - $self->{namespaces}->{'blogChannel'} = "http://backend.userland.com/blogChannelModule"; - foreach my $i (qw(channel image textinput skipDays skipHours)) { - my %template=%{ $v2_0_ok_fields{$i} }; - $self->{$i} = \%template; - } - - # RSS version 1.0 - #} elsif ($self->{version} eq '1.0') { - } else { - foreach my $i (qw(channel image textinput)) { - #foreach my $i (keys(%v1_0_ok_fields)) { - my 
%template=%{$v1_0_ok_fields{$i}}; - $self->{$i} = \%template; - } - } -} - -sub _auto_add_modules { - my $self = shift; - - for my $ns (keys %{$self->{namespaces}}) { - # skip default namespaces - next if $ns eq "rdf" || $ns eq "#default" - || exists $self->{modules}{ $self->{namespaces}{$ns} }; - $self->add_module(prefix => $ns, uri => $self->{namespaces}{$ns}) - } - - $self; -} - -sub add_module { - my $self = shift; - my $hash = {@_}; - - $hash->{prefix} =~ /^[a-z_][a-z0-9.-_]*$/ or - croak "a namespace prefix should look like [a-z_][a-z0-9.-_]*"; - - $hash->{uri} or - croak "a URI must be provided in a namespace declaration"; - - $self->{modules}->{$hash->{uri}} = $hash->{prefix}; -} - -sub add_item { - my $self = shift; - my $hash = {@_}; - - # strict Netscape Netcenter length checks - if ($self->{'strict'}) { - # make sure we have a title and link - croak "title and link elements are required" - unless ($hash->{title} && $hash->{'link'}); - - # check string lengths - croak "title cannot exceed 100 characters in length" - if (length($hash->{title}) > 100); - croak "link cannot exceed 500 characters in length" - if (length($hash->{'link'}) > 500); - croak "description cannot exceed 500 characters in length" - if (exists($hash->{description}) - && length($hash->{description}) > 500); - - # make sure there aren't already 15 items - croak "total items cannot exceed 15 " if (@{$self->{items}} >= 15); - } - - # add the item to the list - if (defined($hash->{mode}) && $hash->{mode} eq 'insert') { - unshift (@{$self->{items}}, $hash); - } else { - push (@{$self->{items}}, $hash); - } - - # return reference to the list of items - return $self->{items}; -} - -sub as_rss_0_9 { - my $self = shift; - my $output; - - # XML declaration - my $encoding = exists $$self{encoding} ? qq| encoding="$$self{encoding}"| : ''; - $output .= qq|\n\n|; - - # RDF root element - $output .= 'encode($self->{channel}->{title}) .''."\n"; - $output .= ''. 
$self->encode($self->{channel}->{'link'}) .''."\n"; - $output .= ''. $self->encode($self->{channel}->{description}) .''."\n"; - $output .= ''."\n\n"; - - ################# - # image element # - ################# - if ($self->{image}->{url}) { - $output .= ''."\n"; - - # title - $output .= ''. $self->encode($self->{image}->{title}) .''."\n"; - - # url - $output .= ''. $self->encode($self->{image}->{url}) .''."\n"; - - # link - $output .= ''. $self->encode($self->{image}->{'link'}) .''."\n" - if $self->{image}->{link}; - - # end image element - $output .= ''."\n\n"; - } - - ################ - # item element # - ################ - foreach my $item (@{$self->{items}}) { - if ($item->{title}) { - $output .= ''."\n"; - $output .= ''. $self->encode($item->{title}) .''."\n"; - $output .= ''. $self->encode($item->{'link'}) .''."\n"; - - # end image element - $output .= ''."\n\n"; - } - } - - ##################### - # textinput element # - ##################### - if ($self->{textinput}->{'link'}) { - $output .= ''."\n"; - $output .= ''. $self->encode($self->{textinput}->{title}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{description}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{name}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{'link'}) .''."\n"; - $output .= ''."\n\n"; - } - - $output .= ''; - - return $output; -} - -sub as_rss_0_9_1 { - my $self = shift; - my $output; - - # XML declaration - $output .= '{encoding}.'"?>'."\n\n"; - - # DOCTYPE - $output .= ''."\n\n"; - - # RSS root element - $output .= ''."\n\n"; - - ################### - # Channel Element # - ################### - $output .= ''."\n"; - $output .= ''. $self->encode($self->{channel}->{title}) .''."\n"; - $output .= ''. $self->encode($self->{channel}->{'link'}) .''."\n"; - $output .= ''. $self->encode($self->{channel}->{description}) .''."\n"; - - # language - if ($self->{channel}->{'dc'}->{'language'}) { - $output .= ''. 
$self->encode($self->{channel}->{'dc'}->{'language'}) .''."\n"; - } elsif ($self->{channel}->{language}) { - $output .= ''. $self->encode($self->{channel}->{language}).''."\n"; - } - - # PICS rating - $output .= ''. $self->encode($self->{channel}->{rating}) .''."\n" - if $self->{channel}->{rating}; - - # copyright - if ($self->{channel}->{'dc'}->{'rights'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'rights'}) .''."\n"; - } elsif ($self->{channel}->{copyright}) { - $output .= ''. $self->encode($self->{channel}->{copyright}) .''."\n"; - } - - # publication date - if ($self->{channel}->{pubDate}) { - $output .= ''. $self->encode($self->{channel}->{pubDate}) .''."\n"; - } elsif ($self->{channel}->{'dc'}->{'date'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'date'}) .''."\n"; - } - - # last build date - if ($self->{channel}->{lastBuildDate}) { - $output .= ''. $self->encode($self->{channel}->{lastBuildDate}) .''."\n"; - } elsif ($self->{channel}->{'dc'}->{'date'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'date'}) .''."\n"; - } - - # external CDF URL - $output .= ''. $self->encode($self->{channel}->{docs}) .''."\n" - if $self->{channel}->{docs}; - - # managing editor - if ($self->{channel}->{'dc'}->{'publisher'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'publisher'}) .''."\n"; - } elsif ($self->{channel}->{managingEditor}) { - $output .= ''. $self->encode($self->{channel}->{managingEditor}) .''."\n"; - } - - # webmaster - if ($self->{channel}->{'dc'}->{'creator'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'creator'}) .''."\n"; - } elsif ($self->{channel}->{webMaster}) { - $output .= ''. $self->encode($self->{channel}->{webMaster}) .''."\n"; - } - - $output .= "\n"; - - ################# - # image element # - ################# - if ($self->{image}->{url}) { - $output .= ''."\n"; - - # title - $output .= ''. $self->encode($self->{image}->{title}) .''."\n"; - - # url - $output .= ''. 
$self->encode($self->{image}->{url}) .''."\n"; - - # link - $output .= ''. $self->encode($self->{image}->{'link'}) .''."\n" - if $self->{image}->{link}; - - # image width - $output .= ''. $self->encode($self->{image}->{width}) .''."\n" - if $self->{image}->{width}; - - # image height - $output .= ''. $self->encode($self->{image}->{height}) .''."\n" - if $self->{image}->{height}; - - # description - $output .= ''. $self->encode($self->{image}->{description}) .''."\n" - if $self->{image}->{description}; - - # end image element - $output .= ''."\n\n"; - } - - ################ - # item element # - ################ - foreach my $item (@{$self->{items}}) { - if ($item->{title}) { - $output .= ''."\n"; - $output .= ''. $self->encode($item->{title}) .''."\n"; - $output .= ''. $self->encode($item->{'link'}) .''."\n"; - - $output .= ''. $self->encode($item->{description}) .''."\n" - if $item->{description}; - - # end image element - $output .= ''."\n\n"; - } - } - - ##################### - # textinput element # - ##################### - if ($self->{textinput}->{'link'}) { - $output .= ''."\n"; - $output .= ''. $self->encode($self->{textinput}->{title}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{description}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{name}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{'link'}) .''."\n"; - $output .= ''."\n\n"; - } - - ##################### - # skipHours element # - ##################### - if ($self->{skipHours}->{hour}) { - $output .= ''."\n"; - $output .= ''. $self->encode($self->{skipHours}->{hour}) .''."\n"; - $output .= ''."\n\n"; - } - - #################### - # skipDays element # - #################### - if ($self->{skipDays}->{day}) { - $output .= ''."\n"; - $output .= ''. 
$self->encode($self->{skipDays}->{day}) .''."\n"; - $output .= ''."\n\n"; - } - - # end channel element - $output .= ''."\n"; - $output .= ''; - - return $output; -} - -sub as_rss_1_0 { - my $self = shift; - my $output; - - # XML declaration - $output .= '{encoding}.'"?>'."\n\n"; - - # RDF namespaces declaration - $output .="{modules}}) { - $output.=" xmlns:$v=\"$k\"\n"; - } - - $output .=">"."\n\n"; - - ################### - # Channel Element # - ################### - unless ( defined($self->{channel}->{'about'}) ) { - $output .= ''."\n"; - } else { - $output .= ''."\n"; - } - # title - $output .= ''. $self->encode($self->{channel}->{title}) .''."\n"; - - # link - $output .= ''. $self->encode($self->{channel}->{'link'}) .''."\n"; - - # description - $output .= ''. $self->encode($self->{channel}->{description}) .''."\n"; - - # additional elements for RSS 0.91 - # language - if ($self->{channel}->{'dc'}->{'language'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'language'}) .''."\n"; - } elsif ($self->{channel}->{language}) { - $output .= ''. $self->encode($self->{channel}->{language}) .''."\n"; - } - - # PICS rating - Dublin Core has not decided how to incorporate PICS ratings yet - #$$output .= ''.$self->{channel}->{rating}.''."\n" - #$if $self->{channel}->{rating}; - - # copyright - if ($self->{channel}->{'dc'}->{'rights'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'rights'}) .''."\n"; - } elsif ($self->{channel}->{copyright}) { - $output .= ''. $self->encode($self->{channel}->{copyright}) .''."\n"; - } - - # publication date - if ($self->{channel}->{'dc'}->{'date'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'date'}) .''."\n"; - } elsif ($self->{channel}->{pubDate}) { - $output .= ''. $self->encode($self->{channel}->{pubDate}) .''."\n"; - } elsif ($self->{channel}->{lastBuildDate}) { - $output .= ''. 
$self->encode($self->{channel}->{lastBuildDate}) .''."\n"; - } - - # external CDF URL - #$output .= ''.$self->{channel}->{docs}.''."\n" - #if $self->{channel}->{docs}; - - # managing editor - if ($self->{channel}->{'dc'}->{'publisher'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'publisher'}) .''."\n"; - } elsif ($self->{channel}->{managingEditor}) { - $output .= ''. $self->encode($self->{channel}->{managingEditor}) .''."\n"; - } - - # webmaster - if ($self->{channel}->{'dc'}->{'creator'}) { - $output .= ''. $self->encode($self->{channel}->{'dc'}->{'creator'}) .''."\n"; - } elsif ($self->{channel}->{webMaster}) { - $output .= ''. $self->encode($self->{channel}->{webMaster}) .''."\n"; - } - - # Dublin Core module - foreach my $dc ( keys %dc_ok_fields ) { - next if ($dc eq 'language' - || $dc eq 'creator' - || $dc eq 'publisher' - || $dc eq 'rights' - || $dc eq 'date'); - $self->{channel}->{dc}->{$dc} and $output .= "". $self->encode($self->{channel}->{dc}->{$dc}) ."\n"; - } - - # Syndication module - foreach my $syn ( keys %syn_ok_fields ) { - $self->{channel}->{syn}->{$syn} and $output .= "". $self->encode($self->{channel}->{syn}->{$syn}) ."\n"; - } - - # Taxonomy module - if (exists($self->{'channel'}->{'taxo'}) && $self->{'channel'}->{'taxo'}) { - $output .= "\n \n"; - foreach my $taxo (@{$self->{'channel'}->{'taxo'}}) { - $output.= " encode($taxo) . "\" />\n"; - } - $output .= " \n\n"; - } - - # Ad-hoc modules - while ( my($url, $prefix) = each %{$self->{modules}} ) { - next if $prefix =~ /^(dc|syn|taxo)$/; - while ( my($el, $value) = each %{$self->{channel}->{$prefix}} ) { - if ( exists( $rdf_resource_fields{ $url } ) and - exists( $rdf_resource_fields{ $url }{ $el }) ) { - $output .= qq!<$prefix:$el rdf:resource="! . - $self->encode($value) . - qq!" />\n!; - } else { - $output .= "<$prefix:$el>". 
$self->encode($value) ."\n"; - } - } - } - - # Seq items - $output .= "\n \n"; - - foreach my $item (@{$self->{items}}) { - my $about = ( defined($item->{'about'}) ) ? $item->{'about'} : $item->{'link'}; - $output .= ' '."\n"; - } - - $output .= " \n\n"; - - $self->{image}->{url} and - $output .= ''."\n"; - - $self->{textinput}->{'link'} and - $output .= ''."\n"; - - # end channel element - $output .= ''."\n\n"; - - ################# - # image element # - ################# - if ($self->{image}->{url}) { - $output .= ''."\n"; - - # title - $output .= ''. $self->encode($self->{image}->{title}) .''."\n"; - - # url - $output .= ''. $self->encode($self->{image}->{url}) .''."\n"; - - # link - $output .= ''. $self->encode($self->{image}->{'link'}) .''."\n" - if $self->{image}->{link}; - - # image width - #$output .= ''.$self->{image}->{width}.''."\n" - # if $self->{image}->{width}; - - # image height - #$output .= ''.$self->{image}->{height}.''."\n" - # if $self->{image}->{height}; - - # description - #$output .= ''.$self->{image}->{description}.''."\n" - # if $self->{image}->{description}; - - # Dublin Core Modules - foreach my $dc ( keys %dc_ok_fields ) { - $self->{image}->{dc}->{$dc} and - $output .= "". $self->encode($self->{image}->{dc}->{$dc}) ."\n"; - } - - # Ad-hoc modules for images - while ( my($url, $prefix) = each %{$self->{modules}} ) { - next if $prefix =~ /^(dc|syn|taxo)$/; - while ( my($el, $value) = each %{$self->{image}->{$prefix}} ) { - if ( exists( $rdf_resource_fields{ $url } ) and - exists( $rdf_resource_fields{ $url }{ $el }) ) { - $output .= qq!<$prefix:$el rdf:resource="! . - $self->encode($value) . - qq!" />\n!; - } else { - $output .= "<$prefix:$el>". $self->encode($value) ."\n"; - } - } - } - # end image element - $output .= ''."\n\n"; - } # end if ($self->{image}->{url}) { - - ################ - # item element # - ################ - foreach my $item (@{$self->{items}}) { - if ($item->{title}) { - my $about = ( defined($item->{'about'}) ) ? 
$item->{'about'} : $item->{'link'}; - $output .= 'encode($item->{title}) .''."\n"; - $output .= ''. $self->encode($item->{'link'}) .''."\n"; - $item->{description} and $output .= ''. $self->encode($item->{description}) .''."\n"; - - # Dublin Core module - foreach my $dc ( keys %dc_ok_fields ) { - $item->{dc}->{$dc} and $output .= "". $self->encode($item->{dc}->{$dc}) ."\n"; - } - - # Taxonomy module - if (exists($item->{'taxo'}) && $item->{'taxo'}) { - $output .= "\n \n"; - foreach my $taxo (@{$item->{'taxo'}}) { - $output.= " \n"; - } - $output .= " \n\n"; - } - - # Ad-hoc modules - while ( my($url, $prefix) = each %{$self->{modules}} ) { - next if $prefix =~ /^(dc|syn|taxo)$/; - while ( my($el, $value) = each %{$item->{$prefix}} ) { - if ( exists( $rdf_resource_fields{ $url } ) and - exists( $rdf_resource_fields{ $url }{ $el }) ) { - $output .= qq!<$prefix:$el rdf:resource="! . - $self->encode($value) . - qq!" />\n!; - } else { - $output .= "<$prefix:$el>". $self->encode($value) ."\n"; - } - } - } - # end item element - $output .= ''."\n\n"; - } - } # end foreach my $item (@{$self->{items}}) { - - ##################### - # textinput element # - ##################### - if ($self->{textinput}->{'link'}) { - $output .= ''."\n"; - $output .= ''. $self->encode($self->{textinput}->{title}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{description}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{name}) .''."\n"; - $output .= ''. $self->encode($self->{textinput}->{'link'}) .''."\n"; - - # Dublin Core module - foreach my $dc ( keys %dc_ok_fields ) { - $self->{textinput}->{dc}->{$dc} - and $output .= "". $self->encode($self->{textinput}->{dc}->{$dc}) ."\n"; - } - - # Ad-hoc modules - while ( my($url, $prefix) = each %{$self->{modules}} ) { - next if $prefix =~ /^(dc|syn|taxo)$/; - while ( my($el, $value) = each %{$self->{textinput}->{$prefix}} ) { - $output .= "<$prefix:$el>". 
$self->encode($value) ."\n"; - } - } - - $output .= ''."\n\n"; - } - - $output .= ''; -} - -sub as_rss_2_0 { - my $self = shift; - my $output; - - # XML declaration - $output .= '{encoding}.'"?>'."\n\n"; - - # DOCTYPE - # $output .= ''."\n\n"; - - # RSS root element - # $output .= ''."\n\n"; - $output .= '' . "\n\n"; - - ################### - # Channel Element # - ################### - $output .= ''."\n"; - $output .= ''.$self->encode($self->{channel}->{title}).''."\n"; - $output .= ''.$self->encode($self->{channel}->{'link'}).''."\n"; - $output .= ''.$self->encode($self->{channel}->{description}).''."\n"; - - # language - if ($self->{channel}->{'dc'}->{'language'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{'language'}).''."\n"; - } elsif ($self->{channel}->{language}) { - $output .= ''.$self->encode($self->{channel}->{language}).''."\n"; - } - - # PICS rating - # Not supported by RSS 2.0 - # $output .= ''.$self->{channel}->{rating}.''."\n" - # if $self->{channel}->{rating}; - - # copyright - if ($self->{channel}->{'dc'}->{'rights'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{'rights'}).''."\n"; - } elsif ($self->{channel}->{copyright}) { - $output .= ''.$self->encode($self->{channel}->{copyright}).''."\n"; - } - - # publication date - if ($self->{channel}->{pubDate}) { - $output .= ''.$self->encode($self->{channel}->{pubDate}).''."\n"; - } elsif ($self->{channel}->{'dc'}->{'date'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{'date'}).''."\n"; - } - - # last build date - if ($self->{channel}->{'dc'}->{'date'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{lastBuildDate}).''."\n"; - } elsif ($self->{channel}->{lastBuildDate}) { - $output .= ''.$self->encode($self->{channel}->{lastBuildDate}).''."\n"; - } - - # external CDF URL - $output .= ''.$self->encode($self->{channel}->{docs}).''."\n" - if $self->{channel}->{docs}; - - # managing editor - if ($self->{channel}->{'dc'}->{'publisher'}) { - $output .= 
''.$self->encode($self->{channel}->{'dc'}->{'publisher'}).''."\n"; - } elsif ($self->{channel}->{managingEditor}) { - $output .= ''.$self->encode($self->{channel}->{managingEditor}).''."\n"; - } - - # webmaster - if ($self->{channel}->{'dc'}->{'creator'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{'creator'}).''."\n"; - } elsif ($self->{channel}->{webMaster}) { - $output .= ''.$self->encode($self->{channel}->{webMaster}).''."\n"; - } - - # category - if ($self->{channel}->{'dc'}->{'category'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{'category'}).''."\n"; - } elsif ($self->{channel}->{category}) { - $output .= ''.$self->encode($self->{channel}->{generator}).''."\n"; - } - - # generator - if ($self->{channel}->{'dc'}->{'generator'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{'generator'}).''."\n"; - } elsif ($self->{channel}->{generator}) { - $output .= ''.$self->encode($self->{channel}->{generator}).''."\n"; - } - - # Insert cloud support here - - # ttl - if ($self->{channel}->{'dc'}->{'ttl'}) { - $output .= ''.$self->encode($self->{channel}->{'dc'}->{'ttl'}).''."\n"; - } elsif ($self->{channel}->{ttl}) { - $output .= ''.$self->encode($self->{channel}->{ttl}).''."\n"; - } - - - - $output .= "\n"; - - ################# - # image element # - ################# - if ($self->{image}->{url}) { - $output .= ''."\n"; - - # title - $output .= ''.$self->encode($self->{image}->{title}).''."\n"; - - # url - $output .= ''.$self->encode($self->{image}->{url}).''."\n"; - - # link - $output .= ''.$self->encode($self->{image}->{'link'}).''."\n" - if $self->{image}->{link}; - - # image width - $output .= ''.$self->encode($self->{image}->{width}).''."\n" - if $self->{image}->{width}; - - # image height - $output .= ''.$self->encode($self->{image}->{height}).''."\n" - if $self->{image}->{height}; - - # description - $output .= ''.$self->encode($self->{image}->{description}).''."\n" - if $self->{image}->{description}; - - # end image 
element - $output .= ''."\n\n"; - } - - ################ - # item element # - ################ - foreach my $item (@{$self->{items}}) { - if ($item->{title}) { - $output .= ''."\n"; - $output .= ''.$self->encode($item->{title}).''."\n" - if $item->{title}; - $output .= ''.$self->encode($item->{'link'}).''."\n" - if $item->{link}; - $output .= ''.$self->encode($item->{description}).''."\n" - if $item->{description}; - - $output .= ''.$self->encode($item->{author}).''."\n" - if $item->{author}; - - $output .= ''.$self->encode($item->{category}).''."\n" - if $item->{category}; - - $output .= ''.$self->encode($item->{comments}).''."\n" - if $item->{comments}; - - # The unique identifier. Use 'permaLink' for an external - # identifier, or 'guid' for a internal string. - # (I call it permaLink in the hash for purposes of clarity.) - if ($item->{permaLink}) { - $output .= ''.$self->encode($item->{permaLink}).''."\n"; - } elsif ($item->{guid}) { - $output .= ''.$self->encode($item->{guid}).''."\n"; - } - - $output .= ''.$self->encode($item->{pubDate}).''."\n" - if $item->{pubDate}; - - $output .= ''.$item->{source}.''."\n" - if $item->{source} && $item->{sourceUrl}; - - if (my $e = $item->{enclosure}) { - $output .= "' . 
"\n"; - } - - # end image element - $output .= ''."\n\n"; - } - } - - ##################### - # textinput element # - ##################### - if ($self->{textinput}->{'link'}) { - $output .= ''."\n"; - $output .= ''.$self->encode($self->{textinput}->{title}).''."\n"; - $output .= ''.$self->encode($self->{textinput}->{description}).''."\n"; - $output .= ''.$self->encode($self->{textinput}->{name}).''."\n"; - $output .= ''.$self->encode($self->{textinput}->{'link'}).''."\n"; - $output .= ''."\n\n"; - } - - ##################### - # skipHours element # - ##################### - if ($self->{skipHours}->{hour}) { - $output .= ''."\n"; - $output .= ''.$self->encode($self->{skipHours}->{hour}).''."\n"; - $output .= ''."\n\n"; - } - - #################### - # skipDays element # - #################### - if ($self->{skipDays}->{day}) { - $output .= ''."\n"; - $output .= ''.$self->encode($self->{skipDays}->{day}).''."\n"; - $output .= ''."\n\n"; - } - - # end channel element - $output .= ''."\n"; - $output .= ''; - - return $output; -} - -sub as_string { - my $self = shift; - my $version = ($self->{output} =~ /\d/) ? 
$self->{output} : $self->{version}; - my $output; - - ########### - # RSS 0.9 # - ########### - if ($version eq '0.9') { - $output = &as_rss_0_9($self); - - ############ - # RSS 0.91 # - ############ - } elsif ($version eq '0.91') { - $output = &as_rss_0_9_1($self); - - ########### - # RSS 2.0 # - ########### - } elsif ($version eq '2.0') { - $output = &as_rss_2_0($self); - - ########### - # RSS 1.0 # - ########### - } else { - $output = &as_rss_1_0($self); - } - - return $output; -} - - -sub AUTOLOAD { - my $self = shift; - my $type = ref($self) || croak "$self is not an object\n"; - my $name = $AUTOLOAD; - $name =~ s/.*://; - return if $name eq 'DESTROY'; - - croak "Unregistered entity: Can't access $name field in object of class $type" - unless (exists $self->{$name}); - - # return reference to RSS structure - if (@_ == 1) { - return $self->{$name}->{$_[0]} if defined $self->{$name}->{$_[0]}; - - # we're going to set values here - } elsif (@_ > 1) { - my %hash = @_; - my $_REQ; - - # make sure we have required elements and correct lengths - if ($self->{'strict'}) { - ($self->{version} eq '0.9') - ? ($_REQ = $_REQ_v0_9) - : ($_REQ = $_REQ_v0_9_1); - } - - # store data in object - foreach my $key (keys(%hash)) { - if ($self->{'strict'}) { - my $req_element = $_REQ->{$name}->{$key}; - confess "$key cannot exceed " . $req_element->[1] . " characters in length" - if defined $req_element->[1] && length($hash{$key}) > $req_element->[1]; - } - $self->{$name}->{$key} = $hash{$key}; - } - - # return value - return $self->{$name}; - - # otherwise, just return a reference to the whole thing - } else { - return $self->{$name}; - } - return 0; - - # make sure we have all required elements - #foreach my $key (keys(%{$_REQ->{$name}})) { - #my $element = $_REQ->{$name}->{$key}; - #croak "$key is required in $name" - #if ($element->[0] == 1) && (!defined($hash{$key})); - #croak "$key cannot exceed ".$element->[1]." 
characters in length" - #unless length($hash{$key}) <= $element->[1]; - #} -} - - -# the code here is a minorly tweaked version of code from -# Matts' rssmirror.pl script -# -my %entity = ( - nbsp => " ", - iexcl => "¡", - cent => "¢", - pound => "£", - curren => "¤", - yen => "¥", - brvbar => "¦", - sect => "§", - uml => "¨", - copy => "©", - ordf => "ª", - laquo => "«", - not => "¬", - shy => "­", - reg => "®", - macr => "¯", - deg => "°", - plusmn => "±", - sup2 => "²", - sup3 => "³", - acute => "´", - micro => "µ", - para => "¶", - middot => "·", - cedil => "¸", - sup1 => "¹", - ordm => "º", - raquo => "»", - frac14 => "¼", - frac12 => "½", - frac34 => "¾", - iquest => "¿", - Agrave => "À", - Aacute => "Á", - Acirc => "Â", - Atilde => "Ã", - Auml => "Ä", - Aring => "Å", - AElig => "Æ", - Ccedil => "Ç", - Egrave => "È", - Eacute => "É", - Ecirc => "Ê", - Euml => "Ë", - Igrave => "Ì", - Iacute => "Í", - Icirc => "Î", - Iuml => "Ï", - ETH => "Ð", - Ntilde => "Ñ", - Ograve => "Ò", - Oacute => "Ó", - Ocirc => "Ô", - Otilde => "Õ", - Ouml => "Ö", - times => "×", - Oslash => "Ø", - Ugrave => "Ù", - Uacute => "Ú", - Ucirc => "Û", - Uuml => "Ü", - Yacute => "Ý", - THORN => "Þ", - szlig => "ß", - agrave => "à", - aacute => "á", - acirc => "â", - atilde => "ã", - auml => "ä", - aring => "å", - aelig => "æ", - ccedil => "ç", - egrave => "è", - eacute => "é", - ecirc => "ê", - euml => "ë", - igrave => "ì", - iacute => "í", - icirc => "î", - iuml => "ï", - eth => "ð", - ntilde => "ñ", - ograve => "ò", - oacute => "ó", - ocirc => "ô", - otilde => "õ", - ouml => "ö", - divide => "÷", - oslash => "ø", - ugrave => "ù", - uacute => "ú", - ucirc => "û", - uuml => "ü", - yacute => "ý", - thorn => "þ", - yuml => "ÿ", - ); - -my $entities = join('|', keys %entity); - -sub encode { - my ($self, $text) = @_; - return $text unless $self->{'encode_output'}; - - my $encoded_text = ''; - - while ( $text =~ s/(.*?)(\<\!\[CDATA\[.*?\]\]\>)//s ) { - $encoded_text .= encode_text($1) . 
$2; - } - $encoded_text .= encode_text($text); - - return $encoded_text; -} - -sub encode_text { - my $text = shift; - - $text =~ s/&(?!(#[0-9]+|#x[0-9a-fA-F]+|\w+);)/&/g; - $text =~ s/&($entities);/$entity{$1}/g; - $text =~ s//>/g; - - return $text; -} - -sub strict { - my ($self,$value) = @_; - $self->{'strict'} = $value; -} - -sub save { - my ($self,$file) = @_; - open my $OUT, '>', $file or croak "Cannot open file $file for write: $!"; - print $OUT $self->as_string; - close $OUT; -} - - -1; diff --git a/sbin/testEnvironment.pl b/sbin/testEnvironment.pl index b2164423a..52271d2b3 100644 --- a/sbin/testEnvironment.pl +++ b/sbin/testEnvironment.pl @@ -90,7 +90,7 @@ checkModule("HTML::Highlight", 0.20 ); checkModule("HTML::TagFilter", 0.07 ); checkModule("HTML::Template", 2.9 ); checkModule("HTML::Template::Expr", 0.05, 2 ); -checkModule("XML::RSSLite", 0.11 ); +checkModule("XML::FeedPP", 0.36 ); checkModule("JSON", 2.04 ); checkModule("Config::JSON", "1.1.2" ); checkModule("Text::CSV_XS", "0.52" ); diff --git a/t/Asset/Wobject/SyndicatedContent.t b/t/Asset/Wobject/SyndicatedContent.t index 1f147aa34..28c057ca2 100644 --- a/t/Asset/Wobject/SyndicatedContent.t +++ b/t/Asset/Wobject/SyndicatedContent.t @@ -20,8 +20,9 @@ use Data::Dumper; use WebGUI::Test; use WebGUI::Session; -use Test::More tests => 20; # increment this value for each test you create +use Test::More tests => 19; # increment this value for each test you create use WebGUI::Asset::Wobject::SyndicatedContent; +use XML::FeedPP; my $session = WebGUI::Test->session; my %var; @@ -49,9 +50,7 @@ isa_ok($syndicated_content, 'WebGUI::Asset::Wobject::SyndicatedContent'); my $newSyndicatedContentSettings = { cacheTimeout => 124, templateId => "PBtmpl0000000000000065", - #rssUrl => "http://morningmonologue.wordpress.com/feed/", # broken - #rssUrl => "http://motivationalmuse.wordpress.com/feed/", #working feed - rssUrl => 'https://svn.webgui.org/svnweb/plainblack/rss/WebGUI/', + rssUrl => 
'http://svn.webgui.org/svnweb/plainblack/rss/WebGUI/', }; # update the new values for this instance @@ -62,21 +61,9 @@ foreach my $newSetting (keys %{$newSyndicatedContentSettings}) { is ($syndicated_content->get($newSetting), $newSyndicatedContentSettings->{$newSetting}, "updated $newSetting is ".$newSyndicatedContentSettings->{$newSetting}); } -# Can we get the rss url? -ok($syndicated_content->getRssUrl, 'getRSSUrl returns something.'); - -# test getContentLastModified -ok($syndicated_content->getContentLastModified, 'getContentLastModified returns something.'); - -# Test max headlines parsed from feed -my $max_headlines = $syndicated_content->_getMaxHeadlines; -ok($syndicated_content->_getMaxHeadlines, "Max Headlines returned a value [$max_headlines]"); - -# Limit the headlines so the test will complete in a reasonable amount of time. -# default is 100K titles, which is way too much for a test -$syndicated_content->{maxHeadlines} = "3"; -my @validated_urls = $syndicated_content->_getValidatedUrls; -ok($syndicated_content->_getValidatedUrls, "Validated Urls returned a value [@validated_urls]"); +my $feed = $syndicated_content->generateFeed; +isa_ok($feed, 'XML::FeedPP', 'Got an XML::FeedPP object'); +isnt($feed->title,'', 'the feed has data'); # Lets make sure the view method returns something. 
is ($syndicated_content->{_viewTemplate}, undef, 'internal template cache unset until prepareView is called'); @@ -85,33 +72,19 @@ $syndicated_content->prepareView; isnt ($syndicated_content->{_viewTemplate}, undef, 'internal template cache set by prepare view'); isa_ok ($syndicated_content->{_viewTemplate}, 'WebGUI::Asset::Template', 'internal template cache'); -my $output = $syndicated_content->view('2.0'); -isnt ($output, "", 'Default view method returns something for RSS 2.0 format'); +ok($syndicated_content->view(), 'it generates some output'); -my $output = $syndicated_content->view('1.0'); -isnt ($output, "", 'Default view method returns something for RSS 1.0 format'); +my $output = $syndicated_content->www_viewRss; +my $feed = XML::FeedPP->new($output); +cmp_ok($feed->get_item, ">", 0, 'RSS has items'); -# Not really sure what this does... -#my $hasTerms = $syndicated_content->getValue('hasTerms'); -#ok($hasTerms, "hasTerms contains a value [$hasTerms]"); +my $output = $syndicated_content->www_viewRdf; +my $feed = XML::FeedPP->new($output); +cmp_ok($feed->get_item, ">", 0, 'RDF has items'); -my $hasTermsRegex = $syndicated_content->_make_regex( $syndicated_content->getValue('hasTerms') ); - -my $rss_info = WebGUI::Asset::Wobject::SyndicatedContent::_get_rss_data($session,$newSyndicatedContentSettings->{'rssUrl'}); -ok(ref($rss_info) eq 'HASH', "Hashref returned from _get_rss_data"); -push(@rss_feeds, $rss_info); - - -my $items = []; -WebGUI::Asset::Wobject::SyndicatedContent::_create_interleaved_items($items, \@rss_feeds , $max_headlines, $hasTermsRegex); -ok($items , "Got results back from XML" ); - -my($item_loop,$rss_feeds) = $syndicated_content->_get_items(\@validated_urls, $max_headlines); -ok(ref($item_loop) eq 'ARRAY',"Arrayref of items returned from _get_items" ); -ok(ref($rss_feeds) eq 'ARRAY',"Arrayref of feeds returned from _get_items" ); - -# update var with item_loop for the upcoming template processing -$var{item_loop} = $item_loop; +my 
$output = $syndicated_content->www_viewAtom; +my $feed = XML::FeedPP->new($output); +cmp_ok($feed->get_item, ">", 0, 'Atom has items'); # create a new template object in preparation for rendering my $template = WebGUI::Asset::Template->new($session, $syndicated_content->get("templateId")); @@ -120,12 +93,17 @@ isa_ok($template, 'WebGUI::Asset::Template'); $syndicated_content->{_viewTemplate} = $template; -# Is a WebGUI URL created for the RSS feed? -my $url = $syndicated_content->_createRSSURLs(\%var); -ok($url,"A URL was created for RSS feed"); +# check out the template vars + +my $var = $syndicated_content->getTemplateVariables($feed); + +isnt($var->{channel_description}, '', 'got a channel description'); +isnt($var->{channel_title}, '', 'got a channel title'); +isnt($var->{channel_link}, '', 'got a channel link'); +cmp_ok(scalar(@{$var->{item_loop}}), '>', 0, 'the item loop has items'); # processTemplate, this is where we run into trouble... -my $processed_template = eval {$syndicated_content->processTemplate(\%var,undef,$template) }; +my $processed_template = eval {$syndicated_content->processTemplate($var,undef,$template) }; ok($processed_template, "A response was received from processTemplate."); END {