package WebGUI::Asset::Wobject::SyndicatedContent;

#-------------------------------------------------------------------
# WebGUI is Copyright 2001-2009 Plain Black Corporation.
#-------------------------------------------------------------------
# Please read the legal notices (docs/legal.txt) and the license
# (docs/license.txt) that came with this distribution before using
# this software.
#-------------------------------------------------------------------
# http://www.plainblack.com info@plainblack.com
#-------------------------------------------------------------------

use strict;
use HTML::Entities;
use Tie::IxHash;
use WebGUI::Cache;
use WebGUI::Exception;
use WebGUI::HTML;
use WebGUI::International;
use Class::C3;
use base qw(WebGUI::AssetAspect::RssFeed WebGUI::Asset::Wobject);
use WebGUI::Macro;
use XML::FeedPP;
use XML::FeedPP::MediaRSS;

=head1 NAME

Package WebGUI::Asset::Wobject::SyndicatedContent

=head1 DESCRIPTION

Displays items and channels from RSS/Atom/RDF feeds.

=head1 SYNOPSIS

use WebGUI::Asset::Wobject::SyndicatedContent;

=head1 METHODS

These methods are available from this class:

=cut

#-------------------------------------------------------------------

=head2 definition ( session, definition )

Defines the properties of this asset.

=head3 session

The session object; it is used to load the internationalized labels.

=head3 definition

A hash reference passed in from a subclass definition.

=cut

sub definition {
    my ($class, $session, $definition) = @_;
    my $i18n = WebGUI::International->new($session, 'Asset_SyndicatedContent');

    # Choices offered by the sortItems select box, kept in display order.
    tie my %sortOptions, 'Tie::IxHash', (
        none        => $i18n->get('no order'),
        feed        => $i18n->get('feed order'),
        pubDate_asc => $i18n->get('publication date ascending'),
        pubDate_des => $i18n->get('publication date descending'),
    );

    # The property list is tied to Tie::IxHash so the declaration order below is kept.
    tie my %properties, 'Tie::IxHash', (
        cacheTimeout => {
            tab          => "display",
            fieldType    => "interval",
            defaultValue => 3600,
            uiLevel      => 8,
            label        => $i18n->get("cache timeout"),
            hoverHelp    => $i18n->get("cache timeout help"),
        },
        templateId => {
            tab          => "display",
            fieldType    => 'template',
            defaultValue => 'PBtmpl0000000000000065',
            namespace    => 'SyndicatedContent',
            label        => $i18n->get(72),
            hoverHelp    => $i18n->get('72 description'),
        },
        rssUrl => {
            tab          => "properties",
            defaultValue => undef,
            fieldType    => 'textarea',
            label        => $i18n->get(1),
            hoverHelp    => $i18n->get('1 description'),
        },
        processMacroInRssUrl => {
            tab          => "properties",
            defaultValue => 0,
            fieldType    => 'yesNo',
            label        => $i18n->get('process macros in rss url'),
            hoverHelp    => $i18n->get('process macros in rss url description'),
        },
        maxHeadlines => {
            tab          => "display",
            fieldType    => 'integer',
            defaultValue => 10,
            label        => $i18n->get(3),
            hoverHelp    => $i18n->get('3 description'),
        },
        hasTerms => {
            tab          => "properties",
            fieldType    => 'text',
            defaultValue => '',
            label        => $i18n->get('hasTermsLabel'),
            hoverHelp    => $i18n->get('hasTermsLabel description'),
            maxlength    => 255,
        },
        sortItems => {
            tab          => 'properties',
            fieldType    => 'selectBox',
            options      => \%sortOptions,
            defaultValue => 'none',
            label        => $i18n->get('sortItemsLabel'),
            hoverHelp    => $i18n->get('sortItemsLabel description'),
        },
    );

    # Append this class's entry and let the next class in the chain add its own.
    my %entry = (
        assetName         => $i18n->get('assetName'),
        uiLevel           => 6,
        autoGenerateForms => 1,
        icon              => 'syndicatedContent.gif',
        tableName         => 'SyndicatedContent',
        className         => 'WebGUI::Asset::Wobject::SyndicatedContent',
        properties        => \%properties,
    );
    push @{$definition}, \%entry;

    return $class->next::method($session, $definition);
}
#-------------------------------------------------------------------

=head2 generateFeed ( [ limit ] )

Combines all feeds into a single XML::FeedPP object.

=head3 limit

Optional. The maximum number of items to keep in the combined feed. Defaults
to the maxHeadlines property of this asset.

=cut

sub generateFeed {
    my $self    = shift;
    my $limit   = shift || $self->get('maxHeadlines');
    my $session = $self->session;
    my $log     = $session->log;

    my $sort = $self->get('sortItems');
    $sort = '' unless defined $sort;

    # Build the option list with a ternary: "my @opt = (...) if COND" leaves
    # the variable in an undefined state when COND is false.
    my @opt  = $sort eq 'feed' ? (use_ixhash => 1) : ();
    my $feed = XML::FeedPP::Atom->new(@opt);

    # build one feed out of many
    my $newlyCached = 0;
    my $urlList     = $self->get('rssUrl');
    $urlList = '' unless defined $urlList;

    # split ' ' ignores leading whitespace, so a textarea value that starts
    # with a blank line does not produce an empty URL.
    foreach my $url (split ' ', $urlList) {
        $log->info("Processing FEED: ".$url);
        $url =~ s/^feed:/http:/;
        if ($self->get('processMacroInRssUrl')) {
            WebGUI::Macro::process($session, \$url);
        }

        # Use the cached copy of the feed when there is one, otherwise fetch it.
        my $cache = WebGUI::Cache->new($session, $url, "RSS");
        my $value = $cache->get;
        unless ($value) {
            $value = $cache->setByHTTP($url, $self->get("cacheTimeout"));
            $newlyCached = 1;
        }

        # A feed that cannot be parsed is logged and skipped so that the
        # remaining feeds are still merged.
        my $parsed = eval {
            my $singleFeed = XML::FeedPP->new(
                $value,
                utf8_flag => 1,
                -type     => 'string',
                xml_deref => 1,
                @opt,
            );
            $feed->merge_channel($singleFeed);
            $feed->merge_item($singleFeed);
            1;
        };
        if (!$parsed) {
            my $error = $@ || 'unknown error';
            $log->error("Syndicated Content asset (".$self->getId.") has a bad feed URL (".$url."). Failed with ".$error);
        }
    }

    # build a new feed that matches the terms the user is interested in
    my $hasTerms = $self->get('hasTerms');
    $hasTerms = '' unless defined $hasTerms;

    # Drop empty terms: an empty alternative in the regex would match every item.
    my @terms = grep { length } split /,\s*/, $hasTerms;
    if (@terms) {
        my $termRegex = join("|", map { quotemeta($_) } @terms);
        my @matches = $feed->match_item(title => qr/$termRegex/msi);
        push @matches, $feed->match_item(description => qr/$termRegex/msi);
        $feed->clear_item;
        $feed->add_item($_) for @matches;
    }

    # remove duplicates, including those produced by the OR based term
    # matching above (an item may match on both title and description)
    my %seen;
    my @items = $feed->get_item;
    $feed->clear_item;
    ITEM: foreach my $item (@items) {
        my $key = join "\n",
            map { defined $_ ? $_ : '' }
            scalar $item->link,
            scalar $item->pubDate,
            scalar $item->description,
            scalar $item->title;
        next ITEM if $seen{$key}++;
        $feed->add_item($item);
    }

    # sort them by date when a publication date order was requested
    if ($sort =~ /^pubDate/) {
        $feed->sort_item();
    }

    # the ascending order is the reverse of the order produced above
    if ($sort =~ /_asc$/) {
        my @ordered = $feed->get_item;
        $feed->clear_item;
        $feed->add_item($_) for reverse @ordered;
    }

    # limit the feed to the maximum number of headlines (or the feed generator limit).
    $feed->limit_item($limit);

    # mark this asset as updated
    $self->update({}) if $newlyCached;

    return $feed;
}

#-------------------------------------------------------------------

=head2 getFeed ( feed )

Overrides the method of the same name in the parent class. Adds the items
of the combined feed to the given feed object, sets its channel properties
and returns it.

=head3 feed

The feed object to fill in.

=cut

sub getFeed {
    my ($self, $feed) = @_;

    # Copy every item of the combined feed into the caller's feed and make
    # sure each copy ends up with a guid.
    my $combined = $self->generateFeed( $self->get('itemsPerFeed') );
    for my $source ($combined->get_item) {
        my $copy = $feed->add_item( $source );
        next if $copy->guid;

        if ($copy->link) {
            # Fall back to the item link.
            $copy->guid( $copy->link );
        }
        else {
            # Neither a guid nor a link: use a generated id and flag it as
            # not being a permalink.
            $copy->guid( $self->session->id->generate );
            $copy->guid( $copy->guid, isPermaLink => 0 );
        }
    }

    # Channel properties; the feed specific settings take precedence over
    # the general asset properties.
    $feed->title( $self->get('feedTitle') || $self->get('title') );
    $feed->description( $self->get('feedDescription') || $self->get('synopsis') );
    $feed->pubDate( $self->getContentLastModified );
    $feed->copyright( $self->get('feedCopyright') );
    $feed->link( $self->getUrl );
    # $feed->language( $lang );

    # Channel image, taken from the first file in the feedImage storage.
    my $imageStorageId = $self->get('feedImage');
    if ($imageStorageId) {
        my $storage  = WebGUI::Storage->get($self->session, $imageStorageId);
        my $fileList = $storage->getFiles;
        if (@{ $fileList }) {
            my $imageFile = $fileList->[0];
            $feed->image(
                $storage->getUrl( $imageFile ),
                $self->get('feedImageDescription') || $self->getTitle,
                $self->get('feedImageUrl') || $self->getUrl,
                $self->get('feedImageDescription') || $self->getTitle,
                ( $storage->getSizeInPixels( $imageFile ) ) # expands to width and height
            );
        }
    }

    return $feed;
}

#-------------------------------------------------------------------

=head2 getTemplateVariables ( feed )

Returns a hash reference of template variables.

=head3 feed

A reference to an XML::FeedPP object.

=cut

sub getTemplateVariables {
    my ($self, $feed) = @_;
    my $media = XML::FeedPP::MediaRSS->new($feed);
    my @items = $feed->get_item;
    my %var;

    # Channel level variables. Every value goes through the 'javascript'
    # filter of WebGUI::HTML::filter before it reaches the template.
    $var{channel_title}       = WebGUI::HTML::filter(scalar $feed->title, 'javascript');
    $var{channel_description} = WebGUI::HTML::filter(scalar($feed->description), 'javascript');
    $var{channel_date}        = WebGUI::HTML::filter(scalar($feed->get_pubDate_epoch), 'javascript');
    $var{channel_copyright}   = WebGUI::HTML::filter(scalar($feed->copyright), 'javascript');
    $var{channel_link}        = WebGUI::HTML::filter(scalar $feed->link, 'javascript');

    # $feed->image returns a list that is mapped positionally onto the
    # channel_image_* variables below.
    my @image       = $feed->image;
    my @imageFields = qw(url title link description width height);
    for my $index (0 .. $#imageFields) {
        $var{ 'channel_image_' . $imageFields[$index] }
            = WebGUI::HTML::filter($image[$index], 'javascript');
    }

    foreach my $object (@items) {
        my %item;

        # +{ ... } forces an anonymous hash; a bare { %$_ } at the start of a
        # statement can be parsed as a block, which would flatten the hash.
        $item{media}    = [ map { +{ %$_ } } $media->for_item($object) ];
        $item{title}    = WebGUI::HTML::filter(scalar $object->title, 'javascript');
        $item{date}     = WebGUI::HTML::filter(scalar $object->get_pubDate_epoch, 'javascript');
        $item{category} = WebGUI::HTML::filter(scalar $object->category, 'javascript');
        $item{author}   = WebGUI::HTML::filter(scalar $object->author, 'javascript');
        $item{guid}     = WebGUI::HTML::filter(scalar $object->guid, 'javascript');
        $item{link}     = WebGUI::HTML::filter(scalar $object->link, 'javascript');

        my $description = WebGUI::HTML::filter(scalar($object->description), 'javascript');
        $description = '' unless defined $description;
        $item{description} = $description;

        # Version of the description filtered with 'all', used for the word
        # and sentence based excerpts.
        my $raw_description = WebGUI::HTML::filter($description, 'all');
        $raw_description = '' unless defined $raw_description;
        $raw_description =~ s/^\s+//s;

        # Each excerpt is cut from the previous, longer one.
        my $words = $raw_description;
        for my $count (100, 75, 50, 25, 10) {
            $words =~ s/(((\S+)\s+){1,$count}).*/$1/ms;
            $item{"descriptionFirst${count}words"} = $words;
        }

        if ($description =~ /<p>/) {
            # HTML description: take the text of the leading paragraphs and
            # wrap each one in paragraph tags again.
            my $html = $description;
            $html =~ tr/\n/ /s;
            my @paragraphs = map { "<p>".$_."</p>" } WebGUI::HTML::splitTag($html,3);
            $item{descriptionFirstParagraph}    = $paragraphs[0];
            # There may be only one paragraph; skip the missing slot.
            $item{descriptionFirst2paragraphs} = join '', grep { defined } @paragraphs[0..1];
        }
        else {
            # No paragraph markup: treat every line as a paragraph.
            $item{descriptionFirst2paragraphs} = $item{description};
            $item{descriptionFirst2paragraphs} =~ s/^((.*?\n){2}).*/$1/s;
            $item{descriptionFirstParagraph} = $item{descriptionFirst2paragraphs};
            $item{descriptionFirstParagraph} =~ s/^(.*?\n).*/$1/s;
        }

        # A sentence is everything up to and including the next period.
        my $sentences = $raw_description;
        for my $count (4, 3, 2, 1) {
            $sentences =~ s/^((.*?\.){1,$count}).*/$1/s;
            my $key = $count == 1
                ? 'descriptionFirstSentence'
                : "descriptionFirst${count}sentences";
            $item{$key} = $sentences;
        }

        push @{$var{item_loop}}, \%item;
    }

    return \%var;
}

#-------------------------------------------------------------------

=head2 prepareView ( )

See WebGUI::Asset::prepareView() for details.

=cut

sub prepareView {
    my $self = shift;
    $self->next::method;

    # Load the view template and fail loudly when it does not exist.
    my $template = WebGUI::Asset::Template->new($self->session, $self->get("templateId"));
    if (!$template) {
        WebGUI::Error::ObjectNotFound::Template->throw(
            error      => qq{Template not found},
            templateId => $self->get("templateId"),
            assetId    => $self->getId,
        );
    }
    $template->prepare($self->getMetaDataAsTemplateVariables);

    # view() passes this template to processTemplate.
    $self->{_viewTemplate} = $template;
}

#-------------------------------------------------------------------

=head2 purgeCache ( )

See WebGUI::Asset::purgeCache() for details.

=cut

sub purgeCache {
    my $self = shift;

    # Delete the cache entry that view() stores under the same key.
    WebGUI::Cache->new($self->session,"view_".$self->getId)->delete;
    $self->next::method;
}

#-------------------------------------------------------------------

=head2 view ( )

Returns the rendered output of the wobject.

=cut

sub view {
    my ($self) = @_;
    my $session = $self->session;

    # Serve the cached rendering, except when it is empty or admin mode is on.
    my $viewCache = WebGUI::Cache->new($session, "view_".$self->getId);
    my $output    = $viewCache->get;
    unless ($output eq "" || $session->var->isAdminOn) {
        return $output;
    }

    # Otherwise render the combined feed through the prepared template.
    my $feed = $self->generateFeed;
    my $vars = $self->getTemplateVariables($feed);
    $output = $self->processTemplate($vars, undef, $self->{_viewTemplate});

    # Store the result unless admin mode is on or the timeout is 10 or less.
    unless ($session->var->isAdminOn || $self->get("cacheTimeout") <= 10) {
        $viewCache->set($output, $self->get("cacheTimeout"));
    }

    return $output;
}

#-------------------------------------------------------------------

=head2 www_view ( )

See WebGUI::Asset::Wobject::www_view() for details.

=cut

sub www_view {
    my ($self, @args) = @_;

    # Pass the asset's cache timeout to the HTTP cache control setting
    # before handing over to the next www_view in the chain.
    my $http = $self->session->http;
    $http->setCacheControl($self->get("cacheTimeout"));
    return $self->next::method(@args);
}

#-------------------------------------------------------------------

=head2 www_viewRSS090 ( )

Deprecated. Use www_viewRss() instead.

=cut

sub www_viewRSS090 {
    my ($self) = @_;
    return $self->www_viewRss;
}

#-------------------------------------------------------------------

=head2 www_viewRSS091 ( )

Deprecated. Use www_viewRss() instead.

=cut

sub www_viewRSS091 {
    my ($self) = @_;
    return $self->www_viewRss;
}

#-------------------------------------------------------------------

=head2 www_viewRSS10 ( )

Deprecated. Use www_viewRdf() instead.

=cut

sub www_viewRSS10 {
    my ($self) = @_;
    return $self->www_viewRdf;
}

#-------------------------------------------------------------------

=head2 www_viewRSS20 ( )

Deprecated. Use www_viewRss() instead.

=cut

sub www_viewRSS20 {
    my ($self) = @_;
    return $self->www_viewRss;
}

1;