package WebGUI::Wobject::SyndicatedContent; #------------------------------------------------------------------- # WebGUI is Copyright 2001-2004 Plain Black LLC. #------------------------------------------------------------------- # Please read the legal notices (docs/legal.txt) and the license # (docs/license.txt) that came with this distribution before using # this software. #------------------------------------------------------------------- # http://www.plainblack.com info@plainblack.com #------------------------------------------------------------------- use HTML::Entities; use strict; use Storable; use Tie::CPHash; use WebGUI::Cache; use WebGUI::DateTime; use WebGUI::HTMLForm; use WebGUI::HTML; use WebGUI::Icon; use WebGUI::International; use WebGUI::Privilege; use WebGUI::Session; use WebGUI::SQL; use WebGUI::Wobject; use XML::RSSLite; use LWP::UserAgent; our @ISA = qw(WebGUI::Wobject); #------------------------------------------------------------------- sub name { return WebGUI::International::get(2,$_[0]->get("namespace")); } #------------------------------------------------------------------- sub new { my $class = shift; my $property = shift; my $self = WebGUI::Wobject->new( -properties=>$property, -extendedProperties=>{ rssUrl=>{}, maxHeadlines=>{}, }, -useTemplate=>1 ); bless $self, $class; } #------------------------------------------------------------------- sub uiLevel { return 6; } #------------------------------------------------------------------- sub www_edit { my $properties = WebGUI::HTMLForm->new; $properties->url( -name=>"rssUrl", -label=>WebGUI::International::get(1,$_[0]->get("namespace")), -value=>$_[0]->getValue("rssUrl") ); my $layout = WebGUI::HTMLForm->new; $layout->integer( -name=>"maxHeadlines", -label=>WebGUI::International::get(3,$_[0]->get("namespace")), -value=>$_[0]->getValue("maxHeadlines") ); return $_[0]->SUPER::www_edit( -properties=>$properties->printRowsOnly, -layout=>$layout->printRowsOnly, -headingId=>4, -helpId=>1 ); } # strip all html tags from the given data structure. This is important to # prevent cross site scripting attacks my $_stripped_html = {}; sub _strip_html { #my ($data) = @_; if (ref($_[0]) eq 'HASH') { keys(%{$_[0]}); while (my ($name, $val) = each (%{$_[0]})) { $_[0]->{$name} = _strip_html($val); } } elsif (ref($_[0]) eq 'ARRAY') { for (my $i = 0; $i < @{$_[0]}; $i++) { $_[0]->[$i] = _strip_html($_[0]->[$i]); } } else { if ($_[0]) { $_[0] =~ s/\<//g; $_[0] = WebGUI::HTML::filter($_[0], 'all'); } } return $_[0]; } # horrible kludge to find the channel or item record # in the varying kinds of rss structures returned by RSSLite sub _find_record { my ($data, $regex) = @_; if (ref($data) eq 'HASH') { # reset the hash before calling each() keys(%{$data}); while (my ($name, $val) = each(%{$data})) { if ($name =~ $_[1]) { if ((((ref($val) eq 'HASH') && ($val->{link} || $val->{title} || $val->{description})) || ((ref($val) eq 'ARRAY') && @{$val} && (ref($val->[0]) eq 'HASH') && ($val->[0]->{link} || $val->[0]->{title} || $val->[0]->{description})))) { return $val; } } if (my $record = _find_record($val, $regex)) { return $record; } } } return undef; } # Copy the guid field to the link field if the guid looks like a link. # This is a kludge that gets around the fact that some folks use the link # field as the link to the story while others use it as the link # to the story about which the story is written. The webuig templates seem # to assume the former, so we should use the guid instead of the link, b/c # the guid, if it is a link, always means the former. # Also copy the first few words of the description into the title if # there is no title sub _normalize_items { #my ($items) = @_; # max number of words to take from description to fill in an empty # title my $max_words = 10; for my $item (@{$_[0]}) { if ($item->{guid} && ($item->{guid} =~ /^http:\/\//i)) { $item->{link} = $item->{guid}; } if (!$item->{title}) { my @description_words = split(/\s/, $item->{description}); if (@description_words <= $max_words) { $item->{title} = $item->{description}; } else { $item->{title} = join(" ", @description_words[0..$max_words-1]) . " ..."; } } # IE doesn't recognize ' $item->{title} =~ s/'/\'/; $item->{description} =~ s/'/\'/; } } sub _get_rss_data { my ($url) = @_; my $cache = WebGUI::Cache->new("url:" . $url, "RSS"); my $rss_serial = $cache->get; my $rss = {}; if ($rss_serial) { $rss = Storable::thaw($rss_serial); } else { my $ua = LWP::UserAgent->new(timeout => 5); my $response = $ua->get($url); if (!$response->is_success()) { warn("Error retrieving url '$url': " . $response->status_line()); return undef; } my $xml = $response->content(); # there is no encode_entities_numeric that I can find, so I am # commenting this out. -hal # $xml =~ s#(