From ba4a5c7e4b1cc54b3b8e7548a12303bf8985a5eb Mon Sep 17 00:00:00 2001
From: Hal Roberts \r\n\r\nIndent \r\n\r\nAlphabetic? \r\n\r\n',1039908464,NULL);
delete from international where languageId=1 and namespace='WebGUI' and internationalId=606;
insert into international values (606,'WebGUI',1,'Think of pages as containers for content. For instance, if you want to write a letter to the editor of your favorite magazine you\'d get out a notepad (or open a word processor) and start filling it with your thoughts. The same is true with WebGUI. Create a page, then add your content to the page.\r\n \r\n\r\nTitle \r\n\r\n\r\nMenu Title \r\n\r\nPage URL \r\n\r\nRedirect URL \r\nNOTE: The redirects will be disabled while in admin mode in order to make it easier to edit the properties of the page.\r\n \r\n\r\n\r\nHide from navigation? \r\nNOTE: This will not hide the page from the page tree (Administrative functions... > Manage page tree.), only from navigation macros and from site maps.\r\n \r\n\r\nOpen in new window? \r\n\r\n\r\n\r\nLanguage Cache Timeout Cache Timeout (Visitors) NOTE: Page caching is only available if your administrator has installed the Cache::FileCache Perl module. Using page caching can improve site performance by as much as 1000%. \r\n\r\n\r\nTemplate \r\n\r\nSynopsis \r\n\r\nMeta Tags \r\n\r\nAdvanced Users: If you have other things (like JavaScript) you usually put in the area of your pages, you may put them here as well.\r\n \r\n\r\nUse default meta tags? \r\n\r\n\r\nStyle \r\n\r\nIf you select \"Yes\" below the style pull-down menu, all of the pages below this page will take on the style you\'ve chosen for this page.\r\n \r\n\r\nStart Date \r\n\r\nEnd Date \r\n\r\n\r\nOwner \r\nNOTE: The owner can only be changed by an administrator.\r\n \r\n\r\n\r\nWho can view? \r\n\r\nWho can edit? \r\n\r\nYou can optionally recursively give these privileges to all pages under this page.\r\n \r\n\r\nWhat next?
\r\n\r\nTemplate
\r\nChoose a layout for this site map.\r\n\r\n\r\nStart With
\r\nSelect the page that this site map should start from.\r\n
\r\n\r\nDepth To Traverse
\r\nHow many levels deep of navigation should the Site Map show? If 0 (zero) is specified, it will show as many levels as there are.\r\n
\r\nHow many characters should each level be indented by?\r\n
\r\nIf this setting is true, site map entries are sorted alphabetically. If this setting is false, site map entries are sorted by the page sequence order (editable via the up and down arrows in the page toolbar).\r\n
\r\nThe title of the page is what your users will use to navigate through the site. Titles should be descriptive, but not very long.\r\n
\r\nA shorter or altered title to appear in navigation. If left blank this will default to Title.\r\n
\r\nWhen you create a page, a URL for the page is generated based on the page title. If you are unhappy with the URL that was chosen, you can change it here.\r\n
\r\nWhen this page is visited, the user will be redirected to the URL specified here. \r\n
\r\nSelect yes to hide this page from the navigation menus and site maps.\r\n
\r\nSelect yes to open this page in a new window. This is often used in conjunction with the Redirect URL parameter.\r\n
\r\nChoose the default language for this page. All WebGUI generated messages will appear in that language and the character set will be changed to the character set for that language.\r\n
The amount of time this page should remain cached for registered users. \r\n\r\n
The amount of time this page should remain cached for visitors. \r\n\r\n
\r\nBy default, WebGUI has one big content area to place wobjects. However, by specifying a template other than the default you can sub-divide the content area into several sections.\r\n
\r\nA short description of a page. It is used to populate default descriptive meta tags as well as to provide descriptions on Site Maps.\r\n
\r\nMeta tags are used by some search engines to associate key words to a particular page. There is a great site called Meta Tag Builder that will help you build meta tags if you\'ve never done it before.\r\n
\r\nIf you don\'t wish to specify meta tags yourself, WebGUI can generate meta tags based on the page title and your company\'s name. Check this box to enable the WebGUI-generated meta tags.\r\n
\r\nBy default, when you create a page, it inherits a few traits from its parent. One of those traits is style. Choose from the list of styles if you would like to change the appearance of this page. See Add Style for more details.\r\n
\r\nThe date when users may begin viewing this page. Note that before this date only content managers with the rights to edit this page will see it.\r\n
\r\nThe date when users will stop viewing this page. Note that after this date only content managers with the rights to edit this page will see it.\r\n
\r\nThe owner of a page is usually the person who created the page. This user always has full edit and viewing rights on the page.\r\n
\r\nChoose which group can view this page. If you want both visitors and registered users to be able to view the page then you should choose the \"Everybody\" group.\r\n
\r\nChoose the group that can edit this page. The group assigned editing rights can also always view the page.\r\n
\r\nIf you leave this on the default setting you\'ll be redirected to the new page after creating it.\r\n
\r\n\r\nURL to RSS file
\r\nProvide the exact URL (starting with http://) to the syndicated content\'s RDF or RSS file. The syndicated content will be downloaded from this URL hourly.\r\n
\r\nYou can find syndicated content at the following locations:\r\n
\r\n\r\nTo create an aggregate RSS feed, include a list of space-separated URLs instead of a single URL. For an aggregate feed, the system will display an equal number of headlines from each source, sorted by the date the system first received the story.
\r\n\r\nTemplate
\r\nSelect a template for this content.\r\n
Maximum Headlines
\r\nEnter the maximum number of headlines that should be displayed. For an aggregate feed, the system will display an equal number of headlines from each source, even if doing so requires displaying more than the requested maximum number of headlines. Set to zero to allow any number of headlines.\r\n
',1047855741,NULL); + diff --git a/lib/WebGUI/Wobject/SyndicatedContent.pm b/lib/WebGUI/Wobject/SyndicatedContent.pm index 26d0afc9b..333982eff 100644 --- a/lib/WebGUI/Wobject/SyndicatedContent.pm +++ b/lib/WebGUI/Wobject/SyndicatedContent.pm @@ -12,10 +12,12 @@ package WebGUI::Wobject::SyndicatedContent; use HTML::Entities; use strict; +use Storable; use Tie::CPHash; use WebGUI::Cache; use WebGUI::DateTime; use WebGUI::HTMLForm; +use WebGUI::HTML; use WebGUI::Icon; use WebGUI::International; use WebGUI::Privilege; @@ -23,6 +25,7 @@ use WebGUI::Session; use WebGUI::SQL; use WebGUI::Wobject; use XML::RSSLite; +use LWP::UserAgent; our @ISA = qw(WebGUI::Wobject); @@ -39,7 +42,8 @@ sub new { my $self = WebGUI::Wobject->new( -properties=>$property, -extendedProperties=>{ - rssUrl=>{} + rssUrl=>{}, + maxHeadlines=>{}, }, -useTemplate=>1 ); @@ -60,45 +64,278 @@ sub www_edit { -label=>WebGUI::International::get(1,$_[0]->get("namespace")), -value=>$_[0]->getValue("rssUrl") ); + my $layout = WebGUI::HTMLForm->new; + $layout->integer( + -name=>"maxHeadlines", + -label=>WebGUI::International::get(3,$_[0]->get("namespace")), + -value=>$_[0]->getValue("maxHeadlines") + ); return $_[0]->SUPER::www_edit( -properties=>$properties->printRowsOnly, + -layout=>$layout->printRowsOnly, -headingId=>4, -helpId=>1 ); } +# strip all html tags from the given data structure. 
This is important to +# prevent cross site scripting attacks +my $_stripped_html = {}; +sub _strip_html { + #my ($data) = @_; + + if (ref($_[0]) eq 'HASH') { + keys(%{$_[0]}); + while (my ($name, $val) = each (%{$_[0]})) { + $_[0]->{$name} = _strip_html($val); + } + } elsif (ref($_[0]) eq 'ARRAY') { + for (my $i = 0; $i < @{$_[0]}; $i++) { + $_[0]->[$i] = _strip_html($_[0]->[$i]); + } + } else { + if ($_[0]) { + $_[0] =~ s/\<//g; + $_[0] = WebGUI::HTML::filter($_[0], 'all'); + } + } + + return $_[0]; +} + +# horrible kludge to find the channel or item record +# in the varying kinds of rss structures returned by RSSLite +sub _find_record { + my ($data, $regex) = @_; + + if (ref($data) eq 'HASH') { + # reset the hash before calling each() + keys(%{$data}); + while (my ($name, $val) = each(%{$data})) { + if ($name =~ $_[1]) { + if ((((ref($val) eq 'HASH') && + ($val->{link} || $val->{title} || + $val->{description})) || + ((ref($val) eq 'ARRAY') && @{$val} && + (ref($val->[0]) eq 'HASH') && + ($val->[0]->{link} || + $val->[0]->{title} || + $val->[0]->{description})))) { + return $val; + } + } + if (my $record = _find_record($val, $regex)) { + return $record; + } + } + } + + return undef; +} + +# Copy the guid field to the link field if the guid looks like a link. +# This is a kludge that gets around the fact that some folks use the link +# field as the link to the story while others use it as the link +# to the story about which the story is written. The webuig templates seem +# to assume the former, so we should use the guid instead of the link, b/c +# the guid, if it is a link, always means the former. 
+# Also copy the first few words of the description into the title if +# there is no title +sub _normalize_items { + #my ($items) = @_; + + # max number of words to take from description to fill in an empty + # title + my $max_words = 10; + + for my $item (@{$_[0]}) { + if ($item->{guid} && ($item->{guid} =~ /^http:\/\//i)) { + $item->{link} = $item->{guid}; + } + if (!$item->{title}) { + my @description_words = split(/\s/, $item->{description}); + if (@description_words <= $max_words) { + $item->{title} = $item->{description}; + } else { + $item->{title} = join(" ", @description_words[0..$max_words-1]) . + " ..."; + } + } + + # IE doesn't recognize ' + $item->{title} =~ s/'/\'/; + $item->{description} =~ s/'/\'/; + } +} + +sub _get_rss_data { + my ($url) = @_; + + my $cache = WebGUI::Cache->new("url:" . $url, "RSS"); + my $rss_serial = $cache->get; + my $rss = {}; + if ($rss_serial) { + $rss = Storable::thaw($rss_serial); + } else { + my $ua = LWP::UserAgent->new(timeout => 5); + my $response = $ua->get($url); + if (!$response->is_success()) { + warn("Error retrieving url '$url': " . + $response->status_line()); + return undef; + } + my $xml = $response->content(); + + # there is no encode_entities_numeric that I can find, so I am + # commenting this out. -hal + # $xml =~ s#(