From 1ea00268f68903521efec1732bfb15d51fc63928 Mon Sep 17 00:00:00 2001 From: JT Smith Date: Wed, 26 Feb 2003 03:48:46 +0000 Subject: [PATCH] Adding HTTP Proxy. --- docs/gotcha.txt | 8 + docs/upgrades/upgrade_5.1.2-5.2.0.sql | 35 +++- lib/WebGUI/ProxyParse.pm | 103 ++++++++++ lib/WebGUI/Wobject/HttpProxy.pm | 273 ++++++++++++++++++++++++++ 4 files changed, 417 insertions(+), 2 deletions(-) create mode 100644 lib/WebGUI/ProxyParse.pm create mode 100644 lib/WebGUI/Wobject/HttpProxy.pm diff --git a/docs/gotcha.txt b/docs/gotcha.txt index 23b2c2e3f..05e59ab1e 100644 --- a/docs/gotcha.txt +++ b/docs/gotcha.txt @@ -35,6 +35,14 @@ save you many hours of grief. background-color: #dddddd; } + * If you have downloaded and installed the HttpProxy wobject from + the contributions then you will need to delete it prior to + upgrading or the database upgrade will fail. To remove it + from the database just run the following commands from your + MySQL prompt: + + drop table HttpProxy; + delete from Wobject where namespace='HttpProxy'; 5.1.0 diff --git a/docs/upgrades/upgrade_5.1.2-5.2.0.sql b/docs/upgrades/upgrade_5.1.2-5.2.0.sql index bb834898f..4438a8974 100644 --- a/docs/upgrades/upgrade_5.1.2-5.2.0.sql +++ b/docs/upgrades/upgrade_5.1.2-5.2.0.sql @@ -25,8 +25,39 @@ update style set styleSheet='','\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n
^AdminBar;\r\n\"Print!\"\r\n
\r\n\r\n\r\n \r\n\r\n\r\n \r\n \r\n\r\n\r\n \r\n\r\n
\r\n
User:\r\n ^a(^@;);
Location: ^C;
\r\n
\r\n\r\n\r\n \r\n \r\n\r\n\r\n \r\n\r\n\r\n \r\n\r\n
\r\n \r\n \r\n \r\n \r\n
^FlexMenu;
\r\n \r\n
\r\n\r\n\r\n^-;\r\n\r\n\r\n\r\n

\r\n

\r\n
^D(\"%c %D %y\");Powered by WebGUI
\r\n\r\n\r\n' where styleId=-6; update style set styleSheet='\r\n','^AdminBar;\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n
\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n
\r\n \r\n \r\n
Home\r\n
\r\n
\r\n \r\n \r\n
Print!\r\n
\r\n
\r\n \r\n \r\n
My Account\r\n
\r\n
\r\n
\r\n\r\n\r\n \r\n \r\n \r\n \r\n
^C;^?;
\r\n\r\n\r\n \r\n \r\n \r\n\r\n \r\n \r\n
\r\n \r\n
\r\n

Site Navigation

\r\n
\r\n \r\n \r\n \r\n \r\n
^T(0);
^L;
\r\n
\r\n\r\n \r\n\r\n \r\n
\r\n \r\n\r\n^-;\r\n\r\n\r\n
\r\n
\r\n\r\n\r\n


This Design ©2002 OpenServe
\r\n\r\n
\r\n\r\n' where styleId=-7 update style set styleSheet='','^AdminBar;\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n \r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n
^C;User: ^a(^@;);
\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n \r\n\r\n
\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n
\r\n
\r\n ^H;\r\n
\r\n ^FlexMenu;\r\n ^a;\r\n
\r\n
\r\n
\r\n
\r\n
^-;
\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n \r\n\r\n
\r\n\r\n\r\n' where styleId=-8; - - +CREATE TABLE HttpProxy ( + wobjectId int(11) NOT NULL default '0', + ProxiedUrl varchar(255) default NULL, + timeout int(11) default NULL, + removeStyle int(11) default NULL, + filterHtml varchar(30) default NULL, + followExternal int(11) default NULL, + followRedirect int(11) default NULL, + cookiebox varchar(30) default NULL, + cacheHttp int(11) default '0', + useCache int(11) default '0', + debug int(11) default '0', + PRIMARY KEY (wobjectId) +) TYPE=MyISAM; +DELETE FROM international WHERE namespace='HttpProxy'; +INSERT INTO international VALUES (3,'HttpProxy',1,'HTTP Proxy',1031510000); +INSERT INTO international VALUES (2,'HttpProxy',1,'Edit HTTP Proxy',1031510000); +INSERT INTO international VALUES (1,'HttpProxy',1,'URL',1031510000); +INSERT INTO international VALUES (4,'HttpProxy',1,'Timeout (sec)',1031510000); +INSERT INTO international VALUES (7,'HttpProxy',1,'Filter Content',1031510000); +INSERT INTO international VALUES (5,'HttpProxy',1,'Allow proxying of other domains',1031510000); +INSERT INTO international VALUES (6,'HttpProxy',1,'Remove Style from content',1031510000); +INSERT INTO international VALUES (8,'HttpProxy',1,'Follow redirects',1031510000); +INSERT INTO international VALUES (9,'HttpProxy',1,'Directory to save cookies',1031510000); +INSERT INTO international VALUES (9,'HttpProxy',2,'Verzeichnis zum Speichern der Cookies',1040395667); +INSERT INTO international VALUES (8,'HttpProxy',2,'Redirects folgen',1040395646); +INSERT INTO international VALUES (7,'HttpProxy',2,'Inhalt filtern',1040395634); +INSERT INTO international VALUES (6,'HttpProxy',2,'Style vom Inhalt entfernen',1040395623); +INSERT INTO international VALUES (5,'HttpProxy',2,'Externen Links folgen',1040395599); +INSERT INTO international VALUES (4,'HttpProxy',2,'Timeout (Sekunden)',1040395576); +INSERT INTO international VALUES (3,'HttpProxy',2,'HTTP Proxy',1040395372); +INSERT INTO international VALUES (2,'HttpProxy',2,'HTTP Proxy ändern',1040395360); +INSERT INTO international VALUES (1,'HttpProxy',2,'URL',1040395344); diff --git a/lib/WebGUI/ProxyParse.pm b/lib/WebGUI/ProxyParse.pm new file mode 100644 index 000000000..00bbb6054 --- /dev/null +++ b/lib/WebGUI/ProxyParse.pm @@ -0,0 +1,103 @@ +# Len Kranendonk - 20021212 + +package WebGUI::ProxyParse;; +require HTML::Parser; +require HTML::Entities; +require URI::URL; +use WebGUI::URL; +use vars qw(@ISA); +@ISA = qw(HTML::Parser); + +my %linkElements = # from HTML::Element.pm + ( + body => 'background', + base => 'href', + a => 'href', + img => [qw(src lowsrc usemap)], # lowsrc is a Netscape invention + form => 'action', +# input => 'src', + 'link' => 'href', # need quoting since link is a perl builtin + frame => 'src', + applet => 'codebase', + area => 'href', + ); + +my %tag_attr; +for my $tag (keys %linkElements) { + my $tagval = $linkElements{$tag}; + for my $attr (ref $tagval ? @$tagval : $tagval) { + $tag_attr{"$tag $attr"}++; + } +} + +sub new { + my $pack = shift; + my $self = $pack->SUPER::new(); + $self->{Url} = shift; + $self->{Content} = shift; + $self->{wid} = shift; + $self->{Filtered} =""; + $self->{FormAction} = ""; + $self->{FormActionIsDefined} = 0; + $self; +} + +sub filter { + my $self=shift; + $self->parse($self->{Content}); # Make paths absolute and let them return to us + $self->eof; + return $self->{Filtered}; +} + +## some items stolen from HTML::Filter +sub output { $_[0]->{Filtered} .= $_[1]; } +sub declaration { $_[0]->output("") } +sub comment { $_[0]->output("") } +sub text { $_[0]->output($_[1]) } +sub end { $_[0]->output("") } + +sub start { + my $self = shift; + my ($tag, $attr, $attrseq, $origtext) = @_; + $self->output("<$tag"); + for (keys %$attr) { + $self->output(" $_=\""); + my $val = $attr->{$_}; + if ((lc($tag) eq "input" || lc($tag) eq "textarea" || lc($tag) eq "select") + && (lc($_) eq "name" || lc($_) eq "submit")) { # Rewrite input type names + $val = 'HttpProxy_' . $val; + } + if (lc($tag) eq "form" && not $self->{FormActionIsDefined}) { + $self->{FormAction} = $self->{Url}; + } + if ($tag_attr{"$tag $_"}) { # needs rewrite + if ($val =~ /^\?/) { # link that starts with ? i.e. + my @urlBase = split(/\?/, $self->{Url}); + $val = URI::URL::url($urlBase[0] . $val); + } else { + $val = URI::URL::url($val)->abs($self->{Url},1); # make absolute + } + if ($val->scheme eq "http") { + if (lc($tag) ne "img") { # no rewrite for images + if (lc($tag) eq "form" && lc($_) eq "action") { # Found FORM ACTION + $self->{FormActionIsDefined}=1; + $self->{FormAction} = $val; # set FormAction to include hidden field later + $val = WebGUI::URL::page; # Form Action returns to us + } else { + $val = WebGUI::URL::page('proxiedUrl='.WebGUI::URL::escape($val). + '&wid='.$self->{wid}); # return to us + } + } + } + } + $self->output($val.'"'); + } + $self->output(">"); + if ($self->{FormAction} ne "") { + $self->output(''); + $self->output(''); + $self->{FormAction} = ''; + $self->{FormActionIsDefined}=0; + } +} +1; diff --git a/lib/WebGUI/Wobject/HttpProxy.pm b/lib/WebGUI/Wobject/HttpProxy.pm new file mode 100644 index 000000000..4279276c2 --- /dev/null +++ b/lib/WebGUI/Wobject/HttpProxy.pm @@ -0,0 +1,273 @@ +package WebGUI::Wobject::HttpProxy; + +#------------------------------------------------------------------- +# WebGUI is Copyright 2001-2002 Plain Black LLC. +#------------------------------------------------------------------- +# Please read the legal notices (docs/legal.txt) and the license +# (docs/license.txt) that came with this distribution before using +# this software. +#------------------------------------------------------------------- +# http://www.plainblack.com info@plainblack.com +#------------------------------------------------------------------- +# Len Kranendonk - 20021212 + +use strict; +use URI; +use LWP; +use HTTP::Cookies; +use HTTP::Request::Common; +use HTML::Entities; +use WebGUI::HTMLForm; +use WebGUI::Icon; +use WebGUI::International; +use WebGUI::Privilege; +use WebGUI::Session; +use WebGUI::Wobject; +use WebGUI::ProxyParse; + +our @ISA = qw(WebGUI::Wobject); +our $namespace = "HttpProxy"; +our $name = WebGUI::International::get(3,$namespace); + + +#------------------------------------------------------------------- +sub duplicate { + my ($w); + $w = $_[0]->SUPER::duplicate($_[1]); + $w = WebGUI::Wobject::WobjectProxy->new({wobjectId=>$w,namespace=>$namespace}); + $w->set({ + proxiedUrl=>$_[0]->get("proxiedUrl"), + timeout=>$_[0]->get("timeout"), + removeStyle=>$_[0]->get("removeStyle"), + filterHtml=>$_[0]->get("filterHtml"), + followExternal=>$_[0]->get("followExternal"), + followRedirect=>$_[0]->get("followRedirect"), + cookiebox=>$_[0]->get("cookiebox") + }); +} + +#------------------------------------------------------------------- +sub new { + my ($self, $class, $property); + $class = shift; + $property = shift; + $self = WebGUI::Wobject->new($property); + bless $self, $class; +} + +#------------------------------------------------------------------- +sub set { + $_[0]->SUPER::set($_[1],[qw(proxiedUrl timeout removeStyle filterHtml followExternal followRedirect cookiebox)]); +} + +#------------------------------------------------------------------- +sub www_copy { + if (WebGUI::Privilege::canEditPage()) { + $_[0]->duplicate; + return ""; + } else { + return WebGUI::Privilege::insufficient(); + } +} + +#------------------------------------------------------------------- +sub www_edit { + my (%htmlFilter, $output, $f, $startDate, $endDate, $templatePosition, $proxiedUrl, %hash); + if (WebGUI::Privilege::canEditPage()) { + %hash=(5=>5,10=>10,20=>20,30=>30,60=>60); + %htmlFilter = ('none'=>WebGUI::International::get(420), 'most'=>WebGUI::International::get(421), + 'javascript'=>WebGUI::International::get(526), 'all'=>WebGUI::International::get(419)); + $output = helpIcon(1,$namespace); + $output .= '

'.WebGUI::International::get(2,$namespace).'

'; + $templatePosition = $_[0]->get("templatePosition") || '0'; + $startDate = $_[0]->get("startDate") || $session{page}{startDate}; + $endDate = $_[0]->get("endDate") || $session{page}{endDate}; + $f = WebGUI::HTMLForm->new; + $f->url("proxiedUrl", WebGUI::International::get(1,$namespace),$_[0]->get("proxiedUrl")||'http://'); + $f->yesNo( + -name=>"followExternal", + -label=>WebGUI::International::get(5,$namespace), + -value=>($_[0]->get("wobjectId") eq "new") ? 1 : $_[0]->get("followExternal"), + -uiLevel=>5 + ); + $f->yesNo( + -name=>"followRedirect", + -label=>WebGUI::International::get(8,$namespace), + -value=>$_[0]->get("followRedirect"), + -uiLevel=>5 + ); + $f->yesNo( + -name=>"removeStyle", + -label=>WebGUI::International::get(6,$namespace), + -value=>($_[0]->get("wobjectId") eq "new") ? 1 : $_[0]->get("removeStyle"), + -uiLevel=>5 + ); + + $f->select("filterHtml",\%htmlFilter,WebGUI::International::get(7,$namespace),[$_[0]->get("filterHtml")||"javascript"]); + $f->select("timeout", \%hash, WebGUI::International::get(4,$namespace),[$_[0]->get("timeout")||30]); + $f->text("cookiebox", WebGUI::International::get(9,$namespace),$_[0]->get("cookiebox")||'/tmp'); + $output .= $_[0]->SUPER::www_edit($f->printRowsOnly); + return $output; + } else { + return WebGUI::Privilege::insufficient(); + } +} + +#------------------------------------------------------------------- +sub www_editSave { + if (WebGUI::Privilege::canEditPage()) { + $_[0]->SUPER::www_editSave(); + $_[0]->set({ + proxiedUrl=>$session{form}{proxiedUrl}, + timeout=>$session{form}{timeout}, + removeStyle=>$session{form}{removeStyle}, + filterHtml=>$session{form}{filterHtml}, + followExternal=>$session{form}{followExternal}, + followRedirect=>$session{form}{followRedirect}, + cookiebox=>$session{form}{cookiebox} + }); + return ""; + } else { + return WebGUI::Privilege::insufficient(); + } +} + +#------------------------------------------------------------------- +sub www_view { + my (%formdata, @formUpload, $jar, $redirect, $cookiebox, $response, $header, + $output, $userAgent, $proxiedUrl, $request, $content); + + $output = $_[0]->displayTitle; + $output .= $_[0]->description; + $output = $_[0]->processMacros($output); + + if(not(-w $_[0]->get("cookiebox") && -r $_[0]->get("cookiebox"))) { + return "Error while opening cookie directory ".$_[0]->get("cookiebox")."

$!"; + } + + $cookiebox = $_[0]->get("cookiebox").'/'.$namespace.'_cookie_'.WebGUI::URL::escape($session{var}{sessionId}).'.jar'; + $jar = HTTP::Cookies->new(File => $cookiebox, AutoSave => 1); + + if($session{form}{wid} == $_[0]->get("wobjectId") && $session{form}{func}!~/editSave/i) { + $proxiedUrl = $session{form}{FormAction} || $session{form}{proxiedUrl} || $_[0]->get("proxiedUrl") ; + } else { + $proxiedUrl = $_[0]->get("proxiedUrl"); + $session{env}{REQUEST_METHOD}='GET'; + } + + $redirect=0; + + until($redirect == 5) { # We follow max 5 redirects to prevent bouncing/flapping + $userAgent = new LWP::UserAgent; + $userAgent->agent($session{env}{HTTP_USER_AGENT}); + $userAgent->timeout($_[0]->get("timeout")); + $userAgent->env_proxy; + + $proxiedUrl = URI->new($proxiedUrl); + + #my $allowed_url = URI->new($_[0]->get('proxiedUrl'))->abs;; + + #if ($_[0]->get("followExternal")==0 && $proxiedUrl !~ /\Q$allowed_url/i) { + if ($_[0]->get("followExternal")==0 && + (URI->new($_[0]->get('proxiedUrl'))->host) ne (URI->new($proxiedUrl)->host) ) { + return "

You are not allowed to leave ".$_[0]->get("proxiedUrl")."

"; + } + + $header = new HTTP::Headers; + + if($session{env}{REQUEST_METHOD}=~/GET/i + || $redirect != 0) { # request_method is also GET after a redirection. Just to make sure we're + # not posting the same data over and over again. + if($redirect == 0 && $session{form}{wid} == $_[0]->get("wobjectId")) { + foreach my $input_name (keys %{$session{form}}) { + next if ($input_name !~ /^HttpProxy_/); # Skip non proxied form var's + $input_name =~ s/^HttpProxy_//; + $proxiedUrl=WebGUI::URL::append($proxiedUrl,"$input_name=$session{form}{'HttpProxy_'.$input_name}"); + } + } + $request = HTTP::Request->new(GET => $proxiedUrl, $header) || return "wrong url"; # Create GET request + } else { # It's a POST + + my $contentType = 'application/x-www-form-urlencoded'; # default Content Type header + + # Create a %formdata hash to pass key/value pairs to the POST request + foreach my $input_name (keys %{$session{form}}) { + next if ($input_name !~ /^HttpProxy_/); # Skip non proxied form var's + $input_name =~ s/^HttpProxy_//; + + my $uploadFile = $session{cgi}->tmpFileName($session{form}{'HttpProxy_'.$input_name}); + + if(-r $uploadFile) { # Found uploaded file + @formUpload=($uploadFile, qq/$session{form}{'HttpProxy_'.$input_name}/); + $formdata{$input_name}=\@formUpload; + $contentType = 'form-data'; # Different Content Type header for file upload + } else { + $formdata{$input_name}=qq/$session{form}{'HttpProxy_'.$input_name}/; + } + } + # Create POST request + $request = HTTP::Request::Common::POST($proxiedUrl, \%formdata, Content_Type => $contentType); + } + $jar->add_cookie_header($request); + + + $response = $userAgent->simple_request($request); + + $jar->extract_cookies($response); + + if ($response->is_redirect) { # redirected by http header + $proxiedUrl = URI::URL::url($response->header("Location"))->abs($proxiedUrl);; + $redirect++; + } elsif ($response->content_type eq "text/html" && $response->content =~ + /]+refresh[^>]+content[^>]*url=([^\s'"<>]+)/gis) { + # redirection through meta refresh + my $refreshUrl = $1; + if($refreshUrl=~ /^http/gis) { #Refresh value is absolute + $proxiedUrl=$refreshUrl; + } else { # Refresh value is relative + $proxiedUrl =~ s/[^\/\\]*$//; #chop off everything after / in $proxiedURl + $proxiedUrl .= URI::URL::url($refreshUrl)->rel($proxiedUrl); # add relative path + } + $redirect++; + } else { + $redirect = 5; #No redirection found. Leave loop. + } + $redirect=5 if (not $_[0]->get("followRedirect")); # No redirection. Overruled by setting + } + + if($response->is_success) { + $content = $response->content; + + if($response->content_type eq "text/html" || + ($response->content_type eq "" && $content=~/new($proxiedUrl, $content, $_[0]->get("wobjectId")); + $content = $p->filter; # Rewrite content. (let forms/links return to us). + $p->DESTROY; + + if ($content =~ ///isg if ($_[0]->get("removeStyle")); + $content = WebGUI::HTML::cleanSegment($content); + $content = WebGUI::HTML::filter($content, $_[0]->get("filterHtml")); + } + } elsif ($response->content_type eq "text/plain") { + $content = '
'.HTML::Entities::encode($response->content).'
'; + } elsif ($response->content_type =~ /image\//i) { + $content = '

'; + } elsif ($response->content_type ne "") { # content_type we don't know about + $content = "

Can't proxy \"".($response->content_type)."\" content.

+ Try Fetching it directly
here."; + } else { + $content = "

The request didn't return any data.

+ Try Fetching it directly here."; + } + } else { # Fetching page failed... + $content = "Getting $proxiedUrl failed". + "

GET status line: ".$response->status_line.""; + } + return $output.$content; +} +1;