From f1aef291cf3364b9e0145255c9c04101cf57ca8b Mon Sep 17 00:00:00 2001 From: Doug Bell Date: Tue, 7 Nov 2006 14:37:16 +0000 Subject: [PATCH] Partial re-write of HttpProxy's main method. --- lib/WebGUI/Asset/Wobject/HttpProxy.pm | 254 ++++++++++++++------------ 1 file changed, 140 insertions(+), 114 deletions(-) diff --git a/lib/WebGUI/Asset/Wobject/HttpProxy.pm b/lib/WebGUI/Asset/Wobject/HttpProxy.pm index 6b1e095ab..7eb1f9b0a 100644 --- a/lib/WebGUI/Asset/Wobject/HttpProxy.pm +++ b/lib/WebGUI/Asset/Wobject/HttpProxy.pm @@ -253,54 +253,78 @@ sub purgeCache { #------------------------------------------------------------------- sub view { my $self = shift; + my %var; + my %formdata; + my $redirect = 0; + my $response; + my $header; + my $proxiedUrl; + + + ### Set up a cookie jar my $cookiebox = $self->session->url->escape($self->session->var->get("sessionId")); - my $requestMethod = $self->session->env->get("REQUEST_METHOD"); $cookiebox =~ s/[^A-Za-z0-9\-\.\_]//g; #removes all funky characters $cookiebox .= '.cookie'; my $jar = HTTP::Cookies->new(File => $self->getCookieJar->getPath($cookiebox), AutoSave => 1, Ignore_Discard => 1); - my (%var, %formdata, $redirect, $response, $header, $userAgent, $proxiedUrl, $request); - if($self->session->form->param("func")!~/editSave/i) { + + ### Find the URL we're proxying + if ($self->session->form->param("func")!~/editSave/i) { # Ignore ?func=editSave $proxiedUrl = $self->session->form->process("FormAction") || $self->session->form->process("proxiedUrl") || $self->get("proxiedUrl") ; } else { $proxiedUrl = $self->get("proxiedUrl"); - $requestMethod = "GET"; } - $redirect=0; + return $self->processTemplate({},$self->get("templateId")) + unless ($proxiedUrl ne ""); - return $self->processTemplate({},$self->get("templateId")) unless ($proxiedUrl ne ""); + my $requestMethod = $self->session->env->get("REQUEST_METHOD") || "GET"; - my $cachedContent = WebGUI::Cache->new($self->session,$proxiedUrl,"URL"); - my $cachedHeader = WebGUI::Cache->new($self->session,$proxiedUrl,"HEADER"); - $var{header} = $cachedHeader->get; - $var{content} = $cachedContent->get; - unless ($var{content} && $requestMethod=~/GET/i) { - $redirect=0; - until($redirect == 5) { # We follow max 5 redirects to prevent bouncing/flapping - $userAgent = new LWP::UserAgent; - $userAgent->agent($self->session->env->get("HTTP_USER_AGENT")); - $userAgent->timeout($self->get("timeout")); - $userAgent->env_proxy; - - $proxiedUrl = URI->new($proxiedUrl); - - #my $allowed_url = URI->new($self->get('proxiedUrl'))->abs;; - - #if ($self->get("followExternal")==0 && $proxiedUrl !~ /\Q$allowed_url/i) { - if ($self->get("followExternal")==0 && - (URI->new($self->get('proxiedUrl'))->host) ne (URI->new($proxiedUrl)->host) ) { - $var{header} = "text/html"; - return "

You are not allowed to leave ".$self->get("proxiedUrl")."

"; - } + ### Do we have cached content to get? + my $cacheContent = WebGUI::Cache->new($self->session,$proxiedUrl,"URL"); + my $cacheHeader = WebGUI::Cache->new($self->session,$proxiedUrl,"HEADER"); + if ($requestMethod =~ /^GET$/i) + { + $var{header} = $cacheHeader->get; + $var{content} = $cacheContent->get; + } - $header = new HTTP::Headers; - $header->referer($self->get("proxiedUrl")); # To get around referrer blocking + + # Unless we have cached content + unless ($var{content}) { - if($requestMethod=~/GET/i || $redirect != 0) { - # request_method is also GET after a redirection. Just to make sure we're - # not posting the same data over and over again. - if($redirect == 0) { + # Get new content + for my $redirect (0..5) { # We follow max 5 redirects to prevent bouncing/flapping + + my $userAgent = new LWP::UserAgent; + $userAgent->agent($self->session->env->get("HTTP_USER_AGENT")); + $userAgent->timeout($self->get("timeout")); + $userAgent->env_proxy; + + + $proxiedUrl = URI->new($proxiedUrl); + + + # Set request method to GET after a redirect, so we're + # not posting the same data over and over + $requestMethod = "GET" if $redirect > 0; + + + ## Make sure the user isn't leaving where we've allowed + if ($self->get("followExternal")==0 + && (URI->new($self->get('proxiedUrl'))->host) ne (URI->new($proxiedUrl)->host) ) { + $var{header} = "text/html"; + $var{content} = "

You are not allowed to leave ".$self->get("proxiedUrl")."

"; + last; + } + + + $header = new HTTP::Headers; + $header->referer($self->get("proxiedUrl")); # To get around referrer blocking + + + my $request; # Create the request + if($requestMethod=~/GET/i) { my $params = $self->session->form->paramsHashRef(); for my $key (keys %{$params}) { next unless ($key =~ s/^HttpProxy_//); # Skip non-proxied params @@ -311,98 +335,100 @@ sub view { $proxiedUrl = $self->appendToUrl($proxiedUrl,"$key=$value"); } } else { - $proxiedUrl = $self->appendToUrl($proxiedUrl,"$key=".$self->session->form->process('HttpProxy_'.$key)); + $proxiedUrl = $self->appendToUrl($proxiedUrl,"$key=".$params->{$key}); } } - } - $request = HTTP::Request->new(GET => $proxiedUrl, $header) || return "wrong url"; # Create GET request - } else { # It's a POST - - my $contentType = 'application/x-www-form-urlencoded'; # default Content Type header - - # Create a %formdata hash to pass key/value pairs to the POST request - foreach my $input_name ($self->session->request->param) { - $input_name =~ s/^HttpProxy_// or next; + ### DEBUG + $self->session->errorHandler->warn("URL: $proxiedUrl"); - my (@upload) = grep{defined} $self->session->request->upload('HttpProxy_'.$input_name); - if (@upload) { # Found uploaded file - my $upload = $upload[0]; - $formdata{$input_name}=[$upload->tempname, $self->session->form->process('HttpProxy_'.$input_name)]; - $contentType = 'form-data'; # Different Content Type header for file upload + $request = HTTP::Request->new(GET => $proxiedUrl, $header) || return "wrong url"; # Create GET request + } else { # It's a POST + + my $contentType = 'application/x-www-form-urlencoded'; # default Content Type header + + # Create a %formdata hash to pass key/value pairs to the POST request + foreach my $input_name ($self->session->request->param) { + $input_name =~ s/^HttpProxy_// or next; + + my (@upload) = grep{defined} $self->session->request->upload('HttpProxy_'.$input_name); + if (@upload) { # Found uploaded file + my $upload = $upload[0]; + $formdata{$input_name}=[$upload->tempname, $self->session->form->process('HttpProxy_'.$input_name)]; + $contentType = 'form-data'; # Different Content Type header for file upload + } else { + $formdata{$input_name}=[($self->session->form->process('HttpProxy_'.$input_name))]; + } + } + # Create POST request + $request = HTTP::Request::Common::POST($proxiedUrl, \%formdata, Content_Type => $contentType); + } + $jar->add_cookie_header($request); + + + $response = $userAgent->simple_request($request); + + $jar->extract_cookies($response); + + if ($response->is_redirect) { # redirected by http header + $proxiedUrl = URI::URL::url($response->header("Location"))->abs($proxiedUrl);; + $redirect++; + } elsif ($response->content_type eq "text/html" + && $response->content =~ /]+refresh[^>]+content[^>]*url=([^\s'"<>]+)/gis) { + # redirection through meta refresh + my $refreshUrl = $1; + if($refreshUrl=~ /^http/gis) { #Refresh value is absolute + $proxiedUrl=$refreshUrl; + } else { # Refresh value is relative + $proxiedUrl =~ s/[^\/\\]*$//; #chop off everything after / in $proxiedURl + $proxiedUrl .= URI::URL::url($refreshUrl)->rel($proxiedUrl); # add relative path + } + $redirect++; + } else { + $redirect = 5; #No redirection found. Leave loop. + } + $redirect=5 if (not $self->get("followRedirect")); # No redirection. Overruled by setting + } + + if($response->is_success) { + $var{content} = $response->content; + $var{header} = $response->content_type; + if($response->content_type eq "text/html" + || ($response->content_type eq "" && $var{content}=~/getValue("searchFor"); + $var{"stop.at"} = $self->getValue("stopAt"); + if ($var{"search.for"}) { + $var{content} =~ /^(.*?)\Q$var{"search.for"}\E(.*)$/gis; + $var{"content.leading"} = $1 || $var{content}; + $var{content} = $2; + } + if ($var{"stop.at"}) { + $var{content} =~ /(.*?)\Q$var{"stop.at"}\E(.*)$/gis; + $var{content} = $1 || $var{content}; + $var{"content.trailing"} = $2; + } + my $p = WebGUI::Asset::Wobject::HttpProxy::Parse->new($self->session, $proxiedUrl, $var{content}, $self->getId,$self->get("rewriteUrls"),$self->getUrl); + $var{content} = $p->filter; # Rewrite content. (let forms/links return to us). + $p->DESTROY; + + if ($var{content} =~ /session->form->process('HttpProxy_'.$input_name))]; + $var{content} =~ s/\//isg if ($self->get("removeStyle")); + $var{content} = WebGUI::HTML::cleanSegment($var{content}); + $var{content} = WebGUI::HTML::filter($var{content}, $self->get("filterHtml")); } } - # Create POST request - $request = HTTP::Request::Common::POST($proxiedUrl, \%formdata, Content_Type => $contentType); - } - $jar->add_cookie_header($request); - - - $response = $userAgent->simple_request($request); - - $jar->extract_cookies($response); - - if ($response->is_redirect) { # redirected by http header - $proxiedUrl = URI::URL::url($response->header("Location"))->abs($proxiedUrl);; - $redirect++; - } elsif ($response->content_type eq "text/html" - && $response->content =~ /]+refresh[^>]+content[^>]*url=([^\s'"<>]+)/gis) { - # redirection through meta refresh - my $refreshUrl = $1; - if($refreshUrl=~ /^http/gis) { #Refresh value is absolute - $proxiedUrl=$refreshUrl; - } else { # Refresh value is relative - $proxiedUrl =~ s/[^\/\\]*$//; #chop off everything after / in $proxiedURl - $proxiedUrl .= URI::URL::url($refreshUrl)->rel($proxiedUrl); # add relative path - } - $redirect++; - } else { - $redirect = 5; #No redirection found. Leave loop. - } - $redirect=5 if (not $self->get("followRedirect")); # No redirection. Overruled by setting - } - - if($response->is_success) { - $var{content} = $response->content; - $var{header} = $response->content_type; - if($response->content_type eq "text/html" - || ($response->content_type eq "" && $var{content}=~/getValue("searchFor"); - $var{"stop.at"} = $self->getValue("stopAt"); - if ($var{"search.for"}) { - $var{content} =~ /^(.*?)\Q$var{"search.for"}\E(.*)$/gis; - $var{"content.leading"} = $1 || $var{content}; - $var{content} = $2; - } - if ($var{"stop.at"}) { - $var{content} =~ /(.*?)\Q$var{"stop.at"}\E(.*)$/gis; - $var{content} = $1 || $var{content}; - $var{"content.trailing"} = $2; - } - my $p = WebGUI::Asset::Wobject::HttpProxy::Parse->new($self->session, $proxiedUrl, $var{content}, $self->getId,$self->get("rewriteUrls"),$self->getUrl); - $var{content} = $p->filter; # Rewrite content. (let forms/links return to us). - $p->DESTROY; - - if ($var{content} =~ ///isg if ($self->get("removeStyle")); - $var{content} = WebGUI::HTML::cleanSegment($var{content}); - $var{content} = WebGUI::HTML::filter($var{content}, $self->get("filterHtml")); - } - } } else { # Fetching page failed... $var{header} = "text/html"; $var{content} = "Getting $proxiedUrl failed". "

GET status line: ".$response->status_line.""; } unless ($self->get("cacheTimeout") <= 10) { - $cachedContent->set($var{content},$self->get("cacheTimeout")); - $cachedHeader->set($var{header},$self->get("cacheTimeout")); + $cacheContent->set($var{content},$self->get("cacheTimeout")); + $cacheHeader->set($var{header},$self->get("cacheTimeout")); } }