Partial re-write of HttpProxy's main method.

This commit is contained in:
Doug Bell 2006-11-07 14:37:16 +00:00
parent e68f609850
commit f1aef291cf

View file

@ -253,54 +253,78 @@ sub purgeCache {
#-------------------------------------------------------------------
sub view {
my $self = shift;
my %var;
my %formdata;
my $redirect = 0;
my $response;
my $header;
my $proxiedUrl;
### Set up a cookie jar
my $cookiebox = $self->session->url->escape($self->session->var->get("sessionId"));
my $requestMethod = $self->session->env->get("REQUEST_METHOD");
$cookiebox =~ s/[^A-Za-z0-9\-\.\_]//g; #removes all funky characters
$cookiebox .= '.cookie';
my $jar = HTTP::Cookies->new(File => $self->getCookieJar->getPath($cookiebox), AutoSave => 1, Ignore_Discard => 1);
my (%var, %formdata, $redirect, $response, $header, $userAgent, $proxiedUrl, $request);
if($self->session->form->param("func")!~/editSave/i) {
### Find the URL we're proxying
if ($self->session->form->param("func")!~/editSave/i) { # Ignore ?func=editSave
$proxiedUrl = $self->session->form->process("FormAction") || $self->session->form->process("proxiedUrl") || $self->get("proxiedUrl") ;
} else {
$proxiedUrl = $self->get("proxiedUrl");
$requestMethod = "GET";
}
$redirect=0;
return $self->processTemplate({},$self->get("templateId"))
unless ($proxiedUrl ne "");
return $self->processTemplate({},$self->get("templateId")) unless ($proxiedUrl ne "");
my $requestMethod = $self->session->env->get("REQUEST_METHOD") || "GET";
my $cachedContent = WebGUI::Cache->new($self->session,$proxiedUrl,"URL");
my $cachedHeader = WebGUI::Cache->new($self->session,$proxiedUrl,"HEADER");
$var{header} = $cachedHeader->get;
$var{content} = $cachedContent->get;
unless ($var{content} && $requestMethod=~/GET/i) {
$redirect=0;
until($redirect == 5) { # We follow max 5 redirects to prevent bouncing/flapping
$userAgent = new LWP::UserAgent;
$userAgent->agent($self->session->env->get("HTTP_USER_AGENT"));
$userAgent->timeout($self->get("timeout"));
$userAgent->env_proxy;
$proxiedUrl = URI->new($proxiedUrl);
#my $allowed_url = URI->new($self->get('proxiedUrl'))->abs;;
#if ($self->get("followExternal")==0 && $proxiedUrl !~ /\Q$allowed_url/i) {
if ($self->get("followExternal")==0 &&
(URI->new($self->get('proxiedUrl'))->host) ne (URI->new($proxiedUrl)->host) ) {
$var{header} = "text/html";
return "<h1>You are not allowed to leave ".$self->get("proxiedUrl")."</h1>";
}
### Do we have cached content to get?
my $cacheContent = WebGUI::Cache->new($self->session,$proxiedUrl,"URL");
my $cacheHeader = WebGUI::Cache->new($self->session,$proxiedUrl,"HEADER");
if ($requestMethod =~ /^GET$/i)
{
$var{header} = $cacheHeader->get;
$var{content} = $cacheContent->get;
}
$header = new HTTP::Headers;
$header->referer($self->get("proxiedUrl")); # To get around referrer blocking
# Unless we have cached content
unless ($var{content}) {
if($requestMethod=~/GET/i || $redirect != 0) {
# request_method is also GET after a redirection. Just to make sure we're
# not posting the same data over and over again.
if($redirect == 0) {
# Get new content
for my $redirect (0..5) { # We follow max 5 redirects to prevent bouncing/flapping
my $userAgent = new LWP::UserAgent;
$userAgent->agent($self->session->env->get("HTTP_USER_AGENT"));
$userAgent->timeout($self->get("timeout"));
$userAgent->env_proxy;
$proxiedUrl = URI->new($proxiedUrl);
# Set request method to GET after a redirect, so we're
# not posting the same data over and over
$requestMethod = "GET" if $redirect > 0;
## Make sure the user isn't leaving where we've allowed
if ($self->get("followExternal")==0
&& (URI->new($self->get('proxiedUrl'))->host) ne (URI->new($proxiedUrl)->host) ) {
$var{header} = "text/html";
$var{content} = "<h1>You are not allowed to leave ".$self->get("proxiedUrl")."</h1>";
last;
}
$header = new HTTP::Headers;
$header->referer($self->get("proxiedUrl")); # To get around referrer blocking
my $request; # Create the request
if($requestMethod=~/GET/i) {
my $params = $self->session->form->paramsHashRef();
for my $key (keys %{$params}) {
next unless ($key =~ s/^HttpProxy_//); # Skip non-proxied params
@ -311,98 +335,100 @@ sub view {
$proxiedUrl = $self->appendToUrl($proxiedUrl,"$key=$value");
}
} else {
$proxiedUrl = $self->appendToUrl($proxiedUrl,"$key=".$self->session->form->process('HttpProxy_'.$key));
$proxiedUrl = $self->appendToUrl($proxiedUrl,"$key=".$params->{$key});
}
}
}
$request = HTTP::Request->new(GET => $proxiedUrl, $header) || return "wrong url"; # Create GET request
} else { # It's a POST
my $contentType = 'application/x-www-form-urlencoded'; # default Content Type header
# Create a %formdata hash to pass key/value pairs to the POST request
foreach my $input_name ($self->session->request->param) {
$input_name =~ s/^HttpProxy_// or next;
### DEBUG
$self->session->errorHandler->warn("URL: $proxiedUrl");
my (@upload) = grep{defined} $self->session->request->upload('HttpProxy_'.$input_name);
if (@upload) { # Found uploaded file
my $upload = $upload[0];
$formdata{$input_name}=[$upload->tempname, $self->session->form->process('HttpProxy_'.$input_name)];
$contentType = 'form-data'; # Different Content Type header for file upload
$request = HTTP::Request->new(GET => $proxiedUrl, $header) || return "wrong url"; # Create GET request
} else { # It's a POST
my $contentType = 'application/x-www-form-urlencoded'; # default Content Type header
# Create a %formdata hash to pass key/value pairs to the POST request
foreach my $input_name ($self->session->request->param) {
$input_name =~ s/^HttpProxy_// or next;
my (@upload) = grep{defined} $self->session->request->upload('HttpProxy_'.$input_name);
if (@upload) { # Found uploaded file
my $upload = $upload[0];
$formdata{$input_name}=[$upload->tempname, $self->session->form->process('HttpProxy_'.$input_name)];
$contentType = 'form-data'; # Different Content Type header for file upload
} else {
$formdata{$input_name}=[($self->session->form->process('HttpProxy_'.$input_name))];
}
}
# Create POST request
$request = HTTP::Request::Common::POST($proxiedUrl, \%formdata, Content_Type => $contentType);
}
$jar->add_cookie_header($request);
$response = $userAgent->simple_request($request);
$jar->extract_cookies($response);
if ($response->is_redirect) { # redirected by http header
$proxiedUrl = URI::URL::url($response->header("Location"))->abs($proxiedUrl);;
$redirect++;
} elsif ($response->content_type eq "text/html"
&& $response->content =~ /<meta[^>]+refresh[^>]+content[^>]*url=([^\s'"<>]+)/gis) {
# redirection through meta refresh
my $refreshUrl = $1;
if($refreshUrl=~ /^http/gis) { #Refresh value is absolute
$proxiedUrl=$refreshUrl;
} else { # Refresh value is relative
$proxiedUrl =~ s/[^\/\\]*$//; #chop off everything after / in $proxiedURl
$proxiedUrl .= URI::URL::url($refreshUrl)->rel($proxiedUrl); # add relative path
}
$redirect++;
} else {
$redirect = 5; #No redirection found. Leave loop.
}
$redirect=5 if (not $self->get("followRedirect")); # No redirection. Overruled by setting
}
if($response->is_success) {
$var{content} = $response->content;
$var{header} = $response->content_type;
if($response->content_type eq "text/html"
|| ($response->content_type eq "" && $var{content}=~/<html/gis)) {
$var{"search.for"} = $self->getValue("searchFor");
$var{"stop.at"} = $self->getValue("stopAt");
if ($var{"search.for"}) {
$var{content} =~ /^(.*?)\Q$var{"search.for"}\E(.*)$/gis;
$var{"content.leading"} = $1 || $var{content};
$var{content} = $2;
}
if ($var{"stop.at"}) {
$var{content} =~ /(.*?)\Q$var{"stop.at"}\E(.*)$/gis;
$var{content} = $1 || $var{content};
$var{"content.trailing"} = $2;
}
my $p = WebGUI::Asset::Wobject::HttpProxy::Parse->new($self->session, $proxiedUrl, $var{content}, $self->getId,$self->get("rewriteUrls"),$self->getUrl);
$var{content} = $p->filter; # Rewrite content. (let forms/links return to us).
$p->DESTROY;
if ($var{content} =~ /<frame/gis) {
$var{header} = "text/html";
$var{content} = "<h1>HttpProxy: Can't display frames</h1>
Try fetching it directly <a href='$proxiedUrl'>here.</a>";
} else {
$formdata{$input_name}=[($self->session->form->process('HttpProxy_'.$input_name))];
$var{content} =~ s/\<style.*?\/style\>//isg if ($self->get("removeStyle"));
$var{content} = WebGUI::HTML::cleanSegment($var{content});
$var{content} = WebGUI::HTML::filter($var{content}, $self->get("filterHtml"));
}
}
# Create POST request
$request = HTTP::Request::Common::POST($proxiedUrl, \%formdata, Content_Type => $contentType);
}
$jar->add_cookie_header($request);
$response = $userAgent->simple_request($request);
$jar->extract_cookies($response);
if ($response->is_redirect) { # redirected by http header
$proxiedUrl = URI::URL::url($response->header("Location"))->abs($proxiedUrl);;
$redirect++;
} elsif ($response->content_type eq "text/html"
&& $response->content =~ /<meta[^>]+refresh[^>]+content[^>]*url=([^\s'"<>]+)/gis) {
# redirection through meta refresh
my $refreshUrl = $1;
if($refreshUrl=~ /^http/gis) { #Refresh value is absolute
$proxiedUrl=$refreshUrl;
} else { # Refresh value is relative
$proxiedUrl =~ s/[^\/\\]*$//; #chop off everything after / in $proxiedURl
$proxiedUrl .= URI::URL::url($refreshUrl)->rel($proxiedUrl); # add relative path
}
$redirect++;
} else {
$redirect = 5; #No redirection found. Leave loop.
}
$redirect=5 if (not $self->get("followRedirect")); # No redirection. Overruled by setting
}
if($response->is_success) {
$var{content} = $response->content;
$var{header} = $response->content_type;
if($response->content_type eq "text/html"
|| ($response->content_type eq "" && $var{content}=~/<html/gis)) {
$var{"search.for"} = $self->getValue("searchFor");
$var{"stop.at"} = $self->getValue("stopAt");
if ($var{"search.for"}) {
$var{content} =~ /^(.*?)\Q$var{"search.for"}\E(.*)$/gis;
$var{"content.leading"} = $1 || $var{content};
$var{content} = $2;
}
if ($var{"stop.at"}) {
$var{content} =~ /(.*?)\Q$var{"stop.at"}\E(.*)$/gis;
$var{content} = $1 || $var{content};
$var{"content.trailing"} = $2;
}
my $p = WebGUI::Asset::Wobject::HttpProxy::Parse->new($self->session, $proxiedUrl, $var{content}, $self->getId,$self->get("rewriteUrls"),$self->getUrl);
$var{content} = $p->filter; # Rewrite content. (let forms/links return to us).
$p->DESTROY;
if ($var{content} =~ /<frame/gis) {
$var{header} = "text/html";
$var{content} = "<h1>HttpProxy: Can't display frames</h1>
Try fetching it directly <a href='$proxiedUrl'>here.</a>";
} else {
$var{content} =~ s/\<style.*?\/style\>//isg if ($self->get("removeStyle"));
$var{content} = WebGUI::HTML::cleanSegment($var{content});
$var{content} = WebGUI::HTML::filter($var{content}, $self->get("filterHtml"));
}
}
} else { # Fetching page failed...
$var{header} = "text/html";
$var{content} = "<b>Getting <a href='$proxiedUrl'>$proxiedUrl</a> failed</b>".
"<p><i>GET status line: ".$response->status_line."</i>";
}
unless ($self->get("cacheTimeout") <= 10) {
$cachedContent->set($var{content},$self->get("cacheTimeout"));
$cachedHeader->set($var{header},$self->get("cacheTimeout"));
$cacheContent->set($var{content},$self->get("cacheTimeout"));
$cacheHeader->set($var{header},$self->get("cacheTimeout"));
}
}