summaryrefslogtreecommitdiffstats
path: root/module/network/CookieRedirectHandler.py
diff options
context:
space:
mode:
authorGravatar mkaay <mkaay@mkaay.de> 2010-12-22 20:33:23 +0100
committerGravatar mkaay <mkaay@mkaay.de> 2010-12-22 20:33:23 +0100
commit0fd06af30e6ec943b6ddcfed2e2cf4cd64095309 (patch)
treef82c64a4504412ac848285cbf5a235e4295cb106 /module/network/CookieRedirectHandler.py
parentfixed getURL (diff)
downloadpyload-0fd06af30e6ec943b6ddcfed2e2cf4cd64095309.tar.xz
cookie handling WIP -.-
Diffstat (limited to 'module/network/CookieRedirectHandler.py')
-rw-r--r--module/network/CookieRedirectHandler.py146
1 files changed, 146 insertions, 0 deletions
diff --git a/module/network/CookieRedirectHandler.py b/module/network/CookieRedirectHandler.py
new file mode 100644
index 000000000..3eeb3e711
--- /dev/null
+++ b/module/network/CookieRedirectHandler.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: mkaay, RaNaN
+"""
+
+from urllib2 import BaseHandler
+from urllib import addinfourl
+from urllib2 import Request
+from urlparse import urlparse, urlunparse, urljoin
+from CookieJar import CookieJar
+
+class CookieRedirectHandler(BaseHandler):
+ # maximum number of redirections to any single URL
+ # this is needed because of the state that cookies introduce
+ max_repeats = 4
+ # maximum total number of redirections (regardless of URL) before
+ # assuming we're in a loop
+ max_redirections = 10
+
+ def __init__(self, cookiejar=None, follow=True):
+ if cookiejar is None:
+ cookiejar = CookieJar()
+ self.cookiejar = cookiejar
+ self.follow = follow
+
+ def http_request(self, request):
+ print "add", self.cookiejar
+ self.cookiejar.add_cookie_header(request)
+ return request
+
+ def http_response(self, request, response):
+ print "get", self.cookiejar
+ self.cookiejar.extract_cookies(response, request)
+ return response
+
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ """Return a Request or None in response to a redirect.
+
+ This is called by the http_error_30x methods when a
+ redirection response is received. If a redirection should
+ take place, return a new Request to allow http_error_30x to
+ perform the redirect. Otherwise, raise HTTPError if no-one
+ else should try to handle this url. Return None if you can't
+ but another Handler might.
+ """
+ m = req.get_method()
+ if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+ or code in (301, 302, 303) and m == "POST"):
+ # Strictly (according to RFC 2616), 301 or 302 in response
+ # to a POST MUST NOT cause a redirection without confirmation
+ # from the user (of urllib2, in this case). In practice,
+ # essentially all clients do redirect in this case, so we
+ # do the same.
+ # be conciliant with URIs containing a space
+ newurl = newurl.replace(' ', '%20')
+ newheaders = dict((k,v) for k,v in req.headers.items()
+ if k.lower() not in ("content-length", "content-type")
+ )
+ req = Request(newurl,
+ headers=newheaders,
+ origin_req_host=req.get_origin_req_host(),
+ unverifiable=True)
+ self.cookiejar.add_cookie_header(req)
+ print req.headers
+ return req
+ else:
+ raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+ # Implementation note: To avoid the server sending us into an
+ # infinite loop, the request object needs to track what URLs we
+ # have already seen. Do this by adding a handler-specific
+ # attribute to the Request object.
+ def http_error_302(self, req, fp, code, msg, headers):
+ resp = addinfourl(fp, headers, req.get_full_url())
+ resp.code = code
+ resp.msg = msg
+ self.cookiejar.extract_cookies(resp, req)
+
+ if not self.follow:
+ return resp
+
+ # Some servers (incorrectly) return multiple Location headers
+ # (so probably same goes for URI). Use first header.
+ if 'location' in headers:
+ newurl = headers.getheaders('location')[0]
+ elif 'uri' in headers:
+ newurl = headers.getheaders('uri')[0]
+ else:
+ return
+
+ # fix a possible malformed URL
+ urlparts = urlparse(newurl)
+ if not urlparts.path:
+ urlparts = list(urlparts)
+ urlparts[2] = "/"
+ newurl = urlunparse(urlparts)
+
+ newurl = urljoin(req.get_full_url(), newurl)
+
+ # XXX Probably want to forget about the state of the current
+ # request, although that might interact poorly with other
+ # handlers that also use handler-specific request attributes
+ new = self.redirect_request(req, fp, code, msg, headers, newurl)
+ if new is None:
+ return
+
+ # loop detection
+ # .redirect_dict has a key url if url was previously visited.
+ if hasattr(req, 'redirect_dict'):
+ visited = new.redirect_dict = req.redirect_dict
+ if (visited.get(newurl, 0) >= self.max_repeats or
+ len(visited) >= self.max_redirections):
+ raise HTTPError(req.get_full_url(), code,
+ self.inf_msg + msg, headers, fp)
+ else:
+ visited = new.redirect_dict = req.redirect_dict = {}
+ visited[newurl] = visited.get(newurl, 0) + 1
+
+ # Don't close the fp until we are sure that we won't use it
+ # with HTTPError.
+ fp.read()
+ fp.close()
+ return self.parent.open(new, timeout=req.timeout)
+
+ http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+ inf_msg = "The HTTP server returned a redirect error that would " \
+ "lead to an infinite loop.\n" \
+ "The last 30x error message was:\n"
+
+ https_request = http_request
+ https_response = http_response