diff options
Diffstat (limited to 'pyload/plugins/network')
| -rw-r--r-- | pyload/plugins/network/CurlChunk.py | 299 | ||||
| -rw-r--r-- | pyload/plugins/network/CurlDownload.py | 323 | ||||
| -rw-r--r-- | pyload/plugins/network/CurlRequest.py | 314 | ||||
| -rw-r--r-- | pyload/plugins/network/DefaultRequest.py | 9 | 
4 files changed, 945 insertions, 0 deletions
| diff --git a/pyload/plugins/network/CurlChunk.py b/pyload/plugins/network/CurlChunk.py new file mode 100644 index 000000000..4250db2ce --- /dev/null +++ b/pyload/plugins/network/CurlChunk.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +#   Copyright(c) 2008-2012 pyLoad Team +#   http://www.pyload.org +# +#   This file is part of pyLoad. +#   pyLoad is free software: you can redistribute it and/or modify +#   it under the terms of the GNU Affero General Public License as +#   published by the Free Software Foundation, either version 3 of the +#   License, or (at your option) any later version. +# +#   Subjected to the terms and conditions in LICENSE +# +#   @author: RaNaN +############################################################################### + +from os import remove, stat, fsync +from os.path import exists +from time import sleep +from re import search + +import codecs +import pycurl + +from pyload.utils import remove_chars +from pyload.utils.fs import fs_encode + +from CurlRequest import CurlRequest + +class WrongFormat(Exception): +    pass + + +class ChunkInfo(): +    def __init__(self, name): +        self.name = unicode(name) +        self.size = 0 +        self.resume = False +        self.chunks = [] + +    def __repr__(self): +        ret = "ChunkInfo: %s, %s\n" % (self.name, self.size) +        for i, c in enumerate(self.chunks): +            ret += "%s# %s\n" % (i, c[1]) + +        return ret + +    def setSize(self, size): +        self.size = int(size) + +    def addChunk(self, name, range): +        self.chunks.append((name, range)) + +    def clear(self): +        self.chunks = [] + +    def createChunks(self, chunks): +        self.clear() +        chunk_size = self.size / chunks + +        current = 0 +        for i in range(chunks): +            end = self.size - 1 if (i == chunks - 1) else current + chunk_size +            
self.addChunk("%s.chunk%s" % (self.name, i), (current, end)) +            current += chunk_size + 1 + + +    def save(self): +        fs_name = fs_encode("%s.chunks" % self.name) +        fh = codecs.open(fs_name, "w", "utf_8") +        fh.write("name:%s\n" % self.name) +        fh.write("size:%s\n" % self.size) +        for i, c in enumerate(self.chunks): +            fh.write("#%d:\n" % i) +            fh.write("\tname:%s\n" % c[0]) +            fh.write("\trange:%i-%i\n" % c[1]) +        fh.close() + +    @staticmethod +    def load(name): +        fs_name = fs_encode("%s.chunks" % name) +        if not exists(fs_name): +            raise IOError() +        fh = codecs.open(fs_name, "r", "utf_8") +        name = fh.readline()[:-1] +        size = fh.readline()[:-1] +        if name.startswith("name:") and size.startswith("size:"): +            name = name[5:] +            size = size[5:] +        else: +            fh.close() +            raise WrongFormat() +        ci = ChunkInfo(name) +        ci.loaded = True +        ci.setSize(size) +        while True: +            if not fh.readline(): #skip line +                break +            name = fh.readline()[1:-1] +            range = fh.readline()[1:-1] +            if name.startswith("name:") and range.startswith("range:"): +                name = name[5:] +                range = range[6:].split("-") +            else: +                raise WrongFormat() + +            ci.addChunk(name, (long(range[0]), long(range[1]))) +        fh.close() +        return ci + +    def remove(self): +        fs_name = fs_encode("%s.chunks" % self.name) +        if exists(fs_name): remove(fs_name) + +    def getCount(self): +        return len(self.chunks) + +    def getChunkName(self, index): +        return self.chunks[index][0] + +    def getChunkRange(self, index): +        return self.chunks[index][1] + + +class CurlChunk(CurlRequest): +    def __init__(self, id, parent, range=None, resume=False): +        
self.setContext(*parent.getContext()) + +        self.id = id +        self.p = parent # CurlDownload instance +        self.range = range # tuple (start, end) +        self.resume = resume +        self.log = parent.log + +        self.size = range[1] - range[0] if range else -1 +        self.arrived = 0 +        self.lastURL = self.p.referer + +        self.c = pycurl.Curl() + +        self.header = "" +        self.headerParsed = False #indicates if the header has been processed + +        self.fp = None #file handle + +        self.initContext() + +        self.BOMChecked = False # check and remove byte order mark + +        self.rep = None + +        self.sleep = 0.000 +        self.lastSize = 0 + +    def __repr__(self): +        return "<CurlChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived) + +    @property +    def cj(self): +        return self.p.context + +    def getHandle(self): +        """ returns a Curl handle ready to use for perform/multiperform """ + +        self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.cj) +        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody) +        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) + +        # request all bytes, since some servers in russia seems to have a defect arihmetic unit + +        fs_name = fs_encode(self.p.info.getChunkName(self.id)) +        if self.resume: +            self.fp = open(fs_name, "ab") +            self.arrived = self.fp.tell() +            if not self.arrived: +                self.arrived = stat(fs_name).st_size + +            if self.range: +                #do nothing if chunk already finished +                if self.arrived + self.range[0] >= self.range[1]: return None + +                if self.id == len(self.p.info.chunks) - 1: #as last chunk dont set end range, so we get everything +                    range = "%i-" % (self.arrived + self.range[0]) +                else: +                    range = "%i-%i" % 
(self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1)) + +                self.log.debug("Chunked resume with range %s" % range) +                self.c.setopt(pycurl.RANGE, range) +            else: +                self.log.debug("Resume File from %i" % self.arrived) +                self.c.setopt(pycurl.RESUME_FROM, self.arrived) + +        else: +            if self.range: +                if self.id == len(self.p.info.chunks) - 1: # see above +                    range = "%i-" % self.range[0] +                else: +                    range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1)) + +                self.log.debug("Chunked with range %s" % range) +                self.c.setopt(pycurl.RANGE, range) + +            self.fp = open(fs_name, "wb") + +        return self.c + +    def writeHeader(self, buf): +        self.header += buf +        #@TODO forward headers?, this is possibly unneeded, when we just parse valid 200 headers +        # as first chunk, we will parse the headers +        if not self.range and self.header.endswith("\r\n\r\n"): +            self.parseHeader() +        elif not self.range and buf.startswith("150") and "data connection" in buf: #ftp file size parsing +            size = search(r"(\d+) bytes", buf) +            if size: +                self.p._size = int(size.group(1)) +                self.p.chunkSupport = True + +        self.headerParsed = True + +    def writeBody(self, buf): +        #ignore BOM, it confuses unrar +        if not self.BOMChecked: +            if [ord(b) for b in buf[:3]] == [239, 187, 191]: +                buf = buf[3:] +            self.BOMChecked = True + +        size = len(buf) + +        self.arrived += size + +        self.fp.write(buf) + +        if self.p.bucket: +            sleep(self.p.bucket.consumed(size)) +        else: +            # Avoid small buffers, increasing sleep time slowly if buffer size gets smaller +            # otherwise reduce sleep 
time percentile (values are based on tests) +            # So in general cpu time is saved without reducing bandwidth too much + +            if size < self.lastSize: +                self.sleep += 0.002 +            else: +                self.sleep *= 0.7 + +            self.lastSize = size + +            sleep(self.sleep) + +        if self.range and self.arrived > self.size: +            return 0 #close if we have enough data + + +    def parseHeader(self): +        """parse data from received header""" +        for orgline in self.decodeResponse(self.header).splitlines(): +            line = orgline.strip().lower() +            if line.startswith("accept-ranges") and "bytes" in line: +                self.p.chunkSupport = True + +            if "content-disposition" in line: + +                m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", line) +                if m: +                    name = remove_chars(m.groupdict()['name'], "\"';/").strip() +                    self.p._name = name +                    self.log.debug("Content-Disposition: %s" % name) + +            if not self.resume and line.startswith("content-length"): +                self.p._size = int(line.split(":")[1]) + +        self.headerParsed = True + +    def stop(self): +        """The download will not proceed after next call of writeBody""" +        self.range = [0,0] +        self.size = 0 + +    def resetRange(self): +        """ Reset the range, so the download will load all data available  """ +        self.range = None + +    def setRange(self, range): +        self.range = range +        self.size = range[1] - range[0] + +    def flushFile(self): +        """  flush and close file """ +        self.fp.flush() +        fsync(self.fp.fileno()) #make sure everything was written to disk +        self.fp.close() #needs to be closed, or merging chunks will fail + +    def close(self): +        """ closes everything, unusable after this """ +        if self.fp: 
self.fp.close() +        self.c.close() +        if hasattr(self, "p"): del self.p diff --git a/pyload/plugins/network/CurlDownload.py b/pyload/plugins/network/CurlDownload.py new file mode 100644 index 000000000..5de83ec7b --- /dev/null +++ b/pyload/plugins/network/CurlDownload.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +#   Copyright(c) 2008-2012 pyLoad Team +#   http://www.pyload.org +# +#   This file is part of pyLoad. +#   pyLoad is free software: you can redistribute it and/or modify +#   it under the terms of the GNU Affero General Public License as +#   published by the Free Software Foundation, either version 3 of the +#   License, or (at your option) any later version. +# +#   Subjected to the terms and conditions in LICENSE +# +#   @author: RaNaN +############################################################################### + +from os import remove +from os.path import dirname +from time import time +from shutil import move + +import pycurl + +from pyload.plugins.Base import Abort +from pyload.utils.fs import save_join, fs_encode + +from ..Download import Download +from CurlChunk import ChunkInfo, CurlChunk +from CurlRequest import ResponseException + +# TODO: save content-disposition for resuming + +class CurlDownload(Download): +    """ loads an url, http + ftp supported """ + +    # def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None, +    #              options={}, disposition=False): + +    def __init__(self, *args, **kwargs): +        Download.__init__(self, *args, **kwargs) + +        self.path = None +        self.disposition = False + +        self.chunks = [] +        self.chunkSupport = None + +        self.m = pycurl.CurlMulti() + +        #needed for speed calculation +        self.lastArrived = [] +        self.speeds = [] +        self.lastSpeeds = [0, 0] + +    @property +    def speed(self): +   
     last = [sum(x) for x in self.lastSpeeds if x] +        return (sum(self.speeds) + sum(last)) / (1 + len(last)) + +    @property +    def arrived(self): +        return sum(c.arrived for c in self.chunks) if self.chunks else self._size + +    @property +    def name(self): +        return self._name if self.disposition else None + +    def _copyChunks(self): +        init = fs_encode(self.info.getChunkName(0)) #initial chunk name + +        if self.info.getCount() > 1: +            fo = open(init, "rb+") #first chunkfile +            for i in range(1, self.info.getCount()): +                #input file +                fo.seek( +                    self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks +                fname = fs_encode("%s.chunk%d" % (self.path, i)) +                fi = open(fname, "rb") +                buf = 32 * 1024 +                while True: #copy in chunks, consumes less memory +                    data = fi.read(buf) +                    if not data: +                        break +                    fo.write(data) +                fi.close() +                if fo.tell() < self.info.getChunkRange(i)[1]: +                    fo.close() +                    remove(init) +                    self.info.remove() #there are probably invalid chunks +                    raise Exception("Downloaded content was smaller than expected. 
Try to reduce download connections.") +                remove(fname) #remove chunk +            fo.close() + +        if self.name: +            self.filename = save_join(dirname(self.path), self.name) + +        move(init, fs_encode(self.path)) +        self.info.remove() #remove info file + +    def checkResume(self): +        try: +            self.info = ChunkInfo.load(self.path) +            self.info.resume = True #resume is only possible with valid info file +            self._size = self.info.size +            self.infoSaved = True +        except IOError: +            self.info = ChunkInfo(self.path) + +    def download(self, uri, path, get={}, post={}, referer=True, disposition=False, chunks=1, resume=False): +        """ returns new filename or None """ +        self.url = uri +        self.path = path +        self.disposition = disposition +        self.get = get +        self.post = post +        self.referer = referer + +        self.checkResume() +        chunks = max(1, chunks) +        resume = self.info.resume and resume + +        try: +            self._download(chunks, resume) +        except pycurl.error, e: +            #code 33 - no resume +            code = e.args[0] +            if code == 33: +                # try again without resume +                self.log.debug("Errno 33 -> Restart without resume") + +                #remove old handles +                for chunk in self.chunks: +                    self.closeChunk(chunk) + +                return self._download(chunks, False) +            else: +                raise +        finally: +            self.close() + +        return self.name + +    def _download(self, chunks, resume): +        if not resume: +            self.info.clear() +            self.info.addChunk("%s.chunk0" % self.path, (0, 0)) #create an initial entry + +        self.chunks = [] + +        init = CurlChunk(0, self, None, resume) #initial chunk that will load complete file (if needed) + +        
self.chunks.append(init) +        self.m.add_handle(init.getHandle()) + +        lastFinishCheck = 0 +        lastTimeCheck = 0 +        chunksDone = set()  # list of curl handles that are finished +        chunksCreated = False +        done = False +        if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can +            self.chunkSupport = True + +        while 1: +            #need to create chunks +            if not chunksCreated and self.chunkSupport and self.size: #will be set later by first chunk + +                if not resume: +                    self.info.setSize(self.size) +                    self.info.createChunks(chunks) +                    self.info.save() + +                chunks = self.info.getCount() + +                init.setRange(self.info.getChunkRange(0)) + +                for i in range(1, chunks): +                    c = CurlChunk(i, self, self.info.getChunkRange(i), resume) + +                    handle = c.getHandle() +                    if handle: +                        self.chunks.append(c) +                        self.m.add_handle(handle) +                    else: +                        #close immediately +                        self.log.debug("Invalid curl handle -> closed") +                        c.close() + +                chunksCreated = True + +            while 1: +                ret, num_handles = self.m.perform() +                if ret != pycurl.E_CALL_MULTI_PERFORM: +                    break + +            t = time() + +            # reduce these calls +            # when num_q is 0, the loop is exited +            while lastFinishCheck + 0.5 < t: +                # list of failed curl handles +                failed = [] +                ex = None # save only last exception, we can only raise one anyway + +                num_q, ok_list, err_list = self.m.info_read() +                for c in ok_list: +                    chunk = self.findChunk(c) +             
       try: # check if the header implies success, else add it to failed list +                        chunk.verifyHeader() +                    except ResponseException, e: +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) +                        failed.append(chunk) +                        ex = e +                    else: +                        chunksDone.add(c) + +                for c in err_list: +                    curl, errno, msg = c +                    chunk = self.findChunk(curl) +                    #test if chunk was finished +                    if errno != 23 or "0 !=" not in msg: +                        failed.append(chunk) +                        ex = pycurl.error(errno, msg) +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex))) +                        continue + +                    try: # check if the header implies success, else add it to failed list +                        chunk.verifyHeader() +                    except ResponseException, e: +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) +                        failed.append(chunk) +                        ex = e +                    else: +                        chunksDone.add(curl) +                if not num_q: # no more info to get + +                    # check if init is not finished so we reset download connections +                    # note that other chunks are closed and everything downloaded with initial connection +                    if failed and init not in failed and init.c not in chunksDone: +                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex)))) + +                        #list of chunks to clean and remove +                        to_clean = filter(lambda x: x is not init, self.chunks) +                        for chunk in to_clean: +                            self.closeChunk(chunk) +      
                      self.chunks.remove(chunk) +                            remove(fs_encode(self.info.getChunkName(chunk.id))) + +                        #let first chunk load the rest and update the info file +                        init.resetRange() +                        self.info.clear() +                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size)) +                        self.info.save() +                    elif failed: +                        raise ex + +                    lastFinishCheck = t + +                    if len(chunksDone) >= len(self.chunks): +                        if len(chunksDone) > len(self.chunks): +                            self.log.warning("Finished download chunks size incorrect, please report bug.") +                        done = True  #all chunks loaded + +                    break + +            if done: +                break #all chunks loaded + +            # calc speed once per second, averaging over 3 seconds +            if lastTimeCheck + 1 < t: +                diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in +                        enumerate(self.chunks)] + +                self.lastSpeeds[1] = self.lastSpeeds[0] +                self.lastSpeeds[0] = self.speeds +                self.speeds = [float(a) / (t - lastTimeCheck) for a in diff] +                self.lastArrived = [c.arrived for c in self.chunks] +                lastTimeCheck = t + +            if self.doAbort: +                raise Abort() + +            self.m.select(1) + +        for chunk in self.chunks: +            chunk.flushFile() #make sure downloads are written to disk + +        self._copyChunks() + +    def findChunk(self, handle): +        """ linear search to find a chunk (should be ok since chunk size is usually low) """ +        for chunk in self.chunks: +            if chunk.c == handle: return chunk + +    def closeChunk(self, chunk): +        try: +            
self.m.remove_handle(chunk.c) +        except pycurl.error, e: +            self.log.debug("Error removing chunk: %s" % str(e)) +        finally: +            chunk.close() + +    def close(self): +        """ cleanup """ +        for chunk in self.chunks: +            self.closeChunk(chunk) +        else: +            #Workaround: pycurl segfaults when closing multi, that never had any curl handles +            if hasattr(self, "m"): +                c = pycurl.Curl() +                self.m.add_handle(c) +                self.m.remove_handle(c) +                c.close() + +        self.chunks = [] +        if hasattr(self, "m"): +            self.m.close() +            del self.m +        if hasattr(self, "cj"): +            del self.cj +        if hasattr(self, "info"): +            del self.info
# =============================================================================
# pyload/plugins/network/CurlRequest.py
# =============================================================================
# -*- coding: utf-8 -*-

###############################################################################
#   Copyright(c) 2008-2012 pyLoad Team
#   http://www.pyload.org
#
#   This file is part of pyLoad.
#   pyLoad is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as
#   published by the Free Software Foundation, either version 3 of the
#   License, or (at your option) any later version.
#
#   Subjected to the terms and conditions in LICENSE
#
#   @author: RaNaN
###############################################################################

import pycurl

from codecs import getincrementaldecoder, lookup, BOM_UTF8
from urllib import quote, urlencode
from httplib import responses
from cStringIO import StringIO

from pyload.plugins.Base import Abort
from pyload.network.CookieJar import CookieJar

from ..Request import Request, ResponseException


def myquote(url):
    """Percent-quote `url` (utf-8 encoded first when unicode), keeping URL delimiters intact."""
    return quote(url.encode('utf8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]")


def myurlencode(data):
    """urlencode a mapping (or iterable of pairs), utf-8 encoding unicode keys and values first."""
    data = dict(data)
    return urlencode(dict((x.encode('utf8') if isinstance(x, unicode) else x, \
                           y.encode('utf8') if isinstance(y, unicode) else y ) for x, y in data.iteritems()))


# status codes that verifyHeader treats as failures
bad_headers = range(400, 418) + range(500, 506)


class CurlRequest(Request):
    """  Request class based on libcurl """

    __version__ = "0.1"

    CONTEXT_CLASS = CookieJar

    def __init__(self, *args, **kwargs):
        self.c = pycurl.Curl()
        Request.__init__(self, *args, **kwargs)

        self.rep = StringIO()
        self.lastURL = None
        self.lastEffectiveURL = None

        # cookiejar defines the context
        self.cj = self.context

        self.c.setopt(pycurl.WRITEFUNCTION, self.write)
        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)

    # TODO: addAuth, addHeader

    def initContext(self):
        """Apply the common handle options and, if a config is present, interface and pycurl options."""
        self.initHandle()

        if self.config:
            self.setInterface(self.config)
            self.initOptions(self.config)

    def initHandle(self):
        """ sets common options to curl handle """

        self.c.setopt(pycurl.FOLLOWLOCATION, 1)
        self.c.setopt(pycurl.MAXREDIRS, 5)
        self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
        self.c.setopt(pycurl.NOSIGNAL, 1)
        self.c.setopt(pycurl.NOPROGRESS, 1)
        if hasattr(pycurl, "AUTOREFERER"):
            self.c.setopt(pycurl.AUTOREFERER, 1)
        self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
        # Interval for low speed, detects connection loss, but can abort dl if hoster stalls the download
        self.c.setopt(pycurl.LOW_SPEED_TIME, 45)
        self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)

        # don't save the cookies
        self.c.setopt(pycurl.COOKIEFILE, "")
        self.c.setopt(pycurl.COOKIEJAR, "")

        #self.c.setopt(pycurl.VERBOSE, 1)

        self.c.setopt(pycurl.USERAGENT,
                      "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
        if pycurl.version_info()[7]:
            self.c.setopt(pycurl.ENCODING, "gzip, deflate")
        self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
                                          "Accept-Language: en-US,en",
                                          "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
                                          "Connection: keep-alive",
                                          "Keep-Alive: 300",
                                          "Expect:"])

    def setInterface(self, options):
        """Configure outgoing interface, proxy, IP resolving and low-speed timeout from `options`."""

        interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]

        if interface and interface.lower() != "none":
            self.c.setopt(pycurl.INTERFACE, str(interface))

        if proxy:
            if proxy["type"] == "socks4":
                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
            elif proxy["type"] == "socks5":
                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
            else:
                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)

            self.c.setopt(pycurl.PROXY, str(proxy["address"]))
            self.c.setopt(pycurl.PROXYPORT, proxy["port"])

            if proxy["username"]:
                self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"])))

        if ipv6:
            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)
        else:
            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)

        if "timeout" in options:
            self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"])

    def initOptions(self, options):
        """  Sets same options as available in pycurl  """
        for k, v in options.iteritems():
            if hasattr(pycurl, k):
                self.c.setopt(getattr(pycurl, k), v)

    def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
        """ sets everything needed for the request """
        url = myquote(url)

        if get:
            # dicts go through myurlencode so unicode values are utf-8 encoded
            # (plain urlencode fails on non-ascii unicode); everything else is
            # passed to urlencode unchanged
            get = myurlencode(get) if isinstance(get, dict) else urlencode(get)
            url = "%s?%s" % (url, get)

        # Remember the URL of the PREVIOUS request before overwriting it:
        # it is the referer of this one. Overwriting first made every request
        # send its own URL as referer and discarded a preset lastURL.
        previous_url = self.lastURL

        self.c.setopt(pycurl.URL, url)
        self.lastURL = url

        if post:
            self.c.setopt(pycurl.POST, 1)
            if not multipart:
                if isinstance(post, unicode):
                    post = post.encode('utf8') #unicode not allowed
                elif isinstance(post, str):
                    pass
                else:
                    post = myurlencode(post)

                self.c.setopt(pycurl.POSTFIELDS, post)
            else:
                post = [(x, y.encode('utf8') if isinstance(y, unicode) else y ) for x, y in post.iteritems()]
                self.c.setopt(pycurl.HTTPPOST, post)
        else:
            self.c.setopt(pycurl.POST, 0)

        # lastURL may have been preset to a non-string (e.g. True), which
        # must not be sent as a header value
        if referer and previous_url and isinstance(previous_url, basestring):
            self.c.setopt(pycurl.REFERER, str(previous_url))
        else:
            self.c.setopt(pycurl.REFERER, "")

        if cookies:
            self.c.setopt(pycurl.COOKIELIST, self.cj.output())
        else:
            # Magic string that erases all cookies
            self.c.setopt(pycurl.COOKIELIST, "ALL")

        # TODO: remove auth again
        if "auth" in self.options:
            self.c.setopt(pycurl.USERPWD, str(self.options["auth"]))


    def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False):
        """ load and returns a given page

        With just_header the collected header text is returned instead of the
        body. Raises ResponseException (via verifyHeader) on a bad status code.
        """
        # NOTE: the dict defaults are only read, never mutated

        self.setRequestContext(url, get, post, referer, cookies, multipart)

        # TODO: use http/rfc message instead
        self.header = ""

        # drop the partial body a previously failed transfer may have left
        # behind, otherwise it would be prepended to this response
        self.getResponse()

        if "header" in self.options:
            self.c.setopt(pycurl.HTTPHEADER, self.options["header"])

        if just_header:
            self.c.setopt(pycurl.FOLLOWLOCATION, 0)
            self.c.setopt(pycurl.NOBODY, 1) #TODO: nobody= no post?

            # overwrite HEAD request, we want a common request type
            if post:
                self.c.setopt(pycurl.CUSTOMREQUEST, "POST")
            else:
                self.c.setopt(pycurl.CUSTOMREQUEST, "GET")

            try:
                self.c.perform()
                rep = self.header
            finally:
                self.c.setopt(pycurl.FOLLOWLOCATION, 1)
                self.c.setopt(pycurl.NOBODY, 0)
                self.c.unsetopt(pycurl.CUSTOMREQUEST)

        else:
            self.c.perform()
            rep = self.getResponse()

        self.c.setopt(pycurl.POSTFIELDS, "")
        self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
        self.code = self.verifyHeader()

        if cookies:
            self.parseCookies()

        if decode:
            rep = self.decodeResponse(rep)

        return rep

    def parseCookies(self):
        """Copy the cookies curl currently knows into the cookie jar."""
        for c in self.c.getinfo(pycurl.INFO_COOKIELIST):
            #http://xiix.wordpress.com/2006/03/23/mozillafirefox-cookie-format
            domain, flag, path, secure, expires, name, value = c.split("\t")
            # http only was added in py 2.6
            domain = domain.replace("#HttpOnly_", "")
            self.cj.setCookie(domain, name, value, path, expires, secure)

    def verifyHeader(self):
        """ raise an exceptions on bad headers """
        code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
        if code in bad_headers:
            raise ResponseException(code, responses.get(code, "Unknown statuscode"))
        return code

    def getResponse(self):
        """ retrieve response from string io """
        if self.rep is None: return ""
        value = self.rep.getvalue()
        self.rep.close()
        self.rep = StringIO()
        return value

    def decodeResponse(self, rep):
        """ decode with correct encoding, relies on header """
        header = self.header.splitlines()
        encoding = "utf8" # default encoding

        for line in header:
            line = line.lower().replace(" ", "")
            if not line.startswith("content-type:") or \
                    ("text" not in line and "application" not in line):
                continue

            _unused, delimiter, charset = line.rpartition("charset=")
            if delimiter:
                charset = charset.split(";")
                if charset:
                    encoding = charset[0]

        try:
            #self.log.debug("Decoded %s" % encoding )
            if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
                encoding = 'utf-8-sig'

            decoder = getincrementaldecoder(encoding)("replace")
            rep = decoder.decode(rep, True)

            #TODO: html_unescape as default

        except LookupError:
            self.log.debug("No Decoder found for %s" % encoding)
        except Exception:
            # best effort: the undecoded response is returned
            self.log.debug("Error when decoding string from %s." % encoding)

        return rep

    def write(self, buf):
        """ writes response

        Raises Abort when self.doAbort is set. Once more than 1000000 bytes
        are buffered, the buffer is dumped to ``response.dump`` and an
        Exception is raised.
        """
        if self.rep.tell() > 1000000 or self.doAbort:
            rep = self.getResponse()
            if self.doAbort: raise Abort()
            f = open("response.dump", "wb")
            try:  # do not leak the handle when the dump cannot be written
                f.write(rep)
            finally:
                f.close()
            raise Exception("Loaded Url exceeded limit")

        self.rep.write(buf)

    def writeHeader(self, buf):
        """ writes header """
        self.header += buf

    def reset(self):
        """Clear all cookies and request options."""
        self.cj.clear()
        self.options.clear()

    def close(self):
        """ cleanup, unusable after this """
        # rep may be None (getResponse already tolerates that)
        if self.rep is not None:
            self.rep.close()
        if hasattr(self, "cj"):
            del self.cj
        if hasattr(self, "c"):
            self.c.close()
            del self.c
\ No newline at end of file diff --git a/pyload/plugins/network/DefaultRequest.py b/pyload/plugins/network/DefaultRequest.py new file mode 100644 index 000000000..dce486ea5 --- /dev/null +++ b/pyload/plugins/network/DefaultRequest.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +from CurlRequest import CurlRequest +from CurlDownload import CurlDownload + +__version__ = "0.1" + +DefaultRequest = CurlRequest +DefaultDownload = CurlDownload
\ No newline at end of file | 
