Diffstat (limited to 'pyload/network')
-rw-r--r--   pyload/network/Browser.py          132
-rw-r--r--   pyload/network/Bucket.py            59
-rw-r--r--   pyload/network/CookieJar.py         50
-rw-r--r--   pyload/network/HTTPChunk.py        292
-rw-r--r--   pyload/network/HTTPDownload.py     325
-rw-r--r--   pyload/network/HTTPRequest.py      303
-rw-r--r--   pyload/network/RequestFactory.py   126
-rw-r--r--   pyload/network/XDCCRequest.py      159
-rw-r--r--   pyload/network/__init__.py           1
9 files changed, 1447 insertions, 0 deletions
diff --git a/pyload/network/Browser.py b/pyload/network/Browser.py
new file mode 100644
index 000000000..e78d24688
--- /dev/null
+++ b/pyload/network/Browser.py
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+
+from logging import getLogger
+
+from HTTPRequest import HTTPRequest
+from HTTPDownload import HTTPDownload
+
+
+class Browser(object):
+    __slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl")
+
+    def __init__(self, bucket=None, options={}):
+        self.log = getLogger("log")
+
+        self.options = options #holds pycurl options
+        self.bucket = bucket
+
+        self.cj = None # needs to be set later
+        self._size = 0
+
+        self.renewHTTPRequest()
+        self.dl = None
+
+
+    def renewHTTPRequest(self):
+        if hasattr(self, "http"): self.http.close()
+        self.http = HTTPRequest(self.cj, self.options)
+
+    def setLastURL(self, val):
+        self.http.lastURL = val
+
+    # tunnel some attributes from HTTPRequest to Browser
+    lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL)
+    lastURL = property(lambda self: self.http.lastURL, setLastURL)
+    code = property(lambda self: self.http.code)
+    cookieJar = property(lambda self: self.cj)
+
+    def setCookieJar(self, cj):
+        self.cj = cj
+        self.http.cj = cj
+
+    @property
+    def speed(self):
+        if self.dl:
+            return self.dl.speed
+        return 0
+
+    @property
+    def size(self):
+        if self._size:
+            return self._size
+        if self.dl:
+            return self.dl.size
+        return 0
+
+    @property
+    def arrived(self):
+        if self.dl:
+            return self.dl.arrived
+        return 0
+
+    @property
+    def percent(self):
+        if not self.size: return 0
+        return (self.arrived * 100) / self.size
+
+    def clearCookies(self):
+        if self.cj:
+            self.cj.clear()
+        self.http.clearCookies()
+
+    def clearReferer(self):
+        self.http.lastURL = None
+
+    def abortDownloads(self):
+        self.http.abort = True
+        if self.dl:
+            self._size = self.dl.size
+            self.dl.abort = True
+
+    def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False,
+                     progressNotify=None, disposition=False):
+        """ this can also download ftp """
+        self._size = 0
+        self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None,
+            self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition)
+        name = self.dl.download(chunks, resume)
+        self._size = self.dl.size
+
+        self.dl = None
+
+        return name
+
+    def load(self, *args, **kwargs):
+        """ retrieves page """
+        return self.http.load(*args, **kwargs)
+
+    def putHeader(self, name, value):
+        """ add a header to the request """
+        self.http.putHeader(name, value)
+
+    def addAuth(self, pwd):
+        """Adds user and pw for http auth
+
+        :param pwd: string, user:password
+        """
+        self.options["auth"] = pwd
+        self.renewHTTPRequest() #we need a new request
+
+    def removeAuth(self):
+        if "auth" in self.options: del self.options["auth"]
+        self.renewHTTPRequest()
+
+    def setOption(self, name, value):
+        """Adds an option to the request, see HTTPRequest for existing ones"""
+        self.options[name] = value
+
+    def deleteOption(self, name):
+        if name in self.options: del self.options[name]
+
+    def clearHeaders(self):
+        self.http.clearHeaders()
+
+    def close(self):
+        """ cleanup """
+        if hasattr(self, "http"):
+            self.http.close()
+            del self.http
+        if hasattr(self, "dl"):
+            del self.dl
+        if hasattr(self, "cj"):
+            del self.cj
diff --git a/pyload/network/Bucket.py b/pyload/network/Bucket.py
new file mode 100644
index 000000000..a096d644a
--- /dev/null
+++ b/pyload/network/Bucket.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: RaNaN
+"""
+
+from time import time
+from threading import Lock
+
+class Bucket:
+    def __init__(self):
+        self.rate = 0
+        self.tokens = 0
+        self.timestamp = time()
+        self.lock = Lock()
+
+    def __nonzero__(self):
+        return False if self.rate < 10240 else True
+
+    def setRate(self, rate):
+        self.lock.acquire()
+        self.rate = int(rate)
+        self.lock.release()
+
+    def consumed(self, amount):
+        """ returns the time the process has to sleep after it consumed the specified amount """
+        if self.rate < 10240: return 0 #min. 10kb, may become unresponsive otherwise
+        self.lock.acquire()
+
+        self.calc_tokens()
+        self.tokens -= amount
+
+        if self.tokens < 0:
+            time = -self.tokens / float(self.rate)
+        else:
+            time = 0
+
+        self.lock.release()
+        return time
+
+    def calc_tokens(self):
+        if self.tokens < self.rate:
+            now = time()
+            delta = self.rate * (now - self.timestamp)
+            self.tokens = min(self.rate, self.tokens + delta)
+            self.timestamp = now
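
Bucket is a classic token bucket: calc_tokens() refills at most one second's
worth of rate, consumed() subtracts what was just written, and a negative
balance translates into a sleep of deficit/rate seconds. A minimal usage
sketch (not part of the patch; the 64 KiB read size is an arbitrary choice
here):

from time import sleep

from pyload.network.Bucket import Bucket

bucket = Bucket()
bucket.setRate(100 * 1024)  # cap at 100 KiB/s; rates below 10 KiB/s disable throttling

def copy_throttled(src, dst):
    """Copy a stream while keeping the average rate near the bucket rate."""
    while True:
        data = src.read(64 * 1024)
        if not data:
            break
        dst.write(data)
        sleep(bucket.consumed(len(data)))  # 0 while tokens remain, deficit/rate otherwise
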
diff --git a/pyload/network/CookieJar.py b/pyload/network/CookieJar.py
new file mode 100644
index 000000000..a6ae090bc
--- /dev/null
+++ b/pyload/network/CookieJar.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: mkaay, RaNaN
+"""
+
+from time import time
+
+class CookieJar:
+    def __init__(self, pluginname, account=None):
+        self.cookies = {}
+        self.plugin = pluginname
+        self.account = account
+
+    def addCookies(self, clist):
+        for c in clist:
+            name = c.split("\t")[5]
+            self.cookies[name] = c
+
+    def getCookies(self):
+        return self.cookies.values()
+
+    def parseCookie(self, name):
+        if name in self.cookies:
+            return self.cookies[name].split("\t")[6]
+        else:
+            return None
+
+    def getCookie(self, name):
+        return self.parseCookie(name)
+
+    def setCookie(self, domain, name, value, path="/", exp=time() + 3600 * 24 * 180):
+        s = ".%s\tTRUE\t%s\tFALSE\t%s\t%s\t%s" % (domain, path, exp, name, value)
+        self.cookies[name] = s
+
+    def clear(self):
+        self.cookies = {}
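
CookieJar stores each cookie verbatim as a Netscape cookies.txt line, the
format pycurl emits via INFO_COOKIELIST: tab-separated fields in the order
domain, flag, path, secure, expires, name, value. That is why addCookies()
indexes field 5 for the name and parseCookie() field 6 for the value. A short
sketch (not part of the patch; domain and values are made up):

from pyload.network.CookieJar import CookieJar

cj = CookieJar("HosterPlugin")
cj.addCookies([".example.com\tTRUE\t/\tFALSE\t1999999999\tsession\tabc123"])
assert cj.getCookie("session") == "abc123"
cj.setCookie("example.com", "lang", "en")  # expiry defaults to now + 180 days
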
diff --git a/pyload/network/HTTPChunk.py b/pyload/network/HTTPChunk.py
new file mode 100644
index 000000000..b9d2a5379
--- /dev/null
+++ b/pyload/network/HTTPChunk.py
@@ -0,0 +1,292 @@
+# -*- coding: utf-8 -*-
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: RaNaN
+"""
+from os import remove, stat, fsync
+from os.path import exists
+from time import sleep
+from re import search
+from pyload.utils import fs_encode
+import codecs
+import pycurl
+
+from HTTPRequest import HTTPRequest
+
+class WrongFormat(Exception):
+    pass
+
+
+class ChunkInfo:
+    def __init__(self, name):
+        self.name = unicode(name)
+        self.size = 0
+        self.resume = False
+        self.chunks = []
+
+    def __repr__(self):
+        ret = "ChunkInfo: %s, %s\n" % (self.name, self.size)
+        for i, c in enumerate(self.chunks):
+            ret += "%s# %s\n" % (i, c[1])
+
+        return ret
+
+    def setSize(self, size):
+        self.size = int(size)
+
+    def addChunk(self, name, range):
+        self.chunks.append((name, range))
+
+    def clear(self):
+        self.chunks = []
+
+    def createChunks(self, chunks):
+        self.clear()
+        chunk_size = self.size / chunks
+
+        current = 0
+        for i in range(chunks):
+            end = self.size - 1 if (i == chunks - 1) else current + chunk_size
+            self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
+            current += chunk_size + 1
+
+
+    def save(self):
+        fs_name = fs_encode("%s.chunks" % self.name)
+        fh = codecs.open(fs_name, "w", "utf_8")
+        fh.write("name:%s\n" % self.name)
+        fh.write("size:%s\n" % self.size)
+        for i, c in enumerate(self.chunks):
+            fh.write("#%d:\n" % i)
+            fh.write("\tname:%s\n" % c[0])
+            fh.write("\trange:%i-%i\n" % c[1])
+        fh.close()
+
+    @staticmethod
+    def load(name):
+        fs_name = fs_encode("%s.chunks" % name)
+        if not exists(fs_name):
+            raise IOError()
+        fh = codecs.open(fs_name, "r", "utf_8")
+        name = fh.readline()[:-1]
+        size = fh.readline()[:-1]
+        if name.startswith("name:") and size.startswith("size:"):
+            name = name[5:]
+            size = size[5:]
+        else:
+            fh.close()
+            raise WrongFormat()
+        ci = ChunkInfo(name)
+        ci.loaded = True
+        ci.setSize(size)
+        while True:
+            if not fh.readline(): #skip line
+                break
+            name = fh.readline()[1:-1]
+            range = fh.readline()[1:-1]
+            if name.startswith("name:") and range.startswith("range:"):
+                name = name[5:]
+                range = range[6:].split("-")
+            else:
+                raise WrongFormat()
+
+            ci.addChunk(name, (long(range[0]), long(range[1])))
+        fh.close()
+        return ci
+
+    def remove(self):
+        fs_name = fs_encode("%s.chunks" % self.name)
+        if exists(fs_name): remove(fs_name)
+
+    def getCount(self):
+        return len(self.chunks)
+
+    def getChunkName(self, index):
+        return self.chunks[index][0]
+
+    def getChunkRange(self, index):
+        return self.chunks[index][1]
+
+
+class HTTPChunk(HTTPRequest):
+    def __init__(self, id, parent, range=None, resume=False):
+        self.id = id
+        self.p = parent # HTTPDownload instance
+        self.range = range # tuple (start, end)
+        self.resume = resume
+        self.log = parent.log
+
+        self.size = range[1] - range[0] if range else -1
+        self.arrived = 0
+        self.lastURL = self.p.referer
+
+        self.c = pycurl.Curl()
+
+        self.header = ""
+        self.headerParsed = False #indicates if the header has been processed
+
+        self.fp = None #file handle
+
+        self.initHandle()
+        self.setInterface(self.p.options)
+
+        self.BOMChecked = False # check and remove byte order mark
+
+        self.rep = None
+
+        self.sleep = 0.000
+        self.lastSize = 0
+
+    def __repr__(self):
+        return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
+
+    @property
+    def cj(self):
+        return self.p.cj
+
+    def getHandle(self):
+        """ returns a Curl handle ready to use for perform/multiperform """
+
+        self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
+        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
+        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+        # request all bytes, since some servers in Russia seem to have a defective arithmetic unit
+
+        fs_name = fs_encode(self.p.info.getChunkName(self.id))
+        if self.resume:
+            self.fp = open(fs_name, "ab")
+            self.arrived = self.fp.tell()
+            if not self.arrived:
+                self.arrived = stat(fs_name).st_size
+
+            if self.range:
+                #do nothing if chunk already finished
+                if self.arrived + self.range[0] >= self.range[1]: return None
+
+                if self.id == len(self.p.info.chunks) - 1: #as last chunk don't set end range, so we get everything
+                    range = "%i-" % (self.arrived + self.range[0])
+                else:
+                    range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+                self.log.debug("Chunked resume with range %s" % range)
+                self.c.setopt(pycurl.RANGE, range)
+            else:
+                self.log.debug("Resume File from %i" % self.arrived)
+                self.c.setopt(pycurl.RESUME_FROM, self.arrived)
+
+        else:
+            if self.range:
+                if self.id == len(self.p.info.chunks) - 1: # see above
+                    range = "%i-" % self.range[0]
+                else:
+                    range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+                self.log.debug("Chunked with range %s" % range)
+                self.c.setopt(pycurl.RANGE, range)
+
+            self.fp = open(fs_name, "wb")
+
+        return self.c
+
+    def writeHeader(self, buf):
+        self.header += buf
+        #@TODO forward headers?, this is possibly unneeded, when we just parse valid 200 headers
+        # as first chunk, we will parse the headers
+        if not self.range and self.header.endswith("\r\n\r\n"):
+            self.parseHeader()
+        elif not self.range and buf.startswith("150") and "data connection" in buf.lower():  #: ftp file size parsing
+            size = search(r"(\d+) bytes", buf)
+            if size:
+                self.p.size = int(size.group(1))
+                self.p.chunkSupport = True
+
+        self.headerParsed = True
+
+    def writeBody(self, buf):
+        #ignore BOM, it confuses unrar
+        if not self.BOMChecked:
+            if [ord(b) for b in buf[:3]] == [239, 187, 191]:
+                buf = buf[3:]
+            self.BOMChecked = True
+
+        size = len(buf)
+
+        self.arrived += size
+
+        self.fp.write(buf)
+
+        if self.p.bucket:
+            sleep(self.p.bucket.consumed(size))
+        else:
+            # Avoid small buffers, increasing sleep time slowly if buffer size gets smaller,
+            # otherwise reduce sleep time proportionally (values are based on tests)
+            # So in general cpu time is saved without reducing bandwidth too much
+
+            if size < self.lastSize:
+                self.sleep += 0.002
+            else:
+                self.sleep *= 0.7
+
+            self.lastSize = size
+
+            sleep(self.sleep)
+
+        if self.range and self.arrived > self.size:
+            return 0 #close if we have enough data
+
+
+    def parseHeader(self):
+        """parse data from received header"""
+        for orgline in self.decodeResponse(self.header).splitlines():
+            line = orgline.strip().lower()
+            if line.startswith("accept-ranges") and "bytes" in line:
+                self.p.chunkSupport = True
+
+            if line.startswith("content-disposition") and "filename=" in line:
+                name = orgline.partition("filename=")[2]
+                name = name.replace('"', "").replace("'", "").replace(";", "").strip()
+                self.p.nameDisposition = name
+                self.log.debug("Content-Disposition: %s" % name)
+
+            if not self.resume and line.startswith("content-length"):
+                self.p.size = int(line.split(":")[1])
+
+        self.headerParsed = True
+
+    def stop(self):
+        """The download will not proceed after the next call of writeBody"""
+        self.range = [0, 0]
+        self.size = 0
+
+    def resetRange(self):
+        """ Reset the range, so the download will load all data available """
+        self.range = None
+
+    def setRange(self, range):
+        self.range = range
+        self.size = range[1] - range[0]
+
+    def flushFile(self):
+        """ flush and close file """
+        self.fp.flush()
+        fsync(self.fp.fileno()) #make sure everything was written to disk
+        self.fp.close() #needs to be closed, or merging chunks will fail
+
+    def close(self):
+        """ closes everything, unusable after this """
+        if self.fp: self.fp.close()
+        self.c.close()
+        if hasattr(self, "p"): del self.p
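
ChunkInfo.createChunks() splits the file into inclusive byte ranges, with the
last chunk absorbing the remainder. A sketch of the resulting layout (not part
of the patch; file name and size are made up):

from pyload.network.HTTPChunk import ChunkInfo

ci = ChunkInfo("movie.avi")
ci.setSize(1000)
ci.createChunks(3)
print ci.chunks
# [(u'movie.avi.chunk0', (0, 333)),
#  (u'movie.avi.chunk1', (334, 667)),
#  (u'movie.avi.chunk2', (668, 999))]
ci.save()  # persists the layout as movie.avi.chunks for later resume
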
diff --git a/pyload/network/HTTPDownload.py b/pyload/network/HTTPDownload.py
new file mode 100644
index 000000000..50c6b4bdf
--- /dev/null
+++ b/pyload/network/HTTPDownload.py
@@ -0,0 +1,325 @@
+# -*- coding: utf-8 -*-
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: RaNaN
+"""
+
+from os import remove, fsync
+from os.path import dirname
+from time import sleep, time
+from shutil import move
+from logging import getLogger
+
+import pycurl
+
+from HTTPChunk import ChunkInfo, HTTPChunk
+from HTTPRequest import BadHeader
+
+from pyload.plugins.Plugin import Abort
+from pyload.utils import safe_join, fs_encode
+
+class HTTPDownload:
+    """ loads a url via http + ftp """
+
+    def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
+                 options={}, progressNotify=None, disposition=False):
+        self.url = url
+        self.filename = filename  #complete file destination, not only name
+        self.get = get
+        self.post = post
+        self.referer = referer
+        self.cj = cj  #cookiejar if cookies are needed
+        self.bucket = bucket
+        self.options = options
+        self.disposition = disposition
+        # all arguments
+
+        self.abort = False
+        self.size = 0
+        self.nameDisposition = None #will be parsed from content disposition
+
+        self.chunks = []
+
+        self.log = getLogger("log")
+
+        try:
+            self.info = ChunkInfo.load(filename)
+            self.info.resume = True #resume is only possible with a valid info file
+            self.size = self.info.size
+            self.infoSaved = True
+        except IOError:
+            self.info = ChunkInfo(filename)
+
+        self.chunkSupport = None
+        self.m = pycurl.CurlMulti()
+
+        #needed for speed calculation
+        self.lastArrived = []
+        self.speeds = []
+        self.lastSpeeds = [0, 0]
+
+        self.progressNotify = progressNotify
+
+    @property
+    def speed(self):
+        last = [sum(x) for x in self.lastSpeeds if x]
+        return (sum(self.speeds) + sum(last)) / (1 + len(last))
+
+    @property
+    def arrived(self):
+        return sum([c.arrived for c in self.chunks])
+
+    @property
+    def percent(self):
+        if not self.size: return 0
+        return (self.arrived * 100) / self.size
+
+    def _copyChunks(self):
+        init = fs_encode(self.info.getChunkName(0)) #initial chunk name
+
+        if self.info.getCount() > 1:
+            fo = open(init, "rb+") #first chunkfile
+            for i in range(1, self.info.getCount()):
+                #input file
+                fo.seek(
+                    self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks
+                fname = fs_encode("%s.chunk%d" % (self.filename, i))
+                fi = open(fname, "rb")
+                buf = 32 * 1024
+                while True: #copy in chunks, consumes less memory
+                    data = fi.read(buf)
+                    if not data:
+                        break
+                    fo.write(data)
+                fi.close()
+                if fo.tell() < self.info.getChunkRange(i)[1]:
+                    fo.close()
+                    remove(init)
+                    self.info.remove() #there are probably invalid chunks
+                    raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")
+                remove(fname) #remove chunk
+            fo.close()
+
+        if self.nameDisposition and self.disposition:
+            self.filename = safe_join(dirname(self.filename), self.nameDisposition)
+
+        move(init, fs_encode(self.filename))
+        self.info.remove() #remove info file
+
+    def download(self, chunks=1, resume=False):
+        """ returns new filename or None """
+
+        chunks = max(1, chunks)
+        resume = self.info.resume and resume
+
+        try:
+            self._download(chunks, resume)
+        except pycurl.error, e:
+            #code 33 - no resume
+            code = e.args[0]
+            if code == 33:
+                # try again without resume
+                self.log.debug("Errno 33 -> Restart without resume")
+
+                #remove old handles
+                for chunk in self.chunks:
+                    self.closeChunk(chunk)
+
+                return self._download(chunks, False)
+            else:
+                raise
+        finally:
+            self.close()
+
+        if self.nameDisposition and self.disposition: return self.nameDisposition
+        return None
+
+    def _download(self, chunks, resume):
+        if not resume:
+            self.info.clear()
+            self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #create an initial entry
+
+        self.chunks = []
+
+        init = HTTPChunk(0, self, None, resume) #initial chunk that will load the complete file (if needed)
+
+        self.chunks.append(init)
+        self.m.add_handle(init.getHandle())
+
+        lastFinishCheck = 0
+        lastTimeCheck = 0
+        chunksDone = set()  # set of curl handles that are finished
+        chunksCreated = False
+        done = False
+        if self.info.getCount() > 1: # this is a resume; if we were chunked originally, assume we still can be
+            self.chunkSupport = True
+
+        while 1:
+            #need to create chunks
+            if not chunksCreated and self.chunkSupport and self.size: #will be set later by the first chunk
+
+                if not resume:
+                    self.info.setSize(self.size)
+                    self.info.createChunks(chunks)
+                    self.info.save()
+
+                chunks = self.info.getCount()
+
+                init.setRange(self.info.getChunkRange(0))
+
+                for i in range(1, chunks):
+                    c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)
+
+                    handle = c.getHandle()
+                    if handle:
+                        self.chunks.append(c)
+                        self.m.add_handle(handle)
+                    else:
+                        #close immediately
+                        self.log.debug("Invalid curl handle -> closed")
+                        c.close()
+
+                chunksCreated = True
+
+            while 1:
+                ret, num_handles = self.m.perform()
+                if ret != pycurl.E_CALL_MULTI_PERFORM:
+                    break
+
+            t = time()
+
+            # reduce these calls
+            while lastFinishCheck + 0.5 < t:
+                # list of failed curl handles
+                failed = []
+                ex = None # save only the last exception, we can only raise one anyway
+
+                num_q, ok_list, err_list = self.m.info_read()
+                for c in ok_list:
+                    chunk = self.findChunk(c)
+                    try: # check if the header implies success, else add it to the failed list
+                        chunk.verifyHeader()
+                    except BadHeader, e:
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+                        failed.append(chunk)
+                        ex = e
+                    else:
+                        chunksDone.add(c)
+
+                for c in err_list:
+                    curl, errno, msg = c
+                    chunk = self.findChunk(curl)
+                    #test if chunk was finished
+                    if errno != 23 or "0 !=" not in msg:
+                        failed.append(chunk)
+                        ex = pycurl.error(errno, msg)
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
+                        continue
+
+                    try: # check if the header implies success, else add it to the failed list
+                        chunk.verifyHeader()
+                    except BadHeader, e:
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+                        failed.append(chunk)
+                        ex = e
+                    else:
+                        chunksDone.add(curl)
+                if not num_q: # no more info to get
+
+                    # check if init is not finished so we reset download connections
+                    # note that other chunks are closed and downloaded with init too
+                    if failed and init not in failed and init.c not in chunksDone:
+                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))
+
+                        #list of chunks to clean and remove
+                        to_clean = filter(lambda x: x is not init, self.chunks)
+                        for chunk in to_clean:
+                            self.closeChunk(chunk)
+                            self.chunks.remove(chunk)
+                            remove(fs_encode(self.info.getChunkName(chunk.id)))
+
+                        #let the first chunk load the rest and update the info file
+                        init.resetRange()
+                        self.info.clear()
+                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
+                        self.info.save()
+                    elif failed:
+                        raise ex
+
+                    lastFinishCheck = t
+
+                    if len(chunksDone) >= len(self.chunks):
+                        if len(chunksDone) > len(self.chunks):
+                            self.log.warning("Finished download chunks size incorrect, please report bug.")
+                        done = True  #all chunks loaded
+
+                    break
+
+            if done:
+                break #all chunks loaded
+
+            # calc speed once per second, averaging over 3 seconds
+            if lastTimeCheck + 1 < t:
+                diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
+                        enumerate(self.chunks)]
+
+                self.lastSpeeds[1] = self.lastSpeeds[0]
+                self.lastSpeeds[0] = self.speeds
+                self.speeds = [float(a) / (t - lastTimeCheck) for a in diff]
+                self.lastArrived = [c.arrived for c in self.chunks]
+                lastTimeCheck = t
+                self.updateProgress()
+
+            if self.abort:
+                raise Abort()
+
+            #sleep(0.003) #suppress busy waiting - limits dl speed to (1 / x) * buffersize
+            self.m.select(1)
+
+        for chunk in self.chunks:
+            chunk.flushFile() #make sure downloads are written to disk
+
+        self._copyChunks()
+
+    def updateProgress(self):
+        if self.progressNotify:
+            self.progressNotify(self.percent)
+
+    def findChunk(self, handle):
+        """ linear search to find a chunk (should be ok since the chunk count is usually low) """
+        for chunk in self.chunks:
+            if chunk.c == handle: return chunk
+
+    def closeChunk(self, chunk):
+        try:
+            self.m.remove_handle(chunk.c)
+        except pycurl.error, e:
+            self.log.debug("Error removing chunk: %s" % str(e))
+        finally:
+            chunk.close()
+
+    def close(self):
+        """ cleanup """
+        for chunk in self.chunks:
+            self.closeChunk(chunk)
+
+        self.chunks = []
+        if hasattr(self, "m"):
+            self.m.close()
+            del self.m
+        if hasattr(self, "cj"):
+            del self.cj
+        if hasattr(self, "info"):
+            del self.info
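
A usage sketch for HTTPDownload (not part of the patch; URL and paths are made
up). Note that the options dict must carry the interface/proxies/ipv6 keys
that HTTPRequest.setInterface() reads; RequestFactory.getOptions() normally
supplies them:

import sys

from pyload.network.HTTPDownload import HTTPDownload

dl = HTTPDownload("http://example.com/file.bin", "/tmp/file.bin",
                  options={"interface": None, "proxies": {}, "ipv6": False},
                  progressNotify=lambda p: sys.stdout.write("\r%3d%%" % p),
                  disposition=True)
name = dl.download(chunks=4, resume=True)  # server-sent filename or None
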
diff --git a/pyload/network/HTTPRequest.py b/pyload/network/HTTPRequest.py
new file mode 100644
index 000000000..66e355b77
--- /dev/null
+++ b/pyload/network/HTTPRequest.py
@@ -0,0 +1,303 @@
+# -*- coding: utf-8 -*-
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: RaNaN
+"""
+
+import pycurl
+
+from codecs import getincrementaldecoder, lookup, BOM_UTF8
+from urllib import quote, urlencode
+from httplib import responses
+from logging import getLogger
+from cStringIO import StringIO
+
+from pyload.plugins.Plugin import Abort
+
+def myquote(url):
+    return quote(url.encode('utf_8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]")
+
+def myurlencode(data):
+    data = dict(data)
+    return urlencode(dict((x.encode('utf_8') if isinstance(x, unicode) else x, \
+        y.encode('utf_8') if isinstance(y, unicode) else y ) for x, y in data.iteritems()))
+
+bad_headers = range(400, 404) + range(405, 418) + range(500, 506)
+
+class BadHeader(Exception):
+    def __init__(self, code, content=""):
+        Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)]))
+        self.code = code
+        self.content = content
+
+
+class HTTPRequest:
+    def __init__(self, cookies=None, options=None):
+        self.c = pycurl.Curl()
+        self.rep = StringIO()
+
+        self.cj = cookies #cookiejar
+
+        self.lastURL = None
+        self.lastEffectiveURL = None
+        self.abort = False
+        self.code = 0 # last http code
+
+        self.header = ""
+
+        self.headers = [] #temporary request header
+
+        self.initHandle()
+        self.setInterface(options)
+
+        self.c.setopt(pycurl.WRITEFUNCTION, self.write)
+        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+        self.log = getLogger("log")
+
+
+    def initHandle(self):
+        """ sets common options to curl handle """
+        self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+        self.c.setopt(pycurl.MAXREDIRS, 5)
+        self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
+        self.c.setopt(pycurl.NOSIGNAL, 1)
+        self.c.setopt(pycurl.NOPROGRESS, 1)
+        if hasattr(pycurl, "AUTOREFERER"):
+            self.c.setopt(pycurl.AUTOREFERER, 1)
+        self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
+        self.c.setopt(pycurl.LOW_SPEED_TIME, 30)
+        self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)
+
+        #self.c.setopt(pycurl.VERBOSE, 1)
+
+        self.c.setopt(pycurl.USERAGENT,
+            "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
+        if pycurl.version_info()[7]:
+            self.c.setopt(pycurl.ENCODING, "gzip, deflate")
+        self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
+                                          "Accept-Language: en-US, en",
+                                          "Accept-Charset: ISO-8859-1, utf-8;q=0.7,*;q=0.7",
+                                          "Connection: keep-alive",
+                                          "Keep-Alive: 300",
+                                          "Expect:"])
+
+    def setInterface(self, options):
+
+        interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]
+
+        if interface and interface.lower() != "none":
+            self.c.setopt(pycurl.INTERFACE, str(interface))
+
+        if proxy:
+            if proxy["type"] == "socks4":
+                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
+            elif proxy["type"] == "socks5":
+                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
+            else:
+                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
+
+            self.c.setopt(pycurl.PROXY, str(proxy["address"]))
+            self.c.setopt(pycurl.PROXYPORT, proxy["port"])
+
+            if proxy["username"]:
+                self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"])))
+
+        if ipv6:
+            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)
+        else:
+            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+
+        if "auth" in options:
+            self.c.setopt(pycurl.USERPWD, str(options["auth"]))
+
+        if "timeout" in options:
+            self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"])
+
+
+    def addCookies(self):
+        """ put cookies from curl handle to cj """
+        if self.cj:
+            self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST))
+
+    def getCookies(self):
+        """ add cookies from cj to curl handle """
+        if self.cj:
+            for c in self.cj.getCookies():
+                self.c.setopt(pycurl.COOKIELIST, c)
+        return
+
+    def clearCookies(self):
+        self.c.setopt(pycurl.COOKIELIST, "")
+
+    def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
+        """ sets everything needed for the request """
+
+        url = myquote(url)
+
+        if get:
+            get = urlencode(get)
+            url = "%s?%s" % (url, get)
+
+        self.c.setopt(pycurl.URL, url)
+        self.c.lastUrl = url
+
+        if post:
+            self.c.setopt(pycurl.POST, 1)
+            if not multipart:
+                if type(post) == unicode:
+                    post = str(post) #unicode not allowed
+                elif type(post) == str:
+                    pass
+                else:
+                    post = myurlencode(post)
+
+                self.c.setopt(pycurl.POSTFIELDS, post)
+            else:
+                post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()]
+                self.c.setopt(pycurl.HTTPPOST, post)
+        else:
+            self.c.setopt(pycurl.POST, 0)
+
+        if referer and self.lastURL:
+            self.c.setopt(pycurl.REFERER, str(self.lastURL))
+
+        if cookies:
+            self.c.setopt(pycurl.COOKIEFILE, "")
+            self.c.setopt(pycurl.COOKIEJAR, "")
+            self.getCookies()
+
+
+    def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False):
+        """ loads and returns a given page """
+
+        self.setRequestContext(url, get, post, referer, cookies, multipart)
+
+        self.header = ""
+
+        self.c.setopt(pycurl.HTTPHEADER, self.headers)
+
+        if just_header:
+            self.c.setopt(pycurl.FOLLOWLOCATION, 0)
+            self.c.setopt(pycurl.NOBODY, 1)
+            if post:
+                self.c.setopt(pycurl.POST, 1)
+            else:
+                self.c.setopt(pycurl.HTTPGET, 1)
+            self.c.perform()
+            rep = self.header
+
+            self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+            self.c.setopt(pycurl.NOBODY, 0)
+
+        else:
+            self.c.perform()
+            rep = self.getResponse()
+
+        self.c.setopt(pycurl.POSTFIELDS, "")
+        self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
+        self.code = self.verifyHeader()
+
+        self.addCookies()
+
+        if decode:
+            rep = self.decodeResponse(rep)
+
+        return rep
+
+    def verifyHeader(self):
+        """ raises an exception on bad headers """
+        code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
+        if code in bad_headers:
+            #404 will NOT raise an exception
+            raise BadHeader(code, self.getResponse())
+        return code
+
+    def checkHeader(self):
+        """ check if header indicates failure """
+        return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers
+
+    def getResponse(self):
+        """ retrieve response from string io """
+        if self.rep is None: return ""
+        value = self.rep.getvalue()
+        self.rep.close()
+        self.rep = StringIO()
+        return value
+
+    def decodeResponse(self, rep):
+        """ decode with correct encoding, relies on header """
+        header = self.header.splitlines()
+        encoding = "utf8" # default encoding
+
+        for line in header:
+            line = line.lower().replace(" ", "")
+            if not line.startswith("content-type:") or\
+               ("text" not in line and "application" not in line):
+                continue
+
+            none, delimiter, charset = line.rpartition("charset=")
+            if delimiter:
+                charset = charset.split(";")
+                if charset:
+                    encoding = charset[0]
+
+        try:
+            #self.log.debug("Decoded %s" % encoding )
+            if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
+                encoding = 'utf-8-sig'
+
+            decoder = getincrementaldecoder(encoding)("replace")
+            rep = decoder.decode(rep, True)
+
+            #TODO: html_unescape as default
+
+        except LookupError:
+            self.log.debug("No decoder found for %s" % encoding)
+        except Exception:
+            self.log.debug("Error when decoding string from %s." % encoding)
+
+        return rep
+
+    def write(self, buf):
+        """ writes response """
+        if self.rep.tell() > 1000000 or self.abort:
+            rep = self.getResponse()
+            if self.abort: raise Abort()
+            f = open("response.dump", "wb")
+            f.write(rep)
+            f.close()
+            raise Exception("Loaded Url exceeded limit")
+
+        self.rep.write(buf)
+
+    def writeHeader(self, buf):
+        """ writes header """
+        self.header += buf
+
+    def putHeader(self, name, value):
+        self.headers.append("%s: %s" % (name, value))
+
+    def clearHeaders(self):
+        self.headers = []
+
+    def close(self):
+        """ cleanup, unusable after this """
+        self.rep.close()
+        if hasattr(self, "cj"):
+            del self.cj
+        if hasattr(self, "c"):
+            self.c.close()
+            del self.c
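
The same options requirement applies to a bare HTTPRequest. A minimal sketch
(not part of the patch; the URL is made up):

from pyload.network.HTTPRequest import HTTPRequest, BadHeader

req = HTTPRequest(options={"interface": None, "proxies": {}, "ipv6": False})
try:
    html = req.load("http://example.com/", get={"page": 1}, decode=True)
except BadHeader, e:
    print "server answered %d" % e.code  # raised for most 4xx/5xx responses
finally:
    req.close()  # frees the curl handle; the request is unusable afterwards
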
diff --git a/pyload/network/RequestFactory.py b/pyload/network/RequestFactory.py
new file mode 100644
index 000000000..6811b11d8
--- /dev/null
+++ b/pyload/network/RequestFactory.py
@@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: mkaay, RaNaN
+"""
+
+from threading import Lock
+
+from Browser import Browser
+from Bucket import Bucket
+from HTTPRequest import HTTPRequest
+from CookieJar import CookieJar
+
+from XDCCRequest import XDCCRequest
+
+class RequestFactory:
+    def __init__(self, core):
+        self.lock = Lock()
+        self.core = core
+        self.bucket = Bucket()
+        self.updateBucket()
+        self.cookiejars = {}
+
+    def iface(self):
+        return self.core.config["download"]["interface"]
+
+    def getRequest(self, pluginName, account=None, type="HTTP"):
+        self.lock.acquire()
+
+        if type == "XDCC":
+            self.lock.release() #release the lock, it would be held forever otherwise
+            return XDCCRequest(proxies=self.getProxies())
+
+        req = Browser(self.bucket, self.getOptions())
+
+        if account:
+            cj = self.getCookieJar(pluginName, account)
+            req.setCookieJar(cj)
+        else:
+            req.setCookieJar(CookieJar(pluginName))
+
+        self.lock.release()
+        return req
+
+    def getHTTPRequest(self, **kwargs):
+        """ returns a http request, don't forget to close it! """
+        options = self.getOptions()
+        options.update(kwargs) # submit kwargs as additional options
+        return HTTPRequest(CookieJar(None), options)
+
+    def getURL(self, *args, **kwargs):
+        """ see HTTPRequest for argument list """
+        h = HTTPRequest(None, self.getOptions())
+        try:
+            rep = h.load(*args, **kwargs)
+        finally:
+            h.close()
+
+        return rep
+
+    def getCookieJar(self, pluginName, account=None):
+        if (pluginName, account) in self.cookiejars:
+            return self.cookiejars[(pluginName, account)]
+
+        cj = CookieJar(pluginName, account)
+        self.cookiejars[(pluginName, account)] = cj
+        return cj
+
+    def getProxies(self):
+        """ returns a proxy dict for the request classes """
+        if not self.core.config["proxy"]["proxy"]:
+            return {}
+        else:
+            type = "http"
+            setting = self.core.config["proxy"]["type"].lower()
+            if setting == "socks4": type = "socks4"
+            elif setting == "socks5": type = "socks5"
+
+            username = None
+            if self.core.config["proxy"]["username"] and self.core.config["proxy"]["username"].lower() != "none":
+                username = self.core.config["proxy"]["username"]
+
+            pw = None
+            if self.core.config["proxy"]["password"] and self.core.config["proxy"]["password"].lower() != "none":
+                pw = self.core.config["proxy"]["password"]
+
+            return {
+                "type": type,
+                "address": self.core.config["proxy"]["address"],
+                "port": self.core.config["proxy"]["port"],
+                "username": username,
+                "password": pw,
+                }
+
+    def getOptions(self):
+        """ returns options needed for pycurl """
+        return {"interface": self.iface(),
+                "proxies": self.getProxies(),
+                "ipv6": self.core.config["download"]["ipv6"]}
+
+    def updateBucket(self):
+        """ set values in the bucket according to settings """
+        if not self.core.config["download"]["limit_speed"]:
+            self.bucket.setRate(-1)
+        else:
+            self.bucket.setRate(self.core.config["download"]["max_speed"] * 1024)
+
+# needs pyreq in global namespace
+def getURL(*args, **kwargs):
+    return pyreq.getURL(*args, **kwargs)
+
+
+def getRequest(*args, **kwargs):
+    return pyreq.getHTTPRequest() #note: arguments are ignored, a plain HTTPRequest is returned
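
The module-level getURL()/getRequest() helpers rely on a global pyreq. As far
as I can tell, the core creates one RequestFactory and publishes it as a
builtin; a sketch of that wiring (an assumption, not shown in this patch):

import __builtin__

from pyload.network.RequestFactory import RequestFactory

factory = RequestFactory(core)   # `core` is the running pyLoad core (hypothetical here)
__builtin__.pyreq = factory      # makes the module-level getURL()/getRequest() usable

browser = factory.getRequest("HosterPlugin")   # Browser with its own CookieJar
page = factory.getURL("http://example.com/")   # one-shot request, handle closed internally
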
diff --git a/pyload/network/XDCCRequest.py b/pyload/network/XDCCRequest.py
new file mode 100644
index 000000000..9ae52f72b
--- /dev/null
+++ b/pyload/network/XDCCRequest.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: jeix
+"""
+
+import socket
+import re
+
+from os import remove
+from os.path import exists
+
+from time import time
+
+import struct
+from select import select
+
+from pyload.plugins.Plugin import Abort
+
+
+class XDCCRequest:
+    def __init__(self, timeout=30, proxies={}):
+
+        self.proxies = proxies
+        self.timeout = timeout
+
+        self.filesize = 0
+        self.recv = 0
+        self.speed = 0
+
+        self.abort = False
+
+    def createSocket(self):
+        # proxytype = None
+        # proxy = None
+        # if self.proxies.has_key("socks5"):
+            # proxytype = socks.PROXY_TYPE_SOCKS5
+            # proxy = self.proxies["socks5"]
+        # elif self.proxies.has_key("socks4"):
+            # proxytype = socks.PROXY_TYPE_SOCKS4
+            # proxy = self.proxies["socks4"]
+        # if proxytype:
+            # sock = socks.socksocket()
+            # t = _parse_proxy(proxy)
+            # sock.setproxy(proxytype, addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2])
+        # else:
+            # sock = socket.socket()
+        # return sock
+
+        return socket.socket()
+
+    def download(self, ip, port, filename, irc, progressNotify=None):
+
+        ircbuffer = ""
+        lastUpdate = time()
+        cumRecvLen = 0
+
+        dccsock = self.createSocket()
+
+        dccsock.settimeout(self.timeout)
+        dccsock.connect((ip, port))
+
+        if exists(filename):
+            i = 0
+            nameParts = filename.rpartition(".")
+            while True:
+                newfilename = "%s-%d%s%s" % (nameParts[0], i, nameParts[1], nameParts[2])
+                i += 1
+
+                if not exists(newfilename):
+                    filename = newfilename
+                    break
+
+        fh = open(filename, "wb")
+
+        # recv loop for dcc socket
+        while True:
+            if self.abort:
+                dccsock.close()
+                fh.close()
+                remove(filename)
+                raise Abort()
+
+            ircbuffer = self._keepAlive(irc, ircbuffer)
+
+            data = dccsock.recv(4096)
+            dataLen = len(data)
+            self.recv += dataLen
+
+            cumRecvLen += dataLen
+
+            now = time()
+            timespan = now - lastUpdate
+            if timespan > 1:
+                self.speed = cumRecvLen / timespan
+                cumRecvLen = 0
+                lastUpdate = now
+
+                if progressNotify:
+                    progressNotify(self.percent)
+
+            if not data:
+                break
+
+            fh.write(data)
+
+            # acknowledge data by sending the number of received bytes
+            dccsock.send(struct.pack('!I', self.recv))
+
+        dccsock.close()
+        fh.close()
+
+        return filename
+
+    def _keepAlive(self, sock, readbuffer):
+        fdset = select([sock], [], [], 0)
+        if sock not in fdset[0]:
+            return readbuffer
+
+        readbuffer += sock.recv(1024)
+        temp = readbuffer.split("\n")
+        readbuffer = temp.pop()
+
+        for line in temp:
+            line = line.rstrip()
+            first = line.split()
+            if first[0] == "PING":
+                sock.send("PONG %s\r\n" % first[1])
+
+        return readbuffer #strings are immutable, hand the remainder back to the caller
+
+    def abortDownloads(self):
+        self.abort = True
+
+    @property
+    def size(self):
+        return self.filesize
+
+    @property
+    def arrived(self):
+        return self.recv
+
+    @property
+    def percent(self):
+        if not self.filesize: return 0
+        return (self.recv * 100) / self.filesize
+
+    def close(self):
+        pass
diff --git a/pyload/network/__init__.py b/pyload/network/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/pyload/network/__init__.py
@@ -0,0 +1 @@
+
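
One detail of XDCCRequest.download() worth spelling out: after every write it
acknowledges the transfer by sending the cumulative byte count as a 32-bit
big-endian unsigned integer, which is what struct.pack('!I', ...) produces. A
tiny sketch (not part of the patch):

import struct

recv = 4096
ack = struct.pack('!I', recv)          # '\x00\x00\x10\x00' on the wire
assert struct.unpack('!I', ack)[0] == recv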
