diff options
| author | 2011-12-04 13:39:42 +0100 | |
|---|---|---|
| committer | 2011-12-04 13:39:42 +0100 | |
| commit | d2e3afceb738af20aeb8e41f9aad12150cf1e8a7 (patch) | |
| tree | 91a1ce5bc7fb51be6c3d188aed11552662d6f4bf /module/network | |
| parent | closed #440 (diff) | |
| download | pyload-d2e3afceb738af20aeb8e41f9aad12150cf1e8a7.tar.xz | |
Better download connection handling: Detect server error earlier, fallback to single connection if possible
Diffstat (limited to 'module/network')
| -rw-r--r-- | module/network/HTTPChunk.py | 22 | ||||
| -rw-r--r-- | module/network/HTTPDownload.py | 91 | ||||
| -rw-r--r-- | module/network/HTTPRequest.py | 7 | 
3 files changed, 92 insertions(+), 28 deletions(-)
| diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py index 69eedb19c..582067aa8 100644 --- a/module/network/HTTPChunk.py +++ b/module/network/HTTPChunk.py @@ -16,7 +16,7 @@      @author: RaNaN  """ -from os import remove, stat +from os import remove, stat, fsync  from os.path import exists  from time import sleep  from re import search @@ -146,6 +146,9 @@ class HTTPChunk(HTTPRequest):          self.sleep = 0.000          self.lastSize = 0 +    def __repr__(self): +        return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived) +      @property      def cj(self):          return self.p.cj @@ -157,7 +160,7 @@ class HTTPChunk(HTTPRequest):          self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)          self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) -        # request one byte more, since some servers in russia seems to have a defect arihmetic unit +        # request all bytes, since some servers in russia seems to have a defect arihmetic unit          if self.resume:              self.fp = open(self.p.info.getChunkName(self.id), "ab") @@ -259,10 +262,25 @@ class HTTPChunk(HTTPRequest):          self.headerParsed = True +    def stop(self): +        """The download will not proceed after next call of writeBody""" +        self.range = [0,0] +        self.size = 0 + +    def resetRange(self): +        """ Reset the range, so the download will load all data available  """ +        self.range = None +      def setRange(self, range):          self.range = range          self.size = range[1] - range[0] +    def flushFile(self): +        """  flush and close file """ +        self.fp.flush() +        fsync(self.fp.fileno()) #make sure everything was written to disk +        self.fp.close() #needs to be closed, or merging chunks will fail +      def close(self):          """ closes everything, unusable after this """          if self.fp: self.fp.close() diff --git a/module/network/HTTPDownload.py 
b/module/network/HTTPDownload.py index 1a2886332..13c674833 100644 --- a/module/network/HTTPDownload.py +++ b/module/network/HTTPDownload.py @@ -140,7 +140,7 @@ class HTTPDownload():                  return self._download(chunks, False)              else: -                raise e +                raise          finally:              self.close() @@ -161,7 +161,7 @@ class HTTPDownload():          lastFinishCheck = 0          lastTimeCheck = 0 -        chunksDone = set() +        chunksDone = set()  # list of curl handles that are finished          chunksCreated = False          done = False          if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can @@ -202,32 +202,76 @@ class HTTPDownload():              t = time()              # reduce these calls -            while lastFinishCheck + 1 < t: +            while lastFinishCheck + 0.5 < t: +                # list of failed curl handles +                failed = [] +                ex = None # save only last exception, we can only raise one anyway +                  num_q, ok_list, err_list = self.m.info_read()                  for c in ok_list: -                    chunksDone.add(c) +                    chunk = self.findChunk(c) +                    try: # check if the header implies success, else add it to failed list +                        chunk.verifyHeader() +                    except BadHeader, e: +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) +                        failed.append(chunk) +                        ex = e +                    else: +                        chunksDone.add(c) +                  for c in err_list:                      curl, errno, msg = c -                    #test if chunk was finished, otherwise raise the exception +                    chunk = self.findChunk(curl) +                    #test if chunk was finished                      if errno != 23 or "0 !=" not in msg: -                        
raise pycurl.error(errno, msg) - -                    #@TODO KeyBoardInterrupts are seen as finished chunks, -                    #but normally not handled to this process, only in the testcase +                        failed.append(chunk) +                        ex = pycurl.error(errno, msg) +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex))) +                        continue + +                    try: # check if the header implies success, else add it to failed list +                        chunk.verifyHeader() +                    except BadHeader, e: +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) +                        failed.append(chunk) +                        ex = e +                    else: +                        chunksDone.add(curl) +                if not num_q: # no more infos to get + +                    # check if init is not finished so we reset download connections +                    # note that other chunks are closed and downloaded with init too +                    if failed and init not in failed and init.c not in chunksDone: +                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex)))) + +                        #list of chunks to clean and remove +                        to_clean = filter(lambda x: x is not init, self.chunks) +                        for chunk in to_clean: +                            self.closeChunk(chunk) +                            self.chunks.remove(chunk) +                            remove(self.info.getChunkName(chunk.id)) + +                        #let first chunk load the rest and update the info file +                        init.resetRange() +                        self.info.clear() +                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size)) +                        self.info.save() +                    elif failed: +                        
raise ex -                    chunksDone.add(curl) -                if not num_q:                      lastFinishCheck = t -                    if len(chunksDone) == len(self.chunks): -                        done = True #all chunks loaded +                    if len(chunksDone) >= len(self.chunks): +                        if len(chunksDone) > len(self.chunks): +                            self.log.warning("Finished download chunks size incorrect, please report bug.") +                        done = True  #all chunks loaded                      break              if done:                  break #all chunks loaded -            # calc speed once per second +            # calc speed once per second, averaging over 3 seconds              if lastTimeCheck + 1 < t:                  diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in                          enumerate(self.chunks)] @@ -247,15 +291,7 @@ class HTTPDownload():          failed = False          for chunk in self.chunks: -            try: -                chunk.verifyHeader() -            except BadHeader, e: -                failed = e.code -                remove(self.info.getChunkName(chunk.id)) - -            chunk.fp.flush() -            fsync(chunk.fp.fileno()) #make sure everything was written to disk -            chunk.fp.close() #needs to be closed, or merging chunks will fail +            chunk.flushFile() #make sure downloads are written to disk          if failed: raise BadHeader(failed) @@ -265,11 +301,16 @@ class HTTPDownload():          if self.progressNotify:              self.progressNotify(self.percent) +    def findChunk(self, handle): +        """ linear search to find a chunk (should be ok since chunk size is usually low) """ +        for chunk in self.chunks: +            if chunk.c == handle: return chunk +      def closeChunk(self, chunk):          try:              self.m.remove_handle(chunk.c) -        except pycurl.error: -            
self.log.debug("Error removing chunk") +        except pycurl.error, e: +            self.log.debug("Error removing chunk: %s" % str(e))          finally:              chunk.close() diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index bd8cdd72e..e58fd114e 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -30,6 +30,7 @@ from module.plugins.Plugin import Abort  def myquote(url):      return quote(url, safe="%/:=&?~#+!$,;'@()*[]") +bad_headers = range(400, 404) + range(405, 418) + range(500, 506)  class BadHeader(Exception):      def __init__(self, code, content=""): @@ -211,11 +212,15 @@ class HTTPRequest():      def verifyHeader(self):          """ raise an exceptions on bad headers """          code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) -        if code in range(400, 404) or code in range(405, 418) or code in range(500, 506): +        if code in bad_headers:              #404 will NOT raise an exception              raise BadHeader(code, self.getResponse())          return code +    def checkHeader(self): +        """ check if header indicates failure""" +        return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers +      def getResponse(self):          """ retrieve response from string io """          if self.rep is None: return "" | 
