diff options
| author | 2011-12-04 13:39:42 +0100 | |
|---|---|---|
| committer | 2011-12-04 13:39:42 +0100 | |
| commit | d2e3afceb738af20aeb8e41f9aad12150cf1e8a7 (patch) | |
| tree | 91a1ce5bc7fb51be6c3d188aed11552662d6f4bf /module/network | |
| parent | closed #440 (diff) | |
| download | pyload-d2e3afceb738af20aeb8e41f9aad12150cf1e8a7.tar.xz | |
Better download connection handling: Detect server error earlier, fallback to single connection if possible
Diffstat (limited to 'module/network')
| -rw-r--r-- | module/network/HTTPChunk.py | 22 | ||||
| -rw-r--r-- | module/network/HTTPDownload.py | 91 | ||||
| -rw-r--r-- | module/network/HTTPRequest.py | 7 | 
3 files changed, 92 insertions(+), 28 deletions(-)
| diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py index 69eedb19c..582067aa8 100644 --- a/module/network/HTTPChunk.py +++ b/module/network/HTTPChunk.py @@ -16,7 +16,7 @@      @author: RaNaN  """ -from os import remove, stat +from os import remove, stat, fsync  from os.path import exists  from time import sleep  from re import search @@ -146,6 +146,9 @@ class HTTPChunk(HTTPRequest):          self.sleep = 0.000          self.lastSize = 0 +    def __repr__(self): +        return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived) +      @property      def cj(self):          return self.p.cj @@ -157,7 +160,7 @@ class HTTPChunk(HTTPRequest):          self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)          self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) -        # request one byte more, since some servers in russia seems to have a defect arihmetic unit +        # request all bytes, since some servers in russia seems to have a defect arihmetic unit          if self.resume:              self.fp = open(self.p.info.getChunkName(self.id), "ab") @@ -259,10 +262,25 @@ class HTTPChunk(HTTPRequest):          self.headerParsed = True +    def stop(self): +        """The download will not proceed after next call of writeBody""" +        self.range = [0,0] +        self.size = 0 + +    def resetRange(self): +        """ Reset the range, so the download will load all data available  """ +        self.range = None +      def setRange(self, range):          self.range = range          self.size = range[1] - range[0] +    def flushFile(self): +        """  flush and close file """ +        self.fp.flush() +        fsync(self.fp.fileno()) #make sure everything was written to disk +        self.fp.close() #needs to be closed, or merging chunks will fail +      def close(self):          """ closes everything, unusable after this """          if self.fp: self.fp.close() diff --git a/module/network/HTTPDownload.py 
b/module/network/HTTPDownload.py index 1a2886332..13c674833 100644 --- a/module/network/HTTPDownload.py +++ b/module/network/HTTPDownload.py @@ -140,7 +140,7 @@ class HTTPDownload():                  return self._download(chunks, False)              else: -                raise e +                raise          finally:              self.close() @@ -161,7 +161,7 @@ class HTTPDownload():          lastFinishCheck = 0          lastTimeCheck = 0 -        chunksDone = set() +        chunksDone = set()  # list of curl handles that are finished          chunksCreated = False          done = False          if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can @@ -202,32 +202,76 @@ class HTTPDownload():              t = time()              # reduce these calls -            while lastFinishCheck + 1 < t: +            while lastFinishCheck + 0.5 < t: +                # list of failed curl handles +                failed = [] +                ex = None # save only last exception, we can only raise one anyway +                  num_q, ok_list, err_list = self.m.info_read()                  for c in ok_list: -                    chunksDone.add(c) +                    chunk = self.findChunk(c) +                    try: # check if the header implies success, else add it to failed list +                        chunk.verifyHeader() +                    except BadHeader, e: +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) +                        failed.append(chunk) +                        ex = e +                    else: +                        chunksDone.add(c) +                  for c in err_list:                      curl, errno, msg = c -                    #test if chunk was finished, otherwise raise the exception +                    chunk = self.findChunk(curl) +                    #test if chunk was finished                      if errno != 23 or "0 !=" not in msg: -                        
raise pycurl.error(errno, msg) - -                    #@TODO KeyBoardInterrupts are seen as finished chunks, -                    #but normally not handled to this process, only in the testcase +                        failed.append(chunk) +                        ex = pycurl.error(errno, msg) +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex))) +                        continue + +                    try: # check if the header implies success, else add it to failed list +                        chunk.verifyHeader() +                    except BadHeader, e: +                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) +                        failed.append(chunk) +                        ex = e +                    else: +                        chunksDone.add(curl) +                if not num_q: # no more infos to get + +                    # check if init is not finished so we reset download connections +                    # note that other chunks are closed and downloaded with init too +                    if failed and init not in failed and init.c not in chunksDone: +                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex)))) + +                        #list of chunks to clean and remove +                        to_clean = filter(lambda x: x is not init, self.chunks) +                        for chunk in to_clean: +                            self.closeChunk(chunk) +                            self.chunks.remove(chunk) +                            remove(self.info.getChunkName(chunk.id)) + +                        #let first chunk load the rest and update the info file +                        init.resetRange() +                        self.info.clear() +                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size)) +                        self.info.save() +                    elif failed: +                        
raise ex -                    chunksDone.add(curl) -                if not num_q:                      lastFinishCheck = t -                    if len(chunksDone) == len(self.chunks): -                        done = True #all chunks loaded +                    if len(chunksDone) >= len(self.chunks): +                        if len(chunksDone) > len(self.chunks): +                            self.log.warning("Finished download chunks size incorrect, please report bug.") +                        done = True  #all chunks loaded                      break              if done:                  break #all chunks loaded -            # calc speed once per second +            # calc speed once per second, averaging over 3 seconds              if lastTimeCheck + 1 < t:                  diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in                          enumerate(self.chunks)] @@ -247,15 +291,7 @@ class HTTPDownload():          failed = False          for chunk in self.chunks: -            try: -                chunk.verifyHeader() -            except BadHeader, e: -                failed = e.code -                remove(self.info.getChunkName(chunk.id)) - -            chunk.fp.flush() -            fsync(chunk.fp.fileno()) #make sure everything was written to disk -            chunk.fp.close() #needs to be closed, or merging chunks will fail +            chunk.flushFile() #make sure downloads are written to disk          if failed: raise BadHeader(failed) @@ -265,11 +301,16 @@ class HTTPDownload():          if self.progressNotify:              self.progressNotify(self.percent) +    def findChunk(self, handle): +        """ linear search to find a chunk (should be ok since chunk size is usually low) """ +        for chunk in self.chunks: +            if chunk.c == handle: return chunk +      def closeChunk(self, chunk):          try:              self.m.remove_handle(chunk.c) -        except pycurl.error: -            
self.log.debug("Error removing chunk") +        except pycurl.error, e: +            self.log.debug("Error removing chunk: %s" % str(e))          finally:              chunk.close() diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index bd8cdd72e..e58fd114e 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -30,6 +30,7 @@ from module.plugins.Plugin import Abort  def myquote(url):      return quote(url, safe="%/:=&?~#+!$,;'@()*[]") +bad_headers = range(400, 404) + range(405, 418) + range(500, 506)  class BadHeader(Exception):      def __init__(self, code, content=""): @@ -211,11 +212,15 @@ class HTTPRequest():      def verifyHeader(self):          """ raise an exceptions on bad headers """          code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) -        if code in range(400, 404) or code in range(405, 418) or code in range(500, 506): +        if code in bad_headers:              #404 will NOT raise an exception              raise BadHeader(code, self.getResponse())          return code +    def checkHeader(self): +        """ check if header indicates failure""" +        return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers +      def getResponse(self):          """ retrieve response from string io """          if self.rep is None: return "" | 
