UploadStation & BitshareCom: Improved debugging

author: fragonib <devnull@localhost> 2011-04-24 17:40:11 +0200
committer: fragonib <devnull@localhost> 2011-04-24 17:40:11 +0200
commit: 25ab9093d84cbeb86f7b626d7251f4a0180b32bf (patch)
tree: 0a064a60a8a85a3bc4c09c5f6031a303537fb6f4 /module/plugins
parent: BitshareCom: Fix i18n issues (diff)
download: pyload-25ab9093d84cbeb86f7b626d7251f4a0180b32bf.tar.xz
2 files changed, 127 insertions, 93 deletions
diff --git a/module/plugins/hoster/BitshareCom.py b/module/plugins/hoster/BitshareCom.py
index f44a89e23..496071fa3 100644
--- a/module/plugins/hoster/BitshareCom.py
+++ b/module/plugins/hoster/BitshareCom.py
@@ -10,6 +10,7 @@ from module.plugins.Hoster import Hoster
 from module.plugins.ReCaptcha import ReCaptcha
 
 from module.network.RequestFactory import getURL
+from wx.lib.analogclock.helpers import Hand
 
 def unicode2str(unitext):
     return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore') 
@@ -19,20 +20,22 @@ def getInfo(urls):
     
     for url in urls:
         
-        # Get html
+        # Get file info html
+        # TODO: Force responses in the English language
         html = getURL(url)
-        if re.search(BitshareCom.OFFLINE_PATTERN, html):
+        if re.search(BitshareCom.FILE_OFFLINE_PATTERN, html):
             result.append((url, 0, 1, url))
 
         # Name
         name1 = re.search(BitshareCom.__pattern__, url).group('name')
         m = re.search(BitshareCom.FILE_INFO_PATTERN, html)
         name2 = m.group('name')
-        name = unicode2str(max(name1, name2))
+        name = unicode2str(max(name1, name2))   # Unicode BUG workaround
         
         # Size
         value = float(m.group('size'))
-        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[m.group('units')] 
+        units = m.group('units')
+        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units] 
         size = int(value*1024**pow)
     
         # Return info
@@ -44,12 +47,14 @@ class BitshareCom(Hoster):
     __name__ = "BitshareCom"
     __type__ = "hoster"
     __pattern__ = r"http://(www\.)?bitshare\.com/(files/(?P<id1>[a-zA-Z0-9]+)(/(?P<name>.*?)\.html)?|\?f=(?P<id2>[a-zA-Z0-9]+))"
-    __version__ = "0.2"
+    __version__ = "0.3"
     __description__ = """Bitshare.Com File Download Hoster"""
-    __author_name__ = ("paul", "king")
+    __author_name__ = ("paul", "king", "fragonib")
     
-    OFFLINE_PATTERN = r'''(>We are sorry, but the requested file was not found in our database|>Error - File not available<|The file was deleted either by the uploader, inactivity or due to copyright claim)'''
+    FILE_OFFLINE_PATTERN = r'''(>We are sorry, but the requested file was not found in our database|>Error - File not available<|The file was deleted either by the uploader, inactivity or due to copyright claim)'''
     FILE_INFO_PATTERN = r'<h1>.*\s(?P<name>.+?)\s-\s(?P<size>\d+)\s(?P<units>..)yte</h1>'
+    FILE_AJAXID_PATTERN = r'var ajaxdl = "(.*?)";'
+    CAPTCHA_KEY_PATTERN = r"http://api\.recaptcha\.net/challenge\?k=(.*?) " 
         
     def setup(self):
         self.multiDL = False
@@ -57,74 +62,92 @@ class BitshareCom(Hoster):
     def process(self, pyfile):
     
         self.pyfile = pyfile
+        
+        # Force the response language to English
         self.req.cj.setCookie("bitshare.com", "language_selection", "EN")
     
         # File id
         m = re.match(self.__pattern__, self.pyfile.url)
         self.file_id = max(m.group('id1'), m.group('id2')) 
-
-        # File url
-        self.log.debug("%s: File_id is %s" % (self.__name__, self.file_id))
+        self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id))
 
         # Load main page
         self.html = self.load(self.pyfile.url, ref=False, utf8=True, cookies=True)
 
         # Check offline
-        if re.search(self.OFFLINE_PATTERN, self.html) is not None:
+        if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None:
             self.offline()
            
         # File name
         name1 = re.search(BitshareCom.__pattern__, self.pyfile.url).group('name')
         name2 = re.search(BitshareCom.FILE_INFO_PATTERN, self.html).group('name')
-        self.pyfile.name = unicode2str(max(name1, name2))
+        self.pyfile.name = unicode2str(max(name1, name2))   # Unicode BUG workaround
 
-        self.ajaxid = re.search("var ajaxdl = \"(.*?)\";",self.html).group(1)
-        
-        self.log.debug("%s: AjaxId %s" % (self.__name__, self.ajaxid))
+        # Ajax file id
+        self.ajaxid = re.search(BitshareCom.FILE_AJAXID_PATTERN, self.html).group(1)
+        self.log.debug("%s: File ajax id is [%s]" % (self.__name__, self.ajaxid))
 
+        # Handle free downloading
         self.handleFree()
     
     def handleFree(self):
 
-        action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
+        # Get download info
+        self.log.debug("%s: Getting download info" % (self.__name__))
+        response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
                             post={"request" : "generateID", "ajaxid" : self.ajaxid})
-        self.log.debug("%s: Result of generateID %s" % (self.__name__, action))
-        parts = action.split(":")
-    
-        if parts[0] == "ERROR":
-            self.fail(parts[1])
-        
+        self.handleErrors(response, ':')
+        parts = response.split(":")
         filetype = parts[0]
         wait = int(parts[1])
         captcha = int(parts[2])
+        self.log.debug("%s: Download info [type: '%s', waiting: %d, captcha: %d]" % 
+                       (self.__name__, filetype, wait, captcha))
 
+        # Waiting
         if wait > 0:
-            self.log.info("%s: Waiting %d seconds." % (self.__name__, wait))
+            self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))
             self.setWait(wait, True)
             self.wait()
             
+        # Resolve captcha
         if captcha == 1:
-            id = re.search(r"http://api\.recaptcha\.net/challenge\?k=(.*?) ", self.html).group(1)
-            self.log.debug("%s: ReCaptcha key %s" % (self.__name__, id))
-            for i in range(3):   # Try upto 3 times
+            self.log.debug("%s: File is captcha protected" % (self.__name__))
+            id = re.search(BitshareCom.CAPTCHA_KEY_PATTERN, self.html).group(1)
+            # Try up to 3 times
+            for i in range(3):
+                self.log.debug("%s: Resolving ReCaptcha with key [%s], round %d" % (self.__name__, id, i+1))
                 recaptcha = ReCaptcha(self)
                 challenge, code = recaptcha.challenge(id)
-                action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
+                response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
                                 post={"request" : "validateCaptcha", "ajaxid" : self.ajaxid, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code})
-                parts = action.split(":")
-                if parts[0] != "SUCCESS":
-                    self.invalidCaptcha()
-                else:
+                if self.handleCaptchaErrors(response):
                     break
 
-        action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
-                    post={"request" : "getDownloadURL", "ajaxid" : self.ajaxid})
 
-        parts = action.split("#")
-    
-        if parts[0] == "ERROR":
-            self.fail(parts[1])
+        # Get download URL
+        self.log.debug("%s: Getting download url" % (self.__name__))
+        response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",
+                    post={"request" : "getDownloadURL", "ajaxid" : self.ajaxid})
+        self.handleErrors(response, '#')
+        url = response.split("#")[-1]    
 
-        # this may either download our file or forward us to an error page
-        self.log.debug("%s: Download url %s" % (self.__name__, parts[1]))
-        dl = self.download(parts[1])
+        # Request download URL
+        # This may either download our file or forward us to an error page
+        self.log.debug("%s: Downloading file with url [%s]" % (self.__name__, url))
+        dl = self.download(url)
+        
+    def handleErrors(self, response, separator):
+        self.log.debug("%s: Checking response [%s]" % (self.__name__, response))
+        if "ERROR" in response:
+            msg = response.split(separator)[-1]
+            self.fail(msg)
+
+    def handleCaptchaErrors(self, response):
+        self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response))
+        if "SUCCESS" in response:
+            return True
+        
+        self.log.debug("%s: Wrong captcha" % (self.__name__))
+        self.invalidCaptcha()
+        
diff --git a/module/plugins/hoster/UploadStationCom.py b/module/plugins/hoster/UploadStationCom.py
index 2723ae2ef..19c2d078d 100644
--- a/module/plugins/hoster/UploadStationCom.py
+++ b/module/plugins/hoster/UploadStationCom.py
@@ -12,29 +12,28 @@ from module.plugins.ReCaptcha import ReCaptcha
 from module.network.RequestFactory import getURL
 
 def unicode2str(unitext):
-    return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore') 
-
+    return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore')
+ 
 def getInfo(urls):
     result = []
     
     for url in urls:
         
-        # Get html
-        html = getURL(url)
-        pattern = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''  
-        if re.search(pattern, html):
+        # Get file info html
+        html = getURL(url) 
+        if re.search(UploadStationCom.FILE_OFFLINE_PATTERN, html):
             result.append((url, 0, 1, url))
             continue
         
         # Name
-        pattern = r'''<div class=\"download_item\">(.*?)</div>'''
-        name = re.search(pattern, html).group(1)
+        name = re.search(UploadStationCom.FILE_TITLE_PATTERN, html).group(1)
+        name = unicode2str(name)   # Unicode BUG workaround
         
         # Size
-        pattern = r'''<div><span>File size: <b>(.*?) (KB|MB|GB)</b>'''
-        m = re.search(pattern, html)
+        m = re.search(UploadStationCom.FILE_SIZE_PATTERN, html)
         value = float(m.group(1))
-        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[m.group(2)] 
+        units = m.group(2)
+        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units] 
         size = int(value*1024**pow)
     
         # Return info
@@ -46,16 +45,27 @@ def getInfo(urls):
 class UploadStationCom(Hoster):
     __name__ = "UploadStationCom"
     __type__ = "hoster"
-    __pattern__ = r"http://(www\.)?uploadstation\.com/file/[A-Za-z0-9]+"
-    __version__ = "0.2"
+    __pattern__ = r"http://(www\.)?uploadstation\.com/file/(?P<id>[A-Za-z0-9]+)"
+    __version__ = "0.3"
     __description__ = """UploadStation.Com File Download Hoster"""
     __author_name__ = ("fragonib")
     __author_mail__ = ("fragonib[AT]yahoo[DOT]es")
+    
+    FILE_OFFLINE_PATTERN = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''
+    FILE_TITLE_PATTERN = r'''<div class=\"download_item\">(.*?)</div>'''
+    FILE_SIZE_PATTERN = r'''<div><span>File size: <b>(.*?) (KB|MB|GB)</b>'''
+    CAPTCHA_PRESENT_TOKEN = '<div class="speedBox" id="showCaptcha" style="display:none;">'
+    CAPTCHA_KEY_PATTERN = r"var reCAPTCHA_publickey='(.*?)';"
+    CAPTCHA_WRONG_TOKEN = 'incorrect-captcha-sol'
+    WAITING_PATTERN = r".*?(\d+).*?"
+    TIME_LIMIT_TOKEN = '"fail":"timeLimit"'
+    TIME_LIMIT_WAIT_PATTERN = r"You need to wait (\d+) seconds to download next file."
+    DOWNLOAD_RESTRICTION_TOKEN = '"To remove download restriction, please choose your suitable plan as below</h1>"'
         
     def setup(self):
         self.multiDL = False
-        self.fileId = re.search(r"uploadstation\.com/file/([a-zA-Z0-9]+)(http:.*)?", self.pyfile.url).group(1)
-        self.pyfile.url = "http://www.uploadstation.com/file/" + self.fileId
+        self.fileId = ''
+        self.html = ''
 
     def process(self, pyfile):
         
@@ -63,15 +73,14 @@ class UploadStationCom(Hoster):
         self.html = self.load(self.pyfile.url, ref=False, cookies=True, utf8=True)
 
         # Is offline?
-        pattern = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''
-        m = re.search(pattern, self.html) 
+        m = re.search(UploadStationCom.FILE_OFFLINE_PATTERN, self.html) 
         if m is not None:
             self.offline()
 
-        # Title
-        pattern = r'''<div class=\"download_item\">(.*?)</div>'''
-        title = re.search(pattern, self.html).group(1)
-        self.pyfile.name = unicode2str(title)            
+        # Id & Title
+        self.fileId = re.search(self.__pattern__, self.pyfile.url).group('id')
+        title = re.search(UploadStationCom.FILE_TITLE_PATTERN, self.html).group(1)
+        self.pyfile.name = unicode2str(title)   # Unicode BUG workaround          
 
         # Free account
         self.handleFree()
@@ -79,81 +88,83 @@ class UploadStationCom(Hoster):
     def handleFree(self):
         
         # Not needed yet
-        #pattern = r'''\"(/landing/.*?/download_captcha\.js)\"'''
-        #jsPage = re.search(pattern, self.html).group(1)
-        #self.jsPage = self.load("http://uploadstation.com" + jsPage)
+        # pattern = r'''\"(/landing/.*?/download_captcha\.js)\"'''
+        # jsPage = re.search(pattern, self.html).group(1)
+        # self.jsPage = self.load("http://uploadstation.com" + jsPage)
         
         # Check download
         response = self.load(self.pyfile.url, post={"checkDownload" : "check"})
-        if not '"success":"showCaptcha"' in response:
-            self.handleErrors(response)
+        self.log.debug("%s: Checking download, response [%s]" % (self.__name__, response))
+        self.handleErrors(response)
         
         # We got a captcha?
-        if '<div class="speedBox" id="showCaptcha" style="display:none;">' in self.html:
-            id = re.search(r"var reCAPTCHA_publickey='(.*?)';", self.html).group(1)
+        if UploadStationCom.CAPTCHA_PRESENT_TOKEN in self.html:
+            id = re.search(UploadStationCom.CAPTCHA_KEY_PATTERN, self.html).group(1)
+            self.log.debug("%s: Resolving ReCaptcha with key [%s]" % (self.__name__, id))
             recaptcha = ReCaptcha(self)
             challenge, code = recaptcha.challenge(id)
             response = self.load('http://www.uploadstation.com/checkReCaptcha.php', 
                                   post={'recaptcha_challenge_field' : challenge,
                                         'recaptcha_response_field' : code, 
                                         'recaptcha_shortencode_field' : self.fileId})
-            if r'incorrect-captcha-sol' in response:
-                self.handleCaptchaErrors(response)
+            self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response))
+            self.handleCaptchaErrors(response)
 
         # Process waiting
-        response = self.load(self.pyfile.url, post={"downloadLink":"wait"})
-        m = re.search(r".*?(\d+).*?", response)
+        response = self.load(self.pyfile.url, post={"downloadLink" : "wait"})
+        m = re.search(UploadStationCom.WAITING_PATTERN, response)
         if m is not None:
-            wait = m.group(1)
-            if wait == "404":
+            wait = int(m.group(1))
+            if wait == 404:
                 self.log.debug("No wait time returned")
                 self.fail("No wait time returned")
-            else:
-                self.setWait(int(wait))
 
+            self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))
+            self.setWait(wait + 3)
             self.wait()
 
         # Show download link
-        self.load(self.pyfile.url, post={"downloadLink":"show"})
+        self.load(self.pyfile.url, post={"downloadLink" : "show"})
 
         # This may either download our file or forward us to an error page
-        dl = self.download(self.pyfile.url, post={"download":"normal"})
+        self.log.debug("%s: Downloading file." % (self.__name__))
+        dl = self.download(self.pyfile.url, post={"download" : "normal"})
         self.handleDownloadedFile()
         
     def handleErrors(self, response):
         
-        text = '"fail":"timeLimit"'
-        if text in response:
+        if UploadStationCom.TIME_LIMIT_TOKEN in response:
             wait = 300
             html = self.load(self.pyfile.url, post={"checkDownload" : "showError", "errorType" : "timeLimit"})
-            m = re.search(r"You need to wait (\d+) seconds to download next file.", html)
+            m = re.search(UploadStationCom.TIME_LIMIT_WAIT_PATTERN, html)
             if m is not None:
                 wait = int(m.group(1))
 
+            self.log.info("%s: Time limit reached, waiting %d seconds." % (self.__name__, wait))
             self.setWait(wait, True)
             self.wait()
             self.retry()
             
-        text = '"To remove download restriction, please choose your suitable plan as below</h1>"'
-        if text in response:
+        if UploadStationCom.DOWNLOAD_RESTRICTION_TOKEN in response:
             wait = 720
+            self.log.info("%s: Free account time limit reached, waiting %d seconds." % (self.__name__, wait))
             self.setWait(wait, True)
             self.wait()
             self.retry()
             
     def handleCaptchaErrors(self, response):
-        self.invalidCaptcha()
-        self.retry()
+        if UploadStationCom.CAPTCHA_WRONG_TOKEN in response:
+            self.log.info("%s: Invalid captcha response, retrying." % (self.__name__))
+            self.invalidCaptcha()
+            self.retry()
 
     def handleDownloadedFile(self):
-        check = self.checkDownload({"wait": re.compile(r'You need to wait (\d+) seconds to download next file.')})
-        
+        check = self.checkDownload({"wait": re.compile(UploadStationCom.TIME_LIMIT_WAIT_PATTERN)})
         if check == "wait":
-            wait_time = 720
+            wait = 720
             if self.lastCheck is not None:
-                wait_time = int(self.lastCheck.group(1))
-            self.setWait(wait_time+3)
-            self.log.debug("%s: You need to wait %d seconds for another download." % (self.__name__, wait_time))
-            self.wantReconnect = True
+                wait = int(self.lastCheck.group(1))
+            self.log.debug("%s: Failed, you need to wait %d seconds for another download." % (self.__name__, wait))
+            self.setWait(wait + 3, True)
             self.wait()
-            self.retry() 
-\ No newline at end of file
+            self.retry()
+\ No newline at end of file
author	fragonib <devnull@localhost>	2011-04-24 17:40:11 +0200
committer	fragonib <devnull@localhost>	2011-04-24 17:40:11 +0200
commit	25ab9093d84cbeb86f7b626d7251f4a0180b32bf (patch)
tree	0a064a60a8a85a3bc4c09c5f6031a303537fb6f4 /module/plugins
parent	BitshareCom: Fix i18n issues (diff)
download	pyload-25ab9093d84cbeb86f7b626d7251f4a0180b32bf.tar.xz