diff options
| author | 2011-04-24 17:40:11 +0200 | |
|---|---|---|
| committer | 2011-04-24 17:40:11 +0200 | |
| commit | 25ab9093d84cbeb86f7b626d7251f4a0180b32bf (patch) | |
| tree | 0a064a60a8a85a3bc4c09c5f6031a303537fb6f4 /module/plugins | |
| parent | BitshareCom: Fix i18n issues (diff) | |
| download | pyload-25ab9093d84cbeb86f7b626d7251f4a0180b32bf.tar.xz | |
UploadStation & BitshareCom: Improved debugging
Diffstat (limited to 'module/plugins')
| -rw-r--r-- | module/plugins/hoster/BitshareCom.py | 103 | ||||
| -rw-r--r-- | module/plugins/hoster/UploadStationCom.py | 117 | 
2 files changed, 127 insertions, 93 deletions
| diff --git a/module/plugins/hoster/BitshareCom.py b/module/plugins/hoster/BitshareCom.py index f44a89e23..496071fa3 100644 --- a/module/plugins/hoster/BitshareCom.py +++ b/module/plugins/hoster/BitshareCom.py @@ -10,6 +10,7 @@ from module.plugins.Hoster import Hoster  from module.plugins.ReCaptcha import ReCaptcha  from module.network.RequestFactory import getURL +from wx.lib.analogclock.helpers import Hand  def unicode2str(unitext):      return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore')  @@ -19,20 +20,22 @@ def getInfo(urls):      for url in urls: -        # Get html +        # Get file info html +        # TODO: Force responses in english language          html = getURL(url) -        if re.search(BitshareCom.OFFLINE_PATTERN, html): +        if re.search(BitshareCom.FILE_OFFLINE_PATTERN, html):              result.append((url, 0, 1, url))          # Name          name1 = re.search(BitshareCom.__pattern__, url).group('name')          m = re.search(BitshareCom.FILE_INFO_PATTERN, html)          name2 = m.group('name') -        name = unicode2str(max(name1, name2)) +        name = unicode2str(max(name1, name2))   # Unicode BUG workaround          # Size          value = float(m.group('size')) -        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[m.group('units')]  +        units = m.group('units') +        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units]           size = int(value*1024**pow)          # Return info @@ -44,12 +47,14 @@ class BitshareCom(Hoster):      __name__ = "BitshareCom"      __type__ = "hoster"      __pattern__ = r"http://(www\.)?bitshare\.com/(files/(?P<id1>[a-zA-Z0-9]+)(/(?P<name>.*?)\.html)?|\?f=(?P<id2>[a-zA-Z0-9]+))" -    __version__ = "0.2" +    __version__ = "0.3"      __description__ = """Bitshare.Com File Download Hoster""" -    __author_name__ = ("paul", "king") +    __author_name__ = ("paul", "king", "fragonib") -    OFFLINE_PATTERN = r'''(>We are sorry, but the requested file was not found in our database|>Error - File not 
available<|The file was deleted either by the uploader, inactivity or due to copyright claim)''' +    FILE_OFFLINE_PATTERN = r'''(>We are sorry, but the requested file was not found in our database|>Error - File not available<|The file was deleted either by the uploader, inactivity or due to copyright claim)'''      FILE_INFO_PATTERN = r'<h1>.*\s(?P<name>.+?)\s-\s(?P<size>\d+)\s(?P<units>..)yte</h1>' +    FILE_AJAXID_PATTERN = r'var ajaxdl = "(.*?)";' +    CAPTCHA_KEY_PATTERN = r"http://api\.recaptcha\.net/challenge\?k=(.*?) "       def setup(self):          self.multiDL = False @@ -57,74 +62,92 @@ class BitshareCom(Hoster):      def process(self, pyfile):          self.pyfile = pyfile +         +        # Force responses language          self.req.cj.setCookie("bitshare.com", "language_selection", "EN")          # File id          m = re.match(self.__pattern__, self.pyfile.url)          self.file_id = max(m.group('id1'), m.group('id2'))  - -        # File url -        self.log.debug("%s: File_id is %s" % (self.__name__, self.file_id)) +        self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id))          # Load main page          self.html = self.load(self.pyfile.url, ref=False, utf8=True, cookies=True)          # Check offline -        if re.search(self.OFFLINE_PATTERN, self.html) is not None: +        if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None:              self.offline()          # File name          name1 = re.search(BitshareCom.__pattern__, self.pyfile.url).group('name')          name2 = re.search(BitshareCom.FILE_INFO_PATTERN, self.html).group('name') -        self.pyfile.name = unicode2str(max(name1, name2)) +        self.pyfile.name = unicode2str(max(name1, name2))   # Unicode BUG workaround -        self.ajaxid = re.search("var ajaxdl = \"(.*?)\";",self.html).group(1) -         -        self.log.debug("%s: AjaxId %s" % (self.__name__, self.ajaxid)) +        # Ajax file id +        self.ajaxid = 
re.search(BitshareCom.FILE_AJAXID_PATTERN, self.html).group(1) +        self.log.debug("%s: File ajax id is [%s]" % (self.__name__, self.ajaxid)) +        # Handle free downloading          self.handleFree()      def handleFree(self): -        action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html", +        # Get download info +        self.log.debug("%s: Getting download info" % (self.__name__)) +        response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",                              post={"request" : "generateID", "ajaxid" : self.ajaxid}) -        self.log.debug("%s: Result of generateID %s" % (self.__name__, action)) -        parts = action.split(":") -     -        if parts[0] == "ERROR": -            self.fail(parts[1]) -         +        self.handleErrors(response, ':') +        parts = response.split(":")          filetype = parts[0]          wait = int(parts[1])          captcha = int(parts[2]) +        self.log.debug("%s: Download info [type: '%s', waiting: %d, captcha: %d]" %  +                       (self.__name__, filetype, wait, captcha)) +        # Waiting          if wait > 0: -            self.log.info("%s: Waiting %d seconds." % (self.__name__, wait)) +            self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))              self.setWait(wait, True)              self.wait() +        # Resolve captcha          if captcha == 1: -            id = re.search(r"http://api\.recaptcha\.net/challenge\?k=(.*?) 
", self.html).group(1) -            self.log.debug("%s: ReCaptcha key %s" % (self.__name__, id)) -            for i in range(3):   # Try upto 3 times +            self.log.debug("%s: File is captcha protected" % (self.__name__)) +            id = re.search(BitshareCom.CAPTCHA_KEY_PATTERN, self.html).group(1) +            # Try up to 3 times +            for i in range(3): +                self.log.debug("%s: Resolving ReCaptcha with key [%s], round %d" % (self.__name__, id, i+1))                  recaptcha = ReCaptcha(self)                  challenge, code = recaptcha.challenge(id) -                action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html", +                response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html",                                  post={"request" : "validateCaptcha", "ajaxid" : self.ajaxid, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code}) -                parts = action.split(":") -                if parts[0] != "SUCCESS": -                    self.invalidCaptcha() -                else: +                if self.handleCaptchaErrors(response):                      break -        action = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html", -                    post={"request" : "getDownloadURL", "ajaxid" : self.ajaxid}) -        parts = action.split("#") -     -        if parts[0] == "ERROR": -            self.fail(parts[1]) +        # Get download URL +        self.log.debug("%s: Getting download url" % (self.__name__)) +        response = self.load("http://bitshare.com/files-ajax/" + self.file_id + "/request.html", +                    post={"request" : "getDownloadURL", "ajaxid" : self.ajaxid}) +        self.handleErrors(response, '#') +        url = response.split("#")[-1]     -        # this may either download our file or forward us to an error page -        self.log.debug("%s: Download url %s" % (self.__name__, 
parts[1])) -        dl = self.download(parts[1]) +        # Request download URL +        # This may either download our file or forward us to an error page +        self.log.debug("%s: Downloading file with url [%s]" % (self.__name__, url)) +        dl = self.download(url) +         +    def handleErrors(self, response, separator): +        self.log.debug("%s: Checking response [%s]" % (self.__name__, response)) +        if "ERROR" in response: +            msg = response.split(separator)[-1] +            self.fail(msg) + +    def handleCaptchaErrors(self, response): +        self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response)) +        if "SUCCESS" in response: +            return True +         +        self.log.debug("%s: Wrong captcha" % (self.__name__)) +        self.invalidCaptcha() +         diff --git a/module/plugins/hoster/UploadStationCom.py b/module/plugins/hoster/UploadStationCom.py index 2723ae2ef..19c2d078d 100644 --- a/module/plugins/hoster/UploadStationCom.py +++ b/module/plugins/hoster/UploadStationCom.py @@ -12,29 +12,28 @@ from module.plugins.ReCaptcha import ReCaptcha  from module.network.RequestFactory import getURL
  def unicode2str(unitext):
 -    return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore') 
 -
 +    return unicodedata.normalize('NFKD', unitext).encode('ascii', 'ignore')
 + 
  def getInfo(urls):
      result = []
      for url in urls:
 -        # Get html
 -        html = getURL(url)
 -        pattern = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''  
 -        if re.search(pattern, html):
 +        # Get file info html
 +        html = getURL(url) 
 +        if re.search(UploadStationCom.FILE_OFFLINE_PATTERN, html):
              result.append((url, 0, 1, url))
              continue
          # Name
 -        pattern = r'''<div class=\"download_item\">(.*?)</div>'''
 -        name = re.search(pattern, html).group(1)
 +        name = re.search(UploadStationCom.FILE_TITLE_PATTERN, html).group(1)
 +        name = unicode2str(name)   # Unicode BUG workaround
          # Size
 -        pattern = r'''<div><span>File size: <b>(.*?) (KB|MB|GB)</b>'''
 -        m = re.search(pattern, html)
 +        m = re.search(UploadStationCom.FILE_SIZE_PATTERN, html)
          value = float(m.group(1))
 -        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[m.group(2)] 
 +        units = m.group(2)
 +        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units] 
          size = int(value*1024**pow)
          # Return info
 @@ -46,16 +45,27 @@ def getInfo(urls):
  class UploadStationCom(Hoster):
      __name__ = "UploadStationCom"
      __type__ = "hoster"
 -    __pattern__ = r"http://(www\.)?uploadstation\.com/file/[A-Za-z0-9]+"
 -    __version__ = "0.2"
 +    __pattern__ = r"http://(www\.)?uploadstation\.com/file/(?P<id>[A-Za-z0-9]+)"
 +    __version__ = "0.3"
      __description__ = """UploadStation.Com File Download Hoster"""
      __author_name__ = ("fragonib")
      __author_mail__ = ("fragonib[AT]yahoo[DOT]es")
 +    
 +    FILE_OFFLINE_PATTERN = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''
 +    FILE_TITLE_PATTERN = r'''<div class=\"download_item\">(.*?)</div>'''
 +    FILE_SIZE_PATTERN = r'''<div><span>File size: <b>(.*?) (KB|MB|GB)</b>'''
 +    CAPTCHA_PRESENT_TOKEN = '<div class="speedBox" id="showCaptcha" style="display:none;">'
 +    CAPTCHA_KEY_PATTERN = r"var reCAPTCHA_publickey='(.*?)';"
 +    CAPTCHA_WRONG_TOKEN = 'incorrect-captcha-sol'
 +    WAITING_PATTERN = r".*?(\d+).*?"
 +    TIME_LIMIT_TOKEN = '"fail":"timeLimit"'
 +    TIME_LIMIT_WAIT_PATTERN = r"You need to wait (\d+) seconds to download next file."
 +    DOWNLOAD_RESTRICTION_TOKEN = '"To remove download restriction, please choose your suitable plan as below</h1>"'
      def setup(self):
          self.multiDL = False
 -        self.fileId = re.search(r"uploadstation\.com/file/([a-zA-Z0-9]+)(http:.*)?", self.pyfile.url).group(1)
 -        self.pyfile.url = "http://www.uploadstation.com/file/" + self.fileId
 +        self.fileId = ''
 +        self.html = ''
      def process(self, pyfile):
 @@ -63,15 +73,14 @@ class UploadStationCom(Hoster):
          self.html = self.load(self.pyfile.url, ref=False, cookies=True, utf8=True)
          # Is offline?
 -        pattern = r'''<h1>File not available</h1>|<b>The file could not be found\. Please check the download link'''
 -        m = re.search(pattern, self.html) 
 +        m = re.search(UploadStationCom.FILE_OFFLINE_PATTERN, self.html) 
          if m is not None:
              self.offline()
 -        # Title
 -        pattern = r'''<div class=\"download_item\">(.*?)</div>'''
 -        title = re.search(pattern, self.html).group(1)
 -        self.pyfile.name = unicode2str(title)            
 +        # Id & Title
 +        self.fileId = re.search(self.__pattern__, self.pyfile.url).group('id')
 +        title = re.search(UploadStationCom.FILE_TITLE_PATTERN, self.html).group(1)
 +        self.pyfile.name = unicode2str(title)   # Unicode BUG workaround          
          # Free account
          self.handleFree()
 @@ -79,81 +88,83 @@ class UploadStationCom(Hoster):
      def handleFree(self):
          # Not needed yet
 -        #pattern = r'''\"(/landing/.*?/download_captcha\.js)\"'''
 -        #jsPage = re.search(pattern, self.html).group(1)
 -        #self.jsPage = self.load("http://uploadstation.com" + jsPage)
 +        # pattern = r'''\"(/landing/.*?/download_captcha\.js)\"'''
 +        # jsPage = re.search(pattern, self.html).group(1)
 +        # self.jsPage = self.load("http://uploadstation.com" + jsPage)
          # Check download
          response = self.load(self.pyfile.url, post={"checkDownload" : "check"})
 -        if not '"success":"showCaptcha"' in response:
 -            self.handleErrors(response)
 +        self.log.debug("%s: Checking download, response [%s]" % (self.__name__, response))
 +        self.handleErrors(response)
          # We got a captcha?
 -        if '<div class="speedBox" id="showCaptcha" style="display:none;">' in self.html:
 -            id = re.search(r"var reCAPTCHA_publickey='(.*?)';", self.html).group(1)
 +        if UploadStationCom.CAPTCHA_PRESENT_TOKEN in self.html:
 +            id = re.search(UploadStationCom.CAPTCHA_KEY_PATTERN, self.html).group(1)
 +            self.log.debug("%s: Resolving ReCaptcha with key [%s]" % (self.__name__, id))
              recaptcha = ReCaptcha(self)
              challenge, code = recaptcha.challenge(id)
              response = self.load('http://www.uploadstation.com/checkReCaptcha.php', 
                                    post={'recaptcha_challenge_field' : challenge,
                                          'recaptcha_response_field' : code, 
                                          'recaptcha_shortencode_field' : self.fileId})
 -            if r'incorrect-captcha-sol' in response:
 -                self.handleCaptchaErrors(response)
 +            self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response))
 +            self.handleCaptchaErrors(response)
          # Process waiting
 -        response = self.load(self.pyfile.url, post={"downloadLink":"wait"})
 -        m = re.search(r".*?(\d+).*?", response)
 +        response = self.load(self.pyfile.url, post={"downloadLink" : "wait"})
 +        m = re.search(UploadStationCom.WAITING_PATTERN, response)
          if m is not None:
 -            wait = m.group(1)
 -            if wait == "404":
 +            wait = int(m.group(1))
 +            if wait == 404:
                  self.log.debug("No wait time returned")
                  self.fail("No wait time returned")
 -            else:
 -                self.setWait(int(wait))
 +            self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))
 +            self.setWait(wait + 3)
              self.wait()
          # Show download link
 -        self.load(self.pyfile.url, post={"downloadLink":"show"})
 +        self.load(self.pyfile.url, post={"downloadLink" : "show"})
          # This may either download our file or forward us to an error page
 -        dl = self.download(self.pyfile.url, post={"download":"normal"})
 +        self.log.debug("%s: Downloading file." % (self.__name__))
 +        dl = self.download(self.pyfile.url, post={"download" : "normal"})
          self.handleDownloadedFile()
      def handleErrors(self, response):
 -        text = '"fail":"timeLimit"'
 -        if text in response:
 +        if UploadStationCom.TIME_LIMIT_TOKEN in response:
              wait = 300
              html = self.load(self.pyfile.url, post={"checkDownload" : "showError", "errorType" : "timeLimit"})
 -            m = re.search(r"You need to wait (\d+) seconds to download next file.", html)
 +            m = re.search(UploadStationCom.TIME_LIMIT_WAIT_PATTERN, html)
              if m is not None:
                  wait = int(m.group(1))
 +            self.log.info("%s: Time limit reached, waiting %d seconds." % (self.__name__, wait))
              self.setWait(wait, True)
              self.wait()
              self.retry()
 -        text = '"To remove download restriction, please choose your suitable plan as below</h1>"'
 -        if text in response:
 +        if UploadStationCom.DOWNLOAD_RESTRICTION_TOKEN in response:
              wait = 720
 +            self.log.info("%s: Free account time limit reached, waiting %d seconds." % (self.__name__, wait))
              self.setWait(wait, True)
              self.wait()
              self.retry()
      def handleCaptchaErrors(self, response):
 -        self.invalidCaptcha()
 -        self.retry()
 +        if UploadStationCom.CAPTCHA_WRONG_TOKEN in response:
 +            self.log.info("%s: Invalid captcha response, retrying." % (self.__name__))
 +            self.invalidCaptcha()
 +            self.retry()
      def handleDownloadedFile(self):
 -        check = self.checkDownload({"wait": re.compile(r'You need to wait (\d+) seconds to download next file.')})
 -        
 +        check = self.checkDownload({"wait": re.compile(UploadStationCom.TIME_LIMIT_WAIT_PATTERN)})
          if check == "wait":
 -            wait_time = 720
 +            wait = 720
              if self.lastCheck is not None:
 -                wait_time = int(self.lastCheck.group(1))
 -            self.setWait(wait_time+3)
 -            self.log.debug("%s: You need to wait %d seconds for another download." % (self.__name__, wait_time))
 -            self.wantReconnect = True
 +                wait = int(self.lastCheck.group(1))
 +            self.log.debug("%s: Failed, you need to wait %d seconds for another download." % (self.__name__, wait))
 +            self.setWait(wait + 3, True)
              self.wait()
 -            self.retry() 
\ No newline at end of file
 +            self.retry()
\ No newline at end of file | 
