diff options
Diffstat (limited to 'module')
| -rw-r--r-- | module/plugins/internal/SimpleHoster.py | 123 | 
1 files changed, 83 insertions, 40 deletions
| diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index f741cd691..736d15f3a 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -4,7 +4,7 @@ import re  from time import time  from urllib import unquote -from urlparse import urlparse +from urlparse import urljoin, urlparse  from module.PyFile import statusMap as _statusMap  from module.network.CookieJar import CookieJar @@ -33,7 +33,7 @@ def _error(self, reason, type):  #@TODO: Remove in 0.4.10  def _wait(self, seconds, reconnect):      if seconds: -        self.setWait(seconds) +        self.setWait(seconds + 1)      if reconnect is not None:          self.wantReconnect = reconnect @@ -114,25 +114,35 @@ def timestamp():  #@TODO: Move to hoster class in 0.4.10 -def _getDirectLink(self, url): +def _isDirectLink(self, url, resumable=True):      header = self.load(url, ref=True, just_header=True, decode=True) -    if not 'code' in header or header['code'] != 302: -        return "" -      if not 'location' in header or not header['location']:          return "" -    # if 'content-type' in header and "text/plain" not in header['content-type']: -        # return "" +    location = header['location'] + +    if resumable:  #: sometimes http code may be wrong... +        if self.load(location, ref=True, cookies=True, just_header=True, decode=True)['location']: +            return "" +    else: +        if not 'code' in header or header['code'] != 302: +            return "" -    return header['location'] +    if urlparse(location).scheme: +        link = location +    else: +        p = urlparse(url) +        base = "%s://%s" % (p.scheme, p.netloc) +        link = urljoin(base, location) + +    return link  class SimpleHoster(Hoster):      __name__    = "SimpleHoster"      __type__    = "hoster" -    __version__ = "0.68" +    __version__ = "0.69"      __pattern__ = r'^unmatchable$' @@ -154,6 +164,9 @@ class SimpleHoster(Hoster):          SIZE_PATTERN: (optional) Size that will be checked for the file            example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' +      HASHSUM_PATTERN: (optional) Hash code and type of the file +        example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)' +        OFFLINE_PATTERN: (optional) Check if the file is yet available online          example: OFFLINE_PATTERN = r'File (deleted|not found)' @@ -189,9 +202,9 @@ class SimpleHoster(Hoster):      TEXT_ENCODING       = False  #: Set to True or encoding name if encoding value in http header is not correct      COOKIES             = True   #: or False or list of tuples [(domain, name, value)]      FORCE_CHECK_TRAFFIC = False  #: Set to True to force checking traffic left for premium account -    CHECK_DIRECT_LINK   = None   #: when None self-set to True if self.account else False -    MULTI_HOSTER        = False  #: Set to True to leech other hoster link -    CONTENT_DISPOSITION = False  #: Set to True to replace file name with content-disposition value in http header +    CHECK_DIRECT_LINK   = None   #: Set to True to check for direct link, set to None to do it only if self.account is True +    MULTI_HOSTER        = False  #: Set to True to leech other hoster link (according its multihoster hook if available) +    CONTENT_DISPOSITION = False  #: Set to True to replace file name with content-disposition value from http header      @classmethod @@ -203,14 +216,32 @@ class SimpleHoster(Hoster):      @classmethod      def getInfo(cls, url="", html=""): -        info = {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': url or ""} +        info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url}          if not html: -            if url: -                html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) -                if isinstance(cls.TEXT_ENCODING, basestring): -                    html = unicode(html, cls.TEXT_ENCODING) -            else: +            try: +                if not url: +                    info['error']  = "missing url" +                    info['status'] = 1 +                    raise + +                try: +                    html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) + +                    if isinstance(cls.TEXT_ENCODING, basestring): +                        html = unicode(html, cls.TEXT_ENCODING) + +                except BadHeader, e: +                    info['error'] = "%d: %s" % (e.code, e.content) + +                    if e.code is 404: +                        info['status'] = 1 +                        raise + +                    if e.code is 503: +                        info['status'] = 6 +                        raise +            except:                  return info          online = False @@ -226,19 +257,20 @@ class SimpleHoster(Hoster):          else:              try: -                info.update(re.match(cls.__pattern__, url).groupdict()) +                info['pattern'] = re.match(cls.__pattern__, url).groupdict()  #: pattern groups will be saved here, please save api stuff to info['api']              except:                  pass              for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",                              "FILE_NAME_PATTERN", "NAME_PATTERN", -                            "FILE_SIZE_PATTERN", "SIZE_PATTERN"):  #@TODO: Remove old patterns starting with "FILE_" in 0.4.10 +                            "FILE_SIZE_PATTERN", "SIZE_PATTERN", +                            "HASHSUM_PATTERN"):  #@TODO: Remove old patterns starting with "FILE_" in 0.4.10                  try:                      attr = getattr(cls, pattern)                      dict = re.search(attr, html).groupdict() -                    if all(True for k in dict if k not in info): -                        info.update(dict) +                    if all(True for k in dict if k not in info['pattern']): +                        info['pattern'].update(dict)                  except AttributeError:                      continue @@ -249,12 +281,12 @@ class SimpleHoster(Hoster):          if online:              info['status'] = 2 -            if 'N' in info: -                info['name'] = replace_patterns(unquote(info['N'].strip()), +            if 'N' in info['pattern']: +                info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),                                                  cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS)  #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10 -            if 'S' in info: -                size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'], +            if 'S' in info['pattern']: +                size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info else info['pattern']['S'],                                          cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS)  #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10                  info['size'] = parseFileSize(size) @@ -262,6 +294,10 @@ class SimpleHoster(Hoster):                  unit = info['units'] if 'units' in info else None                  info['size'] = parseFileSize(info['size'], unit) +            if 'H' in info['pattern']: +                hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" +                info[hashtype] = info['pattern']['H'] +          return info @@ -349,15 +385,20 @@ class SimpleHoster(Hoster):          if self.link:              self.download(self.link, disposition=self.CONTENT_DISPOSITION) +        self.checkFile() + + +    def checkFile(self) +        if self.checkDownload({'empty': re.compile(r"^$")}) is "empty":  #@TODO: Move to hoster in 0.4.10 +            self.fail(_("Empty file")) +      def checkErrors(self): -        if hasattr(self, 'WAIT_PATTERN'): -            m = re.search(self.WAIT_PATTERN, self.html) +        if hasattr(self, 'ERROR_PATTERN'): +            m = re.search(self.ERROR_PATTERN, self.html)              if m: -                wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in -                                 re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)]) -                self.wait(wait_time, False) -                return +                e = self.info['error'] = m.group(1) +                self.error(e)          if hasattr(self, 'PREMIUM_ONLY_PATTERN'):              m = re.search(self.PREMIUM_ONLY_PATTERN, self.html) @@ -365,11 +406,13 @@ class SimpleHoster(Hoster):                  self.info['error'] = "premium-only"                  return -        if hasattr(self, 'ERROR_PATTERN'): -            m = re.search(self.ERROR_PATTERN, self.html) +        if hasattr(self, 'WAIT_PATTERN'): +            m = re.search(self.WAIT_PATTERN, self.html)              if m: -                e = self.info['error'] = m.group(1) -                self.error(e) +                wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in +                                 re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)]) +                self.wait(wait_time, False) +                return          self.info.pop('error', None) @@ -431,7 +474,7 @@ class SimpleHoster(Hoster):      def handleDirect(self): -        link = _getDirectLink(self, self.pyfile.url) +        link = _isDirectLink(self, self.pyfile.url, self.resumeDownload)          if link:              self.logInfo(_("Direct download link detected")) @@ -460,7 +503,7 @@ class SimpleHoster(Hoster):              self.link = m.group(1)          except Exception, e: -            self.fail(str(e)) +            self.fail(e)      def handlePremium(self): @@ -475,7 +518,7 @@ class SimpleHoster(Hoster):              self.link = m.group(1)          except Exception, e: -            self.fail(str(e)) +            self.fail(e)      def longWait(self, wait_time=None, max_tries=3): | 
