Diffstat (limited to 'module/plugins/internal/SimpleHoster.py')
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 583 |
1 file changed, 415 insertions, 168 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py
index fa1ea45f2..4a3b7dcf7 100644
--- a/module/plugins/internal/SimpleHoster.py
+++ b/module/plugins/internal/SimpleHoster.py
@@ -1,20 +1,22 @@
 # -*- coding: utf-8 -*-
 
-import re
+from __future__ import with_statement
 
-from inspect import isclass
-from os.path import exists
-from time import time
-from urllib import unquote
-from urlparse import urljoin, urlparse
+import datetime
+import mimetypes
+import os
+import re
+import time
+import urllib
+import urlparse
 
 from module.PyFile import statusMap as _statusMap
 from module.network.CookieJar import CookieJar
 from module.network.HTTPRequest import BadHeader
 from module.network.RequestFactory import getURL
-from module.plugins.Hoster import Hoster
-from module.plugins.Plugin import Fail
-from module.utils import fixup, fs_encode, parseFileSize
+from module.plugins.internal.Hoster import Hoster
+from module.plugins.internal.Plugin import Fail, Retry
+from module.utils import decode, fixup, fs_encode, html_unescape, parseFileSize
 
 
 #@TODO: Adapt and move to PyFile in 0.4.10
@@ -27,7 +29,7 @@ def _error(self, reason, type):
         type = "unknown"
 
     msg = _("%s error") % type.strip().capitalize() if type else _("Error")
-    msg += ": %s" % reason.strip() if reason else ""
+    msg += (": %s" % reason.strip()) if reason else ""
     msg += _(" | Plugin may be out of date")
 
     raise Fail(msg)
@@ -74,7 +76,7 @@ def parseHtmlForm(attr_str, html, input_names={}):
             if name:
                 value = parseHtmlTagAttrValue("value", inputtag.group(1))
                 if not value:
-                    inputs[name] = inputtag.group(3) or ''
+                    inputs[name] = inputtag.group(3) or ""
                 else:
                     inputs[name] = value
 
@@ -100,43 +102,77 @@ def parseHtmlForm(attr_str, html, input_names={}):
     return {}, None  #: no matching form found
 
 
-#: Deprecated
+#@TODO: Remove in 0.4.10
 def parseFileInfo(plugin, url="", html=""):
     if hasattr(plugin, "getInfo"):
         info = plugin.getInfo(url, html)
         res = info['name'], info['size'], info['status'], info['url']
     else:
-        res = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url
+        url = urllib.unquote(url)
+        url_p = urlparse.urlparse(url)
+        res = ((url_p.path.split('/')[-1]
+                or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0]
+                or url_p.netloc.split('.', 1)[0]),
+               0,
+               3 if url else 8,
+               url)
 
     return res
 
 
 #@TODO: Remove in 0.4.10
-#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10
 def create_getInfo(plugin):
+    def getInfo(urls):
+        for url in urls:
+            if hasattr(plugin, "URL_REPLACEMENTS"):
+                url = replace_patterns(url, plugin.URL_REPLACEMENTS)
+            yield parseFileInfo(plugin, url)
+
+    return getInfo
+
+
+def timestamp():
+    return int(time.time() * 1000)
+
 
-    def generator(list):
-        for x in list:
-            yield x
+#@TODO: Move to Hoster in 0.4.10
+def getFileURL(self, url, follow_location=None):
+    link = ""
+    redirect = 1
 
-    if hasattr(plugin, "parseInfos"):
-        fn = lambda urls: generator((info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls))
+    if type(follow_location) is int:
+        redirect = max(follow_location, 1)
     else:
-        fn = lambda urls: generator(parseFileInfo(url) for url in urls)
+        redirect = 10
 
-    return fn
+    for i in xrange(redirect):
+        try:
+            self.logDebug("Redirect #%d to: %s" % (i, url))
+            header = self.load(url, just_header=True, decode=True)
 
+        except Exception:  #: Bad bad bad... rewrite this part in 0.4.10
+            req = pyreq.getHTTPRequest()
+            res = req.load(url, just_header=True, decode=True)
 
-def timestamp():
-    return int(time() * 1000)
+            req.close()
 
+            header = {"code": req.code}
+            for line in res.splitlines():
+                line = line.strip()
+                if not line or ":" not in line:
+                    continue
 
-#@TODO: Move to hoster class in 0.4.10
-def directLink(self, url, resumable=False):
-    link = ""
+                key, none, value = line.partition(":")
+                key = key.lower().strip()
+                value = value.strip()
 
-    for i in xrange(5 if resumable else 1):
-        header = self.load(url, ref=True, cookies=True, just_header=True, decode=True)
+                if key in header:
+                    if type(header[key]) == list:
+                        header[key].append(value)
+                    else:
+                        header[key] = [header[key], value]
+                else:
+                    header[key] = value
 
         if 'content-disposition' in header:
             link = url
@@ -144,42 +180,59 @@ def directLink(self, url, resumable=False):
         elif 'location' in header and header['location']:
             location = header['location']
 
-            if not urlparse(location).scheme:
-                p = urlparse(url)
-                base = "%s://%s" % (p.scheme, p.netloc)
-                location = urljoin(base, location)
+            if not urlparse.urlparse(location).scheme:
+                url_p = urlparse.urlparse(url)
+                baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+                location = urlparse.urljoin(baseurl, location)
 
-            if resumable:
+            if 'code' in header and header['code'] == 302:
+                link = location
+
+            if follow_location:
                 url = location
-                self.logDebug("Redirect #%d to: %s" % (++i, location))
                 continue
 
-            elif 'code' in header and header['code'] == 302:
-                link = location
+        else:
+            extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1]
 
-        elif 'content-type' in header and header['content-type'] and "html" not in header['content-type']:
-            link = url
+            if 'content-type' in header and header['content-type']:
+                mimetype = header['content-type'].split(';')[0].strip()
+
+            elif extension:
+                mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream"
+
+            else:
+                mimetype = ""
+
+            if mimetype and (link or 'html' not in mimetype):
+                link = url
+            else:
+                link = ""
 
         break
+
     else:
-        self.logError(_("Too many redirects"))
+        try:
+            self.logError(_("Too many redirects"))
+        except Exception:
+            pass
 
     return link
 
 
 def secondsToMidnight(gmt=0):
-    now = datetime.utcnow() + timedelta(hours=gmt)
+    now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt)
 
     if now.hour is 0 and now.minute < 10:
         midnight = now
     else:
-        midnight = now + timedelta(days=1)
+        midnight = now + datetime.timedelta(days=1)
 
     td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now
 
     if hasattr(td, 'total_seconds'):
         res = td.total_seconds()
-    else:  #@NOTE: work-around for python 2.5 and 2.6 missing timedelta.total_seconds
+    else:  #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds
         res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
 
     return int(res)
@@ -188,39 +241,39 @@
 class SimpleHoster(Hoster):
     __name__ = "SimpleHoster"
     __type__ = "hoster"
-    __version__ = "0.96"
+    __version__ = "1.66"
 
     __pattern__ = r'^unmatchable$'
+    __config__ = [("use_premium", "bool", "Use premium account if available" , True),
+                  ("fallback" , "bool", "Fallback to free download if premium fails", True)]
 
     __description__ = """Simple hoster plugin"""
     __license__ = "GPLv3"
-    __authors__ = [("zoidberg", "zoidberg@mujmail.cz"),
-                   ("stickell", "l.stickell@yahoo.it"),
-                   ("Walter Purcaro", "vuolter@gmail.com")]
-
+    __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
 
     """
-    Info patterns should be defined by each hoster:
+    Info patterns:
 
-      INFO_PATTERN: (optional) Name and Size of the file
+      INFO_PATTERN: (mandatory) Name and Size of the file
        example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)'
      or
-      NAME_PATTERN: (optional) Name that will be set for the file
+      NAME_PATTERN: (mandatory) Name that will be set for the file
        example: NAME_PATTERN = r'(?P<N>file_name)'
-      SIZE_PATTERN: (optional) Size that will be checked for the file
+
+      SIZE_PATTERN: (mandatory) Size that will be checked for the file
        example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)'
 
      HASHSUM_PATTERN: (optional) Hash code and type of the file
        example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)'
 
-     OFFLINE_PATTERN: (optional) Check if the page is unreachable
+     OFFLINE_PATTERN: (mandatory) Check if the page is unreachable
        example: OFFLINE_PATTERN = r'File (deleted|not found)'
 
      TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable
        example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)'
 
-    Error handling patterns are all optional:
+    Error patterns:
 
      WAIT_PATTERN: (optional) Detect waiting time
        example: WAIT_PATTERN = r''
@@ -228,46 +281,65 @@ class SimpleHoster(Hoster):
     PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account
       example: PREMIUM_ONLY_PATTERN = r'Premium account required'
 
+    HAPPY_HOUR_PATTERN: (optional)
+      example: HAPPY_HOUR_PATTERN = r'Happy hour'
+
+    IP_BLOCKED_PATTERN: (optional)
+      example: IP_BLOCKED_PATTERN = r'in your country'
+
+    DL_LIMIT_PATTERN: (optional)
+      example: DL_LIMIT_PATTERN = r'download limit'
+
+    SIZE_LIMIT_PATTERN: (optional)
+      example: SIZE_LIMIT_PATTERN = r'up to'
+
     ERROR_PATTERN: (optional) Detect any error preventing download
       example: ERROR_PATTERN = r''
 
-    Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download:
+    Instead overriding handleFree and handlePremium methods you may define the following patterns for basic link handling:
 
-      LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download
-        example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"'
+      LINK_PATTERN: (optional) group(1) should be the direct link for free and premium download
+        example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"'
+      or
+        LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download
+          example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"'
 
-      LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download
-        example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"'
+        LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download
+          example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"'
     """
 
     NAME_REPLACEMENTS = [("&#?\w+;", fixup)]
     SIZE_REPLACEMENTS = []
     URL_REPLACEMENTS = []
 
-    TEXT_ENCODING = False  #: Set to True or encoding name if encoding value in http header is not correct
-    COOKIES       = True   #: or False or list of tuples [(domain, name, value)]
+    FILE_ERRORS = [('Html error' , r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)'),
+                   ('Request error', r'([Aa]n error occured while processing your request)' ),
+                   ('Html file' , r'\A\s*<!DOCTYPE html' )]
+
+    CHECK_FILE    = True   #: Set to False to not check the last downloaded file with declared error patterns
     CHECK_TRAFFIC = False  #: Set to True to force checking traffic left for premium account
+    COOKIES       = True   #: or False or list of tuples [(domain, name, value)]
     DIRECT_LINK   = None   #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False
-    MULTI_HOSTER  = False  #: Set to True to leech other hoster link (as defined in handleMulti method)
+    DISPOSITION   = True   #: Set to True to use any content-disposition value in http header as file name
    LOGIN_ACCOUNT = False  #: Set to True to require account login
+    LOGIN_PREMIUM = False  #: Set to True to require premium account login
+    MULTI_HOSTER  = False  #: Set to True to leech other hoster link (as defined in handleMulti method)
+    TEXT_ENCODING = False  #: Set to True or encoding name if encoding value in http header is not correct
 
-    directLink = directLink  #@TODO: Remove in 0.4.10
+    LINK_PATTERN = None
 
-    @classmethod
-    def parseInfos(cls, urls):  #@TODO: Built-in in 0.4.10 core, then remove from plugins
-        for url in urls:
-            url = replace_patterns(url, cls.FILE_URL_REPLACEMENTS if hasattr(cls, "FILE_URL_REPLACEMENTS") else cls.URL_REPLACEMENTS)  #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10
-            yield cls.getInfo(url)
+    directLink = getFileURL  #@TODO: Remove in 0.4.10
 
 
     @classmethod
-    def apiInfo(cls, url="", get={}, post={}):
-        url = unquote(url)
-        return {'name' : (urlparse(url).path.split('/')[-1]
-                          or urlparse(url).query.split('=', 1)[::-1][0].split('&', 1)[0]
-                          or _("Unknown")),
+    def apiInfo(cls, url):
+        url = urllib.unquote(url)
+        url_p = urlparse.urlparse(url)
+        return {'name' : (url_p.path.split('/')[-1]
+                          or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0]
+                          or url_p.netloc.split('.', 1)[0]),
                'size' : 0,
                'status': 3 if url else 8,
                'url' : url}
@@ -276,7 +348,7 @@ class SimpleHoster(Hoster):
     @classmethod
     def getInfo(cls, url="", html=""):
         info = cls.apiInfo(url)
-        online = False
+        online = True if info['status'] is 2 else False
 
         try:
             info['pattern'] = re.match(cls.__pattern__, url).groupdict()  #: pattern groups will be saved here
@@ -284,7 +356,7 @@
         except Exception:
             info['pattern'] = {}
 
-        if not html:
+        if not html and not online:
            if not url:
                info['error'] = "missing url"
                info['status'] = 1
@@ -305,21 +377,18 @@
            elif e.code is 503:
                info['status'] = 6
 
+        except Exception:
+            pass
+
        if html:
            if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html):
                info['status'] = 1
 
-            elif hasattr(cls, "FILE_OFFLINE_PATTERN") and re.search(cls.FILE_OFFLINE_PATTERN, html):  #@TODO: Remove in 0.4.10
-                info['status'] = 1
-
            elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html):
                info['status'] = 6
 
            else:
-                for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",
-                                "FILE_NAME_PATTERN", "NAME_PATTERN",
-                                "FILE_SIZE_PATTERN", "SIZE_PATTERN",
-                                "HASHSUM_PATTERN"):  #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
+                for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"):
                    try:
                        attr = getattr(cls, pattern)
                        pdict = re.search(attr, html).groupdict()
@@ -333,19 +402,16 @@
                    else:
                        online = True
 
-        if not info['pattern']:
-            info.pop('pattern', None)
-
        if online:
            info['status'] = 2
 
            if 'N' in info['pattern']:
-                info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),
-                                                cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS)  #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10
+                info['name'] = replace_patterns(urllib.unquote(info['pattern']['N'].strip()),
+                                                cls.NAME_REPLACEMENTS)
 
            if 'S' in info['pattern']:
                size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'],
-                                        cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS)  #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10
+                                        cls.SIZE_REPLACEMENTS)
                info['size'] = parseFileSize(size)
 
            elif isinstance(info['size'], basestring):
@@ -356,9 +422,49 @@
            hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash"
            info[hashtype] = info['pattern']['H']
 
+        if not info['pattern']:
+            info.pop('pattern', None)
+
        return info
 
 
+    #@TODO: Move to Hoster in 0.4.10
+    def _log(self, level, args):
+        log = getattr(self.core.log, level)
+        msg = " | ".join((fs_encode(a) if isinstance(a, unicode) else  #@NOTE: `fs_encode` -> `encode` in 0.4.10
+                          decode(a) if isinstance(a, str) else
+                          str(a)).strip() for a in args if a)
+        log("%(plugin)s[%(id)s]: %(msg)s" % {'plugin': self.__name__,
+                                             'id' : self.pyfile.id,
+                                             'msg' : msg or _(level.upper() + " MARK")})
+
+
+    #@TODO: Move to Hoster in 0.4.10
+    def logDebug(self, *args):
+        if self.core.debug:
+            return self._log("debug", args)
+
+
+    #@TODO: Move to Hoster in 0.4.10
+    def logInfo(self, *args):
+        return self._log("info", args)
+
+
+    #@TODO: Move to Hoster in 0.4.10
+    def logWarning(self, *args):
+        return self._log("warning", args)
+
+
+    #@TODO: Move to Hoster in 0.4.10
+    def logError(self, *args):
+        return self._log("error", args)
+
+
+    #@TODO: Move to Hoster in 0.4.10
+    def logCritical(self, *args):
+        return self._log("critical", args)
+
+
    def setup(self):
        self.resumeDownload = self.multiDL = self.premium
@@ -367,9 +473,16 @@
        self.pyfile.error = ""  #@TODO: Remove in 0.4.10
        self.info = {}
-        self.link = ""  #@TODO: Move to hoster class in 0.4.10
-        self.directDL = False  #@TODO: Move to hoster class in 0.4.10
-        self.multihost = False  #@TODO: Move to hoster class in 0.4.10
+        self.html = ""
+        self.link = ""  #@TODO: Move to Hoster in 0.4.10
+        self.directDL = False  #@TODO: Move to Hoster in 0.4.10
+        self.multihost = False  #@TODO: Move to Hoster in 0.4.10
+
+        if not self.getConfig('use_premium', True):
+            self.retryFree()
+
+        if self.LOGIN_PREMIUM and not self.premium:
+            self.fail(_("Required premium account not found"))
 
        if self.LOGIN_ACCOUNT and not self.account:
            self.fail(_("Required account not found"))
@@ -379,6 +492,13 @@
        if isinstance(self.COOKIES, list):
            set_cookies(self.req.cj, self.COOKIES)
 
+        if self.LINK_PATTERN:
+            if not hasattr(self, 'LINK_FREE_PATTERN'):
+                self.LINK_FREE_PATTERN = self.LINK_PATTERN
+
+            if not hasattr(self, 'LINK_PREMIUM_PATTERN'):
+                self.LINK_PREMIUM_PATTERN = self.LINK_PATTERN
+
        if (self.MULTI_HOSTER
            and (self.__pattern__ != self.core.pluginManager.hosterPlugins[self.__name__]['pattern']
                 or re.match(self.__pattern__, self.pyfile.url) is None)):
@@ -390,80 +510,116 @@
        else:
            self.directDL = self.DIRECT_LINK
 
-        self.pyfile.url = replace_patterns(self.pyfile.url,
-                                           self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS)  #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10
+        self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS)
 
 
    def preload(self):
-        self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING)
+        self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), ref=False, decode=not self.TEXT_ENCODING)
 
        if isinstance(self.TEXT_ENCODING, basestring):
            self.html = unicode(self.html, self.TEXT_ENCODING)
 
 
    def process(self, pyfile):
-        self.prepare()
-        self.checkInfo()
+        try:
+            self.prepare()
+            self.checkInfo()
 
-        if self.directDL:
-            self.logDebug("Looking for direct download link...")
-            self.handleDirect(pyfile)
+            if self.directDL:
+                self.logDebug("Looking for direct download link...")
+                self.handleDirect(pyfile)
 
-        if self.multihost and not self.link and not self.lastDownload:
-            self.logDebug("Looking for leeched download link...")
-            self.handleMulti(pyfile)
+            if self.multihost and not self.link and not self.lastDownload:
+                self.logDebug("Looking for leeched download link...")
+                self.handleMulti(pyfile)
+
+                if not self.link and not self.lastDownload:
+                    self.MULTI_HOSTER = False
+                    self.retry(1, reason=_("Multi hoster fails"))
 
            if not self.link and not self.lastDownload:
-            self.MULTI_HOSTER = False
-            self.retry(1, reason="Multi hoster fails")
+                self.preload()
+                self.checkInfo()
+
+                if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
+                    self.logDebug("Handled as premium download")
+                    self.handlePremium(pyfile)
+
+                elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
+                    self.logDebug("Handled as free download")
+                    self.handleFree(pyfile)
+
+            self.download(self.link, ref=False, disposition=self.DISPOSITION)
+            self.checkFile()
+
+        except Fail, e:  #@TODO: Move to PluginThread in 0.4.10
+            err = str(e)  #@TODO: Recheck in 0.4.10
+
+            if err == _("No captcha result obtained in appropiate time by any of the plugins."):  #@TODO: Fix in 0.4.10
+                self.checkFile()
+
+            elif self.getConfig('fallback', True) and self.premium:
+                self.logWarning(_("Premium download failed"), e)
+                self.retryFree()
+
+            else:
+                raise Fail(err)
 
-        if not self.link and not self.lastDownload:
-            self.preload()
-            self.checkInfo()
 
-        if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
-            self.logDebug("Handled as premium download")
-            self.handlePremium(pyfile)
+    def download(self, url, *args, **kwargs):
+        if not url or not isinstance(url, basestring):
+            return
 
-        elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
-            self.logDebug("Handled as free download")
-            self.handleFree(pyfile)
+        self.correctCaptcha()
 
-        self.downloadLink(self.link)
-        self.checkFile()
+        url = html_unescape(url.decode('unicode-escape').strip())  #@TODO: Move to Hoster in 0.4.10
 
+        if not urlparse.urlparse(url).scheme:
+            url_p = urlparse.urlparse(self.pyfile.url)
+            baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+            url = urlparse.urljoin(baseurl, url)
 
-    def downloadLink(self, link):
-        if link and isinstance(link, basestring):
-            self.correctCaptcha()
-            self.download(link, disposition=False)  #@TODO: Set `disposition=True` in 0.4.10
+        return super(SimpleHoster, self).download(url, *args, **kwargs)
 
 
    def checkFile(self):
+        lastDownload = fs_encode(self.lastDownload)
+
        if self.cTask and not self.lastDownload:
            self.invalidCaptcha()
            self.retry(10, reason=_("Wrong captcha"))
 
-        elif not self.lastDownload or not exists(fs_encode(self.lastDownload)):
+        elif not self.lastDownload or not os.path.exists(lastDownload):
            self.lastDownload = ""
-            self.fail(self.pyfile.error or _("No file downloaded"))
+            self.error(self.pyfile.error or _("No file downloaded"))
 
        else:
-            rules = {'empty file': re.compile(r'\A\Z'),
-                     'html file' : re.compile(r'\A\s*<!DOCTYPE html'),
-                     'html error': re.compile(r'\A\s*(<.+>)?\d{3}(\Z|\s+)')}
+            #@TODO: Move to Hoster in 0.4.10
+            if os.stat(lastDownload).st_size < 1 or self.checkDownload({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}):
+                self.error(_("Empty file"))
 
-            if hasattr(self, 'ERROR_PATTERN'):
-                rules['error'] = re.compile(self.ERROR_PATTERN)
+            self.logDebug("Checking last downloaded file with built-in rules")
+            for r, p in self.FILE_ERRORS:
+                errmsg = self.checkDownload({r: re.compile(p)})
+                if errmsg is not None:
+                    errmsg = errmsg.strip().capitalize()
+
+                    try:
+                        errmsg += " | " + self.lastCheck.group(1).strip()
+                    except Exception:
+                        pass
 
-            check = self.checkDownload(rules)
-            if check:  #@TODO: Move to hoster in 0.4.10
-                errmsg = check.strip().capitalize()
-                if self.lastCheck:
-                    errmsg += " | " + self.lastCheck.group(0).strip()
+                    self.logWarning("Check result: " + errmsg, "Waiting 1 minute and retry")
+                    self.wantReconnect = True
+                    self.retry(wait_time=60, reason=errmsg)
+            else:
+                if self.CHECK_FILE:
+                    self.logDebug("Checking last downloaded file with custom rules")
+                    with open(lastDownload, "rb") as f:
+                        self.html = f.read(50000)  #@TODO: Recheck in 0.4.10
+                    self.checkErrors()
 
-                self.lastDownload = ""
-                self.retry(10, 60, errmsg)
+            self.logDebug("No file errors found")
 
 
    def checkErrors(self):
@@ -471,30 +627,111 @@
            self.logWarning(_("No html code to check"))
            return
 
-        if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html):
-            self.fail(_("Link require a premium account to be handled"))
+        if hasattr(self, 'IP_BLOCKED_PATTERN') and re.search(self.IP_BLOCKED_PATTERN, self.html):
+            self.fail(_("Connection from your current IP address is not allowed"))
+
+        elif not self.premium:
+            if hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html):
+                self.fail(_("File can be downloaded by premium users only"))
+
+            elif hasattr(self, 'SIZE_LIMIT_PATTERN') and re.search(self.SIZE_LIMIT_PATTERN, self.html):
+                self.fail(_("File too large for free download"))
+
+            elif hasattr(self, 'DL_LIMIT_PATTERN') and re.search(self.DL_LIMIT_PATTERN, self.html):
+                m = re.search(self.DL_LIMIT_PATTERN, self.html)
+                try:
+                    errmsg = m.group(1).strip()
+                except Exception:
+                    errmsg = m.group(0).strip()
+
+                self.info['error'] = re.sub(r'<.*?>', " ", errmsg)
+                self.logWarning(self.info['error'])
+
+                if re.search('da(il)?y|today', errmsg, re.I):
+                    wait_time = secondsToMidnight(gmt=2)
+                else:
+                    wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+                                    re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I))
 
-        elif hasattr(self, 'ERROR_PATTERN'):
+                self.wantReconnect = wait_time > 300
+                self.retry(1, wait_time, _("Download limit exceeded"))
+
+        if hasattr(self, 'HAPPY_HOUR_PATTERN') and re.search(self.HAPPY_HOUR_PATTERN, self.html):
+            self.multiDL = True
+
+        if hasattr(self, 'ERROR_PATTERN'):
            m = re.search(self.ERROR_PATTERN, self.html)
            if m:
-                errmsg = self.info['error'] = m.group(1)
-                self.error(errmsg)
+                try:
+                    errmsg = m.group(1).strip()
+                except Exception:
+                    errmsg = m.group(0).strip()
+
+                self.info['error'] = re.sub(r'<.*?>', " ", errmsg)
+                self.logWarning(self.info['error'])
+
+                if re.search('limit|wait|slot', errmsg, re.I):
+                    if re.search("da(il)?y|today", errmsg):
+                        wait_time = secondsToMidnight(gmt=2)
+                    else:
+                        wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+                                        re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I))
+
+                    self.wantReconnect = wait_time > 300
+                    self.retry(1, wait_time, _("Download limit exceeded"))
+
+                elif re.search('country|ip|region|nation', errmsg, re.I):
+                    self.fail(_("Connection from your current IP address is not allowed"))
+
+                elif re.search('captcha|code', errmsg, re.I):
+                    self.invalidCaptcha()
+                    self.retry(10, reason=_("Wrong captcha"))
+
+                elif re.search('countdown|expired', errmsg, re.I):
+                    self.retry(10, 60, _("Link expired"))
+
+                elif re.search('maintenance|maintainance|temp', errmsg, re.I):
+                    self.tempOffline()
+
+                elif re.search('up to|size', errmsg, re.I):
+                    self.fail(_("File too large for free download"))
+
+                elif re.search('offline|delet|remov|not? (found|(longer)? available)', errmsg, re.I):
+                    self.offline()
+
+                elif re.search('filename', errmsg, re.I):
+                    url_p = urlparse.urlparse(self.pyfile.url)
+                    self.pyfile.url = "%s://%s/%s" % (url_p.scheme, url_p.netloc, url_p.path.strip('/').split('/')[0])
+                    self.retry(1, reason=_("Wrong url"))
+
+                elif re.search('premium', errmsg, re.I):
+                    self.fail(_("File can be downloaded by premium users only"))
+
+                else:
+                    self.wantReconnect = True
+                    self.retry(wait_time=60, reason=errmsg)
 
        elif hasattr(self, 'WAIT_PATTERN'):
            m = re.search(self.WAIT_PATTERN, self.html)
            if m:
-                wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
-                                 re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)])
+                try:
+                    waitmsg = m.group(1).strip()
+                except Exception:
+                    waitmsg = m.group(0).strip()
+
+                wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+                                re.findall(r'(\d+)\s*(hr|hour|min|sec|)', waitmsg, re.I))
 
                self.wait(wait_time, wait_time > 300)
-                return
 
        self.info.pop('error', None)
 
 
    def checkStatus(self, getinfo=True):
        if not self.info or getinfo:
-            self.logDebug("File info (BEFORE): %s" % self.info)
+            self.logDebug("Update file info...")
+            self.logDebug("Previous file info: %s" % self.info)
            self.info.update(self.getInfo(self.pyfile.url, self.html))
+            self.logDebug("Current file info: %s" % self.info)
 
        try:
            status = self.info['status']
@@ -506,22 +743,22 @@
            self.tempOffline()
 
        elif status is 8:
-            self.fail()
+            self.fail(self.info['error'] if 'error' in self.info else _("Failed"))
 
        finally:
-            self.logDebug("File status: %s" % statusMap[status],
-                          "File info: %s" % self.info)
+            self.logDebug("File status: %s" % statusMap[status])
 
 
    def checkNameSize(self, getinfo=True):
        if not self.info or getinfo:
-            self.logDebug("File info (BEFORE): %s" % self.info)
+            self.logDebug("Update file info...")
+            self.logDebug("Previous file info: %s" % self.info)
            self.info.update(self.getInfo(self.pyfile.url, self.html))
-            self.logDebug("File info (AFTER): %s" % self.info)
+            self.logDebug("Current file info: %s" % self.info)
 
        try:
-            url = self.info['url']
-            name = self.info['name']
+            url = self.info['url'].strip()
+            name = self.info['name'].strip()
 
            if name and name != url:
                self.pyfile.name = name
@@ -537,7 +774,7 @@
            pass
 
        self.logDebug("File name: %s" % self.pyfile.name,
-                      "File size: %s" % self.pyfile.size if self.pyfile.size > 0 else "Unknown")
+                      "File size: %s byte" % self.pyfile.size if self.pyfile.size > 0 else "File size: Unknown")
@@ -562,7 +799,6 @@
        if link:
            self.logInfo(_("Direct download link detected"))
-
            self.link = link
        else:
            self.logDebug("Direct download link not found")
@@ -576,16 +812,12 @@
        if not hasattr(self, 'LINK_FREE_PATTERN'):
            self.logError(_("Free download not implemented"))
 
-        try:
-            m = re.search(self.LINK_FREE_PATTERN, self.html)
-            if m is None:
-                self.error(_("Free download link not found"))
-
+        m = re.search(self.LINK_FREE_PATTERN, self.html)
+        if m is None:
+            self.error(_("Free download link not found"))
+        else:
            self.link = m.group(1)
 
-        except Exception, e:
-            self.fail(e)
-
 
    def handlePremium(self, pyfile):
        if not hasattr(self, 'LINK_PREMIUM_PATTERN'):
@@ -593,16 +825,12 @@
            self.logDebug("Handled as free download")
            self.handleFree(pyfile)
 
-        try:
-            m = re.search(self.LINK_PREMIUM_PATTERN, self.html)
-            if m is None:
-                self.error(_("Premium download link not found"))
-
+        m = re.search(self.LINK_PREMIUM_PATTERN, self.html)
+        if m is None:
+            self.error(_("Premium download link not found"))
+        else:
            self.link = m.group(1)
 
-        except Exception, e:
-            self.fail(e)
-
 
    def longWait(self, wait_time=None, max_tries=3):
        if wait_time and isinstance(wait_time, (int, long, float)):
@@ -614,8 +842,7 @@
        self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str)
 
-        self.setWait(wait_time, True)
-        self.wait()
+        self.wait(wait_time, True)
        self.retry(max_tries=max_tries, reason=_("Download limit reached"))
@@ -640,6 +867,26 @@
    #@TODO: Remove in 0.4.10
+    def getConfig(self, option, default=''):
+        """getConfig with default value - sublass may not implements all config options"""
+        try:
+            return self.getConf(option)
+
+        except KeyError:
+            return default
+
+
+    def retryFree(self):
+        if not self.premium:
+            return
+        self.premium = False
+        self.account = None
+        self.req = self.core.requestFactory.getRequest(self.__name__)
+        self.retries = -1
+        raise Retry(_("Fallback to free download"))
+
+
+    #@TODO: Remove in 0.4.10
    def wait(self, seconds=0, reconnect=None):
        return _wait(self, seconds, reconnect)
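
The rewritten docstring above reduces most hoster plugins to a handful of class-level regexes. As a rough, hypothetical sketch of how a plugin built on this API could look (the hoster name, URL pattern and regexes below are invented for illustration; only the attribute names and helpers come from the diff above):

# -*- coding: utf-8 -*-
# Illustrative sketch only, not part of the diff: ExampleHost, its URL
# pattern and all regexes are made up; the class attributes are the ones
# documented in the SimpleHoster docstring.

from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo


class ExampleHost(SimpleHoster):
    __name__    = "ExampleHost"
    __type__    = "hoster"
    __version__ = "0.01"

    __pattern__ = r'https?://(?:www\.)?examplehost\.tld/\w{8}'

    __description__ = """Examplehost.tld hoster plugin (sketch)"""
    __license__     = "GPLv3"
    __authors__     = [("example", "example@example.tld")]

    #: Mandatory info patterns; the named groups N, S and U are read by getInfo()
    NAME_PATTERN    = r'<h1 class="filename">(?P<N>.+?)</h1>'
    SIZE_PATTERN    = r'Size:\s*(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)'
    OFFLINE_PATTERN = r'>File (deleted|not found)<'

    #: With LINK_PATTERN set, prepare() fills LINK_FREE_PATTERN and
    #: LINK_PREMIUM_PATTERN, so handleFree/handlePremium need no override
    LINK_PATTERN = r'<a class="download" href="(.+?)"'


getInfo = create_getInfo(ExampleHost)

The module-level getInfo produced by create_getInfo is the generator the core's link checker consumes, matching the new create_getInfo implementation in the diff.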