From 81b2595cdbbca825f0ce553395acb88deffd4e99 Mon Sep 17 00:00:00 2001 From: zoidberg10 Date: Mon, 5 Dec 2011 16:50:50 +0100 Subject: simplehoster - use parseFileSize --- module/plugins/hoster/CrockoCom.py | 4 +- module/plugins/hoster/CzshareCom.py | 13 +++--- module/plugins/hoster/FourSharedCom.py | 6 +-- module/plugins/hoster/HellspyCz.py | 4 +- module/plugins/hoster/IfolderRu.py | 7 +-- module/plugins/hoster/UploadboxCom.py | 4 +- module/plugins/hoster/UploadedTo.py | 6 +-- module/plugins/internal/SimpleHoster.py | 81 +++++++++++++++++---------------- 8 files changed, 64 insertions(+), 61 deletions(-) (limited to 'module') diff --git a/module/plugins/hoster/CrockoCom.py b/module/plugins/hoster/CrockoCom.py index 7eafa67ed..9598025ec 100644 --- a/module/plugins/hoster/CrockoCom.py +++ b/module/plugins/hoster/CrockoCom.py @@ -9,7 +9,7 @@ class CrockoCom(SimpleHoster): __name__ = "CrockoCom" __type__ = "hoster" __pattern__ = r"http://(www\.)?(crocko|easy-share).com/.*" - __version__ = "0.10" + __version__ = "0.11" __description__ = """Crocko Download Hoster""" __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") @@ -23,7 +23,7 @@ class CrockoCom(SimpleHoster): FORM_PATTERN = r'
(.*?)
' FORM_INPUT_PATTERN = r']* name="?([^" ]+)"? value="?([^" ]+)"?[^>]*>' - NAME_REPLACEMENTS = [(r'<[^>]*>', '')] + FILE_NAME_REPLACEMENTS = [(r'<[^>]*>', '')] def handleFree(self): if "You need Premium membership to download this file." in self.html: diff --git a/module/plugins/hoster/CzshareCom.py b/module/plugins/hoster/CzshareCom.py index 158fb0d1d..0ef9c267c 100644 --- a/module/plugins/hoster/CzshareCom.py +++ b/module/plugins/hoster/CzshareCom.py @@ -45,21 +45,20 @@ class CzshareCom(SimpleHoster): __name__ = "CzshareCom" __type__ = "hoster" __pattern__ = r"http://(\w*\.)*czshare\.(com|cz)/(\d+/|download.php\?).*" - __version__ = "0.85" + __version__ = "0.86" __description__ = """CZshare.com""" __author_name__ = ("zoidberg") - SIZE_REPLACEMENTS = [(',', '.'), (' ', '')] + FILE_NAME_PATTERN = r'
\s*

\s*Cel. n.zev: ]*>(?P[^<]+)' + FILE_SIZE_PATTERN = r'

(?:\s*

[^\n]*

)*\s*Velikost:\s*(?P[0-9., ]+)(?P[kKMG])i?B\s*
' + FILE_OFFLINE_PATTERN = r'
\s*

' + FILE_SIZE_REPLACEMENTS = [(' ', '')] + FREE_URL_PATTERN = r'[^>]*alt="([^"]+)" />' FREE_FORM_PATTERN = r'
\s*(.*?)
' PREMIUM_FORM_PATTERN = r'
(.*?)
' FORM_INPUT_PATTERN = r']* name="([^"]+)" value="([^"]+)"[^>]*/>' - #FILE_OFFLINE_PATTERN = r'

[^<]*[Ss]oubor (nenalezen|expiroval|je po.kozen)[^<]* 

' - FILE_OFFLINE_PATTERN = r'
\s*

' MULTIDL_PATTERN = r"

Z[^<]*PROFI.

" - #FILE_NAME_PATTERN = r'

([^<]+) 

' - FILE_NAME_PATTERN = r'
\s*

\s*Cel. n.zev: ]*>(?P[^<]+)' - FILE_SIZE_PATTERN = r'

(?:\s*

[^\n]*

)*\s*Velikost:\s*(?P[0-9., ]+)(?P[kKMG])i?B\s*
' USER_CREDIT_PATTERN = r'
\s*kredit: ([0-9., ]+)([kKMG]i?B)\s*
' def setup(self): diff --git a/module/plugins/hoster/FourSharedCom.py b/module/plugins/hoster/FourSharedCom.py index 5d10204a7..b1cc252e2 100644 --- a/module/plugins/hoster/FourSharedCom.py +++ b/module/plugins/hoster/FourSharedCom.py @@ -8,7 +8,7 @@ class FourSharedCom(SimpleHoster): __name__ = "FourSharedCom" __type__ = "hoster" __pattern__ = r"http://[\w\.]*?4shared(-china)?\.com/(account/)?(download|get|file|document|photo|video|audio)/.+?/.*" - __version__ = "0.23" + __version__ = "0.24" __description__ = """4Shared Download Hoster""" __author_name__ = ("jeix", "zoidberg") __author_mail__ = ("jeix@hasnomail.de", "zoidberg@mujmail.cz") @@ -16,10 +16,10 @@ class FourSharedCom(SimpleHoster): FILE_NAME_PATTERN = '' FILE_SIZE_PATTERN = '' FILE_OFFLINE_PATTERN = 'The file link that you requested is not valid\.|This file was deleted.' + FILE_NAME_REPLACEMENTS = [(r"&#(\d+).", lambda m: unichr(int(m.group(1))))] + DOWNLOAD_BUTTON_PATTERN = '\s*(404 - Page|File) not found

' - URL_REPLACEMENTS = [(r"http://(?:\w*\.)*hellspy\.(?:cz|com|sk|hu)(/\S+/\d+)/?.*", r"http://www.hellspy.com\1")] + FILE_URL_REPLACEMENTS = [(r"http://(?:\w*\.)*hellspy\.(?:cz|com|sk|hu)(/\S+/\d+)/?.*", r"http://www.hellspy.com\1")] CREDIT_LEFT_PATTERN = r'Credits: \s*(\d+)' DOWNLOAD_AGAIN_PATTERN = r']*title="You can download the file without deducting your credit.">' diff --git a/module/plugins/hoster/IfolderRu.py b/module/plugins/hoster/IfolderRu.py index 3177271c4..83b98ecc9 100644 --- a/module/plugins/hoster/IfolderRu.py +++ b/module/plugins/hoster/IfolderRu.py @@ -30,9 +30,11 @@ class IfolderRu(SimpleHoster): __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") - SIZE_UNITS = {u'Кб': 1, u'Мб': 2, u'Гб': 3} + FILE_SIZE_REPLACEMENTS = [(u'Кб', 'KB'), (u'Мб', 'MB'), (u'Гб', 'GB')] FILE_NAME_PATTERN = ur'(?:
)?Название:(?:)? (?P[^<]+)<(?:/div|br)>' - FILE_SIZE_PATTERN = ur'(?:
)?Размер:(?:)? (?P[0-9.]+) (?P[^<]+)<(?:/div|br)>' + FILE_SIZE_PATTERN = ur'(?:
)?Размер:(?:)? (?P[^<]+)<(?:/div|br)>' + FILE_OFFLINE_PATTERN = ur'

Файл номер [^<]* не найден !!!

' + SESSION_ID_PATTERN = r'
]+)>' FORM1_PATTERN = r'
(.*?)
' FORM_INPUT_PATTERN = r']* name="?([^" ]+)"? value="?([^" ]+)"?[^>]*>' @@ -40,7 +42,6 @@ class IfolderRu(SimpleHoster): HIDDEN_INPUT_PATTERN = r"var v = .*?name='([^']+)' value='1'" DOWNLOAD_LINK_PATTERN = r'
неверный код,
введите еще раз
' - FILE_OFFLINE_PATTERN = ur'

Файл номер [^<]* не найден !!!

' def setup(self): self.resumeDownload = self.multiDL = True if self.account else False diff --git a/module/plugins/hoster/UploadboxCom.py b/module/plugins/hoster/UploadboxCom.py index 584c64e77..0eb023cb2 100644 --- a/module/plugins/hoster/UploadboxCom.py +++ b/module/plugins/hoster/UploadboxCom.py @@ -31,7 +31,7 @@ class UploadboxCom(SimpleHoster): __name__ = "Uploadbox" __type__ = "hoster" __pattern__ = r"http://(?:www\.)?uploadbox\.com/files/([^/]+).*" - __version__ = "0.03" + __version__ = "0.04" __description__ = """UploadBox.com plugin - free only""" __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") @@ -39,7 +39,7 @@ class UploadboxCom(SimpleHoster): FILE_NAME_PATTERN = r'

File name:\s*(?P[^<]+)

' FILE_SIZE_PATTERN = r'Size:\s*(?P[0-9.]+) (?P[kKMG])i?B ' FILE_OFFLINE_PATTERN = r'File deleted from service' - NAME_REPLACEMENTS = [(r"(.*)", lambda m: unicode(m.group(1), 'koi8_r'))] + FILE_NAME_REPLACEMENTS = [(r"(.*)", lambda m: unicode(m.group(1), 'koi8_r'))] FREE_FORM_PATTERN = r'
(.*?)
' FORM_INPUT_PATTERN = r']* name="([^"]+)" value="([^"]+)" />' diff --git a/module/plugins/hoster/UploadedTo.py b/module/plugins/hoster/UploadedTo.py index 174c386a8..39483cf86 100644 --- a/module/plugins/hoster/UploadedTo.py +++ b/module/plugins/hoster/UploadedTo.py @@ -59,7 +59,7 @@ def parseFileInfo(self, url = '', html = ''): found = re.search(self.FILE_INFO_PATTERN, html) if found: name, fileid = html_unescape(found.group('N')), found.group('ID') - size = parseFileSize(found.group('S'), found.group('U')) + size = parseFileSize(found.group('S')) status = 2 return name, size, status, fileid @@ -84,12 +84,12 @@ class UploadedTo(Hoster): __name__ = "UploadedTo" __type__ = "hoster" __pattern__ = r"(http://[\w\.-]*?uploaded\.to/.*?(file/|\?id=|&id=)[\w]+/?)|(http://[\w\.]*?ul\.to/(\?id=|&id=)?[\w\-]+/.+)|(http://[\w\.]*?ul\.to/(\?id=|&id=)?[\w\-]+/?)" - __version__ = "0.52" + __version__ = "0.53" __description__ = """Uploaded.to Download Hoster""" __author_name__ = ("spoob", "mkaay") __author_mail__ = ("spoob@pyload.org", "mkaay@mkaay.de") - FILE_INFO_PATTERN = r'
(?P[^<]+)  \s*]*>(?P[0-9,]+) (?P[KMG])B' + FILE_INFO_PATTERN = r'(?P[^<]+)  \s*]*>(?P[^<]+)' FILE_OFFLINE_PATTERN = r'Error: 404' def setup(self): diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index c101cbf6d..4a03ec60a 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -20,7 +20,7 @@ from urlparse import urlparse from re import search, sub from module.plugins.Hoster import Hoster -from module.utils import html_unescape +from module.utils import html_unescape, parseFileSize from module.network.RequestFactory import getURL def reSub(string, ruleslist): @@ -31,40 +31,37 @@ def reSub(string, ruleslist): def parseFileInfo(self, url = '', html = ''): if not html and hasattr(self, "html"): html = self.html - name, size, status, found = '', 0, 3, 0 + info = {"name" : url, "size" : 0, "status" : 3} if hasattr(self, "FILE_OFFLINE_PATTERN") and search(self.FILE_OFFLINE_PATTERN, html): # File offline - status = 1 - elif hasattr(self, "FILE_INFO_PATTERN"): - found = search(self.FILE_INFO_PATTERN, html) - if found: - name, size, units = found.group('N'), found.group('S'), found.group('U') + info['status'] = 1 else: - if hasattr(self, "FILE_NAME_PATTERN"): - found = search(self.FILE_NAME_PATTERN, html) - if found: - name = found.group('N') - - if hasattr(self, "FILE_SIZE_PATTERN"): - found = search(self.FILE_SIZE_PATTERN, html) - if found: - size, units = found.group('S'), found.group('U') + for pattern in ("FILE_INFO_PATTERN", "FILE_NAME_PATTERN", "FILE_SIZE_PATTERN"): + try: + info = dict(info, **search(getattr(self, pattern), html).groupdict()) + except AttributeError: + continue - if size: + if len(info) > 3: # File online, return name and size - size = float(reSub(size, self.SIZE_REPLACEMENTS)) * 1024 ** self.SIZE_UNITS[units] - status = 2 + info['status'] = 2 + if 'N' in info: info['name'] = reSub(info['N'], self.FILE_NAME_REPLACEMENTS) + if 'S' in info: + size = info['S'] + info['U'] if 'U' in info else info['S'] + print repr(size) + size = parseFileSize(reSub(size, self.FILE_SIZE_REPLACEMENTS)) + print repr(self.FILE_SIZE_REPLACEMENTS), repr(size) + info['size'] = size - name = reSub(name, self.NAME_REPLACEMENTS) if name else url - - return name, size, status, url + print info + return info def create_getInfo(plugin): def getInfo(urls): for url in urls: - file_info = parseFileInfo(plugin, url, getURL(reSub(url, plugin.URL_REPLACEMENTS), decode=True)) - yield file_info + file_info = parseFileInfo(plugin, url, getURL(reSub(url, plugin.FILE_URL_REPLACEMENTS), decode=True)) + yield file_info['name'], file_info['size'], file_info['status'], url return getInfo class PluginParseError(Exception): @@ -90,20 +87,18 @@ class SimpleHoster(Hoster): FILE_OFFLINE_PATTERN = r'File (deleted|not found)' TEMP_OFFLINE_PATTERN = r'Server maintainance' """ - #TODO: could be replaced when using utils.parseFileSize ? - #some plugins need to override these - SIZE_UNITS = {'k': 1, 'K': 1, 'M': 2, 'G': 3} - SIZE_REPLACEMENTS = [(',', ''), (' ', '')] - NAME_REPLACEMENTS = [] - URL_REPLACEMENTS = [] + + FILE_SIZE_REPLACEMENTS = [] + FILE_NAME_REPLACEMENTS = [] + FILE_URL_REPLACEMENTS = [] def setup(self): self.resumeDownload = self.multiDL = True if self.account else False def process(self, pyfile): - pyfile.url = reSub(pyfile.url, self.URL_REPLACEMENTS) + pyfile.url = reSub(pyfile.url, self.FILE_URL_REPLACEMENTS) self.html = self.load(pyfile.url, decode = True) - self.getFileInfo() + self.file_info = self.getFileInfo() if self.account: self.handlePremium() else: @@ -114,17 +109,25 @@ class SimpleHoster(Hoster): if hasattr(self, "TEMP_OFFLINE_PATTERN") and search(self.TEMP_OFFLINE_PATTERN, html): self.tempOffline() - name, size, status, url = parseFileInfo(self) - if status == 1: + file_info = parseFileInfo(self) + if file_info['status'] == 1: self.offline() - elif status != 2: + elif file_info['status'] != 2: + self.logDebug(file_info) self.parseError('File info') - if not name: - name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1]) + if file_info['name']: + self.pyfile.name = file_info['name'] + else: + self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1]) + + if file_info['size']: + self.pyfile.size = file_info['size'] + else: + self.logError("File size not parsed") - self.logDebug("FILE NAME: %s FILE SIZE: %s" % (name, size)) - self.pyfile.name, self.pyfile.size = name, size + self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size)) + return file_info def handleFree(self): self.fail("Free download not implemented") -- cgit v1.2.3