From a0805f27015748638a5fb05fd55b746852c53362 Mon Sep 17 00:00:00 2001 From: RaNaN Date: Wed, 15 Jun 2011 17:35:48 +0200 Subject: html_unescape function, little plugin improvements --- module/Utils.py | 27 +++++++++++++++++++++++++++ module/network/HTTPRequest.py | 3 +++ module/plugins/hoster/FreakshareCom.py | 5 +++++ module/plugins/hoster/MegauploadCom.py | 12 +++++++----- module/plugins/hoster/RapidshareCom.py | 6 ++---- module/plugins/hoster/YoutubeCom.py | 4 +++- 6 files changed, 47 insertions(+), 10 deletions(-) diff --git a/module/Utils.py b/module/Utils.py index e6e40c956..cdf76c144 100644 --- a/module/Utils.py +++ b/module/Utils.py @@ -8,6 +8,7 @@ import time import re from os.path import join from string import maketrans +from htmlentitydefs import name2codepoint def chmod(*args): try: @@ -129,6 +130,32 @@ def lock(func): return new + +def fixup(m): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + name = text[1:-1] + text = unichr(name2codepoint[name]) + except KeyError: + pass + + return text # leave as is + +def html_unescape(text): + """Removes HTML or XML character references and entities from a text string""" + return re.sub("&#?\w+;", fixup, text) + if __name__ == "__main__": print freeSpace(".") diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index f90048f4d..7904070e8 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -24,6 +24,7 @@ from urllib import quote, urlencode from logging import getLogger from cStringIO import StringIO +from module.utils import html_unescape from module.plugins.Plugin import Abort def myquote(url): @@ -228,6 +229,8 @@ class HTTPRequest(): #self.log.debug("Decoded %s" % encoding ) decoder = getincrementaldecoder(encoding)("replace") rep = decoder.decode(rep, True) + + #TODO: html_unescape as default except LookupError: self.log.debug("No Decoder foung for %s" % encoding) diff --git a/module/plugins/hoster/FreakshareCom.py b/module/plugins/hoster/FreakshareCom.py index c20206bf5..869b8a99e 100644 --- a/module/plugins/hoster/FreakshareCom.py +++ b/module/plugins/hoster/FreakshareCom.py @@ -34,6 +34,11 @@ class FreakshareCom(Hoster): self.get_file_url() self.download(self.pyfile.url, post=self.req_opts) + + + check = self.checkDownload({"bad": "bad try"}) + if check == "bad": + self.fail("Bad Try.") def prepare(self): diff --git a/module/plugins/hoster/MegauploadCom.py b/module/plugins/hoster/MegauploadCom.py index fa16fdf31..342a8024d 100644 --- a/module/plugins/hoster/MegauploadCom.py +++ b/module/plugins/hoster/MegauploadCom.py @@ -7,7 +7,7 @@ from module.plugins.Hoster import Hoster from module.network.RequestFactory import getURL -from module.unescape import unescape +from module.utils import html_unescape from module.PyFile import statusMap from pycurl import error @@ -36,7 +36,7 @@ def getInfo(urls): # File info fileInfo = _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap) url = urls[i] - name = fileInfo.get('name', url) + name = html_unescape(fileInfo.get('name', url)) size = fileInfo.get('size', 0) status = fileInfo.get('status', statusMap['queued']) @@ -51,7 +51,7 @@ def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap): fileInfo = {} try: fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]] - fileInfo['name'] = apiFileDataMap['n'] + fileInfo['name'] = apiFileDataMap['n'] fileInfo['size'] = int(apiFileDataMap['s']) fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']] except: @@ -215,10 +215,12 @@ class MegauploadCom(Hoster): def get_file_name(self): try: - return self.api["name"] + name = self.api["name"] except KeyError: file_name_pattern = 'id="downloadlink">