diff options
Diffstat (limited to 'module')
| -rw-r--r-- | module/Utils.py | 27 | ||||
| -rw-r--r-- | module/network/HTTPRequest.py | 3 | ||||
| -rw-r--r-- | module/plugins/hoster/FreakshareCom.py | 5 | ||||
| -rw-r--r-- | module/plugins/hoster/MegauploadCom.py | 12 | ||||
| -rw-r--r-- | module/plugins/hoster/RapidshareCom.py | 6 | ||||
| -rw-r--r-- | module/plugins/hoster/YoutubeCom.py | 4 | 
6 files changed, 47 insertions, 10 deletions
| diff --git a/module/Utils.py b/module/Utils.py index e6e40c956..cdf76c144 100644 --- a/module/Utils.py +++ b/module/Utils.py @@ -8,6 +8,7 @@ import time  import re  from os.path import join  from string import maketrans +from htmlentitydefs import name2codepoint  def chmod(*args):      try: @@ -129,6 +130,32 @@ def lock(func):      return new + +def fixup(m): +    text = m.group(0) +    if text[:2] == "&#": +        # character reference +        try: +            if text[:3] == "&#x": +                return unichr(int(text[3:-1], 16)) +            else: +                return unichr(int(text[2:-1])) +        except ValueError: +            pass +    else: +        # named entity +        try: +            name = text[1:-1] +            text = unichr(name2codepoint[name]) +        except KeyError: +            pass +         +    return text # leave as is + +def html_unescape(text): +    """Removes HTML or XML character references and entities from a text string""" +    return re.sub("&#?\w+;", fixup, text) +  if __name__ == "__main__":      print freeSpace(".") diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index f90048f4d..7904070e8 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -24,6 +24,7 @@ from urllib import quote, urlencode  from logging import getLogger  from cStringIO import StringIO +from module.utils import html_unescape  from module.plugins.Plugin import Abort  def myquote(url): @@ -228,6 +229,8 @@ class HTTPRequest():                  #self.log.debug("Decoded %s" % encoding )                  decoder = getincrementaldecoder(encoding)("replace")                  rep = decoder.decode(rep, True) + +                #TODO: html_unescape as default              except LookupError:                  self.log.debug("No Decoder foung for %s" % encoding) diff --git a/module/plugins/hoster/FreakshareCom.py b/module/plugins/hoster/FreakshareCom.py index c20206bf5..869b8a99e 100644 --- a/module/plugins/hoster/FreakshareCom.py +++ b/module/plugins/hoster/FreakshareCom.py @@ -34,6 +34,11 @@ class FreakshareCom(Hoster):              self.get_file_url()
              self.download(self.pyfile.url, post=self.req_opts)
 +
 +
 +            check = self.checkDownload({"bad": "bad try"})
 +            if check == "bad":
 +                self.fail("Bad Try.")
      def prepare(self):
 diff --git a/module/plugins/hoster/MegauploadCom.py b/module/plugins/hoster/MegauploadCom.py index fa16fdf31..342a8024d 100644 --- a/module/plugins/hoster/MegauploadCom.py +++ b/module/plugins/hoster/MegauploadCom.py @@ -7,7 +7,7 @@ from module.plugins.Hoster import Hoster  from module.network.RequestFactory import getURL
 -from module.unescape import unescape
 +from module.utils import html_unescape
  from module.PyFile import statusMap
  from pycurl import error
 @@ -36,7 +36,7 @@ def getInfo(urls):          # File info
          fileInfo = _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap)
          url = urls[i]
 -        name = fileInfo.get('name', url)
 +        name = html_unescape(fileInfo.get('name', url))
          size = fileInfo.get('size', 0)
          status = fileInfo.get('status', statusMap['queued'])
 @@ -51,7 +51,7 @@ def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):      fileInfo = {}
      try:
          fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
 -        fileInfo['name'] = apiFileDataMap['n'] 
 +        fileInfo['name'] = apiFileDataMap['n']
          fileInfo['size'] = int(apiFileDataMap['s'])
          fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]        
      except:
 @@ -215,10 +215,12 @@ class MegauploadCom(Hoster):      def get_file_name(self):
          try:
 -            return self.api["name"]
 +            name =  self.api["name"]
          except KeyError:
              file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
 -            return re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
 +            name = re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
 +
 +        return html_unescape(name)
      def get_wait_time(self):
          time = re.search(r"count=(\d+);", self.html[1])
 diff --git a/module/plugins/hoster/RapidshareCom.py b/module/plugins/hoster/RapidshareCom.py index 95f6f91f9..96fa6fd36 100644 --- a/module/plugins/hoster/RapidshareCom.py +++ b/module/plugins/hoster/RapidshareCom.py @@ -8,8 +8,6 @@  # * removed some (old?) comment blocks  import re -from os import stat, remove -from time import sleep  from module.network.RequestFactory import getURL  from module.plugins.Hoster import Hoster @@ -99,7 +97,7 @@ class RapidshareCom(Hoster):              self.log.info(_("Rapidshare: Traffic Share (direct download)"))              self.pyfile.name = self.get_file_name() -            self.download(self.pyfile.url, get={"directstart":1}, cookies=True) +            self.download(self.pyfile.url, get={"directstart":1})          elif self.api_data["status"] in ("0","4","5"):              self.offline() @@ -133,7 +131,7 @@ class RapidshareCom(Hoster):          info = self.account.getAccountInfo(self.user, True)          self.log.debug("%s: Use Premium Account" % self.__name__)          url = self.api_data["mirror"] -        self.download(url, get={"directstart":1}, cookies=True) +        self.download(url, get={"directstart":1})      def download_api_data(self, force=False): diff --git a/module/plugins/hoster/YoutubeCom.py b/module/plugins/hoster/YoutubeCom.py index 1b8cf6b4b..908869236 100644 --- a/module/plugins/hoster/YoutubeCom.py +++ b/module/plugins/hoster/YoutubeCom.py @@ -3,6 +3,8 @@  import re  import urllib + +from module.utils import html_unescape  from module.plugins.Hoster import Hoster  class YoutubeCom(Hoster): @@ -89,6 +91,6 @@ class YoutubeCom(Hoster):          if fmt in self.formats:              file_suffix = self.formats[fmt][0]          name = re.search(file_name_pattern, html).group(1).replace("/", "") + file_suffix -        pyfile.name = name #.replace("&", "&").replace("ö", "oe").replace("ä", "ae").replace("ü", "ue")        +        pyfile.name = html_unescape(name)          self.download(fmt_dict[fmt]) | 
