diff options
| -rw-r--r-- | module/plugins/decrypter/SerienjunkiesOrg.py | 17 | ||||
| -rw-r--r-- | module/unescape.py | 28 | 
2 files changed, 36 insertions, 9 deletions
| diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 7d45fd705..af7dc8169 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -59,6 +59,16 @@ class SerienjunkiesOrg(Plugin):          return True +    def get_file_name(self): +        showPattern = re.compile("^http://serienjunkies.org/serie/(.*)/$") +        seasonPattern = re.compile("^http://serienjunkies.org/.*?/(.*)/$") +        m = showPattern.match(self.parent.url) +        if not m: +            m = seasonPattern.match(self.parent.url) +        if m: +            return m.group(1) +        return "n/a" +          def getSJSrc(self, url):          src = self.req.load(str(url))          if not src.find("Enter Serienjunkies") == -1: @@ -78,7 +88,7 @@ class SerienjunkiesOrg(Plugin):          self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster))          groups = {}          gid = -1 -        seasonName = soup.find("a", attrs={"rel":"bookmark"}).string +        seasonName = unescape(soup.find("a", attrs={"rel":"bookmark"}).string)          for p in ps:              if re.search("<strong>Dauer|<strong>Sprache|<strong>Format", str(p)):                  var = p.findAll("strong") @@ -191,13 +201,16 @@ class SerienjunkiesOrg(Plugin):          episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$")          oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$")          framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") -        seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$") +        showPattern = re.compile("^http://serienjunkies.org/serie/.*/$") +        seasonPattern = re.compile("^http://serienjunkies.org/.*?/.*/$")          if framePattern.match(url):              links = [self.handleFrame(url)]          elif episodePattern.match(url):              links = self.handleEpisode(url)          elif oldStyleLink.match(url):              links = self.handleOldStyleLink(url) +        elif showPattern.match(url): +            pass          elif seasonPattern.match(url):              links = self.handleSeason(url)          self.links = links diff --git a/module/unescape.py b/module/unescape.py index 462423b03..59f35f36b 100644 --- a/module/unescape.py +++ b/module/unescape.py @@ -1,12 +1,25 @@ +from htmlentitydefs import name2codepoint as n2cp +import re + +def substitute_entity(match): +    ent = match.group(2) +    if match.group(1) == "#": +        return unichr(int(ent)) +    else: +        cp = n2cp.get(ent) +        if cp: +            return unichr(cp) +        else: +            return match.group() + +def unescape(string): +    entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") +    return entity_re.subn(substitute_entity, string)[0] + +"""  import re  def unescape(text): -   """Removes HTML or XML character references  -      and entities from a text string. -      keep &, >, < in the source code. -   from Fredrik Lundh -   http://effbot.org/zone/re-sub.htm#unescape-html -   """     def fixup(m):        text = m.group(0)        if text[:2] == "&#": @@ -35,4 +48,5 @@ def unescape(text):              print "keyerror"              pass        return text # leave as is -   return str(re.sub("&#?\w+;", fixup, text)) +   return re.sub("&#?\w+;", fixup, text) +""" | 
