diff options
| author | 2009-12-30 17:33:14 +0100 | |
|---|---|---|
| committer | 2009-12-30 17:33:14 +0100 | |
| commit | 7c28259f92c2b3c608583ff128a5ae4134d4c48f (patch) | |
| tree | 1cc8d9e95c38f51b0efaef927e0036b677355068 /module/plugins/decrypter | |
| parent | signal slot stuff (diff) | |
| download | pyload-7c28259f92c2b3c608583ff128a5ae4134d4c48f.tar.xz | |
moved captcha stuff, extended serienjunkies, some other stuff
Diffstat (limited to 'module/plugins/decrypter')
| -rw-r--r-- | module/plugins/decrypter/SerienjunkiesOrg.py | 178 | 
1 files changed, 165 insertions, 13 deletions
| diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 46f380857..a73779dd3 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -6,6 +6,25 @@ from time import sleep  from module.Plugin import Plugin  from module.BeautifulSoup import BeautifulSoup +from module.download_thread import CaptchaError + +from htmlentitydefs import name2codepoint as n2cp +def substitute_entity(match): +    ent = match.group(2) +    if match.group(1) == "#": +        return unichr(int(ent)) +    else: +        cp = n2cp.get(ent) +        if cp: +            return unichr(cp) +        else: +            return match.group() + +def decode_htmlentities(string): +    entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") +    return entity_re.subn(substitute_entity, string)[0] + +  class SerienjunkiesOrg(Plugin):      def __init__(self, parent):          Plugin.__init__(self, parent) @@ -13,7 +32,7 @@ class SerienjunkiesOrg(Plugin):          props['name'] = "SerienjunkiesOrg"          props['type'] = "container"          props['pattern'] = r"http://.*?serienjunkies.org/.*?" -        props['version'] = "0.1" +        props['version'] = "0.2"          props['description'] = """serienjunkies.org Container Plugin"""          props['author_name'] = ("mkaay")          props['author_mail'] = ("mkaay@mkaay.de") @@ -21,6 +40,43 @@ class SerienjunkiesOrg(Plugin):          self.parent = parent          self.html = None          self.multi_dl = False +         +        self.hosterMap = { +            "rc": "RapidshareCom", +            "ff": "FilefactoryCom", +            "ut": "UploadedTo", +            "ul": "UploadedTo", +            "nl": "NetloadIn", +            "rs": "RapidshareDe" +        } +        self.hosterMapReverse = dict((v,k) for k, v in self.hosterMap.iteritems()) +        episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") +        oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") +        if re.match(episodePattern, self.parent.url) or re.match(oldStyleLink, self.parent.url): +            self.decryptNow = False +        else: +            self.decryptNow = True +     +    def prepare(self, thread): +        pyfile = self.parent + +        self.want_reconnect = False + +        pyfile.status.exists = self.file_exists() + +        if not pyfile.status.exists: +            raise Exception, "File not found" +            return False + +        pyfile.status.filename = self.get_file_name() +             +        pyfile.status.waituntil = self.time_plus_wait +        pyfile.status.url = self.get_file_url() +        pyfile.status.want_reconnect = self.want_reconnect + +        thread.wait(self.parent) +         +        return True      def getSJSrc(self, url):          src = self.req.load(str(url)) @@ -31,9 +87,88 @@ class SerienjunkiesOrg(Plugin):      def file_exists(self):          return True +    def waitForCaptcha(self, captchaData, imgType): +        captchaManager = self.parent.core.captchaManager +        task = captchaManager.newTask(self) +        task.setCaptcha(captchaData, imgType) +        task.setWaiting() +        while not task.getStatus() == "done": +            if not self.parent.core.isGUIConnected(): +                task.removeTask() +                raise CaptchaError +            sleep(1) +        result = task.getResult() +        task.removeTask() +        return result +     +    def handleSeason(self, url): +        src = self.getSJSrc(url) +        soup = BeautifulSoup(src) +        post = soup.find("div", attrs={"class": "post-content"}) +        ps = post.findAll("p") +        hosterPattern = re.compile("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_.*?\.html$") +        preferredHoster = self.get_config("preferredHoster").split(",") +        self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster)) +        groups = {} +        gid = -1 +        seasonName = soup.find("a", attrs={"rel":"bookmark"}).string +        for p in ps: +            if re.search("<strong>Dauer|<strong>Sprache|<strong>Format", str(p)): +                var = p.findAll("strong") +                opts = {"Dauer": "", "Uploader": "", "Sprache": "", "Format": "", u"Größe": ""} +                for v in var: +                    n = decode_htmlentities(v.string) +                    val = v.nextSibling +                    val = val.encode("utf-8") +                    val = decode_htmlentities(val) +                    val = val.replace(" |", "") +                    n = n.strip() +                    n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n) +                    val = val.strip() +                    val = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', val) +                    opts[n.strip()] = val.strip() +                gid += 1 +                groups[gid] = {} +                groups[gid]["ep"] = [] +                groups[gid]["opts"] = opts +            elif re.search("<strong>Download:", str(p)): +                links1 = p.findAll("a", attrs={"href": hosterPattern}) +                links2 = p.findAll("a", attrs={"href": re.compile("^http://serienjunkies.org/safe/.*$")}) +                for link in links1 + links2: +                    groups[gid]["ep"].append(link["href"]) +        packages = {} +        for g in groups.values(): +            links = [] +            linklist = g["ep"] +            package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"]) +            linkgroups = {} +            for link in linklist: +                key = re.sub("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_", "", link) +                if not linkgroups.has_key(key): +                    linkgroups[key] = [] +                linkgroups[key].append(link) +            for group in linkgroups.values(): +                print "group", group +                for pHoster in preferredHoster: +                    print "phoster", pHoster +                    hmatch = False +                    for link in group: +                        print "link", link +                        m = hosterPattern.match(link) +                        if m: +                            if pHoster == self.hosterMap[m.group(1)]: +                                links.append(link) +                                hmatch = True +                                print "match" +                                break +                    if hmatch: +                        break +            packages[package] = links +        return packages +          def handleEpisode(self, url):          if not self.parent.core.isGUIConnected(): -            return False +            raise CaptchaError          for i in range(3):              src = self.getSJSrc(url)              if not src.find("Du hast das Download-Limit überschritten! Bitte versuche es später nocheinmal.") == -1: @@ -45,17 +180,7 @@ class SerienjunkiesOrg(Plugin):                  captchaTag = soup.find(attrs={"src":re.compile("^/secure/")})                  captchaUrl = "http://download.serienjunkies.org"+captchaTag["src"]                  captchaData = self.req.load(str(captchaUrl)) -                captchaManager = self.parent.core.captchaManager -                task = captchaManager.newTask(self) -                task.setCaptcha(captchaData, "png") -                task.setWaiting() -                while not task.getStatus() == "done": -                    if not self.parent.core.isGUIConnected(): -                        task.removeTask() -                        return False -                    sleep(1) -                result = task.getResult() -                task.removeTask() +                result = self.waitForCaptcha(captchaData, "png")                  url = "http://download.serienjunkies.org"+form["action"]                  sinp = form.find(attrs={"name":"s"}) @@ -73,6 +198,27 @@ class SerienjunkiesOrg(Plugin):                      links.append(self.handleFrame(frameUrl))                  return links +    def handleOldStyleLink(self, url): +        if not self.parent.core.isGUIConnected(): +            raise CaptchaError +        for i in range(3): +            sj = self.req.load(str(url)) +            soup = BeautifulSoup(sj) +            form = soup.find("form", attrs={"action":re.compile("^http://serienjunkies.org")}) +            captchaTag = form.find(attrs={"src":re.compile("^/safe/secure/")}) +            captchaUrl = "http://serienjunkies.org"+captchaTag["src"] +            captchaData = self.req.load(str(captchaUrl)) +            result = self.waitForCaptcha(captchaData, "png") +            url = form["action"] +            sinp = form.find(attrs={"name":"s"}) +             +            self.req.load(str(url), post={'s': sinp["value"], 'c': result, 'dl.start': "Download"}, cookies=False, just_header=True) +            decrypted = self.req.lastEffectiveURL +            if decrypted == str(url): +                continue +            return [decrypted] +        return False +          def handleFrame(self, url):          self.req.load(str(url), cookies=False, just_header=True)          return self.req.lastEffectiveURL @@ -80,9 +226,15 @@ class SerienjunkiesOrg(Plugin):      def proceed(self, url, location):          links = False          episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") +        oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$")          framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") +        seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$")          if framePattern.match(url):              links = [self.handleFrame(url)]          elif episodePattern.match(url):              links = self.handleEpisode(url) +        elif oldStyleLink.match(url): +            links = self.handleOldStyleLink(url) +        elif seasonPattern.match(url): +            links = self.handleSeason(url)          self.links = links | 
