diff options
24 files changed, 206 insertions, 27 deletions
| diff --git a/module/Plugin.py b/module/Plugin.py index f3830595d..c33e0d565 100644 --- a/module/Plugin.py +++ b/module/Plugin.py @@ -32,7 +32,7 @@ class Plugin():          self.config = {}          props = {}          props['name'] = "BasePlugin" -        props['version'] = "0.2" +        props['version'] = "0.3"          props['pattern'] = None          props['type'] = "hoster"          props['description'] = """Base Plugin""" @@ -47,6 +47,7 @@ class Plugin():          self.multi_dl = True          self.ocr = None #captcha reader instance          self.logger = logging.getLogger("log") +        self.decryptNow = True      def prepare(self, thread):          pyfile = self.parent @@ -127,7 +128,7 @@ class Plugin():              pass      def init_ocr(self): -        modul = __import__("module.captcha." + self.props['name'], fromlist=['captcha']) +        modul = __import__("module.plugins.captcha." + self.props['name'], fromlist=['captcha'])          captchaClass = getattr(modul, self.props['name'])          self.ocr = captchaClass() diff --git a/module/captcha/LinksaveIn/bg/flecken_1.gif b/module/captcha/LinksaveIn/bg/flecken_1.gifBinary files differ deleted file mode 100644 index df2f51217..000000000 --- a/module/captcha/LinksaveIn/bg/flecken_1.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/flecken_2.gif b/module/captcha/LinksaveIn/bg/flecken_2.gifBinary files differ deleted file mode 100644 index 838276188..000000000 --- a/module/captcha/LinksaveIn/bg/flecken_2.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/gewebe_fein.gif b/module/captcha/LinksaveIn/bg/gewebe_fein.gifBinary files differ deleted file mode 100644 index 502f18cc4..000000000 --- a/module/captcha/LinksaveIn/bg/gewebe_fein.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/gewebe_grob.gif b/module/captcha/LinksaveIn/bg/gewebe_grob.gifBinary files differ deleted file mode 100644 index e66a365ad..000000000 --- a/module/captcha/LinksaveIn/bg/gewebe_grob.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/gitter.gif b/module/captcha/LinksaveIn/bg/gitter.gifBinary files differ deleted file mode 100644 index ec52ef68d..000000000 --- a/module/captcha/LinksaveIn/bg/gitter.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/mauer_horizontal.gif b/module/captcha/LinksaveIn/bg/mauer_horizontal.gifBinary files differ deleted file mode 100644 index 3d75fafa8..000000000 --- a/module/captcha/LinksaveIn/bg/mauer_horizontal.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/mauer_vertikal.gif b/module/captcha/LinksaveIn/bg/mauer_vertikal.gifBinary files differ deleted file mode 100644 index 2ada6fdae..000000000 --- a/module/captcha/LinksaveIn/bg/mauer_vertikal.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/scheckig.gif b/module/captcha/LinksaveIn/bg/scheckig.gifBinary files differ deleted file mode 100644 index 8bfb45c56..000000000 --- a/module/captcha/LinksaveIn/bg/scheckig.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/wellen.gif b/module/captcha/LinksaveIn/bg/wellen.gifBinary files differ deleted file mode 100644 index a181ebe74..000000000 --- a/module/captcha/LinksaveIn/bg/wellen.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/tesser_conf b/module/captcha/LinksaveIn/tesser_conf deleted file mode 100644 index 34ca8fa02..000000000 --- a/module/captcha/LinksaveIn/tesser_conf +++ /dev/null @@ -1 +0,0 @@ -tessedit_char_whitelist 0123456789 diff --git a/module/config/plugin_default.xml b/module/config/plugin_default.xml index 01c0e7ed6..88628cb5e 100644 --- a/module/config/plugin_default.xml +++ b/module/config/plugin_default.xml @@ -36,4 +36,7 @@          <!-- False for no limitation -->          <max_videos>False</max_videos>      </YoutubeChannel> +    <SerienjunkiesOrg> +        <preferredHoster>RapidshareCom,UploadedTo,NetloadIn,FilefactoryCom</preferredHoster> +    </SerienjunkiesOrg>  </config> diff --git a/module/download_thread.py b/module/download_thread.py index 3c008d000..a07f4511f 100644 --- a/module/download_thread.py +++ b/module/download_thread.py @@ -66,6 +66,9 @@ class Checksum(Exception):      def getFile(self):          return self.file +class CaptchaError(Exception): +    pass +  class Download_Thread(threading.Thread):      def __init__(self, parent):          threading.Thread.__init__(self) @@ -93,8 +96,10 @@ class Download_Thread(threading.Thread):                      f = open("%s.info" % e.getFile(), "w")                      f.write("Checksum not matched!")                      f.close() +                except CaptchaError: +                    self.loadedPyFile.status.type = "failed" +                    self.loadedPyFile.status.error = "Can't solve captcha"                  except Exception, e: -                      try:                          if self.parent.parent.config['general']['debug_mode']:                              traceback.print_exc() @@ -123,8 +128,11 @@ class Download_Thread(threading.Thread):          pyfile.plugin.prepare(self)          pyfile.plugin.req.set_timeout(self.parent.parent.config['general']['max_download_time']) - -        status.type = "downloading" +         +        if pyfile.plugin.props["type"] == "container": +            status.type = "decrypting" +        else: +            status.type = "downloading"          location = join(pyfile.folder, status.filename)          pyfile.plugin.proceed(status.url, location) diff --git a/module/file_list.py b/module/file_list.py index cc3b63006..8af66d5ed 100644 --- a/module/file_list.py +++ b/module/file_list.py @@ -129,7 +129,11 @@ class File_List(object):          files = []          for pypack in self.data["queue"] + self.data["packages"]:              for pyfile in pypack.files: -                if  pyfile.plugin.props['type'] == "container" and not pyfile.active: +                if pyfile.status.type == None and pyfile.plugin.props['type'] == "container" and not pyfile.active: +                    files.append(pyfile) +        for pypack in self.data["packages"]: +            for pyfile in pypack.files: +                if pyfile.status.type == None and pyfile.plugin.props['type'] == "container" and pyfile.plugin.decryptNow and not pyfile.active:                      files.append(pyfile)          for pypack in self.data["queue"]:              for pyfile in pypack.files: @@ -424,7 +428,7 @@ class PyLoadFile():              for dir in ["hoster", "decrypter", "container"]:                  try:                      self.modul = __import__("%s.%s" % (dir, pluginName), globals(), locals(), [pluginName], -1) -                except: +                except Exception, e:                      pass              pluginClass = getattr(self.modul, pluginName)          else: diff --git a/module/gui/CaptchaDock.py b/module/gui/CaptchaDock.py index 3dc9441a4..8a7e8010e 100644 --- a/module/gui/CaptchaDock.py +++ b/module/gui/CaptchaDock.py @@ -41,6 +41,7 @@ class CaptchaDock(QDockWidget):          data = QByteArray(img)          self.currentID = tid          self.widget.emit(SIGNAL("setImage"), data) +        self.widget.input.setText("")          self.show()  class CaptchaDockWidget(QWidget): diff --git a/module/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py index 136092181..136092181 100644 --- a/module/captcha/GigasizeCom.py +++ b/module/plugins/captcha/GigasizeCom.py diff --git a/module/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py index d6f61e362..d6f61e362 100644 --- a/module/captcha/LinksaveIn.py +++ b/module/plugins/captcha/LinksaveIn.py diff --git a/module/captcha/MegauploadCom.py b/module/plugins/captcha/MegauploadCom.py index 374bcd678..374bcd678 100644 --- a/module/captcha/MegauploadCom.py +++ b/module/plugins/captcha/MegauploadCom.py diff --git a/module/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py index 9799a6a2b..9799a6a2b 100644 --- a/module/captcha/NetloadIn.py +++ b/module/plugins/captcha/NetloadIn.py diff --git a/module/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py index 91124f181..91124f181 100644 --- a/module/captcha/ShareonlineBiz.py +++ b/module/plugins/captcha/ShareonlineBiz.py diff --git a/module/captcha/__init__.py b/module/plugins/captcha/__init__.py index e69de29bb..e69de29bb 100644 --- a/module/captcha/__init__.py +++ b/module/plugins/captcha/__init__.py diff --git a/module/captcha/captcha.py b/module/plugins/captcha/captcha.py index 283b171e0..283b171e0 100644 --- a/module/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 46f380857..a73779dd3 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -6,6 +6,25 @@ from time import sleep  from module.Plugin import Plugin  from module.BeautifulSoup import BeautifulSoup +from module.download_thread import CaptchaError + +from htmlentitydefs import name2codepoint as n2cp +def substitute_entity(match): +    ent = match.group(2) +    if match.group(1) == "#": +        return unichr(int(ent)) +    else: +        cp = n2cp.get(ent) +        if cp: +            return unichr(cp) +        else: +            return match.group() + +def decode_htmlentities(string): +    entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") +    return entity_re.subn(substitute_entity, string)[0] + +  class SerienjunkiesOrg(Plugin):      def __init__(self, parent):          Plugin.__init__(self, parent) @@ -13,7 +32,7 @@ class SerienjunkiesOrg(Plugin):          props['name'] = "SerienjunkiesOrg"          props['type'] = "container"          props['pattern'] = r"http://.*?serienjunkies.org/.*?" -        props['version'] = "0.1" +        props['version'] = "0.2"          props['description'] = """serienjunkies.org Container Plugin"""          props['author_name'] = ("mkaay")          props['author_mail'] = ("mkaay@mkaay.de") @@ -21,6 +40,43 @@ class SerienjunkiesOrg(Plugin):          self.parent = parent          self.html = None          self.multi_dl = False +         +        self.hosterMap = { +            "rc": "RapidshareCom", +            "ff": "FilefactoryCom", +            "ut": "UploadedTo", +            "ul": "UploadedTo", +            "nl": "NetloadIn", +            "rs": "RapidshareDe" +        } +        self.hosterMapReverse = dict((v,k) for k, v in self.hosterMap.iteritems()) +        episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") +        oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") +        if re.match(episodePattern, self.parent.url) or re.match(oldStyleLink, self.parent.url): +            self.decryptNow = False +        else: +            self.decryptNow = True +     +    def prepare(self, thread): +        pyfile = self.parent + +        self.want_reconnect = False + +        pyfile.status.exists = self.file_exists() + +        if not pyfile.status.exists: +            raise Exception, "File not found" +            return False + +        pyfile.status.filename = self.get_file_name() +             +        pyfile.status.waituntil = self.time_plus_wait +        pyfile.status.url = self.get_file_url() +        pyfile.status.want_reconnect = self.want_reconnect + +        thread.wait(self.parent) +         +        return True      def getSJSrc(self, url):          src = self.req.load(str(url)) @@ -31,9 +87,88 @@ class SerienjunkiesOrg(Plugin):      def file_exists(self):          return True +    def waitForCaptcha(self, captchaData, imgType): +        captchaManager = self.parent.core.captchaManager +        task = captchaManager.newTask(self) +        task.setCaptcha(captchaData, imgType) +        task.setWaiting() +        while not task.getStatus() == "done": +            if not self.parent.core.isGUIConnected(): +                task.removeTask() +                raise CaptchaError +            sleep(1) +        result = task.getResult() +        task.removeTask() +        return result +     +    def handleSeason(self, url): +        src = self.getSJSrc(url) +        soup = BeautifulSoup(src) +        post = soup.find("div", attrs={"class": "post-content"}) +        ps = post.findAll("p") +        hosterPattern = re.compile("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_.*?\.html$") +        preferredHoster = self.get_config("preferredHoster").split(",") +        self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster)) +        groups = {} +        gid = -1 +        seasonName = soup.find("a", attrs={"rel":"bookmark"}).string +        for p in ps: +            if re.search("<strong>Dauer|<strong>Sprache|<strong>Format", str(p)): +                var = p.findAll("strong") +                opts = {"Dauer": "", "Uploader": "", "Sprache": "", "Format": "", u"Größe": ""} +                for v in var: +                    n = decode_htmlentities(v.string) +                    val = v.nextSibling +                    val = val.encode("utf-8") +                    val = decode_htmlentities(val) +                    val = val.replace(" |", "") +                    n = n.strip() +                    n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n) +                    val = val.strip() +                    val = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', val) +                    opts[n.strip()] = val.strip() +                gid += 1 +                groups[gid] = {} +                groups[gid]["ep"] = [] +                groups[gid]["opts"] = opts +            elif re.search("<strong>Download:", str(p)): +                links1 = p.findAll("a", attrs={"href": hosterPattern}) +                links2 = p.findAll("a", attrs={"href": re.compile("^http://serienjunkies.org/safe/.*$")}) +                for link in links1 + links2: +                    groups[gid]["ep"].append(link["href"]) +        packages = {} +        for g in groups.values(): +            links = [] +            linklist = g["ep"] +            package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"]) +            linkgroups = {} +            for link in linklist: +                key = re.sub("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_", "", link) +                if not linkgroups.has_key(key): +                    linkgroups[key] = [] +                linkgroups[key].append(link) +            for group in linkgroups.values(): +                print "group", group +                for pHoster in preferredHoster: +                    print "phoster", pHoster +                    hmatch = False +                    for link in group: +                        print "link", link +                        m = hosterPattern.match(link) +                        if m: +                            if pHoster == self.hosterMap[m.group(1)]: +                                links.append(link) +                                hmatch = True +                                print "match" +                                break +                    if hmatch: +                        break +            packages[package] = links +        return packages +          def handleEpisode(self, url):          if not self.parent.core.isGUIConnected(): -            return False +            raise CaptchaError          for i in range(3):              src = self.getSJSrc(url)              if not src.find("Du hast das Download-Limit überschritten! Bitte versuche es später nocheinmal.") == -1: @@ -45,17 +180,7 @@ class SerienjunkiesOrg(Plugin):                  captchaTag = soup.find(attrs={"src":re.compile("^/secure/")})                  captchaUrl = "http://download.serienjunkies.org"+captchaTag["src"]                  captchaData = self.req.load(str(captchaUrl)) -                captchaManager = self.parent.core.captchaManager -                task = captchaManager.newTask(self) -                task.setCaptcha(captchaData, "png") -                task.setWaiting() -                while not task.getStatus() == "done": -                    if not self.parent.core.isGUIConnected(): -                        task.removeTask() -                        return False -                    sleep(1) -                result = task.getResult() -                task.removeTask() +                result = self.waitForCaptcha(captchaData, "png")                  url = "http://download.serienjunkies.org"+form["action"]                  sinp = form.find(attrs={"name":"s"}) @@ -73,6 +198,27 @@ class SerienjunkiesOrg(Plugin):                      links.append(self.handleFrame(frameUrl))                  return links +    def handleOldStyleLink(self, url): +        if not self.parent.core.isGUIConnected(): +            raise CaptchaError +        for i in range(3): +            sj = self.req.load(str(url)) +            soup = BeautifulSoup(sj) +            form = soup.find("form", attrs={"action":re.compile("^http://serienjunkies.org")}) +            captchaTag = form.find(attrs={"src":re.compile("^/safe/secure/")}) +            captchaUrl = "http://serienjunkies.org"+captchaTag["src"] +            captchaData = self.req.load(str(captchaUrl)) +            result = self.waitForCaptcha(captchaData, "png") +            url = form["action"] +            sinp = form.find(attrs={"name":"s"}) +             +            self.req.load(str(url), post={'s': sinp["value"], 'c': result, 'dl.start': "Download"}, cookies=False, just_header=True) +            decrypted = self.req.lastEffectiveURL +            if decrypted == str(url): +                continue +            return [decrypted] +        return False +          def handleFrame(self, url):          self.req.load(str(url), cookies=False, just_header=True)          return self.req.lastEffectiveURL @@ -80,9 +226,15 @@ class SerienjunkiesOrg(Plugin):      def proceed(self, url, location):          links = False          episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") +        oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$")          framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") +        seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$")          if framePattern.match(url):              links = [self.handleFrame(url)]          elif episodePattern.match(url):              links = self.handleEpisode(url) +        elif oldStyleLink.match(url): +            links = self.handleOldStyleLink(url) +        elif seasonPattern.match(url): +            links = self.handleSeason(url)          self.links = links diff --git a/module/thread_list.py b/module/thread_list.py index ad0d0c8fb..d3eb4d203 100644 --- a/module/thread_list.py +++ b/module/thread_list.py @@ -110,15 +110,26 @@ class Thread_List(object):              if pyfile.plugin.props['type'] == "container":                  newLinks = 0                  if pyfile.plugin.links: -                    for link in pyfile.plugin.links: -                        newFile = self.list.collector.addLink(link) -                        self.list.packager.addFileToPackage(pyfile.package.data["id"], self.list.collector.popFile(newFile)) -                        newLinks += 1 -                    self.list.packager.pushPackage2Queue(pyfile.package.data["id"]) +                    if isinstance(pyfile.plugin.links, dict): +                        packmap = {} +                        for packname in pyfile.plugin.links.keys(): +                            packmap[packname] = self.list.packager.addNewPackage(packname) +                        for packname, links in pyfile.plugin.links.items(): +                            pid = packmap[packname] +                            for link in links: +                                newFile = self.list.collector.addLink(link) +                                self.list.packager.addFileToPackage(pid, self.list.collector.popFile(newFile)) +                                newLinks += 1 +                    else: +                        for link in pyfile.plugin.links: +                            newFile = self.list.collector.addLink(link) +                            self.list.packager.addFileToPackage(pyfile.package.data["id"], self.list.collector.popFile(newFile)) +                            newLinks += 1 +                        #self.list.packager.pushPackage2Queue(pyfile.package.data["id"])                  self.list.packager.removeFileFromPackage(pyfile.id, pyfile.package.data["id"])                  if newLinks: -                    self.parent.logger.info("Parsed link from %s: %i" % (pyfile.status.filename, newLinks)) +                    self.parent.logger.info("Parsed links from %s: %i" % (pyfile.status.filename, newLinks))                  else:                      self.parent.logger.info("No links in %s" % pyfile.status.filename)                  #~ self.list.packager.removeFileFromPackage(pyfile.id, pyfile.package.id) | 
