diff options
Diffstat (limited to 'module/plugins')
| -rw-r--r-- | module/plugins/hooks/ExtractArchive.py | 168 | ||||
| -rw-r--r-- | module/plugins/internal/UnRar.py | 179 | ||||
| -rw-r--r-- | module/plugins/internal/UnZip.py | 64 | 
3 files changed, 272 insertions, 139 deletions
| diff --git a/module/plugins/hooks/ExtractArchive.py b/module/plugins/hooks/ExtractArchive.py index ddec8319b..16942bef0 100644 --- a/module/plugins/hooks/ExtractArchive.py +++ b/module/plugins/hooks/ExtractArchive.py @@ -51,32 +51,33 @@ if os.name != "nt":      from pwd import getpwnam  from module.plugins.Hook import Hook, threaded, Expose -from module.plugins.internal.AbstractExtractor import ArchiveError, CRCError, WrongPassword -from module.utils import save_join, fs_encode +from module.plugins.internal.AbstractExtractor import ArchiveError, CRCError, PasswordError +from module.utils import save_join, uniqify  class ExtractArchive(Hook):      __name__    = "ExtractArchive"      __type__    = "hook" -    __version__ = "0.20" - -    __config__ = [("activated", "bool", "Activated", True), -                  ("fullpath", "bool", "Extract full path", True), -                  ("overwrite", "bool", "Overwrite files", True), -                  ("passwordfile", "file", "password file", "archive_password.txt"), -                  ("deletearchive", "bool", "Delete archives when done", False), -                  ("subfolder", "bool", "Create subfolder for each package", False), -                  ("destination", "folder", "Extract files to", ""), -                  ("excludefiles", "str", "Exclude files from unpacking (seperated by ;)", ""), -                  ("recursive", "bool", "Extract archives in archvies", True), -                  ("queue", "bool", "Wait for all downloads to be finished", True), -                  ("renice", "int", "CPU Priority", 0)] +    __version__ = "1.00" + +    __config__ = [("activated"    , "bool"  , "Activated"                                 , True                                                                     ), +                  ("fullpath"     , "bool"  , "Extract full path"                         , True                                                                     ), +                  ("overwrite"    , "bool"  , "Overwrite files"                           , False                                                                    ), +                  ("keepbroken"   , "bool"  , "Extract broken archives"                   , False                                                                    ), +                  ("repair"       , "bool"  , "Repair broken archives"                    , True                                                                     ), +                  ("passwordfile" , "file"  , "Store passwords in file"                   , "archive_password.txt"                                                   ), +                  ("delete"       , "bool"  , "Delete archive when successfully extracted", False                                                                    ), +                  ("subfolder"    , "bool"  , "Create subfolder for each package"         , False                                                                    ), +                  ("destination"  , "folder", "Extract files to"                          , ""                                                                       ), +                  ("extensions"   , "str"   , "Extract the following extensions"          , "7z,bz2,bzip2,gz,gzip,lha,lzh,lzma,rar,tar,taz,tbz,tbz2,tgz,xar,xz,z,zip"), +                  ("excludefiles" , "str"   , "Don't extract the following files"         , "*.nfo,*.DS_Store,index.dat,thumb.db"                                    ), +                  ("recursive"    , "bool"  , "Extract archives in archives"              , True                                                                     ), +                  ("queue"        , "bool"  , "Wait for all downloads to be finished"     , True                                                                     ), +                  ("renice"       , "int"   , "CPU Priority"                              , 0                                                                        )]      __description__ = """Extract different kind of archives"""      __license__     = "GPLv3" -    __authors__     = [("RaNaN", "ranan@pyload.org"), -                       ("AndroKev", None), -                       ("Walter Purcaro", "vuolter@gmail.com")] +    __authors__     = [("Walter Purcaro", "vuolter@gmail.com")]      event_list = ["allDownloadsProcessed"] @@ -92,7 +93,7 @@ class ExtractArchive(Hook):          self.passwords = []          names = [] -        for p in ("UnRar", "UnZip"): +        for p in ("UnRar", "SevenZip", "UnZip"):              try:                  module = self.core.pluginManager.loadModule("internal", p)                  klass = getattr(module, p) @@ -154,13 +155,21 @@ class ExtractArchive(Hook):          extracted = []          failed    = [] +        clearlist = lambda string: [x.lstrip('.') for x in string.replace(' ', '').replace(',', '|').replace(';', '|').split('|')] +          destination  = self.getConfig("destination")          subfolder    = self.getConfig("subfolder")          fullpath     = self.getConfig("fullpath")          overwrite    = self.getConfig("overwrite") -        excludefiles = self.getConfig("excludefiles") +        extensions   = clearlist(self.getConfig("extensions")) +        excludefiles = clearlist(self.getConfig("excludefiles"))          renice       = self.getConfig("renice")          recursive    = self.getConfig("recursive") +        delete       = self.getConfig("delete") +        keepbroken   = self.getConfig("keepbroken") + +        if extensions: +            self.logDebug("Extensions allowed: %s" % "|.".join(extensions))          # reload from txt file          self.reloadPasswords() @@ -171,7 +180,7 @@ class ExtractArchive(Hook):          #iterate packages -> plugins -> targets          for pid in ids:              p = self.core.files.getPackage(pid) -            self.logInfo(_("Check package %s") % p.name) +            self.logInfo(_("Check package: %s") % p.name)              if not p:                  continue @@ -179,21 +188,25 @@ class ExtractArchive(Hook):              out = save_join(dl, p.folder, destination, "")  #: force trailing slash              if subfolder: -                out = save_join(out, fs_encode(p.folder)) +                out = save_join(out, p.folder)              if not exists(out):                  makedirs(out)              files_ids = [(save_join(dl, p.folder, x['name']), x['id']) for x in p.getChildren().itervalues()] -            matched = False -            success = True +            matched   = False +            success   = True              # check as long there are unseen files              while files_ids:                  new_files_ids = [] +                if extensions: +                    files_ids = [(file, id) for file, id in files_ids if filter(lambda ext: file.endswith(ext), extensions)] +                  for plugin in self.plugins:                      targets = plugin.getTargets(files_ids) +                      if targets:                          self.logDebug("Targets for %s: %s" % (plugin.__name__, targets))                          matched = True @@ -205,19 +218,31 @@ class ExtractArchive(Hook):                          processed.append(target)  # prevent extracting same file twice -                        self.logInfo(basename(target), _("Extract to %s") % out) +                        self.logInfo(basename(target), _("Extract to: %s") % out)                          try: -                            klass = plugin(self, target, out, fullpath, overwrite, excludefiles, renice) +                            klass = plugin(self, +                                           target, +                                           out, +                                           p.password, +                                           fullpath, +                                           overwrite, +                                           excludefiles, +                                           renice, +                                           delete, +                                           keepbroken)                              klass.init() -                            new_files = self._extract(klass, fid, [p.password.strip()], thread) +                            new_files = self._extract(klass, fid, thread)                          except Exception, e:                              self.logError(basename(target), e) +                            new_files = None + +                        if new_files is None:                              success = False                              continue -                        self.logDebug("Extracted", new_files) +                        self.logDebug("Extracted files: %s" % new_files)                          self.setPermissions(new_files)                          for file in new_files: @@ -242,43 +267,78 @@ class ExtractArchive(Hook):          return True if not failed else False -    def _extract(self, plugin, fid, passwords, thread): +    def _extract(self, plugin, fid, thread):          pyfile = self.core.files.getFile(fid) -        deletearchive = self.getConfig("deletearchive")          pyfile.setCustomStatus(_("extracting"))          thread.addActive(pyfile)  # keep this file until everything is done          try: -            progress = lambda x: pyfile.setProgress(x) -            success = False +            progress  = lambda x: pyfile.setProgress(x) +            encrypted = False +            passwords = self.getPasswords() + +            try: +                self.logInfo(basename(plugin.file), "Verifying...") + +                tmp_password    = plugin.password +                plugin.password = ""  #: Force verifying without password + +                plugin.verify() + +            except PasswordError: +                encrypted = True + +            except CRCError: +                self.logWarning(basename(plugin.file), _("Archive damaged")) + +                if not self.getConfig("repair"): +                    raise CRCError + +                elif plugin.repair(): +                    self.logInfo(basename(plugin.file), _("Successfully repaired")) + +                elif not self.getConfig("keepbroken"): +                    raise ArchiveError(_("Broken archive")) + +                else: +                    self.logInfo(basename(plugin.file), _("All OK")) + +            plugin.password = tmp_password + +            if not encrypted: +                plugin.extract(progress) -            if not plugin.checkArchive(): -                plugin.extract(progress, pw) -                success = True              else:                  self.logInfo(basename(plugin.file), _("Password protected")) -                self.logDebug("Passwords: %s" % passwords if passwords else "No password provided") -                for pw in set(passwords) | set(self.getPasswords()): +                if plugin.password: +                    passwords.insert(0, plugin.password) +                    passwords = uniqify(self.passwords) +                    self.logDebug("Password: %s" % plugin.password) +                else: +                    self.logDebug("No package password provided") + +                for pw in passwords:                      try:                          self.logDebug("Try password: %s" % pw) -                        if plugin.checkPassword(pw): -                            plugin.extract(progress, pw) + +                        if plugin.setPassword(pw): +                            plugin.extract(progress)                              self.addPassword(pw) -                            success = True                              break +                        else: +                            raise PasswordError -                    except WrongPassword: +                    except PasswordError:                          self.logDebug("Password was wrong") - -            if not success: -                raise Exception(_("Wrong password")) +                else: +                    raise PasswordError              if self.core.debug: -                self.logDebug("Would delete", ", ".join(plugin.getDeleteFiles())) +                self.logDebug("Would delete: %s" % ", ".join(plugin.getDeleteFiles())) -            if deletearchive: +            if self.getConfig("delete"):                  files = plugin.getDeleteFiles()                  self.logInfo(_("Deleting %s files") % len(files))                  for f in files: @@ -294,12 +354,16 @@ class ExtractArchive(Hook):              return extracted_files -        except ArchiveError, e: -            self.logError(basename(plugin.file), _("Archive Error"), e) +        except PasswordError: +            self.logError(basename(plugin.file), _("Wrong password" if passwords else "No password found")) +            plugin.password = ""          except CRCError:              self.logError(basename(plugin.file), _("CRC Mismatch")) +        except ArchiveError, e: +            self.logError(basename(plugin.file), _("Archive Error"), e) +          except Exception, e:              if self.core.debug:                  print_exc() @@ -307,7 +371,7 @@ class ExtractArchive(Hook):          self.manager.dispatchEvent("archive_extract_failed", pyfile) -        raise Exception(_("Extract failed")) +        self.logError(basename(plugin.file), _("Extract failed"))      @Expose @@ -337,15 +401,13 @@ class ExtractArchive(Hook):          """  Adds a password to saved list"""          passwordfile = self.getConfig("passwordfile") -        if pw in self.passwords: -            self.passwords.remove(pw) -          self.passwords.insert(0, pw) +        self.passwords = uniqify(self.passwords)          try:              with open(passwordfile, "wb") as f:                  for pw in self.passwords: -                    f.write(pw + "\n") +                    f.write(pw + '\n')          except IOError, e:              self.logError(e) diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 4bbd2042c..5633b31f7 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -4,11 +4,11 @@ import os  import re  from glob import glob -from os.path import basename, join +from os.path import basename, dirname, join  from string import digits  from subprocess import Popen, PIPE -from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError +from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError  from module.utils import save_join, decode @@ -22,21 +22,25 @@ def renice(pid, value):  class UnRar(AbtractExtractor):      __name__    = "UnRar" -    __version__ = "0.21" +    __version__ = "1.00"      __description__ = """Rar extractor plugin"""      __license__     = "GPLv3" -    __authors__     = [("RaNaN", "RaNaN@pyload.org"), -                       ("Walter Purcaro", "vuolter@gmail.com")] +    __authors__     = [("Walter Purcaro", "vuolter@gmail.com")]      CMD = "unrar" +    EXTENSIONS = ["rar", "zip", "cab", "arj", "lzh", "tar", "gz", "bz2", "ace", "uue", "jar", "iso", "7z", "xz", "z"] + +      #@NOTE: there are some more uncovered rar formats -    re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) -    re_partfiles = re.compile(r'.*\.(rar|r\d+)', re.I) +    re_rarpart = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) +    re_rarfile = re.compile(r'.*\.(rar|r\d+)$', re.I) +      re_filelist  = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') -    re_wrongpwd  = re.compile(r'(Corrupt file or wrong password|password incorrect)', re.I) +    re_wrongpwd  = re.compile(r'password', re.I) +    re_wrongcrc  = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I)      @classmethod @@ -60,69 +64,99 @@ class UnRar(AbtractExtractor):      @classmethod +    def isArchive(cls, file): +        f = basename(file).lower() +        return any(f.endswith('.%s' % ext) for ext in cls.EXTENSIONS) + + +    @classmethod      def getTargets(cls, files_ids): -        result = [] +        targets = []          for file, id in files_ids: -            if not file.endswith(".rar"): +            if not cls.isArchive(file):                  continue -            match = cls.re_splitfile.findall(file) -            if match: +            m = cls.re_rarpart.findall(file) +            if m:                  # only add first parts -                if int(match[0][1]) == 1: -                    result.append((file, id)) +                if int(m[0][1]) == 1: +                    targets.append((file, id))              else: -                result.append((file, id)) +                targets.append((file, id)) -        return result +        return targets -    def init(self): -        self.passwordProtected = False -        self.headerProtected   = False  #: list files will not work without password -        self.password          = ""  #: save the correct password +    def check(self, out="", err=""): +        if not out or not err: +            return +        if err.strip(): +            if self.re_wrongpwd.search(err): +                raise PasswordError -    def checkArchive(self): -        p = self.call_unrar("l", "-v", self.file) -        out, err = p.communicate() -        if self.re_wrongpwd.search(err): -            self.passwordProtected = True -            self.headerProtected   = True -            return True +            elif self.re_wrongcrc.search(err): +                raise CRCError + +            else:  #: raise error if anything is on stderr +                raise ArchiveError(err.strip())          # output only used to check if passworded files are present          for attr in self.re_filelist.findall(out):              if attr[0].startswith("*"): -                self.passwordProtected = True -                return True +                raise PasswordError + -        self.listContent() -        if not self.files: -            raise ArchiveError("Empty Archive") +    def verify(self): +        p = self.call_cmd("l", "-v", self.file, password=self.password) + +        self.check(*p.communicate()) + +        if p and p.returncode: +            raise ArchiveError("Process terminated") + +        if not self.list(): +            raise ArchiveError("Empty archive") + + +    def isPassword(self, password): +        if isinstance(password, basestring): +            p = self.call_cmd("l", "-v", self.file, password=password) +            out, err = p.communicate() + +            if not self.re_wrongpwd.search(err): +                return True          return False -    def checkPassword(self, password): -        # at this point we can only verify header protected files -        if self.headerProtected: -            p = self.call_unrar("l", "-v", self.file, password=password) +    def repair(self): +        p = self.call_cmd("rc", self.file) +        out, err = p.communicate() + +        if p.returncode or err.strip(): +            p = self.call_cmd("r", self.file)              out, err = p.communicate() -            if self.re_wrongpwd.search(err): + +            if p.returncode or err.strip():                  return False +            else: +                self.file = join(dirname(self.file), re.search(r'(fixed|rebuild)\.%s' % basename(self.file), out).group(0))          return True -    def extract(self, progress, password=""): +    def extract(self, progress=lambda x: None): +        self.verify() + +        progress(0) +          command = "x" if self.fullpath else "e" -        p = self.call_unrar(command, self.file, self.out, password=password) -        renice(p.pid, self.renice) +        p = self.call_cmd(command, self.file, self.out, password=self.password) -        progress(0) +        renice(p.pid, self.renice)          progressstring = ""          while True: @@ -131,7 +165,7 @@ class UnRar(AbtractExtractor):              if not c:                  break              # reading a percentage sign -> set progress and restart -            if c == '%': +            if c is '%':                  progress(int(progressstring))                  progressstring = ""              # not reading a digit -> therefore restart @@ -139,46 +173,43 @@ class UnRar(AbtractExtractor):                  progressstring = ""              # add digit to progressstring              else: -                progressstring = progressstring + c +                progressstring += c          progress(100) -        # retrieve stderr -        err = p.stderr.read() +        self.files = self.list() -        if "CRC failed" in err and not password and not self.passwordProtected: -            raise CRCError -        elif "CRC failed" in err: -            raise WrongPassword +        # retrieve stderr +        self.check(err=p.stderr.read()) -        if err.strip():  #: raise error if anything is on stderr -            raise ArchiveError(err.strip())          if p.returncode:              raise ArchiveError("Process terminated") -        if not self.files: -            self.password = password -            self.listContent() -      def getDeleteFiles(self):          if ".part" in basename(self.file):              return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) +          # get files which matches .r* and filter unsuited files out          parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) -        return filter(lambda x: self.re_partfiles.match(x), parts) +        return filter(lambda x: self.re_rarfile.match(x), parts) -    def listContent(self): + +    def list(self):          command = "vb" if self.fullpath else "lb" -        p = self.call_unrar(command, "-v", self.file, password=self.password) + +        p = self.call_cmd(command, "-v", self.file, password=self.password)          out, err = p.communicate() -        if "Cannot open" in err: -            raise ArchiveError("Cannot open file") +        if err.strip(): +            self.m.logError(err) +            if "Cannot open" in err: +                return list() -        if err.strip():  #: only log error at this point -            self.m.logError(err.strip()) +        if p.returncode: +            self.m.logError("Process terminated") +            return list()          result = set() @@ -186,17 +217,22 @@ class UnRar(AbtractExtractor):              f = f.strip()              result.add(save_join(self.out, f)) -        self.files = result +        return list(result) -    def call_unrar(self, command, *xargs, **kwargs): +    def call_cmd(self, command, *xargs, **kwargs):          args = [] +          # overwrite flag -        args.append("-o+") if self.overwrite else args.append("-o-") +        if self.overwrite: +            args.append("-o+") +        else: +            args.append("-o-") +            if self.delete: +                args.append("-or") -        if self.excludefiles: -            for word in self.excludefiles.split(';'): -                args.append("-x%s" % word) +        for word in self.excludefiles: +            args.append("-x%s" % word.strip())          # assume yes on all queries          args.append("-y") @@ -207,10 +243,11 @@ class UnRar(AbtractExtractor):          else:              args.append("-p-") +        if self.keepbroken: +            args.append("-kb") +          # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue          call = [self.CMD, command] + args + list(xargs)          self.m.logDebug(" ".join(call)) -        p = Popen(call, stdout=PIPE, stderr=PIPE) - -        return p +        return Popen(call, stdout=PIPE, stderr=PIPE) diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 81c298784..b3d54cba0 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -1,19 +1,23 @@  # -*- coding: utf-8 -*- +from __future__ import with_statement +  import sys  import zipfile -from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError +from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError  class UnZip(AbtractExtractor):      __name__    = "UnZip" -    __version__ = "0.12" +    __version__ = "1.00"      __description__ = """Zip extractor plugin"""      __license__     = "GPLv3" -    __authors__     = [("RaNaN", "RaNaN@pyload.org"), -                       ("Walter Purcaro", "vuolter@gmail.com")] +    __authors__     = [("Walter Purcaro", "vuolter@gmail.com")] + + +    EXTENSIONS = ["zip", "zip64"]      @classmethod @@ -22,31 +26,61 @@ class UnZip(AbtractExtractor):      @classmethod -    def getTargets(cls, files_ids): -        result = [] +    def isArchive(cls, file): +        return zipfile.is_zipfile(file) -        for file, id in files_ids: -            if file.endswith(".zip"): -                result.append((file, id)) -        return result +    def verify(self): +        try: +            with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: +                z.setpassword(self.password) +                badcrc = z.testzip() +        except (BadZipfile, LargeZipFile), e: +            raise ArchiveError(e) -    def extract(self, progress, password=""): +        except RuntimeError, e: +            if 'encrypted' in e: +                raise PasswordError +            else: +                raise ArchiveError(e) + +        else: +            if badcrc: +                raise CRCError + +        if not self.list(): +            raise ArchiveError("Empty archive") + + +    def list(self):          try: -            z = zipfile.ZipFile(self.file) -            self.files = z.namelist() -            z.extractall(self.out, pwd=password) +            with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: +                z.setpassword(self.password) +                return z.namelist() +        except Exception: +            return list() + + +    def extract(self, progress=lambda x: None): +        try: +            with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: +                progress(0) +                z.extractall(self.out, pwd=self.password) +                progress(100)          except (BadZipfile, LargeZipFile), e:              raise ArchiveError(e)          except RuntimeError, e:              if e is "Bad password for file": -                raise WrongPassword +                raise PasswordError              else:                  raise ArchiveError(e) +        finally: +            self.files = self.list() +      def getDeleteFiles(self):          return [self.file] | 
