diff options
Diffstat (limited to 'module/plugins')
| -rw-r--r-- | module/plugins/internal/OCR.py | 89 | 
1 files changed, 47 insertions, 42 deletions
| diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py index b4e28ca0f..8d080e436 100644 --- a/module/plugins/internal/OCR.py +++ b/module/plugins/internal/OCR.py @@ -14,13 +14,13 @@ import subprocess  # import tempfile  from module.plugins.internal.Plugin import Plugin -from module.plugins.internal.utils import fs_join +from module.plugins.internal.misc import encode, fsjoin  class OCR(Plugin):      __name__    = "OCR"      __type__    = "ocr" -    __version__ = "0.21" +    __version__ = "0.22"      __status__  = "stable"      __description__ = """OCR base plugin""" @@ -28,15 +28,15 @@ class OCR(Plugin):      __authors__     = [("pyLoad Team", "admin@pyload.org")] -    def __init__(self, plugin): -        self._init(plugin.pyload) -        self.plugin = plugin +    def __init__(self, pyfile): +        self._init(pyfile.m.core) +        self.pyfile = pyfile          self.init()      def _log(self, level, plugintype, pluginname, messages):          messages = (self.__name__,) + messages -        return self.plugin._log(level, plugintype, self.plugin.__name__, messages) +        return self.pyfile.plugin._log(level, plugintype, self.pyfile.plugin.__name__, messages)      def load_image(self, image): @@ -56,26 +56,33 @@ class OCR(Plugin):          self.image = self.image.point(lambda a: a * value + 10) -    def run(self, command): +    def call_cmd(self, command, *args, **kwargs):          """          Run a command          """ -        popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +        call = [command] + args +        self.log_debug("EXECUTE " + " ".join(call)) + +        call = map(encode, call) +        popen = subprocess.Popen(call, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)          popen.wait() +          output = popen.stdout.read() + " | " + popen.stderr.read() +          popen.stdout.close()          popen.stderr.close() +          self.log_debug("Tesseract ReturnCode %d" % popen.returncode, "Output: %s" % output)      def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None):          # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")          try: -            tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.classname), "wb") +            tmpTif = open(fsjoin("tmp", "tmpTif_%s.tif" % self.classname), "wb")              tmpTif.close()              # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") -            tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.classname), "wb") +            tmpTxt = open(fsjoin("tmp", "tmpTxt_%s.txt" % self.classname), "wb")              tmpTxt.close()          except IOError, e: @@ -86,18 +93,18 @@ class OCR(Plugin):          self.image.save(tmpTif.name, 'TIFF')          if os.name is "nt": -            tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")] +            command = os.path.join(pypath, "tesseract", "tesseract.exe")          else: -            tessparams = ["tesseract"] +            command = "tesseract" -        tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]) +        args = [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]          if pagesegmode: -            tessparams.extend(["-psm", str(pagesegmode)]) +            args.extend(["-psm", str(pagesegmode)])          if subset and (digits or lowercase or uppercase):              # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") -            with open(fs_join("tmp", "tmpSub_%s.subset" % self.classname), "wb") as tmpSub: +            with open(fsjoin("tmp", "tmpSub_%s.subset" % self.classname), "wb") as tmpSub:                  tmpSub.write("tessedit_char_whitelist ")                  if digits: @@ -108,11 +115,11 @@ class OCR(Plugin):                      tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")                  tmpSub.write("\n") -                tessparams.append("nobatch") -                tessparams.append(os.path.abspath(tmpSub.name)) +                args.append("nobatch") +                args.append(os.path.abspath(tmpSub.name))          self.log_debug("Running tesseract...") -        self.run(tessparams) +        self.call_cmd(command, *args)          self.log_debug("Reading txt...")          try: @@ -123,14 +130,12 @@ class OCR(Plugin):              self.result_captcha = ""          self.log_info(_("OCR result: ") + self.result_captcha) -        try: -            os.remove(tmpTif.name) -            os.remove(tmpTxt.name) -            if subset and (digits or lowercase or uppercase): -                os.remove(tmpSub.name) -        except OSError, e: -            self.log_warning(e) +        self.remove(tmpTif.name, trash=False) +        self.remove(tmpTxt.name, trash=False) + +        if subset and (digits or lowercase or uppercase): +            self.remove(tmpSub.name, trash=False)      def recognize(self, name): @@ -162,34 +167,34 @@ class OCR(Plugin):          for x in xrange(w):              for y in xrange(h): -                if pixels[x, y] == 255: +                if pixels[x, y] is 255:                      continue                  #: No point in processing white pixels since we only want to remove black pixel                  count = 0                  try: -                    if pixels[x - 1, y - 1] != 255: +                    if pixels[x - 1, y - 1] is not 255:                          count += 1 -                    if pixels[x - 1, y] != 255: +                    if pixels[x - 1, y] is not 255:                          count += 1 -                    if pixels[x - 1, y + 1] != 255: +                    if pixels[x - 1, y + 1] is not 255:                          count += 1 -                    if pixels[x, y + 1] != 255: +                    if pixels[x, y + 1] is not 255:                          count += 1 -                    if pixels[x + 1, y + 1] != 255: +                    if pixels[x + 1, y + 1] is not 255:                          count += 1 -                    if pixels[x + 1, y] != 255: +                    if pixels[x + 1, y] is not 255:                          count += 1 -                    if pixels[x + 1, y - 1] != 255: +                    if pixels[x + 1, y - 1] is not 255:                          count += 1 -                    if pixels[x, y - 1] != 255: +                    if pixels[x, y - 1] is not 255:                          count += 1                  except Exception: @@ -203,7 +208,7 @@ class OCR(Plugin):          #: Second pass: this time set all 1's to 255 (white)          for x in xrange(w):              for y in xrange(h): -                if pixels[x, y] == 1: +                if pixels[x, y] is 1:                      pixels[x, y] = 255          self.pixels = pixels @@ -218,7 +223,7 @@ class OCR(Plugin):          for x in xrange(w):              for y in xrange(h): -                if pixels[x, y] == 0: +                if pixels[x, y] is 0:                      pixels[x, y] = 155          highest = {} @@ -234,7 +239,7 @@ class OCR(Plugin):              for x in xrange(w):                  for y in xrange(h): -                    if pixels[x, y] == 0: +                    if pixels[x, y] is 0:                          pixels[x, y] = 255              count = {} @@ -242,14 +247,14 @@ class OCR(Plugin):              for x in xrange(w):                  count[x] = 0                  for y in xrange(h): -                    if pixels[x, y] == 155: +                    if pixels[x, y] is 155:                          count[x] += 1              sum = 0              cnt = 0              for x in count.values(): -                if x != 0: +                if x is not 0:                      sum += x                      cnt += 1 @@ -275,10 +280,10 @@ class OCR(Plugin):          for x in xrange(w):              for y in xrange(h): -                if pixels[x, y] == 0: +                if pixels[x, y] is 0:                      pixels[x, y] = 255 -                if pixels[x, y] == 155: +                if pixels[x, y] is 155:                      pixels[x, y] = 0          self.pixels = pixels @@ -295,7 +300,7 @@ class OCR(Plugin):          for x in xrange(width):              black_pixel_in_col = False              for y in xrange(height): -                if pixels[x, y] != 255: +                if pixels[x, y] is not 255:                      if not started:                          started = True                          firstX = x | 
