diff options
Diffstat (limited to 'module/plugins/captcha')
| -rw-r--r-- | module/plugins/captcha/GigasizeCom.py | 11 | ||||
| -rw-r--r-- | module/plugins/captcha/LinksaveIn.py | 23 | ||||
| -rw-r--r-- | module/plugins/captcha/NetloadIn.py | 11 | ||||
| -rw-r--r-- | module/plugins/captcha/ShareonlineBiz.py | 11 | ||||
| -rw-r--r-- | module/plugins/captcha/captcha.py | 56 | 
5 files changed, 69 insertions, 43 deletions
| diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py index add3ffc57..99f432d12 100644 --- a/module/plugins/captcha/GigasizeCom.py +++ b/module/plugins/captcha/GigasizeCom.py @@ -1,21 +1,22 @@  # -*- coding: utf-8 -*- -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR  class GigasizeCom(OCR): -    __name__ = "GigasizeCom" -    __type__ = "ocr" +    __name__    = "GigasizeCom" +    __type__    = "ocr"      __version__ = "0.1"      __description__ = """Gigasize.com ocr plugin""" -    __author_name__ = "pyLoad Team" -    __author_mail__ = "admin@pyload.org" +    __license__     = "GPLv3" +    __authors__     = [("pyLoad Team", "admin@pyload.org")]      def __init__(self):          OCR.__init__(self) +      def get_captcha(self, image):          self.load_image(image)          self.threshold(2.8) diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py index dd5ac7b98..41673d8a6 100644 --- a/module/plugins/captcha/LinksaveIn.py +++ b/module/plugins/captcha/LinksaveIn.py @@ -1,27 +1,32 @@  # -*- coding: utf-8 -*- -from PIL import Image +try: +    from PIL import Image +except ImportError: +    import Image +  from glob import glob  from os import sep  from os.path import abspath, dirname -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR  class LinksaveIn(OCR): -    __name__ = "LinksaveIn" -    __type__ = "ocr" +    __name__    = "LinksaveIn" +    __type__    = "ocr"      __version__ = "0.1"      __description__ = """Linksave.in ocr plugin""" -    __author_name__ = "pyLoad Team" -    __author_mail__ = "admin@pyload.org" +    __license__     = "GPLv3" +    __authors__     = [("pyLoad Team", "admin@pyload.org")]      def __init__(self):          OCR.__init__(self)          self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep +      def load_image(self, image):          im = Image.open(image)          frame_nr = 0 @@ -49,6 +54,7 @@ class LinksaveIn(OCR):          self.pixels = self.image.load()          self.result_captcha = '' +      def get_bg(self):          stat = {}          cstat = {} @@ -79,12 +85,13 @@ class LinksaveIn(OCR):                          stat[bgpath] += 1          max_p = 0          bg = "" -        for bgpath, value in stat.items(): +        for bgpath, value in stat.iteritems():              if max_p < value:                  bg = bgpath                  max_p = value          return bg +      def substract_bg(self, bgpath):          bg = Image.open(bgpath)          img = self.image.convert("P") @@ -107,6 +114,7 @@ class LinksaveIn(OCR):                  if rgb_c == rgb_bg:                      orgpix[x, y] = (255,255,255) +      def eval_black_white(self):          new = Image.new("RGB", (140, 75))          pix = new.load() @@ -128,6 +136,7 @@ class LinksaveIn(OCR):          self.image = new          self.pixels = self.image.load() +      def get_captcha(self, image):          self.load_image(image)          bg = self.get_bg() diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py index cb6cb9264..fc8eecf59 100644 --- a/module/plugins/captcha/NetloadIn.py +++ b/module/plugins/captcha/NetloadIn.py @@ -1,21 +1,22 @@  # -*- coding: utf-8 -*- -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR  class NetloadIn(OCR): -    __name__ = "NetloadIn" -    __type__ = "ocr" +    __name__    = "NetloadIn" +    __type__    = "ocr"      __version__ = "0.1"      __description__ = """Netload.in ocr plugin""" -    __author_name__ = "pyLoad Team" -    __author_mail__ = "admin@pyload.org" +    __license__     = "GPLv3" +    __authors__     = [("pyLoad Team", "admin@pyload.org")]      def __init__(self):          OCR.__init__(self) +      def get_captcha(self, image):          self.load_image(image)          self.to_greyscale() diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py index aab4e9da0..6e513941d 100644 --- a/module/plugins/captcha/ShareonlineBiz.py +++ b/module/plugins/captcha/ShareonlineBiz.py @@ -1,21 +1,22 @@  # -*- coding: utf-8 -*- -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR  class ShareonlineBiz(OCR): -    __name__ = "ShareonlineBiz" -    __type__ = "ocr" +    __name__    = "ShareonlineBiz" +    __type__    = "ocr"      __version__ = "0.1"      __description__ = """Shareonline.biz ocr plugin""" -    __author_name__ = "RaNaN" -    __author_mail__ = "RaNaN@pyload.org" +    __license__     = "GPLv3" +    __authors__     = [("RaNaN", "RaNaN@pyload.org")]      def __init__(self):          OCR.__init__(self) +      def get_captcha(self, image):          self.load_image(image)          self.to_greyscale() diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index cc07f50cf..b67ce9b9e 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -2,11 +2,11 @@  from __future__ import with_statement -import GifImagePlugin -import Image -import JpegImagePlugin -import PngImagePlugin -import TiffImagePlugin +try: +    from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin +except ImportError: +    import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin +  import logging  import os  import subprocess @@ -16,30 +16,34 @@ from os.path import abspath, join  class OCR(object): -    __name__ = "OCR" -    __type__ = "ocr" +    __name__    = "OCR" +    __type__    = "ocr"      __version__ = "0.1"      __description__ = """OCR base plugin""" -    __author_name__ = "pyLoad Team" -    __author_mail__ = "admin@pyload.org" +    __license__     = "GPLv3" +    __authors__     = [("pyLoad Team", "admin@pyload.org")]      def __init__(self):          self.logger = logging.getLogger("log") +      def load_image(self, image):          self.image = Image.open(image)          self.pixels = self.image.load()          self.result_captcha = '' +      def unload(self):          """delete all tmp images"""          pass +      def threshold(self, value):          self.image = self.image.point(lambda a: a * value + 10) +      def run(self, command):          """Run a command""" @@ -50,29 +54,32 @@ class OCR(object):          popen.stderr.close()          self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) +      def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): -        #self.logger.debug("create tmp tif") +        #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") +        try: +            tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") +            tmpTif.close() + +            #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") +            tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") +            tmpTxt.close() -        #tmp = tempfile.NamedTemporaryFile(suffix=".tif") -        tmp = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") -        tmp.close() -        #self.logger.debug("create tmp txt") -        #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") -        tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") -        tmpTxt.close() +        except IOError, e: +            self.logError(e) +            return          self.logger.debug("save tiff") -        self.image.save(tmp.name, 'TIFF') +        self.image.save(tmpTif.name, 'TIFF')          if os.name == "nt":              tessparams = [join(pypath, "tesseract", "tesseract.exe")]          else:              tessparams = ["tesseract"] -        tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] ) +        tessparams.extend( [abspath(tmpTif.name), abspath(tmpTxt.name).replace(".txt", "")] )          if subset and (digits or lowercase or uppercase): -            #self.logger.debug("create temp subset config")              #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")              tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb")              tmpSub.write("tessedit_char_whitelist ") @@ -99,22 +106,25 @@ class OCR(object):          self.logger.debug(self.result_captcha)          try: -            os.remove(tmp.name) +            os.remove(tmpTif.name)              os.remove(tmpTxt.name)              if subset and (digits or lowercase or uppercase):                  os.remove(tmpSub.name)          except:              pass +      def get_captcha(self, name):          raise NotImplementedError +      def to_greyscale(self):          if self.image.mode != 'L':              self.image = self.image.convert('L')          self.pixels = self.image.load() +      def eval_black_white(self, limit):          self.pixels = self.image.load()          w, h = self.image.size @@ -125,6 +135,7 @@ class OCR(object):                  else:                      self.pixels[x, y] = 0 +      def clean(self, allowed):          pixels = self.pixels @@ -170,6 +181,7 @@ class OCR(object):          self.pixels = pixels +      def derotate_by_average(self):          """rotate by checking each angle and guess most suitable""" @@ -244,6 +256,7 @@ class OCR(object):          self.pixels = pixels +      def split_captcha_letters(self):          captcha = self.image          started = False @@ -283,6 +296,7 @@ class OCR(object):          return letters +      def correct(self, values, var=None):          if var:              result = var | 
