diff options
Diffstat (limited to 'module/plugins/captcha')
| -rw-r--r-- | module/plugins/captcha/GigasizeCom.py | 24 | ||||
| -rw-r--r-- | module/plugins/captcha/LinksaveIn.py | 158 | ||||
| -rw-r--r-- | module/plugins/captcha/NetloadIn.py | 29 | ||||
| -rw-r--r-- | module/plugins/captcha/ShareonlineBiz.py | 39 | ||||
| -rw-r--r-- | module/plugins/captcha/__init__.py | 0 | ||||
| -rw-r--r-- | module/plugins/captcha/captcha.py | 319 | 
6 files changed, 0 insertions, 569 deletions
diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py deleted file mode 100644 index 244cf6a2a..000000000 --- a/module/plugins/captcha/GigasizeCom.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.captcha.captcha import OCR - - -class GigasizeCom(OCR): -    __name__    = "GigasizeCom" -    __type__    = "ocr" -    __version__ = "0.10" - -    __description__ = """Gigasize.com ocr plugin""" -    __license__     = "GPLv3" -    __authors__     = [("pyLoad Team", "admin@pyload.org")] - - -    def __init__(self): -        OCR.__init__(self) - - -    def get_captcha(self, image): -        self.load_image(image) -        self.threshold(2.8) -        self.run_tesser(True, False, False, True) -        return self.result_captcha diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py deleted file mode 100644 index de6b0e7ff..000000000 --- a/module/plugins/captcha/LinksaveIn.py +++ /dev/null @@ -1,158 +0,0 @@ -# -*- coding: utf-8 -*- - -try: -    from PIL import Image -except ImportError: -    import Image - -from glob import glob -from os import sep -from os.path import abspath, dirname - -from module.plugins.captcha.captcha import OCR - - -class LinksaveIn(OCR): -    __name__    = "LinksaveIn" -    __type__    = "ocr" -    __version__ = "0.10" - -    __description__ = """Linksave.in ocr plugin""" -    __license__     = "GPLv3" -    __authors__     = [("pyLoad Team", "admin@pyload.org")] - - -    def __init__(self): -        OCR.__init__(self) -        self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep - - -    def load_image(self, image): -        im = Image.open(image) -        frame_nr = 0 - -        lut = im.resize((256, 1)) -        lut.putdata(range(256)) -        lut = list(lut.convert("RGB").getdata()) - -        new = Image.new("RGB", im.size) -        npix = new.load() -        while True: -            try: -                im.seek(frame_nr) -            except EOFError: -                break -            frame = im.copy() -            pix = frame.load() -            for x in xrange(frame.size[0]): -                for y in xrange(frame.size[1]): -                    if lut[pix[x, y]] != (0,0,0): -                        npix[x, y] = lut[pix[x, y]] -            frame_nr += 1 -        new.save(self.data_dir+"unblacked.png") -        self.image = new.copy() -        self.pixels = self.image.load() -        self.result_captcha = '' - - -    def get_bg(self): -        stat = {} -        cstat = {} -        img = self.image.convert("P") -        for bgpath in glob(self.data_dir+"bg/*.gif"): -            stat[bgpath] = 0 -            bg = Image.open(bgpath) - -            bglut = bg.resize((256, 1)) -            bglut.putdata(range(256)) -            bglut = list(bglut.convert("RGB").getdata()) - -            lut = img.resize((256, 1)) -            lut.putdata(range(256)) -            lut = list(lut.convert("RGB").getdata()) - -            bgpix = bg.load() -            pix = img.load() -            for x in xrange(bg.size[0]): -                for y in xrange(bg.size[1]): -                    rgb_bg = bglut[bgpix[x, y]] -                    rgb_c = lut[pix[x, y]] -                    try: -                        cstat[rgb_c] += 1 -                    except Exception: -                        cstat[rgb_c] = 1 -                    if rgb_bg == rgb_c: -                        stat[bgpath] += 1 -        max_p = 0 -        bg = "" -        for bgpath, value in stat.iteritems(): -            if max_p < value: -                bg = bgpath -                max_p = value -        return bg - - -    def substract_bg(self, bgpath): -        bg = Image.open(bgpath) -        img = self.image.convert("P") - -        bglut = bg.resize((256, 1)) -        bglut.putdata(range(256)) -        bglut = list(bglut.convert("RGB").getdata()) - -        lut = img.resize((256, 1)) -        lut.putdata(range(256)) -        lut = list(lut.convert("RGB").getdata()) - -        bgpix = bg.load() -        pix = img.load() -        orgpix = self.image.load() -        for x in xrange(bg.size[0]): -            for y in xrange(bg.size[1]): -                rgb_bg = bglut[bgpix[x, y]] -                rgb_c = lut[pix[x, y]] -                if rgb_c == rgb_bg: -                    orgpix[x, y] = (255,255,255) - - -    def eval_black_white(self): -        new = Image.new("RGB", (140, 75)) -        pix = new.load() -        orgpix = self.image.load() -        thresh = 4 -        for x in xrange(new.size[0]): -            for y in xrange(new.size[1]): -                rgb = orgpix[x, y] -                r, g, b = rgb -                pix[x, y] = (255,255,255) -                if r > max(b, g)+thresh: -                    pix[x, y] = (0,0,0) -                if g < min(r, b): -                    pix[x, y] = (0,0,0) -                if g > max(r, b)+thresh: -                    pix[x, y] = (0,0,0) -                if b > max(r, g)+thresh: -                    pix[x, y] = (0,0,0) -        self.image = new -        self.pixels = self.image.load() - - -    def get_captcha(self, image): -        self.load_image(image) -        bg = self.get_bg() -        self.substract_bg(bg) -        self.eval_black_white() -        self.to_greyscale() -        self.image.save(self.data_dir+"cleaned_pass1.png") -        self.clean(4) -        self.clean(4) -        self.image.save(self.data_dir+"cleaned_pass2.png") -        letters = self.split_captcha_letters() -        final = "" -        for n, letter in enumerate(letters): -            self.image = letter -            self.image.save(ocr.data_dir+"letter%d.png" % n) -            self.run_tesser(True, True, False, False) -            final += self.result_captcha - -        return final diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py deleted file mode 100644 index 28eb18fb5..000000000 --- a/module/plugins/captcha/NetloadIn.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.captcha.captcha import OCR - - -class NetloadIn(OCR): -    __name__    = "NetloadIn" -    __type__    = "ocr" -    __version__ = "0.10" - -    __description__ = """Netload.in ocr plugin""" -    __license__     = "GPLv3" -    __authors__     = [("pyLoad Team", "admin@pyload.org")] - - -    def __init__(self): -        OCR.__init__(self) - - -    def get_captcha(self, image): -        self.load_image(image) -        self.to_greyscale() -        self.clean(3) -        self.clean(3) -        self.run_tesser(True, True, False, False) - -        self.result_captcha = self.result_captcha.replace(" ", "")[:4] # cut to 4 numbers - -        return self.result_captcha diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py deleted file mode 100644 index 8210e8859..000000000 --- a/module/plugins/captcha/ShareonlineBiz.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.captcha.captcha import OCR - - -class ShareonlineBiz(OCR): -    __name__    = "ShareonlineBiz" -    __type__    = "ocr" -    __version__ = "0.10" - -    __description__ = """Shareonline.biz ocr plugin""" -    __license__     = "GPLv3" -    __authors__     = [("RaNaN", "RaNaN@pyload.org")] - - -    def __init__(self): -        OCR.__init__(self) - - -    def get_captcha(self, image): -        self.load_image(image) -        self.to_greyscale() -        self.image = self.image.resize((160, 50)) -        self.pixels = self.image.load() -        self.threshold(1.85) -        #self.eval_black_white(240) -        #self.derotate_by_average() - -        letters = self.split_captcha_letters() - -        final = "" -        for letter in letters: -            self.image = letter -            self.run_tesser(True, True, False, False) -            final += self.result_captcha - -        return final - -        #tesseract at 60% diff --git a/module/plugins/captcha/__init__.py b/module/plugins/captcha/__init__.py deleted file mode 100644 index e69de29bb..000000000 --- a/module/plugins/captcha/__init__.py +++ /dev/null diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py deleted file mode 100644 index 1874ba07d..000000000 --- a/module/plugins/captcha/captcha.py +++ /dev/null @@ -1,319 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import with_statement - -try: -    from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin - -except ImportError: -    import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin - -import logging -import os -import subprocess -#import tempfile - -from module.utils import save_join - - -class OCR(object): -    __name__    = "OCR" -    __type__    = "ocr" -    __version__ = "0.11" - -    __description__ = """OCR base plugin""" -    __license__     = "GPLv3" -    __authors__     = [("pyLoad Team", "admin@pyload.org")] - - -    def __init__(self): -        self.logger = logging.getLogger("log") - - -    def load_image(self, image): -        self.image = Image.open(image) -        self.pixels = self.image.load() -        self.result_captcha = '' - - -    def unload(self): -        """delete all tmp images""" -        pass - - -    def threshold(self, value): -        self.image = self.image.point(lambda a: a * value + 10) - - -    def run(self, command): -        """Run a command""" - -        popen = subprocess.Popen(command, bufsize = -1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) -        popen.wait() -        output = popen.stdout.read() +" | "+ popen.stderr.read() -        popen.stdout.close() -        popen.stderr.close() -        self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) - - -    def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): -        #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") -        try: -            tmpTif = open(save_join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") -            tmpTif.close() - -            #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") -            tmpTxt = open(save_join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") -            tmpTxt.close() - -        except IOError, e: -            self.logError(e) -            return - -        self.logger.debug("save tiff") -        self.image.save(tmpTif.name, 'TIFF') - -        if os.name == "nt": -            tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")] -        else: -            tessparams = ["tesseract"] - -        tessparams.extend( [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")] ) - -        if subset and (digits or lowercase or uppercase): -            #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") -            with open(save_join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub: -                tmpSub.write("tessedit_char_whitelist ") - -                if digits: -                    tmpSub.write("0123456789") -                if lowercase: -                    tmpSub.write("abcdefghijklmnopqrstuvwxyz") -                if uppercase: -                    tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ") - -                tmpSub.write("\n") -                tessparams.append("nobatch") -                tessparams.append(os.path.abspath(tmpSub.name)) - -        self.logger.debug("run tesseract") -        self.run(tessparams) -        self.logger.debug("read txt") - -        try: -            with open(tmpTxt.name, 'r') as f: -                self.result_captcha = f.read().replace("\n", "") -        except Exception: -            self.result_captcha = "" - -        self.logger.debug(self.result_captcha) -        try: -            os.remove(tmpTif.name) -            os.remove(tmpTxt.name) -            if subset and (digits or lowercase or uppercase): -                os.remove(tmpSub.name) -        except Exception: -            pass - - -    def get_captcha(self, name): -        raise NotImplementedError - - -    def to_greyscale(self): -        if self.image.mode != 'L': -            self.image = self.image.convert('L') - -        self.pixels = self.image.load() - - -    def eval_black_white(self, limit): -        self.pixels = self.image.load() -        w, h = self.image.size -        for x in xrange(w): -            for y in xrange(h): -                if self.pixels[x, y] > limit: -                    self.pixels[x, y] = 255 -                else: -                    self.pixels[x, y] = 0 - - -    def clean(self, allowed): -        pixels = self.pixels - -        w, h = self.image.size - -        for x in xrange(w): -            for y in xrange(h): -                if pixels[x, y] == 255: -                    continue -                # No point in processing white pixels since we only want to remove black pixel -                count = 0 - -                try: -                    if pixels[x-1, y-1] != 255: -                        count += 1 -                    if pixels[x-1, y] != 255: -                        count += 1 -                    if pixels[x-1, y + 1] != 255: -                        count += 1 -                    if pixels[x, y + 1] != 255: -                        count += 1 -                    if pixels[x + 1, y + 1] != 255: -                        count += 1 -                    if pixels[x + 1, y] != 255: -                        count += 1 -                    if pixels[x + 1, y-1] != 255: -                        count += 1 -                    if pixels[x, y-1] != 255: -                        count += 1 -                except Exception: -                    pass - -        # not enough neighbors are dark pixels so mark this pixel -            # to be changed to white -                if count < allowed: -                    pixels[x, y] = 1 - -            # second pass: this time set all 1's to 255 (white) -        for x in xrange(w): -            for y in xrange(h): -                if pixels[x, y] == 1: -                    pixels[x, y] = 255 - -        self.pixels = pixels - - -    def derotate_by_average(self): -        """rotate by checking each angle and guess most suitable""" - -        w, h = self.image.size -        pixels = self.pixels - -        for x in xrange(w): -            for y in xrange(h): -                if pixels[x, y] == 0: -                    pixels[x, y] = 155 - -        highest = {} -        counts = {} - -        for angle in xrange(-45, 45): - -            tmpimage = self.image.rotate(angle) - -            pixels = tmpimage.load() - -            w, h = self.image.size - -            for x in xrange(w): -                for y in xrange(h): -                    if pixels[x, y] == 0: -                        pixels[x, y] = 255 - - -            count = {} - -            for x in xrange(w): -                count[x] = 0 -                for y in xrange(h): -                    if pixels[x, y] == 155: -                        count[x] += 1 - -            sum = 0 -            cnt = 0 - -            for x in count.values(): -                if x != 0: -                    sum += x -                    cnt += 1 - -            avg = sum / cnt -            counts[angle] = cnt -            highest[angle] = 0 -            for x in count.values(): -                if x > highest[angle]: -                    highest[angle] = x - -            highest[angle] = highest[angle] - avg - -        hkey = 0 -        hvalue = 0 - -        for key, value in highest.iteritems(): -            if value > hvalue: -                hkey = key -                hvalue = value - -        self.image = self.image.rotate(hkey) -        pixels = self.image.load() - -        for x in xrange(w): -            for y in xrange(h): -                if pixels[x, y] == 0: -                    pixels[x, y] = 255 - -                if pixels[x, y] == 155: -                    pixels[x, y] = 0 - -        self.pixels = pixels - - -    def split_captcha_letters(self): -        captcha = self.image -        started = False -        letters = [] -        width, height = captcha.size -        bottomY, topY = 0, height -        pixels = captcha.load() - -        for x in xrange(width): -            black_pixel_in_col = False -            for y in xrange(height): -                if pixels[x, y] != 255: -                    if not started: -                        started = True -                        firstX = x -                        lastX = x - -                    if y > bottomY: -                        bottomY = y -                    if y < topY: -                        topY = y -                    if x > lastX: -                        lastX = x - -                    black_pixel_in_col = True - -            if black_pixel_in_col is False and started is True: -                rect = (firstX, topY, lastX, bottomY) -                new_captcha = captcha.crop(rect) - -                w, h = new_captcha.size -                if w > 5 and h > 5: -                    letters.append(new_captcha) - -                started = False -                bottomY, topY = 0, height - -        return letters - - -    def correct(self, values, var=None): -        if var: -            result = var -        else: -            result = self.result_captcha - -        for key, item in values.iteritems(): - -            if key.__class__ == str: -                result = result.replace(key, item) -            else: -                for expr in key: -                    result = result.replace(expr, item) - -        if var: -            return result -        else: -            self.result_captcha = result  | 
