diff options
Diffstat (limited to 'pyload/plugin/ocr')
| -rw-r--r-- | pyload/plugin/ocr/GigasizeCom.py | 24 | ||||
| -rw-r--r-- | pyload/plugin/ocr/LinksaveIn.py | 158 | ||||
| -rw-r--r-- | pyload/plugin/ocr/NetloadIn.py | 29 | ||||
| -rw-r--r-- | pyload/plugin/ocr/ShareonlineBiz.py | 39 | ||||
| -rw-r--r-- | pyload/plugin/ocr/__init__.py | 1 | 
5 files changed, 251 insertions, 0 deletions
Adds four site-specific captcha OCR plugins (GigasizeCom, LinksaveIn,
NetloadIn, ShareonlineBiz) and the package marker under pyload/plugin/ocr.
Each plugin subclasses pyload.plugin.OCR.OCR and implements
get_captcha(image), which returns the recognised captcha text.

Review note: LinksaveIn.get_captcha now saves the per-letter debug images
through self.data_dir. The previous text used the name `ocr`, which is not
defined in that module and raised NameError on the first letter. The blob
hash on the LinksaveIn.py index line predates this one-line correction.

diff --git a/pyload/plugin/ocr/GigasizeCom.py b/pyload/plugin/ocr/GigasizeCom.py
new file mode 100644
index 000000000..f818cdb41
--- /dev/null
+++ b/pyload/plugin/ocr/GigasizeCom.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+from pyload.plugin.OCR import OCR
+
+
+class GigasizeCom(OCR):
+    __name    = "GigasizeCom"
+    __type    = "ocr"
+    __version = "0.10"
+
+    __description = """Gigasize.com ocr plugin"""
+    __license     = "GPLv3"
+    __authors     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    def __init__(self):
+        OCR.__init__(self)
+
+
+    def get_captcha(self, image):
+        self.load_image(image)
+        self.threshold(2.8)
+        self.run_tesser(True, False, False, True)
+        return self.result_captcha
diff --git a/pyload/plugin/ocr/LinksaveIn.py b/pyload/plugin/ocr/LinksaveIn.py
new file mode 100644
index 000000000..cd0fe96f2
--- /dev/null
+++ b/pyload/plugin/ocr/LinksaveIn.py
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+
+try:
+    from PIL import Image
+except ImportError:
+    import Image
+
+from glob import glob
+from os import sep
+from os.path import abspath, dirname
+
+from pyload.plugin.OCR import OCR
+
+
+class LinksaveIn(OCR):
+    __name    = "LinksaveIn"
+    __type    = "ocr"
+    __version = "0.10"
+
+    __description = """Linksave.in ocr plugin"""
+    __license     = "GPLv3"
+    __authors     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    def __init__(self):
+        OCR.__init__(self)
+        self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep
+
+
+    def load_image(self, image):
+        im = Image.open(image)
+        frame_nr = 0
+
+        lut = im.resize((256, 1))
+        lut.putdata(range(256))
+        lut = list(lut.convert("RGB").getdata())
+
+        new = Image.new("RGB", im.size)
+        npix = new.load()
+        while True:
+            try:
+                im.seek(frame_nr)
+            except EOFError:
+                break
+            frame = im.copy()
+            pix = frame.load()
+            for x in xrange(frame.size[0]):
+                for y in xrange(frame.size[1]):
+                    if lut[pix[x, y]] != (0,0,0):
+                        npix[x, y] = lut[pix[x, y]]
+            frame_nr += 1
+        new.save(self.data_dir+"unblacked.png")
+        self.image = new.copy()
+        self.pixels = self.image.load()
+        self.result_captcha = ''
+
+
+    def get_bg(self):
+        stat = {}
+        cstat = {}
+        img = self.image.convert("P")
+        for bgpath in glob(self.data_dir+"bg/*.gif"):
+            stat[bgpath] = 0
+            bg = Image.open(bgpath)
+
+            bglut = bg.resize((256, 1))
+            bglut.putdata(range(256))
+            bglut = list(bglut.convert("RGB").getdata())
+
+            lut = img.resize((256, 1))
+            lut.putdata(range(256))
+            lut = list(lut.convert("RGB").getdata())
+
+            bgpix = bg.load()
+            pix = img.load()
+            for x in xrange(bg.size[0]):
+                for y in xrange(bg.size[1]):
+                    rgb_bg = bglut[bgpix[x, y]]
+                    rgb_c = lut[pix[x, y]]
+                    try:
+                        cstat[rgb_c] += 1
+                    except Exception:
+                        cstat[rgb_c] = 1
+                    if rgb_bg == rgb_c:
+                        stat[bgpath] += 1
+        max_p = 0
+        bg = ""
+        for bgpath, value in stat.iteritems():
+            if max_p < value:
+                bg = bgpath
+                max_p = value
+        return bg
+
+
+    def substract_bg(self, bgpath):
+        bg = Image.open(bgpath)
+        img = self.image.convert("P")
+
+        bglut = bg.resize((256, 1))
+        bglut.putdata(range(256))
+        bglut = list(bglut.convert("RGB").getdata())
+
+        lut = img.resize((256, 1))
+        lut.putdata(range(256))
+        lut = list(lut.convert("RGB").getdata())
+
+        bgpix = bg.load()
+        pix = img.load()
+        orgpix = self.image.load()
+        for x in xrange(bg.size[0]):
+            for y in xrange(bg.size[1]):
+                rgb_bg = bglut[bgpix[x, y]]
+                rgb_c = lut[pix[x, y]]
+                if rgb_c == rgb_bg:
+                    orgpix[x, y] = (255,255,255)
+
+
+    def eval_black_white(self):
+        new = Image.new("RGB", (140, 75))
+        pix = new.load()
+        orgpix = self.image.load()
+        thresh = 4
+        for x in xrange(new.size[0]):
+            for y in xrange(new.size[1]):
+                rgb = orgpix[x, y]
+                r, g, b = rgb
+                pix[x, y] = (255,255,255)
+                if r > max(b, g)+thresh:
+                    pix[x, y] = (0,0,0)
+                if g < min(r, b):
+                    pix[x, y] = (0,0,0)
+                if g > max(r, b)+thresh:
+                    pix[x, y] = (0,0,0)
+                if b > max(r, g)+thresh:
+                    pix[x, y] = (0,0,0)
+        self.image = new
+        self.pixels = self.image.load()
+
+
+    def get_captcha(self, image):
+        self.load_image(image)
+        bg = self.get_bg()
+        self.substract_bg(bg)
+        self.eval_black_white()
+        self.to_greyscale()
+        self.image.save(self.data_dir+"cleaned_pass1.png")
+        self.clean(4)
+        self.clean(4)
+        self.image.save(self.data_dir+"cleaned_pass2.png")
+        letters = self.split_captcha_letters()
+        final = ""
+        for n, letter in enumerate(letters):
+            self.image = letter
+            self.image.save(self.data_dir+"letter%d.png" % n)
+            self.run_tesser(True, True, False, False)
+            final += self.result_captcha
+
+        return final
diff --git a/pyload/plugin/ocr/NetloadIn.py b/pyload/plugin/ocr/NetloadIn.py
new file mode 100644
index 000000000..f15ab4449
--- /dev/null
+++ b/pyload/plugin/ocr/NetloadIn.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+from pyload.plugin.OCR import OCR
+
+
+class NetloadIn(OCR):
+    __name    = "NetloadIn"
+    __type    = "ocr"
+    __version = "0.10"
+
+    __description = """Netload.in ocr plugin"""
+    __license     = "GPLv3"
+    __authors     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    def __init__(self):
+        OCR.__init__(self)
+
+
+    def get_captcha(self, image):
+        self.load_image(image)
+        self.to_greyscale()
+        self.clean(3)
+        self.clean(3)
+        self.run_tesser(True, True, False, False)
+
+        self.result_captcha = self.result_captcha.replace(" ", "")[:4] # cut to 4 numbers
+
+        return self.result_captcha
diff --git a/pyload/plugin/ocr/ShareonlineBiz.py b/pyload/plugin/ocr/ShareonlineBiz.py
new file mode 100644
index 000000000..103a9f96f
--- /dev/null
+++ b/pyload/plugin/ocr/ShareonlineBiz.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+from pyload.plugin.OCR import OCR
+
+
+class ShareonlineBiz(OCR):
+    __name    = "ShareonlineBiz"
+    __type    = "ocr"
+    __version = "0.10"
+
+    __description = """Shareonline.biz ocr plugin"""
+    __license     = "GPLv3"
+    __authors     = [("RaNaN", "RaNaN@pyload.org")]
+
+
+    def __init__(self):
+        OCR.__init__(self)
+
+
+    def get_captcha(self, image):
+        self.load_image(image)
+        self.to_greyscale()
+        self.image = self.image.resize((160, 50))
+        self.pixels = self.image.load()
+        self.threshold(1.85)
+        #self.eval_black_white(240)
+        #self.derotate_by_average()
+
+        letters = self.split_captcha_letters()
+
+        final = ""
+        for letter in letters:
+            self.image = letter
+            self.run_tesser(True, True, False, False)
+            final += self.result_captcha
+
+        return final
+
+        #tesseract at 60%
diff --git a/pyload/plugin/ocr/__init__.py b/pyload/plugin/ocr/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/pyload/plugin/ocr/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
