diff options
| -rw-r--r-- | module/captcha/LinksaveIn.py | 113 | ||||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/flecken_1.gif | bin | 0 -> 10744 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/flecken_2.gif | bin | 0 -> 11076 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/gewebe_fein.gif | bin | 0 -> 10504 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/gewebe_grob.gif | bin | 0 -> 10127 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/gitter.gif | bin | 0 -> 8151 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/mauer_horizontal.gif | bin | 0 -> 9105 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/mauer_vertikal.gif | bin | 0 -> 10830 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/scheckig.gif | bin | 0 -> 10214 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/bg/wellen.gif | bin | 0 -> 10041 bytes | |||
| -rw-r--r-- | module/captcha/LinksaveIn/tesser_conf | 1 | 
11 files changed, 113 insertions, 1 deletions
| diff --git a/module/captcha/LinksaveIn.py b/module/captcha/LinksaveIn.py index cd4e97f87..4219f03b5 100644 --- a/module/captcha/LinksaveIn.py +++ b/module/captcha/LinksaveIn.py @@ -1,9 +1,17 @@  from captcha import OCR  import Image +from os import sep +from os.path import dirname +from os.path import abspath +from glob import glob +import tempfile + +from pprint import pprint  class LinksaveIn(OCR):      def __init__(self):          OCR.__init__(self) +        self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep      def load_image(self, image):          im = Image.open(image) @@ -27,12 +35,115 @@ class LinksaveIn(OCR):                      if lut[pix[x, y]] != (0,0,0):                          npix[x, y] = lut[pix[x, y]]              frame_nr += 1 +        new.save(self.data_dir+"unblacked.png")          self.image = new.copy()          self.pixels = self.image.load()          self.result_captcha = '' +     +    def get_bg(self): +        stat = {} +        cstat = {} +        img = self.image.convert("P") +        for bgpath in glob(self.data_dir+"bg/*.gif"): +            stat[bgpath] = 0 +            bg = Image.open(bgpath) +             +            bglut = bg.resize((256, 1)) +            bglut.putdata(range(256)) +            bglut = list(bglut.convert("RGB").getdata()) +             +            lut = img.resize((256, 1)) +            lut.putdata(range(256)) +            lut = list(lut.convert("RGB").getdata()) +             +            bgpix = bg.load() +            pix = img.load() +            for x in range(bg.size[0]): +                for y in range(bg.size[1]): +                    rgb_bg = bglut[bgpix[x, y]] +                    rgb_c = lut[pix[x, y]] +                    try: +                        cstat[rgb_c] += 1 +                    except: +                        cstat[rgb_c] = 1 +                    if rgb_bg == rgb_c: +                        stat[bgpath] += 1 +        max_p = 0 +        bg = "" +        for bgpath, value in stat.items(): +            if max_p < value: +                bg = bgpath +                max_p = value +        return bg +     +    def substract_bg(self, bgpath): +        bg = Image.open(bgpath) +        img = self.image.convert("P") +         +        bglut = bg.resize((256, 1)) +        bglut.putdata(range(256)) +        bglut = list(bglut.convert("RGB").getdata()) +         +        lut = img.resize((256, 1)) +        lut.putdata(range(256)) +        lut = list(lut.convert("RGB").getdata()) +         +        bgpix = bg.load() +        pix = img.load() +        orgpix = self.image.load() +        for x in range(bg.size[0]): +            for y in range(bg.size[1]): +                rgb_bg = bglut[bgpix[x, y]] +                rgb_c = lut[pix[x, y]] +                if rgb_c == rgb_bg: +                    orgpix[x, y] = (255,255,255) +     +    def eval_black_white(self): +        new = Image.new("RGB", (140, 75)) +        pix = new.load() +        orgpix = self.image.load() +        thresh = 4 +        for x in range(new.size[0]): +            for y in range(new.size[1]): +                rgb = orgpix[x, y] +                r, g, b = rgb +                pix[x, y] = (255,255,255) +                if r > max(b, g)+thresh: +                    pix[x, y] = (0,0,0) +                if g < min(r, b): +                    pix[x, y] = (0,0,0) +                if g > max(r, b)+thresh: +                    pix[x, y] = (0,0,0) +                if b > max(r, g)+thresh: +                    pix[x, y] = (0,0,0) +        self.image = new +        self.pixels = self.image.load() +     +    def run_tesser(self): +        self.logger.debug("create tmp tif") +        tmp = tempfile.NamedTemporaryFile(suffix=".tif") +        self.logger.debug("create tmp txt") +        tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") +        self.logger.debug("save tiff") +        self.image.save(tmp.name, 'TIFF') +        self.logger.debug("run tesseract") +        self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", ""), "nobatch", self.data_dir+"tesser_conf"]) +        self.logger.debug("read txt") + +        with open(tmpTxt.name, 'r') as f: +            self.result_captcha = f.read().replace("\n", "")      def get_captcha(self, image):          self.load_image(image) +        bg = self.get_bg() +        self.substract_bg(bg) +        self.eval_black_white() +        self.to_greyscale() +        self.image.save(self.data_dir+"cleaned_pass1.png") +        self.clean(6) +        self.image.save(self.data_dir+"cleaned_pass2.png") +        letters = self.split_captcha_letters() +                  self.run_tesser()          return self.result_captcha @@ -42,5 +153,5 @@ if __name__ == '__main__':      ocr = LinksaveIn()      testurl = "http://linksave.in/captcha/cap.php?hsh=2229185&code=ZzHdhl3UffV3lXTH5U4b7nShXj%2Bwma1vyoNBcbc6lcc%3D"      urllib.urlretrieve(testurl, "captcha.gif") - +          print ocr.get_captcha('captcha.gif') diff --git a/module/captcha/LinksaveIn/bg/flecken_1.gif b/module/captcha/LinksaveIn/bg/flecken_1.gifBinary files differ new file mode 100644 index 000000000..df2f51217 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/flecken_1.gif diff --git a/module/captcha/LinksaveIn/bg/flecken_2.gif b/module/captcha/LinksaveIn/bg/flecken_2.gifBinary files differ new file mode 100644 index 000000000..838276188 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/flecken_2.gif diff --git a/module/captcha/LinksaveIn/bg/gewebe_fein.gif b/module/captcha/LinksaveIn/bg/gewebe_fein.gifBinary files differ new file mode 100644 index 000000000..502f18cc4 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/gewebe_fein.gif diff --git a/module/captcha/LinksaveIn/bg/gewebe_grob.gif b/module/captcha/LinksaveIn/bg/gewebe_grob.gifBinary files differ new file mode 100644 index 000000000..e66a365ad --- /dev/null +++ b/module/captcha/LinksaveIn/bg/gewebe_grob.gif diff --git a/module/captcha/LinksaveIn/bg/gitter.gif b/module/captcha/LinksaveIn/bg/gitter.gifBinary files differ new file mode 100644 index 000000000..ec52ef68d --- /dev/null +++ b/module/captcha/LinksaveIn/bg/gitter.gif diff --git a/module/captcha/LinksaveIn/bg/mauer_horizontal.gif b/module/captcha/LinksaveIn/bg/mauer_horizontal.gifBinary files differ new file mode 100644 index 000000000..3d75fafa8 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/mauer_horizontal.gif diff --git a/module/captcha/LinksaveIn/bg/mauer_vertikal.gif b/module/captcha/LinksaveIn/bg/mauer_vertikal.gifBinary files differ new file mode 100644 index 000000000..2ada6fdae --- /dev/null +++ b/module/captcha/LinksaveIn/bg/mauer_vertikal.gif diff --git a/module/captcha/LinksaveIn/bg/scheckig.gif b/module/captcha/LinksaveIn/bg/scheckig.gifBinary files differ new file mode 100644 index 000000000..8bfb45c56 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/scheckig.gif diff --git a/module/captcha/LinksaveIn/bg/wellen.gif b/module/captcha/LinksaveIn/bg/wellen.gifBinary files differ new file mode 100644 index 000000000..a181ebe74 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/wellen.gif diff --git a/module/captcha/LinksaveIn/tesser_conf b/module/captcha/LinksaveIn/tesser_conf new file mode 100644 index 000000000..34ca8fa02 --- /dev/null +++ b/module/captcha/LinksaveIn/tesser_conf @@ -0,0 +1 @@ +tessedit_char_whitelist 0123456789 | 
