diff options
Diffstat (limited to 'captcha')
| -rw-r--r-- | captcha/NetloadIn.py | 16 | ||||
| -rw-r--r-- | captcha/captcha.py | 55 | 
2 files changed, 70 insertions, 1 deletions
diff --git a/captcha/NetloadIn.py b/captcha/NetloadIn.py
new file mode 100644
index 000000000..a8fc38757
--- /dev/null
+++ b/captcha/NetloadIn.py
@@ -0,0 +1,16 @@
+from captcha import Ocr
+
+class NetloadIn(Ocr):
+    def __init__(self, image):
+        Ocr.__init__(self, image)
+
+    def get_captcha(self):
+        self.to_greyscale()
+        self.clean(3)
+        self.clean(3)
+        self.run_tesser()
+        return self.result_captcha
+
+if __name__ == '__main__':
+    ocr = NetloadIn('captchas/netload/captcha.php10.png')
+    print  ocr.get_captcha()
diff --git a/captcha/captcha.py b/captcha/captcha.py
index b57fa1b7e..361893fa3 100644
--- a/captcha/captcha.py
+++ b/captcha/captcha.py
@@ -5,6 +5,7 @@ import subprocess
 class Ocr(object):
     def __init__(self, image):
         self.image = Image.open(image)
+        self.pixels = self.image.load()
         self.image_name = 'captcha_clean.png'
         self.result_captcha = ''
 
@@ -17,9 +18,61 @@ class Ocr(object):
         cmd = ['gocr', self.image_name]
         self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
 
+    def run_tesser(self):
+        self.image.save('captcha.tif', 'TIFF')
+        cmd = ['tesseract', 'captcha.tif', '0']
+        self.result_captcha = subprocess.Popen(cmd)
+        self.result_captcha.wait()
+        cmd = ['cat', '0.txt']
+        self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
+
     def get_captcha(self):
         pass
-        
+
+    def to_greyscale(self):
+        if self.image.mode != 'L':
+            self.image = self.image.convert('L')
+
+        self.pixels = self.image.load()
+
+
+    def clean(self, allowed):
+        pixels = self.pixels
+
+        w, h = self.image.size
+
+        for x in xrange(w):
+            for y in xrange(h):
+           # no point in processing white pixels since we only want to remove black pixels
+                if pixels[x, y] == 255: continue
+
+                count = 0
+
+                try:
+                    if pixels[x-1, y-1] != 255: count += 1
+                    if pixels[x-1, y  ] != 255: count += 1
+                    if pixels[x-1, y+1] != 255: count += 1
+                    if pixels[x, y+1  ] != 255: count += 1
+                    if pixels[x+1, y+1] != 255: count += 1
+                    if pixels[x+1, y  ] != 255: count += 1
+                    if pixels[x+1, y-1] != 255: count += 1
+                    if pixels[x, y-1  ] != 255: count += 1
+                except:
+                    pass
+
+           # not enough neighbors are dark pixels so mark this pixel
+           # to be changed to white
+                if count < allowed:
+                    pixels[x, y] = 1
+                    
+           # second pass: this time set all 1's to 255 (white)
+        for x in xrange(w):
+            for y in xrange(h):
+                if pixels[x, y] == 1: pixels[x, y] = 255
+
+        self.pixels = pixels
+
+
 if __name__ == '__main__':
     ocr = Ocr('gigasize-com/7.jpg')
     print  ocr.get_captcha()
