diff options
| -rw-r--r-- | captcha/NetloadIn.py | 10 | ||||
| -rw-r--r-- | captcha/ShareonlineBiz.py | 20 | ||||
| -rw-r--r-- | captcha/captcha.py | 23 | 
3 files changed, 40 insertions, 13 deletions
| diff --git a/captcha/NetloadIn.py b/captcha/NetloadIn.py index 94103f78b..9799a6a2b 100644 --- a/captcha/NetloadIn.py +++ b/captcha/NetloadIn.py @@ -10,8 +10,16 @@ class NetloadIn(OCR):          self.clean(3)          self.clean(3)          self.run_tesser() + +        self.correct({ +        ("$", "g"): "5", +        }) +          return self.result_captcha  if __name__ == '__main__': +    import urllib      ocr = NetloadIn() -    print  ocr.get_captcha('captchas/netload/captcha.php10.png') +    urllib.urlretrieve("http://netload.in/share/includes/captcha.php", "captcha.png") + +    print  ocr.get_captcha('captcha.png') diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py index 5d0eb37b8..91124f181 100644 --- a/captcha/ShareonlineBiz.py +++ b/captcha/ShareonlineBiz.py @@ -17,8 +17,6 @@  # along with this program; if not, see <http://www.gnu.org/licenses/>.  #  ### -import urllib -  from captcha import OCR  class ShareonlineBiz(OCR): @@ -43,21 +41,21 @@ class ShareonlineBiz(OCR):              final += self.result_captcha          #replace common errors -        final = final.replace("A", "4") -        final = final.replace("‘5", "3") -        final = final.replace("‘1", "7") -        final = final.replace("‘L", "2") -        final = final.replace("T", "7") -        final = final.replace("b", "6") -        final = final.replace("B", "2") -        final = final.replace("I", "1") -        final = final.replace("X", "1") +        final = self.correct({ +        "A": "4", +        "‘5": "3", +        ("‘1", "T"): "7", +        ("‘L", "B", "'L"): "2", +        "b": "6", +        ("I", "X"): "1" +        }, final)          return final          #tesseract at 60%  if __name__ == '__main__': +    import urllib      ocr = ShareonlineBiz()      urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")      print  ocr.get_captcha('captcha.jpeg') diff --git a/captcha/captcha.py b/captcha/captcha.py index 7092e21c1..22c097f38 100644 --- 
def correct(self, values, var=None):
    """Apply a table of OCR misread corrections to a string.

    Every key of *values* is either a single substring or a group
    (tuple/frozenset) of substrings; each occurrence of those substrings
    is replaced by the mapped value.  Rules are applied in the mapping's
    iteration order, so they should not depend on one another.

    :param values: mapping of ``wrong`` or ``(wrong, ...)`` to ``right``.
    :param var: the string to correct.  When ``None`` (the default),
        ``self.result_captcha`` is corrected in place instead.
    :return: the corrected string when *var* was supplied, else ``None``.
    """
    # Test against None, not truthiness: an empty OCR result ("") passed
    # as *var* must come back as "" rather than clobbering
    # self.result_captcha and returning None.
    in_place = var is None
    result = self.result_captcha if in_place else var

    for key, item in values.items():
        # Only real containers count as groups; any kind of string
        # (including unicode / str subclasses) is one replacement target
        # and must not be split into individual characters.
        group = key if isinstance(key, (tuple, frozenset)) else (key,)
        for expr in group:
            result = result.replace(expr, item)

    if in_place:
        self.result_captcha = result
        return None
    return result
