diff options
| author | 2014-07-10 03:26:45 +0200 | |
|---|---|---|
| committer | 2014-07-10 03:26:45 +0200 | |
| commit | c1abc13d4dccb20f3845594c28952667573b7d0b (patch) | |
| tree | da8a8678bd804bec77ef16e864bfe2bf2e561eaf /module/plugins/ocr/ShareonlineBiz.py | |
| parent | Improved filename sanitation removing non-ascii chars. (diff) | |
| download | pyload-c1abc13d4dccb20f3845594c28952667573b7d0b.tar.xz | |
Move captcha to ocr
Diffstat (limited to 'module/plugins/ocr/ShareonlineBiz.py')
| -rw-r--r-- | module/plugins/ocr/ShareonlineBiz.py | 53 | 
1 files changed, 53 insertions, 0 deletions
# -*- coding: utf-8 -*-

#
#Copyright (C) 2009 kingzero, RaNaN
#
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 3 of the License,
#or (at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#See the GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
###
"""OCR plugin for share-online.biz captchas.

File: module/plugins/ocr/ShareonlineBiz.py
(added as a new file, mode 100644, blob index db72449d1).
"""
from module.plugins.OCR import OCR


class ShareonlineBiz(OCR):
    """Captcha reader for share-online.biz built on the generic ``OCR`` base.

    All image handling (loading, greyscale conversion, thresholding, letter
    splitting and the tesseract call) is delegated to methods inherited from
    ``OCR``; this class only fixes the order and parameters of those steps.
    """

    __name__ = "ShareonlineBiz"

    def __init__(self):
        """Initialise the underlying ``OCR`` state."""
        OCR.__init__(self)

    def get_captcha(self, image):
        """Recognise the text of a captcha image.

        :param image: passed unchanged to ``OCR.load_image`` (the demo at the
            bottom of this file passes a file path).
        :return: the concatenation of ``self.result_captcha`` after running
            tesseract on each split letter, in the order returned by
            ``split_captcha_letters``; an empty string if no letters are found.
        """
        self.load_image(image)
        self.to_greyscale()

        # Normalise to a fixed 160x50 canvas, then refresh the pixel accessor
        # because resize() returns a new image object.
        self.image = self.image.resize((160, 50))
        self.pixels = self.image.load()

        self.threshold(1.85)
        # Disabled preprocessing steps, kept for reference:
        #self.eval_black_white(240)
        #self.derotate_by_average()

        letters = self.split_captcha_letters()

        # Recognise one letter at a time; run_tesser() works on self.image and
        # leaves its answer in self.result_captcha.
        recognised = []
        for letter in letters:
            self.image = letter
            # NOTE(review): the meaning of the four flags is defined by
            # OCR.run_tesser, which is not visible here -- confirm there.
            self.run_tesser(True, True, False, False)
            recognised.append(self.result_captcha)

        # Original author's note: tesseract at 60%
        return "".join(recognised)


if __name__ == '__main__':
    # Manual smoke test: fetch a live captcha and print the recognised text.
    # urlretrieve lives in urllib.request on Python 3 and in urllib on
    # Python 2; support both so the demo runs on either interpreter.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    ocr = ShareonlineBiz()
    urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")
    print(ocr.get_captcha('captcha.jpeg'))
