diff options
| -rw-r--r-- | module/plugins/captcha/LinksaveIn.py | 16 | ||||
| -rw-r--r-- | module/plugins/captcha/MegauploadCom.py | 2 | ||||
| -rw-r--r-- | module/plugins/captcha/NetloadIn.py | 6 | ||||
| -rw-r--r-- | module/plugins/captcha/ShareonlineBiz.py | 12 | ||||
| -rw-r--r-- | module/plugins/captcha/captcha.py | 20 | ||||
| -rw-r--r-- | module/plugins/hoster/NetloadIn.py | 10 | ||||
| -rw-r--r-- | module/plugins/hoster/ShareonlineBiz.py | 1 | 
7 files changed, 30 insertions, 37 deletions
| diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py index d6f61e362..22b801273 100644 --- a/module/plugins/captcha/LinksaveIn.py +++ b/module/plugins/captcha/LinksaveIn.py @@ -118,20 +118,6 @@ class LinksaveIn(OCR):          self.image = new          self.pixels = self.image.load() -    def run_tesser(self): -        self.logger.debug("create tmp tif") -        tmp = tempfile.NamedTemporaryFile(suffix=".tif") -        self.logger.debug("create tmp txt") -        tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") -        self.logger.debug("save tiff") -        self.image.save(tmp.name, 'TIFF') -        self.logger.debug("run tesseract") -        self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", ""), "nobatch", self.data_dir+"tesser_conf"]) -        self.logger.debug("read txt") - -        with open(tmpTxt.name, 'r') as f: -            self.result_captcha = f.read().replace("\n", "") -      def get_captcha(self, image):          self.load_image(image)          bg = self.get_bg() @@ -147,7 +133,7 @@ class LinksaveIn(OCR):          for n, letter in enumerate(letters):              self.image = letter              self.image.save(ocr.data_dir+"letter%d.png" % n) -            self.run_tesser() +            self.run_tesser(True, True, False, False)              final += self.result_captcha          return final diff --git a/module/plugins/captcha/MegauploadCom.py b/module/plugins/captcha/MegauploadCom.py index 374bcd678..da8ab2cb9 100644 --- a/module/plugins/captcha/MegauploadCom.py +++ b/module/plugins/captcha/MegauploadCom.py @@ -6,7 +6,7 @@ class MegauploadCom(OCR):      def get_captcha(self, image):          self.load_image(image) -        self.run_tesser() +        self.run_tesser(True, True, False, True)          return self.result_captcha  if __name__ == '__main__': diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py index 9799a6a2b..c99a0744c 100644 --- a/module/plugins/captcha/NetloadIn.py +++ b/module/plugins/captcha/NetloadIn.py @@ -9,11 +9,7 @@ class NetloadIn(OCR):          self.to_greyscale()          self.clean(3)          self.clean(3) -        self.run_tesser() - -        self.correct({ -        ("$", "g"): "5", -        }) +        self.run_tesser(True, True, False, False)          return self.result_captcha diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py index 91124f181..7bd5d7960 100644 --- a/module/plugins/captcha/ShareonlineBiz.py +++ b/module/plugins/captcha/ShareonlineBiz.py @@ -37,19 +37,9 @@ class ShareonlineBiz(OCR):          final = ""          for letter in letters:              self.image = letter -            self.run_tesser() +            self.run_tesser(True, True, False, False)              final += self.result_captcha -        #replace common errors -        final = self.correct({ -        "A": "4", -        "‘5": "3", -        ("‘1", "T"): "7", -        ("‘L", "B", "'L"): "2", -        "b": "6", -        ("I", "X"): "1" -        }, final) -          return final          #tesseract at 60% diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index 283b171e0..452952533 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -82,15 +82,31 @@ class OCR(object):          self.image.save(tmp)          self.result_captcha = self.run(['gocr', tmp.name]).replace("\n", "") -    def run_tesser(self): +    def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True ):          self.logger.debug("create tmp tif")          tmp = tempfile.NamedTemporaryFile(suffix=".tif")          self.logger.debug("create tmp txt")          tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")          self.logger.debug("save tiff")          self.image.save(tmp.name, 'TIFF') + +        tessparams = ['tesseract', tmp.name, tmpTxt.name.replace(".txt", "") + +        if subset and (digits or lowercase or uppercase): +            self.logger.debug("create temp subset config") +            tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") +            tmpSub.write("tessedit_char_whitelist ") +            if digits: +                tmpSub.write("0123456789") +            if lowercase: +                tmpSub.write("abcdefghijklmnopqrstuvwxyz") +            if uppercase: +                tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ") +            tessparams.append("nobatch") +            tessparams.append(tmpSub.name) +          self.logger.debug("run tesseract") -        self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", "")]) +        self.run(tessparams)          self.logger.debug("read txt")          with open(tmpTxt.name, 'r') as f: diff --git a/module/plugins/hoster/NetloadIn.py b/module/plugins/hoster/NetloadIn.py index 0b7bcd27f..9891828a2 100644 --- a/module/plugins/hoster/NetloadIn.py +++ b/module/plugins/hoster/NetloadIn.py @@ -57,6 +57,7 @@ class NetloadIn(Plugin):                  thread.wait(self.parent)                  pyfile.status.url = self.get_file_url() +            return True          else:              return False @@ -69,7 +70,10 @@ class NetloadIn(Plugin):              apiurl = "http://netload.in/share/fileinfos2.php"              src = self.req.load(apiurl, cookies=False, get={"file_id": match.group(1)})              self.api_data = {} -            if not src == "unknown file_data": +            if src == "unknown_server_data": +                self.api_data = False +                self.html[0] = self.req.load(self.parent.url, cookies=False) +            elif not src == "unknown file_data":                  lines = src.split(";")                  self.api_data["exists"] = True                  self.api_data["fileid"] = lines[0] @@ -124,7 +128,7 @@ class NetloadIn(Plugin):          self.time_plus_wait = time() + wait_seconds      def get_file_name(self): -        if self.api_data["filename"]: +        if self.api_data and self.api_data["filename"]:              return self.api_data["filename"]          elif self.html[0]:              file_name_pattern = '\t\t\t(.+)<span style="color: #8d8d8d;">' @@ -134,7 +138,7 @@ class NetloadIn(Plugin):          return self.parent.url      def file_exists(self): -        if self.api_data["exists"]: +        if self.api_data and self.api_data["exists"]:              return self.api_data["exists"]          elif self.html[0] and re.search(r"The file has been deleted", self.html[0]) == None:              return True diff --git a/module/plugins/hoster/ShareonlineBiz.py b/module/plugins/hoster/ShareonlineBiz.py index ede810bbd..b76e83568 100644 --- a/module/plugins/hoster/ShareonlineBiz.py +++ b/module/plugins/hoster/ShareonlineBiz.py @@ -46,6 +46,7 @@ class ShareonlineBiz(Plugin):              pyfile.status.waituntil = self.time_plus_wait              pyfile.status.url = self.get_file_url()              pyfile.status.want_reconnect = self.want_reconnect +            return True          else:              return False | 
