From cca1c563a0acc1c5dbd0552b4d610f1325f53559 Mon Sep 17 00:00:00 2001 From: mkaay Date: Mon, 31 Jan 2011 19:06:22 +0100 Subject: added captchatrader.com support, fixed lof.cc --- module/config/default.conf | 3 + module/lib/MultipartPostHandler.py | 139 +++++++++++++++++++++++++++++ module/lib/captchatrader.py | 119 ++++++++++++++++++++++++ module/network/MultipartPostHandler.py | 139 ----------------------------- module/plugins/Plugin.py | 44 +++++++-- module/plugins/ReCaptcha.py | 4 +- module/plugins/container/CCF.py | 2 +- module/plugins/crypter/LofCc.py | 2 +- module/plugins/crypter/SerienjunkiesOrg.py | 2 +- pyLoadCore.py | 4 +- 10 files changed, 308 insertions(+), 150 deletions(-) create mode 100644 module/lib/MultipartPostHandler.py create mode 100644 module/lib/captchatrader.py delete mode 100644 module/network/MultipartPostHandler.py diff --git a/module/config/default.conf b/module/config/default.conf index a9b64dd92..7d7b84854 100644 --- a/module/config/default.conf +++ b/module/config/default.conf @@ -59,3 +59,6 @@ proxy - "Proxy": http;socks4;socks5 type : "Protocol" = http str username : "Username" = None str password : "Password" = None +captchatrader - "CaptchaTrader": + str username : "Username" = + str password : "Password" = diff --git a/module/lib/MultipartPostHandler.py b/module/lib/MultipartPostHandler.py new file mode 100644 index 000000000..94aee0193 --- /dev/null +++ b/module/lib/MultipartPostHandler.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +#### +# 02/2006 Will Holcomb +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# 7/26/07 Slightly modified by Brian Schneider +# in order to support unicode files ( multipart_encode function ) +""" +Usage: + Enables the use of multipart/form-data for posting forms + +Inspirations: + Upload files in python: + http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 + urllib2_file: + Fabien Seisen: + +Example: + import MultipartPostHandler, urllib2, cookielib + + cookies = cookielib.CookieJar() + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), + MultipartPostHandler.MultipartPostHandler) + params = { "username" : "bob", "password" : "riviera", + "file" : open("filename", "rb") } + opener.open("http://wwww.bobsite.com/upload/", params) + +Further Example: + The main function of this file is a sample which downloads a page and + then uploads it to the W3C validator. +""" + +from urllib import urlencode +from urllib2 import BaseHandler, HTTPHandler, build_opener +import mimetools, mimetypes +from os import write, remove +from cStringIO import StringIO + +class Callable: + def __init__(self, anycallable): + self.__call__ = anycallable + +# Controls how sequences are uncoded. If true, elements may be given multiple values by +# assigning a sequence. +doseq = 1 + +class MultipartPostHandler(BaseHandler): + handler_order = HTTPHandler.handler_order - 10 # needs to run first + + def http_request(self, request): + data = request.get_data() + if data is not None and type(data) != str: + v_files = [] + v_vars = [] + try: + for(key, value) in data.items(): + if type(value) == file: + v_files.append((key, value)) + else: + v_vars.append((key, value)) + except TypeError: + systype, value, traceback = sys.exc_info() + raise TypeError, "not a valid non-string sequence or mapping object", traceback + + if len(v_files) == 0: + data = urlencode(v_vars, doseq) + else: + boundary, data = self.multipart_encode(v_vars, v_files) + + contenttype = 'multipart/form-data; boundary=%s' % boundary + if(request.has_header('Content-Type') + and request.get_header('Content-Type').find('multipart/form-data') != 0): + print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data') + request.add_unredirected_header('Content-Type', contenttype) + + request.add_data(data) + + return request + + def multipart_encode(vars, files, boundary = None, buf = None): + if boundary is None: + boundary = mimetools.choose_boundary() + if buf is None: + buf = StringIO() + for(key, value) in vars: + buf.write('--%s\r\n' % boundary) + buf.write('Content-Disposition: form-data; name="%s"' % key) + buf.write('\r\n\r\n' + value + '\r\n') + for(key, fd) in files: + #file_size = os.fstat(fd.fileno())[stat.ST_SIZE] + filename = fd.name.split('/')[-1] + contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' + buf.write('--%s\r\n' % boundary) + buf.write('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename)) + buf.write('Content-Type: %s\r\n' % contenttype) + # buffer += 'Content-Length: %s\r\n' % file_size + fd.seek(0) + buf.write('\r\n' + fd.read() + '\r\n') + buf.write('--' + boundary + '--\r\n\r\n') + buf = buf.getvalue() + return boundary, buf + multipart_encode = Callable(multipart_encode) + + https_request = http_request + +def main(): + import tempfile, sys + + validatorURL = "http://validator.w3.org/check" + opener = build_opener(MultipartPostHandler) + + def validateFile(url): + temp = tempfile.mkstemp(suffix=".html") + write(temp[0], opener.open(url).read()) + params = { "ss" : "0", # show source + "doctype" : "Inline", + "uploaded_file" : open(temp[1], "rb") } + print opener.open(validatorURL, params).read() + remove(temp[1]) + + if len(sys.argv[1:]) > 0: + for arg in sys.argv[1:]: + validateFile(arg) + else: + validateFile("http://www.google.com") + +if __name__=="__main__": + main() diff --git a/module/lib/captchatrader.py b/module/lib/captchatrader.py new file mode 100644 index 000000000..171944c28 --- /dev/null +++ b/module/lib/captchatrader.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + @author: mkaay +""" + +from json import loads +from urllib2 import build_opener +from MultipartPostHandler import MultipartPostHandler + +PYLOAD_KEY = "9f65e7f381c3af2b076ea680ae96b0b7" + +opener = build_opener(MultipartPostHandler) + +class CaptchaTraderException(Exception): + def __init__(self, err): + self.err = err + + def getCode(self): + return self.err + + def __str__(self): + return "" % self.err + + def __repr__(self): + return "" % self.err + +class CaptchaTrader(): + SUBMIT_URL = "http://captchatrader.com/api/submit" + RESPOND_URL = "http://captchatrader.com/api/respond" + GETCREDITS_URL = "http://captchatrader.com/api/get_credits/username:%(user)s/password:%(password)s/" + + def __init__(self, user, password, api_key=PYLOAD_KEY): + self.api_key = api_key + self.user = user + self.password = password + + def getCredits(self): + json = opener.open(CaptchaTrader.GETCREDITS_URL % {"user":self.user, "password":self.password}).read() + response = loads(json) + if response[0] < 0: + raise CaptchaTraderException(response[1]) + else: + return response[1] + + def submit(self, captcha, captchaType="file", match=None): + if not self.api_key: + raise CaptchaTraderException("No API Key Specified!") + if type(captcha) == str and captchaType == "file": + raise CaptchaTraderException("Invalid Type") + assert captchaType in ("file", "url-jpg", "url-jpeg", "url-png", "url-bmp") + json = opener.open(CaptchaTrader.SUBMIT_URL, data={"api_key":self.api_key, + "username":self.user, + "password":self.password, + "value":captcha, + "type":captchaType}).read() + response = loads(json) + if response[0] < 0: + raise CaptchaTraderException(response[1]) + + class Result(): + def __init__(self, api, ticket, result): + self.api = api + self.ticket = ticket + self.result = result + + def getTicketID(self): + return self.ticket + + def getResult(self): + return self.result + + def success(self): + self.sendResponse(True) + + def fail(self): + self.sendResponse(False) + + def sendResponse(self, success): + self.api.respond(self.ticket, success) + + return Result(self, response[0], response[1]) + + def respond(self, ticket, success): + json = opener.open(CaptchaTrader.RESPOND_URL, data={"is_correct":1 if success else 0, + "username":self.user, + "password":self.password, + "ticket":ticket}).read() + response = loads(json) + if response[0] < 0: + raise CaptchaTraderException(response[1]) + +if __name__ == "__main__": + ct = CaptchaTrader("", "") + print "credits", ct.getCredits() + + print "testing..." + + result = ct.submit(open("test_captcha.jpg", "rb")) + print "result", result.getResult() + if result.getResult() == "bettand trifting": + result.success() + print "captcha recognized" + else: + result.fail() + print "captcha not recognized" diff --git a/module/network/MultipartPostHandler.py b/module/network/MultipartPostHandler.py deleted file mode 100644 index 94aee0193..000000000 --- a/module/network/MultipartPostHandler.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -#### -# 02/2006 Will Holcomb -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# 7/26/07 Slightly modified by Brian Schneider -# in order to support unicode files ( multipart_encode function ) -""" -Usage: - Enables the use of multipart/form-data for posting forms - -Inspirations: - Upload files in python: - http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 - urllib2_file: - Fabien Seisen: - -Example: - import MultipartPostHandler, urllib2, cookielib - - cookies = cookielib.CookieJar() - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), - MultipartPostHandler.MultipartPostHandler) - params = { "username" : "bob", "password" : "riviera", - "file" : open("filename", "rb") } - opener.open("http://wwww.bobsite.com/upload/", params) - -Further Example: - The main function of this file is a sample which downloads a page and - then uploads it to the W3C validator. -""" - -from urllib import urlencode -from urllib2 import BaseHandler, HTTPHandler, build_opener -import mimetools, mimetypes -from os import write, remove -from cStringIO import StringIO - -class Callable: - def __init__(self, anycallable): - self.__call__ = anycallable - -# Controls how sequences are uncoded. If true, elements may be given multiple values by -# assigning a sequence. -doseq = 1 - -class MultipartPostHandler(BaseHandler): - handler_order = HTTPHandler.handler_order - 10 # needs to run first - - def http_request(self, request): - data = request.get_data() - if data is not None and type(data) != str: - v_files = [] - v_vars = [] - try: - for(key, value) in data.items(): - if type(value) == file: - v_files.append((key, value)) - else: - v_vars.append((key, value)) - except TypeError: - systype, value, traceback = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", traceback - - if len(v_files) == 0: - data = urlencode(v_vars, doseq) - else: - boundary, data = self.multipart_encode(v_vars, v_files) - - contenttype = 'multipart/form-data; boundary=%s' % boundary - if(request.has_header('Content-Type') - and request.get_header('Content-Type').find('multipart/form-data') != 0): - print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data') - request.add_unredirected_header('Content-Type', contenttype) - - request.add_data(data) - - return request - - def multipart_encode(vars, files, boundary = None, buf = None): - if boundary is None: - boundary = mimetools.choose_boundary() - if buf is None: - buf = StringIO() - for(key, value) in vars: - buf.write('--%s\r\n' % boundary) - buf.write('Content-Disposition: form-data; name="%s"' % key) - buf.write('\r\n\r\n' + value + '\r\n') - for(key, fd) in files: - #file_size = os.fstat(fd.fileno())[stat.ST_SIZE] - filename = fd.name.split('/')[-1] - contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream' - buf.write('--%s\r\n' % boundary) - buf.write('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename)) - buf.write('Content-Type: %s\r\n' % contenttype) - # buffer += 'Content-Length: %s\r\n' % file_size - fd.seek(0) - buf.write('\r\n' + fd.read() + '\r\n') - buf.write('--' + boundary + '--\r\n\r\n') - buf = buf.getvalue() - return boundary, buf - multipart_encode = Callable(multipart_encode) - - https_request = http_request - -def main(): - import tempfile, sys - - validatorURL = "http://validator.w3.org/check" - opener = build_opener(MultipartPostHandler) - - def validateFile(url): - temp = tempfile.mkstemp(suffix=".html") - write(temp[0], opener.open(url).read()) - params = { "ss" : "0", # show source - "doctype" : "Inline", - "uploaded_file" : open(temp[1], "rb") } - print opener.open(validatorURL, params).read() - remove(temp[1]) - - if len(sys.argv[1:]) > 0: - for arg in sys.argv[1:]: - validateFile(arg) - else: - validateFile("http://www.google.com") - -if __name__=="__main__": - main() diff --git a/module/plugins/Plugin.py b/module/plugins/Plugin.py index 8e48e0ff3..3630e5222 100644 --- a/module/plugins/Plugin.py +++ b/module/plugins/Plugin.py @@ -38,6 +38,8 @@ if os.name != "nt": from itertools import islice +from thread import start_new_thread + from module.utils import save_join def chunks(iterable, size): @@ -108,6 +110,7 @@ class Plugin(object): self.lastDownload = "" # location where the last call to download was saved self.lastCheck = None #re match of last checked matched self.js = self.core.js # js engine + self.ctresult = None self.html = None #some plugins store html code here @@ -218,15 +221,17 @@ class Plugin(object): def retry(self): """ begin again from the beginning """ + if self.ctresult: + self.self.ctresult.fail() raise Retry - def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False): + def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, imgtype="jpg"): """ loads the catpcha and decrypt it or ask the user for input """ content = self.load(url, get=get, post=post, cookies=cookies) id = ("%.2f" % time())[-6:] - temp = open(join("tmp","tmpCaptcha_%s_%s" % (self.__name__, id)), "wb") + temp = open(join("tmp","tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") temp.write(content) temp.close() @@ -245,17 +250,46 @@ class Plugin(object): ocr = Ocr() result = ocr.get_captcha(temp.name) else: + captchaManager = self.core.captchaManager task = captchaManager.newTask(self) - task.setCaptcha(content, None) #@TODO mimetype really needed? + task.setCaptcha(content, imgtype) task.setWaiting() + + ct = None + if self.core.config["captchatrader"]["username"] and self.core.config["captchatrader"]["password"]: + task.setWatingForUser(exclusive=True) + from module.lib.captchatrader import CaptchaTrader + ct = CaptchaTrader(self.core.config["captchatrader"]["username"], self.core.config["captchatrader"]["password"]) + if ct.getCredits < 10: + self.log.info("Not enough credits for CaptchaTrader") + task.setWaiting() + else: + self.log.info("Submitting to CaptchaTrader") + def threaded(ct): + cf = open(join("tmp","tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "rb") + try: + result = ct.submit(cf) + except: + self.log.warning("CaptchaTrader error!") + if self.core.debug: + from traceback import print_exc + print_exc() + ct = None + task.setWaiting() + else: + self.ctresult = result + task.setResult(result.getResult()) + task.setDone() + start_new_thread(threaded, (ct, )) + while not task.getStatus() == "done": if not self.core.isClientConnected(): task.removeTask() #temp.unlink(temp.name) - if has_plugin: + if has_plugin and not ct: self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) - else: + elif not ct: self.fail(_("No Client connected for captcha decrypting")) if self.pyfile.abort: task.removeTask() diff --git a/module/plugins/ReCaptcha.py b/module/plugins/ReCaptcha.py index d29530a64..ec366695a 100644 --- a/module/plugins/ReCaptcha.py +++ b/module/plugins/ReCaptcha.py @@ -5,13 +5,13 @@ class ReCaptcha(): self.plugin = plugin def challenge(self, id): - js = self.plugin.req.load("http://api.recaptcha.net/challenge", get={"k":id}, cookies=True) + js = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", get={"k":id}, cookies=True) try: challenge = re.search("challenge : '(.*?)',", js).group(1) server = re.search("server : '(.*?)',", js).group(1) except: self.plugin.fail("recaptcha error") - result = self.plugin.decryptCaptcha("%simage"%server, get={"c":challenge}, cookies=True) + result = self.plugin.decryptCaptcha("%simage"%server, get={"c":challenge}, cookies=True, imgtype="jpg") return challenge, result diff --git a/module/plugins/container/CCF.py b/module/plugins/container/CCF.py index 90502c001..d7da2f93b 100644 --- a/module/plugins/container/CCF.py +++ b/module/plugins/container/CCF.py @@ -5,7 +5,7 @@ import re from urllib2 import build_opener from module.plugins.Container import Container -from module.network.MultipartPostHandler import MultipartPostHandler +from module.lib.MultipartPostHandler import MultipartPostHandler from os import makedirs from os.path import exists, join diff --git a/module/plugins/crypter/LofCc.py b/module/plugins/crypter/LofCc.py index cd3a6fe4d..3785ce600 100644 --- a/module/plugins/crypter/LofCc.py +++ b/module/plugins/crypter/LofCc.py @@ -23,7 +23,7 @@ class LofCc(Crypter): def decrypt(self, pyfile): html = self.req.load(self.pyfile.url, cookies=True) - m = re.search(r"src=\"http://api.recaptcha.net/challenge\?k=(.*?)\">", html) + m = re.search(r"src=\"http://www.google.com/recaptcha/api/challenge\?k=(.*?)\">", html) if not m: self.offline() diff --git a/module/plugins/crypter/SerienjunkiesOrg.py b/module/plugins/crypter/SerienjunkiesOrg.py index 193f6b417..7d637369e 100644 --- a/module/plugins/crypter/SerienjunkiesOrg.py +++ b/module/plugins/crypter/SerienjunkiesOrg.py @@ -131,7 +131,7 @@ class SerienjunkiesOrg(Crypter): self.retry() captchaUrl = "http://download.serienjunkies.org"+captchaTag["src"] - result = self.decryptCaptcha(str(captchaUrl)) + result = self.decryptCaptcha(str(captchaUrl), imgtype="png") sinp = form.find(attrs={"name":"s"}) self.req.lastUrl = url diff --git a/pyLoadCore.py b/pyLoadCore.py index d9bbc4040..f207bf2e5 100755 --- a/pyLoadCore.py +++ b/pyLoadCore.py @@ -776,7 +776,9 @@ class ServerMethods(): def get_task_status(self, tid): self.core.lastClientConnected = time.time() - return self.core.captchaManager.getTaskFromID(tid).getStatus() + t = self.core.captchaManager.getTaskFromID(tid) + if t: + return t.getStatus() def set_captcha_result(self, tid, result): self.core.lastClientConnected = time.time() -- cgit v1.2.3