From 64c2f6e7d91cc6502fe31ffe8c3bd032f6e415c1 Mon Sep 17 00:00:00 2001 From: Paul King Date: Sat, 7 May 2011 00:44:17 +0100 Subject: Fix 294 - refactor plugin --- module/plugins/hoster/FilesonicCom.py | 307 +++++++++++++++++++--------------- 1 file changed, 168 insertions(+), 139 deletions(-) (limited to 'module/plugins/hoster/FilesonicCom.py') diff --git a/module/plugins/hoster/FilesonicCom.py b/module/plugins/hoster/FilesonicCom.py index 03bba3a64..b08c1d192 100644 --- a/module/plugins/hoster/FilesonicCom.py +++ b/module/plugins/hoster/FilesonicCom.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import re +import string from module.plugins.Hoster import Hoster from module.plugins.ReCaptcha import ReCaptcha @@ -10,156 +11,190 @@ from module.plugins.Plugin import chunks from module.network.RequestFactory import getURL from module.utils import decode +try: + from json import loads as json_loads +except ImportError: # pragma: no cover + from module.lib.simplejson import loads as json_loads def getInfo(urls): - for chunk in chunks(urls, 15): - # Always use "filesonic.com" for file's url - # mod_link contains the right url to check - orig_link = "".join(chunk) - mod_link = orig_link - url_datas = re.search("(.+)(filesonic\..+)/file/(.+)", orig_link) - if url_datas: - mod_link = url_datas.group(1) + "filesonic.com" + "/file/" + url_datas.group(3) - - page = getURL("http://www." + getDomain() + "/link-checker", post={"links": mod_link}).decode("utf8", "ignore") - - found = re.findall(r'\s+([^<]+)\s+([^<]+)\s+(.+) MB\s+\s+([^<]+)
\s+
\s+\s+', page, re.MULTILINE) - result = [] - for src, name, size, status in found: - result.append((name, float(size)*1024*1024, 2 if status == "Available" else 1, src)) + for chunk in chunks(urls, 20): + result=[] + ids=dict() + for url in chunk: + id = getId(url) + if id: + ids[id]=url + else: + result.append((none,0,1,url)) + + if len(ids) > 0: + check_url="http://api.filesonic.com/link?method=getInfo&format=json&ids=" + ",".join(ids.keys()) + response = json_loads(getURL(check_url).decode("utf8","ignore")) + for item in response["FSApi_Link"]["getInfo"]["response"]["links"]: + if item["status"] != "AVAILABLE": + result.append((None,0,1,ids[item["id"]])) + else: + result.append((item["filename"],item["size"],2,ids[str(item["id"])])) yield result - -def getDomain(): - html = decode(getURL("http://api.filesonic.com/utility?method=getFilesonicDomainForCurrentIp&format=xml")) - return re.search(r"response>.*?(filesonic\..*?).*?)(?P.(filesonic|sharingmatrix)\..+?)(?P/.*)' + FILE_ID_PATTERN = r'/file/(?P([a-z][0-9]+/)?[0-9]+)(/.*)?' + FILE_LINK_PATTERN = r'

Start download' + WAIT_TIME_PATTERN = r'countDownDelay = (?P\d+)' + WAIT_TM_PATTERN = r"name='tm' value='(.*?)' />" + WAIT_TM_HASH_PATTERN = r"name='tm_hash' value='(.*?)' />" + CAPTCHA_TYPE1_PATTERN = r'Recaptcha.create\("(.*?)",' + CAPTCHA_TYPE2_PATTERN = r'id="recaptcha_image">Download (.*?) for free on Filesonic.com', self.html) - if name: - self.pyfile.name = name.group(1) - else: - self.offline() - - if 'The page you are trying to access was not found.' in self.html: - self.offline() - if self.account: - self.download(pyfile.url) + self.pyfile.url = self.checkFile(self.pyfile.url) + + if self.premium: + self.downloadPremium() + else: + self.downloadFree() + + def checkFile(self,url): + id = getId(url) + self.log.debug("%s: file id is %s" % (self.__name__,id)) + if id: + # Use the api to check the current status of the file and fixup data + check_url="http://api.filesonic.com/link?method=getInfo&format=json&ids=%s" % id + result = json_loads(self.load(check_url).decode("utf8","ignore")) + item = result["FSApi_Link"]["getInfo"]["response"]["links"][0] + self.log.debug("%s: api check returns %s" % (self.__name__,item)) + + if item["status"] != "AVAILABLE": + self.offline() + if item["is_password_protected"] != 0: + self.fail("This file is password protected") + if item["is_premium_only"] != 0 and not self.premium: + self.fail("need premium account for file") + self.pyfile.name=item["filename"] + + # Fix the url and resolve the domain to the correct regional variation + url=item["url"] + urlparts = re.search(self.URL_DOMAIN_PATTERN,url) + if urlparts: + url = urlparts.group("prefix")+self.getDomain()+urlparts.group("suffix") + self.log.debug("%s: localised url is %s" % (self.__name__,url)) + return url else: - self.download(self.getFileUrl()) + self.fail("Invalid URL") - def getFileUrl(self): - part_1_link = re.search("(.+/file/\d+/)", self.url).group(1) - link = part_1_link + re.search(r'href="(.*?start=1.*?)"', self.html).group(1) - self.html = self.load(link) + def getDomain(self): + result = json_loads(self.load("http://api.filesonic.com/utility?method=getFilesonicDomainForCurrentIp&format=json").decode("utf8","ignore")) + self.log.debug("%s: response to get domain %s" % (self.__name__,result)) + return result["FSApi_Utility"]["getFilesonicDomainForCurrentIp"]["response"] + + def downloadPremium(self): + self.log.debug("%s: Premium download" % self.__name__) + self.download(self.pyfile.url) + + def downloadFree(self): + self.log.debug("%s: Free download" % self.__name__) + # Get initial page + self.html = self.load(self.pyfile.url) + url = self.pyfile.url + "?start=1" + self.html = self.load(url) self.handleErrors() - realLinkRegexp = "

Start download now!

" - url = re.search(realLinkRegexp, self.html) - - if not url: - if "This file is available for premium users only." in self.html: - self.fail("Need premium account.") - - countDownDelay = re.search("countDownDelay = (\\d+)", self.html) - if countDownDelay: - wait_time = int(countDownDelay.group(1)) - - if wait_time > 300: - self.wantReconnect = True - - self.setWait(wait_time) - self.log.info("%s: Waiting %d seconds." % (self.__name__, wait_time)) - self.wait() - - tm = re.search("name='tm' value='(.*?)' />", self.html) - tm_hash = re.search("name='tm_hash' value='(.*?)' />", self.html) - - if tm and tm_hash: - tm = tm.group(1) - tm_hash = tm_hash.group(1) - else: - self.html = self.load(link) + finalUrl = re.search(self.FILE_LINK_PATTERN, self.html) + + if not finalUrl: + self.doWait(url) + self.doWait(url) + + chall = re.search(self.CAPTCHA_TYPE1_PATTERN, self.html) + chall2 = re.search(self.CAPTCHA_TYPE2_PATTERN, self.html) + if chall or chall2: + for i in range(5): + + re_captcha = ReCaptcha(self) + if chall: + self.log.debug("%s: Captcha type1" % self.__name__) + challenge, result = re_captcha.challenge(chall.group(1)) + else: + self.log.debug("%s: Captcha type2" % self.__name__) + server = chall2.group(1) + challenge = chall2.group(2) + result = re_captcha.result(server, challenge) + + postData = {"recaptcha_challenge_field": challenge, + "recaptcha_response_field" : result} + + self.html = self.load(url, post=postData) self.handleErrors() - url = re.search(realLinkRegexp, self.html) - if not url: - if "This file is available for premium users only." in self.html: - self.fail("Need premium account.") - - countDownDelay = re.search("countDownDelay = (\\d+)", self.html) - if countDownDelay: - wait_time = int(countDownDelay.group(1)) - - if wait_time > 300: - self.wantReconnect = True - - self.setWait(wait_time) - self.log.info("%s: Waiting %d seconds." % (self.__name__, wait_time)) - self.wait() - - tm = re.search("name='tm' value='(.*?)' />", self.html).group(1) - tm_hash = re.search("name='tm_hash' value='(.*?)' />", self.html).group(1) - - self.html = self.load(self.url + "?start=1", post={"tm":tm,"tm_hash":tm_hash}) - + chall = re.search(self.CAPTCHA_TYPE1_PATTERN, self.html) + chall2 = re.search(self.CAPTCHA_TYPE2_PATTERN, self.html) + + if chall or chall2: + self.invalidCaptcha() + else: + self.correctCaptcha() + break + + finalUrl = re.search(self.FILE_LINK_PATTERN, self.html) + + if not finalUrl: + self.fail("Couldn't find free download link") + + self.log.debug("%s: got download url %s" % (self.__name__, finalUrl.group(1))) + self.download(finalUrl.group(1)) + + def doWait(self, url): + # If the current page requires us to wait then wait and move to the next page as required + waitSearch = re.search(self.WAIT_TIME_PATTERN, self.html) + if waitSearch: + wait = int(waitSearch.group("wait")) + if wait > 300: + self.wantReconnect = True + + self.setWait(wait) + self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait)) + self.wait() + + tm = re.search(self.WAIT_TM_PATTERN, self.html) + tm_hash = re.search(self.WAIT_TM_HASH_PATTERN, self.html) + + if tm and tm_hash: + tm = tm.group(1) + tm_hash = tm_hash.group(1) + self.html = self.load(url, post={"tm":tm,"tm_hash":tm_hash}) + self.handleErrors() + else: + self.html = self.load(url) self.handleErrors() - - - if "Please Enter Password" in self.html: - self.fail("implement need pw") - - chall = re.search(r'Recaptcha.create\("(.*?)",', self.html) - for i in range(5): - if not chall: break - - re_captcha = ReCaptcha(self) - challenge, result = re_captcha.challenge(chall.group(1)) - - postData = {"recaptcha_challenge_field": challenge, - "recaptcha_response_field" : result} - - self.html = self.load(link, post=postData) - chall = re.search(r'Recaptcha.create\("(.*?)",', self.html) - - if chall: - self.invalidCaptcha() - else: - self.correctCaptcha() - - re_url = re.search(realLinkRegexp, self.html) - if re_url: - url = re_url.group(1) - - if not url: - self.fail("Plugin failed") - - return url - - def convertURL(self, url): - id = re.search("/file/([0-9]+(/.+)?)", url) - if not id: - id = re.search("/file/[a-z0-9]+/([0-9]+(/.+)?)", url) - result = "http://www.filesonic.com/file/" + id.group(1) - return self.getRightUrl(result) def handleErrors(self): + if "This file is available for premium users only." in self.html: + self.fail("need premium account for file") + if "The file that you're trying to download is larger than" in self.html: self.fail("need premium account for file") @@ -168,21 +203,15 @@ class FilesonicCom(Hoster): if "Free user can not download files" in self.html: self.fail("need premium account for file") - + if "Download session in progress" in self.html: self.fail("already downloading") - + if "This file is password protected" in self.html: - self.fail("This file is password protected, please one.") - + self.fail("This file is password protected") + if "An Error Occurred" in self.html: self.fail("A server error occured.") if "This file was deleted" in self.html: self.offline() - - def getRightUrl(self, url): - part_2 = re.search("http://www..+(/file.+)", url) - if not part_2: - part_2 = re.search("http://.+(/file.+)", url) - return "http://www.%s%s" % (getDomain(), part_2.group(1)) -- cgit v1.2.3