diff options
| author | 2012-02-08 21:27:49 +0100 | |
|---|---|---|
| committer | 2012-02-08 21:27:49 +0100 | |
| commit | f226ac102cee63721fcbaffc60dcdf75c242d5e6 (patch) | |
| tree | c90ebaa2563c8126d37c85cc737e2962cfe7d5ca /module/plugins/hoster | |
| parent | Handle Oron TOS errors (diff) | |
| download | pyload-f226ac102cee63721fcbaffc60dcdf75c242d5e6.tar.xz | |
filefactory premium, uloz.to new url pattern
Diffstat (limited to 'module/plugins/hoster')
| -rw-r--r-- | module/plugins/hoster/FilefactoryCom.py | 115 | ||||
| -rw-r--r-- | module/plugins/hoster/UlozTo.py | 82 | 
2 files changed, 107 insertions, 90 deletions
| diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py index 37b2bb7ce..17520a6c3 100644 --- a/module/plugins/hoster/FilefactoryCom.py +++ b/module/plugins/hoster/FilefactoryCom.py @@ -1,88 +1,95 @@  # -*- coding: utf-8 -*- -from __future__ import with_statement -  from module.network.RequestFactory import getURL  from module.plugins.Hoster import Hoster  from module.plugins.ReCaptcha import ReCaptcha +from module.utils import parseFileSize +from module.plugins.Plugin import chunks  import re -def getInfo(urls): -    result = [] +def checkFile(plugin, urls): +    file_info = [] +    url_dict = {}      for url in urls: -         -        # Get file info html -        # @TODO: Force responses in english language so current patterns will be right -        html = getURL(url) -        if re.search(FilefactoryCom.FILE_OFFLINE_PATTERN, html): -            result.append((url, 0, 1, url)) +        url_dict[re.search(plugin.__pattern__, url).group('id')] = (url, 0, 0, url) +    url_ids = url_dict.keys() +    urls = map(lambda url_id: 'http://www.filefactory.com/file/' + url_id, url_ids) -        # Name -        name = re.search(FilefactoryCom.FILE_NAME_PATTERN, html).group('name') -        m = re.search(FilefactoryCom.FILE_INFO_PATTERN, html) +    html = getURL("http://filefactory.com/tool/links.php", post = {"func": "links", "links": "\n".join(urls)}, decode=True)    -        # Size -        value = float(m.group('size')) -        units = m.group('units') -        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units]  -        size = int(value*1024**pow) +    for m in re.finditer(plugin.LC_INFO_PATTERN, html): +        if m.group('id') in url_ids: +            url_dict[m.group('id')] = (m.group('name'), parseFileSize(m.group('size')), 2, url_dict[m.group('id')][3]) +             +    for m in re.finditer(plugin.LC_OFFLINE_PATTERN, html): +        if m.group('id') in url_ids: +            url_dict[m.group('id')] = (url_dict[m.group('id')][0], 0, 1, url_dict[m.group('id')][3]) -        # Return info -        result.append((name, size, 2, url)) -         -    yield result +    file_info = url_dict.values() +    return file_info +     class FilefactoryCom(Hoster):      __name__ = "FilefactoryCom"      __type__ = "hoster" -    __pattern__ = r"http://(www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" # URLs given out are often longer but this is the requirement -    __version__ = "0.3" +    __pattern__ = r"http://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+).*" # URLs given out are often longer but this is the requirement +    __version__ = "0.31"      __description__ = """Filefactory.Com File Download Hoster""" -    __author_name__ = ("paulking") +    __author_name__ = ("paulking", "zoidberg") +     +    LC_INFO_PATTERN = r'<tr class="(even|odd)">\s*<td>\s*<a href="http://www.filefactory.com/file/(?P<id>\w+)[^"]*">(?P<name>[^<]+)</a>\s*.*\s*</td>\s*<td>(?P<size>[0-9.]+ \w+)</td>' +    LC_OFFLINE_PATTERN = r'<li class="(even|odd)">\s*<div class="metadata">http://www.filefactory.com/file/(?P<id>\w+)/</div>'      FILE_OFFLINE_PATTERN = r'<title>File Not Found'      FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>'      FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded' +          FILE_CHECK_PATTERN = r'check:\'(?P<check>.*?)\''      CAPTCHA_KEY_PATTERN = r'Recaptcha.create\("(?P<recaptchakey>.*?)",'       WAIT_PATH_PATTERN = r'path:"(?P<path>.*?)"'      WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"'      FILE_URL_PATTERN = r'<a href="(?P<url>.*?)" id="downloadLinkTarget">' -         +                  def setup(self): -        self.multiDL = False +        self.multiDL = self.resumeDownloads = self.premium      def process(self, pyfile): -     -        self.pyfile = pyfile +        # Check file +        pyfile.name, pyfile.size, status, self.url = checkFile(self, [pyfile.url])[0]         +        if status != 2: self.offline() +        self.logDebug("File Name: %s Size: %d" % (pyfile.name, pyfile.size))  -        # Force responses language to US English -        self.req.cj.setCookie("filefactory.com", "ff_locale","") - -        # Load main page -        self.html = self.load(self.pyfile.url, ref=False, decode=True) - -        # Check offline -        if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None: -            self.offline() +        # Handle downloading +        url = self.checkDirectDownload(pyfile.url) +        if url: +            self.download(url) +        else:                 +            self.html = self.load(pyfile.url, decode = True) +                       +            if self.premium: +                self.handlePremium() +            else: +                self.handleFree() +               +    def checkDirectDownload(self, url): +        for i in range(5): +            header = self.load(url, just_header = True)            +            if 'location' in header: +                url = header['location'].strip()  +                if not url.startswith("http://"): +                    url = "http://www.filefactory.com" + url +                self.logDebug('URL: ' + url) +            elif 'content-disposition' in header: +                return url -        # File id -        self.file_id = re.match(self.__pattern__, self.pyfile.url).group('id') -        self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id)) -            -        # File name -        self.pyfile.name = re.search(self.FILE_NAME_PATTERN, self.html).group('name') - +        return False                                 +     +    def handleFree(self):                  # Check Id          self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check')          self.log.debug("%s: File check code is [%s]" % (self.__name__, self.check)) - -        # Handle free downloading -        self.handleFree() -     -    def handleFree(self): -     +                  # Resolve captcha          self.log.debug("%s: File is captcha protected" % self.__name__)          id = re.search(self.CAPTCHA_KEY_PATTERN, self.html).group('recaptchakey') @@ -138,3 +145,9 @@ class FilefactoryCom(Hoster):          self.log.debug("%s: Wrong captcha" % self.__name__)          self.invalidCaptcha() +     +    def handlePremium(self): +        self.fail('Please enable direct downloads') +         +def getInfo(urls): +    for chunk in chunks(urls, 100): yield checkFile(FilefactoryCom, chunk) diff --git a/module/plugins/hoster/UlozTo.py b/module/plugins/hoster/UlozTo.py index 5f482e189..a67e52d4d 100644 --- a/module/plugins/hoster/UlozTo.py +++ b/module/plugins/hoster/UlozTo.py @@ -21,54 +21,57 @@ from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo  def convertDecimalPrefix(m):      # decimal prefixes used in filesize and traffic -    return ("%%.%df" % {'k':3,'M':6,'G':9}[m.group(2)] % float(m.group(1))).replace('.','')      +    return ("%%.%df" % {'k':3,'M':6,'G':9}[m.group(2)] % float(m.group(1))).replace('.','')  class UlozTo(SimpleHoster):      __name__ = "UlozTo"      __type__ = "hoster" -    __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\d+/[^/?]*)" -    __version__ = "0.83" +    __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\w+/[^/?]*)" +    __version__ = "0.84"      __description__ = """uloz.to"""      __author_name__ = ("zoidberg") -    FILE_NAME_PATTERN = r'<a href="#download" class="jsShowDownload">(?P<N>[^<]+)</a>'  -    FILE_SIZE_PATTERN = r'<span id="fileSize">(?P<S>[^<]+)</span>'    -    FILE_SIZE_REPLACEMENTS = [('([0-9.]+)\s([kMG])B', convertDecimalPrefix)]        -    FILE_OFFLINE_PATTERN = ur'<title>(404 - Page not found|Stránka nenalezena|Nie można wyświetlić strony)</title>' -     -    PASSWD_PATTERN = r'<input type="password" class="text" name="file_password" id="frmfilepasswordForm-file_password" />' -    VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">'     +    FILE_NAME_PATTERN = r'<a href="#download" class="jsShowDownload">(?P<N>[^<]+)</a>' +    FILE_SIZE_PATTERN = r'<span id="fileSize">(?P<S>[^<]+)</span>' +    FILE_INFO_PATTERN = r'<p>File <strong>(?P<N>[^<]+)</strong> is password protected</p>' +    FILE_OFFLINE_PATTERN = r'<title>404 - Page not found</title>|<h1 class="h1">File was banned</h1>' +    FILE_SIZE_REPLACEMENTS = [('([0-9.]+)\s([kMG])B', convertDecimalPrefix)] +    FILE_URL_REPLACEMENTS = [(r"(?<=http://)([^/]+)", "www.ulozto.net")] + +    PASSWD_PATTERN = r'<div class="passwordProtectedFile">' +    VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">'      FREE_URL_PATTERN = r'<div class="freeDownloadForm"><form action="([^"]+)"'      PREMIUM_URL_PATTERN = r'<div class="downloadForm"><form action="([^"]+)"'      CAPTCHA_PATTERN = r'<img class="captcha" src="(.*?(\d+).png)" alt="" />' -     -    def process(self, pyfile):         -        self.url = "http://www.ulozto.net/" + re.match(self.__pattern__, pyfile.url).group('id')    -         -        self.html = self.load(self.url, decode=True) -         -        # password protected links -        passwords = self.getPassword().splitlines()        + +    def setup(self): +        self.multiDL = self.resumeDownload = True + +    def process(self, pyfile): +        pyfile.url = re.sub(r"(?<=http://)([^/]+)", "www.ulozto.net", pyfile.url) +        self.html = self.load(pyfile.url, decode = True, cookies = False) + +        passwords = self.getPassword().splitlines()          while self.PASSWD_PATTERN in self.html:              if passwords:                  password = passwords.pop(0)                  self.logInfo("Password protected link, trying " + password) -                self.html = self.load(self.url, get = {"do": "filepasswordForm-submit"}, post={"file_password": password, "fpwdsend": 'Odeslat'}, cookies=True) +                self.html = self.load(pyfile.url, get = {"do": "passwordProtectedForm-submit"}, +                    post={"password": password, "password_send": 'Send'}, cookies=True)              else:                  self.fail("No or incorrect password") -         -        self.file_info = self.getFileInfo() -                 -        # adult content     +          if re.search(self.VIPLINK_PATTERN, self.html): -            self.html = self.load(self.url, get={"disclaimer": "1"}) -         +            self.html = self.load(pyfile.url, get={"disclaimer": "1"}) + +        self.file_info = self.getFileInfo() +          if self.premium and self.checkTrafficLeft():              self.handlePremium() -        else:  +        else:              self.handleFree() -             -    def handleFree(self):     + +    def handleFree(self):          parsed_url = self.findDownloadURL(premium=False)          # get and decrypt captcha @@ -82,21 +85,21 @@ class UlozTo(SimpleHoster):              captcha_url, captcha_id = found.groups()              captcha_text = self.decryptCaptcha(captcha_url) -         +          self.log.debug('CAPTCHA_URL:' + captcha_url + ' CAPTCHA ID:' + captcha_id + ' CAPTCHA TEXT:' + captcha_text) -        # download and check         +        # download and check          self.download(parsed_url, post={"captcha[id]": captcha_id, "captcha[text]": captcha_text, "freeDownload": "Download"}, cookies=True) -        self.doCheckDownload()    -         +        self.doCheckDownload() +          self.setStorage("captcha_id", captcha_id)          self.setStorage("captcha_text", captcha_text) -     +      def handlePremium(self):          parsed_url = self.findDownloadURL(premium=True)          self.download(parsed_url, post={"download": "Download"})          self.doCheckDownload() -         +      def findDownloadURL(self, premium=False):          msg = "%s link" % ("Premium" if premium else "Free")          found = re.search(self.PREMIUM_URL_PATTERN if premium else self.FREE_URL_PATTERN, self.html) @@ -104,13 +107,13 @@ class UlozTo(SimpleHoster):          parsed_url = "http://www.ulozto.net" + found.group(1)          self.logDebug("%s: %s" % (msg, parsed_url))          return parsed_url -     +      def doCheckDownload(self):          check = self.checkDownload({              "wrong_captcha": re.compile(self.CAPTCHA_PATTERN),              "offline": re.compile(self.FILE_OFFLINE_PATTERN),              "passwd": self.PASSWD_PATTERN, -            "paralell_dl": u'<h2 class="center">Z Vašeho počítače se již stahuje</h2>' +            "paralell_dl": re.compile(r'<title>Uloz.to - Ji. stahuje.</title>')          })          if check == "wrong_captcha": @@ -123,8 +126,9 @@ class UlozTo(SimpleHoster):          elif check == "passwd":              self.fail("Wrong password")          elif check == "paralell_dl": -            self.setWait(600, True) +            self.multiDL = False +            self.setWait(300, True)              self.wait() -            self.retry()     +            self.retry() -getInfo = create_getInfo(UlozTo)        
\ No newline at end of file +getInfo = create_getInfo(UlozTo)
\ No newline at end of file | 
