FilefactoryCom: plugin rewritten

(see also bug #70)
author: Stefano <l.stickell@yahoo.it> 2013-04-07 22:31:51 +0200
committer: Stefano <l.stickell@yahoo.it> 2013-04-07 22:31:51 +0200
commit: f516aaecff9d4efa8a60af521b4e1c1965a1a249 (patch)
tree: 57103c43979a6b7f1519c6fbdb726a4be7717c01 /module
parent: FilefactoryCom: fixes #70 (diff)
download: pyload-f516aaecff9d4efa8a60af521b4e1c1965a1a249.tar.xz
1 file changed, 90 insertions, 128 deletions
diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py
index b3eb4c865..e92c1505d 100644
--- a/module/plugins/hoster/FilefactoryCom.py
+++ b/module/plugins/hoster/FilefactoryCom.py
@@ -1,159 +1,121 @@
 # -*- coding: utf-8 -*-
-from module.network.RequestFactory import getURL
-from module.plugins.Hoster import Hoster
-from module.plugins.ReCaptcha import ReCaptcha
-from module.utils import parseFileSize
-from module.plugins.Plugin import chunks
-from module.common.json_layer import json_loads
 
-import re
+############################################################################
+# This program is free software: you can redistribute it and/or modify     #
+# it under the terms of the GNU Affero General Public License as           #
+# published by the Free Software Foundation, either version 3 of the       #
+# License, or (at your option) any later version.                          #
+#                                                                          #
+# This program is distributed in the hope that it will be useful,          #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of           #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
+# GNU Affero General Public License for more details.                      #
+#                                                                          #
+# You should have received a copy of the GNU Affero General Public License #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.    #
+############################################################################
 
 # Test links (random.bin):
 # http://www.filefactory.com/file/ymxkmdud2o3/n/random.bin
 
-def checkFile(plugin, urls):
-    url_dict = {}
-    
+import re
+
+from module.plugins.internal.SimpleHoster import SimpleHoster
+from module.network.RequestFactory import getURL
+from module.utils import parseFileSize
+
+
+def getInfo(urls):
+    file_info = list()
+    list_ids = dict()
+
+    # Create a dict id:url. Will be used to retrieve original url
     for url in urls:
-        url_dict[re.search(plugin.__pattern__, url).group('id')] = (url, 0, 0, url)
-    url_ids = url_dict.keys()
-    urls = map(lambda url_id: 'http://www.filefactory.com/file/' + url_id, url_ids)
-
-    html = getURL("http://www.filefactory.com/tool/links.php", post = {"func": "links", "links": "\n".join(urls)}, decode=True)   
-        
-    for m in re.finditer(plugin.LC_INFO_PATTERN, html):
-        if m.group('id') in url_ids:
-            url_dict[m.group('id')] = (m.group('name'), parseFileSize(m.group('size')), 2, url_dict[m.group('id')][3])
-            
-    for m in re.finditer(plugin.LC_OFFLINE_PATTERN, html):
-        if m.group('id') in url_ids:
-            url_dict[m.group('id')] = (url_dict[m.group('id')][0], 0, 1, url_dict[m.group('id')][3])
-    
-    file_info = url_dict.values()
-    
+        m = re.search(FilefactoryCom.__pattern__, url)
+        list_ids[m.group('id')] = url
+
+    # WARN: There could be a limit of urls for request
+    post_data = {'func': 'links', 'links': '\n'.join(urls)}
+    rep = getURL('http://www.filefactory.com/tool/links.php', post=post_data, decode=True)
+
+    # Online links
+    for m in re.finditer(
+            r'innerText">\s*<h1 class="name">(?P<N>.+) \((?P<S>[\w.]+) (?P<U>\w+)\)</h1>\s*<p>http://www.filefactory.com/file/(?P<ID>\w+).*</p>\s*<p class="hidden size">',
+            rep):
+        file_info.append((m.group('N'), parseFileSize(m.group('S'), m.group('U')), 2, list_ids[m.group('ID')]))
+
+    # Offline links
+    for m in re.finditer(
+            r'innerText">\s*<h1>(http://www.filefactory.com/file/(?P<ID>\w+)/)</h1>\s*<p>\1</p>\s*<p class="errorResponse">Error: file not found</p>',
+            rep):
+        file_info.append((list_ids[m.group('ID')], 0, 1, list_ids[m.group('ID')]))
+
     return file_info
-   
-class FilefactoryCom(Hoster):
+
+
+class FilefactoryCom(SimpleHoster):
     __name__ = "FilefactoryCom"
     __type__ = "hoster"
-    __pattern__ = r"http://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+).*" # URLs given out are often longer but this is the requirement
-    __version__ = "0.37"
+    __pattern__ = r"https?://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)"
+    __version__ = "0.38"
     __description__ = """Filefactory.Com File Download Hoster"""
-    __author_name__ = ("paulking", "zoidberg")
-    
-    LC_INFO_PATTERN = r'<h1 class="name">(?P<name>[^<]+) \((?P<size>[0-9.]+ \w+)\)</h1>\s*<p>http://www.filefactory.com/file/(?P<id>\w+)/'
-    LC_OFFLINE_PATTERN = r'<p>http://www.filefactory.com/file/(?P<id>\w+)/</p>\s*<p class="errorResponse">'
- 
+    __author_name__ = ("stickell")
+    __author_mail__ = ("l.stickell@yahoo.it")
+
+    FILE_INFO_PATTERN = r'(?P<N>\S+)\s*</span>\s*</h1>\s*<h2>(?P<S>[\w.]+) (?P<U>\w+) file uploaded'
     FILE_OFFLINE_PATTERN = r'<title>File Not Found'
-    FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>'
-    FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded'
-    
-    FILE_CHECK_PATTERN = r'check:\s*\'(?P<check>.*?)\''
-    CAPTCHA_KEY_PATTERN = r'Recaptcha.create\(\s*"(.*?)",' 
-    WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"'
-    FILE_URL_PATTERN = r'<p[^>]*?id="downloadLinkTarget"[^>]*>\s*<a href="(?P<url>.*?)"'
-
-            
-    def setup(self):
-        self.multiDL = self.resumeDownloads = self.premium
 
     def process(self, pyfile):
-        # Check file
-        pyfile.name, pyfile.size, status, self.url = checkFile(self, [pyfile.url])[0]     
-        if status != 2: self.offline()
-        self.logDebug("File Name: %s Size: %d" % (pyfile.name, pyfile.size)) 
-        
-        # Handle downloading
-        url = self.checkDirectDownload(pyfile.url)
-        if url:
-            self.download(url)
-        else:                
-            self.html = self.load(pyfile.url, decode = True)
-                      
-            if self.premium:
-                self.handlePremium()
-            else:
-                self.handleFree()
-              
-    def checkDirectDownload(self, url):
-        for i in range(5):
-            header = self.load(url, just_header = True)           
-            if 'location' in header:
-                url = header['location'].strip() 
-                if not url.startswith("http://"):
-                    url = "http://www.filefactory.com" + url
-                self.logDebug('URL: ' + url)
-            elif 'content-disposition' in header:
-                return url
-        
-        return False                                
-    
+        if self.premium and (not self.SH_CHECK_TRAFFIC or self.checkTrafficLeft()):
+            self.handlePremium()
+        else:
+            self.handleFree()
+
     def handleFree(self):
+        self.html = self.load(self.pyfile.url, decode=True)
         if "Currently only Premium Members can download files larger than" in self.html:
             self.fail("File too large for free download")
         elif "All free download slots on this server are currently in use" in self.html:
             self.retry(50, 900, "All free slots are busy")
-             
-        url = re.search(r"document\.location\.host \+\s*'(.+)';", self.html).group(1)
-        if not url.startswith('"http://"'):
-            url = 'http://www.filefactory.com' + url
+
+        url = re.search(r"document\.location\.host \+\s*'(.+)';", self.html)
+        if not url:
+            self.parseError('Unable to detect free link')
+        url = 'http://www.filefactory.com' + url.group(1)
         self.html = self.load(url, decode=True)
 
-        direct = re.search(r'data-href-direct="(.*)" class="button', self.html).group(1)
-        waittime = re.search(r'id="startWait" value="(\d+)"', self.html).group(1)
-        self.setWait(waittime)
+        waittime = re.search(r'id="startWait" value="(\d+)"', self.html)
+        if not waittime:
+            self.parseError('Unable to detect wait time')
+        self.setWait(int(waittime.group(1)))
         self.wait()
 
-        # # Resolve captcha
-        # found = re.search(self.CAPTCHA_KEY_PATTERN, self.html)
-        # recaptcha_key = found.group(1) if found else "6LeN8roSAAAAAPdC1zy399Qei4b1BwmSBSsBN8zm"
-        # recaptcha = ReCaptcha(self)
-        #
-        # # Try up to 5 times
-        # for i in range(5):
-        #     challenge, code = recaptcha.challenge(recaptcha_key)
-        #     response = json_loads(self.load("http://www.filefactory.com/file/checkCaptcha.php",
-        #                     post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code}))
-        #     if response['status'] == 'ok':
-        #         self.correctCaptcha()
-        #         break
-        #     else:
-        #         self.invalidCaptcha()
-        # else:
-        #     self.fail("No valid captcha after 5 attempts")
-        #
-        # # This will take us to a wait screen
-        # waiturl = "http://www.filefactory.com" + response['path']
-        # self.logDebug("Fetching wait with url [%s]" % waiturl)
-        # waithtml = self.load(waiturl, decode=True)
-        # found = re.search(r'<a href="(http://www.filefactory.com/dlf/.*?)"', waithtml)
-        # waithtml = self.load(found.group(1), decode=True)
-        #
-        # # Find the wait value and wait
-        # wait = int(re.search(self.WAIT_PATTERN, waithtml).group('wait'))
-        # self.logDebug("Waiting %d seconds." % wait)
-        # self.setWait(wait, True)
-        # self.wait()
-        #
-        # # Now get the real download url and retrieve the file
-        # url = re.search(self.FILE_URL_PATTERN,waithtml).group('url')
-        # # this may either download our file or forward us to an error page
-        # self.logDebug("Download URL: %s" % url)
-        self.download(direct)
-        
+        direct = re.search(r'data-href-direct="(.*)" class="button', self.html)
+        if not direct:
+            self.parseError('Unable to detect free direct link')
+        direct = direct.group(1)
+        self.logDebug('DIRECT LINK: ' + direct)
+        self.download(direct, disposition=True)
+
         check = self.checkDownload({"multiple": "You are currently downloading too many files at once.",
                                     "error": '<div id="errorMessage">'})
 
         if check == "multiple":
-            self.setWait(15*60)
             self.logDebug("Parallel downloads detected; waiting 15 minutes")
-            self.wait()
-            self.retry()
+            self.retry(wait_time=15 * 60, reason='Parallel downloads')
         elif check == "error":
             self.fail("Unknown error")
-    
+
     def handlePremium(self):
-        self.fail('Please enable direct downloads')
-        
-def getInfo(urls):
-    for chunk in chunks(urls, 100): yield checkFile(FilefactoryCom, chunk)
+        header = self.load(self.pyfile.url, just_header=True)
+        if 'location' in header:
+            url = header['location'].strip()
+            if not url.startswith("http://"):
+                url = "http://www.filefactory.com" + url
+        elif 'content-disposition' in header:
+            url = self.pyfile.url
+        else:
+            self.parseError('Unable to detect premium direct link')
+
+        self.logDebug('DIRECT PREMIUM LINK: ' + url)
+        self.download(url, disposition=True)
author	Stefano <l.stickell@yahoo.it>	2013-04-07 22:31:51 +0200
committer	Stefano <l.stickell@yahoo.it>	2013-04-07 22:31:51 +0200
commit	f516aaecff9d4efa8a60af521b4e1c1965a1a249 (patch)
tree	57103c43979a6b7f1519c6fbdb726a4be7717c01 /module
parent	FilefactoryCom: fixes #70 (diff)
download	pyload-f516aaecff9d4efa8a60af521b4e1c1965a1a249.tar.xz