Resolving file-tracking links

These links come from search results. Part of #12 and PR #11.
setnicka · Nov 16, 2020 · dd2b06f
1 parent d08c790
commit dd2b06f
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 7 deletions.
diff --git a/ulozto_downloader/downloader.py b/ulozto_downloader/downloader.py
@@ -178,7 +178,7 @@ def download(self, url, parts=10, target_dir=""):
         sys.stdout.write("\033[?25l")  # hide cursor
         self.cli_initialized = True
         print(colors.blue("File:\t\t") + colors.bold(page.filename))
-        print(colors.blue("URL:\t\t") + url)
+        print(colors.blue("URL:\t\t") + page.url)
         print(colors.blue("Download type:\t") + download_type)
         print(colors.blue("Total size:\t") + colors.bold("{}MB".format(round(total_size / 1024**2, 2))))
         print(colors.blue("Parts:\t\t") + "{} x {}MB".format(parts, round(part_size / 1024**2, 2)))

diff --git a/ulozto_downloader/page.py b/ulozto_downloader/page.py
@@ -40,10 +40,6 @@ def __init__(self, url):
 
         self.url = url
         parsed_url = urlparse(url)
-        # Get file slug from URL
-        self.slug = parse_single(parsed_url.path, r'/file/([^\\]*)/')
-        if self.slug is None:
-            raise RuntimeError("Cannot parse file slug from Uloz.to URL")
 
         cookies = None
         # special case for Pornfile.cz run by Uloz.to - confirmation is needed
@@ -54,13 +50,25 @@ def __init__(self, url):
             })
             cookies = r.cookies
 
-        r = requests.get(url, cookies=cookies)
+        # If file is file-tracking link we need to get normal file link from it
+        if url.startswith('{uri.scheme}://{uri.netloc}/file-tracking/'.format(uri=parsed_url)):
+            r = requests.get(url, allow_redirects=False, cookies=cookies)
+            if 'Location' in r.headers:
+                self.url = r.headers['Location']
+                parsed_url = urlparse(self.url)
+
+        r = requests.get(self.url, cookies=cookies)
         self.baseURL = "{uri.scheme}://{uri.netloc}".format(uri=parsed_url)
 
         if r.status_code == 451:
             raise RuntimeError("File was deleted from Uloz.to due to legal reasons (status code 451)")
         elif r.status_code != 200:
-            raise RuntimeError("Uloz.to returned status code", r.status_code)
+            raise RuntimeError(f"Uloz.to returned status code {r.status_code}, file does not exist")
+
+        # Get file slug from URL
+        self.slug = parse_single(parsed_url.path, r'/file/([^\\]*)/')
+        if self.slug is None:
+            raise RuntimeError("Cannot parse file slug from Uloz.to URL")
 
         self.body = r.text