From dd2b06fb71b384c291f1750a9a1ce2b160964c7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Setni=C4=8Dka?= Date: Mon, 16 Nov 2020 23:15:39 +0100 Subject: [PATCH] Resolving file-tracking links These links come from search. Part of the #12 and PR #11. --- ulozto_downloader/downloader.py | 2 +- ulozto_downloader/page.py | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/ulozto_downloader/downloader.py b/ulozto_downloader/downloader.py index 81ef5b0..9b0ac51 100644 --- a/ulozto_downloader/downloader.py +++ b/ulozto_downloader/downloader.py @@ -178,7 +178,7 @@ def download(self, url, parts=10, target_dir=""): sys.stdout.write("\033[?25l") # hide cursor self.cli_initialized = True print(colors.blue("File:\t\t") + colors.bold(page.filename)) - print(colors.blue("URL:\t\t") + url) + print(colors.blue("URL:\t\t") + page.url) print(colors.blue("Download type:\t") + download_type) print(colors.blue("Total size:\t") + colors.bold("{}MB".format(round(total_size / 1024**2, 2)))) print(colors.blue("Parts:\t\t") + "{} x {}MB".format(parts, round(part_size / 1024**2, 2))) diff --git a/ulozto_downloader/page.py b/ulozto_downloader/page.py index be28237..b3d4758 100644 --- a/ulozto_downloader/page.py +++ b/ulozto_downloader/page.py @@ -40,10 +40,6 @@ def __init__(self, url): self.url = url parsed_url = urlparse(url) - # Get file slug from URL - self.slug = parse_single(parsed_url.path, r'/file/([^\\]*)/') - if self.slug is None: - raise RuntimeError("Cannot parse file slug from Uloz.to URL") cookies = None # special case for Pornfile.cz run by Uloz.to - confirmation is needed @@ -54,13 +50,25 @@ def __init__(self, url): }) cookies = r.cookies - r = requests.get(url, cookies=cookies) + # If file is file-tracking link we need to get normal file link from it + if url.startswith('{uri.scheme}://{uri.netloc}/file-tracking/'.format(uri=parsed_url)): + r = requests.get(url, allow_redirects=False, cookies=cookies) + if 'Location' in r.headers: + self.url = r.headers['Location'] + parsed_url = urlparse(self.url) + + r = requests.get(self.url, cookies=cookies) self.baseURL = "{uri.scheme}://{uri.netloc}".format(uri=parsed_url) if r.status_code == 451: raise RuntimeError("File was deleted from Uloz.to due to legal reasons (status code 451)") elif r.status_code != 200: - raise RuntimeError("Uloz.to returned status code", r.status_code) + raise RuntimeError(f"Uloz.to returned status code {r.status_code}, file does not exist") + + # Get file slug from URL + self.slug = parse_single(parsed_url.path, r'/file/([^\\]*)/') + if self.slug is None: + raise RuntimeError("Cannot parse file slug from Uloz.to URL") self.body = r.text