From b64e8197b95a5d35eaf062a2754c7c67bc832b80 Mon Sep 17 00:00:00 2001 From: Peter Taylor Date: Thu, 16 Mar 2023 08:27:43 +0000 Subject: [PATCH] Fix Path handling (#72) * Fix Path handling * Port to pathlib * Replace with re.sub * Tidied regular expression * Ensure escaping is done on single path level at a time * Version bump to 2.3.0 * pylint & black fixes --- itchiodl/__init__.py | 2 ++ itchiodl/game.py | 74 +++++++++++++++++++++----------------------- itchiodl/library.py | 23 +++++++++----- itchiodl/utils.py | 4 +-- pyproject.toml | 2 +- 5 files changed, 56 insertions(+), 49 deletions(-) diff --git a/itchiodl/__init__.py b/itchiodl/__init__.py index e2e7914..c80181a 100644 --- a/itchiodl/__init__.py +++ b/itchiodl/__init__.py @@ -1,3 +1,5 @@ +# pylint: disable=consider-using-from-import +import itchiodl.utils as utils from .login import LoginWeb, LoginAPI from .bundle import Bundle from .library import Library diff --git a/itchiodl/game.py b/itchiodl/game.py index 1f904b2..3937c49 100644 --- a/itchiodl/game.py +++ b/itchiodl/game.py @@ -1,13 +1,11 @@ import re import json -import os import urllib import datetime -import shutil +from pathlib import Path import requests - -import itchiodl.utils +from itchiodl import utils class Game: @@ -31,6 +29,11 @@ def __init__(self, data): self.files = [] self.downloads = [] + self.dir = ( + Path(".") + / utils.clean_path(self.publisher_slug) + / utils.clean_path(self.game_slug) + ) def load_downloads(self, token): """Load all downloads for this game""" @@ -53,17 +56,13 @@ def download(self, token, platform): """Download a singular file""" print("Downloading", self.name) - # if os.path.exists(f"{self.publisher_slug}/{self.game_slug}.json"): + # if out_folder.with_suffix(".json").exists(): # print(f"Skipping Game {self.name}") # return self.load_downloads(token) - if not os.path.exists(self.publisher_slug): - os.mkdir(self.publisher_slug) - - if not os.path.exists(f"{self.publisher_slug}/{self.game_slug}"): - os.mkdir(f"{self.publisher_slug}/{self.game_slug}") + self.dir.mkdir(parents=True, exist_ok=True) for d in self.downloads: if ( @@ -75,7 +74,7 @@ def download(self, token, platform): continue self.do_download(d, token) - with open(f"{self.publisher_slug}/{self.game_slug}.json", "w") as f: + with self.dir.with_suffix(".json").open("w") as f: json.dump( { "name": self.name, @@ -93,42 +92,41 @@ def do_download(self, d, token): """Download a single file, checking for existing files""" print(f"Downloading {d['filename']}") - file = itchiodl.utils.clean_path(d["filename"] or d["display_name"] or d["id"]) - path = itchiodl.utils.clean_path(f"{self.publisher_slug}/{self.game_slug}") + filename = d["filename"] or d["display_name"] or d["id"] - if os.path.exists(f"{path}/{file}"): - print(f"File Already Exists! {file}") - if os.path.exists(f"{path}/{file}.md5"): + out_file = self.dir / filename - with open(f"{path}/{file}.md5", "r") as f: + if out_file.exists(): + print(f"File Already Exists! {filename}") + md5_file = out_file.with_suffix(".md5") + if md5_file.exists(): + with md5_file.open("r") as f: md5 = f.read().strip() - if md5 == d["md5_hash"]: - print(f"Skipping {self.name} - {file}") + print(f"Skipping {self.name} - {filename}") return - print(f"MD5 Mismatch! {file}") + print(f"MD5 Mismatch! {filename}") else: - md5 = itchiodl.utils.md5sum(f"{path}/{file}") + md5 = utils.md5sum(str(out_file)) if md5 == d["md5_hash"]: - print(f"Skipping {self.name} - {file}") + print(f"Skipping {self.name} - {filename}") # Create checksum file - with open(f"{path}/{file}.md5", "w") as f: + with md5_file.open("w") as f: f.write(d["md5_hash"]) return # Old Download or corrupted file? corrupted = False if corrupted: - os.remove(f"{path}/{file}") + out_file.remove() return - if not os.path.exists(f"{path}/old"): - os.mkdir(f"{path}/old") + old_dir = self.dir / "old" + old_dir.mkdir(exist_ok=True) - print(f"Moving {file} to old/") + print(f"Moving {filename} to old/") timestamp = datetime.datetime.now().strftime("%Y-%m-%d") - print(timestamp) - shutil.move(f"{path}/{file}", f"{path}/old/{timestamp}-{file}") + out_file.rename(old_dir / f"{timestamp}-{filename}") # Get UUID r = requests.post( @@ -150,16 +148,16 @@ def do_download(self, d, token): ) # response_code = urllib.request.urlopen(url).getcode() try: - itchiodl.utils.download(url, path, self.name, file) - except itchiodl.utils.NoDownloadError: + utils.download(url, self.dir, self.name, filename) + except utils.NoDownloadError: print("Http response is not a download, skipping") with open("errors.txt", "a") as f: f.write( f""" Cannot download game/asset: {self.game_slug} Publisher Name: {self.publisher_slug} - Path: {path} - File: {file} + Path: {out_file} + File: {filename} Request URL: {url} This request failed due to a missing response header This game/asset has been skipped please download manually @@ -174,8 +172,8 @@ def do_download(self, d, token): f.write( f""" Cannot download game/asset: {self.game_slug} Publisher Name: {self.publisher_slug} - Path: {path} - File: {file} + Path: {out_file} + File: {filename} Request URL: {url} Request Response Code: {e.code} Error Reason: {e.reason} @@ -186,10 +184,10 @@ def do_download(self, d, token): return # Verify - if itchiodl.utils.md5sum(f"{path}/{file}") != d["md5_hash"]: - print(f"Failed to verify {file}") + if utils.md5sum(out_file) != d["md5_hash"]: + print(f"Failed to verify {filename}") return # Create checksum file - with open(f"{path}/{file}.md5", "w") as f: + with out_file.with_suffix(".md5").open("w") as f: f.write(d["md5_hash"]) diff --git a/itchiodl/library.py b/itchiodl/library.py index eeb08ad..1132735 100644 --- a/itchiodl/library.py +++ b/itchiodl/library.py @@ -6,6 +6,7 @@ from bs4 import BeautifulSoup from itchiodl.game import Game +from itchiodl.utils import NoDownloadError class Library: @@ -89,15 +90,21 @@ def load_games(self, publisher): def download_library(self, platform=None): """Download all games in the library""" with ThreadPoolExecutor(max_workers=self.jobs) as executor: - i = [0] + i = [0, 0] l = len(self.games) lock = threading.RLock() def dl(i, g): - x = g.download(self.login, platform) - with lock: - i[0] += 1 - print(f"Downloaded {g.name} ({i[0]} of {l})") - return x - - executor.map(functools.partial(dl, i), self.games) + try: + g.download(self.login, platform) + with lock: + i[0] += 1 + print(f"Downloaded {g.name} ({i[0]} of {l})") + except NoDownloadError as e: + print(e) + i[1] += 1 + + r = executor.map(functools.partial(dl, i), self.games) + for _ in r: + pass + print(f"Downloaded {i[0]} Games, {i[1]} Errors") diff --git a/itchiodl/utils.py b/itchiodl/utils.py index 78a289f..b972f67 100644 --- a/itchiodl/utils.py +++ b/itchiodl/utils.py @@ -40,7 +40,7 @@ def download(url, path, name, file): def clean_path(path): """Cleans a path on windows""" if sys.platform in ["win32", "cygwin", "msys"]: - path_clean = re.replace(r"[\<\>\:\"\/\\\|\?\*]", "-", path) + path_clean = re.sub(r"[<>:|?*\"\/\\]", "-", path) return path_clean return path @@ -48,7 +48,7 @@ def clean_path(path): def md5sum(path): """Returns the md5sum of a file""" md5 = hashlib.md5() - with open(path, "rb") as f: + with path.open("rb") as f: for chunk in iter(lambda: f.read(4096), b""): md5.update(chunk) return md5.hexdigest() diff --git a/pyproject.toml b/pyproject.toml index c95d356..001e8f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "itchiodl" -version = "2.2.0" +version = "2.3.0" description = "Python Scripts for downloading / archiving your itchio library" authors = ["Peter Taylor "] license = "MIT"