diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfcfddd --- /dev/null +++ b/.gitignore @@ -0,0 +1,197 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,virtualenv +# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,virtualenv + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
"""Pair ``.torrent`` files with data files that have a similar file name."""

import re
from difflib import get_close_matches
from pathlib import Path
from typing import Optional, Union

# Folders scanned by ``main`` when it is called without arguments.
DEFAULT_TORRENT_FOLDER = Path("V:\\")
DEFAULT_DATA_FOLDER = Path("V:\\")

# Matches a file that starts like ``d8:announce<len>:<scheme>://<host>`` and
# captures the host part of the announce URL.
_ANNOUNCE_RE = re.compile(r"^[\w]*:announce[0-9]*:(?:https?|udp):\/\/([\w.-]+)")


class TFDataFinder:
    """A torrent file plus the data file(s) matched to it by name."""

    def __init__(self, path: Path, **kwargs) -> None:
        """Validate ``path`` and guess the tracker host from its contents.

        Args:
            path: An existing file whose suffix is ``.torrent``.
            **kwargs: ``data`` pre-sets the matched data path(s). Every other
                keyword (e.g. ``read_len``) is forwarded to ``guess_tracker``.

        Raises:
            ValueError: If ``path`` is not an existing ``.torrent`` file.
        """
        if not self._is_torrent_file(path):
            raise ValueError(f"Not a torrent file: {path}")
        self.path: Path = path
        # Matched data: a single path, a list of paths, or None when unknown.
        self.data: Optional[Union[Path, list[Path]]] = kwargs.get("data")
        # ``data`` is consumed here; guess_tracker() does not accept it, so it
        # must not be forwarded.
        tracker_kwargs = {k: v for k, v in kwargs.items() if k != "data"}
        # Host of the announce URL, or None when it could not be determined.
        self.tracker: Optional[str] = self.guess_tracker(path, **tracker_kwargs)

    def __str__(self) -> str:
        """Return ``TorrentFile(path=..., data=..., tracker=...)``.

        ``data`` and ``tracker`` are only included when they are set.
        """
        fields: dict[str, object] = {"path": str(self.path)}
        if self.data:
            # A single Path is not iterable, so it must be told apart from a
            # list of paths before formatting.
            if isinstance(self.data, Path):
                fields["data"] = str(self.data)
            else:
                fields["data"] = [str(p) for p in self.data]
        if self.tracker:
            fields["tracker"] = self.tracker
        body = ", ".join(f"{key}={value}" for key, value in fields.items())
        return f"TorrentFile({body})"

    @staticmethod
    def _is_torrent_file(path: Path) -> bool:
        """Return True when ``path`` is an existing file ending in ``.torrent``."""
        return path.is_file() and path.suffix == ".torrent"

    def find_data(
        self, files: list[Path], cutoff: float = 0.9
    ) -> Optional[Union[Path, list[Path]]]:
        """Find files whose stem closely matches this torrent's stem.

        On success the result is also stored in ``self.data``. When nothing
        matches, ``self.data`` is left untouched.

        Args:
            files: Candidate data files; only their stems are compared.
            cutoff: Minimum similarity passed to ``difflib.get_close_matches``.

        Returns:
            The single matching path, a list of matching paths (best match
            first) when there are several, or None when nothing matches.
        """
        # NOTE: candidates sharing a stem collapse to the last one seen.
        by_stem = {candidate.stem: candidate for candidate in files}
        matched_stems = get_close_matches(
            word=self.path.stem, possibilities=list(by_stem), cutoff=cutoff
        )
        if not matched_stems:
            return None
        matches = [by_stem[stem] for stem in matched_stems]
        self.data = matches if len(matches) > 1 else matches[0]
        return self.data

    @staticmethod
    def guess_tracker(path: Path, read_len: int = 100) -> Optional[str]:
        """Guess the tracker host from the start of a torrent file.

        Args:
            path: File to inspect.
            read_len: Maximum number of leading bytes to examine.

        Returns:
            The host of an ``http``, ``https`` or ``udp`` announce URL found
            at the very start of the file, or None when there is none.
        """
        with open(path, "rb") as handle:
            # Bounded read: stops at the first newline or after ``read_len``
            # bytes, whichever comes first, instead of loading a whole line.
            head = handle.readline(read_len)
        # Undecodable bytes later in the window must not hide a readable
        # announce prefix, so they are replaced rather than treated as fatal.
        text = head.decode("utf-8", errors="replace")
        found = _ANNOUNCE_RE.match(text)
        return found.group(1) if found else None


def find_files(
    folder: Path,
    ignore_permission_err: bool = True,
    valid_exts: Optional[list[str]] = None,
    exclude_exts: Optional[list[str]] = None,
) -> list[Path]:
    """Collect regular files from ``folder`` and the directories below it.

    Args:
        folder: Directory to start from.
        ignore_permission_err: When True, directories that cannot be listed
            are skipped; when False, the ``PermissionError`` propagates.
        valid_exts: If given and non-empty, keep only files whose suffix
            (including the dot, compared exactly) is in this list.
        exclude_exts: If given and non-empty, drop files whose suffix is in
            this list.

    Returns:
        The matching file paths, in directory traversal order.

    Raises:
        PermissionError: Only when ``ignore_permission_err`` is False.
    """
    found: list[Path] = []
    # "**/" walks ``folder`` and its subdirectories (directories only).
    for directory in folder.glob("**/"):
        try:
            for entry in directory.iterdir():
                if not entry.is_file():
                    continue
                if valid_exts and entry.suffix not in valid_exts:
                    continue
                if exclude_exts and entry.suffix in exclude_exts:
                    continue
                found.append(entry)
        except PermissionError:
            if not ignore_permission_err:
                raise
    return found


def main(
    torrent_folder: Optional[Path] = None, data_folder: Optional[Path] = None
) -> None:
    """Print every torrent file together with the data matched to it.

    Args:
        torrent_folder: Where to look for ``.torrent`` files; defaults to
            ``DEFAULT_TORRENT_FOLDER``.
        data_folder: Where to look for data files; defaults to
            ``DEFAULT_DATA_FOLDER``.
    """
    if torrent_folder is None:
        torrent_folder = DEFAULT_TORRENT_FOLDER
    if data_folder is None:
        data_folder = DEFAULT_DATA_FOLDER

    torrent_files = [
        TFDataFinder(f) for f in find_files(torrent_folder, valid_exts=[".torrent"])
    ]
    data_files = find_files(data_folder, exclude_exts=[".torrent"])
    for tf in torrent_files:
        tf.find_data(data_files)
        print(tf)


if __name__ == "__main__":
    main()