From e52d74569322e7fe94cca005494e08b077d3a29c Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Thu, 24 Oct 2024 19:48:30 +1000 Subject: [PATCH] Filter tarfile entry mode bits Also correctly marks some `pack_venv` APIs as private. Closes #23 --- src/venvstacks/pack_venv.py | 38 ++++++++++++++++++++----------- src/venvstacks/stacks.py | 8 +++---- tests/expected-output-config.toml | 3 +-- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/venvstacks/pack_venv.py b/src/venvstacks/pack_venv.py index c09d28d..d60c282 100755 --- a/src/venvstacks/pack_venv.py +++ b/src/venvstacks/pack_venv.py @@ -267,7 +267,7 @@ def create_archive( def report_progress(_: Any) -> None: pass else: - progress_bar = ProgressBar() + progress_bar = _ProgressBar() progress_bar.show(0.0) num_archive_entries = 0 total_entries_to_archive = sum(1 for __ in env_path.rglob("*")) @@ -379,6 +379,15 @@ def _process_archive_entry(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo: tarinfo.uname = tarinfo.gname = "root" if _clamp_mtime is not None: tarinfo.mtime = _clamp_mtime(tarinfo.mtime) + # Ensure permissions are compatible with `tar_filter` extraction + # Layered environments will still need to be fully trusted when + # unpacking them (due to the external symlinks to the base runtime) + mode = tarinfo.mode + if mode is not None: + # Apply the same mode filtering as tarfile.tar_filter in 3.12+ + # https://docs.python.org/3.13/library/tarfile.html#tarfile.tar_filter + # Clears high bits (e.g. 
setuid/setgid), and the group/other write bits + tarinfo.mode = mode & 0o755 # Report progress if requested if progress_callback is not None: progress_callback(tarinfo.name) @@ -405,12 +414,12 @@ def _process_archive_entry(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo: if _WINDOWS_BUILD: - def set_mtime(fspath: str, mtime: int | float) -> None: + def _set_mtime(fspath: str, mtime: int | float) -> None: # There's no `follow_symlinks` option available on Windows os.utime(fspath, (mtime, mtime)) else: - def set_mtime(fspath: str, mtime: int | float) -> None: + def _set_mtime(fspath: str, mtime: int | float) -> None: os.utime(fspath, (mtime, mtime), follow_symlinks=False) @@ -476,7 +485,7 @@ def _add_zip_entry(fspath: str, arcname: str) -> None: fs_mtime = os.lstat(fspath).st_mtime zip_entry_mtime = adjust_mtime(min(fs_mtime, max_mtime)) if zip_entry_mtime != fs_mtime: - set_mtime(fspath, zip_entry_mtime) + _set_mtime(fspath, zip_entry_mtime) zf.write(fspath, arcname) arcname = os.path.normpath(base_dir) @@ -516,22 +525,25 @@ def _add_zip_entry(fspath: str, arcname: str) -> None: # Everywhere else, create XZ compressed tar archives _make_archive = _make_tar_archive -# Basic progress bar support, taken from my SO answer at +# Basic progress bar support, taken from ncoghlan's SO answer at # https://stackoverflow.com/questions/3160699/python-progress-bar/78590319#78590319 -# (since the code originated with me, it isn't subject to Stack Overflow's CC-BY-SA terms) +# (since the code originated with her, it isn't subject to Stack Overflow's CC-BY-SA terms) # -# I originally skipped this, but archiving pytorch (and similarly large AI/ML libraries) -# takes a long time, so you really need some assurance that progress is being made. +# Archiving pytorch (and similarly large AI/ML libraries) takes a long time, +# so you really need some assurance that progress is being made. 
# # If compression times are a significant problem, it would be worth moving in the same # direction as conda-pack did, and implementing support for parallel compression (the # compression libraries all drop the GIL when compressing data chunks, so this approach # scales effectively up to the number of available CPUs) -ProgressSummary = tuple[int, str] -ProgressReport = tuple[str, ProgressSummary] +# +# See https://github.com/lmstudio-ai/venvstacks/issues/4 + +_ProgressSummary = tuple[int, str] +_ProgressReport = tuple[str, _ProgressSummary] -class ProgressBar: +class _ProgressBar: """Display & update a progress bar""" TEXT_ABORTING = "Aborting..." @@ -541,7 +553,7 @@ class ProgressBar: bar_length: int stream: TextIO _last_displayed_text: str | None - _last_displayed_summary: ProgressSummary | None + _last_displayed_summary: _ProgressSummary | None def __init__(self, bar_length: int = 25, stream: TextIO = sys.stdout) -> None: self.bar_length = bar_length @@ -554,7 +566,7 @@ def reset(self) -> None: self._last_displayed_text = None self._last_displayed_summary = None - def _format_progress(self, progress: float, aborting: bool) -> ProgressReport: + def _format_progress(self, progress: float, aborting: bool) -> _ProgressReport: """Internal helper that also reports the number of completed increments""" bar_length = self.bar_length progress = float(progress) diff --git a/src/venvstacks/stacks.py b/src/venvstacks/stacks.py index a7c3e65..1866252 100755 --- a/src/venvstacks/stacks.py +++ b/src/venvstacks/stacks.py @@ -882,11 +882,9 @@ def _pdm_python_install(target_path: Path, request: str) -> Path | None: with tempfile.NamedTemporaryFile() as tf: tf.close() original_filename = download(python_file, tf.name, env.session) - # TODO: use "tar_filter" here instead of "fully_trusted" - # Currently blocked on Python 3.11 producing different results - # if Python 3.12+ enables a filter that actually makes any changes - # https://github.com/lmstudio-ai/venvstacks/issues/23 - 
with default_tarfile_filter("fully_trusted"): + # Use "tar_filter" if stdlib tar extraction filters are available + # (they were only added in Python 3.12, so no filtering on 3.11) + with default_tarfile_filter("tar_filter"): install_file(tf.name, destination, original_filename) if interpreter.exists(): # Installation successful, return the path to the installation folder diff --git a/tests/expected-output-config.toml b/tests/expected-output-config.toml index d105608..dbe9f4b 100644 --- a/tests/expected-output-config.toml +++ b/tests/expected-output-config.toml @@ -33,5 +33,4 @@ UV_EXCLUDE_NEWER="2024-10-15 00:00:00+00:00" # Metadata updates can also be requested when the # launch module content in the sample project changes -# Last requested update: launch module autoformatting - +# Last requested update: tar archive mode filtering