Skip to content

Commit

Permalink
Filter tarfile entry mode bits
Browse files Browse the repository at this point in the history
Also correctly marks some `pack_venv` APIs as private.

Closes #23
  • Loading branch information
ncoghlan committed Oct 24, 2024
1 parent f7d424c commit e52d745
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 20 deletions.
38 changes: 25 additions & 13 deletions src/venvstacks/pack_venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def create_archive(
def report_progress(_: Any) -> None:
pass
else:
progress_bar = ProgressBar()
progress_bar = _ProgressBar()
progress_bar.show(0.0)
num_archive_entries = 0
total_entries_to_archive = sum(1 for __ in env_path.rglob("*"))
Expand Down Expand Up @@ -379,6 +379,15 @@ def _process_archive_entry(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
tarinfo.uname = tarinfo.gname = "root"
if _clamp_mtime is not None:
tarinfo.mtime = _clamp_mtime(tarinfo.mtime)
# Ensure permissions are compatible with `tar_filter` extraction
# Layered environments will still need to be fully trusted when
# unpacking them (due to the external symlinks to the base runtime)
mode = tarinfo.mode
if mode is not None:
# Apply the same mode filtering as tarfile.tar_filter in 3.12+
# https://docs.python.org/3.13/library/tarfile.html#tarfile.tar_filter
# Clears high mode bits (setuid/setgid/sticky) and the group/other write bits
tarinfo.mode = mode & 0o755
# Report progress if requested
if progress_callback is not None:
progress_callback(tarinfo.name)
Expand All @@ -405,12 +414,12 @@ def _process_archive_entry(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:

if _WINDOWS_BUILD:

def set_mtime(fspath: str, mtime: int | float) -> None:
def _set_mtime(fspath: str, mtime: int | float) -> None:
# There's no `follow_symlinks` option available on Windows
os.utime(fspath, (mtime, mtime))
else:

def set_mtime(fspath: str, mtime: int | float) -> None:
def _set_mtime(fspath: str, mtime: int | float) -> None:
os.utime(fspath, (mtime, mtime), follow_symlinks=False)


Expand Down Expand Up @@ -476,7 +485,7 @@ def _add_zip_entry(fspath: str, arcname: str) -> None:
fs_mtime = os.lstat(fspath).st_mtime
zip_entry_mtime = adjust_mtime(min(fs_mtime, max_mtime))
if zip_entry_mtime != fs_mtime:
set_mtime(fspath, zip_entry_mtime)
_set_mtime(fspath, zip_entry_mtime)
zf.write(fspath, arcname)

arcname = os.path.normpath(base_dir)
Expand Down Expand Up @@ -516,22 +525,25 @@ def _add_zip_entry(fspath: str, arcname: str) -> None:
# Everywhere else, create XZ compressed tar archives
_make_archive = _make_tar_archive

# Basic progress bar support, taken from my SO answer at
# Basic progress bar support, taken from ncoghlan's SO answer at
# https://stackoverflow.com/questions/3160699/python-progress-bar/78590319#78590319
# (since the code originated with me, it isn't subject to Stack Overflow's CC-BY-SA terms)
# (since the code originated with him, it isn't subject to Stack Overflow's CC-BY-SA terms)
#
# I originally skipped this, but archiving pytorch (and similarly large AI/ML libraries)
# takes a long time, so you really need some assurance that progress is being made.
# Archiving pytorch (and similarly large AI/ML libraries) takes a long time,
# so you really need some assurance that progress is being made.
#
# If compression times are a significant problem, it would be worth moving in the same
# direction as conda-pack did, and implementing support for parallel compression (the
# compression libraries all drop the GIL when compressing data chunks, so this approach
# scales effectively up to the number of available CPUs)
ProgressSummary = tuple[int, str]
ProgressReport = tuple[str, ProgressSummary]
#
# See https://github.com/lmstudio-ai/venvstacks/issues/4

_ProgressSummary = tuple[int, str]
_ProgressReport = tuple[str, _ProgressSummary]


class ProgressBar:
class _ProgressBar:
"""Display & update a progress bar"""

TEXT_ABORTING = "Aborting..."
Expand All @@ -541,7 +553,7 @@ class ProgressBar:
bar_length: int
stream: TextIO
_last_displayed_text: str | None
_last_displayed_summary: ProgressSummary | None
_last_displayed_summary: _ProgressSummary | None

def __init__(self, bar_length: int = 25, stream: TextIO = sys.stdout) -> None:
self.bar_length = bar_length
Expand All @@ -554,7 +566,7 @@ def reset(self) -> None:
self._last_displayed_text = None
self._last_displayed_summary = None

def _format_progress(self, progress: float, aborting: bool) -> ProgressReport:
def _format_progress(self, progress: float, aborting: bool) -> _ProgressReport:
"""Internal helper that also reports the number of completed increments"""
bar_length = self.bar_length
progress = float(progress)
Expand Down
8 changes: 3 additions & 5 deletions src/venvstacks/stacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,11 +882,9 @@ def _pdm_python_install(target_path: Path, request: str) -> Path | None:
with tempfile.NamedTemporaryFile() as tf:
tf.close()
original_filename = download(python_file, tf.name, env.session)
# TODO: use "tar_filter" here instead of "fully_trusted"
# Currently blocked on Python 3.11 producing different results
# if Python 3.12+ enables a filter that actually makes any changes
# https://github.com/lmstudio-ai/venvstacks/issues/23
with default_tarfile_filter("fully_trusted"):
# Use "tar_filter" if stdlib tar extraction filters are available
# (they were only added in Python 3.12, so no filtering on 3.11)
with default_tarfile_filter("tar_filter"):
install_file(tf.name, destination, original_filename)
if interpreter.exists():
# Installation successful, return the path to the installation folder
Expand Down
3 changes: 1 addition & 2 deletions tests/expected-output-config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,4 @@ UV_EXCLUDE_NEWER="2024-10-15 00:00:00+00:00"
# Metadata updates can also be requested when the
# launch module content in the sample project changes

# Last requested update: launch module autoformatting

# Last requested update: tar archive mode filtering

0 comments on commit e52d745

Please sign in to comment.