Skip to content

Commit

Permalink
Filter tarfile entry mode bits
Browse files Browse the repository at this point in the history
Also correctly marks some `pack_venv` APIs as private.

Closes #23
  • Loading branch information
ncoghlan committed Oct 24, 2024
1 parent f7d424c commit e52d745
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 20 deletions.
38 changes: 25 additions & 13 deletions src/venvstacks/pack_venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def create_archive(
def report_progress(_: Any) -> None:
pass
else:
progress_bar = ProgressBar()
progress_bar = _ProgressBar()
progress_bar.show(0.0)
num_archive_entries = 0
total_entries_to_archive = sum(1 for __ in env_path.rglob("*"))
Expand Down Expand Up @@ -379,6 +379,15 @@ def _process_archive_entry(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
tarinfo.uname = tarinfo.gname = "root"
if _clamp_mtime is not None:
tarinfo.mtime = _clamp_mtime(tarinfo.mtime)
# Ensure permissions are compatible with `tar_filter` extraction
# Layered environments will still need to be fully trusted when
# unpacking them (due to the external symlinks to the base runtime)
mode = tarinfo.mode
if mode is not None:
# Apply the same mode filtering as tarfile.tar_filter in 3.12+
# https://docs.python.org/3.13/library/tarfile.html#tarfile.tar_filter
# Clears high mode bits (setuid/setgid/sticky) and the group/other write bits
tarinfo.mode = mode & 0o755
# Report progress if requested
if progress_callback is not None:
progress_callback(tarinfo.name)
Expand All @@ -405,12 +414,12 @@ def _process_archive_entry(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:

if _WINDOWS_BUILD:

def set_mtime(fspath: str, mtime: int | float) -> None:
def _set_mtime(fspath: str, mtime: int | float) -> None:
# There's no `follow_symlinks` option available on Windows
os.utime(fspath, (mtime, mtime))
else:

def set_mtime(fspath: str, mtime: int | float) -> None:
def _set_mtime(fspath: str, mtime: int | float) -> None:
os.utime(fspath, (mtime, mtime), follow_symlinks=False)


Expand Down Expand Up @@ -476,7 +485,7 @@ def _add_zip_entry(fspath: str, arcname: str) -> None:
fs_mtime = os.lstat(fspath).st_mtime
zip_entry_mtime = adjust_mtime(min(fs_mtime, max_mtime))
if zip_entry_mtime != fs_mtime:
set_mtime(fspath, zip_entry_mtime)
_set_mtime(fspath, zip_entry_mtime)
zf.write(fspath, arcname)

arcname = os.path.normpath(base_dir)
Expand Down Expand Up @@ -516,22 +525,25 @@ def _add_zip_entry(fspath: str, arcname: str) -> None:
# Everywhere else, create XZ compressed tar archives
_make_archive = _make_tar_archive

# Basic progress bar support, taken from my SO answer at
# Basic progress bar support, taken from ncoghlan's SO answer at
# https://stackoverflow.com/questions/3160699/python-progress-bar/78590319#78590319
# (since the code originated with me, it isn't subject to Stack Overflow's CC-BY-SA terms)
# (since the code originated with him, it isn't subject to Stack Overflow's CC-BY-SA terms)
#
# I originally skipped this, but archiving pytorch (and similarly large AI/ML libraries)
# takes a long time, so you really need some assurance that progress is being made.
# Archiving pytorch (and similarly large AI/ML libraries) takes a long time,
# so you really need some assurance that progress is being made.
#
# If compression times are a significant problem, it would be worth moving in the same
# direction as conda-pack did, and implementing support for parallel compression (the
# compression libraries all drop the GIL when compressing data chunks, so this approach
# scales effectively up to the number of available CPUs)
ProgressSummary = tuple[int, str]
ProgressReport = tuple[str, ProgressSummary]
#
# See https://github.com/lmstudio-ai/venvstacks/issues/4

_ProgressSummary = tuple[int, str]
_ProgressReport = tuple[str, _ProgressSummary]


class ProgressBar:
class _ProgressBar:
"""Display & update a progress bar"""

TEXT_ABORTING = "Aborting..."
Expand All @@ -541,7 +553,7 @@ class ProgressBar:
bar_length: int
stream: TextIO
_last_displayed_text: str | None
_last_displayed_summary: ProgressSummary | None
_last_displayed_summary: _ProgressSummary | None

def __init__(self, bar_length: int = 25, stream: TextIO = sys.stdout) -> None:
self.bar_length = bar_length
Expand All @@ -554,7 +566,7 @@ def reset(self) -> None:
self._last_displayed_text = None
self._last_displayed_summary = None

def _format_progress(self, progress: float, aborting: bool) -> ProgressReport:
def _format_progress(self, progress: float, aborting: bool) -> _ProgressReport:
"""Internal helper that also reports the number of completed increments"""
bar_length = self.bar_length
progress = float(progress)
Expand Down
8 changes: 3 additions & 5 deletions src/venvstacks/stacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,11 +882,9 @@ def _pdm_python_install(target_path: Path, request: str) -> Path | None:
with tempfile.NamedTemporaryFile() as tf:
tf.close()
original_filename = download(python_file, tf.name, env.session)
# TODO: use "tar_filter" here instead of "fully_trusted"
# Currently blocked on Python 3.11 producing different results
# if Python 3.12+ enables a filter that actually makes any changes
# https://github.com/lmstudio-ai/venvstacks/issues/23
with default_tarfile_filter("fully_trusted"):
# Use "tar_filter" if stdlib tar extraction filters are available
# (they were only added in Python 3.12, so no filtering on 3.11)
with default_tarfile_filter("tar_filter"):
install_file(tf.name, destination, original_filename)
if interpreter.exists():
# Installation successful, return the path to the installation folder
Expand Down
3 changes: 1 addition & 2 deletions tests/expected-output-config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,4 @@ UV_EXCLUDE_NEWER="2024-10-15 00:00:00+00:00"
# Metadata updates can also be requested when the
# launch module content in the sample project changes

# Last requested update: launch module autoformatting

# Last requested update: tar archive mode filtering

0 comments on commit e52d745

Please sign in to comment.