-
Notifications
You must be signed in to change notification settings - Fork 41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Mounts on images [CLI-4] - image.add_local_python_package
#2343
Changes from 41 commits
8bc041c
b8d399f
5e34e80
af9f74f
0b40907
2a52f6d
e237634
787dcc6
c89b7a4
244af57
813d3f1
77186d0
36bc55e
14907e5
19780c7
18d931f
01aab3b
cab50ff
25b388b
0abd75b
6e2db24
2ffac62
7e41f47
6b107a1
dfc627b
fef3f9c
908b329
7c81d3f
6b954c9
2a6001b
895036a
8f81737
7ccb07a
3a8409c
fd39606
d3a4dec
4a4257d
1e1bc97
286db82
543ee3a
652fd7d
8645fbb
222dc25
67ed484
c0d8236
11452a8
4878945
94f9843
f83f4a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
import re | ||
import shlex | ||
import sys | ||
import textwrap | ||
import typing | ||
import warnings | ||
from dataclasses import dataclass | ||
|
@@ -274,11 +275,21 @@ class _Image(_Object, type_prefix="im"): | |
force_build: bool | ||
inside_exceptions: List[Exception] | ||
_used_local_mounts: typing.FrozenSet[_Mount] # used for mounts watching | ||
_mounts: Sequence[_Mount] # added as mounts on any container referencing the Image, see `def _mount_layers` | ||
_metadata: Optional[api_pb2.ImageMetadata] = None # set on hydration, private for now | ||
|
||
def _initialize_from_empty(self): | ||
self.inside_exceptions = [] | ||
self._used_local_mounts = frozenset() | ||
self._mounts = () | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the difference between There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, there is a docstring further up, but I renamed them now to make it clearer:
|
||
self.force_build = False | ||
|
||
def _initialize_from_other(self, other: "_Image"): | ||
# used by .clone() | ||
self.inside_exceptions = other.inside_exceptions | ||
self.force_build = other.force_build | ||
self._used_local_mounts = other._used_local_mounts | ||
self._mounts = other._mounts | ||
|
||
def _hydrate_metadata(self, message: Optional[Message]): | ||
env_image_id = config.get("image_id") # set as an env var in containers | ||
|
@@ -292,6 +303,57 @@ def _hydrate_metadata(self, message: Optional[Message]): | |
assert isinstance(message, api_pb2.ImageMetadata) | ||
self._metadata = message | ||
|
||
def _add_mount_layer_or_copy(self, mount: _Mount, copy: bool = False): | ||
if copy: | ||
return self.copy_mount(mount, remote_path="/") | ||
|
||
base_image = self | ||
|
||
async def _load(self2: "_Image", resolver: Resolver, existing_object_id: Optional[str]): | ||
self2._hydrate_from_other(base_image) # same image id as base image as long as it's lazy | ||
self2._mounts = tuple(base_image._mounts) + (mount,) | ||
self2._used_local_mounts = base_image._used_local_mounts | ({mount} if mount.is_local() else set()) | ||
|
||
return _Image._from_loader(_load, "ImageWithMounts()", deps=lambda: [base_image, mount]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, they just use the regular Image class - this is the repr string. Might have changed the repr it here for debugging reasons (would be nice to fix our object reprs to actually be meaningful/using some data from the specification) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
@property | ||
def _mount_layers(self) -> typing.Tuple[_Mount]: | ||
"""Non-evaluated mount layers on the image | ||
|
||
When the image is used by a Modal container, these mounts need to be attached as well to | ||
represent the full image content, as they haven't yet been represented as a layer in the | ||
image. | ||
|
||
When the image is used as a base image for a new layer (that is not itself a mount layer) | ||
these mounts need to first be inserted as a copy operation (.copy_mount) into the image. | ||
""" | ||
return self._mounts | ||
|
||
def _assert_no_mount_layers(self): | ||
if self._mount_layers: | ||
raise InvalidError( | ||
textwrap.dedent( | ||
""" | ||
An image tried to run a build step after using `image.add_local_*` to include local files. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mwaskom any feedback on this error message copy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
Run `image.add_local_*` commands last in your image build to avoid rebuilding images with every local | ||
filechange. Modal will then mount these files as a thin layer when starting your container, saving build | ||
time. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the "thin layer" terminology used anywhere else? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope, changed |
||
|
||
If you need these files earlier in the build, set `copy=True` to copy the files directly into the image, | ||
though this will increase build time. | ||
|
||
Example: | ||
|
||
my_image = ( | ||
Image.debian_slim() | ||
.add_local_python_packages("mypak", copy=True) | ||
.run_commands("python -m mypak") # this now works! | ||
) | ||
""" | ||
) | ||
) | ||
|
||
@staticmethod | ||
def _from_args( | ||
*, | ||
|
@@ -306,9 +368,11 @@ def _from_args( | |
force_build: bool = False, | ||
# For internal use only. | ||
_namespace: int = api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE, | ||
_do_assert_no_mount_layers: bool = True, | ||
): | ||
if base_images is None: | ||
base_images = {} | ||
|
||
if secrets is None: | ||
secrets = [] | ||
if gpu_config is None: | ||
|
@@ -334,6 +398,11 @@ def _deps() -> List[_Object]: | |
return deps | ||
|
||
async def _load(self: _Image, resolver: Resolver, existing_object_id: Optional[str]): | ||
if _do_assert_no_mount_layers: | ||
for image in base_images.values(): | ||
# base images can't have | ||
image._assert_no_mount_layers() | ||
|
||
environment = await _get_environment_cached(resolver.environment_name or "", resolver.client) | ||
# A bit hacky,but assume that the environment provides a valid builder version | ||
image_builder_version = cast(ImageBuilderVersion, environment._settings.image_builder_version) | ||
|
@@ -349,7 +418,9 @@ async def _load(self: _Image, resolver: Resolver, existing_object_id: Optional[s | |
"No commands were provided for the image — have you tried using modal.Image.debian_slim()?" | ||
) | ||
if dockerfile.commands and build_function: | ||
raise InvalidError("Cannot provide both a build function and Dockerfile commands!") | ||
raise InvalidError( | ||
"Cannot provide both build function and Dockerfile commands in the same image layer!" | ||
) | ||
|
||
base_images_pb2s = [ | ||
api_pb2.BaseImage( | ||
|
@@ -487,7 +558,7 @@ async def join(): | |
local_mounts.add(context_mount) | ||
self._used_local_mounts = frozenset(local_mounts) | ||
|
||
rep = "Image()" | ||
rep = f"Image({dockerfile_function})" | ||
obj = _Image._from_loader(_load, rep, deps=_deps) | ||
obj.force_build = force_build | ||
return obj | ||
|
@@ -553,6 +624,23 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: | |
context_mount=mount, | ||
) | ||
|
||
def add_local_python_packages(self, *packages: Union[str, Path], copy: bool = False) -> "_Image": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this is a Python API, would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, but what if the user has non-python packages locally (.deb?) that they want to add? 😬 I agree it's a bit on the long end, but this also reflects the existing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did some more thinking... see below |
||
"""Attaches local Python packages to the container running the image | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we avoid introducing new terminology ("attach") if we're not otherwise going to use it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Something that surprised me when testing — but maybe it shouldn't have — is that adding a local package (really a third-party package; I was just reaching for something quick) didn't include its dependencies. That makes sense from one perspective (this operation is more like copying a directory with some sugar to reference the directory via PYTHONPATH) but I wonder if it will surprise users? Since "adding a package" usually has some notion of dependency resolution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "Attach" was an oversight when I renamed it again, thanks for spotting! Yeah, adding third party packages or packages that have real install recipes via this method isn't intended, but I can see how people would do that. The main intent is for it to be used for directories that are part of the user's current python projet, and the interface has been using the module name as a convenience for pointing to that directory via the current sys.path. Basically a short form for: I have separately been thinking that it would be nice to just point to a "project directory" when setting up the user's image, which is very similar to this but using file paths, and maybe that's a better way to do this? E.g. Image.debian_slim().add_local_dir(
Path(__file__).parent,
remote_path="/root/some_root_package_name",
condition=python_mount_condition
) Preferably I'd want it to work like above with default arguments of using: Image.debian_slim().add_project_dir(".") # use directory of current file as project dir - "standard", only include python files (or .modalignore?) And then have some very easy ways to customize this: Image.debian_slim().add_project_dir("..") # use parent directory as root, using relative paths from the current file's dir or cwd? (the latter probably cleanest to implement) Image.debian_slim().add_project_dir(".", filter=modal.filters.AllFiles) # use all files instead of just python files
# this is where we could also take advantage of new glob/modalignore support There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll rename |
||
|
||
Packages are added to the /root directory which is on the PYTHONPATH of any | ||
executed Modal functions. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clarify that this is in the container? Also might want to use capitalization for our proper nouns to Jono stays happy :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which nouns? Function? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And Image? |
||
|
||
By default (copy=False) the packages are layered on top of the image when | ||
the container starts up and not built as an image layer. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm having trouble parsing "packages are layered ... not built as an image layer". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rewrote: Adds Python Package Files to Containers Adds all files from the specified Python packages to containers running the image. Packages are added to the By default ( Set Note: This excludes all dot-prefixed subdirectories or files and all |
||
|
||
Set copy=True to force the packages to be added as an image layer instead. | ||
This can be slower since it requires a rebuild of the image whenever the | ||
required package files change, but it allows you to run additional build | ||
steps after this operation. | ||
""" | ||
mount = _Mount.from_local_python_packages(*packages) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we take the opportunity to break with the existing behavior of including data assets and everything else within the directory containing the Python package, at least by default? That's one of our most surprising and problem-causing behaviors IMO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is the current behavior for explicit It's the auto mounts today that exclude everything that's not a .py-file. Very consistent, I know... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I thought auto mounting now included most files? That's how users get themselves into trouble when they automount a project directory that has a lot of data in it. I agree that you're mostly mirroring the existing behavior of (I also think we should add the dockerignore conditions to these interfaces, which will help a bit, but my suspicion is that it's easier to debug "this |
||
return self._add_mount_layer_or_copy(mount, copy=copy) | ||
|
||
def copy_local_dir(self, local_path: Union[str, Path], remote_path: Union[str, Path] = ".") -> "_Image": | ||
"""Copy a directory into the image as a part of building the image. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
from typing import List, Literal, get_args | ||
from unittest import mock | ||
|
||
import modal | ||
from modal import App, Image, Mount, Secret, build, environments, gpu, method | ||
from modal._serialization import serialize | ||
from modal._utils.async_utils import synchronizer | ||
|
@@ -24,6 +25,7 @@ | |
_validate_python_version, | ||
) | ||
from modal.mount import PYTHON_STANDALONE_VERSIONS | ||
from modal.runner import deploy_app | ||
from modal_proto import api_pb2 | ||
|
||
from .supports.skip import skip_windows | ||
|
@@ -35,8 +37,16 @@ | |
] | ||
|
||
|
||
def dummy(): | ||
... | ||
@pytest.fixture(autouse=True) | ||
def no_automount(monkeypatch): | ||
# no tests in here use automounting, but a lot of them implicitly create | ||
# functions w/ lots of modules is sys.modules which will automount | ||
# which takes a lot of time, so we disable it | ||
monkeypatch.setenv("MODAL_AUTOMOUNT", "0") | ||
|
||
|
||
def dummy() -> None: | ||
return None | ||
|
||
|
||
def test_supported_python_series(): | ||
|
@@ -284,8 +294,6 @@ def test_image_pip_install_pyproject(builder_version, servicer, client): | |
app.function(image=image)(dummy) | ||
with app.run(client=client): | ||
layers = get_image_layers(image.object_id, servicer) | ||
|
||
print(layers[0].dockerfile_commands) | ||
assert any("pip install 'banana >=1.2.0' 'potato >=0.1.0'" in cmd for cmd in layers[0].dockerfile_commands) | ||
|
||
|
||
|
@@ -298,7 +306,6 @@ def test_image_pip_install_pyproject_with_optionals(builder_version, servicer, c | |
with app.run(client=client): | ||
layers = get_image_layers(image.object_id, servicer) | ||
|
||
print(layers[0].dockerfile_commands) | ||
assert any( | ||
"pip install 'banana >=1.2.0' 'linting-tool >=0.0.0' 'potato >=0.1.0' 'pytest >=1.2.0'" in cmd | ||
for cmd in layers[0].dockerfile_commands | ||
|
@@ -1209,3 +1216,137 @@ async def test_logs(servicer, client): | |
|
||
logs = [data async for data in image._logs.aio()] | ||
assert logs == ["build starting\n", "build finished\n"] | ||
|
||
|
||
def hydrate_image(img, client): | ||
# there should be a more straight forward way to do this? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
app = App() | ||
app.function(serialized=True, image=img)(lambda: None) | ||
with app.run(client=client): | ||
pass | ||
|
||
|
||
def test_add_local_lazy_vs_copy(client, servicer, set_env_client, supports_on_path): | ||
deb = Image.debian_slim() | ||
image_with_mount = deb.add_local_python_packages("pkg_a") | ||
|
||
hydrate_image(image_with_mount, client) | ||
assert image_with_mount.object_id == deb.object_id | ||
assert len(image_with_mount._mount_layers) == 1 | ||
|
||
image_additional_mount = image_with_mount.add_local_python_packages("pkg_b") | ||
hydrate_image(image_additional_mount, client) | ||
assert len(image_additional_mount._mount_layers) == 2 # another mount added to lazy layer | ||
assert len(image_with_mount._mount_layers) == 1 # original image should not be affected | ||
|
||
# running commands | ||
image_non_mount = image_with_mount.run_commands("echo 'hello'") | ||
with pytest.raises(InvalidError, match="copy=True"): | ||
# error about using non-copy add commands before other build steps | ||
hydrate_image(image_non_mount, client) | ||
|
||
image_with_copy = deb.add_local_python_packages("pkg_a", copy=True) | ||
hydrate_image(image_with_copy, client) | ||
assert len(image_with_copy._mount_layers) == 0 | ||
|
||
# do the same exact image using copy=True | ||
image_with_copy_and_commands = deb.add_local_python_packages("pkg_a", copy=True).run_commands("echo 'hello'") | ||
hydrate_image(image_with_copy_and_commands, client) | ||
assert len(image_with_copy_and_commands._mount_layers) == 0 | ||
|
||
layers = get_image_layers(image_with_copy_and_commands.object_id, servicer) | ||
|
||
echo_layer = layers[0] | ||
assert echo_layer.dockerfile_commands == ["FROM base", "RUN echo 'hello'"] | ||
|
||
copy_layer = layers[1] | ||
assert copy_layer.dockerfile_commands == ["FROM base", "COPY . /"] | ||
copied_files = servicer.mount_contents[copy_layer.context_mount_id].keys() | ||
assert len(copied_files) == 8 | ||
assert all(fn.startswith("/root/pkg_a/") for fn in copied_files) | ||
|
||
|
||
def test_add_locals_are_attached_to_functions(servicer, client, supports_on_path): | ||
deb_slim = Image.debian_slim() | ||
img = deb_slim.add_local_python_packages("pkg_a") | ||
app = App("my-app") | ||
control_fun: modal.Function = app.function(serialized=True, image=deb_slim, name="control")( | ||
dummy | ||
) # no mounts on image | ||
fun: modal.Function = app.function(serialized=True, image=img, name="fun")(dummy) # mounts on image | ||
deploy_app(app, client=client) | ||
|
||
control_func_mounts = set(servicer.app_functions[control_fun.object_id].mount_ids) | ||
fun_def = servicer.app_functions[fun.object_id] | ||
added_mounts = set(fun_def.mount_ids) - control_func_mounts | ||
assert len(added_mounts) == 1 | ||
assert added_mounts == {img._mount_layers[0].object_id} | ||
|
||
|
||
def test_add_locals_are_attached_to_classes(servicer, client, supports_on_path, set_env_client): | ||
deb_slim = Image.debian_slim() | ||
img = deb_slim.add_local_python_packages("pkg_a") | ||
app = App("my-app") | ||
control_fun: modal.Function = app.function(serialized=True, image=deb_slim, name="control")( | ||
dummy | ||
) # no mounts on image | ||
|
||
class A: | ||
some_arg: str = modal.parameter() | ||
|
||
ACls = app.cls(serialized=True, image=img)(A) # mounts on image | ||
deploy_app(app, client=client) | ||
|
||
control_func_mounts = set(servicer.app_functions[control_fun.object_id].mount_ids) | ||
fun_def = servicer.function_by_name("A.*") # class service function | ||
added_mounts = set(fun_def.mount_ids) - control_func_mounts | ||
assert len(added_mounts) == 1 | ||
assert added_mounts == {img._mount_layers[0].object_id} | ||
|
||
obj = ACls(some_arg="foo") # type: ignore | ||
# hacky way to force hydration of the *parameter bound* function (instance service function): | ||
obj.keep_warm(0) # type: ignore | ||
|
||
obj_fun_def = servicer.function_by_name("A.*", ((), {"some_arg": "foo"})) # instance service function | ||
added_mounts = set(obj_fun_def.mount_ids) - control_func_mounts | ||
assert len(added_mounts) == 1 | ||
assert added_mounts == {img._mount_layers[0].object_id} | ||
|
||
|
||
@skip_windows("servicer sandbox implementation not working on windows") | ||
def test_add_locals_are_attached_to_sandboxes(servicer, client, supports_on_path): | ||
deb_slim = Image.debian_slim() | ||
img = deb_slim.add_local_python_packages("pkg_a") | ||
app = App("my-app") | ||
with app.run(client=client): | ||
modal.Sandbox.create(image=img, app=app, client=client) | ||
sandbox_def = servicer.sandbox_defs[0] | ||
|
||
assert sandbox_def.image_id == deb_slim.object_id | ||
assert sandbox_def.mount_ids == [img._mount_layers[0].object_id] | ||
copied_files = servicer.mount_contents[sandbox_def.mount_ids[0]] | ||
assert len(copied_files) == 8 | ||
assert all(fn.startswith("/root/pkg_a/") for fn in copied_files) | ||
|
||
|
||
def empty_fun(): | ||
pass | ||
|
||
|
||
def test_add_locals_build_function(servicer, client, supports_on_path): | ||
deb_slim = Image.debian_slim() | ||
img = deb_slim.add_local_python_packages("pkg_a") | ||
img_with_build_function = img.run_function(empty_fun) | ||
with pytest.raises(InvalidError): | ||
# build functions could still potentially rewrite mount contents, | ||
# so we still require them to use copy=True | ||
# TODO(elias): what if someone wants do use an equivalent of `run_function(..., mounts=[...]) ? | ||
hydrate_image(img_with_build_function, client) | ||
|
||
img_with_copy = deb_slim.add_local_python_packages("pkg_a", copy=True) | ||
hydrate_image(img_with_copy, client) # this is fine | ||
|
||
|
||
# TODO: test modal shell w/ lazy mounts | ||
# this works since the image is passed on as is to a sandbox which will load it and | ||
# transfer any virtual mount layers from the image as mounts to the sandbox |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can remove the autossh installation - checking with @pawalt and @luiscape now