From a9ea3694364305eeb8e79934100f9a754d5c8a38 Mon Sep 17 00:00:00 2001 From: Elias Freider Date: Sat, 23 Nov 2024 20:30:50 +0100 Subject: [PATCH] [Mounts on images] add_local_file/add_local_dir (#2507) ## Changelog * Adds `Image.add_local_file(...)` and `Image.add_local_dir(...)` as a unified replacement for the old `Image.copy_local_*()` and `Mount.add_local_*` methods. --- modal/image.py | 51 +++++++++++++++++++++++- modal/mount.py | 4 +- test/image_test.py | 81 ++++++++++++++++++++++++++++++++++++++ test/mounted_files_test.py | 10 +++-- 4 files changed, 139 insertions(+), 7 deletions(-) diff --git a/modal/image.py b/modal/image.py index 32ab4512d..e5d7957d1 100644 --- a/modal/image.py +++ b/modal/image.py @@ -344,7 +344,7 @@ def _assert_no_mount_layers(self): "\n" "my_image = (\n" " Image.debian_slim()\n" - ' .add_local_python_packages("mypak", copy=True)\n' + ' .add_local_file("data.json", copy=True)\n' ' .run_commands("python -m mypak") # this now works!\n' ")\n" ) @@ -601,12 +601,59 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: context_mount=mount, ) + def add_local_file(self, local_path: Union[str, Path], remote_path: str, *, copy: bool = False) -> "_Image": + """Adds a local file to the image at `remote_path` within the container + + By default (`copy=False`), the files are added to containers on startup and are not built into the actual Image, + which speeds up deployment. + + Set `copy=True` to copy the files into an Image layer at build time instead, similar to how + [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) works in a `Dockerfile`. + + copy=True can slow down iteration since it requires a rebuild of the Image and any subsequent + build steps whenever the included files change, but it is required if you want to run additional + build steps after this one. + """ + if not PurePosixPath(remote_path).is_absolute(): + # TODO(elias): implement relative to absolute resolution using image workdir metadata + # + make default remote_path="./" + # This requires deferring the Mount creation until after "self" (the base image) has been resolved + # so we know the workdir of the operation. + raise InvalidError("image.add_local_file() currently only supports absolute remote_path values") + + if remote_path.endswith("/"): + remote_path = remote_path + Path(local_path).name + + mount = _Mount.from_local_file(local_path, remote_path) + return self._add_mount_layer_or_copy(mount, copy=copy) + + def add_local_dir(self, local_path: Union[str, Path], remote_path: str, *, copy: bool = False) -> "_Image": + """Adds a local directory's content to the image at `remote_path` within the container + + By default (`copy=False`), the files are added to containers on startup and are not built into the actual Image, + which speeds up deployment. + + Set `copy=True` to copy the files into an Image layer at build time instead, similar to how + [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) works in a `Dockerfile`. + + copy=True can slow down iteration since it requires a rebuild of the Image and any subsequent + build steps whenever the included files change, but it is required if you want to run additional + build steps after this one. + """ + if not PurePosixPath(remote_path).is_absolute(): + # TODO(elias): implement relative to absolute resolution using image workdir metadata + # + make default remote_path="./" + raise InvalidError("image.add_local_dir() currently only supports absolute remote_path values") + mount = _Mount.from_local_dir(local_path, remote_path=remote_path) + return self._add_mount_layer_or_copy(mount, copy=copy) + def copy_local_file(self, local_path: Union[str, Path], remote_path: Union[str, Path] = "./") -> "_Image": """Copy a file into the image as a part of building it. This works in a similar way to [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) works in a `Dockerfile`. """ + # TODO(elias): add pending deprecation with suggestion to use add_* instead basename = str(Path(local_path).name) mount = _Mount.from_local_file(local_path, remote_path=f"/{basename}") @@ -1637,7 +1684,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: dockerfile_function=build_dockerfile, ) - def workdir(self, path: str) -> "_Image": + def workdir(self, path: Union[str, PurePosixPath]) -> "_Image": """Set the working directory for subsequent image build steps and function execution. **Example** diff --git a/modal/mount.py b/modal/mount.py index e6c33e83d..b0554307c 100644 --- a/modal/mount.py +++ b/modal/mount.py @@ -377,7 +377,9 @@ def from_local_dir( ) def add_local_file( - self, local_path: Union[str, Path], remote_path: Union[str, PurePosixPath, None] = None + self, + local_path: Union[str, Path], + remote_path: Union[str, PurePosixPath, None] = None, ) -> "_Mount": """ Add a local file to the `Mount` object. diff --git a/test/image_test.py b/test/image_test.py index fa0990d48..f7af141ca 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -624,6 +624,87 @@ def tmp_path_with_content(tmp_path): return tmp_path +@pytest.mark.parametrize(["copy"], [(True,), (False,)]) +@pytest.mark.parametrize( + ["remote_path", "expected_dest"], + [ + ("/place/nice.txt", "/place/nice.txt"), + # Not supported yet, but soon: + ("/place/", "/place/data.txt"), # use original basename if destination has a trailing slash + # ("output.txt", "/proj/output.txt") # workdir relative target + # (None, "/proj/data.txt") # default target - basename in current directory + ], +) +def test_image_add_local_file(servicer, client, tmp_path_with_content, copy, remote_path, expected_dest): + app = App() + + if remote_path is None: + remote_path_kwargs = {} + else: + remote_path_kwargs = {"remote_path": remote_path} + + img = ( + Image.from_registry("unknown_image") + .workdir("/proj") + .add_local_file(tmp_path_with_content / "data.txt", **remote_path_kwargs, copy=copy) + ) + app.function(image=img)(dummy) + + with app.run(client=client): + if copy: + # check that dockerfile commands include COPY . . + layers = get_image_layers(img.object_id, servicer) + assert layers[0].dockerfile_commands == ["FROM base", "COPY . /"] + mount_id = layers[0].context_mount_id + # and then get the relevant context mount to check + if not copy: + assert len(img._mount_layers) == 1 + mount_id = img._mount_layers[0].object_id + + assert set(servicer.mount_contents[mount_id].keys()) == {expected_dest} + + +@pytest.mark.parametrize(["copy"], [(True,), (False,)]) +@pytest.mark.parametrize( + ["remote_path", "expected_dest"], + [ + ("/place/", "/place/sub"), # copy full dir + ("/place", "/place/sub"), # removing trailing slash on source makes no difference, unlike shell cp + # TODO: add support for relative paths: + # Not supported yet, but soon: + # ("place", "/proj/place/sub") # workdir relative target + # (None, "/proj/sub") # default target - copy into current directory + ], +) +def test_image_add_local_dir(servicer, client, tmp_path_with_content, copy, remote_path, expected_dest): + app = App() + + if remote_path is None: + remote_path_kwargs = {} + else: + remote_path_kwargs = {"remote_path": remote_path} + + img = ( + Image.from_registry("unknown_image") + .workdir("/proj") + .add_local_dir(tmp_path_with_content / "data", **remote_path_kwargs, copy=copy) + ) + app.function(image=img)(dummy) + + with app.run(client=client): + if copy: + # check that dockerfile commands include COPY . . + layers = get_image_layers(img.object_id, servicer) + assert layers[0].dockerfile_commands == ["FROM base", "COPY . /"] + mount_id = layers[0].context_mount_id + # and then get the relevant context mount to check + if not copy: + assert len(img._mount_layers) == 1 + mount_id = img._mount_layers[0].object_id + + assert set(servicer.mount_contents[mount_id].keys()) == {expected_dest} + + def test_image_copy_local_dir(builder_version, servicer, client, tmp_path_with_content): app = App() app.image = Image.debian_slim().copy_local_dir(tmp_path_with_content, remote_path="/dummy") diff --git a/test/mounted_files_test.py b/test/mounted_files_test.py index ce21414b4..1491d2af4 100644 --- a/test/mounted_files_test.py +++ b/test/mounted_files_test.py @@ -272,11 +272,13 @@ def test_mount_dedupe(servicer, credentials, test_dir, server_url_env): ) ) assert servicer.n_mounts == 2 - assert servicer.mount_contents["mo-1"].keys() == {"/root/mount_dedupe.py"} - pkg_a_mount = servicer.mount_contents["mo-2"] - for fn in pkg_a_mount.keys(): + # the order isn't strictly defined here + client_mount, entrypoint_mount, pkg_a_mount = sorted(servicer.mount_contents.items(), key=lambda item: len(item[1])) + assert client_mount[1] == {} + assert entrypoint_mount[1].keys() == {"/root/mount_dedupe.py"} + for fn in pkg_a_mount[1].keys(): assert fn.startswith("/root/pkg_a") - assert "/root/pkg_a/normally_not_included.pyc" not in pkg_a_mount.keys() + assert "/root/pkg_a/normally_not_included.pyc" not in pkg_a_mount[1].keys() def test_mount_dedupe_explicit(servicer, credentials, test_dir, server_url_env):