From a65075220acc579b18d62f3e067c2c2327f992a2 Mon Sep 17 00:00:00 2001 From: kramstrom Date: Thu, 28 Nov 2024 15:05:04 +0100 Subject: [PATCH 01/25] init --- modal/image.py | 144 +++++++++++++++++++++++++++++++-------------- test/image_test.py | 81 ++++++++++++++++++++++++- 2 files changed, 179 insertions(+), 46 deletions(-) diff --git a/modal/image.py b/modal/image.py index 3427681c56..2a4c94c987 100644 --- a/modal/image.py +++ b/modal/image.py @@ -1,5 +1,6 @@ # Copyright Modal Labs 2022 import contextlib +import fnmatch import json import os import re @@ -237,6 +238,72 @@ def _get_image_builder_version(server_version: ImageBuilderVersion) -> ImageBuil return version +def _create_context_mount(docker_commands: List[str]) -> _Mount: + mount_sources = set() + current_command = "" + copy_pattern = re.compile(r"^\s*COPY\s+(.+)$", re.IGNORECASE) + + # First pass: handle line continuations and collect full commands + for line in docker_commands: + line = line.strip() + # print(f"{line=}") + if not line or line.startswith("#"): + # ignore comments and empty lines + continue + + if current_command: + # Continue previous line + current_command += " " + line.rstrip("\\").strip() + else: + # Start new command + current_command = line.rstrip("\\").strip() + + if not line.endswith("\\"): + # Command is complete + + match = copy_pattern.match(current_command) + if match: + # print(f"{current_command=}") + args = match.group(1) + parts = shlex.split(args) + + if len(parts) >= 2: + # Last part is destination, everything else is a mount source + sources = parts[:-1] + + for source in sources: + special_pattern = re.compile(r"^\s*--|\$\s*") + if special_pattern.match(source): + raise InvalidError( + f"COPY command: {source} using special flags/arguments/variables are not supported" + ) + + # make sure all sources are absolute paths + if not os.path.isabs(source): + source = os.path.abspath(source) + + if not os.path.exists(source): + raise InvalidError(f"Mount source does not exist: {source}") + mount_sources.add(source) + + current_command = "" + + def mount_filter(source: str): + # TODO: + # https://docs.docker.com/reference/dockerfile/#pattern-matching-1 + # https://pkg.go.dev/path/filepath#Match + for mount_source in mount_sources: + # startswith should only apply if it's a directory (ends with /) + if (source.startswith(mount_source) and mount_source.endswith("/")) or fnmatch.fnmatch( + source, mount_source + ): + return True + + return False + + return _Mount.from_local_dir("./", remote_path="/", condition=mount_filter) + + class _ImageRegistryConfig: """mdmd:hidden""" @@ -397,7 +464,7 @@ def _from_args( build_function: Optional["modal.functions._Function"] = None, build_function_input: Optional[api_pb2.FunctionInput] = None, image_registry_config: Optional[_ImageRegistryConfig] = None, - context_mount: Optional[_Mount] = None, + context_mount_function: Optional[Callable[[], Optional[_Mount]]] = None, force_build: bool = False, # For internal use only. _namespace: "api_pb2.DeploymentNamespace.ValueType" = api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE, @@ -424,13 +491,15 @@ def _deps() -> Sequence[_Object]: deps = tuple(base_images.values()) + tuple(secrets) if build_function: deps += (build_function,) - if context_mount: - deps += (context_mount,) if image_registry_config and image_registry_config.secret: deps += (image_registry_config.secret,) return deps async def _load(self: _Image, resolver: Resolver, existing_object_id: Optional[str]): + context_mount = context_mount_function() if context_mount_function else None + if context_mount: + await resolver.load(context_mount) + if _do_assert_no_mount_layers: for image in base_images.values(): # base images can't have @@ -612,7 +681,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return _Image._from_args( base_images={"base": self}, dockerfile_function=build_dockerfile, - context_mount=mount, + context_mount_function=lambda: mount, ) def add_local_file(self, local_path: Union[str, Path], remote_path: str, *, copy: bool = False) -> "_Image": @@ -669,7 +738,6 @@ def copy_local_file(self, local_path: Union[str, Path], remote_path: Union[str, """ # TODO(elias): add pending deprecation with suggestion to use add_* instead basename = str(Path(local_path).name) - mount = _Mount.from_local_file(local_path, remote_path=f"/{basename}") def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return DockerfileSpec(commands=["FROM base", f"COPY {basename} {remote_path}"], context_files={}) @@ -677,7 +745,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return _Image._from_args( base_images={"base": self}, dockerfile_function=build_dockerfile, - context_mount=mount, + context_mount_function=lambda: _Mount.from_local_file(local_path, remote_path=f"/{basename}"), ) def _add_local_python_packages(self, *packages: str, copy: bool = False) -> "_Image": @@ -707,7 +775,6 @@ def copy_local_dir(self, local_path: Union[str, Path], remote_path: Union[str, P This works in a similar way to [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) works in a `Dockerfile`. """ - mount = _Mount.from_local_dir(local_path, remote_path="/") def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return DockerfileSpec(commands=["FROM base", f"COPY . {remote_path}"], context_files={}) @@ -715,7 +782,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return _Image._from_args( base_images={"base": self}, dockerfile_function=build_dockerfile, - context_mount=mount, + context_mount_function=lambda: _Mount.from_local_dir(local_path, remote_path="/"), ) def pip_install( @@ -1070,8 +1137,6 @@ def dockerfile_commands( context_files: Dict[str, str] = {}, secrets: Sequence[_Secret] = [], gpu: GPU_T = None, - # modal.Mount with local files to supply as build context for COPY commands - context_mount: Optional[_Mount] = None, force_build: bool = False, # Ignore cached builds, similar to 'docker build --no-cache' ) -> "_Image": """Extend an image with arbitrary Dockerfile-like commands.""" @@ -1087,7 +1152,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: dockerfile_function=build_dockerfile, secrets=secrets, gpu_config=parse_gpu_config(gpu), - context_mount=context_mount, + context_mount_function=lambda: _create_context_mount(cmds), force_build=self.force_build or force_build, ) @@ -1317,11 +1382,15 @@ def from_registry( modal.Image.from_registry("nvcr.io/nvidia/pytorch:22.12-py3") ``` """ - context_mount = None - if add_python: - context_mount = _Mount.from_name( - python_standalone_mount_name(add_python), - namespace=api_pb2.DEPLOYMENT_NAMESPACE_GLOBAL, + + def context_mount_function() -> Optional[_Mount]: + return ( + _Mount.from_name( + python_standalone_mount_name(add_python), + namespace=api_pb2.DEPLOYMENT_NAMESPACE_GLOBAL, + ) + if add_python + else None ) if "image_registry_config" not in kwargs and secret is not None: @@ -1334,7 +1403,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return _Image._from_args( dockerfile_function=build_dockerfile, - context_mount=context_mount, + context_mount_function=context_mount_function, force_build=force_build, **kwargs, ) @@ -1439,9 +1508,6 @@ def from_aws_ecr( def from_dockerfile( # Filepath to Dockerfile. path: Union[str, Path], - # modal.Mount with local files to supply as build context for COPY commands. - # NOTE: The remote_path of the Mount should match the Dockerfile's WORKDIR. - context_mount: Optional[_Mount] = None, # Ignore cached builds, similar to 'docker build --no-cache' force_build: bool = False, *, @@ -1459,21 +1525,6 @@ def from_dockerfile( ```python image = modal.Image.from_dockerfile("./Dockerfile", add_python="3.12") ``` - - If your Dockerfile uses `COPY` instructions which copy data from the local context of the - build into the image, this local data must be uploaded to Modal via a context mount: - - ```python - image = modal.Image.from_dockerfile( - "./Dockerfile", - context_mount=modal.Mount.from_local_dir( - local_path="src", - remote_path=".", # to current WORKDIR - ), - ) - ``` - - The context mount will allow a `COPY src/ src/` instruction to succeed in Modal's remote builder. """ # --- Build the base dockerfile @@ -1483,10 +1534,15 @@ def build_dockerfile_base(version: ImageBuilderVersion) -> DockerfileSpec: commands = f.read().split("\n") return DockerfileSpec(commands=commands, context_files={}) + def base_image_context_mount_function() -> _Mount: + with open(os.path.expanduser(path)) as f: + lines = f.readlines() + return _create_context_mount(lines) + gpu_config = parse_gpu_config(gpu) base_image = _Image._from_args( dockerfile_function=build_dockerfile_base, - context_mount=context_mount, + context_mount_function=base_image_context_mount_function, gpu_config=gpu_config, secrets=secrets, ) @@ -1495,13 +1551,15 @@ def build_dockerfile_base(version: ImageBuilderVersion) -> DockerfileSpec: # This happening in two steps is probably a vestigial consequence of previous limitations, # but it will be difficult to merge them without forcing rebuilds of images. - if add_python: - context_mount = _Mount.from_name( - python_standalone_mount_name(add_python), - namespace=api_pb2.DEPLOYMENT_NAMESPACE_GLOBAL, + def context_mount_function(): + return ( + _Mount.from_name( + python_standalone_mount_name(add_python), + namespace=api_pb2.DEPLOYMENT_NAMESPACE_GLOBAL, + ) + if add_python + else None ) - else: - context_mount = None def build_dockerfile_python(version: ImageBuilderVersion) -> DockerfileSpec: commands = _Image._registry_setup_commands("base", version, [], add_python) @@ -1512,7 +1570,7 @@ def build_dockerfile_python(version: ImageBuilderVersion) -> DockerfileSpec: return _Image._from_args( base_images={"base": base_image}, dockerfile_function=build_dockerfile_python, - context_mount=context_mount, + context_mount_function=context_mount_function, force_build=force_build, ) diff --git a/test/image_test.py b/test/image_test.py index 57314b3177..dbc5225fda 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -6,7 +6,8 @@ import sys import threading from hashlib import sha256 -from tempfile import NamedTemporaryFile +from pathlib import Path +from tempfile import NamedTemporaryFile, TemporaryDirectory from typing import List, Literal, get_args from unittest import mock @@ -720,7 +721,7 @@ def test_image_copy_local_dir(builder_version, servicer, client, tmp_path_with_c def test_image_docker_command_copy(builder_version, servicer, client, tmp_path_with_content): app = App() data_mount = Mount.from_local_dir(tmp_path_with_content, remote_path="/") - app.image = Image.debian_slim().dockerfile_commands(["COPY . /dummy"], context_mount=data_mount) + app.image = Image.debian_slim().dockerfile_commands(["COPY . /dummy"]) app.function()(dummy) with app.run(client=client): @@ -737,7 +738,7 @@ def test_image_dockerfile_copy(builder_version, servicer, client, tmp_path_with_ app = App() data_mount = Mount.from_local_dir(tmp_path_with_content, remote_path="/") - app.image = Image.debian_slim().from_dockerfile(dockerfile.name, context_mount=data_mount) + app.image = Image.debian_slim().from_dockerfile(dockerfile.name) app.function()(dummy) with app.run(client=client): @@ -746,6 +747,80 @@ def test_image_dockerfile_copy(builder_version, servicer, client, tmp_path_with_ files = {f.mount_filename: f.content for f in Mount._get_files(data_mount.entries)} assert files == {"/data.txt": b"hello", "/data/sub": b"world"} + copied_files = servicer.mount_contents[layers[1].context_mount_id].keys() + print() + print(copied_files) + print() + + assert app.image == 123 + + +def test_image_dockerfile_copy2(builder_version, servicer, client): + with TemporaryDirectory(dir="./") as tmp_dir: + tmp_path = Path(tmp_dir) + + (tmp_path / "smth").mkdir() + (tmp_path / "smth" / "one").mkdir() + (tmp_path / "smth" / "one" / "foo.py").write_text("foo") + (tmp_path / "smth" / "one" / "foo.csv").write_text("foo") + (tmp_path / "smth" / "two").mkdir() + (tmp_path / "smth" / "two" / "bar.py").write_text("bar") + (tmp_path / "smth" / "baz.py").write_text("baz") + + (tmp_path / "abc.py").write_text("abc") + (tmp_path / "xyz.py").write_text("xyz") + + dockerfile = NamedTemporaryFile("w", delete=False) + dockerfile.write( + f""" +FROM python:3.12-slim + +WORKDIR /my-app + +RUN ls + +# COPY simple directory + CoPY {tmp_dir}/smth ./smth_copy + +RUN ls -la + +# COPY multiple sources + COPY {tmp_dir}/abc.py {tmp_dir}/xyz.py / + +RUN ls \\ + -l + +# COPY multiple lines +copy {tmp_dir}/smth \\ + {tmp_dir}/abc.py \\ +# this is a comment + {tmp_dir}/xyz.py \\ + /x + + RUN ls + """ + ) + dockerfile.close() + + app = App() + app.image = Image.debian_slim().from_dockerfile(dockerfile.name) + app.function()(dummy) + + with app.run(client=client): + layers = get_image_layers(app.image.object_id, servicer) + + copied_files = servicer.mount_contents[layers[1].context_mount_id].keys() + assert sorted(copied_files) == sorted( + [ + f"/{tmp_path}/abc.py", + f"/{tmp_path}/smth/one/foo.py", + f"/{tmp_path}/smth/one/foo.csv", + f"/{tmp_path}/smth/two/bar.py", + f"/{tmp_path}/smth/baz.py", + f"/{tmp_path}/xyz.py", + ] + ) + def test_image_docker_command_entrypoint(builder_version, servicer, client, tmp_path_with_content): app = App() From 70195990e9c88aa7ab48b9df5601cdc04f5bb569 Mon Sep 17 00:00:00 2001 From: kramstrom Date: Tue, 3 Dec 2024 16:20:20 +0100 Subject: [PATCH 02/25] patterns --- modal/image.py | 76 +++++++++++++++++------ test/image_test.py | 152 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 181 insertions(+), 47 deletions(-) diff --git a/modal/image.py b/modal/image.py index 2a4c94c987..ad965d1be4 100644 --- a/modal/image.py +++ b/modal/image.py @@ -238,15 +238,18 @@ def _get_image_builder_version(server_version: ImageBuilderVersion) -> ImageBuil return version -def _create_context_mount(docker_commands: List[str]) -> _Mount: +def _extract_copy_command_patterns(dockerfile_lines: List[str]) -> List[str]: + """ + Extract all COPY command sources from a Dockerfile. + Combines multiline COPY commands into a single line. + """ mount_sources = set() current_command = "" copy_pattern = re.compile(r"^\s*COPY\s+(.+)$", re.IGNORECASE) # First pass: handle line continuations and collect full commands - for line in docker_commands: + for line in dockerfile_lines: line = line.strip() - # print(f"{line=}") if not line or line.startswith("#"): # ignore comments and empty lines continue @@ -263,7 +266,6 @@ def _create_context_mount(docker_commands: List[str]) -> _Mount: match = copy_pattern.match(current_command) if match: - # print(f"{current_command=}") args = match.group(1) parts = shlex.split(args) @@ -278,29 +280,63 @@ def _create_context_mount(docker_commands: List[str]) -> _Mount: f"COPY command: {source} using special flags/arguments/variables are not supported" ) - # make sure all sources are absolute paths - if not os.path.isabs(source): - source = os.path.abspath(source) - - if not os.path.exists(source): - raise InvalidError(f"Mount source does not exist: {source}") mount_sources.add(source) current_command = "" - def mount_filter(source: str): - # TODO: - # https://docs.docker.com/reference/dockerfile/#pattern-matching-1 - # https://pkg.go.dev/path/filepath#Match - for mount_source in mount_sources: - # startswith should only apply if it's a directory (ends with /) - if (source.startswith(mount_source) and mount_source.endswith("/")) or fnmatch.fnmatch( - source, mount_source - ): - return True + return list(mount_sources) + + +def _filter_fp_docker_pattern(filepath: str, pattern: str) -> bool: + """ + Validates that a filepath matches a docker copy pattern. + https://docs.docker.com/reference/dockerfile/#pattern-matching-1 + https://pkg.go.dev/path/filepath#Match + """ + filepath = os.path.abspath(filepath) + pattern = os.path.abspath(pattern) + + if "**" in pattern: + # Match any file that ends with the pattern after '**/' + base_dir = pattern.split("/**/")[0] + file_pattern = pattern.split("/**/")[1] + + # Ensure the file is within the base directory + if not filepath.startswith(base_dir + "/"): + return False + + # Get the relative path after base_dir + rel_path = filepath[len(base_dir) + 1 :] + + # Match against the filename part only + filename = os.path.basename(filepath) + if fnmatch.fnmatch(filename, file_pattern): + # Ensure the file is exactly one level deep + return rel_path.count("/") == 1 + return False + # if pattern is a directory, make sure it ends with a separator + if os.path.isdir(pattern) and not pattern.endswith("/"): + pattern += "/*" + + filepath_dir = os.path.dirname(filepath) + pattern_dir = os.path.dirname(pattern) + + if filepath_dir != pattern_dir: return False + return fnmatch.fnmatch(filepath, pattern) + + +def _create_context_mount(docker_commands: List[str]) -> _Mount: + """ + Creates a context mount from a list of docker commands. + """ + mount_sources = _extract_copy_command_patterns(docker_commands) + + def mount_filter(source: str): + return any(_filter_fp_docker_pattern(source, mount_source) for mount_source in mount_sources) + return _Mount.from_local_dir("./", remote_path="/", condition=mount_filter) diff --git a/test/image_test.py b/test/image_test.py index dbc5225fda..0e7419767e 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -22,6 +22,8 @@ ImageBuilderVersion, _base_image_config, _dockerhub_python_version, + _extract_copy_command_patterns, + _filter_fp_docker_pattern, _get_modal_requirements_path, _validate_python_version, ) @@ -747,28 +749,45 @@ def test_image_dockerfile_copy(builder_version, servicer, client, tmp_path_with_ files = {f.mount_filename: f.content for f in Mount._get_files(data_mount.entries)} assert files == {"/data.txt": b"hello", "/data/sub": b"world"} - copied_files = servicer.mount_contents[layers[1].context_mount_id].keys() - print() - print(copied_files) - print() - assert app.image == 123 +def create_tmp_files(tmp_path): + (tmp_path / "dir1").mkdir() + (tmp_path / "dir1" / "a.txt").write_text("a") + (tmp_path / "dir1" / "b.txt").write_text("b") + (tmp_path / "dir2").mkdir() + (tmp_path / "dir2" / "test1.py").write_text("test1") + (tmp_path / "dir2" / "test2.py").write_text("test2") -def test_image_dockerfile_copy2(builder_version, servicer, client): + (tmp_path / "file1.txt").write_text("content1") + (tmp_path / "file10.txt").write_text("content1") + (tmp_path / "file2.txt").write_text("content2") + (tmp_path / "test.py").write_text("python") + + (tmp_path / "special").mkdir() + (tmp_path / "special" / "file[1].txt").write_text("special1") + (tmp_path / "special" / "file{2}.txt").write_text("special2") + (tmp_path / "special" / "test?file.py").write_text("special3") + + (tmp_path / "this").mkdir() + (tmp_path / "this" / "is").mkdir() + (tmp_path / "this" / "is" / "super").mkdir() + (tmp_path / "this" / "is" / "super" / "nested").mkdir() + (tmp_path / "this" / "is" / "super" / "nested" / "file.py").write_text("python") + + all_fps = [] + for root, _, files in os.walk(tmp_path): + for file in files: + all_fps.append(f"{os.path.join(root, file)}".lstrip("./")) + + return all_fps + + +def test_image_dockerfile_copy_messy(builder_version, servicer, client): with TemporaryDirectory(dir="./") as tmp_dir: tmp_path = Path(tmp_dir) - (tmp_path / "smth").mkdir() - (tmp_path / "smth" / "one").mkdir() - (tmp_path / "smth" / "one" / "foo.py").write_text("foo") - (tmp_path / "smth" / "one" / "foo.csv").write_text("foo") - (tmp_path / "smth" / "two").mkdir() - (tmp_path / "smth" / "two" / "bar.py").write_text("bar") - (tmp_path / "smth" / "baz.py").write_text("baz") - - (tmp_path / "abc.py").write_text("abc") - (tmp_path / "xyz.py").write_text("xyz") + create_tmp_files(tmp_path) dockerfile = NamedTemporaryFile("w", delete=False) dockerfile.write( @@ -780,21 +799,21 @@ def test_image_dockerfile_copy2(builder_version, servicer, client): RUN ls # COPY simple directory - CoPY {tmp_dir}/smth ./smth_copy + CoPY {tmp_dir}/dir1 ./smth_copy RUN ls -la # COPY multiple sources - COPY {tmp_dir}/abc.py {tmp_dir}/xyz.py / + COPY {tmp_dir}/test.py {tmp_dir}/file10.txt / RUN ls \\ -l # COPY multiple lines -copy {tmp_dir}/smth \\ - {tmp_dir}/abc.py \\ +copy {tmp_dir}/dir2 \\ + {tmp_dir}/file1.txt \\ # this is a comment - {tmp_dir}/xyz.py \\ + {tmp_dir}/file2.txt \\ /x RUN ls @@ -812,16 +831,95 @@ def test_image_dockerfile_copy2(builder_version, servicer, client): copied_files = servicer.mount_contents[layers[1].context_mount_id].keys() assert sorted(copied_files) == sorted( [ - f"/{tmp_path}/abc.py", - f"/{tmp_path}/smth/one/foo.py", - f"/{tmp_path}/smth/one/foo.csv", - f"/{tmp_path}/smth/two/bar.py", - f"/{tmp_path}/smth/baz.py", - f"/{tmp_path}/xyz.py", + f"/{tmp_path}/dir1/a.txt", + f"/{tmp_path}/dir1/b.txt", + f"/{tmp_path}/test.py", + f"/{tmp_path}/file10.txt", + f"/{tmp_path}/file1.txt", + f"/{tmp_path}/file2.txt", + f"/{tmp_path}/dir2/test1.py", + f"/{tmp_path}/dir2/test2.py", ] ) +def test_extract_copy_command_patterns(): + x = [ + ( + ["CoPY files/dir1 ./smth_copy"], + ["files/dir1"], + ), + ( + ["COPY files/*.txt /dest/", "COPY files/**/*.py /dest/"], + ["files/*.txt", "files/**/*.py"], + ), + ( + ["COPY files/special/file[[]1].txt /dest/"], + ["files/special/file[[]1].txt"], + ), + ( + ["COPY files/*.txt files/**/*.py /dest/"], + ["files/*.txt", "files/**/*.py"], + ), + ( + [ + "copy ./smth \\", + "./foo.py \\", + "# this is a comment", + "./bar.py \\", + "/x", + ], + ["./smth", "./foo.py", "./bar.py"], + ), + ] + + for dockerfile_lines, expected in x: + copy_command_sources = sorted(_extract_copy_command_patterns(dockerfile_lines)) + expected = sorted(expected) + assert copy_command_sources == expected + + +@pytest.mark.parametrize( + ("name", "pattern", "expected_filepaths"), + [ + ( + "basic_wildcards", + "{tmp_dir}/*.txt", + ["{tmp_path}/file1.txt", "{tmp_path}/file10.txt", "{tmp_path}/file2.txt"], + ), + ("single_character_wildcards", "{tmp_dir}/file?.txt", ["{tmp_path}/file1.txt", "{tmp_path}/file2.txt"]), + ( + "recursive_wildcards", + "{tmp_dir}/**/*.py", + ["{tmp_path}/dir2/test1.py", "{tmp_path}/dir2/test2.py", "{tmp_path}/special/test?file.py"], + ), + ( + "directory_specific_match", + "{tmp_dir}/dir1/*.txt", + ["{tmp_path}/dir1/a.txt", "{tmp_path}/dir1/b.txt"], + ), + ("escaping_special_characters", "{tmp_dir}/special/file[[]1].txt", ["{tmp_path}/special/file[1].txt"]), + ("character_range", "{tmp_dir}/dir2/test[1-2].py", ["{tmp_path}/dir2/test1.py", "{tmp_path}/dir2/test2.py"]), + ("abs_path", "/Users/kasper/dev/client/{tmp_path}/dir2/test1.py", ["{tmp_path}/dir2/test1.py"]), + ], +) +def test_filter_fp_docker_pattern(name, pattern, expected_filepaths): + with TemporaryDirectory(dir="./") as tmp_dir: + tmp_path = Path(tmp_dir) + all_fps = create_tmp_files(tmp_path) + + fmt_pattern = pattern.format(tmp_dir=tmp_dir, tmp_path=tmp_path) + fmt_expected_filepaths = [fp.format(tmp_dir=tmp_dir, tmp_path=tmp_path) for fp in expected_filepaths] + fmt_unexpected_filepaths = [fp for fp in all_fps if fp not in fmt_expected_filepaths] + + # assert only expected_filepaths are matched + for fp in fmt_expected_filepaths: + assert _filter_fp_docker_pattern(fp, fmt_pattern), f"{name=} {fp=} {fmt_pattern=}" + # assert no other filepaths are matched + for fp in fmt_unexpected_filepaths: + assert not _filter_fp_docker_pattern(fp, fmt_pattern), f"{name=} {fp=} {fmt_pattern=}" + + def test_image_docker_command_entrypoint(builder_version, servicer, client, tmp_path_with_content): app = App() app.image = Image.debian_slim().entrypoint([]) From 13ca6a1f5a9681dc20190cd7a66071311494393c Mon Sep 17 00:00:00 2001 From: kramstrom Date: Tue, 3 Dec 2024 16:48:55 +0100 Subject: [PATCH 03/25] abs path --- test/image_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/image_test.py b/test/image_test.py index 47834f1bb9..ae421f3e29 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -900,15 +900,16 @@ def test_extract_copy_command_patterns(): ), ("escaping_special_characters", "{tmp_dir}/special/file[[]1].txt", ["{tmp_path}/special/file[1].txt"]), ("character_range", "{tmp_dir}/dir2/test[1-2].py", ["{tmp_path}/dir2/test1.py", "{tmp_path}/dir2/test2.py"]), - ("abs_path", "/Users/kasper/dev/client/{tmp_path}/dir2/test1.py", ["{tmp_path}/dir2/test1.py"]), + ("abs_path", "{tmp_dir}/dir2/test1.py", ["{tmp_path}/dir2/test1.py"]), ], ) def test_filter_fp_docker_pattern(name, pattern, expected_filepaths): with TemporaryDirectory(dir="./") as tmp_dir: tmp_path = Path(tmp_dir) all_fps = create_tmp_files(tmp_path) - fmt_pattern = pattern.format(tmp_dir=tmp_dir, tmp_path=tmp_path) + if name == "abs_path": + fmt_pattern = os.path.abspath(fmt_pattern) fmt_expected_filepaths = [fp.format(tmp_dir=tmp_dir, tmp_path=tmp_path) for fp in expected_filepaths] fmt_unexpected_filepaths = [fp for fp in all_fps if fp not in fmt_expected_filepaths] From 034693ee12841e721686631ec135cdc7649f863d Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Mon, 16 Dec 2024 13:56:14 +0100 Subject: [PATCH 04/25] add back context_mount --- modal/image.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/modal/image.py b/modal/image.py index eb84c968fb..0ae5665197 100644 --- a/modal/image.py +++ b/modal/image.py @@ -1625,6 +1625,9 @@ def from_aws_ecr( def from_dockerfile( # Filepath to Dockerfile. path: Union[str, Path], + # modal.Mount with local files to supply as build context for COPY commands. + # NOTE: The remote_path of the Mount should match the Dockerfile's WORKDIR. + context_mount: Optional[_Mount] = None, # Ignore cached builds, similar to 'docker build --no-cache' force_build: bool = False, *, @@ -1642,8 +1645,32 @@ def from_dockerfile( ```python image = modal.Image.from_dockerfile("./Dockerfile", add_python="3.12") ``` + + If your Dockerfile uses `COPY` instructions which copy data from the local context of the + build into the image, this local data will be implicitly mounted into the image. + TODO: add dockerignore example """ + if context_mount is not None: + deprecation_warning( + (2024, 12, 16), + "`context_mount` is deprecated," + + " files are now mounted implicitly without this flag and can be ignored with `dockerignore` files", + ) + + def wrapper_context_mount_function(): + return context_mount + + context_mount_function = wrapper_context_mount_function + else: + + def base_image_context_mount_function() -> _Mount: + with open(os.path.expanduser(path)) as f: + lines = f.readlines() + return _create_context_mount(lines) + + context_mount_function = base_image_context_mount_function + # --- Build the base dockerfile def build_dockerfile_base(version: ImageBuilderVersion) -> DockerfileSpec: @@ -1651,15 +1678,10 @@ def build_dockerfile_base(version: ImageBuilderVersion) -> DockerfileSpec: commands = f.read().split("\n") return DockerfileSpec(commands=commands, context_files={}) - def base_image_context_mount_function() -> _Mount: - with open(os.path.expanduser(path)) as f: - lines = f.readlines() - return _create_context_mount(lines) - gpu_config = parse_gpu_config(gpu) base_image = _Image._from_args( dockerfile_function=build_dockerfile_base, - context_mount_function=base_image_context_mount_function, + context_mount_function=context_mount_function, gpu_config=gpu_config, secrets=secrets, ) From b8d3e1492c9a119c446d3c05fa081b8beea5c031 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Mon, 16 Dec 2024 15:25:50 +0100 Subject: [PATCH 05/25] add back --- modal/image.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/modal/image.py b/modal/image.py index 0ae5665197..a34844c3e4 100644 --- a/modal/image.py +++ b/modal/image.py @@ -1254,6 +1254,8 @@ def dockerfile_commands( context_files: dict[str, str] = {}, secrets: Sequence[_Secret] = [], gpu: GPU_T = None, + # modal.Mount with local files to supply as build context for COPY commands + context_mount: Optional[_Mount] = None, force_build: bool = False, # Ignore cached builds, similar to 'docker build --no-cache' ) -> "_Image": """Extend an image with arbitrary Dockerfile-like commands.""" @@ -1261,6 +1263,24 @@ def dockerfile_commands( if not cmds: return self + if context_mount is not None: + deprecation_warning( + (2024, 12, 16), + "`context_mount` is deprecated," + + " files are now mounted implicitly without this flag and can be ignored with `dockerignore` files", + ) + + def wrapper_context_mount_function(): + return context_mount + + context_mount_function = wrapper_context_mount_function + else: + + def base_image_context_mount_function() -> _Mount: + return _create_context_mount(cmds) + + context_mount_function = base_image_context_mount_function + def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: return DockerfileSpec(commands=["FROM base", *cmds], context_files=context_files) @@ -1269,7 +1289,7 @@ def build_dockerfile(version: ImageBuilderVersion) -> DockerfileSpec: dockerfile_function=build_dockerfile, secrets=secrets, gpu_config=parse_gpu_config(gpu), - context_mount_function=lambda: _create_context_mount(cmds), + context_mount_function=context_mount_function, force_build=self.force_build or force_build, ) From 75346c180a948b4281072ac0599dc8467955aa7e Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Mon, 16 Dec 2024 16:33:44 +0100 Subject: [PATCH 06/25] update --- test/image_test.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/test/image_test.py b/test/image_test.py index f6666d62f0..6022bea129 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -715,7 +715,7 @@ def test_image_copy_local_dir(builder_version, servicer, client, tmp_path_with_c def test_image_docker_command_copy(builder_version, servicer, client, tmp_path_with_content): app = App() data_mount = Mount.from_local_dir(tmp_path_with_content, remote_path="/", condition=lambda p: "module" not in p) - app.image = Image.debian_slim().dockerfile_commands(["COPY . /dummy"], context_mount=data_mount) + app.image = Image.debian_slim().dockerfile_commands(["COPY . /dummy"]) app.function()(dummy) with app.run(client=client): @@ -727,18 +727,25 @@ def test_image_docker_command_copy(builder_version, servicer, client, tmp_path_w def test_image_dockerfile_copy(builder_version, servicer, client, tmp_path_with_content): dockerfile = NamedTemporaryFile("w", delete=False) - dockerfile.write("COPY . /dummy\n") + dockerfile.write(f"COPY {tmp_path_with_content} /dummy\n") dockerfile.close() app = App() - data_mount = Mount.from_local_dir(tmp_path_with_content, remote_path="/", condition=lambda p: "module" not in p) - app.image = Image.debian_slim().from_dockerfile(dockerfile.name, context_mount=data_mount) + # data_mount = Mount.from_local_dir(tmp_path_with_content, remote_path="/", condition=lambda p: "module" not in p) + app.image = Image.debian_slim().from_dockerfile(dockerfile.name) app.function()(dummy) with app.run(client=client): layers = get_image_layers(app.image.object_id, servicer) - assert "COPY . /dummy" in layers[1].dockerfile_commands - files = {f.mount_filename: f.content for f in Mount._get_files(data_mount.entries)} + assert f"COPY {tmp_path_with_content} /dummy" in layers[1].dockerfile_commands + # TODO: + # get files from the image layers instead + # files = {f.mount_filename: f.content for f in Mount._get_files(data_mount.entries)} + files = {} + print() + print(f"{layers[1]=}") + mount_id = layers[1].context_mount_id + files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) assert files == {"/data.txt": b"hello", "/data/sub": b"world"} From b013546f757b617ee876011274346b6c5ee15913 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Tue, 17 Dec 2024 15:15:59 +0100 Subject: [PATCH 07/25] dockerignore find --- modal/image.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/modal/image.py b/modal/image.py index a34844c3e4..366c51f2d5 100644 --- a/modal/image.py +++ b/modal/image.py @@ -338,6 +338,53 @@ def mount_filter(source: str): return _Mount.from_local_dir("./", remote_path="/", condition=mount_filter) +def find_dockerignore_file(dockerfile_path: Path) -> Optional[Path]: + current_working_dir = Path.cwd() + # 1. file next to the Dockerfile but do NOT look for parent directories + + dockerignore_file = None + if dockerignore_file.exists(): + return dockerignore_file + + dockerignore_file = None + if dockerignore_file.exists(): + return dockerignore_file + + dockerignore_file = None + if dockerignore_file.exists(): + return dockerignore_file + + dockerignore_file = None + if dockerignore_file: + return dockerignore_file + + def valid_dockerignore_file(fp): + # fp has to exist + if not fp.exists(): + return False + # fp has to be subpath to current working directory + if not fp.is_relative_to(current_working_dir): + return False + + return True + + generic_name = ".dockerignore" + specific_name = f"{dockerfile_path.name}.dockerignore" + + possible_locations = [ + # 1. check if specific .dockerignore file exists in the same directory as + dockerfile_path.parent / specific_name, + # 2. check if generic .dockerignore file exists in the same directory as + dockerfile_path.parent / generic_name, + # 3. check if generic .dockerignore file exists in current working directory + current_working_dir / generic_name, + # 4. ?????? check if specific .dockerignore file exists in current working directory + current_working_dir / specific_name, + ] + + return next((e for e in possible_locations if valid_dockerignore_file(e)), None) + + class _ImageRegistryConfig: """mdmd:hidden""" From b2cb8ceee2d50725393137be345599ee9c766540 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Wed, 18 Dec 2024 08:47:36 +0100 Subject: [PATCH 08/25] use dockerignore --- modal/image.py | 42 +++++++++----------- test/image_test.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 24 deletions(-) diff --git a/modal/image.py b/modal/image.py index 366c51f2d5..e8f307399e 100644 --- a/modal/image.py +++ b/modal/image.py @@ -33,6 +33,7 @@ from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors +from ._utils.pattern_utils import read_ignorefile from .client import _Client from .cloud_bucket_mount import _CloudBucketMount from .config import config, logger, user_config_path @@ -326,38 +327,29 @@ def _filter_fp_docker_pattern(filepath: str, pattern: str) -> bool: return fnmatch.fnmatch(filepath, pattern) -def _create_context_mount(docker_commands: list[str]) -> _Mount: +def _create_context_mount(docker_commands: list[str], ignore: Callable[[Path], bool]) -> _Mount: """ Creates a context mount from a list of docker commands. """ mount_sources = _extract_copy_command_patterns(docker_commands) - def mount_filter(source: str): - return any(_filter_fp_docker_pattern(source, mount_source) for mount_source in mount_sources) + def mount_filter(source: Path): + # check if the source path matches any pattern from the docker file + # and it doesnt match any ignore pattern + matches_any_copy_pattern = any( + _filter_fp_docker_pattern(source, mount_source) for mount_source in mount_sources + ) + matches_any_ignore_pattern = ignore(source) + + return matches_any_copy_pattern and not matches_any_ignore_pattern - return _Mount.from_local_dir("./", remote_path="/", condition=mount_filter) + return _Mount._add_local_dir(Path("./"), Path("/"), ignore=mount_filter) -def find_dockerignore_file(dockerfile_path: Path) -> Optional[Path]: +def _find_dockerignore_file(dockerfile_path: Path) -> Optional[Path]: current_working_dir = Path.cwd() # 1. file next to the Dockerfile but do NOT look for parent directories - dockerignore_file = None - if dockerignore_file.exists(): - return dockerignore_file - - dockerignore_file = None - if dockerignore_file.exists(): - return dockerignore_file - - dockerignore_file = None - if dockerignore_file.exists(): - return dockerignore_file - - dockerignore_file = None - if dockerignore_file: - return dockerignore_file - def valid_dockerignore_file(fp): # fp has to exist if not fp.exists(): @@ -1324,7 +1316,7 @@ def wrapper_context_mount_function(): else: def base_image_context_mount_function() -> _Mount: - return _create_context_mount(cmds) + return _create_context_mount(cmds, FilePatternMatcher()) context_mount_function = base_image_context_mount_function @@ -1715,9 +1707,11 @@ def from_dockerfile( If your Dockerfile uses `COPY` instructions which copy data from the local context of the build into the image, this local data will be implicitly mounted into the image. - TODO: add dockerignore example """ + dockerignore_fp = _find_dockerignore_file(Path(path)) + ignore = FilePatternMatcher(*read_ignorefile(dockerignore_fp)) if dockerignore_fp else FilePatternMatcher() + if context_mount is not None: deprecation_warning( (2024, 12, 16), @@ -1734,7 +1728,7 @@ def wrapper_context_mount_function(): def base_image_context_mount_function() -> _Mount: with open(os.path.expanduser(path)) as f: lines = f.readlines() - return _create_context_mount(lines) + return _create_context_mount(lines, ignore) context_mount_function = base_image_context_mount_function diff --git a/test/image_test.py b/test/image_test.py index 6022bea129..13971b2013 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -24,6 +24,7 @@ _dockerhub_python_version, _extract_copy_command_patterns, _filter_fp_docker_pattern, + _find_dockerignore_file, _get_modal_requirements_path, _validate_python_version, ) @@ -1738,3 +1739,100 @@ def test_image_add_local_dir_ignore_nothing(servicer, client, tmp_path_with_cont assert set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) == { Path(f"/place{f}") for f in expected } + + +def test_find_dockerignore_file(): + print() + test_cwd = Path.cwd() + + # case 1: + # generic dockerignore file in cwd --> find it + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = dir1 / "Dockerfile" + dockerignore_path = tmp_path / ".dockerignore" + dockerignore_path.write_text("**/*") + assert _find_dockerignore_file(dockerfile_path) == dockerignore_path + + # case 2: + # specific dockerignore file in cwd --> find it + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = dir1 / "foo" + dockerignore_path = tmp_path / "foo.dockerignore" + dockerignore_path.write_text("**/*") + assert _find_dockerignore_file(dockerfile_path) == dockerignore_path + + # case 3: + # generic dockerignore file and nested dockerignore file in cwd + # should match specific + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = tmp_path / "Dockerfile" + generic_dockerignore_path = tmp_path / ".dockerignore" + generic_dockerignore_path.write_text("**/*.py") + specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" + specific_dockerignore_path.write_text("**/*") + assert _find_dockerignore_file(dockerfile_path) == specific_dockerignore_path + assert _find_dockerignore_file(dockerfile_path) != generic_dockerignore_path + + # case 4: + # should not match nested dockerignore files + # or parent dockerignore files + # when dockerfile is in cwd + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + dir2 = dir1 / "dir2" + dir2.mkdir() + + os.chdir(dir1) + + dockerfile_path = dir1 / "Dockerfile" + dockerfile_path.write_text("COPY . /dummy") + + # should ignore parent ones + generic_dockerignore_path = tmp_path / ".dockerignore" + generic_dockerignore_path.write_text("**/*") + specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" + specific_dockerignore_path.write_text("**/*") + + # should ignore nested ones + nested_generic_dockerignore_path = dir2 / ".dockerignore" + nested_generic_dockerignore_path.write_text("**/*") + nested_specific_dockerignore_path = dir2 / "Dockerfile.dockerignore" + nested_specific_dockerignore_path.write_text("**/*") + + assert _find_dockerignore_file(dockerfile_path) is None + + # case 5: + # should match dockerignore file next to dockerfile + # and not next to cwd if both exist + # even if more specific + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = dir1 / "Dockerfile" + dockerignore_path = tmp_path / ".dockerignore" + dockerignore_path.write_text("**/*") + assert _find_dockerignore_file(dockerfile_path) == dockerignore_path From 21ec1e7005ef83a4dd0724b42dda37b5e5203c66 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Wed, 18 Dec 2024 11:43:10 +0100 Subject: [PATCH 09/25] dockerfile match port --- modal/_utils/match.py | 161 ++++++++++++++++++++++++++++++++++++++++++ modal/image.py | 47 +----------- test/match_test.py | 83 ++++++++++++++++++++++ 3 files changed, 246 insertions(+), 45 deletions(-) create mode 100644 modal/_utils/match.py create mode 100644 test/match_test.py diff --git a/modal/_utils/match.py b/modal/_utils/match.py new file mode 100644 index 0000000000..196cacaa60 --- /dev/null +++ b/modal/_utils/match.py @@ -0,0 +1,161 @@ +# Copyright Modal Labs 2024 +"""Pattern matching library ported from https://github.com/golang/go/blob/go1.23.4/src/path/match.go + +This is the same pattern-matching logic used by Docker for Dockerfiles (not dockerignore), +except it is written in Python rather than Go. +""" + + +class PatternError(Exception): + """Indicates a pattern was malformed.""" + + pass + + +def match(pattern: str, name: str) -> bool: + while len(pattern) > 0: + continue_outer_loop = False + star, chunk, pattern = scan_chunk(pattern) + + if star and chunk == "": + # Trailing * matches rest of string unless it has a /. + # return bytealg.IndexByteString(name, '/') < 0, nil + return name.find("/") < 0 + + # Look for match at current position. + t, ok = match_chunk(chunk, name) + # if we're the last chunk, make sure we've exhausted the name + # otherwise we'll give a false result even if we could still match + # using the star + if ok and (len(t) == 0 or len(pattern) > 0): + name = t + continue + + if star: + i = 0 + while i < len(name) and name[i] != "/": + t, ok = match_chunk(chunk, name[i + 1 :]) + if ok: + if len(pattern) == 0 and len(t) > 0: + i += 1 + continue + name = t + continue_outer_loop = True + break + + i += 1 + if continue_outer_loop: + continue + + while len(pattern) > 0: + _, chunk, pattern = scan_chunk(pattern) + match_chunk(chunk, "") + + return False + + return len(name) == 0 + + +def scan_chunk(pattern: str) -> tuple[bool, str, str]: + star = False + while len(pattern) > 0 and pattern[0] == "*": + pattern = pattern[1:] + star = True + + inrange = False + i = 0 + while i < len(pattern): + if pattern[i] == "\\": + if i + 1 < len(pattern): + i += 1 + elif pattern[i] == "[": + inrange = True + elif pattern[i] == "]": + inrange = False + elif pattern[i] == "*": + if not inrange: + break + i += 1 + + return star, pattern[0:i], pattern[i:] + + +def match_chunk(chunk: str, s: str) -> tuple[str, bool]: + failed = False + + while len(chunk) > 0: + if not failed and len(s) == 0: + failed = True + + if chunk[0] == "[": + r = "" + if not failed: + r = s[0] + s = s[1:] + chunk = chunk[1:] + negated = False + if len(chunk) > 0 and chunk[0] == "^": + negated = True + chunk = chunk[1:] + + match = False + nrange = 0 + + while True: + if len(chunk) > 0 and chunk[0] == "]" and nrange > 0: + chunk = chunk[1:] + break + lo, chunk = get_esc(chunk) + hi = lo + + if chunk[0] == "-": + hi, chunk = get_esc(chunk[1:]) + if lo <= r and r <= hi: + match = True + nrange += 1 + + if match == negated: + failed = True + elif chunk[0] == "?": + if not failed: + if s[0] == "/": + failed = True + s = s[1:] + chunk = chunk[1:] + elif chunk[0] == "\\": + chunk = chunk[1:] + if len(chunk) == 0: + raise PatternError("Bad pattern") + + if not failed: + if chunk[0] != s[0]: + failed = True + s = s[1:] + chunk = chunk[1:] + else: + if not failed: + if chunk[0] != s[0]: + failed = True + s = s[1:] + chunk = chunk[1:] + + if failed: + return "", False + return s, True + + +def get_esc(chunk: str) -> tuple[str, str]: + if len(chunk) == 0 or chunk[0] == "-" or chunk[0] == "]": + raise PatternError("Bad pattern") + if chunk[0] == "\\": + chunk = chunk[1:] + if len(chunk) == 0: + raise PatternError("Bad pattern") + try: + r = chunk[0] + nchunk = chunk[1:] + except IndexError: + raise PatternError("Invalid pattern") + if len(nchunk) == 0: + raise PatternError("Bad pattern") + return r, nchunk diff --git a/modal/image.py b/modal/image.py index e8f307399e..5172236845 100644 --- a/modal/image.py +++ b/modal/image.py @@ -1,6 +1,5 @@ # Copyright Modal Labs 2022 import contextlib -import fnmatch import json import os import re @@ -33,6 +32,7 @@ from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors +from ._utils.match import match from ._utils.pattern_utils import read_ignorefile from .client import _Client from .cloud_bucket_mount import _CloudBucketMount @@ -286,47 +286,6 @@ def _extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: return list(mount_sources) -def _filter_fp_docker_pattern(filepath: str, pattern: str) -> bool: - """ - Validates that a filepath matches a docker copy pattern. - https://docs.docker.com/reference/dockerfile/#pattern-matching-1 - https://pkg.go.dev/path/filepath#Match - """ - filepath = os.path.abspath(filepath) - pattern = os.path.abspath(pattern) - - if "**" in pattern: - # Match any file that ends with the pattern after '**/' - base_dir = pattern.split("/**/")[0] - file_pattern = pattern.split("/**/")[1] - - # Ensure the file is within the base directory - if not filepath.startswith(base_dir + "/"): - return False - - # Get the relative path after base_dir - rel_path = filepath[len(base_dir) + 1 :] - - # Match against the filename part only - filename = os.path.basename(filepath) - if fnmatch.fnmatch(filename, file_pattern): - # Ensure the file is exactly one level deep - return rel_path.count("/") == 1 - return False - - # if pattern is a directory, make sure it ends with a separator - if os.path.isdir(pattern) and not pattern.endswith("/"): - pattern += "/*" - - filepath_dir = os.path.dirname(filepath) - pattern_dir = os.path.dirname(pattern) - - if filepath_dir != pattern_dir: - return False - - return fnmatch.fnmatch(filepath, pattern) - - def _create_context_mount(docker_commands: list[str], ignore: Callable[[Path], bool]) -> _Mount: """ Creates a context mount from a list of docker commands. @@ -336,9 +295,7 @@ def _create_context_mount(docker_commands: list[str], ignore: Callable[[Path], b def mount_filter(source: Path): # check if the source path matches any pattern from the docker file # and it doesnt match any ignore pattern - matches_any_copy_pattern = any( - _filter_fp_docker_pattern(source, mount_source) for mount_source in mount_sources - ) + matches_any_copy_pattern = any(match(source, mount_source) for mount_source in mount_sources) matches_any_ignore_pattern = ignore(source) return matches_any_copy_pattern and not matches_any_ignore_pattern diff --git a/test/match_test.py b/test/match_test.py new file mode 100644 index 0000000000..da1b2b386e --- /dev/null +++ b/test/match_test.py @@ -0,0 +1,83 @@ +# Copyright Modal Labs 2024 +"""Pattern matching library tests ported from https://github.com/golang/go/blob/go1.23.4/src/path/match_test.go""" + +import platform +import pytest + +from modal._utils.match import PatternError, match + + +def match_tests(): + return [ + # (pattern, string, is_match, error) + ("abc", "abc", True, None), + ("*", "abc", True, None), + ("*c", "abc", True, None), + ("a*", "a", True, None), + ("a*", "abc", True, None), + ("a*", "ab/c", False, None), + ("a*/b", "abc/b", True, None), + ("a*/b", "a/c/b", False, None), + ("a*b*c*d*e*/f", "axbxcxdxe/f", True, None), + ("a*b*c*d*e*/f", "axbxcxdxexxx/f", True, None), + ("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", False, None), + ("a*b*c*d*e*/f", "axbxcxdxexxx/fff", False, None), + ("a*b?c*x", "abxbbxdbxebxczzx", True, None), + ("a*b?c*x", "abxbbxdbxebxczzy", False, None), + ("ab[c]", "abc", True, None), + ("ab[b-d]", "abc", True, None), + ("ab[e-g]", "abc", False, None), + ("ab[^c]", "abc", False, None), + ("ab[^b-d]", "abc", False, None), + ("ab[^e-g]", "abc", True, None), + ("a\\*b", "a*b", True, None), + ("a\\*b", "ab", False, None), + ("a?b", "a☺b", True, None), + ("a[^a]b", "a☺b", True, None), + ("a???b", "a☺b", False, None), + ("a[^a][^a][^a]b", "a☺b", False, None), + ("[a-ζ]*", "α", True, None), + ("*[a-ζ]", "A", False, None), + ("a?b", "a/b", False, None), + ("a*b", "a/b", False, None), + ("[\\]a]", "]", True, None), + ("[\\-]", "-", True, None), + ("[x\\-]", "x", True, None), + ("[x\\-]", "-", True, None), + ("[x\\-]", "z", False, None), + ("[\\-x]", "x", True, None), + ("[\\-x]", "-", True, None), + ("[\\-x]", "a", False, None), + ("[]a]", "]", False, PatternError), + ("[-]", "-", False, PatternError), + ("[x-]", "x", False, PatternError), + ("[x-]", "-", False, PatternError), + ("[x-]", "z", False, PatternError), + ("[-x]", "x", False, PatternError), + ("[-x]", "-", False, PatternError), + ("[-x]", "a", False, PatternError), + ("\\", "a", False, PatternError), + ("[a-b-c]", "a", False, PatternError), + ("[", "a", False, PatternError), + ("[^", "a", False, PatternError), + ("[^bc", "a", False, PatternError), + ("a[", "a", False, PatternError), + ("a[", "ab", False, PatternError), + ("a[", "x", False, PatternError), + ("a/b[", "x", False, PatternError), + ("*x", "xxx", True, None), + ] + + +@pytest.mark.parametrize("pattern, s, is_match, err", match_tests()) +def test_match(pattern, s, is_match, err): + if platform.system() == "Windows" and "\\" in pattern: + # No escape allowed on Windows. + return + + if err is not None: + with pytest.raises(ValueError): + match(pattern, s) + else: + actual_match = match(pattern, s) + assert is_match == actual_match, f"{pattern=} {s=} {is_match=} {actual_match=}" From d7c9e76e2f693de4fcaaa8281d8a170eaf4fa1b8 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Wed, 18 Dec 2024 11:55:07 +0100 Subject: [PATCH 10/25] fix test --- test/match_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/match_test.py b/test/match_test.py index da1b2b386e..ed42c0cb20 100644 --- a/test/match_test.py +++ b/test/match_test.py @@ -76,7 +76,7 @@ def test_match(pattern, s, is_match, err): return if err is not None: - with pytest.raises(ValueError): + with pytest.raises(PatternError): match(pattern, s) else: actual_match = match(pattern, s) From 37475c3b13fdfa64e2a3e06f68214fef9f4189ed Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Wed, 18 Dec 2024 11:59:20 +0100 Subject: [PATCH 11/25] remove --- test/image_test.py | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/test/image_test.py b/test/image_test.py index 13971b2013..2608df3c53 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -23,7 +23,6 @@ _base_image_config, _dockerhub_python_version, _extract_copy_command_patterns, - _filter_fp_docker_pattern, _find_dockerignore_file, _get_modal_requirements_path, _validate_python_version, @@ -879,48 +878,6 @@ def test_extract_copy_command_patterns(): assert copy_command_sources == expected -@pytest.mark.parametrize( - ("name", "pattern", "expected_filepaths"), - [ - ( - "basic_wildcards", - "{tmp_dir}/*.txt", - ["{tmp_path}/file1.txt", "{tmp_path}/file10.txt", "{tmp_path}/file2.txt"], - ), - ("single_character_wildcards", "{tmp_dir}/file?.txt", ["{tmp_path}/file1.txt", "{tmp_path}/file2.txt"]), - ( - "recursive_wildcards", - "{tmp_dir}/**/*.py", - ["{tmp_path}/dir2/test1.py", "{tmp_path}/dir2/test2.py", "{tmp_path}/special/test?file.py"], - ), - ( - "directory_specific_match", - "{tmp_dir}/dir1/*.txt", - ["{tmp_path}/dir1/a.txt", "{tmp_path}/dir1/b.txt"], - ), - ("escaping_special_characters", "{tmp_dir}/special/file[[]1].txt", ["{tmp_path}/special/file[1].txt"]), - ("character_range", "{tmp_dir}/dir2/test[1-2].py", ["{tmp_path}/dir2/test1.py", "{tmp_path}/dir2/test2.py"]), - ("abs_path", "{tmp_dir}/dir2/test1.py", ["{tmp_path}/dir2/test1.py"]), - ], -) -def test_filter_fp_docker_pattern(name, pattern, expected_filepaths): - with TemporaryDirectory(dir="./") as tmp_dir: - tmp_path = Path(tmp_dir) - all_fps = create_tmp_files(tmp_path) - fmt_pattern = pattern.format(tmp_dir=tmp_dir, tmp_path=tmp_path) - if name == "abs_path": - fmt_pattern = os.path.abspath(fmt_pattern) - fmt_expected_filepaths = [fp.format(tmp_dir=tmp_dir, tmp_path=tmp_path) for fp in expected_filepaths] - fmt_unexpected_filepaths = [fp for fp in all_fps if fp not in fmt_expected_filepaths] - - # assert only expected_filepaths are matched - for fp in fmt_expected_filepaths: - assert _filter_fp_docker_pattern(fp, fmt_pattern), f"{name=} {fp=} {fmt_pattern=}" - # assert no other filepaths are matched - for fp in fmt_unexpected_filepaths: - assert not _filter_fp_docker_pattern(fp, fmt_pattern), f"{name=} {fp=} {fmt_pattern=}" - - def test_image_docker_command_entrypoint(builder_version, servicer, client, tmp_path_with_content): app = App() app.image = Image.debian_slim().entrypoint([]) From 8c0b41206e577d1af0d443a3404cccc2c09f19da Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Wed, 18 Dec 2024 12:04:35 +0100 Subject: [PATCH 12/25] move --- modal/_utils/docker_utils.py | 86 ++++++++++++++++++++++ modal/image.py | 85 +-------------------- test/docker_utils_test.py | 139 +++++++++++++++++++++++++++++++++++ test/image_test.py | 135 ---------------------------------- 4 files changed, 228 insertions(+), 217 deletions(-) create mode 100644 modal/_utils/docker_utils.py create mode 100644 test/docker_utils_test.py diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py new file mode 100644 index 0000000000..26c9784b7d --- /dev/null +++ b/modal/_utils/docker_utils.py @@ -0,0 +1,86 @@ +# Copyright Modal Labs 2024 +import re +import shlex +from pathlib import Path +from typing import Optional + +from .exception import InvalidError + + +def extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: + """ + Extract all COPY command sources from a Dockerfile. + Combines multiline COPY commands into a single line. + """ + mount_sources = set() + current_command = "" + copy_pattern = re.compile(r"^\s*COPY\s+(.+)$", re.IGNORECASE) + + # First pass: handle line continuations and collect full commands + for line in dockerfile_lines: + line = line.strip() + if not line or line.startswith("#"): + # ignore comments and empty lines + continue + + if current_command: + # Continue previous line + current_command += " " + line.rstrip("\\").strip() + else: + # Start new command + current_command = line.rstrip("\\").strip() + + if not line.endswith("\\"): + # Command is complete + + match = copy_pattern.match(current_command) + if match: + args = match.group(1) + parts = shlex.split(args) + + if len(parts) >= 2: + # Last part is destination, everything else is a mount source + sources = parts[:-1] + + for source in sources: + special_pattern = re.compile(r"^\s*--|\$\s*") + if special_pattern.match(source): + raise InvalidError( + f"COPY command: {source} using special flags/arguments/variables are not supported" + ) + + mount_sources.add(source) + + current_command = "" + + return list(mount_sources) + + +def find_dockerignore_file(dockerfile_path: Path, context_directory: Path) -> Optional[Path]: + # 1. file next to the Dockerfile but do NOT look for parent directories + + def valid_dockerignore_file(fp): + # fp has to exist + if not fp.exists(): + return False + # fp has to be subpath to current working directory + if not fp.is_relative_to(context_directory): + return False + + return True + + generic_name = ".dockerignore" + specific_name = f"{dockerfile_path.name}.dockerignore" + + possible_locations = [ + # 1. check if specific .dockerignore file exists in the same directory as + dockerfile_path.parent / specific_name, + # 2. check if generic .dockerignore file exists in the same directory as + dockerfile_path.parent / generic_name, + # 3. check if generic .dockerignore file exists in current working directory + context_directory / generic_name, + # 4. ?????? check if specific .dockerignore file exists in current working directory + context_directory / specific_name, + ] + + return next((e for e in possible_locations if valid_dockerignore_file(e)), None) diff --git a/modal/image.py b/modal/image.py index 750fd4f04f..5f4cc12491 100644 --- a/modal/image.py +++ b/modal/image.py @@ -31,6 +31,7 @@ from ._utils.async_utils import synchronize_api from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.deprecation import deprecation_error, deprecation_warning +from ._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors from ._utils.match import match @@ -238,60 +239,11 @@ def _get_image_builder_version(server_version: ImageBuilderVersion) -> ImageBuil return version -def _extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: - """ - Extract all COPY command sources from a Dockerfile. - Combines multiline COPY commands into a single line. - """ - mount_sources = set() - current_command = "" - copy_pattern = re.compile(r"^\s*COPY\s+(.+)$", re.IGNORECASE) - - # First pass: handle line continuations and collect full commands - for line in dockerfile_lines: - line = line.strip() - if not line or line.startswith("#"): - # ignore comments and empty lines - continue - - if current_command: - # Continue previous line - current_command += " " + line.rstrip("\\").strip() - else: - # Start new command - current_command = line.rstrip("\\").strip() - - if not line.endswith("\\"): - # Command is complete - - match = copy_pattern.match(current_command) - if match: - args = match.group(1) - parts = shlex.split(args) - - if len(parts) >= 2: - # Last part is destination, everything else is a mount source - sources = parts[:-1] - - for source in sources: - special_pattern = re.compile(r"^\s*--|\$\s*") - if special_pattern.match(source): - raise InvalidError( - f"COPY command: {source} using special flags/arguments/variables are not supported" - ) - - mount_sources.add(source) - - current_command = "" - - return list(mount_sources) - - def _create_context_mount(docker_commands: list[str], ignore: Callable[[Path], bool]) -> _Mount: """ Creates a context mount from a list of docker commands. """ - mount_sources = _extract_copy_command_patterns(docker_commands) + mount_sources = extract_copy_command_patterns(docker_commands) def mount_filter(source: Path): # check if the source path matches any pattern from the docker file @@ -304,37 +256,6 @@ def mount_filter(source: Path): return _Mount._add_local_dir(Path("./"), Path("/"), ignore=mount_filter) -def _find_dockerignore_file(dockerfile_path: Path) -> Optional[Path]: - current_working_dir = Path.cwd() - # 1. file next to the Dockerfile but do NOT look for parent directories - - def valid_dockerignore_file(fp): - # fp has to exist - if not fp.exists(): - return False - # fp has to be subpath to current working directory - if not fp.is_relative_to(current_working_dir): - return False - - return True - - generic_name = ".dockerignore" - specific_name = f"{dockerfile_path.name}.dockerignore" - - possible_locations = [ - # 1. check if specific .dockerignore file exists in the same directory as - dockerfile_path.parent / specific_name, - # 2. check if generic .dockerignore file exists in the same directory as - dockerfile_path.parent / generic_name, - # 3. check if generic .dockerignore file exists in current working directory - current_working_dir / generic_name, - # 4. ?????? check if specific .dockerignore file exists in current working directory - current_working_dir / specific_name, - ] - - return next((e for e in possible_locations if valid_dockerignore_file(e)), None) - - class _ImageRegistryConfig: """mdmd:hidden""" @@ -1667,7 +1588,7 @@ def from_dockerfile( build into the image, this local data will be implicitly mounted into the image. """ - dockerignore_fp = _find_dockerignore_file(Path(path)) + dockerignore_fp = find_dockerignore_file(Path(path), context_directory=Path.cwd()) ignore = FilePatternMatcher(*read_ignorefile(dockerignore_fp)) if dockerignore_fp else FilePatternMatcher() if context_mount is not None: diff --git a/test/docker_utils_test.py b/test/docker_utils_test.py new file mode 100644 index 0000000000..ee80c77ce2 --- /dev/null +++ b/test/docker_utils_test.py @@ -0,0 +1,139 @@ +# Copyright Modal Labs 2024 +import os +from pathlib import Path +from tempfile import TemporaryDirectory + +from modal._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file + + +def test_extract_copy_command_patterns(): + x = [ + ( + ["CoPY files/dir1 ./smth_copy"], + ["files/dir1"], + ), + ( + ["COPY files/*.txt /dest/", "COPY files/**/*.py /dest/"], + ["files/*.txt", "files/**/*.py"], + ), + ( + ["COPY files/special/file[[]1].txt /dest/"], + ["files/special/file[[]1].txt"], + ), + ( + ["COPY files/*.txt files/**/*.py /dest/"], + ["files/*.txt", "files/**/*.py"], + ), + ( + [ + "copy ./smth \\", + "./foo.py \\", + "# this is a comment", + "./bar.py \\", + "/x", + ], + ["./smth", "./foo.py", "./bar.py"], + ), + ] + + for dockerfile_lines, expected in x: + copy_command_sources = sorted(extract_copy_command_patterns(dockerfile_lines)) + expected = sorted(expected) + assert copy_command_sources == expected + + +def test_find_dockerignore_file(): + print() + test_cwd = Path.cwd() + + # case 1: + # generic dockerignore file in cwd --> find it + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = dir1 / "Dockerfile" + dockerignore_path = tmp_path / ".dockerignore" + dockerignore_path.write_text("**/*") + assert find_dockerignore_file(dockerfile_path) == dockerignore_path + + # case 2: + # specific dockerignore file in cwd --> find it + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = dir1 / "foo" + dockerignore_path = tmp_path / "foo.dockerignore" + dockerignore_path.write_text("**/*") + assert find_dockerignore_file(dockerfile_path) == dockerignore_path + + # case 3: + # generic dockerignore file and nested dockerignore file in cwd + # should match specific + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = tmp_path / "Dockerfile" + generic_dockerignore_path = tmp_path / ".dockerignore" + generic_dockerignore_path.write_text("**/*.py") + specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" + specific_dockerignore_path.write_text("**/*") + assert find_dockerignore_file(dockerfile_path) == specific_dockerignore_path + assert find_dockerignore_file(dockerfile_path) != generic_dockerignore_path + + # case 4: + # should not match nested dockerignore files + # or parent dockerignore files + # when dockerfile is in cwd + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + dir2 = dir1 / "dir2" + dir2.mkdir() + + os.chdir(dir1) + + dockerfile_path = dir1 / "Dockerfile" + dockerfile_path.write_text("COPY . /dummy") + + # should ignore parent ones + generic_dockerignore_path = tmp_path / ".dockerignore" + generic_dockerignore_path.write_text("**/*") + specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" + specific_dockerignore_path.write_text("**/*") + + # should ignore nested ones + nested_generic_dockerignore_path = dir2 / ".dockerignore" + nested_generic_dockerignore_path.write_text("**/*") + nested_specific_dockerignore_path = dir2 / "Dockerfile.dockerignore" + nested_specific_dockerignore_path.write_text("**/*") + + assert find_dockerignore_file(dockerfile_path) is None + + # case 5: + # should match dockerignore file next to dockerfile + # and not next to cwd if both exist + # even if more specific + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir) + dir1 = tmp_path / "dir1" + dir1.mkdir() + + os.chdir(test_cwd / tmp_dir) + + dockerfile_path = dir1 / "Dockerfile" + dockerignore_path = tmp_path / ".dockerignore" + dockerignore_path.write_text("**/*") + assert find_dockerignore_file(dockerfile_path) == dockerignore_path diff --git a/test/image_test.py b/test/image_test.py index 2608df3c53..8047579495 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -22,8 +22,6 @@ ImageBuilderVersion, _base_image_config, _dockerhub_python_version, - _extract_copy_command_patterns, - _find_dockerignore_file, _get_modal_requirements_path, _validate_python_version, ) @@ -842,42 +840,6 @@ def test_image_dockerfile_copy_messy(builder_version, servicer, client): ) -def test_extract_copy_command_patterns(): - x = [ - ( - ["CoPY files/dir1 ./smth_copy"], - ["files/dir1"], - ), - ( - ["COPY files/*.txt /dest/", "COPY files/**/*.py /dest/"], - ["files/*.txt", "files/**/*.py"], - ), - ( - ["COPY files/special/file[[]1].txt /dest/"], - ["files/special/file[[]1].txt"], - ), - ( - ["COPY files/*.txt files/**/*.py /dest/"], - ["files/*.txt", "files/**/*.py"], - ), - ( - [ - "copy ./smth \\", - "./foo.py \\", - "# this is a comment", - "./bar.py \\", - "/x", - ], - ["./smth", "./foo.py", "./bar.py"], - ), - ] - - for dockerfile_lines, expected in x: - copy_command_sources = sorted(_extract_copy_command_patterns(dockerfile_lines)) - expected = sorted(expected) - assert copy_command_sources == expected - - def test_image_docker_command_entrypoint(builder_version, servicer, client, tmp_path_with_content): app = App() app.image = Image.debian_slim().entrypoint([]) @@ -1696,100 +1658,3 @@ def test_image_add_local_dir_ignore_nothing(servicer, client, tmp_path_with_cont assert set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) == { Path(f"/place{f}") for f in expected } - - -def test_find_dockerignore_file(): - print() - test_cwd = Path.cwd() - - # case 1: - # generic dockerignore file in cwd --> find it - with TemporaryDirectory(dir=test_cwd) as tmp_dir: - tmp_path = Path(tmp_dir) - dir1 = tmp_path / "dir1" - dir1.mkdir() - - os.chdir(test_cwd / tmp_dir) - - dockerfile_path = dir1 / "Dockerfile" - dockerignore_path = tmp_path / ".dockerignore" - dockerignore_path.write_text("**/*") - assert _find_dockerignore_file(dockerfile_path) == dockerignore_path - - # case 2: - # specific dockerignore file in cwd --> find it - with TemporaryDirectory(dir=test_cwd) as tmp_dir: - tmp_path = Path(tmp_dir) - dir1 = tmp_path / "dir1" - dir1.mkdir() - - os.chdir(test_cwd / tmp_dir) - - dockerfile_path = dir1 / "foo" - dockerignore_path = tmp_path / "foo.dockerignore" - dockerignore_path.write_text("**/*") - assert _find_dockerignore_file(dockerfile_path) == dockerignore_path - - # case 3: - # generic dockerignore file and nested dockerignore file in cwd - # should match specific - with TemporaryDirectory(dir=test_cwd) as tmp_dir: - tmp_path = Path(tmp_dir) - dir1 = tmp_path / "dir1" - dir1.mkdir() - - os.chdir(test_cwd / tmp_dir) - - dockerfile_path = tmp_path / "Dockerfile" - generic_dockerignore_path = tmp_path / ".dockerignore" - generic_dockerignore_path.write_text("**/*.py") - specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" - specific_dockerignore_path.write_text("**/*") - assert _find_dockerignore_file(dockerfile_path) == specific_dockerignore_path - assert _find_dockerignore_file(dockerfile_path) != generic_dockerignore_path - - # case 4: - # should not match nested dockerignore files - # or parent dockerignore files - # when dockerfile is in cwd - with TemporaryDirectory(dir=test_cwd) as tmp_dir: - tmp_path = Path(tmp_dir) - dir1 = tmp_path / "dir1" - dir1.mkdir() - dir2 = dir1 / "dir2" - dir2.mkdir() - - os.chdir(dir1) - - dockerfile_path = dir1 / "Dockerfile" - dockerfile_path.write_text("COPY . /dummy") - - # should ignore parent ones - generic_dockerignore_path = tmp_path / ".dockerignore" - generic_dockerignore_path.write_text("**/*") - specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" - specific_dockerignore_path.write_text("**/*") - - # should ignore nested ones - nested_generic_dockerignore_path = dir2 / ".dockerignore" - nested_generic_dockerignore_path.write_text("**/*") - nested_specific_dockerignore_path = dir2 / "Dockerfile.dockerignore" - nested_specific_dockerignore_path.write_text("**/*") - - assert _find_dockerignore_file(dockerfile_path) is None - - # case 5: - # should match dockerignore file next to dockerfile - # and not next to cwd if both exist - # even if more specific - with TemporaryDirectory(dir=test_cwd) as tmp_dir: - tmp_path = Path(tmp_dir) - dir1 = tmp_path / "dir1" - dir1.mkdir() - - os.chdir(test_cwd / tmp_dir) - - dockerfile_path = dir1 / "Dockerfile" - dockerignore_path = tmp_path / ".dockerignore" - dockerignore_path.write_text("**/*") - assert _find_dockerignore_file(dockerfile_path) == dockerignore_path From ed19a20a48dea3077ff8a3a8a1d130a7f07f31f9 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Wed, 18 Dec 2024 13:39:58 +0100 Subject: [PATCH 13/25] renaming --- .../_utils/{match.py => docker_copy_match.py} | 22 ++-- modal/_utils/docker_utils.py | 2 +- modal/image.py | 6 +- ...atch_test.py => docker_copy_match_test.py} | 6 +- test/docker_utils_test.py | 111 +++++++++++++++--- test/image_test.py | 95 +-------------- 6 files changed, 114 insertions(+), 128 deletions(-) rename modal/_utils/{match.py => docker_copy_match.py} (87%) rename test/{match_test.py => docker_copy_match_test.py} (94%) diff --git a/modal/_utils/match.py b/modal/_utils/docker_copy_match.py similarity index 87% rename from modal/_utils/match.py rename to modal/_utils/docker_copy_match.py index 196cacaa60..28e5048fe8 100644 --- a/modal/_utils/match.py +++ b/modal/_utils/docker_copy_match.py @@ -12,10 +12,10 @@ class PatternError(Exception): pass -def match(pattern: str, name: str) -> bool: +def dockerfile_match(pattern: str, name: str) -> bool: while len(pattern) > 0: continue_outer_loop = False - star, chunk, pattern = scan_chunk(pattern) + star, chunk, pattern = _scan_chunk(pattern) if star and chunk == "": # Trailing * matches rest of string unless it has a /. @@ -23,7 +23,7 @@ def match(pattern: str, name: str) -> bool: return name.find("/") < 0 # Look for match at current position. - t, ok = match_chunk(chunk, name) + t, ok = _match_chunk(chunk, name) # if we're the last chunk, make sure we've exhausted the name # otherwise we'll give a false result even if we could still match # using the star @@ -34,7 +34,7 @@ def match(pattern: str, name: str) -> bool: if star: i = 0 while i < len(name) and name[i] != "/": - t, ok = match_chunk(chunk, name[i + 1 :]) + t, ok = _match_chunk(chunk, name[i + 1 :]) if ok: if len(pattern) == 0 and len(t) > 0: i += 1 @@ -48,15 +48,15 @@ def match(pattern: str, name: str) -> bool: continue while len(pattern) > 0: - _, chunk, pattern = scan_chunk(pattern) - match_chunk(chunk, "") + _, chunk, pattern = _scan_chunk(pattern) + _match_chunk(chunk, "") return False return len(name) == 0 -def scan_chunk(pattern: str) -> tuple[bool, str, str]: +def _scan_chunk(pattern: str) -> tuple[bool, str, str]: star = False while len(pattern) > 0 and pattern[0] == "*": pattern = pattern[1:] @@ -80,7 +80,7 @@ def scan_chunk(pattern: str) -> tuple[bool, str, str]: return star, pattern[0:i], pattern[i:] -def match_chunk(chunk: str, s: str) -> tuple[str, bool]: +def _match_chunk(chunk: str, s: str) -> tuple[str, bool]: failed = False while len(chunk) > 0: @@ -105,11 +105,11 @@ def match_chunk(chunk: str, s: str) -> tuple[str, bool]: if len(chunk) > 0 and chunk[0] == "]" and nrange > 0: chunk = chunk[1:] break - lo, chunk = get_esc(chunk) + lo, chunk = _get_esc(chunk) hi = lo if chunk[0] == "-": - hi, chunk = get_esc(chunk[1:]) + hi, chunk = _get_esc(chunk[1:]) if lo <= r and r <= hi: match = True nrange += 1 @@ -144,7 +144,7 @@ def match_chunk(chunk: str, s: str) -> tuple[str, bool]: return s, True -def get_esc(chunk: str) -> tuple[str, str]: +def _get_esc(chunk: str) -> tuple[str, str]: if len(chunk) == 0 or chunk[0] == "-" or chunk[0] == "]": raise PatternError("Bad pattern") if chunk[0] == "\\": diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 26c9784b7d..a681cceadf 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional -from .exception import InvalidError +from ..exception import InvalidError def extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: diff --git a/modal/image.py b/modal/image.py index 5f4cc12491..4eb78ec53b 100644 --- a/modal/image.py +++ b/modal/image.py @@ -31,10 +31,10 @@ from ._utils.async_utils import synchronize_api from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.deprecation import deprecation_error, deprecation_warning +from ._utils.docker_copy_match import dockerfile_match from ._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors -from ._utils.match import match from ._utils.pattern_utils import read_ignorefile from .client import _Client from .cloud_bucket_mount import _CloudBucketMount @@ -239,7 +239,7 @@ def _get_image_builder_version(server_version: ImageBuilderVersion) -> ImageBuil return version -def _create_context_mount(docker_commands: list[str], ignore: Callable[[Path], bool]) -> _Mount: +def _create_context_mount(docker_commands: Sequence[str], ignore: Callable[[Path], bool]) -> _Mount: """ Creates a context mount from a list of docker commands. """ @@ -248,7 +248,7 @@ def _create_context_mount(docker_commands: list[str], ignore: Callable[[Path], b def mount_filter(source: Path): # check if the source path matches any pattern from the docker file # and it doesnt match any ignore pattern - matches_any_copy_pattern = any(match(source, mount_source) for mount_source in mount_sources) + matches_any_copy_pattern = any(dockerfile_match(source, mount_source) for mount_source in mount_sources) matches_any_ignore_pattern = ignore(source) return matches_any_copy_pattern and not matches_any_ignore_pattern diff --git a/test/match_test.py b/test/docker_copy_match_test.py similarity index 94% rename from test/match_test.py rename to test/docker_copy_match_test.py index ed42c0cb20..717a8bcf8c 100644 --- a/test/match_test.py +++ b/test/docker_copy_match_test.py @@ -4,7 +4,7 @@ import platform import pytest -from modal._utils.match import PatternError, match +from modal._utils.docker_copy_match import PatternError, dockerfile_match def match_tests(): @@ -77,7 +77,7 @@ def test_match(pattern, s, is_match, err): if err is not None: with pytest.raises(PatternError): - match(pattern, s) + dockerfile_match(pattern, s) else: - actual_match = match(pattern, s) + actual_match = dockerfile_match(pattern, s) assert is_match == actual_match, f"{pattern=} {s=} {is_match=} {actual_match=}" diff --git a/test/docker_utils_test.py b/test/docker_utils_test.py index ee80c77ce2..bd75286b66 100644 --- a/test/docker_utils_test.py +++ b/test/docker_utils_test.py @@ -1,7 +1,7 @@ # Copyright Modal Labs 2024 import os from pathlib import Path -from tempfile import TemporaryDirectory +from tempfile import NamedTemporaryFile, TemporaryDirectory from modal._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file @@ -53,12 +53,10 @@ def test_find_dockerignore_file(): dir1 = tmp_path / "dir1" dir1.mkdir() - os.chdir(test_cwd / tmp_dir) - dockerfile_path = dir1 / "Dockerfile" dockerignore_path = tmp_path / ".dockerignore" dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path) == dockerignore_path + assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == dockerignore_path # case 2: # specific dockerignore file in cwd --> find it @@ -67,12 +65,10 @@ def test_find_dockerignore_file(): dir1 = tmp_path / "dir1" dir1.mkdir() - os.chdir(test_cwd / tmp_dir) - dockerfile_path = dir1 / "foo" dockerignore_path = tmp_path / "foo.dockerignore" dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path) == dockerignore_path + assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == dockerignore_path # case 3: # generic dockerignore file and nested dockerignore file in cwd @@ -82,15 +78,13 @@ def test_find_dockerignore_file(): dir1 = tmp_path / "dir1" dir1.mkdir() - os.chdir(test_cwd / tmp_dir) - dockerfile_path = tmp_path / "Dockerfile" generic_dockerignore_path = tmp_path / ".dockerignore" generic_dockerignore_path.write_text("**/*.py") specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" specific_dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path) == specific_dockerignore_path - assert find_dockerignore_file(dockerfile_path) != generic_dockerignore_path + assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == specific_dockerignore_path + assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) != generic_dockerignore_path # case 4: # should not match nested dockerignore files @@ -103,8 +97,6 @@ def test_find_dockerignore_file(): dir2 = dir1 / "dir2" dir2.mkdir() - os.chdir(dir1) - dockerfile_path = dir1 / "Dockerfile" dockerfile_path.write_text("COPY . /dummy") @@ -120,7 +112,7 @@ def test_find_dockerignore_file(): nested_specific_dockerignore_path = dir2 / "Dockerfile.dockerignore" nested_specific_dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path) is None + assert find_dockerignore_file(dockerfile_path, dir1) is None # case 5: # should match dockerignore file next to dockerfile @@ -131,9 +123,96 @@ def test_find_dockerignore_file(): dir1 = tmp_path / "dir1" dir1.mkdir() - os.chdir(test_cwd / tmp_dir) + # os.chdir(context_directory) dockerfile_path = dir1 / "Dockerfile" dockerignore_path = tmp_path / ".dockerignore" dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path) == dockerignore_path + + assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == dockerignore_path + + +def create_tmp_files(tmp_path): + (tmp_path / "dir1").mkdir() + (tmp_path / "dir1" / "a.txt").write_text("a") + (tmp_path / "dir1" / "b.txt").write_text("b") + + (tmp_path / "dir2").mkdir() + (tmp_path / "dir2" / "test1.py").write_text("test1") + (tmp_path / "dir2" / "test2.py").write_text("test2") + + (tmp_path / "file1.txt").write_text("content1") + (tmp_path / "file10.txt").write_text("content1") + (tmp_path / "file2.txt").write_text("content2") + (tmp_path / "test.py").write_text("python") + + (tmp_path / "special").mkdir() + (tmp_path / "special" / "file[1].txt").write_text("special1") + (tmp_path / "special" / "file{2}.txt").write_text("special2") + (tmp_path / "special" / "test?file.py").write_text("special3") + + (tmp_path / "this").mkdir() + (tmp_path / "this" / "is").mkdir() + (tmp_path / "this" / "is" / "super").mkdir() + (tmp_path / "this" / "is" / "super" / "nested").mkdir() + (tmp_path / "this" / "is" / "super" / "nested" / "file.py").write_text("python") + + all_fps = [] + for root, _, files in os.walk(tmp_path): + for file in files: + all_fps.append(f"{os.path.join(root, file)}".lstrip("./")) + + return all_fps + + +def test_image_dockerfile_copy_messy(): + with TemporaryDirectory(dir="./") as tmp_dir: + tmp_path = Path(tmp_dir) + + create_tmp_files(tmp_path) + + dockerfile = NamedTemporaryFile("w", delete=False) + dockerfile.write( + f""" +FROM python:3.12-slim + +WORKDIR /my-app + +RUN ls + +# COPY simple directory + CoPY {tmp_dir}/dir1 ./smth_copy + +RUN ls -la + +# COPY multiple sources + COPY {tmp_dir}/test.py {tmp_dir}/file10.txt / + +RUN ls \\ + -l + +# COPY multiple lines +copy {tmp_dir}/dir2 \\ + {tmp_dir}/file1.txt \\ +# this is a comment + {tmp_dir}/file2.txt \\ + /x + + RUN ls + """ + ) + dockerfile.close() + + with open(dockerfile.name) as f: + lines = f.readlines() + + assert sorted(extract_copy_command_patterns(lines)) == sorted( + [ + f"{tmp_dir}/dir1", + f"{tmp_dir}/test.py", + f"{tmp_dir}/file10.txt", + f"{tmp_dir}/dir2", + f"{tmp_dir}/file1.txt", + f"{tmp_dir}/file2.txt", + ] + ) diff --git a/test/image_test.py b/test/image_test.py index 8047579495..ad72c2252c 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -7,7 +7,7 @@ import threading from hashlib import sha256 from pathlib import Path -from tempfile import NamedTemporaryFile, TemporaryDirectory +from tempfile import NamedTemporaryFile from typing import Literal, get_args from unittest import mock @@ -747,99 +747,6 @@ def test_image_dockerfile_copy(builder_version, servicer, client, tmp_path_with_ assert files == {"/data.txt": b"hello", "/data/sub": b"world"} -def create_tmp_files(tmp_path): - (tmp_path / "dir1").mkdir() - (tmp_path / "dir1" / "a.txt").write_text("a") - (tmp_path / "dir1" / "b.txt").write_text("b") - - (tmp_path / "dir2").mkdir() - (tmp_path / "dir2" / "test1.py").write_text("test1") - (tmp_path / "dir2" / "test2.py").write_text("test2") - - (tmp_path / "file1.txt").write_text("content1") - (tmp_path / "file10.txt").write_text("content1") - (tmp_path / "file2.txt").write_text("content2") - (tmp_path / "test.py").write_text("python") - - (tmp_path / "special").mkdir() - (tmp_path / "special" / "file[1].txt").write_text("special1") - (tmp_path / "special" / "file{2}.txt").write_text("special2") - (tmp_path / "special" / "test?file.py").write_text("special3") - - (tmp_path / "this").mkdir() - (tmp_path / "this" / "is").mkdir() - (tmp_path / "this" / "is" / "super").mkdir() - (tmp_path / "this" / "is" / "super" / "nested").mkdir() - (tmp_path / "this" / "is" / "super" / "nested" / "file.py").write_text("python") - - all_fps = [] - for root, _, files in os.walk(tmp_path): - for file in files: - all_fps.append(f"{os.path.join(root, file)}".lstrip("./")) - - return all_fps - - -def test_image_dockerfile_copy_messy(builder_version, servicer, client): - with TemporaryDirectory(dir="./") as tmp_dir: - tmp_path = Path(tmp_dir) - - create_tmp_files(tmp_path) - - dockerfile = NamedTemporaryFile("w", delete=False) - dockerfile.write( - f""" -FROM python:3.12-slim - -WORKDIR /my-app - -RUN ls - -# COPY simple directory - CoPY {tmp_dir}/dir1 ./smth_copy - -RUN ls -la - -# COPY multiple sources - COPY {tmp_dir}/test.py {tmp_dir}/file10.txt / - -RUN ls \\ - -l - -# COPY multiple lines -copy {tmp_dir}/dir2 \\ - {tmp_dir}/file1.txt \\ -# this is a comment - {tmp_dir}/file2.txt \\ - /x - - RUN ls - """ - ) - dockerfile.close() - - app = App() - app.image = Image.debian_slim().from_dockerfile(dockerfile.name) - app.function()(dummy) - - with app.run(client=client): - layers = get_image_layers(app.image.object_id, servicer) - - copied_files = servicer.mount_contents[layers[1].context_mount_id].keys() - assert sorted(copied_files) == sorted( - [ - f"/{tmp_path}/dir1/a.txt", - f"/{tmp_path}/dir1/b.txt", - f"/{tmp_path}/test.py", - f"/{tmp_path}/file10.txt", - f"/{tmp_path}/file1.txt", - f"/{tmp_path}/file2.txt", - f"/{tmp_path}/dir2/test1.py", - f"/{tmp_path}/dir2/test2.py", - ] - ) - - def test_image_docker_command_entrypoint(builder_version, servicer, client, tmp_path_with_content): app = App() app.image = Image.debian_slim().entrypoint([]) From e67a26460e805e7e06e7bbb64514cd93392b571d Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Wed, 18 Dec 2024 13:46:17 +0100 Subject: [PATCH 14/25] remove --- modal/_utils/docker_utils.py | 6 +++--- test/image_test.py | 11 ++--------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index a681cceadf..99fbe676c0 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -12,7 +12,7 @@ def extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: Extract all COPY command sources from a Dockerfile. Combines multiline COPY commands into a single line. """ - mount_sources = set() + copy_source_patterns: set[str] = set() current_command = "" copy_pattern = re.compile(r"^\s*COPY\s+(.+)$", re.IGNORECASE) @@ -49,11 +49,11 @@ def extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: f"COPY command: {source} using special flags/arguments/variables are not supported" ) - mount_sources.add(source) + copy_source_patterns.add(source) current_command = "" - return list(mount_sources) + return list(copy_source_patterns) def find_dockerignore_file(dockerfile_path: Path, context_directory: Path) -> Optional[Path]: diff --git a/test/image_test.py b/test/image_test.py index ad72c2252c..84fa83a2e8 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -712,14 +712,14 @@ def test_image_copy_local_dir(builder_version, servicer, client, tmp_path_with_c def test_image_docker_command_copy(builder_version, servicer, client, tmp_path_with_content): app = App() - data_mount = Mount.from_local_dir(tmp_path_with_content, remote_path="/", condition=lambda p: "module" not in p) app.image = Image.debian_slim().dockerfile_commands(["COPY . /dummy"]) app.function()(dummy) with app.run(client=client): layers = get_image_layers(app.image.object_id, servicer) assert "COPY . /dummy" in layers[0].dockerfile_commands - files = {f.mount_filename: f.content for f in Mount._get_files(data_mount.entries)} + mount_id = layers[1].context_mount_id + files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) assert files == {"/data.txt": b"hello", "/data/sub": b"world"} @@ -729,19 +729,12 @@ def test_image_dockerfile_copy(builder_version, servicer, client, tmp_path_with_ dockerfile.close() app = App() - # data_mount = Mount.from_local_dir(tmp_path_with_content, remote_path="/", condition=lambda p: "module" not in p) app.image = Image.debian_slim().from_dockerfile(dockerfile.name) app.function()(dummy) with app.run(client=client): layers = get_image_layers(app.image.object_id, servicer) assert f"COPY {tmp_path_with_content} /dummy" in layers[1].dockerfile_commands - # TODO: - # get files from the image layers instead - # files = {f.mount_filename: f.content for f in Mount._get_files(data_mount.entries)} - files = {} - print() - print(f"{layers[1]=}") mount_id = layers[1].context_mount_id files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) assert files == {"/data.txt": b"hello", "/data/sub": b"world"} From e5c1545b3c75445d7b5a9334a4cf57377c3336b0 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Thu, 19 Dec 2024 14:55:27 +0100 Subject: [PATCH 15/25] ignore --- modal/_utils/docker_copy_match.py | 16 +-- modal/_utils/docker_utils.py | 28 ++--- modal/image.py | 44 +++++--- test/docker_copy_match_test.py | 58 +++++----- test/docker_utils_test.py | 15 +-- test/image_test.py | 174 +++++++++++++++++++++++++----- 6 files changed, 240 insertions(+), 95 deletions(-) diff --git a/modal/_utils/docker_copy_match.py b/modal/_utils/docker_copy_match.py index 28e5048fe8..68e2a09032 100644 --- a/modal/_utils/docker_copy_match.py +++ b/modal/_utils/docker_copy_match.py @@ -6,12 +6,6 @@ """ -class PatternError(Exception): - """Indicates a pattern was malformed.""" - - pass - - def dockerfile_match(pattern: str, name: str) -> bool: while len(pattern) > 0: continue_outer_loop = False @@ -125,7 +119,7 @@ def _match_chunk(chunk: str, s: str) -> tuple[str, bool]: elif chunk[0] == "\\": chunk = chunk[1:] if len(chunk) == 0: - raise PatternError("Bad pattern") + raise ValueError("Bad pattern") if not failed: if chunk[0] != s[0]: @@ -146,16 +140,16 @@ def _match_chunk(chunk: str, s: str) -> tuple[str, bool]: def _get_esc(chunk: str) -> tuple[str, str]: if len(chunk) == 0 or chunk[0] == "-" or chunk[0] == "]": - raise PatternError("Bad pattern") + raise ValueError("Bad pattern") if chunk[0] == "\\": chunk = chunk[1:] if len(chunk) == 0: - raise PatternError("Bad pattern") + raise ValueError("Bad pattern") try: r = chunk[0] nchunk = chunk[1:] except IndexError: - raise PatternError("Invalid pattern") + raise ValueError("Invalid pattern") if len(nchunk) == 0: - raise PatternError("Bad pattern") + raise ValueError("Bad pattern") return r, nchunk diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 99fbe676c0..5198d72586 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -56,8 +56,14 @@ def extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: return list(copy_source_patterns) -def find_dockerignore_file(dockerfile_path: Path, context_directory: Path) -> Optional[Path]: - # 1. file next to the Dockerfile but do NOT look for parent directories +def find_dockerignore_file(context_directory: Path, dockerfile_path: Optional[Path] = None) -> Optional[Path]: + """ + Find dockerignore file relative to current context directory + and if dockerfile path is provided, check if specific .dockerignore + file exists in the same directory as + + Finds the most specific dockerignore file that exists. + """ def valid_dockerignore_file(fp): # fp has to exist @@ -70,17 +76,15 @@ def valid_dockerignore_file(fp): return True generic_name = ".dockerignore" - specific_name = f"{dockerfile_path.name}.dockerignore" - - possible_locations = [ + possible_locations = [] + if dockerfile_path: + specific_name = f"{dockerfile_path.name}.dockerignore" # 1. check if specific .dockerignore file exists in the same directory as - dockerfile_path.parent / specific_name, + possible_locations.append(dockerfile_path.parent / specific_name) # 2. check if generic .dockerignore file exists in the same directory as - dockerfile_path.parent / generic_name, - # 3. check if generic .dockerignore file exists in current working directory - context_directory / generic_name, - # 4. ?????? check if specific .dockerignore file exists in current working directory - context_directory / specific_name, - ] + possible_locations.append(dockerfile_path.parent / generic_name) + + # 3. check if generic .dockerignore file exists in current working directory + possible_locations.append(context_directory / generic_name) return next((e for e in possible_locations if valid_dockerignore_file(e)), None) diff --git a/modal/image.py b/modal/image.py index 4eb78ec53b..11a23ed292 100644 --- a/modal/image.py +++ b/modal/image.py @@ -31,7 +31,6 @@ from ._utils.async_utils import synchronize_api from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.deprecation import deprecation_error, deprecation_warning -from ._utils.docker_copy_match import dockerfile_match from ._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors @@ -243,17 +242,17 @@ def _create_context_mount(docker_commands: Sequence[str], ignore: Callable[[Path """ Creates a context mount from a list of docker commands. """ - mount_sources = extract_copy_command_patterns(docker_commands) + copy_patterns = extract_copy_command_patterns(docker_commands) + include = FilePatternMatcher(*copy_patterns) - def mount_filter(source: Path): - # check if the source path matches any pattern from the docker file - # and it doesnt match any ignore pattern - matches_any_copy_pattern = any(dockerfile_match(source, mount_source) for mount_source in mount_sources) - matches_any_ignore_pattern = ignore(source) + def ignore_with_include(source: Path): + relative_source = source.relative_to(Path.cwd()) + if not include(relative_source) or ignore(relative_source): + return True - return matches_any_copy_pattern and not matches_any_ignore_pattern + return False - return _Mount._add_local_dir(Path("./"), Path("/"), ignore=mount_filter) + return _Mount._add_local_dir(Path("./"), Path("/"), ignore=ignore_with_include) class _ImageRegistryConfig: @@ -1175,12 +1174,23 @@ def dockerfile_commands( # modal.Mount with local files to supply as build context for COPY commands context_mount: Optional[_Mount] = None, force_build: bool = False, # Ignore cached builds, similar to 'docker build --no-cache' + ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, ) -> "_Image": """Extend an image with arbitrary Dockerfile-like commands.""" cmds = _flatten_str_args("dockerfile_commands", "dockerfile_commands", dockerfile_commands) if not cmds: return self + if ignore is None: + dockerignore_fp = find_dockerignore_file(Path.cwd()) + if dockerignore_fp: + with open(dockerignore_fp) as f: + ignore = FilePatternMatcher(*read_ignorefile(f)) + else: + ignore = FilePatternMatcher() + elif isinstance(ignore, list): + ignore = FilePatternMatcher(*ignore) + if context_mount is not None: deprecation_warning( (2024, 12, 16), @@ -1195,7 +1205,7 @@ def wrapper_context_mount_function(): else: def base_image_context_mount_function() -> _Mount: - return _create_context_mount(cmds, FilePatternMatcher()) + return _create_context_mount(cmds, ignore) context_mount_function = base_image_context_mount_function @@ -1572,6 +1582,9 @@ def from_dockerfile( secrets: Sequence[_Secret] = [], gpu: GPU_T = None, add_python: Optional[str] = None, + # If ignore is set to None + # it will look for a dockerignore file to get ignore patterns + ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, ) -> "_Image": """Build a Modal image from a local Dockerfile. @@ -1588,8 +1601,15 @@ def from_dockerfile( build into the image, this local data will be implicitly mounted into the image. """ - dockerignore_fp = find_dockerignore_file(Path(path), context_directory=Path.cwd()) - ignore = FilePatternMatcher(*read_ignorefile(dockerignore_fp)) if dockerignore_fp else FilePatternMatcher() + if ignore is None: + dockerignore_fp = find_dockerignore_file(Path.cwd(), Path(path)) + if dockerignore_fp: + with open(dockerignore_fp) as f: + ignore = FilePatternMatcher(*read_ignorefile(f)) + else: + ignore = FilePatternMatcher() + elif isinstance(ignore, list): + ignore = FilePatternMatcher(*ignore) if context_mount is not None: deprecation_warning( diff --git a/test/docker_copy_match_test.py b/test/docker_copy_match_test.py index 717a8bcf8c..6571b42499 100644 --- a/test/docker_copy_match_test.py +++ b/test/docker_copy_match_test.py @@ -4,7 +4,7 @@ import platform import pytest -from modal._utils.docker_copy_match import PatternError, dockerfile_match +from modal._utils.docker_copy_match import dockerfile_match def match_tests(): @@ -48,36 +48,36 @@ def match_tests(): ("[\\-x]", "x", True, None), ("[\\-x]", "-", True, None), ("[\\-x]", "a", False, None), - ("[]a]", "]", False, PatternError), - ("[-]", "-", False, PatternError), - ("[x-]", "x", False, PatternError), - ("[x-]", "-", False, PatternError), - ("[x-]", "z", False, PatternError), - ("[-x]", "x", False, PatternError), - ("[-x]", "-", False, PatternError), - ("[-x]", "a", False, PatternError), - ("\\", "a", False, PatternError), - ("[a-b-c]", "a", False, PatternError), - ("[", "a", False, PatternError), - ("[^", "a", False, PatternError), - ("[^bc", "a", False, PatternError), - ("a[", "a", False, PatternError), - ("a[", "ab", False, PatternError), - ("a[", "x", False, PatternError), - ("a/b[", "x", False, PatternError), + ("[]a]", "]", False, ValueError), + ("[-]", "-", False, ValueError), + ("[x-]", "x", False, ValueError), + ("[x-]", "-", False, ValueError), + ("[x-]", "z", False, ValueError), + ("[-x]", "x", False, ValueError), + ("[-x]", "-", False, ValueError), + ("[-x]", "a", False, ValueError), + ("\\", "a", False, ValueError), + ("[a-b-c]", "a", False, ValueError), + ("[", "a", False, ValueError), + ("[^", "a", False, ValueError), + ("[^bc", "a", False, ValueError), + ("a[", "a", False, ValueError), + ("a[", "ab", False, ValueError), + ("a[", "x", False, ValueError), + ("a/b[", "x", False, ValueError), ("*x", "xxx", True, None), ] -@pytest.mark.parametrize("pattern, s, is_match, err", match_tests()) -def test_match(pattern, s, is_match, err): - if platform.system() == "Windows" and "\\" in pattern: - # No escape allowed on Windows. - return +def test_match(): + for pattern, s, expected_match, err in match_tests(): + if platform.system() == "Windows" and "\\" in pattern: + # No escape allowed on Windows. + return - if err is not None: - with pytest.raises(PatternError): - dockerfile_match(pattern, s) - else: - actual_match = dockerfile_match(pattern, s) - assert is_match == actual_match, f"{pattern=} {s=} {is_match=} {actual_match=}" + if err is not None: + with pytest.raises(ValueError): + dockerfile_match(pattern, s) + else: + actual_match = dockerfile_match(pattern, s) + assert expected_match == actual_match, f"{pattern=} {s=} {expected_match=} {actual_match=}" diff --git a/test/docker_utils_test.py b/test/docker_utils_test.py index bd75286b66..88aa7a630c 100644 --- a/test/docker_utils_test.py +++ b/test/docker_utils_test.py @@ -56,10 +56,11 @@ def test_find_dockerignore_file(): dockerfile_path = dir1 / "Dockerfile" dockerignore_path = tmp_path / ".dockerignore" dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == dockerignore_path + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) == dockerignore_path # case 2: - # specific dockerignore file in cwd --> find it + # should not match specific dockerignore file in cwd + # when dockerfile is nested with TemporaryDirectory(dir=test_cwd) as tmp_dir: tmp_path = Path(tmp_dir) dir1 = tmp_path / "dir1" @@ -68,7 +69,7 @@ def test_find_dockerignore_file(): dockerfile_path = dir1 / "foo" dockerignore_path = tmp_path / "foo.dockerignore" dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == dockerignore_path + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) is None # case 3: # generic dockerignore file and nested dockerignore file in cwd @@ -83,8 +84,8 @@ def test_find_dockerignore_file(): generic_dockerignore_path.write_text("**/*.py") specific_dockerignore_path = tmp_path / "Dockerfile.dockerignore" specific_dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == specific_dockerignore_path - assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) != generic_dockerignore_path + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) == specific_dockerignore_path + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) != generic_dockerignore_path # case 4: # should not match nested dockerignore files @@ -112,7 +113,7 @@ def test_find_dockerignore_file(): nested_specific_dockerignore_path = dir2 / "Dockerfile.dockerignore" nested_specific_dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path, dir1) is None + assert find_dockerignore_file(dir1, dockerfile_path) is None # case 5: # should match dockerignore file next to dockerfile @@ -129,7 +130,7 @@ def test_find_dockerignore_file(): dockerignore_path = tmp_path / ".dockerignore" dockerignore_path.write_text("**/*") - assert find_dockerignore_file(dockerfile_path, test_cwd / tmp_dir) == dockerignore_path + assert find_dockerignore_file(test_cwd / tmp_dir, dockerfile_path) == dockerignore_path def create_tmp_files(tmp_path): diff --git a/test/image_test.py b/test/image_test.py index 84fa83a2e8..a3a59ed662 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -7,7 +7,7 @@ import threading from hashlib import sha256 from pathlib import Path -from tempfile import NamedTemporaryFile +from tempfile import TemporaryDirectory from typing import Literal, get_args from unittest import mock @@ -710,34 +710,160 @@ def test_image_copy_local_dir(builder_version, servicer, client, tmp_path_with_c assert set(servicer.mount_contents[mount_id].keys()) == {"/data.txt", "/data/sub"} -def test_image_docker_command_copy(builder_version, servicer, client, tmp_path_with_content): - app = App() - app.image = Image.debian_slim().dockerfile_commands(["COPY . /dummy"]) - app.function()(dummy) +def test_image_docker_command_copy(builder_version, servicer, client): + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir).relative_to(test_cwd) + (tmp_path / "data.txt").write_text("hello") + (tmp_path / "data").mkdir() + (tmp_path / "data" / "sub").write_text("world") + app = App() + app.image = Image.debian_slim().dockerfile_commands([f"COPY {tmp_path} /dummy"]) + app.function()(dummy) - with app.run(client=client): - layers = get_image_layers(app.image.object_id, servicer) - assert "COPY . /dummy" in layers[0].dockerfile_commands - mount_id = layers[1].context_mount_id - files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) - assert files == {"/data.txt": b"hello", "/data/sub": b"world"} + with app.run(client=client): + layers = get_image_layers(app.image.object_id, servicer) + assert f"COPY {tmp_path} /dummy" in layers[0].dockerfile_commands + mount_id = layers[0].context_mount_id + files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) + assert files == {Path("/") / tmp_path / "data.txt", Path("/") / tmp_path / "data" / "sub"} + + +def test_image_dockerfile_copy(builder_version, servicer, client): + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir).relative_to(test_cwd) + (tmp_path / "data").mkdir() + (tmp_path / "data" / "sub").write_text("world") + (tmp_path / "module").mkdir() + (tmp_path / "module" / "__init__.py").write_text("foo") + (tmp_path / "module" / "sub.py").write_text("bar") + (tmp_path / "module" / "sub").mkdir() + (tmp_path / "module" / "sub" / "__init__.py").write_text("baz") + dockerfile = tmp_path / "Dockerfile" + dockerfile.write_text(f"COPY {tmp_path} /dummy\n") + + app = App() + app.image = Image.debian_slim().from_dockerfile(dockerfile) + app.function()(dummy) + with app.run(client=client): + layers = get_image_layers(app.image.object_id, servicer) + assert f"COPY {tmp_path} /dummy" in layers[1].dockerfile_commands + mount_id = layers[1].context_mount_id + files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) + assert files == { + Path("/") / tmp_path / "Dockerfile", + Path("/") / tmp_path / "data/sub", + Path("/") / tmp_path / "module/__init__.py", + Path("/") / tmp_path / "module/sub.py", + Path("/") / tmp_path / "module/sub/__init__.py", + } -def test_image_dockerfile_copy(builder_version, servicer, client, tmp_path_with_content): - dockerfile = NamedTemporaryFile("w", delete=False) - dockerfile.write(f"COPY {tmp_path_with_content} /dummy\n") - dockerfile.close() - app = App() - app.image = Image.debian_slim().from_dockerfile(dockerfile.name) - app.function()(dummy) +@pytest.mark.parametrize(["use_dockerfile"], [(True,), (False,)]) +def test_image_dockerfile_copy_dockerignore(monkeypatch, builder_version, servicer, client, use_dockerfile): + def run_app_get_files(): + app = App() + with monkeypatch.context() as m: + m.setattr("pathlib.Path.cwd", lambda: tmp_path) - with app.run(client=client): - layers = get_image_layers(app.image.object_id, servicer) - assert f"COPY {tmp_path_with_content} /dummy" in layers[1].dockerfile_commands - mount_id = layers[1].context_mount_id - files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) - assert files == {"/data.txt": b"hello", "/data/sub": b"world"} + if use_dockerfile: + app.image = Image.debian_slim().from_dockerfile(dockerfile) + layer = 1 + else: + app.image = Image.debian_slim().dockerfile_commands([f"COPY {tmp_path} /dummy"]) + layer = 0 + app.function()(dummy) + + with app.run(client=client): + layers = get_image_layers(app.image.object_id, servicer) + assert f"COPY {tmp_path} /dummy" in layers[layer].dockerfile_commands + mount_id = layers[layer].context_mount_id + files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) + return files + + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir).relative_to(test_cwd) + (tmp_path / "foo.py").write_text("hello") + (tmp_path / "bar.txt").write_text("hello") + (tmp_path / "baz.md").write_text("hello") + (tmp_path / "bat.yaml").write_text("hello") + (tmp_path / "module").mkdir() + + dockerfile = tmp_path / "module" / "special_dockerfile" + dockerfile.write_text(f"COPY {tmp_path} /dummy\n") + + all_files = { + Path("/") / tmp_path / "foo.py", + Path("/") / tmp_path / "bar.txt", + Path("/") / tmp_path / "baz.md", + Path("/") / tmp_path / "bat.yaml", + Path("/") / tmp_path / "module/special_dockerfile", + } + + # case 1: .dockerignore should be + # ignored if it's not next to the dockerfile + special_cwd_dockerignore = tmp_path / "special_dockerfile.dockerignore" + special_cwd_dockerignore.write_text("**/*.txt") + + files = run_app_get_files() + expected_files = all_files.union({Path("/") / tmp_path / "special_dockerfile.dockerignore"}) + assert files == expected_files + + # case 2: generic .dockerignore in cwd should be used + generic_cwd_dockerignore = tmp_path / ".dockerignore" + generic_cwd_dockerignore.write_text("**/*.py\n**/*.dockerignore") + files = run_app_get_files() + assert files == {x for x in all_files if x != Path("/") / tmp_path / "foo.py"} + + # case 3: generic .dockerignore in next to dockerfile is prefered + if use_dockerfile: + generic_cwd_dockerignore = tmp_path / "module" / ".dockerignore" + generic_cwd_dockerignore.write_text("**/*.yaml\n**/*.dockerignore") + files = run_app_get_files() + assert files == {x for x in all_files if x != Path("/") / tmp_path / "bat.yaml"} + + # case 4: specific dockerignore next to dockerfile is prefered + special_dockerignore = tmp_path / "module" / "special_dockerfile.dockerignore" + special_dockerignore.write_text("**/*.md\n**/*.dockerignore") + files = run_app_get_files() + assert files == {x for x in all_files if x != Path("/") / tmp_path / "baz.md"} + + +@pytest.mark.parametrize( + ["use_callable", "use_dockerfile"], [(True, True), (True, False), (False, True), (False, False)] +) +def test_image_dockerfile_copy_ignore(builder_version, servicer, client, use_callable, use_dockerfile): + def cb(x): + return str(x).endswith(".txt") + + ignore = cb if use_callable else ["**/*.txt"] + + test_cwd = Path.cwd() + with TemporaryDirectory(dir=test_cwd) as tmp_dir: + tmp_path = Path(tmp_dir).relative_to(test_cwd) + (tmp_path / "data.txt").write_text("world") + (tmp_path / "file.py").write_text("world") + dockerfile = tmp_path / "Dockerfile" + dockerfile.write_text(f"COPY {tmp_path} /dummy\n") + + app = App() + if use_dockerfile: + app.image = Image.debian_slim().from_dockerfile(dockerfile, ignore=ignore) + layer = 1 + else: + app.image = Image.debian_slim().dockerfile_commands([f"COPY {tmp_path} /dummy"], ignore=ignore) + layer = 0 + app.function()(dummy) + + with app.run(client=client): + layers = get_image_layers(app.image.object_id, servicer) + assert f"COPY {tmp_path} /dummy" in layers[layer].dockerfile_commands + mount_id = layers[layer].context_mount_id + files = set(Path(fn) for fn in servicer.mount_contents[mount_id].keys()) + assert files == {Path("/") / tmp_path / "Dockerfile", Path("/") / tmp_path / "file.py"} def test_image_docker_command_entrypoint(builder_version, servicer, client, tmp_path_with_content): From a4048fbaf1a960475e2d00d62b5302b7ca5bc272 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Thu, 19 Dec 2024 14:56:08 +0100 Subject: [PATCH 16/25] remove docker_copy_match --- modal/_utils/docker_copy_match.py | 155 ------------------------------ test/docker_copy_match_test.py | 83 ---------------- 2 files changed, 238 deletions(-) delete mode 100644 modal/_utils/docker_copy_match.py delete mode 100644 test/docker_copy_match_test.py diff --git a/modal/_utils/docker_copy_match.py b/modal/_utils/docker_copy_match.py deleted file mode 100644 index 68e2a09032..0000000000 --- a/modal/_utils/docker_copy_match.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright Modal Labs 2024 -"""Pattern matching library ported from https://github.com/golang/go/blob/go1.23.4/src/path/match.go - -This is the same pattern-matching logic used by Docker for Dockerfiles (not dockerignore), -except it is written in Python rather than Go. -""" - - -def dockerfile_match(pattern: str, name: str) -> bool: - while len(pattern) > 0: - continue_outer_loop = False - star, chunk, pattern = _scan_chunk(pattern) - - if star and chunk == "": - # Trailing * matches rest of string unless it has a /. - # return bytealg.IndexByteString(name, '/') < 0, nil - return name.find("/") < 0 - - # Look for match at current position. - t, ok = _match_chunk(chunk, name) - # if we're the last chunk, make sure we've exhausted the name - # otherwise we'll give a false result even if we could still match - # using the star - if ok and (len(t) == 0 or len(pattern) > 0): - name = t - continue - - if star: - i = 0 - while i < len(name) and name[i] != "/": - t, ok = _match_chunk(chunk, name[i + 1 :]) - if ok: - if len(pattern) == 0 and len(t) > 0: - i += 1 - continue - name = t - continue_outer_loop = True - break - - i += 1 - if continue_outer_loop: - continue - - while len(pattern) > 0: - _, chunk, pattern = _scan_chunk(pattern) - _match_chunk(chunk, "") - - return False - - return len(name) == 0 - - -def _scan_chunk(pattern: str) -> tuple[bool, str, str]: - star = False - while len(pattern) > 0 and pattern[0] == "*": - pattern = pattern[1:] - star = True - - inrange = False - i = 0 - while i < len(pattern): - if pattern[i] == "\\": - if i + 1 < len(pattern): - i += 1 - elif pattern[i] == "[": - inrange = True - elif pattern[i] == "]": - inrange = False - elif pattern[i] == "*": - if not inrange: - break - i += 1 - - return star, pattern[0:i], pattern[i:] - - -def _match_chunk(chunk: str, s: str) -> tuple[str, bool]: - failed = False - - while len(chunk) > 0: - if not failed and len(s) == 0: - failed = True - - if chunk[0] == "[": - r = "" - if not failed: - r = s[0] - s = s[1:] - chunk = chunk[1:] - negated = False - if len(chunk) > 0 and chunk[0] == "^": - negated = True - chunk = chunk[1:] - - match = False - nrange = 0 - - while True: - if len(chunk) > 0 and chunk[0] == "]" and nrange > 0: - chunk = chunk[1:] - break - lo, chunk = _get_esc(chunk) - hi = lo - - if chunk[0] == "-": - hi, chunk = _get_esc(chunk[1:]) - if lo <= r and r <= hi: - match = True - nrange += 1 - - if match == negated: - failed = True - elif chunk[0] == "?": - if not failed: - if s[0] == "/": - failed = True - s = s[1:] - chunk = chunk[1:] - elif chunk[0] == "\\": - chunk = chunk[1:] - if len(chunk) == 0: - raise ValueError("Bad pattern") - - if not failed: - if chunk[0] != s[0]: - failed = True - s = s[1:] - chunk = chunk[1:] - else: - if not failed: - if chunk[0] != s[0]: - failed = True - s = s[1:] - chunk = chunk[1:] - - if failed: - return "", False - return s, True - - -def _get_esc(chunk: str) -> tuple[str, str]: - if len(chunk) == 0 or chunk[0] == "-" or chunk[0] == "]": - raise ValueError("Bad pattern") - if chunk[0] == "\\": - chunk = chunk[1:] - if len(chunk) == 0: - raise ValueError("Bad pattern") - try: - r = chunk[0] - nchunk = chunk[1:] - except IndexError: - raise ValueError("Invalid pattern") - if len(nchunk) == 0: - raise ValueError("Bad pattern") - return r, nchunk diff --git a/test/docker_copy_match_test.py b/test/docker_copy_match_test.py deleted file mode 100644 index 6571b42499..0000000000 --- a/test/docker_copy_match_test.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright Modal Labs 2024 -"""Pattern matching library tests ported from https://github.com/golang/go/blob/go1.23.4/src/path/match_test.go""" - -import platform -import pytest - -from modal._utils.docker_copy_match import dockerfile_match - - -def match_tests(): - return [ - # (pattern, string, is_match, error) - ("abc", "abc", True, None), - ("*", "abc", True, None), - ("*c", "abc", True, None), - ("a*", "a", True, None), - ("a*", "abc", True, None), - ("a*", "ab/c", False, None), - ("a*/b", "abc/b", True, None), - ("a*/b", "a/c/b", False, None), - ("a*b*c*d*e*/f", "axbxcxdxe/f", True, None), - ("a*b*c*d*e*/f", "axbxcxdxexxx/f", True, None), - ("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", False, None), - ("a*b*c*d*e*/f", "axbxcxdxexxx/fff", False, None), - ("a*b?c*x", "abxbbxdbxebxczzx", True, None), - ("a*b?c*x", "abxbbxdbxebxczzy", False, None), - ("ab[c]", "abc", True, None), - ("ab[b-d]", "abc", True, None), - ("ab[e-g]", "abc", False, None), - ("ab[^c]", "abc", False, None), - ("ab[^b-d]", "abc", False, None), - ("ab[^e-g]", "abc", True, None), - ("a\\*b", "a*b", True, None), - ("a\\*b", "ab", False, None), - ("a?b", "a☺b", True, None), - ("a[^a]b", "a☺b", True, None), - ("a???b", "a☺b", False, None), - ("a[^a][^a][^a]b", "a☺b", False, None), - ("[a-ζ]*", "α", True, None), - ("*[a-ζ]", "A", False, None), - ("a?b", "a/b", False, None), - ("a*b", "a/b", False, None), - ("[\\]a]", "]", True, None), - ("[\\-]", "-", True, None), - ("[x\\-]", "x", True, None), - ("[x\\-]", "-", True, None), - ("[x\\-]", "z", False, None), - ("[\\-x]", "x", True, None), - ("[\\-x]", "-", True, None), - ("[\\-x]", "a", False, None), - ("[]a]", "]", False, ValueError), - ("[-]", "-", False, ValueError), - ("[x-]", "x", False, ValueError), - ("[x-]", "-", False, ValueError), - ("[x-]", "z", False, ValueError), - ("[-x]", "x", False, ValueError), - ("[-x]", "-", False, ValueError), - ("[-x]", "a", False, ValueError), - ("\\", "a", False, ValueError), - ("[a-b-c]", "a", False, ValueError), - ("[", "a", False, ValueError), - ("[^", "a", False, ValueError), - ("[^bc", "a", False, ValueError), - ("a[", "a", False, ValueError), - ("a[", "ab", False, ValueError), - ("a[", "x", False, ValueError), - ("a/b[", "x", False, ValueError), - ("*x", "xxx", True, None), - ] - - -def test_match(): - for pattern, s, expected_match, err in match_tests(): - if platform.system() == "Windows" and "\\" in pattern: - # No escape allowed on Windows. - return - - if err is not None: - with pytest.raises(ValueError): - dockerfile_match(pattern, s) - else: - actual_match = dockerfile_match(pattern, s) - assert expected_match == actual_match, f"{pattern=} {s=} {expected_match=} {actual_match=}" From b7ff5737f9ee9c07707bc851b37e3786cd7a0859 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 10:28:21 +0100 Subject: [PATCH 17/25] auto dockerignore --- modal/_utils/docker_utils.py | 10 ++++-- modal/image.py | 66 ++++++++++++++++++++---------------- modal/mount.py | 2 +- 3 files changed, 46 insertions(+), 32 deletions(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 5198d72586..605ab7332f 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -2,12 +2,18 @@ import re import shlex from pathlib import Path -from typing import Optional +from typing import Optional, Sequence from ..exception import InvalidError -def extract_copy_command_patterns(dockerfile_lines: list[str]) -> list[str]: +class AUTO_DOCKERIGNORE: + """Flag indicating that present .dockerignore files should be used for file exclusion patterns.""" + + pass + + +def extract_copy_command_patterns(dockerfile_lines: Sequence[str]) -> list[str]: """ Extract all COPY command sources from a Dockerfile. Combines multiline COPY commands into a single line. diff --git a/modal/image.py b/modal/image.py index e8ebb46cd1..1e0066c500 100644 --- a/modal/image.py +++ b/modal/image.py @@ -31,7 +31,7 @@ from ._utils.async_utils import synchronize_api from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.deprecation import deprecation_error, deprecation_warning -from ._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file +from ._utils.docker_utils import AUTO_DOCKERIGNORE, extract_copy_command_patterns, find_dockerignore_file from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors from ._utils.pattern_utils import read_ignorefile @@ -40,7 +40,7 @@ from .config import config, logger, user_config_path from .environments import _get_environment_cached from .exception import InvalidError, NotFoundError, RemoteError, VersionError -from .file_pattern_matcher import NON_PYTHON_FILES +from .file_pattern_matcher import NON_PYTHON_FILES, FilePatternMatcher from .gpu import GPU_T, parse_gpu_config from .mount import _Mount, python_standalone_mount_name from .network_file_system import _NetworkFileSystem @@ -238,13 +238,20 @@ def _get_image_builder_version(server_version: ImageBuilderVersion) -> ImageBuil return version -def _create_context_mount(docker_commands: Sequence[str], ignore: Callable[[Path], bool]) -> _Mount: +def _create_context_mount( + docker_commands: Sequence[str], ignore: Union[Sequence[str], Callable[[Path], bool]] +) -> _Mount: """ Creates a context mount from a list of docker commands. """ copy_patterns = extract_copy_command_patterns(docker_commands) include = FilePatternMatcher(*copy_patterns) + if not callable(ignore): + ignore = FilePatternMatcher(*ignore) + + ignore = typing.cast(Callable[[Path], bool], ignore) + def ignore_with_include(source: Path): relative_source = source.relative_to(Path.cwd()) if not include(relative_source) or ignore(relative_source): @@ -1174,35 +1181,36 @@ def dockerfile_commands( # modal.Mount with local files to supply as build context for COPY commands context_mount: Optional[_Mount] = None, force_build: bool = False, # Ignore cached builds, similar to 'docker build --no-cache' - ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, + ignore: Union[Sequence[str], Callable[[Path], bool], AUTO_DOCKERIGNORE] = AUTO_DOCKERIGNORE, ) -> "_Image": """Extend an image with arbitrary Dockerfile-like commands.""" cmds = _flatten_str_args("dockerfile_commands", "dockerfile_commands", dockerfile_commands) if not cmds: return self - if ignore is None: - dockerignore_fp = find_dockerignore_file(Path.cwd()) - if dockerignore_fp: - with open(dockerignore_fp) as f: - ignore = FilePatternMatcher(*read_ignorefile(f)) - else: - ignore = FilePatternMatcher() - elif isinstance(ignore, list): - ignore = FilePatternMatcher(*ignore) - if context_mount is not None: deprecation_warning( (2024, 12, 16), - "`context_mount` is deprecated," - + " files are now mounted implicitly without this flag and can be ignored with `dockerignore` files", + "`context_mount` is deprecated, files are now mounted implicitly" + + " without this flag and can be ignored with the `ignore` argument." + + " Defaults to using .dockerignore files.", ) + if not isinstance(ignore, AUTO_DOCKERIGNORE): + raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): return context_mount context_mount_function = wrapper_context_mount_function + else: + if isinstance(ignore, AUTO_DOCKERIGNORE): + dockerignore_fp = find_dockerignore_file(Path.cwd()) + if dockerignore_fp is not None: + with open(dockerignore_fp) as f: + ignore = FilePatternMatcher(*read_ignorefile(f)) + else: + ignore = FilePatternMatcher() def base_image_context_mount_function() -> _Mount: return _create_context_mount(cmds, ignore) @@ -1584,7 +1592,7 @@ def from_dockerfile( add_python: Optional[str] = None, # If ignore is set to None # it will look for a dockerignore file to get ignore patterns - ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, + ignore: Union[Sequence[str], Callable[[Path], bool], AUTO_DOCKERIGNORE] = AUTO_DOCKERIGNORE, ) -> "_Image": """Build a Modal image from a local Dockerfile. @@ -1601,28 +1609,28 @@ def from_dockerfile( build into the image, this local data will be implicitly mounted into the image. """ - if ignore is None: - dockerignore_fp = find_dockerignore_file(Path.cwd(), Path(path)) - if dockerignore_fp: - with open(dockerignore_fp) as f: - ignore = FilePatternMatcher(*read_ignorefile(f)) - else: - ignore = FilePatternMatcher() - elif isinstance(ignore, list): - ignore = FilePatternMatcher(*ignore) - if context_mount is not None: deprecation_warning( (2024, 12, 16), - "`context_mount` is deprecated," - + " files are now mounted implicitly without this flag and can be ignored with `dockerignore` files", + "`context_mount` is deprecated, files are now mounted implicitly" + + " without this flag and can be ignored with the `ignore` argument." + + " Defaults to using .dockerignore files.", ) + if not isinstance(ignore, AUTO_DOCKERIGNORE): + raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): return context_mount context_mount_function = wrapper_context_mount_function else: + if isinstance(ignore, AUTO_DOCKERIGNORE): + dockerignore_fp = find_dockerignore_file(Path.cwd()) + if dockerignore_fp is not None: + with open(dockerignore_fp) as f: + ignore = FilePatternMatcher(*read_ignorefile(f)) + else: + ignore = FilePatternMatcher() def base_image_context_mount_function() -> _Mount: with open(os.path.expanduser(path)) as f: diff --git a/modal/mount.py b/modal/mount.py index e568a19a39..66917a98c8 100644 --- a/modal/mount.py +++ b/modal/mount.py @@ -325,7 +325,7 @@ def _add_local_dir( remote_path: Path, ignore: Union[Sequence[str], Callable[[Path], bool]] = [], ): - if isinstance(ignore, list): + if not callable(ignore): ignore = FilePatternMatcher(*ignore) return _Mount._new()._extend( From 6ac14008b68a76977b5a0485cd98ef4200a5c963 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 11:14:31 +0100 Subject: [PATCH 18/25] remove autodockerignore --- modal/_utils/docker_utils.py | 6 ------ modal/image.py | 14 +++++++------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 605ab7332f..6711aa7060 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -7,12 +7,6 @@ from ..exception import InvalidError -class AUTO_DOCKERIGNORE: - """Flag indicating that present .dockerignore files should be used for file exclusion patterns.""" - - pass - - def extract_copy_command_patterns(dockerfile_lines: Sequence[str]) -> list[str]: """ Extract all COPY command sources from a Dockerfile. diff --git a/modal/image.py b/modal/image.py index 1e0066c500..f698522d79 100644 --- a/modal/image.py +++ b/modal/image.py @@ -31,7 +31,7 @@ from ._utils.async_utils import synchronize_api from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.deprecation import deprecation_error, deprecation_warning -from ._utils.docker_utils import AUTO_DOCKERIGNORE, extract_copy_command_patterns, find_dockerignore_file +from ._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors from ._utils.pattern_utils import read_ignorefile @@ -1181,7 +1181,7 @@ def dockerfile_commands( # modal.Mount with local files to supply as build context for COPY commands context_mount: Optional[_Mount] = None, force_build: bool = False, # Ignore cached builds, similar to 'docker build --no-cache' - ignore: Union[Sequence[str], Callable[[Path], bool], AUTO_DOCKERIGNORE] = AUTO_DOCKERIGNORE, + ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, ) -> "_Image": """Extend an image with arbitrary Dockerfile-like commands.""" cmds = _flatten_str_args("dockerfile_commands", "dockerfile_commands", dockerfile_commands) @@ -1195,7 +1195,7 @@ def dockerfile_commands( + " without this flag and can be ignored with the `ignore` argument." + " Defaults to using .dockerignore files.", ) - if not isinstance(ignore, AUTO_DOCKERIGNORE): + if ignore is not None: raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): @@ -1204,7 +1204,7 @@ def wrapper_context_mount_function(): context_mount_function = wrapper_context_mount_function else: - if isinstance(ignore, AUTO_DOCKERIGNORE): + if ignore is None: dockerignore_fp = find_dockerignore_file(Path.cwd()) if dockerignore_fp is not None: with open(dockerignore_fp) as f: @@ -1592,7 +1592,7 @@ def from_dockerfile( add_python: Optional[str] = None, # If ignore is set to None # it will look for a dockerignore file to get ignore patterns - ignore: Union[Sequence[str], Callable[[Path], bool], AUTO_DOCKERIGNORE] = AUTO_DOCKERIGNORE, + ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, ) -> "_Image": """Build a Modal image from a local Dockerfile. @@ -1616,7 +1616,7 @@ def from_dockerfile( + " without this flag and can be ignored with the `ignore` argument." + " Defaults to using .dockerignore files.", ) - if not isinstance(ignore, AUTO_DOCKERIGNORE): + if ignore is not None: raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): @@ -1624,7 +1624,7 @@ def wrapper_context_mount_function(): context_mount_function = wrapper_context_mount_function else: - if isinstance(ignore, AUTO_DOCKERIGNORE): + if ignore is None: dockerignore_fp = find_dockerignore_file(Path.cwd()) if dockerignore_fp is not None: with open(dockerignore_fp) as f: From c44a6eaf1f25e875128c2a57bf083c2902043e4c Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 11:19:14 +0100 Subject: [PATCH 19/25] name --- test/image_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/image_test.py b/test/image_test.py index a3a59ed662..2735c938c1 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -839,7 +839,7 @@ def test_image_dockerfile_copy_ignore(builder_version, servicer, client, use_cal def cb(x): return str(x).endswith(".txt") - ignore = cb if use_callable else ["**/*.txt"] + ignore_patterns = cb if use_callable else ["**/*.txt"] test_cwd = Path.cwd() with TemporaryDirectory(dir=test_cwd) as tmp_dir: @@ -851,10 +851,10 @@ def cb(x): app = App() if use_dockerfile: - app.image = Image.debian_slim().from_dockerfile(dockerfile, ignore=ignore) + app.image = Image.debian_slim().from_dockerfile(dockerfile, ignore=ignore_patterns) layer = 1 else: - app.image = Image.debian_slim().dockerfile_commands([f"COPY {tmp_path} /dummy"], ignore=ignore) + app.image = Image.debian_slim().dockerfile_commands([f"COPY {tmp_path} /dummy"], ignore=ignore_patterns) layer = 0 app.function()(dummy) From 97685a8eaa301d889d16b499cb3e63fa4187fe5e Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 13:26:01 +0100 Subject: [PATCH 20/25] autodockerignore --- modal/_utils/docker_utils.py | 21 +++++++++++++++++++++ modal/image.py | 19 ++++++++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 6711aa7060..91e2dba4d6 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -88,3 +88,24 @@ def valid_dockerignore_file(fp): possible_locations.append(context_directory / generic_name) return next((e for e in possible_locations if valid_dockerignore_file(e)), None) + + +class _AutoDockerIgnore: + _custom_repr: Optional[str] = None + + def with_repr(self, custom_repr) -> "_AutoDockerIgnore": + # use to give an instance of a matcher a custom name - useful for visualizing default values in signatures + self._custom_repr = custom_repr + return self + + def __repr__(self) -> str: + if self._custom_repr: + return self._custom_repr + + return super().__repr__() + + def __call__(self, _: Path) -> bool: + return True + + +AUTO_DOCKERIGNORE = _AutoDockerIgnore().with_repr(f"{__name__}.AUTO_DOCKERIGNORE") diff --git a/modal/image.py b/modal/image.py index f698522d79..4f2d4388c8 100644 --- a/modal/image.py +++ b/modal/image.py @@ -31,7 +31,12 @@ from ._utils.async_utils import synchronize_api from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES from ._utils.deprecation import deprecation_error, deprecation_warning -from ._utils.docker_utils import extract_copy_command_patterns, find_dockerignore_file +from ._utils.docker_utils import ( + AUTO_DOCKERIGNORE, + _AutoDockerIgnore, + extract_copy_command_patterns, + find_dockerignore_file, +) from ._utils.function_utils import FunctionInfo from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors from ._utils.pattern_utils import read_ignorefile @@ -1181,7 +1186,7 @@ def dockerfile_commands( # modal.Mount with local files to supply as build context for COPY commands context_mount: Optional[_Mount] = None, force_build: bool = False, # Ignore cached builds, similar to 'docker build --no-cache' - ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, + ignore: Union[Sequence[str], Callable[[Path], bool]] = AUTO_DOCKERIGNORE, ) -> "_Image": """Extend an image with arbitrary Dockerfile-like commands.""" cmds = _flatten_str_args("dockerfile_commands", "dockerfile_commands", dockerfile_commands) @@ -1195,7 +1200,7 @@ def dockerfile_commands( + " without this flag and can be ignored with the `ignore` argument." + " Defaults to using .dockerignore files.", ) - if ignore is not None: + if not isinstance(ignore, _AutoDockerIgnore): raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): @@ -1204,7 +1209,7 @@ def wrapper_context_mount_function(): context_mount_function = wrapper_context_mount_function else: - if ignore is None: + if isinstance(ignore, _AutoDockerIgnore): dockerignore_fp = find_dockerignore_file(Path.cwd()) if dockerignore_fp is not None: with open(dockerignore_fp) as f: @@ -1592,7 +1597,7 @@ def from_dockerfile( add_python: Optional[str] = None, # If ignore is set to None # it will look for a dockerignore file to get ignore patterns - ignore: Optional[Union[Sequence[str], Callable[[Path], bool]]] = None, + ignore: Union[Sequence[str], Callable[[Path], bool]] = AUTO_DOCKERIGNORE, ) -> "_Image": """Build a Modal image from a local Dockerfile. @@ -1616,7 +1621,7 @@ def from_dockerfile( + " without this flag and can be ignored with the `ignore` argument." + " Defaults to using .dockerignore files.", ) - if ignore is not None: + if not isinstance(ignore, _AutoDockerIgnore): raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): @@ -1624,7 +1629,7 @@ def wrapper_context_mount_function(): context_mount_function = wrapper_context_mount_function else: - if ignore is None: + if isinstance(ignore, _AutoDockerIgnore): dockerignore_fp = find_dockerignore_file(Path.cwd()) if dockerignore_fp is not None: with open(dockerignore_fp) as f: From b52fdbb876fe8266e9be456ccda73374b214abd5 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 13:30:30 +0100 Subject: [PATCH 21/25] move to inside load --- modal/image.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/modal/image.py b/modal/image.py index 4f2d4388c8..728468e2d4 100644 --- a/modal/image.py +++ b/modal/image.py @@ -1207,17 +1207,16 @@ def wrapper_context_mount_function(): return context_mount context_mount_function = wrapper_context_mount_function - else: - if isinstance(ignore, _AutoDockerIgnore): - dockerignore_fp = find_dockerignore_file(Path.cwd()) - if dockerignore_fp is not None: - with open(dockerignore_fp) as f: - ignore = FilePatternMatcher(*read_ignorefile(f)) - else: - ignore = FilePatternMatcher() def base_image_context_mount_function() -> _Mount: + if isinstance(ignore, _AutoDockerIgnore): + dockerignore_fp = find_dockerignore_file(Path.cwd()) + if dockerignore_fp is not None: + with open(dockerignore_fp) as f: + ignore = FilePatternMatcher(*read_ignorefile(f)) + else: + ignore = FilePatternMatcher() return _create_context_mount(cmds, ignore) context_mount_function = base_image_context_mount_function @@ -1629,15 +1628,15 @@ def wrapper_context_mount_function(): context_mount_function = wrapper_context_mount_function else: - if isinstance(ignore, _AutoDockerIgnore): - dockerignore_fp = find_dockerignore_file(Path.cwd()) - if dockerignore_fp is not None: - with open(dockerignore_fp) as f: - ignore = FilePatternMatcher(*read_ignorefile(f)) - else: - ignore = FilePatternMatcher() def base_image_context_mount_function() -> _Mount: + if isinstance(ignore, _AutoDockerIgnore): + dockerignore_fp = find_dockerignore_file(Path.cwd()) + if dockerignore_fp is not None: + with open(dockerignore_fp) as f: + ignore = FilePatternMatcher(*read_ignorefile(f)) + else: + ignore = FilePatternMatcher() with open(os.path.expanduser(path)) as f: lines = f.readlines() return _create_context_mount(lines, ignore) From aa8a5b07664f2c1402bb9c06d9e3435119344dbc Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 13:46:58 +0100 Subject: [PATCH 22/25] types --- modal/_utils/docker_utils.py | 8 ++++---- modal/image.py | 28 +++++++++++++++++----------- test/image_test.py | 8 ++++---- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 91e2dba4d6..0a3434b551 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -90,10 +90,10 @@ def valid_dockerignore_file(fp): return next((e for e in possible_locations if valid_dockerignore_file(e)), None) -class _AutoDockerIgnore: +class _AutoDockerIgnoreSentinel: _custom_repr: Optional[str] = None - def with_repr(self, custom_repr) -> "_AutoDockerIgnore": + def with_repr(self, custom_repr) -> "_AutoDockerIgnoreSentinel": # use to give an instance of a matcher a custom name - useful for visualizing default values in signatures self._custom_repr = custom_repr return self @@ -105,7 +105,7 @@ def __repr__(self) -> str: return super().__repr__() def __call__(self, _: Path) -> bool: - return True + raise NotImplementedError("This is only a placeholder. Do not call") -AUTO_DOCKERIGNORE = _AutoDockerIgnore().with_repr(f"{__name__}.AUTO_DOCKERIGNORE") +AUTO_DOCKERIGNORE = _AutoDockerIgnoreSentinel().with_repr(f"{__name__}.AUTO_DOCKERIGNORE") diff --git a/modal/image.py b/modal/image.py index 728468e2d4..68d662587b 100644 --- a/modal/image.py +++ b/modal/image.py @@ -33,7 +33,7 @@ from ._utils.deprecation import deprecation_error, deprecation_warning from ._utils.docker_utils import ( AUTO_DOCKERIGNORE, - _AutoDockerIgnore, + _AutoDockerIgnoreSentinel, extract_copy_command_patterns, find_dockerignore_file, ) @@ -1200,7 +1200,7 @@ def dockerfile_commands( + " without this flag and can be ignored with the `ignore` argument." + " Defaults to using .dockerignore files.", ) - if not isinstance(ignore, _AutoDockerIgnore): + if not isinstance(ignore, _AutoDockerIgnoreSentinel): raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): @@ -1210,14 +1210,17 @@ def wrapper_context_mount_function(): else: def base_image_context_mount_function() -> _Mount: - if isinstance(ignore, _AutoDockerIgnore): + if isinstance(ignore, _AutoDockerIgnoreSentinel): dockerignore_fp = find_dockerignore_file(Path.cwd()) if dockerignore_fp is not None: with open(dockerignore_fp) as f: - ignore = FilePatternMatcher(*read_ignorefile(f)) + ignore_patterns = FilePatternMatcher(*read_ignorefile(f)) else: - ignore = FilePatternMatcher() - return _create_context_mount(cmds, ignore) + ignore_patterns = FilePatternMatcher() + ignore_patterns = ignore + else: + ignore_patterns = ignore + return _create_context_mount(cmds, ignore_patterns) context_mount_function = base_image_context_mount_function @@ -1620,7 +1623,7 @@ def from_dockerfile( + " without this flag and can be ignored with the `ignore` argument." + " Defaults to using .dockerignore files.", ) - if not isinstance(ignore, _AutoDockerIgnore): + if not isinstance(ignore, _AutoDockerIgnoreSentinel): raise InvalidError("Cannot set both `context_mount` and `ignore`") def wrapper_context_mount_function(): @@ -1630,16 +1633,19 @@ def wrapper_context_mount_function(): else: def base_image_context_mount_function() -> _Mount: - if isinstance(ignore, _AutoDockerIgnore): + if isinstance(ignore, _AutoDockerIgnoreSentinel): dockerignore_fp = find_dockerignore_file(Path.cwd()) if dockerignore_fp is not None: with open(dockerignore_fp) as f: - ignore = FilePatternMatcher(*read_ignorefile(f)) + ignore_patterns = FilePatternMatcher(*read_ignorefile(f)) else: - ignore = FilePatternMatcher() + ignore_patterns = FilePatternMatcher() + ignore_patterns = ignore + else: + ignore_patterns = ignore with open(os.path.expanduser(path)) as f: lines = f.readlines() - return _create_context_mount(lines, ignore) + return _create_context_mount(lines, ignore_patterns) context_mount_function = base_image_context_mount_function diff --git a/test/image_test.py b/test/image_test.py index 2735c938c1..710ccc7c65 100644 --- a/test/image_test.py +++ b/test/image_test.py @@ -8,7 +8,7 @@ from hashlib import sha256 from pathlib import Path from tempfile import TemporaryDirectory -from typing import Literal, get_args +from typing import Callable, Literal, Sequence, Union, get_args from unittest import mock import modal @@ -839,7 +839,7 @@ def test_image_dockerfile_copy_ignore(builder_version, servicer, client, use_cal def cb(x): return str(x).endswith(".txt") - ignore_patterns = cb if use_callable else ["**/*.txt"] + ignore: Union[Sequence[str], Callable[[Path], bool]] = cb if use_callable else ["**/*.txt"] test_cwd = Path.cwd() with TemporaryDirectory(dir=test_cwd) as tmp_dir: @@ -851,10 +851,10 @@ def cb(x): app = App() if use_dockerfile: - app.image = Image.debian_slim().from_dockerfile(dockerfile, ignore=ignore_patterns) + app.image = Image.debian_slim().from_dockerfile(dockerfile, ignore=ignore) layer = 1 else: - app.image = Image.debian_slim().dockerfile_commands([f"COPY {tmp_path} /dummy"], ignore=ignore_patterns) + app.image = Image.debian_slim().dockerfile_commands([f"COPY {tmp_path} /dummy"], ignore=ignore) layer = 0 app.function()(dummy) From fa89c548933963cb4114e5743d78dbc8a1a7c555 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 13:52:05 +0100 Subject: [PATCH 23/25] move repr inside --- modal/_utils/docker_utils.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index 0a3434b551..f9b2b6b612 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -91,21 +91,11 @@ def valid_dockerignore_file(fp): class _AutoDockerIgnoreSentinel: - _custom_repr: Optional[str] = None - - def with_repr(self, custom_repr) -> "_AutoDockerIgnoreSentinel": - # use to give an instance of a matcher a custom name - useful for visualizing default values in signatures - self._custom_repr = custom_repr - return self - def __repr__(self) -> str: - if self._custom_repr: - return self._custom_repr - - return super().__repr__() + return f"{__name__}.AUTO_DOCKERIGNORE" def __call__(self, _: Path) -> bool: raise NotImplementedError("This is only a placeholder. Do not call") -AUTO_DOCKERIGNORE = _AutoDockerIgnoreSentinel().with_repr(f"{__name__}.AUTO_DOCKERIGNORE") +AUTO_DOCKERIGNORE = _AutoDockerIgnoreSentinel() From e83725c5a7e64c281fbcefa720d3e56366b18577 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 14:08:09 +0100 Subject: [PATCH 24/25] fix --- modal/image.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modal/image.py b/modal/image.py index 68d662587b..ae2488a4a7 100644 --- a/modal/image.py +++ b/modal/image.py @@ -1217,7 +1217,6 @@ def base_image_context_mount_function() -> _Mount: ignore_patterns = FilePatternMatcher(*read_ignorefile(f)) else: ignore_patterns = FilePatternMatcher() - ignore_patterns = ignore else: ignore_patterns = ignore return _create_context_mount(cmds, ignore_patterns) @@ -1640,7 +1639,6 @@ def base_image_context_mount_function() -> _Mount: ignore_patterns = FilePatternMatcher(*read_ignorefile(f)) else: ignore_patterns = FilePatternMatcher() - ignore_patterns = ignore else: ignore_patterns = ignore with open(os.path.expanduser(path)) as f: From 41b55d2f57237bd85cd59d86234215cc9e3084a5 Mon Sep 17 00:00:00 2001 From: Kasper Vogel Date: Fri, 20 Dec 2024 14:24:04 +0100 Subject: [PATCH 25/25] fix . pattern --- modal/_utils/docker_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modal/_utils/docker_utils.py b/modal/_utils/docker_utils.py index f9b2b6b612..fa65abe397 100644 --- a/modal/_utils/docker_utils.py +++ b/modal/_utils/docker_utils.py @@ -49,7 +49,10 @@ def extract_copy_command_patterns(dockerfile_lines: Sequence[str]) -> list[str]: f"COPY command: {source} using special flags/arguments/variables are not supported" ) - copy_source_patterns.add(source) + if source == ".": + copy_source_patterns.add("./**") + else: + copy_source_patterns.add(source) current_command = ""