From 90e65dbc1d8cf011141f872f98b515db21446616 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 20:43:48 -0800 Subject: [PATCH 01/29] optional put_options --- api/python/quilt3/data_transfer.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index 219c584de57..bd708fa4396 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -301,7 +301,7 @@ def _copy_local_file(ctx: WorkerContext, size: int, src_path: str, dest_path: st ctx.done(PhysicalKey.from_path(dest_path), None) -def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_key: str): +def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_key: str, put_options={}): s3_client = ctx.s3_client_provider.standard_client if not is_mpu(size): @@ -449,7 +449,7 @@ def download_part(part_number): def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: str, src_version: Optional[str], - dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None): + dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None, put_options={}): src_params = dict( Bucket=src_bucket, Key=src_key @@ -580,7 +580,7 @@ def _reuse_remote_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket return None -def _upload_or_reuse_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_path: str): +def _upload_or_reuse_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_path: str, put_options={}): result = _reuse_remote_file(ctx, size, src_path, dest_bucket, dest_path) if result is not None: dest_version_id, checksum = result @@ -588,7 +588,7 @@ def _upload_or_reuse_file(ctx: WorkerContext, size: int, src_path: str, dest_buc ctx.done(PhysicalKey(dest_bucket, dest_path, dest_version_id), checksum) return # Optimization succeeded. # If the optimization didn't happen, do the normal upload. - _upload_file(ctx, size, src_path, dest_bucket, dest_path) + _upload_file(ctx, size, src_path, dest_bucket, dest_path, put_options) def _copy_file_list_last_retry(retry_state): @@ -602,7 +602,7 @@ def _copy_file_list_last_retry(retry_state): wait=wait_exponential(multiplier=1, min=1, max=10), retry=retry_if_not_result(all), retry_error_callback=_copy_file_list_last_retry) -def _copy_file_list_internal(file_list, results, message, callback, exceptions_to_ignore=(ClientError,)): +def _copy_file_list_internal(file_list, results, message, callback, exceptions_to_ignore=(ClientError,), put_options={}): """ Takes a list of tuples (src, dest, size) and copies the data in parallel. `results` is the list where results will be stored. @@ -668,13 +668,13 @@ def done_callback(value, checksum): else: if dest.version_id: raise ValueError("Cannot set VersionId on destination") - _upload_or_reuse_file(ctx, size, src.path, dest.bucket, dest.path) + _upload_or_reuse_file(ctx, size, src.path, dest.bucket, dest.path, put_options) else: if dest.is_local(): _download_file(ctx, size, src.bucket, src.path, src.version_id, dest.path) else: _copy_remote_file(ctx, size, src.bucket, src.path, src.version_id, - dest.bucket, dest.path) + dest.bucket, dest.path, put_options) try: for idx, (args, result) in enumerate(zip(file_list, results)): @@ -855,7 +855,7 @@ def delete_url(src: PhysicalKey): s3_client.delete_object(Bucket=src.bucket, Key=src.path) -def copy_file_list(file_list, message=None, callback=None): +def copy_file_list(file_list, message=None, callback=None, put_options={}): """ Takes a list of tuples (src, dest, size) and copies them in parallel. URLs must be regular files, not directories. @@ -865,10 +865,10 @@ def copy_file_list(file_list, message=None, callback=None): if _looks_like_dir(src) or _looks_like_dir(dest): raise ValueError("Directories are not allowed") - return _copy_file_list_internal(file_list, [None] * len(file_list), message, callback) + return _copy_file_list_internal(file_list, [None] * len(file_list), message, callback, put_options={}) -def copy_file(src: PhysicalKey, dest: PhysicalKey, size=None, message=None, callback=None): +def copy_file(src: PhysicalKey, dest: PhysicalKey, size=None, message=None, callback=None, put_options={}): """ Copies a single file or directory. If src is a file, dest can be a file or a directory. @@ -900,10 +900,10 @@ def sanity_check(rel_path): src = PhysicalKey(src.bucket, src.path, version_id) url_list.append((src, dest, size)) - _copy_file_list_internal(url_list, [None] * len(url_list), message, callback) + _copy_file_list_internal(url_list, [None] * len(url_list), message, callback, put_options={}) -def put_bytes(data: bytes, dest: PhysicalKey): +def put_bytes(data: bytes, dest: PhysicalKey, put_options={}): if _looks_like_dir(dest): raise ValueError("Invalid path: %r" % dest.path) @@ -919,6 +919,7 @@ def put_bytes(data: bytes, dest: PhysicalKey): Bucket=dest.bucket, Key=dest.path, Body=data, + **put_options ) From f16069bf3b3cdab8a4955a40fd69b13a83e1effd Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 20:53:58 -0800 Subject: [PATCH 02/29] public APIs --- api/python/quilt3/api.py | 5 +++-- api/python/quilt3/bucket.py | 10 ++++++---- api/python/quilt3/packages.py | 19 +++++++++++-------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/api/python/quilt3/api.py b/api/python/quilt3/api.py index 561a2a8d35a..f4a01dab057 100644 --- a/api/python/quilt3/api.py +++ b/api/python/quilt3/api.py @@ -21,7 +21,7 @@ ) -def copy(src, dest): +def copy(src, dest, put_options={}): """ Copies ``src`` object from QUILT to ``dest``. @@ -31,8 +31,9 @@ def copy(src, dest): Parameters: src (str): a path to retrieve dest (str): a path to write to + put_options (dict): optional arguments to pass to the PutObject operation """ - copy_file(PhysicalKey.from_url(fix_url(src)), PhysicalKey.from_url(fix_url(dest))) + copy_file(PhysicalKey.from_url(fix_url(src)), PhysicalKey.from_url(fix_url(dest)), put_options=put_options) @ApiTelemetry("api.delete_package") diff --git a/api/python/quilt3/bucket.py b/api/python/quilt3/bucket.py index e0f58d69d2a..1ed9b515c79 100644 --- a/api/python/quilt3/bucket.py +++ b/api/python/quilt3/bucket.py @@ -55,13 +55,14 @@ def search(self, query: T.Union[str, dict], limit: int = 10) -> T.List[dict]: """ return search_api(query, index=f"{self._pk.bucket},{self._pk.bucket}_packages", limit=limit)["hits"]["hits"] - def put_file(self, key, path): + def put_file(self, key, path, put_options={}): """ Stores file at path to key in bucket. Args: key(str): key in bucket to store file at path(str): string representing local path to file + put_options(dict): optional arguments to pass to the PutObject operation Returns: None @@ -71,15 +72,16 @@ def put_file(self, key, path): * if copy fails """ dest = self._pk.join(key) - copy_file(PhysicalKey.from_url(fix_url(path)), dest) + copy_file(PhysicalKey.from_url(fix_url(path)), dest, put_options=put_options) - def put_dir(self, key, directory): + def put_dir(self, key, directory, put_options={}): """ Stores all files in the `directory` under the prefix `key`. Args: key(str): prefix to store files under in bucket directory(str): path to directory to grab files from + put_options(dict): optional arguments to pass to the PutObject operation Returns: None @@ -97,7 +99,7 @@ def put_dir(self, key, directory): src = PhysicalKey.from_path(str(src_path) + '/') dest = self._pk.join(key) - copy_file(src, dest) + copy_file(src, dest, put_options=put_options) def keys(self): """ diff --git a/api/python/quilt3/packages.py b/api/python/quilt3/packages.py index ed844bf97e1..9e2edf14c33 100644 --- a/api/python/quilt3/packages.py +++ b/api/python/quilt3/packages.py @@ -351,13 +351,14 @@ def deserialize(self, func=None, **format_opts): return formats[0].deserialize(data, self._meta, pkey_ext, **format_opts) - def fetch(self, dest=None): + def fetch(self, dest=None, put_options={}): """ Gets objects from entry and saves them to dest. Args: dest: where to put the files Defaults to the entry name + put_options: optional arguments to pass to the PutObject operation Returns: None @@ -368,7 +369,7 @@ def fetch(self, dest=None): else: dest = PhysicalKey.from_url(fix_url(dest)) - copy_file(self.physical_key, dest) + copy_file(self.physical_key, dest, put_options=put_options) # return a package reroot package physical keys after the copy operation succeeds # see GH#388 for context @@ -700,13 +701,14 @@ def __getitem__(self, logical_key): return pkg @ApiTelemetry("package.fetch") - def fetch(self, dest='./'): + def fetch(self, dest='./', put_options={}): """ Copy all descendants to `dest`. Descendants are written under their logical names _relative_ to self. Args: dest: where to put the files (locally) + put_options: optional arguments to pass to the PutObject operation Returns: A new Package object with entries from self, but with physical keys @@ -727,7 +729,7 @@ def fetch(self, dest='./'): new_entry = entry.with_physical_key(new_physical_key) pkg._set(logical_key, new_entry) - copy_file_list(file_list, message="Copying objects") + copy_file_list(file_list, message="Copying objects", put_options=put_options) return pkg @@ -1355,7 +1357,7 @@ def _get_top_hash_parts(cls, meta, entries): @_fix_docstring(workflow=_WORKFLOW_PARAM_DOCSTRING) def push( self, name, registry=None, dest=None, message=None, selector_fn=None, *, - workflow=..., force: bool = False, dedupe: bool = False + workflow=..., force: bool = False, dedupe: bool = False, put_options={} ): """ Copies objects to path, then creates a new package that points to those objects. @@ -1398,19 +1400,20 @@ def push( %(workflow)s force: skip the top hash check and overwrite any existing package dedupe: don't push if the top hash matches the existing package top hash; return the current package + put_options: optional arguments to pass to the PutObject operation Returns: A new package that points to the copied objects. """ return self._push( name, registry, dest, message, selector_fn, workflow=workflow, - print_info=True, force=force, dedupe=dedupe + print_info=True, force=force, dedupe=dedupe, put_options=put_options ) def _push( self, name, registry=None, dest=None, message=None, selector_fn=None, *, workflow, print_info, force: bool, dedupe: bool, - copy_file_list_fn: T.Optional[CopyFileListFn] = None, + copy_file_list_fn: T.Optional[CopyFileListFn] = None, put_options={} ): if selector_fn is None: def selector_fn(*args): @@ -1531,7 +1534,7 @@ def check_hash_conficts(latest_hash): entries.append((logical_key, entry)) file_list.append((physical_key, new_physical_key, entry.size)) - results = copy_file_list_fn(file_list, message="Copying objects") + results = copy_file_list_fn(file_list, message="Copying objects", put_options=put_options) for (logical_key, entry), (versioned_key, checksum) in zip(entries, results): # Create a new package entry pointing to the new remote key. From 48c3c6fbd5a202306947b44475350866b3e50a59 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:00:45 -0800 Subject: [PATCH 03/29] update copy_mock --- api/python/tests/integration/test_packages.py | 5 +++-- api/python/tests/test_bucket.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/api/python/tests/integration/test_packages.py b/api/python/tests/integration/test_packages.py index b9e92c1e5f7..88112615447 100644 --- a/api/python/tests/integration/test_packages.py +++ b/api/python/tests/integration/test_packages.py @@ -43,7 +43,7 @@ LOCAL_REGISTRY = Path('local_registry') # Set by QuiltTestCase -def _mock_copy_file_list(file_list, callback=None, message=None): +def _mock_copy_file_list(file_list, callback=None, message=None, **kwargs): return [(key, None) for _, key, _ in file_list] @@ -449,7 +449,8 @@ def test_fetch_default_dest(tmpdir): filepath = os.path.join(os.path.dirname(__file__), 'data', 'foo.txt') copy_mock.assert_called_once_with( PhysicalKey.from_path(filepath), - PhysicalKey.from_path('foo.txt') + PhysicalKey.from_path('foo.txt'), + put_options={} ) @patch('quilt3.workflows.validate', mock.MagicMock(return_value=None)) diff --git a/api/python/tests/test_bucket.py b/api/python/tests/test_bucket.py index e4cfedc29c3..edb6f5b983d 100644 --- a/api/python/tests/test_bucket.py +++ b/api/python/tests/test_bucket.py @@ -176,7 +176,7 @@ def test_bucket_put_file(self): bucket.put_file(key='README.md', path='./README') # put local file to bucket copy_mock.assert_called_once_with( - PhysicalKey.from_path('README'), PhysicalKey.from_url('s3://test-bucket/README.md')) + PhysicalKey.from_path('README'), PhysicalKey.from_url('s3://test-bucket/README.md'), put_options={}) def test_bucket_put_dir(self): path = pathlib.Path(__file__).parent / 'data' @@ -185,17 +185,17 @@ def test_bucket_put_dir(self): with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('test', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/')) + PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options={}) with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('test/', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/')) + PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options={}) with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/')) + PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/'), put_options={}) def test_remote_delete(self): self.s3_stubber.add_response( From 91085280d127f91dd60310b58402b5db5f88c11e Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:10:19 -0800 Subject: [PATCH 04/29] default to None Thanks @Copilot --- api/python/quilt3/api.py | 2 +- api/python/quilt3/bucket.py | 4 +-- api/python/quilt3/data_transfer.py | 29 +++++++++---------- api/python/quilt3/packages.py | 8 ++--- api/python/tests/integration/test_packages.py | 2 +- api/python/tests/test_bucket.py | 8 ++--- 6 files changed, 25 insertions(+), 28 deletions(-) diff --git a/api/python/quilt3/api.py b/api/python/quilt3/api.py index f4a01dab057..8e370ddea61 100644 --- a/api/python/quilt3/api.py +++ b/api/python/quilt3/api.py @@ -21,7 +21,7 @@ ) -def copy(src, dest, put_options={}): +def copy(src, dest, put_options=None): """ Copies ``src`` object from QUILT to ``dest``. diff --git a/api/python/quilt3/bucket.py b/api/python/quilt3/bucket.py index 1ed9b515c79..fab661701f9 100644 --- a/api/python/quilt3/bucket.py +++ b/api/python/quilt3/bucket.py @@ -55,7 +55,7 @@ def search(self, query: T.Union[str, dict], limit: int = 10) -> T.List[dict]: """ return search_api(query, index=f"{self._pk.bucket},{self._pk.bucket}_packages", limit=limit)["hits"]["hits"] - def put_file(self, key, path, put_options={}): + def put_file(self, key, path, put_options=None): """ Stores file at path to key in bucket. @@ -74,7 +74,7 @@ def put_file(self, key, path, put_options={}): dest = self._pk.join(key) copy_file(PhysicalKey.from_url(fix_url(path)), dest, put_options=put_options) - def put_dir(self, key, directory, put_options={}): + def put_dir(self, key, directory, put_options=None): """ Stores all files in the `directory` under the prefix `key`. diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index bd708fa4396..20d03514077 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -301,7 +301,7 @@ def _copy_local_file(ctx: WorkerContext, size: int, src_path: str, dest_path: st ctx.done(PhysicalKey.from_path(dest_path), None) -def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_key: str, put_options={}): +def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_key: str, put_options=None): s3_client = ctx.s3_client_provider.standard_client if not is_mpu(size): @@ -449,7 +449,7 @@ def download_part(part_number): def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: str, src_version: Optional[str], - dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None, put_options={}): + dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None, put_options=None): src_params = dict( Bucket=src_bucket, Key=src_key @@ -580,7 +580,7 @@ def _reuse_remote_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket return None -def _upload_or_reuse_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_path: str, put_options={}): +def _upload_or_reuse_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_path: str, put_options=None): result = _reuse_remote_file(ctx, size, src_path, dest_bucket, dest_path) if result is not None: dest_version_id, checksum = result @@ -602,7 +602,7 @@ def _copy_file_list_last_retry(retry_state): wait=wait_exponential(multiplier=1, min=1, max=10), retry=retry_if_not_result(all), retry_error_callback=_copy_file_list_last_retry) -def _copy_file_list_internal(file_list, results, message, callback, exceptions_to_ignore=(ClientError,), put_options={}): +def _copy_file_list_internal(file_list, results, message, callback, exceptions_to_ignore=(ClientError,), put_options=None): """ Takes a list of tuples (src, dest, size) and copies the data in parallel. `results` is the list where results will be stored. @@ -855,7 +855,7 @@ def delete_url(src: PhysicalKey): s3_client.delete_object(Bucket=src.bucket, Key=src.path) -def copy_file_list(file_list, message=None, callback=None, put_options={}): +def copy_file_list(file_list, message=None, callback=None, put_options=None): """ Takes a list of tuples (src, dest, size) and copies them in parallel. URLs must be regular files, not directories. @@ -865,10 +865,10 @@ def copy_file_list(file_list, message=None, callback=None, put_options={}): if _looks_like_dir(src) or _looks_like_dir(dest): raise ValueError("Directories are not allowed") - return _copy_file_list_internal(file_list, [None] * len(file_list), message, callback, put_options={}) + return _copy_file_list_internal(file_list, [None] * len(file_list), message, callback, put_options=None) -def copy_file(src: PhysicalKey, dest: PhysicalKey, size=None, message=None, callback=None, put_options={}): +def copy_file(src: PhysicalKey, dest: PhysicalKey, size=None, message=None, callback=None, put_options=None): """ Copies a single file or directory. If src is a file, dest can be a file or a directory. @@ -900,10 +900,10 @@ def sanity_check(rel_path): src = PhysicalKey(src.bucket, src.path, version_id) url_list.append((src, dest, size)) - _copy_file_list_internal(url_list, [None] * len(url_list), message, callback, put_options={}) + _copy_file_list_internal(url_list, [None] * len(url_list), message, callback, put_options=None) -def put_bytes(data: bytes, dest: PhysicalKey, put_options={}): +def put_bytes(data: bytes, dest: PhysicalKey, put_options=None): if _looks_like_dir(dest): raise ValueError("Invalid path: %r" % dest.path) @@ -915,13 +915,10 @@ def put_bytes(data: bytes, dest: PhysicalKey, put_options={}): if dest.version_id is not None: raise ValueError("Cannot set VersionId on destination") s3_client = S3ClientProvider().standard_client - s3_client.put_object( - Bucket=dest.bucket, - Key=dest.path, - Body=data, - **put_options - ) - + s3_params = dict(Bucket=dest.bucket, Key=dest.path, Body=data) + if put_options: + s3_params.update(put_options) + s3_client.put_object(**s3_params) def _local_get_bytes(pk: PhysicalKey): return pathlib.Path(pk.path).read_bytes() diff --git a/api/python/quilt3/packages.py b/api/python/quilt3/packages.py index 9e2edf14c33..353cf6a9e98 100644 --- a/api/python/quilt3/packages.py +++ b/api/python/quilt3/packages.py @@ -351,7 +351,7 @@ def deserialize(self, func=None, **format_opts): return formats[0].deserialize(data, self._meta, pkey_ext, **format_opts) - def fetch(self, dest=None, put_options={}): + def fetch(self, dest=None, put_options=None): """ Gets objects from entry and saves them to dest. @@ -701,7 +701,7 @@ def __getitem__(self, logical_key): return pkg @ApiTelemetry("package.fetch") - def fetch(self, dest='./', put_options={}): + def fetch(self, dest='./', put_options=None): """ Copy all descendants to `dest`. Descendants are written under their logical names _relative_ to self. @@ -1357,7 +1357,7 @@ def _get_top_hash_parts(cls, meta, entries): @_fix_docstring(workflow=_WORKFLOW_PARAM_DOCSTRING) def push( self, name, registry=None, dest=None, message=None, selector_fn=None, *, - workflow=..., force: bool = False, dedupe: bool = False, put_options={} + workflow=..., force: bool = False, dedupe: bool = False, put_options=None ): """ Copies objects to path, then creates a new package that points to those objects. @@ -1413,7 +1413,7 @@ def push( def _push( self, name, registry=None, dest=None, message=None, selector_fn=None, *, workflow, print_info, force: bool, dedupe: bool, - copy_file_list_fn: T.Optional[CopyFileListFn] = None, put_options={} + copy_file_list_fn: T.Optional[CopyFileListFn] = None, put_options=None ): if selector_fn is None: def selector_fn(*args): diff --git a/api/python/tests/integration/test_packages.py b/api/python/tests/integration/test_packages.py index 88112615447..eb1adbf83ba 100644 --- a/api/python/tests/integration/test_packages.py +++ b/api/python/tests/integration/test_packages.py @@ -450,7 +450,7 @@ def test_fetch_default_dest(tmpdir): copy_mock.assert_called_once_with( PhysicalKey.from_path(filepath), PhysicalKey.from_path('foo.txt'), - put_options={} + put_options=None ) @patch('quilt3.workflows.validate', mock.MagicMock(return_value=None)) diff --git a/api/python/tests/test_bucket.py b/api/python/tests/test_bucket.py index edb6f5b983d..a56a72520ce 100644 --- a/api/python/tests/test_bucket.py +++ b/api/python/tests/test_bucket.py @@ -176,7 +176,7 @@ def test_bucket_put_file(self): bucket.put_file(key='README.md', path='./README') # put local file to bucket copy_mock.assert_called_once_with( - PhysicalKey.from_path('README'), PhysicalKey.from_url('s3://test-bucket/README.md'), put_options={}) + PhysicalKey.from_path('README'), PhysicalKey.from_url('s3://test-bucket/README.md'), put_options=None) def test_bucket_put_dir(self): path = pathlib.Path(__file__).parent / 'data' @@ -185,17 +185,17 @@ def test_bucket_put_dir(self): with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('test', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options={}) + PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('test/', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options={}) + PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/'), put_options={}) + PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/'), put_options=None) def test_remote_delete(self): self.s3_stubber.add_response( From 66985e470c6d2359130f5c14374820a568ef8ae0 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:10:23 -0800 Subject: [PATCH 05/29] Update CHANGELOG.md --- docs/CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c4eed9428d6..85031324cc5 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -12,7 +12,12 @@ Entries inside each section should be ordered by type: ## CLI !--> +# unreleased - YYYY-MM-DD + +## Python API +* [Added] Extend public methods that write to S3 to take optional arguments to pass to PutObject: `quilt3.push`, `quilt3.fetch`, `quilt3.copy`, `quilt3.put_file`, `quilt3.put_dir` ([#4219](https://github.com/quiltdata/quilt/pull/4219)) + # 6.1.0 - 2024-10-14 ## Python API From 19b36e847557b1bda8f38d4ec9a2979391ee6797 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:18:30 -0800 Subject: [PATCH 06/29] extra_args=put_options --- api/python/quilt3/data_transfer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index 20d03514077..f99a9e56096 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -449,7 +449,7 @@ def download_part(part_number): def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: str, src_version: Optional[str], - dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None, put_options=None): + dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None): src_params = dict( Bucket=src_bucket, Key=src_key @@ -674,7 +674,7 @@ def done_callback(value, checksum): _download_file(ctx, size, src.bucket, src.path, src.version_id, dest.path) else: _copy_remote_file(ctx, size, src.bucket, src.path, src.version_id, - dest.bucket, dest.path, put_options) + dest.bucket, dest.path, extra_args=put_options) try: for idx, (args, result) in enumerate(zip(file_list, results)): From ce7e1286102fad59ecc161e48570e8c4e3e2179c Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:21:20 -0800 Subject: [PATCH 07/29] lint --- api/python/quilt3/data_transfer.py | 7 +++++-- api/python/tests/test_bucket.py | 12 ++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index f99a9e56096..79610c9bd7e 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -580,7 +580,8 @@ def _reuse_remote_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket return None -def _upload_or_reuse_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_path: str, put_options=None): +def _upload_or_reuse_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, + dest_path: str, put_options=None): result = _reuse_remote_file(ctx, size, src_path, dest_bucket, dest_path) if result is not None: dest_version_id, checksum = result @@ -602,7 +603,8 @@ def _copy_file_list_last_retry(retry_state): wait=wait_exponential(multiplier=1, min=1, max=10), retry=retry_if_not_result(all), retry_error_callback=_copy_file_list_last_retry) -def _copy_file_list_internal(file_list, results, message, callback, exceptions_to_ignore=(ClientError,), put_options=None): +def _copy_file_list_internal(file_list, results, message, callback, + exceptions_to_ignore=(ClientError,), put_options=None): """ Takes a list of tuples (src, dest, size) and copies the data in parallel. `results` is the list where results will be stored. @@ -920,6 +922,7 @@ def put_bytes(data: bytes, dest: PhysicalKey, put_options=None): s3_params.update(put_options) s3_client.put_object(**s3_params) + def _local_get_bytes(pk: PhysicalKey): return pathlib.Path(pk.path).read_bytes() diff --git a/api/python/tests/test_bucket.py b/api/python/tests/test_bucket.py index a56a72520ce..5fffca8b000 100644 --- a/api/python/tests/test_bucket.py +++ b/api/python/tests/test_bucket.py @@ -176,7 +176,8 @@ def test_bucket_put_file(self): bucket.put_file(key='README.md', path='./README') # put local file to bucket copy_mock.assert_called_once_with( - PhysicalKey.from_path('README'), PhysicalKey.from_url('s3://test-bucket/README.md'), put_options=None) + PhysicalKey.from_path('README'), + PhysicalKey.from_url('s3://test-bucket/README.md'), put_options=None) def test_bucket_put_dir(self): path = pathlib.Path(__file__).parent / 'data' @@ -185,17 +186,20 @@ def test_bucket_put_dir(self): with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('test', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) + PhysicalKey.from_path(str(path) + '/'), + PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('test/', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) + PhysicalKey.from_path(str(path) + '/'), + PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) with patch("quilt3.bucket.copy_file") as copy_mock: bucket.put_dir('', path) copy_mock.assert_called_once_with( - PhysicalKey.from_path(str(path) + '/'), PhysicalKey.from_url('s3://test-bucket/'), put_options=None) + PhysicalKey.from_path(str(path) + '/'), + PhysicalKey.from_url('s3://test-bucket/'), put_options=None) def test_remote_delete(self): self.s3_stubber.add_response( From 692b3f265d84539d71e6ef7926ef985bbbe233be Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:29:33 -0800 Subject: [PATCH 08/29] test put_options pass to copy_mock --- api/python/tests/test_bucket.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/api/python/tests/test_bucket.py b/api/python/tests/test_bucket.py index 5fffca8b000..0a5dbf42fe4 100644 --- a/api/python/tests/test_bucket.py +++ b/api/python/tests/test_bucket.py @@ -172,34 +172,37 @@ def test_bucket_select(self): def test_bucket_put_file(self): with patch("quilt3.bucket.copy_file") as copy_mock: + opts = {'SSECustomerKey': 'FakeKey'} bucket = Bucket('s3://test-bucket') - bucket.put_file(key='README.md', path='./README') # put local file to bucket + bucket.put_file(key='README.md', path='./README', put_options=opts) + # put local file to bucket copy_mock.assert_called_once_with( PhysicalKey.from_path('README'), - PhysicalKey.from_url('s3://test-bucket/README.md'), put_options=None) + PhysicalKey.from_url('s3://test-bucket/README.md'), put_options=opts) def test_bucket_put_dir(self): path = pathlib.Path(__file__).parent / 'data' bucket = Bucket('s3://test-bucket') + opts = {'SSECustomerKey': 'FakeKey'} with patch("quilt3.bucket.copy_file") as copy_mock: - bucket.put_dir('test', path) + bucket.put_dir('test', path, opts) copy_mock.assert_called_once_with( PhysicalKey.from_path(str(path) + '/'), - PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) + PhysicalKey.from_url('s3://test-bucket/test/'), put_options=opts) with patch("quilt3.bucket.copy_file") as copy_mock: - bucket.put_dir('test/', path) + bucket.put_dir('test/', path, opts) copy_mock.assert_called_once_with( PhysicalKey.from_path(str(path) + '/'), - PhysicalKey.from_url('s3://test-bucket/test/'), put_options=None) + PhysicalKey.from_url('s3://test-bucket/test/'), put_options=opts) with patch("quilt3.bucket.copy_file") as copy_mock: - bucket.put_dir('', path) + bucket.put_dir('', path, opts) copy_mock.assert_called_once_with( PhysicalKey.from_path(str(path) + '/'), - PhysicalKey.from_url('s3://test-bucket/'), put_options=None) + PhysicalKey.from_url('s3://test-bucket/'), put_options=opts) def test_remote_delete(self): self.s3_stubber.add_response( From 22e3a27be493fa7dfd5e9265d55e3043d7a7f3fc Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:35:50 -0800 Subject: [PATCH 09/29] update gendocs --- docs/CHANGELOG.md | 2 +- docs/api-reference/Bucket.md | 6 ++++-- docs/api-reference/Package.md | 9 ++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 85031324cc5..d2d7174bd3e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -16,7 +16,7 @@ Entries inside each section should be ordered by type: ## Python API -* [Added] Extend public methods that write to S3 to take optional arguments to pass to PutObject: `quilt3.push`, `quilt3.fetch`, `quilt3.copy`, `quilt3.put_file`, `quilt3.put_dir` ([#4219](https://github.com/quiltdata/quilt/pull/4219)) +* [Added] Extend public methods that write to S3 to take optional `put_options` argument to pass to PutObject: `quilt3.Package.push`, `quilt3.Package.fetch`, `quilt3.PackageEntry.fetch`, `quilt3.Bucket.put_file`, `quilt3.Bucket.put_dir` ([#4219](https://github.com/quiltdata/quilt/pull/4219)) # 6.1.0 - 2024-10-14 diff --git a/docs/api-reference/Bucket.md b/docs/api-reference/Bucket.md index 7570078cc3a..b0243d573b9 100644 --- a/docs/api-reference/Bucket.md +++ b/docs/api-reference/Bucket.md @@ -33,7 +33,7 @@ __Returns__ search results -## Bucket.put\_file(self, key, path) {#Bucket.put\_file} +## Bucket.put\_file(self, key, path, put\_options=None) {#Bucket.put\_file} Stores file at path to key in bucket. @@ -41,6 +41,7 @@ __Arguments__ * __key(str)__: key in bucket to store file at * __path(str)__: string representing local path to file +* __put_options(dict)__: options to pass to the underlying storage layer __Returns__ @@ -52,7 +53,7 @@ __Raises__ * if copy fails -## Bucket.put\_dir(self, key, directory) {#Bucket.put\_dir} +## Bucket.put\_dir(self, key, directory, put\_options=None) {#Bucket.put\_dir} Stores all files in the `directory` under the prefix `key`. @@ -60,6 +61,7 @@ __Arguments__ * __key(str)__: prefix to store files under in bucket * __directory(str)__: path to directory to grab files from +* __put_options(dict)__: options to pass to the underlying storage layer __Returns__ diff --git a/docs/api-reference/Package.md b/docs/api-reference/Package.md index 526ea577b67..0c1f8f94b17 100644 --- a/docs/api-reference/Package.md +++ b/docs/api-reference/Package.md @@ -86,7 +86,7 @@ PackageEntry if prefix matches a logical_key exactly otherwise Package -## Package.fetch(self, dest='./') {#Package.fetch} +## Package.fetch(self, dest='./', put\_options=None) {#Package.fetch} Copy all descendants to `dest`. Descendants are written under their logical names _relative_ to self. @@ -94,6 +94,7 @@ names _relative_ to self. __Arguments__ * __dest__: where to put the files (locally) +* __put_options__: optional arguments to pass to the PutObject operation __Returns__ @@ -272,7 +273,7 @@ __Raises__ * `KeyError`: when logical_key is not present to be deleted -## Package.push(self, name, registry=None, dest=None, message=None, selector\_fn=None, \*, workflow=Ellipsis, force: bool = False, dedupe: bool = False) {#Package.push} +## Package.push(self, name, registry=None, dest=None, message=None, selector\_fn=None, \*, workflow=Ellipsis, force: bool = False, dedupe: bool = False, put\_options=None) {#Package.push} Copies objects to path, then creates a new package that points to those objects. Copies each object in this package to path according to logical key structure, @@ -318,6 +319,7 @@ __Arguments__ * __force__: skip the top hash check and overwrite any existing package * __dedupe__: don't push if the top hash matches the existing package top hash; return the current package +* __put_options__: optional arguments to pass to the PutObject operation __Returns__ @@ -507,7 +509,7 @@ hash verification fail when deserialization metadata is not present -## PackageEntry.fetch(self, dest=None) {#PackageEntry.fetch} +## PackageEntry.fetch(self, dest=None, put\_options=None) {#PackageEntry.fetch} Gets objects from entry and saves them to dest. @@ -515,6 +517,7 @@ __Arguments__ * __dest__: where to put the files Defaults to the entry name +* __put_options__: optional arguments to pass to the PutObject operation __Returns__ From 1f455290c6fa09e2adab00705d8172acb0060fa6 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Sun, 17 Nov 2024 21:38:18 -0800 Subject: [PATCH 10/29] Bucket.md --- docs/api-reference/Bucket.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api-reference/Bucket.md b/docs/api-reference/Bucket.md index b0243d573b9..8aeab93cb27 100644 --- a/docs/api-reference/Bucket.md +++ b/docs/api-reference/Bucket.md @@ -41,7 +41,7 @@ __Arguments__ * __key(str)__: key in bucket to store file at * __path(str)__: string representing local path to file -* __put_options(dict)__: options to pass to the underlying storage layer +* __put_options(dict)__: optional arguments to pass to the PutObject operation __Returns__ @@ -61,7 +61,7 @@ __Arguments__ * __key(str)__: prefix to store files under in bucket * __directory(str)__: path to directory to grab files from -* __put_options(dict)__: options to pass to the underlying storage layer +* __put_options(dict)__: optional arguments to pass to the PutObject operation __Returns__ From 541ef1fe57c6bd287cf3d19859123160a97aebac Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" Date: Wed, 20 Nov 2024 17:04:32 -0800 Subject: [PATCH 11/29] Fix typos from code review Co-authored-by: Alexei Mochalov --- api/python/tests/integration/test_packages.py | 2 +- api/python/tests/test_bucket.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/python/tests/integration/test_packages.py b/api/python/tests/integration/test_packages.py index eb1adbf83ba..b6d686207b0 100644 --- a/api/python/tests/integration/test_packages.py +++ b/api/python/tests/integration/test_packages.py @@ -450,7 +450,7 @@ def test_fetch_default_dest(tmpdir): copy_mock.assert_called_once_with( PhysicalKey.from_path(filepath), PhysicalKey.from_path('foo.txt'), - put_options=None + put_options=None, ) @patch('quilt3.workflows.validate', mock.MagicMock(return_value=None)) diff --git a/api/python/tests/test_bucket.py b/api/python/tests/test_bucket.py index 0a5dbf42fe4..1b80c99e87c 100644 --- a/api/python/tests/test_bucket.py +++ b/api/python/tests/test_bucket.py @@ -179,7 +179,9 @@ def test_bucket_put_file(self): copy_mock.assert_called_once_with( PhysicalKey.from_path('README'), - PhysicalKey.from_url('s3://test-bucket/README.md'), put_options=opts) + PhysicalKey.from_url('s3://test-bucket/README.md'), + put_options=opts, + ) def test_bucket_put_dir(self): path = pathlib.Path(__file__).parent / 'data' From 8b7cf55a7e7b36235383561efb5d2cf06668468c Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Thu, 26 Dec 2024 12:02:32 -0800 Subject: [PATCH 12/29] **s3_extra_params for all s3_client --- api/python/quilt3/data_transfer.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index 36488afb98c..bb36c871cee 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -303,15 +303,19 @@ def _copy_local_file(ctx: WorkerContext, size: int, src_path: str, dest_path: st def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_key: str, put_options=None): s3_client = ctx.s3_client_provider.standard_client + s3_extra_params: dict = put_options or {} if not is_mpu(size): with ReadFileChunk.from_filename(src_path, 0, size, [ctx.progress]) as fd: - resp = s3_client.put_object( + s3_params = dict( Body=fd, Bucket=dest_bucket, Key=dest_key, ChecksumAlgorithm='SHA256', ) + if put_options: + s3_params.update(put_options) + resp = s3_client.put_object(**s3_params) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum = _simple_s3_to_quilt_checksum(resp['ChecksumSHA256']) @@ -321,6 +325,7 @@ def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, Bucket=dest_bucket, Key=dest_key, ChecksumAlgorithm='SHA256', + **s3_extra_params, ) upload_id = resp['UploadId'] @@ -343,6 +348,7 @@ def upload_part(i, start, end): UploadId=upload_id, PartNumber=part_id, ChecksumAlgorithm='SHA256', + **s3_extra_params, ) with lock: parts[i] = dict( @@ -359,6 +365,7 @@ def upload_part(i, start, end): Key=dest_key, UploadId=upload_id, MultipartUpload={'Parts': parts}, + **s3_extra_params, ) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum, _ = resp['ChecksumSHA256'].split('-', 1) @@ -450,6 +457,7 @@ def download_part(part_number): def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: str, src_version: Optional[str], dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None): + s3_extra_params: dict = dict(extra_args) if extra_args else {} src_params = dict( Bucket=src_bucket, Key=src_key @@ -469,10 +477,7 @@ def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: s ChecksumAlgorithm='SHA256', ) - if extra_args: - params.update(extra_args) - - resp = s3_client.copy_object(**params) + resp = s3_client.copy_object(**params, **s3_extra_params) ctx.progress(size) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum = _simple_s3_to_quilt_checksum(resp['CopyObjectResult']['ChecksumSHA256']) @@ -482,6 +487,7 @@ def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: s Bucket=dest_bucket, Key=dest_key, ChecksumAlgorithm='SHA256', + **s3_extra_params, ) upload_id = resp['UploadId'] @@ -503,6 +509,7 @@ def upload_part(i, start, end): Key=dest_key, UploadId=upload_id, PartNumber=part_id, + **s3_extra_params, ) with lock: parts[i] = dict( @@ -521,6 +528,7 @@ def upload_part(i, start, end): Key=dest_key, UploadId=upload_id, MultipartUpload={'Parts': parts}, + **s3_extra_params, ) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum, _ = resp['ChecksumSHA256'].split('-', 1) From d9252542c26aa527fb1d57367458e2215e27452d Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Thu, 26 Dec 2024 15:35:08 -0800 Subject: [PATCH 13/29] add formatting --- api/python/tests/test_bucket.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/python/tests/test_bucket.py b/api/python/tests/test_bucket.py index 1b80c99e87c..0ce1d1483cc 100644 --- a/api/python/tests/test_bucket.py +++ b/api/python/tests/test_bucket.py @@ -174,8 +174,8 @@ def test_bucket_put_file(self): with patch("quilt3.bucket.copy_file") as copy_mock: opts = {'SSECustomerKey': 'FakeKey'} bucket = Bucket('s3://test-bucket') - bucket.put_file(key='README.md', path='./README', put_options=opts) # put local file to bucket + bucket.put_file(key='README.md', path='./README', put_options=opts) copy_mock.assert_called_once_with( PhysicalKey.from_path('README'), From 290b8c3ca7a3a159aed19b4735357a9ccf1a420a Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Thu, 26 Dec 2024 15:49:14 -0800 Subject: [PATCH 14/29] add_put_options_safely checks null and existing keys --- api/python/quilt3/data_transfer.py | 35 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index bb36c871cee..d70e1acdea7 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -52,6 +52,16 @@ logger = logging.getLogger(__name__) +def add_put_options_safely(params: dict, put_options: Optional[dict]): + """ + Add put options to the params dictionary safely. + This method ensures that the put options do not overwrite existing keys in the params dictionary. + """ + if put_options: + for key, value in put_options.items(): + if key in params: + raise ValueError(f"Key {key} already exists in params.") + params[key] = value class S3Api(Enum): GET_OBJECT = "GET_OBJECT" @@ -303,7 +313,6 @@ def _copy_local_file(ctx: WorkerContext, size: int, src_path: str, dest_path: st def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, dest_key: str, put_options=None): s3_client = ctx.s3_client_provider.standard_client - s3_extra_params: dict = put_options or {} if not is_mpu(size): with ReadFileChunk.from_filename(src_path, 0, size, [ctx.progress]) as fd: @@ -313,20 +322,20 @@ def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, Key=dest_key, ChecksumAlgorithm='SHA256', ) - if put_options: - s3_params.update(put_options) + add_put_options_safely(s3_params, put_options) resp = s3_client.put_object(**s3_params) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum = _simple_s3_to_quilt_checksum(resp['ChecksumSHA256']) ctx.done(PhysicalKey(dest_bucket, dest_key, version_id), checksum) else: - resp = s3_client.create_multipart_upload( + s3_create_params = dict( Bucket=dest_bucket, Key=dest_key, ChecksumAlgorithm='SHA256', - **s3_extra_params, ) + add_put_options_safely(s3_create_params, put_options) + resp = s3_client.create_multipart_upload(s3_create_params) upload_id = resp['UploadId'] chunksize = get_checksum_chunksize(size) @@ -341,15 +350,16 @@ def upload_part(i, start, end): nonlocal remaining part_id = i + 1 with ReadFileChunk.from_filename(src_path, start, end-start, [ctx.progress]) as fd: - part = s3_client.upload_part( + s3_upload_params = dict( Body=fd, Bucket=dest_bucket, Key=dest_key, UploadId=upload_id, PartNumber=part_id, ChecksumAlgorithm='SHA256', - **s3_extra_params, ) + add_put_options_safely(s3_upload_params, put_options) + part = s3_client.upload_part(s3_upload_params) with lock: parts[i] = dict( PartNumber=part_id, @@ -360,12 +370,18 @@ def upload_part(i, start, end): done = remaining == 0 if done: + s3_complete_params = dict( + Bucket=dest_bucket, + Key=dest_key, + UploadId=upload_id, + MultipartUpload={'Parts': parts}, + ) + add_put_options_safely(s3_complete_params, put_options) resp = s3_client.complete_multipart_upload( Bucket=dest_bucket, Key=dest_key, UploadId=upload_id, MultipartUpload={'Parts': parts}, - **s3_extra_params, ) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum, _ = resp['ChecksumSHA256'].split('-', 1) @@ -926,8 +942,7 @@ def put_bytes(data: bytes, dest: PhysicalKey, put_options=None): raise ValueError("Cannot set VersionId on destination") s3_client = S3ClientProvider().standard_client s3_params = dict(Bucket=dest.bucket, Key=dest.path, Body=data) - if put_options: - s3_params.update(put_options) + add_put_options_safely(s3_params, put_options) s3_client.put_object(**s3_params) From 17dd533cbea98292c8351e0a98af3935eb2ff58a Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Thu, 26 Dec 2024 15:57:27 -0800 Subject: [PATCH 15/29] call with keyword args --- api/python/quilt3/data_transfer.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index d70e1acdea7..c5aebe085e2 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -52,6 +52,7 @@ logger = logging.getLogger(__name__) + def add_put_options_safely(params: dict, put_options: Optional[dict]): """ Add put options to the params dictionary safely. @@ -63,6 +64,7 @@ def add_put_options_safely(params: dict, put_options: Optional[dict]): raise ValueError(f"Key {key} already exists in params.") params[key] = value + class S3Api(Enum): GET_OBJECT = "GET_OBJECT" HEAD_OBJECT = "HEAD_OBJECT" @@ -335,7 +337,7 @@ def _upload_file(ctx: WorkerContext, size: int, src_path: str, dest_bucket: str, ChecksumAlgorithm='SHA256', ) add_put_options_safely(s3_create_params, put_options) - resp = s3_client.create_multipart_upload(s3_create_params) + resp = s3_client.create_multipart_upload(**s3_create_params) upload_id = resp['UploadId'] chunksize = get_checksum_chunksize(size) @@ -359,7 +361,7 @@ def upload_part(i, start, end): ChecksumAlgorithm='SHA256', ) add_put_options_safely(s3_upload_params, put_options) - part = s3_client.upload_part(s3_upload_params) + part = s3_client.upload_part(**s3_upload_params) with lock: parts[i] = dict( PartNumber=part_id, @@ -473,7 +475,6 @@ def download_part(part_number): def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: str, src_version: Optional[str], dest_bucket: str, dest_key: str, extra_args: Optional[Iterable[Tuple[str, Any]]] = None): - s3_extra_params: dict = dict(extra_args) if extra_args else {} src_params = dict( Bucket=src_bucket, Key=src_key @@ -492,19 +493,20 @@ def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: s Key=dest_key, ChecksumAlgorithm='SHA256', ) - - resp = s3_client.copy_object(**params, **s3_extra_params) + add_put_options_safely(params, extra_args) + resp = s3_client.copy_object(**params) ctx.progress(size) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum = _simple_s3_to_quilt_checksum(resp['CopyObjectResult']['ChecksumSHA256']) ctx.done(PhysicalKey(dest_bucket, dest_key, version_id), checksum) else: - resp = s3_client.create_multipart_upload( + s3_create_params = dict( Bucket=dest_bucket, Key=dest_key, ChecksumAlgorithm='SHA256', - **s3_extra_params, ) + add_put_options_safely(s3_create_params, extra_args) + resp = s3_client.create_multipart_upload(**s3_create_params) upload_id = resp['UploadId'] chunksize = get_checksum_chunksize(size) @@ -518,15 +520,16 @@ def _copy_remote_file(ctx: WorkerContext, size: int, src_bucket: str, src_key: s def upload_part(i, start, end): nonlocal remaining part_id = i + 1 - part = s3_client.upload_part_copy( + s3_upload_params = dict( CopySource=src_params, CopySourceRange=f'bytes={start}-{end-1}', Bucket=dest_bucket, Key=dest_key, UploadId=upload_id, PartNumber=part_id, - **s3_extra_params, ) + add_put_options_safely(s3_upload_params, extra_args) + part = s3_client.upload_part_copy(**s3_upload_params) with lock: parts[i] = dict( PartNumber=part_id, @@ -539,13 +542,14 @@ def upload_part(i, start, end): ctx.progress(end - start) if done: - resp = s3_client.complete_multipart_upload( + s3_complete_params = dict( Bucket=dest_bucket, Key=dest_key, UploadId=upload_id, MultipartUpload={'Parts': parts}, - **s3_extra_params, ) + add_put_options_safely(s3_complete_params, extra_args) + resp = s3_client.complete_multipart_upload(**s3_complete_params) version_id = resp.get('VersionId') # Absent in unversioned buckets. checksum, _ = resp['ChecksumSHA256'].split('-', 1) ctx.done(PhysicalKey(dest_bucket, dest_key, version_id), checksum) From 6f43df3ae5a886f7488be657b92b4381a363b85d Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 10:35:18 -0800 Subject: [PATCH 16/29] test_add_put_options_safely --- api/python/tests/test_data_transfer.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/api/python/tests/test_data_transfer.py b/api/python/tests/test_data_transfer.py index 05a71597a98..f0d2b89f2bc 100644 --- a/api/python/tests/test_data_transfer.py +++ b/api/python/tests/test_data_transfer.py @@ -29,6 +29,25 @@ class DataTransferTest(QuiltTestCase): + def test_add_put_options_safely(self): + OPTIONS_TEMPLATE = {'SSECustomerKey': '123456789'} + + # Test that the function adds the options + options_empty = {} + data_transfer.add_put_options_safely(options_empty, OPTIONS_TEMPLATE) + assert options_empty == OPTIONS_TEMPLATE + + # Test that the function works when passed None + options_unchanged = OPTIONS_TEMPLATE.copy() + data_transfer.add_put_options_safely(options_unchanged, None) + assert options_unchanged == OPTIONS_TEMPLATE + + # Test that the function raises error if modifying the original options + options_original = OPTIONS_TEMPLATE.copy() + options_modified = {'SSECustomerKey': '987654321'} + with pytest.raises(ValueError): + data_transfer.add_put_options_safely(options_original, OPTIONS_TEMPLATE) + def test_select(self): # Note: The boto3 Stubber doesn't work properly with s3_client.select_object_content(). # The return value expects a dict where an iterable is in the actual results. From fcea30556f316309093124f87a053826dc6718e7 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 10:35:33 -0800 Subject: [PATCH 17/29] remove arg types in doc --- api/python/quilt3/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/python/quilt3/api.py b/api/python/quilt3/api.py index 8e370ddea61..da1b18899b9 100644 --- a/api/python/quilt3/api.py +++ b/api/python/quilt3/api.py @@ -29,9 +29,9 @@ def copy(src, dest, put_options=None): or local file paths (starting with ``file:///``). Parameters: - src (str): a path to retrieve - dest (str): a path to write to - put_options (dict): optional arguments to pass to the PutObject operation + src: a path to retrieve + dest: a path to write to + put_options: optional arguments to pass to the PutObject operation """ copy_file(PhysicalKey.from_url(fix_url(src)), PhysicalKey.from_url(fix_url(dest)), put_options=put_options) From 5d151b1f6bed59f3fbe96f6d2a7f48218d0d4601 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 10:37:35 -0800 Subject: [PATCH 18/29] lint --- api/python/tests/test_data_transfer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/python/tests/test_data_transfer.py b/api/python/tests/test_data_transfer.py index f0d2b89f2bc..69af9fc0785 100644 --- a/api/python/tests/test_data_transfer.py +++ b/api/python/tests/test_data_transfer.py @@ -36,7 +36,7 @@ def test_add_put_options_safely(self): options_empty = {} data_transfer.add_put_options_safely(options_empty, OPTIONS_TEMPLATE) assert options_empty == OPTIONS_TEMPLATE - + # Test that the function works when passed None options_unchanged = OPTIONS_TEMPLATE.copy() data_transfer.add_put_options_safely(options_unchanged, None) From 0f89cd9ab045264caac954d1352ed70199a61b34 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 11:03:00 -0800 Subject: [PATCH 19/29] make tests and error mesage more robust --- api/python/quilt3/data_transfer.py | 2 +- api/python/tests/test_data_transfer.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index c5aebe085e2..584d5c5fbb6 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -61,7 +61,7 @@ def add_put_options_safely(params: dict, put_options: Optional[dict]): if put_options: for key, value in put_options.items(): if key in params: - raise ValueError(f"Key {key} already exists in params.") + raise ValueError(f"Cannot override key `{key}` using put_options: {put_options}.") params[key] = value diff --git a/api/python/tests/test_data_transfer.py b/api/python/tests/test_data_transfer.py index 69af9fc0785..8d5a588a595 100644 --- a/api/python/tests/test_data_transfer.py +++ b/api/python/tests/test_data_transfer.py @@ -35,18 +35,20 @@ def test_add_put_options_safely(self): # Test that the function adds the options options_empty = {} data_transfer.add_put_options_safely(options_empty, OPTIONS_TEMPLATE) - assert options_empty == OPTIONS_TEMPLATE + self.assertEqual(options_empty, OPTIONS_TEMPLATE) # Test that the function works when passed None options_unchanged = OPTIONS_TEMPLATE.copy() data_transfer.add_put_options_safely(options_unchanged, None) - assert options_unchanged == OPTIONS_TEMPLATE + self.assertEqual(options_unchanged, OPTIONS_TEMPLATE) - # Test that the function raises error if modifying the original options + # Test that the function raises error if it would modify the original options options_original = OPTIONS_TEMPLATE.copy() options_modified = {'SSECustomerKey': '987654321'} - with pytest.raises(ValueError): - data_transfer.add_put_options_safely(options_original, OPTIONS_TEMPLATE) + with pytest.raises(ValueError, + match="Cannot override key `SSECustomerKey` using put_options:"\ + " {'SSECustomerKey': '987654321'}."): + data_transfer.add_put_options_safely(options_original, options_modified) def test_select(self): # Note: The boto3 Stubber doesn't work properly with s3_client.select_object_content(). From a660015af3214b7273c6f5eb396d13c06559f5bb Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 11:04:58 -0800 Subject: [PATCH 20/29] lint --- api/python/tests/test_data_transfer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/tests/test_data_transfer.py b/api/python/tests/test_data_transfer.py index 8d5a588a595..27677e4ece0 100644 --- a/api/python/tests/test_data_transfer.py +++ b/api/python/tests/test_data_transfer.py @@ -46,8 +46,8 @@ def test_add_put_options_safely(self): options_original = OPTIONS_TEMPLATE.copy() options_modified = {'SSECustomerKey': '987654321'} with pytest.raises(ValueError, - match="Cannot override key `SSECustomerKey` using put_options:"\ - " {'SSECustomerKey': '987654321'}."): + match="Cannot override key `SSECustomerKey` using put_options:" + " {'SSECustomerKey': '987654321'}."): data_transfer.add_put_options_safely(options_original, options_modified) def test_select(self): From ec0ae9c8fdcfebfe3ffacc97f5b44f29c9cbdbb9 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 11:39:09 -0800 Subject: [PATCH 21/29] pass along put_options --- api/python/quilt3/data_transfer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/quilt3/data_transfer.py b/api/python/quilt3/data_transfer.py index 584d5c5fbb6..d5ca59596c7 100644 --- a/api/python/quilt3/data_transfer.py +++ b/api/python/quilt3/data_transfer.py @@ -895,7 +895,7 @@ def copy_file_list(file_list, message=None, callback=None, put_options=None): if _looks_like_dir(src) or _looks_like_dir(dest): raise ValueError("Directories are not allowed") - return _copy_file_list_internal(file_list, [None] * len(file_list), message, callback, put_options=None) + return _copy_file_list_internal(file_list, [None] * len(file_list), message, callback, put_options=put_options) def copy_file(src: PhysicalKey, dest: PhysicalKey, size=None, message=None, callback=None, put_options=None): @@ -930,7 +930,7 @@ def sanity_check(rel_path): src = PhysicalKey(src.bucket, src.path, version_id) url_list.append((src, dest, size)) - _copy_file_list_internal(url_list, [None] * len(url_list), message, callback, put_options=None) + _copy_file_list_internal(url_list, [None] * len(url_list), message, callback, put_options=put_options) def put_bytes(data: bytes, dest: PhysicalKey, put_options=None): From b1fd866bb60a8e84f737210854ad121d912b5fcc Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:08:27 -0800 Subject: [PATCH 22/29] wrap push_manifest --- api/python/quilt3/backends/base.py | 17 +++++++------- api/python/quilt3/packages.py | 7 +++--- api/python/tests/integration/test_packages.py | 23 ++++++++++++------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/api/python/quilt3/backends/base.py b/api/python/quilt3/backends/base.py index ff8a065deec..dfc5548d055 100644 --- a/api/python/quilt3/backends/base.py +++ b/api/python/quilt3/backends/base.py @@ -87,7 +87,8 @@ def delete_package_version(self, pkg_name: str, top_hash: str): pass @abc.abstractmethod - def push_manifest(self, pkg_name: str, top_hash: str, manifest_data: bytes): + def push_manifest(self, pkg_name: str, top_hash: str, manifest_data: bytes, + put_options: dict = None): pass @abc.abstractmethod @@ -134,14 +135,14 @@ def manifests_package_dir(self, pkg_name: str) -> PhysicalKey: def manifest_pk(self, pkg_name: str, top_hash: str) -> PhysicalKey: return self.root.join(f'packages/{top_hash}') - def push_manifest(self, pkg_name: str, top_hash: str, manifest_data: bytes): + def push_manifest(self, pkg_name: str, top_hash: str, manifest_data: bytes, put_options: dict = None): """returns: timestamp to support catalog drag-and-drop => browse""" - put_bytes(manifest_data, self.manifest_pk(pkg_name, top_hash)) + put_bytes(manifest_data, self.manifest_pk(pkg_name, top_hash), put_options=put_options) hash_bytes = top_hash.encode() # TODO: use a float to string formatter instead of double casting timestamp_str = str(int(time.time())) - put_bytes(hash_bytes, self.pointer_pk(pkg_name, timestamp_str)) - put_bytes(hash_bytes, self.pointer_latest_pk(pkg_name)) + put_bytes(hash_bytes, self.pointer_pk(pkg_name, timestamp_str), put_options=put_options) + put_bytes(hash_bytes, self.pointer_latest_pk(pkg_name), put_options=put_options) return timestamp_str @staticmethod @@ -246,9 +247,9 @@ def list_package_versions(self, pkg_name: str): for dt, top_hash in self.list_package_versions_with_timestamps(pkg_name): yield str(int(dt.timestamp())), top_hash - def push_manifest(self, pkg_name: str, top_hash: str, manifest_data: bytes): - put_bytes(manifest_data, self.manifest_pk(pkg_name, top_hash)) - put_bytes(top_hash.encode(), self.pointer_latest_pk(pkg_name)) + def push_manifest(self, pkg_name: str, top_hash: str, manifest_data: bytes, put_options: dict = None): + put_bytes(manifest_data, self.manifest_pk(pkg_name, top_hash), put_options=put_options) + put_bytes(top_hash.encode(), self.pointer_latest_pk(pkg_name), put_options=put_options) @staticmethod def _top_hash_from_path(path: str) -> str: diff --git a/api/python/quilt3/packages.py b/api/python/quilt3/packages.py index 647db51600c..b44623d3854 100644 --- a/api/python/quilt3/packages.py +++ b/api/python/quilt3/packages.py @@ -1064,6 +1064,7 @@ def build(self, name, registry=None, message=None, *, workflow=...): registry: registry to build to defaults to local registry message: the commit message of the package + put_options: optional arguments to pass to the PutObject operation %(workflow)s Returns: @@ -1084,10 +1085,10 @@ def _build(self, name, registry, message): self._push_manifest(name, registry, top_hash) return top_hash - def _push_manifest(self, name, registry, top_hash): + def _push_manifest(self, name, registry, top_hash, put_options=None): manifest = io.BytesIO() self._dump(manifest) - registry.push_manifest(name, top_hash, manifest.getvalue()) + registry.push_manifest(name, top_hash, manifest.getvalue(), put_options=put_options) @ApiTelemetry("package.dump") def dump(self, writable_file): @@ -1583,7 +1584,7 @@ def physical_key_is_temp_file(pk): latest_hash = get_latest_hash() check_hash_conficts(latest_hash) - pkg._push_manifest(name, registry, top_hash) + pkg._push_manifest(name, registry, top_hash, put_options=put_options) if print_info: shorthash = registry.shorten_top_hash(name, top_hash) diff --git a/api/python/tests/integration/test_packages.py b/api/python/tests/integration/test_packages.py index 58ebd7169c4..97cd31f0fd0 100644 --- a/api/python/tests/integration/test_packages.py +++ b/api/python/tests/integration/test_packages.py @@ -1260,6 +1260,7 @@ def test_commit_message_on_push(self, mocked_workflow_validate): 'Quilt/test_pkg_name', registry, mock.sentinel.top_hash, + put_options=None, ) mocked_workflow_validate.assert_called_once_with( registry=registry, @@ -1923,10 +1924,11 @@ def test_push_dest_fn(self): ) push_manifest_mock = self.patch_s3_registry('push_manifest') self.patch_s3_registry('shorten_top_hash', return_value='7a67ff4') - pkg.push(pkg_name, registry='s3://test-bucket', dest=dest_fn, force=True) + pkg.push(pkg_name, registry='s3://test-bucket', dest=dest_fn, force=True, +) dest_fn.assert_called_once_with(lk, pkg[lk]) - push_manifest_mock.assert_called_once_with(pkg_name, mock.sentinel.top_hash, ANY) + push_manifest_mock.assert_called_once_with(pkg_name, mock.sentinel.top_hash, ANY, put_options=None) assert Package.load( BytesIO(push_manifest_mock.call_args[0][2]) )[lk].physical_key == PhysicalKey(dest_bucket, dest_key, version) @@ -1952,7 +1954,7 @@ def test_push_selector_fn_false(self): selector_fn.assert_called_once_with(lk, pkg[lk]) calculate_checksum_mock.assert_called_once_with([PhysicalKey(src_bucket, src_key, src_version)], [0]) - push_manifest_mock.assert_called_once_with(pkg_name, mock.sentinel.top_hash, ANY) + push_manifest_mock.assert_called_once_with(pkg_name, mock.sentinel.top_hash, ANY, put_options=None) assert Package.load( BytesIO(push_manifest_mock.call_args[0][2]) )[lk].physical_key == PhysicalKey(src_bucket, src_key, src_version) @@ -1999,7 +2001,7 @@ def test_push_selector_fn_true(self): selector_fn.assert_called_once_with(lk, pkg[lk]) calculate_checksum_mock.assert_called_once_with([], []) - push_manifest_mock.assert_called_once_with(pkg_name, mock.sentinel.top_hash, ANY) + push_manifest_mock.assert_called_once_with(pkg_name, mock.sentinel.top_hash, ANY, put_options=None) assert Package.load( BytesIO(push_manifest_mock.call_args[0][2]) )[lk].physical_key == PhysicalKey(dst_bucket, dst_key, dst_version) @@ -2047,10 +2049,15 @@ class PackageTestV2(PackageTest): def local_manifest_timestamp_fixer(self, timestamp): wrapped = self.LocalPackageRegistryDefault.push_manifest - def wrapper(pkg_registry, pkg_name, top_hash, manifest_data): - wrapped(pkg_registry, pkg_name, top_hash, manifest_data) - os.utime(pkg_registry._manifest_parent_pk(pkg_name, top_hash).path, (timestamp, timestamp)) - return patch.object(self.LocalPackageRegistryDefault, 'push_manifest', wrapper) + def wrapper(pkg_registry, pkg_name, top_hash, manifest_data, put_options=None): + wrapped(pkg_registry, pkg_name, top_hash, manifest_data, put_options=put_options) + os.utime( + pkg_registry._manifest_parent_pk(pkg_name, top_hash).path, + (timestamp, timestamp) + ) + return patch.object( + self.LocalPackageRegistryDefault, 'push_manifest', wrapper + ) def _test_list_remote_packages_setup_stubber(self, pkg_registry, *, pkg_names): self.s3_stubber.add_response( From c2a341da434ebaa2da906901cb5363976d918bc8 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:10:48 -0800 Subject: [PATCH 23/29] add put_options to build --- api/python/quilt3/packages.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/api/python/quilt3/packages.py b/api/python/quilt3/packages.py index b44623d3854..ef48a30f11b 100644 --- a/api/python/quilt3/packages.py +++ b/api/python/quilt3/packages.py @@ -1055,7 +1055,7 @@ def _validate_with_workflow(self, *, registry, workflow, name, message): @ApiTelemetry("package.build") @_fix_docstring(workflow=_WORKFLOW_PARAM_DOCSTRING) - def build(self, name, registry=None, message=None, *, workflow=...): + def build(self, name, registry=None, message=None, *, workflow=..., put_options=None): """ Serializes this package to a registry. @@ -1064,17 +1064,17 @@ def build(self, name, registry=None, message=None, *, workflow=...): registry: registry to build to defaults to local registry message: the commit message of the package - put_options: optional arguments to pass to the PutObject operation %(workflow)s + put_options: optional arguments to pass to the PutObject operation Returns: The top hash as a string. """ registry = get_package_registry(registry) self._validate_with_workflow(registry=registry, workflow=workflow, name=name, message=message) - return self._build(name=name, registry=registry, message=message) + return self._build(name=name, registry=registry, message=message, put_options=put_options) - def _build(self, name, registry, message): + def _build(self, name, registry, message, put_options=None): validate_package_name(name) registry = get_package_registry(registry) @@ -1082,7 +1082,7 @@ def _build(self, name, registry, message): self._calculate_missing_hashes() top_hash = self.top_hash - self._push_manifest(name, registry, top_hash) + self._push_manifest(name, registry, top_hash, put_options=put_options) return top_hash def _push_manifest(self, name, registry, top_hash, put_options=None): From cf019c09316a15e29a2d08d9ec4b02554a487e6c Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:14:46 -0800 Subject: [PATCH 24/29] _mock_package_build --- lambdas/pkgpush/tests/test_index.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lambdas/pkgpush/tests/test_index.py b/lambdas/pkgpush/tests/test_index.py index 7f375999408..58b93295089 100644 --- a/lambdas/pkgpush/tests/test_index.py +++ b/lambdas/pkgpush/tests/test_index.py @@ -587,9 +587,11 @@ def make_request(self, params, **kwargs): ) @contextlib.contextmanager - def _mock_package_build(self, entries, *, message=..., expected_workflow=...): + def _mock_package_build(self, entries, *, message=..., expected_workflow=..., put_options=None): if message is ...: message = self.dst_commit_message + if put_options is None: + put_options = {} # Use a test package to verify manifest entries test_pkg = Package() @@ -614,11 +616,6 @@ def _mock_package_build(self, entries, *, message=..., expected_workflow=...): self.s3_stubber.add_response( 'put_object', service_response={}, - expected_params={ - 'Body': manifest.read(), - 'Bucket': self.dst_bucket, - 'Key': f'.quilt/packages/{test_pkg.top_hash}', - }, ) self.s3_stubber.add_response( 'put_object', @@ -627,15 +624,17 @@ def _mock_package_build(self, entries, *, message=..., expected_workflow=...): 'Body': str.encode(test_pkg.top_hash), 'Bucket': self.dst_bucket, 'Key': f'.quilt/named_packages/{self.dst_pkg_name}/{str(int(self.mock_timestamp))}', + **put_options, }, ) self.s3_stubber.add_response( - 'put_object', + "put_object", service_response={}, expected_params={ - 'Body': str.encode(test_pkg.top_hash), - 'Bucket': self.dst_bucket, - 'Key': f'.quilt/named_packages/{self.dst_pkg_name}/latest', + "Body": str.encode(test_pkg.top_hash), + "Bucket": self.dst_bucket, + "Key": f".quilt/named_packages/{self.dst_pkg_name}/latest", + **put_options, }, ) with mock.patch('quilt3.workflows.validate', return_value=mocked_workflow_data) as workflow_validate_mock: From 55cfa7609895da39821233935e74870df51d3c0b Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:16:48 -0800 Subject: [PATCH 25/29] lint --- api/python/tests/integration/test_packages.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/python/tests/integration/test_packages.py b/api/python/tests/integration/test_packages.py index 97cd31f0fd0..e2e236ebbed 100644 --- a/api/python/tests/integration/test_packages.py +++ b/api/python/tests/integration/test_packages.py @@ -1924,8 +1924,7 @@ def test_push_dest_fn(self): ) push_manifest_mock = self.patch_s3_registry('push_manifest') self.patch_s3_registry('shorten_top_hash', return_value='7a67ff4') - pkg.push(pkg_name, registry='s3://test-bucket', dest=dest_fn, force=True, -) + pkg.push(pkg_name, registry='s3://test-bucket', dest=dest_fn, force=True) dest_fn.assert_called_once_with(lk, pkg[lk]) push_manifest_mock.assert_called_once_with(pkg_name, mock.sentinel.top_hash, ANY, put_options=None) From 98036c1bae6825d11bb1acb9f2405fa446a8ab7d Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:22:17 -0800 Subject: [PATCH 26/29] Package.build gendoc --- docs/api-reference/Package.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/api-reference/Package.md b/docs/api-reference/Package.md index 0c1f8f94b17..6ef8f02e914 100644 --- a/docs/api-reference/Package.md +++ b/docs/api-reference/Package.md @@ -194,7 +194,7 @@ no such entry exists. Sets user metadata on this Package. -## Package.build(self, name, registry=None, message=None, \*, workflow=Ellipsis) {#Package.build} +## Package.build(self, name, registry=None, message=None, \*, workflow=Ellipsis, put\_options=None) {#Package.build} Serializes this package to a registry. @@ -208,6 +208,8 @@ __Arguments__ If not specified, the default workflow will be used. * __For details see__: https://docs.quiltdata.com/advanced-usage/workflows +* __put_options__: optional arguments to pass to the PutObject operation + __Returns__ From 6d8aad3bd60d5df96e1b77363a75babe58bcec18 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:41:01 -0800 Subject: [PATCH 27/29] gendocs lint --- docs/api-reference/Package.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/api-reference/Package.md b/docs/api-reference/Package.md index 6ef8f02e914..ef46ac4012f 100644 --- a/docs/api-reference/Package.md +++ b/docs/api-reference/Package.md @@ -210,7 +210,6 @@ __Arguments__ * __put_options__: optional arguments to pass to the PutObject operation - __Returns__ The top hash as a string. From 540b4468e19740b1d7d3d3fa777fb9663798e483 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:46:48 -0800 Subject: [PATCH 28/29] Package.fetch is always local --- api/python/quilt3/packages.py | 5 ++--- docs/api-reference/Package.md | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/api/python/quilt3/packages.py b/api/python/quilt3/packages.py index ef48a30f11b..3360d8f3b04 100644 --- a/api/python/quilt3/packages.py +++ b/api/python/quilt3/packages.py @@ -701,14 +701,13 @@ def __getitem__(self, logical_key): return pkg @ApiTelemetry("package.fetch") - def fetch(self, dest='./', put_options=None): + def fetch(self, dest='./'): """ Copy all descendants to `dest`. Descendants are written under their logical names _relative_ to self. Args: dest: where to put the files (locally) - put_options: optional arguments to pass to the PutObject operation Returns: A new Package object with entries from self, but with physical keys @@ -729,7 +728,7 @@ def fetch(self, dest='./', put_options=None): new_entry = entry.with_physical_key(new_physical_key) pkg._set(logical_key, new_entry) - copy_file_list(file_list, message="Copying objects", put_options=put_options) + copy_file_list(file_list, message="Copying objects") return pkg diff --git a/docs/api-reference/Package.md b/docs/api-reference/Package.md index ef46ac4012f..c9d4d1bd335 100644 --- a/docs/api-reference/Package.md +++ b/docs/api-reference/Package.md @@ -86,7 +86,7 @@ PackageEntry if prefix matches a logical_key exactly otherwise Package -## Package.fetch(self, dest='./', put\_options=None) {#Package.fetch} +## Package.fetch(self, dest='./') {#Package.fetch} Copy all descendants to `dest`. Descendants are written under their logical names _relative_ to self. @@ -94,7 +94,6 @@ names _relative_ to self. __Arguments__ * __dest__: where to put the files (locally) -* __put_options__: optional arguments to pass to the PutObject operation __Returns__ From cd62b516e85616ce5b305ef9303dccaefee942d9 Mon Sep 17 00:00:00 2001 From: "Dr. Ernie Prabhakar" <19791+drernie@users.noreply.github.com> Date: Fri, 27 Dec 2024 15:49:23 -0800 Subject: [PATCH 29/29] PackageEntry.fetch --- api/python/quilt3/packages.py | 2 +- docs/api-reference/Package.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/quilt3/packages.py b/api/python/quilt3/packages.py index 3360d8f3b04..c6ab5ff0c5b 100644 --- a/api/python/quilt3/packages.py +++ b/api/python/quilt3/packages.py @@ -356,7 +356,7 @@ def fetch(self, dest=None, put_options=None): Gets objects from entry and saves them to dest. Args: - dest: where to put the files + dest: url for where to put the files Defaults to the entry name put_options: optional arguments to pass to the PutObject operation diff --git a/docs/api-reference/Package.md b/docs/api-reference/Package.md index c9d4d1bd335..843714db535 100644 --- a/docs/api-reference/Package.md +++ b/docs/api-reference/Package.md @@ -515,7 +515,7 @@ Gets objects from entry and saves them to dest. __Arguments__ -* __dest__: where to put the files +* __dest__: url for where to put the files Defaults to the entry name * __put_options__: optional arguments to pass to the PutObject operation