From 3bb70e8a7757574096b01f2e962bd702c92b1a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20M=C3=BCller?= Date: Thu, 7 Mar 2024 09:47:19 +0100 Subject: [PATCH] feat: add convenience method `get_s3_attributes_for_artifact` --- CHANGELOG | 2 ++ dcor_shared/s3cc.py | 75 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_s3cc.py | 19 ++++++++++++ 3 files changed, 96 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 0cfc9c0..8c01d52 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,5 @@ +0.9.1 + - feat: add convenience method `get_s3_attributes_for_artifact` 0.9.0 - feat: introduce `testing.make_dataset_via_s3` and `testing.make_resource_via_s3` which uploads resources via S3 diff --git a/dcor_shared/s3cc.py b/dcor_shared/s3cc.py index 455960b..baa6263 100644 --- a/dcor_shared/s3cc.py +++ b/dcor_shared/s3cc.py @@ -61,6 +61,81 @@ def create_presigned_url( filename=filename) +def get_s3_attributes_for_artifact( + resource_id: str, + artifact: Literal["condensed", "preview", "resource"] = "resource"): + """Return all attributes for an artifact in the S3 object store + + Returns + ------- + meta: dict + Metadata dictionary with the keys "etag", "server", "size", + and "success". 
+ """ + bucket_name, object_name = get_s3_bucket_object_for_artifact( + resource_id=resource_id, artifact=artifact) + s3_client, _, _ = s3.get_s3() + attr_info = s3_client.head_object(Bucket=bucket_name, Key=object_name) + # Example output from MinIO:: + # + # {'AcceptRanges': 'bytes', + # 'ContentLength': 904729, + # 'ContentType': 'application/octet-stream', + # 'ETag': '"108d47e80f3e5f35110493b1fdcd30d5"', + # 'LastModified': datetime.datetime(2024, 3, 7, 8, 15, + # tzinfo=tzutc()), + # 'Metadata': {}, + # 'ResponseMetadata': { + # 'HTTPHeaders': { + # 'accept-ranges': 'bytes', + # 'content-length': '904729', + # 'content-type': 'application/octet-stream', + # 'date': 'Thu, 07 Mar 2024 08:15:02 GMT', + # 'etag': '"108d47e80f3e5f35110493b1fdcd30d5"', + # 'last-modified': 'Thu, 07 Mar 2024 ' + # '08:15:00 GMT', + # 'server': 'MinIO', + # 'strict-transport-security': 'max-age=31536000; ' + # 'includeSubDomains', + # 'vary': 'Origin, Accept-Encoding', + # 'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6e...', + # 'x-amz-request-id': '17BA6D680CB67A2C', + # 'x-amz-tagging-count': '1', + # 'x-content-type-options': 'nosniff', + # 'x-xss-protection': '1; mode=block'}, + # 'HTTPStatusCode': 200, + # 'HostId': 'dd9025bab4ad464b049177c95eb6ebf3...', + # 'RequestId': '17BA6D680CB67A2C', + # 'RetryAttempts': 0} + # } + meta = {} + for key, funcs in [ + ("etag", [lambda m: m.get("ETag"), + lambda m: m.get("ResponseMetadata", + {}).get("HTTPHeaders", + {}).get("etag"), + ]), + ("server", [lambda m: m.get("ResponseMetadata", + {}).get("HTTPHeaders", + {}).get("server", "unknown") + ]), + ("size", [lambda m: m.get("ContentLength"), + lambda m: m.get("ResponseMetadata", + {}).get("HTTPHeaders", + {}).get("content-length"), + ]), + ("success", [lambda m: m.get("ResponseMetadata", + {}).get("HTTPStatusCode", 404) == 200 + ]), + ]: + for fn in funcs: + val = fn(attr_info) + if val is not None: + meta[key] = val + break + return meta + + def get_s3_bucket_object_for_artifact( 
resource_id: str, artifact: Literal["condensed", "preview", "resource"] = "resource"): diff --git a/tests/test_s3cc.py b/tests/test_s3cc.py index 9cd5189..5ff1e76 100644 --- a/tests/test_s3cc.py +++ b/tests/test_s3cc.py @@ -115,6 +115,25 @@ def test_create_presigned_url(enqueue_job_mock, tmp_path): "490efdf5d9bb4cd4b2a6bcf2fe54d4dc201c38530140bcb168980bf8bf846c73" +@pytest.mark.ckan_config('ckan.plugins', 'dcor_schemas') +@pytest.mark.usefixtures('clean_db', 'with_request_context') +@mock.patch('ckan.plugins.toolkit.enqueue_job', + side_effect=synchronous_enqueue_job) +def test_get_s3_attributes_for_artifact(enqueue_job_mock): + rid, _, _, org_dict = setup_s3_resource_on_ckan() + + # Make sure the resource exists + res_dict = helpers.call_action("resource_show", id=rid) + assert res_dict["id"] == rid, "sanity check" + + # get the size + meta = s3cc.get_s3_attributes_for_artifact(rid) + assert meta["size"] == 904729 + assert meta["success"] + assert meta["etag"] + assert meta["server"] + + @pytest.mark.ckan_config('ckan.plugins', 'dcor_schemas') @pytest.mark.usefixtures('clean_db', 'with_request_context') @mock.patch('ckan.plugins.toolkit.enqueue_job',