From 270cb0014d279a29c2ba0230b27d3844f4ca61be Mon Sep 17 00:00:00 2001 From: nl_0 Date: Fri, 23 Feb 2024 09:45:46 +0100 Subject: [PATCH] consistently handle empty input --- .../s3hash/src/t4_lambda_s3hash/__init__.py | 11 ++++++++++ lambdas/s3hash/tests/test_compute_checksum.py | 15 +++++++++++++ .../tests/test_get_compliant_checksum.py | 22 +++++++++++++++++-- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py b/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py index 5efab213f60..dd8251c877a 100644 --- a/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py +++ b/lambdas/s3hash/src/t4_lambda_s3hash/__init__.py @@ -81,6 +81,12 @@ def modern(cls, value: bytes): def for_parts(cls, checksums: T.Sequence[bytes]): return cls.modern(hash_parts(checksums)) + _EMPTY_HASH = hashlib.sha256().digest() + + @classmethod + def empty(cls): + return cls.modern(cls._EMPTY_HASH) if MODERN_CHECKSUMS else cls.legacy(cls._EMPTY_HASH) + # 8 MiB -- boto3 default: # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig @@ -122,6 +128,9 @@ async def get_obj_attributes(location: S3ObjectSource) -> T.Optional[GetObjectAt def get_compliant_checksum(attrs: GetObjectAttributesOutputTypeDef) -> T.Optional[Checksum]: + if attrs["ObjectSize"] == 0: + return Checksum.empty() + checksum_value = attrs.get("Checksum", {}).get("ChecksumSHA256") if checksum_value is None: return None @@ -319,6 +328,8 @@ async def compute_checksum(location: S3ObjectSource) -> ChecksumResult: else: resp = await S3.get().head_object(**location.boto_args) etag, total_size = resp["ETag"], resp["ContentLength"] + if total_size == 0: + return ChecksumResult(checksum=Checksum.empty()) if not MODERN_CHECKSUMS and total_size > MAX_PART_SIZE: checksum = await compute_checksum_legacy(location) diff --git a/lambdas/s3hash/tests/test_compute_checksum.py b/lambdas/s3hash/tests/test_compute_checksum.py index 222d123115d..13afb7cdd59 100644 --- a/lambdas/s3hash/tests/test_compute_checksum.py +++ b/lambdas/s3hash/tests/test_compute_checksum.py @@ -92,6 +92,21 @@ async def test_compliant(s3_stub: Stubber): assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.modern(base64.b64decode(checksum_hash))) +async def test_empty(s3_stub: Stubber): + s3_stub.add_response( + "get_object_attributes", + { + "Checksum": {"ChecksumSHA256": "doesnt matter"}, + "ObjectSize": 0, + }, + EXPECTED_GETATTR_PARAMS, + ) + + res = await s3hash.compute_checksum(LOC) + + assert res == s3hash.ChecksumResult(checksum=s3hash.Checksum.empty()) + + async def test_legacy(s3_stub: Stubber, mocker: MockerFixture): s3_stub.add_client_error( "get_object_attributes", diff --git a/lambdas/s3hash/tests/test_get_compliant_checksum.py b/lambdas/s3hash/tests/test_get_compliant_checksum.py index 7d984721ce2..70fd43790d5 100644 --- a/lambdas/s3hash/tests/test_get_compliant_checksum.py +++ b/lambdas/s3hash/tests/test_get_compliant_checksum.py @@ -9,8 +9,13 @@ @pytest.mark.parametrize( "obj_attrs", [ - {}, - {"Checksum": {"ChecksumSHA1": "X94czmA+ZWbSDagRyci8zLBE1K4="}}, + { + "ObjectSize": 1, + }, + { + "ObjectSize": 1, + "Checksum": {"ChecksumSHA1": "X94czmA+ZWbSDagRyci8zLBE1K4="}, + }, ], ) def test_no_sha256(obj_attrs): @@ -20,6 +25,19 @@ def test_no_sha256(obj_attrs): @pytest.mark.parametrize( "obj_attrs, legacy, modern", [ + ( + {"ObjectSize": 0}, + Checksum.legacy(base64.b64decode("47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")), + Checksum.modern(base64.b64decode("47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")), + ), + ( + { + "Checksum": {"ChecksumSHA256": "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="}, + "ObjectSize": 0, + }, + Checksum.legacy(base64.b64decode("47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")), + Checksum.modern(base64.b64decode("47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=")), + ), ( { "Checksum": {"ChecksumSHA256": "MOFJVevxNSJm3C/4Bn5oEEYH51CrudOzZYK4r5Cfy1g="},