diff --git a/app/aws/s3.py b/app/aws/s3.py index 2e8b283904..c55ca61997 100644 --- a/app/aws/s3.py +++ b/app/aws/s3.py @@ -60,10 +60,13 @@ def get_job_metadata_from_s3(service_id, job_id): return obj.get()["Metadata"] -def remove_jobs_from_s3(jobs): +# AWS has a limit of 1000 objects per delete_objects call +def remove_jobs_from_s3(jobs, batch_size=1000): bucket = resource("s3").Bucket(current_app.config["CSV_UPLOAD_BUCKET_NAME"]) - object_keys = [FILE_LOCATION_STRUCTURE.format(job.service_id, job.id) for job in jobs] - bucket.delete_objects(Delete={"Objects": [{"Key": key} for key in object_keys]}) + + for start in range(0, len(jobs), batch_size): + object_keys = [FILE_LOCATION_STRUCTURE.format(job.service_id, job.id) for job in jobs[start:start + batch_size]] + bucket.delete_objects(Delete={"Objects": [{"Key": key} for key in object_keys]}) def get_s3_bucket_objects(bucket_name, subfolder="", older_than=7, limit_days=2): diff --git a/tests/app/aws/test_s3.py b/tests/app/aws/test_s3.py index bae56c3f45..bf3527aa80 100644 --- a/tests/app/aws/test_s3.py +++ b/tests/app/aws/test_s3.py @@ -1,6 +1,6 @@ import uuid from datetime import datetime, timedelta -from unittest.mock import call +from unittest.mock import call, Mock import pytest import pytz @@ -14,6 +14,7 @@ get_s3_file, remove_transformed_dvla_file, upload_job_to_s3, + remove_jobs_from_s3, ) from tests.app.conftest import datetime_in_past @@ -214,3 +215,23 @@ def test_upload_job_to_s3(notify_api, mocker): bucket_name=current_app.config["CSV_UPLOAD_BUCKET_NAME"], file_location=f"service-{service_id}-notify/{upload_id}.csv", ) + +def test_remove_jobs_from_s3(notify_api, mocker): + mock = Mock() + mocker.patch("app.aws.s3.resource", return_value=mock) + jobs = [ + type("Job", (object,), {"service_id": "foo", "id": "j1"}), + type("Job", (object,), {"service_id": "foo", "id": "j2"}), + type("Job", (object,), {"service_id": "foo", "id": "j3"}), + type("Job", (object,), {"service_id": "foo", "id": "j4"}), + type("Job", (object,), {"service_id": "foo", "id": "j5"}), + ] + + remove_jobs_from_s3(jobs, batch_size=2) + + mock.assert_has_calls([ + call.Bucket(current_app.config["CSV_UPLOAD_BUCKET_NAME"]), + call.Bucket().delete_objects(Delete={"Objects": [{"Key": "service-foo-notify/j1.csv"}, {"Key": "service-foo-notify/j2.csv"}]}), + call.Bucket().delete_objects(Delete={"Objects": [{"Key": "service-foo-notify/j3.csv"}, {"Key": "service-foo-notify/j4.csv"}]}), + call.Bucket().delete_objects(Delete={"Objects": [{"Key": "service-foo-notify/j5.csv"}]}), + ])