Skip to content

Commit

Permalink
delete from s3 in subbatches
Browse files Browse the repository at this point in the history
  • Loading branch information
sastels committed Mar 19, 2024
1 parent 6ca0604 commit 54ca882
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 4 deletions.
9 changes: 6 additions & 3 deletions app/aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,13 @@ def get_job_metadata_from_s3(service_id, job_id):
return obj.get()["Metadata"]


def remove_jobs_from_s3(jobs):
# AWS has a limit of 1000 objects per delete_objects call
def remove_jobs_from_s3(jobs, batch_size=1000):
bucket = resource("s3").Bucket(current_app.config["CSV_UPLOAD_BUCKET_NAME"])
object_keys = [FILE_LOCATION_STRUCTURE.format(job.service_id, job.id) for job in jobs]
bucket.delete_objects(Delete={"Objects": [{"Key": key} for key in object_keys]})

for start in range(0, len(jobs), batch_size):
object_keys = [FILE_LOCATION_STRUCTURE.format(job.service_id, job.id) for job in jobs[start:start + batch_size]]
bucket.delete_objects(Delete={"Objects": [{"Key": key} for key in object_keys]})


def get_s3_bucket_objects(bucket_name, subfolder="", older_than=7, limit_days=2):
Expand Down
23 changes: 22 additions & 1 deletion tests/app/aws/test_s3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import uuid
from datetime import datetime, timedelta
from unittest.mock import call
from unittest.mock import call, Mock

import pytest
import pytz
Expand All @@ -14,6 +14,7 @@
get_s3_file,
remove_transformed_dvla_file,
upload_job_to_s3,
remove_jobs_from_s3,
)
from tests.app.conftest import datetime_in_past

Expand Down Expand Up @@ -214,3 +215,23 @@ def test_upload_job_to_s3(notify_api, mocker):
bucket_name=current_app.config["CSV_UPLOAD_BUCKET_NAME"],
file_location=f"service-{service_id}-notify/{upload_id}.csv",
)

def test_remove_jobs_from_s3(notify_api, mocker):
mock = Mock()
mocker.patch("app.aws.s3.resource", return_value=mock)
jobs = [
type("Job", (object,), {"service_id": "foo", "id": "j1"}),
type("Job", (object,), {"service_id": "foo", "id": "j2"}),
type("Job", (object,), {"service_id": "foo", "id": "j3"}),
type("Job", (object,), {"service_id": "foo", "id": "j4"}),
type("Job", (object,), {"service_id": "foo", "id": "j5"}),
]

remove_jobs_from_s3(jobs, batch_size=2)

mock.assert_has_calls([
call.Bucket(current_app.config["CSV_UPLOAD_BUCKET_NAME"]),
call.Bucket().delete_objects(Delete={"Objects": [{"Key": "service-foo-notify/j1.csv"}, {"Key": "service-foo-notify/j2.csv"}]}),
call.Bucket().delete_objects(Delete={"Objects": [{"Key": "service-foo-notify/j3.csv"}, {"Key": "service-foo-notify/j4.csv"}]}),
call.Bucket().delete_objects(Delete={"Objects": [{"Key": "service-foo-notify/j5.csv"}]}),
])

0 comments on commit 54ca882

Please sign in to comment.