Skip to content

Commit

Permalink
delete from s3 in subbatches
Browse files Browse the repository at this point in the history
  • Loading branch information
sastels committed Mar 19, 2024
1 parent 6ca0604 commit 4dbdc62
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 4 deletions.
9 changes: 6 additions & 3 deletions app/aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,13 @@ def get_job_metadata_from_s3(service_id, job_id):
return obj.get()["Metadata"]


def remove_jobs_from_s3(jobs):
# AWS has a limit of 1000 objects per delete_objects call
def remove_jobs_from_s3(jobs, batch_size=1000):
bucket = resource("s3").Bucket(current_app.config["CSV_UPLOAD_BUCKET_NAME"])
object_keys = [FILE_LOCATION_STRUCTURE.format(job.service_id, job.id) for job in jobs]
bucket.delete_objects(Delete={"Objects": [{"Key": key} for key in object_keys]})

for start in range(0, len(jobs), batch_size):
object_keys = [FILE_LOCATION_STRUCTURE.format(job.service_id, job.id) for job in jobs[start : start + batch_size]]
bucket.delete_objects(Delete={"Objects": [{"Key": key} for key in object_keys]})


def get_s3_bucket_objects(bucket_name, subfolder="", older_than=7, limit_days=2):
Expand Down
30 changes: 29 additions & 1 deletion tests/app/aws/test_s3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import uuid
from datetime import datetime, timedelta
from unittest.mock import call
from unittest.mock import Mock, call

import pytest
import pytz
Expand All @@ -12,6 +12,7 @@
get_list_of_files_by_suffix,
get_s3_bucket_objects,
get_s3_file,
remove_jobs_from_s3,
remove_transformed_dvla_file,
upload_job_to_s3,
)
Expand Down Expand Up @@ -214,3 +215,30 @@ def test_upload_job_to_s3(notify_api, mocker):
bucket_name=current_app.config["CSV_UPLOAD_BUCKET_NAME"],
file_location=f"service-{service_id}-notify/{upload_id}.csv",
)


def test_remove_jobs_from_s3(notify_api, mocker):
mock = Mock()
mocker.patch("app.aws.s3.resource", return_value=mock)
jobs = [
type("Job", (object,), {"service_id": "foo", "id": "j1"}),
type("Job", (object,), {"service_id": "foo", "id": "j2"}),
type("Job", (object,), {"service_id": "foo", "id": "j3"}),
type("Job", (object,), {"service_id": "foo", "id": "j4"}),
type("Job", (object,), {"service_id": "foo", "id": "j5"}),
]

remove_jobs_from_s3(jobs, batch_size=2)

mock.assert_has_calls(
[
call.Bucket(current_app.config["CSV_UPLOAD_BUCKET_NAME"]),
call.Bucket().delete_objects(
Delete={"Objects": [{"Key": "service-foo-notify/j1.csv"}, {"Key": "service-foo-notify/j2.csv"}]}
),
call.Bucket().delete_objects(
Delete={"Objects": [{"Key": "service-foo-notify/j3.csv"}, {"Key": "service-foo-notify/j4.csv"}]}
),
call.Bucket().delete_objects(Delete={"Objects": [{"Key": "service-foo-notify/j5.csv"}]}),
]
)

0 comments on commit 4dbdc62

Please sign in to comment.