Task/remove letters #2185

Merged · 6 commits · Jun 10, 2024
Changes from 3 commits
361 changes: 10 additions & 351 deletions app/celery/letters_pdf_tasks.py

Large diffs are not rendered by default.

40 changes: 0 additions & 40 deletions app/dao/notifications_dao.py
@@ -2,7 +2,6 @@
 import string
 from datetime import datetime, timedelta

-from boto.exception import BotoClientError
 from flask import current_app
 from itsdangerous import BadSignature
 from notifications_utils.international_billing_rates import INTERNATIONAL_BILLING_RATES
@@ -25,11 +24,9 @@
 from werkzeug.datastructures import MultiDict

 from app import create_uuid, db, signer_personalisation
-from app.aws.s3 import get_s3_bucket_objects, remove_s3_object
 from app.dao.dao_utils import transactional
 from app.dao.date_util import utc_midnight_n_days_ago
 from app.errors import InvalidRequest
-from app.letters.utils import LETTERS_PDF_FILE_LOCATION_STRUCTURE
 from app.models import (
     EMAIL_TYPE,
     KEY_TYPE_TEST,
@@ -391,9 +388,6 @@ def delete_notifications_older_than_retention_by_type(notification_type, qry_lim
             convert_utc_to_local_timezone(datetime.utcnow()).date()
         ) - timedelta(days=f.days_of_retention)

-        if notification_type == LETTER_TYPE:
-            _delete_letters_from_s3(notification_type, f.service_id, days_of_retention, qry_limit)
-
         insert_update_notification_history(notification_type, days_of_retention, f.service_id)

         current_app.logger.info("Deleting {} notifications for service id: {}".format(notification_type, f.service_id))
@@ -409,8 +403,6 @@ def delete_notifications_older_than_retention_by_type(notification_type, qry_lim

     for row in service_ids_to_purge:
         service_id = row._mapping["id"]
-        if notification_type == LETTER_TYPE:
-            _delete_letters_from_s3(notification_type, service_id, seven_days_ago, qry_limit)
         insert_update_notification_history(notification_type, seven_days_ago, service_id)
         deleted += _delete_notifications(notification_type, seven_days_ago, service_id, qry_limit)
@@ -486,38 +478,6 @@ def insert_update_notification_history(notification_type, date_to_delete_from, s
     db.session.commit()


-def _delete_letters_from_s3(notification_type, service_id, date_to_delete_from, query_limit):
-    letters_to_delete_from_s3 = (
-        db.session.query(Notification)
-        .filter(
-            Notification.notification_type == notification_type,
-            Notification.created_at < date_to_delete_from,
-            Notification.service_id == service_id,
-        )
-        .limit(query_limit)
-        .all()
-    )
-    for letter in letters_to_delete_from_s3:
-        bucket_name = current_app.config["LETTERS_PDF_BUCKET_NAME"]
-        if letter.sent_at:
-            sent_at = str(letter.sent_at.date())
-            prefix = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
-                folder=sent_at + "/",
-                reference=letter.reference,
-                duplex="D",
-                letter_class="2",
-                colour="C",
-                crown="C" if letter.service.crown else "N",
-                date="",
-            ).upper()[:-5]
-            s3_objects = get_s3_bucket_objects(bucket_name=bucket_name, subfolder=prefix)
-            for s3_object in s3_objects:
-                try:
-                    remove_s3_object(bucket_name, s3_object["Key"])
-                except BotoClientError:
-                    current_app.logger.exception("Could not delete S3 object with filename: {}".format(s3_object["Key"]))


 @statsd(namespace="dao")
 @transactional
 def dao_delete_notifications_by_id(notification_id):
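
For context: the deleted _delete_letters_from_s3 rebuilt each letter's S3 key prefix from the PDF filename template and removed every matching object. A minimal sketch of that prefix arithmetic, assuming LETTERS_PDF_FILE_LOCATION_STRUCTURE follows the filename layout documented in app/letters/utils.py (the template string below is an assumption, not copied from the repo):

# Assumed template, matching '2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C.20180113120000.PDF'
LETTERS_PDF_FILE_LOCATION_STRUCTURE = "{folder}NOTIFY.{reference}.{duplex}.{letter_class}.{colour}.{crown}.{date}.PDF"

prefix = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
    folder="2018-01-13/",
    reference="ABCDEF1234567890",  # hypothetical reference
    duplex="D",
    letter_class="2",
    colour="C",
    crown="C",
    date="",  # left empty so the prefix matches any upload timestamp
).upper()[:-5]  # strips the trailing "..PDF" left by the empty date

print(prefix)  # 2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C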
8 changes: 2 additions & 6 deletions app/letters/rest.py
@@ -1,7 +1,5 @@
-from flask import Blueprint, jsonify, request
+from flask import Blueprint

-from app.letters.letter_schemas import letter_references
-from app.schema_validation import validate
 from app.v2.errors import register_errors

 letter_job = Blueprint("letter-job", __name__)
@@ -10,6 +8,4 @@

 @letter_job.route("/letters/returned", methods=["POST"])
 def create_process_returned_letters_job():
-    references = validate(request.get_json(), letter_references)
-
-    return jsonify(references=references["references"]), 200
+    pass
188 changes: 17 additions & 171 deletions app/letters/utils.py
Member:
Can we not delete this entire file?

Collaborator (Author):
We could! Let's do this incrementally though. I removed a bunch and will keep removing more, since it takes a lot of work to find out whether something is still called elsewhere or not.
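
A quick way to do that digging is to list remaining call sites before deleting a helper. A rough sketch (assumes git on PATH and the checkout root as working directory; illustration only, not part of this PR):

import subprocess

def find_call_sites(name):
    # Lines in tracked Python files that mention `name`,
    # excluding the function's own definition.
    result = subprocess.run(
        ["git", "grep", "-n", name, "--", "*.py"],
        capture_output=True,
        text=True,
    )
    return [line for line in result.stdout.splitlines() if "def " + name not in line]

# e.g. find_call_sites("get_letter_pdf") -> callers still to clean up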

@@ -1,21 +1,6 @@
 import io
 import math
 from datetime import datetime, timedelta
 from enum import Enum

-import boto3
-from flask import current_app
-from notifications_utils.letter_timings import LETTER_PROCESSING_DEADLINE
-from notifications_utils.pdf import pdf_page_count
-from notifications_utils.s3 import s3upload
-from notifications_utils.timezones import convert_utc_to_local_timezone
-
-from app.models import (
-    KEY_TYPE_TEST,
-    NOTIFICATION_VALIDATION_FAILED,
-    RESOLVE_POSTAGE_FOR_FILE_NAME,
-    SECOND_CLASS,
-)
+from app.models import SECOND_CLASS


 class ScanErrorType(Enum):
@@ -29,203 +14,64 @@ class ScanErrorType(Enum):


 def get_folder_name(_now, is_test_or_scan_letter=False):
-    if is_test_or_scan_letter:
-        folder_name = ""
-    else:
-        print_datetime = convert_utc_to_local_timezone(_now)
-        if print_datetime.time() > LETTER_PROCESSING_DEADLINE:
-            print_datetime += timedelta(days=1)
-        folder_name = "{}/".format(print_datetime.date())
-    return folder_name
+    pass
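
The removed get_folder_name bucketed letters by local print date and rolled anything past the processing deadline into the next day's run. A minimal sketch of that behaviour, with an assumed 17:30 deadline standing in for notifications_utils' LETTER_PROCESSING_DEADLINE and naive datetimes standing in for the timezone conversion:

from datetime import datetime, time, timedelta

ASSUMED_DEADLINE = time(17, 30)  # stand-in for LETTER_PROCESSING_DEADLINE

def folder_for(print_datetime):
    # Past the deadline, the letter joins the next day's print run.
    if print_datetime.time() > ASSUMED_DEADLINE:
        print_datetime += timedelta(days=1)
    return "{}/".format(print_datetime.date())

print(folder_for(datetime(2024, 6, 10, 16, 0)))  # 2024-06-10/
print(folder_for(datetime(2024, 6, 10, 18, 0)))  # 2024-06-11/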


 def get_letter_pdf_filename(reference, crown, is_scan_letter=False, postage=SECOND_CLASS):
-    now = datetime.utcnow()
-
-    upload_file_name = LETTERS_PDF_FILE_LOCATION_STRUCTURE.format(
-        folder=get_folder_name(now, is_scan_letter),
-        reference=reference,
-        duplex="D",
-        letter_class=RESOLVE_POSTAGE_FOR_FILE_NAME[postage],
-        colour="C",
-        crown="C" if crown else "N",
-        date=now.strftime("%Y%m%d%H%M%S"),
-    ).upper()
-
-    return upload_file_name
+    pass


 def get_bucket_name_and_prefix_for_notification(notification):
-    folder = ""
-    if notification.status == NOTIFICATION_VALIDATION_FAILED:
-        bucket_name = current_app.config["INVALID_PDF_BUCKET_NAME"]
-    elif notification.key_type == KEY_TYPE_TEST:
-        bucket_name = current_app.config["TEST_LETTERS_BUCKET_NAME"]
-    else:
-        bucket_name = current_app.config["LETTERS_PDF_BUCKET_NAME"]
-        if notification.sent_at:
-            folder = "{}/".format(notification.sent_at.date())
-        elif notification.updated_at:
-            folder = get_folder_name(notification.updated_at, False)
-        else:
-            folder = get_folder_name(notification.created_at, False)
-
-    upload_file_name = PRECOMPILED_BUCKET_PREFIX.format(folder=folder, reference=notification.reference).upper()
-
-    return bucket_name, upload_file_name
+    pass


 def get_reference_from_filename(filename):
-    # filename looks like '2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C.20180113120000.PDF'
-    filename_parts = filename.split(".")
-    return filename_parts[1]
+    pass
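
With the filename layout from the comment above, the reference is simply the second dot-separated segment, e.g.:

filename = "2018-01-13/NOTIFY.ABCDEF1234567890.D.2.C.C.20180113120000.PDF"
reference = filename.split(".")[1]
# reference == "ABCDEF1234567890"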


 def upload_letter_pdf(notification, pdf_data, precompiled=False):
-    current_app.logger.info(
-        "PDF Letter {} reference {} created at {}, {} bytes".format(
-            notification.id,
-            notification.reference,
-            notification.created_at,
-            len(pdf_data),
-        )
-    )
-
-    upload_file_name = get_letter_pdf_filename(
-        notification.reference,
-        notification.service.crown,
-        is_scan_letter=precompiled or notification.key_type == KEY_TYPE_TEST,
-        postage=notification.postage,
-    )
-
-    if precompiled:
-        bucket_name = current_app.config["LETTERS_SCAN_BUCKET_NAME"]
-    elif notification.key_type == KEY_TYPE_TEST:
-        bucket_name = current_app.config["TEST_LETTERS_BUCKET_NAME"]
-    else:
-        bucket_name = current_app.config["LETTERS_PDF_BUCKET_NAME"]
-
-    s3upload(
-        filedata=pdf_data,
-        region=current_app.config["AWS_REGION"],
-        bucket_name=bucket_name,
-        file_location=upload_file_name,
-    )
-
-    current_app.logger.info(
-        "Uploaded letters PDF {} to {} for notification id {}".format(upload_file_name, bucket_name, notification.id)
-    )
-    return upload_file_name
+    pass


 def move_failed_pdf(source_filename, scan_error_type):
-    scan_bucket = current_app.config["LETTERS_SCAN_BUCKET_NAME"]
-
-    target_filename = ("ERROR/" if scan_error_type == ScanErrorType.ERROR else "FAILURE/") + source_filename
-
-    _move_s3_object(scan_bucket, source_filename, scan_bucket, target_filename)
+    pass


 def copy_redaction_failed_pdf(source_filename):
-    scan_bucket = current_app.config["LETTERS_SCAN_BUCKET_NAME"]
-
-    target_filename = "REDACTION_FAILURE/" + source_filename
-
-    _copy_s3_object(scan_bucket, source_filename, scan_bucket, target_filename)
+    pass


 def move_error_pdf_to_scan_bucket(source_filename):
-    scan_bucket = current_app.config["LETTERS_SCAN_BUCKET_NAME"]
-    error_file = "ERROR/" + source_filename
-
-    _move_s3_object(scan_bucket, error_file, scan_bucket, source_filename)
+    pass


 def move_scan_to_invalid_pdf_bucket(source_filename):
-    scan_bucket = current_app.config["LETTERS_SCAN_BUCKET_NAME"]
-    invalid_pdf_bucket = current_app.config["INVALID_PDF_BUCKET_NAME"]
-    _move_s3_object(scan_bucket, source_filename, invalid_pdf_bucket, source_filename)
+    pass


 def move_uploaded_pdf_to_letters_bucket(source_filename, upload_filename):
-    _move_s3_object(
-        source_bucket=current_app.config["TRANSIENT_UPLOADED_LETTERS"],
-        source_filename=source_filename,
-        target_bucket=current_app.config["LETTERS_PDF_BUCKET_NAME"],
-        target_filename=upload_filename,
-    )
+    pass


 def get_file_names_from_error_bucket():
-    s3 = boto3.resource("s3")
-    scan_bucket = current_app.config["LETTERS_SCAN_BUCKET_NAME"]
-    bucket = s3.Bucket(scan_bucket)
-
-    return bucket.objects.filter(Prefix="ERROR")
+    pass


 def get_letter_pdf(notification):
-    bucket_name, prefix = get_bucket_name_and_prefix_for_notification(notification)
-
-    s3 = boto3.resource("s3")
-    bucket = s3.Bucket(bucket_name)
-    item = next(x for x in bucket.objects.filter(Prefix=prefix))
-
-    obj = s3.Object(bucket_name=bucket_name, key=item.key)
-    return obj.get()["Body"].read()
+    pass


 def _move_s3_object(source_bucket, source_filename, target_bucket, target_filename):
-    s3 = boto3.resource("s3")
-    copy_source = {"Bucket": source_bucket, "Key": source_filename}
-
-    target_bucket = s3.Bucket(target_bucket)
-    obj = target_bucket.Object(target_filename)
-
-    # Tags are copied across but the expiration time is reset in the destination bucket
-    # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket,
-    # in the destination bucket the expiration time will be reset to 7 days left to expire
-    obj.copy(copy_source, ExtraArgs={"ServerSideEncryption": "AES256"})
-
-    s3.Object(source_bucket, source_filename).delete()
-
-    current_app.logger.info(
-        "Moved letter PDF: {}/{} to {}/{}".format(source_bucket, source_filename, target_bucket, target_filename)
-    )
+    pass


 def _copy_s3_object(source_bucket, source_filename, target_bucket, target_filename):
-    s3 = boto3.resource("s3")
-    copy_source = {"Bucket": source_bucket, "Key": source_filename}
-
-    target_bucket = s3.Bucket(target_bucket)
-    obj = target_bucket.Object(target_filename)
-
-    # Tags are copied across but the expiration time is reset in the destination bucket
-    # e.g. if a file has 5 days left to expire on a ONE_WEEK retention in the source bucket,
-    # in the destination bucket the expiration time will be reset to 7 days left to expire
-    obj.copy(copy_source, ExtraArgs={"ServerSideEncryption": "AES256"})
-
-    current_app.logger.info(
-        "Copied letter PDF: {}/{} to {}/{}".format(source_bucket, source_filename, target_bucket, target_filename)
-    )
+    pass


 def letter_print_day(created_at):
-    bst_print_datetime = convert_utc_to_local_timezone(created_at) + timedelta(hours=6, minutes=30)
-    bst_print_date = bst_print_datetime.date()
-
-    current_bst_date = convert_utc_to_local_timezone(datetime.utcnow()).date()
-
-    if bst_print_date >= current_bst_date:
-        return "today"
-    else:
-        print_date = bst_print_datetime.strftime("%d %B").lstrip("0")
-        return "on {}".format(print_date)
+    pass


 def get_page_count(pdf):
-    pages = pdf_page_count(io.BytesIO(pdf))
-    pages_per_sheet = 2
-    billable_units = math.ceil(pages / pages_per_sheet)
-    return billable_units
+    pass
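
The removed billing math assumed two pages per sheet and rounded up, so an odd page count still bills a full sheet. The same calculation, sketched with assumed page counts:

import math

def billable_units(pages, pages_per_sheet=2):
    return math.ceil(pages / pages_per_sheet)

assert billable_units(1) == 1  # one single-sided sheet
assert billable_units(2) == 1  # one duplex sheet
assert billable_units(3) == 2  # rounds up to a second sheet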