@notify_celery.task(name="insert-quarter-data-for-annual-limits")
@statsd(namespace="tasks")
def insert_quarter_data_for_annual_limits(process_day):
    """
    Aggregate each service's notification counts for the previous quarter and
    upsert them into the annual_limits_data table.

    The quarter is derived from process_day via get_previous_quarter: running
    this with any date inside a quarter records the quarter before it.

    Args:
        process_day (datetime): date used to determine which quarter to record.
    """
    quarter, dates = get_previous_quarter(process_day)
    start_date = dates[0]
    end_date = dates[1]

    # Map service id -> (email_annual_limit, sms_annual_limit) so the limits in
    # force at insert time are stored alongside the counts.
    service_info = {x.id: (x.email_annual_limit, x.sms_annual_limit) for x in Service.query.all()}
    chunk_size = 20
    # Iterating the dict yields its keys (service ids); islice consumes them in
    # chunks so one failing chunk doesn't abort the whole run.
    iter_service_ids = iter(service_info)

    while True:
        chunk = list(islice(iter_service_ids, chunk_size))

        if not chunk:
            current_app.logger.info(
                "insert_quarter_data_for_annual_limits completed for quarter {} on {}".format(
                    quarter, datetime.now(timezone.utc).date()
                )
            )
            break

        try:
            start = datetime.now(timezone.utc)
            transit_data = fetch_quarter_data(start_date, end_date, service_ids=chunk)
            end = datetime.now(timezone.utc)
            current_app.logger.info(
                "fetch_quarter_data_for_annual_limits for time period {} to {} fetched in {} seconds".format(
                    start_date, end_date, (end - start).seconds
                )
            )
            insert_quarter_data(transit_data, quarter, service_info)

            current_app.logger.info(
                "insert_quarter_data task complete: {} rows updated for time period {} to {} for service_ids {}".format(
                    len(transit_data), start_date, end_date, chunk
                )
            )
        except Exception as e:
            # Log and continue with the next chunk rather than failing the task.
            # Fixed doubled word ("for for") in the original message.
            current_app.logger.error(
                "insert_quarter_data_for_annual_limits task failed for time period {} to {} for service_ids {}. Error: {}".format(
                    start_date, end_date, chunk, e
                )
            )
Error: {}".format( + start_date, end_date, chunk, e + ) + ) diff --git a/app/dao/annual_limits_data_dao.py b/app/dao/annual_limits_data_dao.py new file mode 100644 index 0000000000..91aecf004f --- /dev/null +++ b/app/dao/annual_limits_data_dao.py @@ -0,0 +1,72 @@ +from datetime import datetime + +from sqlalchemy.dialects.postgresql import insert + +from app import db +from app.models import AnnualLimitsData + + +def get_previous_quarter(date_to_check): + year = date_to_check.year + month = date_to_check.month + + quarter = "" + start_date = None + end_date = None + if month in [1, 2, 3]: + quarter = "Q3" + year -= 1 + start_date = datetime(year, 10, 1) + end_date = datetime(year, 12, 31, 23, 59, 59) + elif month in [4, 5, 6]: + quarter = "Q4" + start_date = datetime(year, 1, 1) + end_date = datetime(year, 3, 31, 23, 59, 59) + year -= 1 # Cause we want to store it as Q4 of the previous year + elif month in [7, 8, 9]: + quarter = "Q1" + start_date = datetime(year, 4, 1) + end_date = datetime(year, 6, 30, 23, 59, 59) + elif month in [10, 11, 12]: + quarter = "Q2" + start_date = datetime(year, 7, 1) + end_date = datetime(year, 9, 30, 23, 59, 59) + + quarter_name = f"{quarter}-{year}" if quarter else "" + return quarter_name, (start_date, end_date) + + +def insert_quarter_data(data, quarter, service_info): + """ + Insert data for each quarter into the database. 
def insert_quarter_data(data, quarter, service_info):
    """
    Upsert one quarter's notification counts into annual_limits_data.

    Each row in data is a namedtuple with the following fields:
    - service_id,
    - notification_type,
    - notification_count

    Args:
        data: rows of (service_id, notification_type, notification_count).
        quarter (str): time-period label, e.g. "Q1-2018".
        service_info (dict): service_id -> (email_annual_limit, sms_annual_limit),
            so the limits in force are stored alongside the counts.
    """
    table = AnnualLimitsData.__table__

    for row in data:
        insert_stmt = insert(table).values(
            service_id=row.service_id,
            time_period=quarter,
            annual_email_limit=service_info[row.service_id][0],
            annual_sms_limit=service_info[row.service_id][1],
            notification_type=row.notification_type,
            notification_count=row.notification_count,
        )
        # On conflict with the (service_id, time_period, notification_type)
        # key, overwrite the limits and the count with the incoming values.
        # Reference the statement's own `excluded` pseudo-table rather than
        # constructing throwaway insert() objects just to reach .excluded.
        stmt = insert_stmt.on_conflict_do_update(
            index_elements=["service_id", "time_period", "notification_type"],
            set_={
                "annual_email_limit": insert_stmt.excluded.annual_email_limit,
                "annual_sms_limit": insert_stmt.excluded.annual_sms_limit,
                "notification_count": insert_stmt.excluded.notification_count,
            },
        )
        db.session.connection().execute(stmt)
    db.session.commit()


def fetch_quarter_data(start_date, end_date, service_ids):
    """
    Sum notification counts per service and notification type over a date range.

    Args:
        start_date (datetime obj): inclusive start of the quarter.
        end_date (datetime obj): inclusive end of the quarter.
        service_ids (list): list of service_ids to include.

    Returns:
        list of (service_id, notification_type, notification_count) rows.
    """
    query = (
        db.session.query(
            FactNotificationStatus.service_id,
            FactNotificationStatus.notification_type,
            func.sum(FactNotificationStatus.notification_count).label("notification_count"),
        )
        .filter(
            FactNotificationStatus.service_id.in_(service_ids),
            FactNotificationStatus.bst_date >= start_date,
            FactNotificationStatus.bst_date <= end_date,
        )
        .group_by(FactNotificationStatus.service_id, FactNotificationStatus.notification_type)
    )
    return query.all()
# --- app/models.py ---
class AnnualLimitsData(BaseModel):
    """One row per (service, quarter, notification type), recording how many
    notifications the service sent that quarter together with the annual
    limits that were in force when the row was written."""

    __tablename__ = "annual_limits_data"

    # Composite primary key: service_id + time_period + notification_type.
    service_id = db.Column(UUID(as_uuid=True), db.ForeignKey("services.id"), primary_key=True)
    # Quarter label, e.g. "Q1-2018" (produced by get_previous_quarter).
    time_period = db.Column(db.String, primary_key=True)
    annual_email_limit = db.Column(db.BigInteger, nullable=False)
    annual_sms_limit = db.Column(db.BigInteger, nullable=False)
    notification_type = db.Column(notification_types, nullable=False, primary_key=True)
    notification_count = db.Column(db.BigInteger, nullable=False)

    __table_args__ = (
        # Add the composite unique constraint on service_id, time_period, and notification_type
        # NOTE(review): migration 0465 creates this constraint under the name
        # "uq_service_id_notification_type_time_period" — confirm the name
        # mismatch with "uix_service_time_notification" here is intentional.
        UniqueConstraint("service_id", "time_period", "notification_type", name="uix_service_time_notification"),
        # Define the indexes within __table_args__
        db.Index("ix_service_id_notification_type", "service_id", "notification_type"),
        db.Index("ix_service_id_notification_type_time", "time_period", "service_id", "notification_type"),
    )


# --- migrations/versions/0465_add_constraints.py ---
"""
Revision ID: 0465_add_constraints
Revises: 0464_add_annual_limits_data
Create Date: 2024-10-31 13:32:00
"""
from alembic import op

revision = "0465_add_constraints"
down_revision = "0464_add_annual_limits_data"


def upgrade():
    # Index for quarter-scoped lookups (leading column time_period).
    op.create_index(
        "ix_service_id_notification_type_time", "annual_limits_data", ["time_period", "service_id", "notification_type"]
    )
    # Enforce one row per (service, notification type, quarter).
    op.create_unique_constraint(
        "uq_service_id_notification_type_time_period", "annual_limits_data", ["service_id", "notification_type", "time_period"]
    )


def downgrade():
    # Drop in reverse order of upgrade().
    op.drop_constraint("uq_service_id_notification_type_time_period", "annual_limits_data")
    op.drop_index("ix_service_id_notification_type_time", "annual_limits_data")
# --- tests/app/celery/test_reporting_tasks.py ---
class TestInsertQuarterData:
    def test_insert_quarter_data(self, notify_db_session):
        # Two services with counts spread across two fiscal quarters.
        first_service = create_service(service_name="service_1")
        second_service = create_service(service_name="service_2")

        create_ft_notification_status(date(2018, 1, 1), "sms", first_service, count=4)
        create_ft_notification_status(date(2018, 5, 2), "sms", first_service, count=10)
        create_ft_notification_status(date(2018, 3, 20), "sms", second_service, count=100)
        create_ft_notification_status(date(2018, 2, 1), "sms", second_service, count=1000)

        # Running with a date in Apr-Jun records Q4 2017 (Jan-Mar 2018 data).
        insert_quarter_data_for_annual_limits(datetime(2018, 4, 1))

        assert AnnualLimitsData.query.count() == 2
        first_row = AnnualLimitsData.query.filter_by(service_id=first_service.id).first()
        second_row = AnnualLimitsData.query.filter_by(service_id=second_service.id).first()
        assert first_row.notification_count == 4
        assert second_row.notification_count == 1100
        assert first_row.time_period == "Q4-2017"

        # Running with a date in Jul-Sep records Q1 2018 (Apr-Jun 2018 data).
        insert_quarter_data_for_annual_limits(datetime(2018, 7, 1))
        q1_row = AnnualLimitsData.query.filter_by(service_id=first_service.id, time_period="Q1-2018").first()
        assert q1_row.notification_count == 10


# --- tests/app/dao/test_annual_limits_data_dao.py ---
class TestGetPreviousQuarter:
    @pytest.mark.parametrize(
        "date_to_check, expected",
        [
            (datetime(2021, 4, 1), ("Q4-2020", (datetime(2021, 1, 1, 0, 0), datetime(2021, 3, 31, 23, 59, 59)))),
            (datetime(2021, 6, 1), ("Q4-2020", (datetime(2021, 1, 1, 0, 0), datetime(2021, 3, 31, 23, 59, 59)))),
            (datetime(2021, 9, 1), ("Q1-2021", (datetime(2021, 4, 1, 0, 0), datetime(2021, 6, 30, 23, 59, 59)))),
            (datetime(2021, 12, 1), ("Q2-2021", (datetime(2021, 7, 1, 0, 0), datetime(2021, 9, 30, 23, 59, 59)))),
            (datetime(2022, 1, 1), ("Q3-2021", (datetime(2021, 10, 1, 0, 0), datetime(2021, 12, 31, 23, 59, 59)))),
            (datetime(2022, 3, 31), ("Q3-2021", (datetime(2021, 10, 1, 0, 0), datetime(2021, 12, 31, 23, 59, 59)))),
            (datetime(2023, 5, 5), ("Q4-2022", (datetime(2023, 1, 1, 0, 0), datetime(2023, 3, 31, 23, 59, 59)))),
        ],
    )
    def test_get_previous_quarter(self, date_to_check, expected):
        # Label and (start, end) bounds must both match.
        assert get_previous_quarter(date_to_check) == expected
class TestInsertQuarterData:
    def test_insert_quarter_data(self, notify_db_session):
        first_service = create_service(service_name="service_1")
        second_service = create_service(service_name="service_2")

        service_info = {s.id: (s.email_annual_limit, s.sms_annual_limit) for s in Service.query.all()}
        NotificationData = namedtuple("NotificationData", ["service_id", "notification_type", "notification_count"])

        insert_quarter_data(
            [
                NotificationData(first_service.id, "sms", 4),
                NotificationData(second_service.id, "sms", 1100),
                NotificationData(first_service.id, "email", 2),
            ],
            "Q1-2018",
            service_info,
        )

        assert AnnualLimitsData.query.count() == 3

        # We want to check what happens when the same primary key but a new
        # notification count is introduced: the row must be updated in place.
        assert AnnualLimitsData.query.filter_by(service_id=second_service.id).first().notification_count == 1100
        insert_quarter_data(
            [NotificationData(second_service.id, "sms", 500)],
            "Q1-2018",
            service_info,
        )
        assert AnnualLimitsData.query.filter_by(service_id=second_service.id).first().notification_count == 500


# --- tests/app/dao/test_fact_notification_status_dao.py ---
class TestFetchQuarterData:
    def test_fetch_quarter_data(self, notify_db_session):
        service_1 = create_service(service_name="service_1")
        service_2 = create_service(service_name="service_2")

        create_ft_notification_status(date(2018, 1, 1), "sms", service_1, count=4)
        create_ft_notification_status(date(2018, 5, 2), "sms", service_1, count=10)
        create_ft_notification_status(date(2018, 3, 20), "sms", service_2, count=100)
        create_ft_notification_status(date(2018, 2, 1), "sms", service_2, count=1000)

        # Jan-Mar rows are summed per service; the May row is excluded.
        first_quarter_rows = fetch_quarter_data(date(2018, 1, 1), date(2018, 3, 27), service_ids=[service_1.id, service_2.id])
        assert first_quarter_rows == [
            (service_1.id, "sms", 4),
            (service_2.id, "sms", 1100),
        ]

        # Apr-Jun only contains service_1's May row.
        second_quarter_rows = fetch_quarter_data(date(2018, 4, 1), date(2018, 6, 30), service_ids=[service_1.id, service_2.id])
        assert second_quarter_rows == [(service_1.id, "sms", 10)]