Skip to content

Commit

Permalink
Merge pull request #29 from NYPL/daily-location-visits-alarms
Browse files Browse the repository at this point in the history
Add DailyLocationVisitsAlarms
  • Loading branch information
aaronfriedman6 authored Dec 23, 2024
2 parents bde6756 + fba7da1 commit b981f08
Show file tree
Hide file tree
Showing 13 changed files with 244 additions and 9 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ jobs:
name: Updates changelog
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: dangoslen/changelog-enforcer@v3
test:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set up Python 3.12
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 2024-12-17
### Added
- Add DailyLocationVisits alarms checking that the Redshift daily_location_visits table has the right sites, has no duplicates, and contains mostly healthy data

## 2024-11-13
### Added
- Add BranchCodesMap alarms checking that it's in sync with all branches with location hours
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo
* When there are fewer than 10000 new location visits records for the previous day
* When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows
* When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows
* When the sites from the aggregated location visits don't perfectly match the known sites
* When there are duplicate aggregated location visits sites
* When fewer than 50% of sites had a healthy day of location visits
* When the number of active itype/location/stat group codes in Sierra and Redshift differs
* When there are duplicate active itype/location/stat group codes in Redshift
* When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated
Expand Down
2 changes: 2 additions & 0 deletions alarm_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms
from alarms.models.circ_trans_alarms import CircTransAlarms
from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms
from alarms.models.holds_alarms import HoldsAlarms
from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms
Expand Down Expand Up @@ -64,6 +65,7 @@ def _setup_alarms(self):
BranchCodesMapAlarms(self.redshift_client),
CircTransAlarms(self.redshift_client, self.sierra_client),
GranularLocationVisitsAlarms(self.redshift_client),
DailyLocationVisitsAlarms(self.redshift_client),
HoldsAlarms(self.redshift_client),
OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials),
PatronInfoAlarms(self.redshift_client, self.sierra_client),
Expand Down
84 changes: 84 additions & 0 deletions alarms/models/daily_location_visits_alarms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os

from alarms.alarm import Alarm
from datetime import timedelta
from helpers.query_helper import build_redshift_daily_location_visits_query
from nypl_py_utils.classes.s3_client import S3Client
from nypl_py_utils.functions.log_helper import create_log


class DailyLocationVisitsAlarms(Alarm):
    """Alarms that validate one day of the Redshift daily_location_visits table.

    The sites Redshift returns for the tested date are compared against the
    known ShopperTrak sites fetched from S3. An error is logged when sites
    are duplicated, missing, or unknown, and when fewer than half of the
    returned rows are flagged as healthy.
    """

    def __init__(self, redshift_client):
        """Pass the Redshift client to the base Alarm and create the logger."""
        super().__init__(redshift_client)
        self.logger = create_log("daily_location_visits_alarms")

    def run_checks(self):
        """Fetch the known and actual sites for the tested date and run all checks.

        The tested date is 29 days before ``self.yesterday_date``. Both the S3
        client and the Redshift connection are closed even if fetching fails.
        """
        # NOTE(review): presumably the 29-day offset gives the aggregated data
        # time to settle before it is checked -- confirm.
        date_to_test = (self.yesterday_date - timedelta(days=29)).isoformat()
        self.logger.info(f"\nDAILY LOCATION VISITS: {date_to_test}\n")

        s3_client = S3Client(
            os.environ["SHOPPERTRAK_S3_BUCKET"], os.environ["SHOPPERTRAK_S3_RESOURCE"]
        )
        try:
            all_shoppertrak_sites = set(s3_client.fetch_cache())
        finally:
            # Release the S3 client even when the fetch raises.
            s3_client.close()

        redshift_table = "daily_location_visits" + self.redshift_suffix
        redshift_query = build_redshift_daily_location_visits_query(
            redshift_table, date_to_test
        )

        self.redshift_client.connect()
        try:
            redshift_results = self.redshift_client.execute_query(redshift_query)
        finally:
            # Never leave the Redshift connection open after a failed query.
            self.redshift_client.close_connection()

        # Split the (site id, health flag) rows into two parallel lists.
        redshift_sites = []
        redshift_healthy = []
        for shoppertrak_site, is_all_healthy in redshift_results:
            redshift_sites.append(shoppertrak_site)
            redshift_healthy.append(int(is_all_healthy))

        self.check_redshift_duplicate_sites_alarm(redshift_sites)
        self.check_redshift_missing_sites_alarm(redshift_sites, all_shoppertrak_sites)
        self.check_redshift_extra_sites_alarm(redshift_sites, all_shoppertrak_sites)
        self.check_redshift_healthy_sites_alarm(redshift_healthy)

    def check_redshift_duplicate_sites_alarm(self, redshift_sites):
        """Log an error listing every site id that appears more than once.

        Args:
            redshift_sites: site ids in the order Redshift returned them.
        """
        seen_sites = set()
        duplicate_sites = set()
        for site in redshift_sites:
            if site in seen_sites:
                duplicate_sites.add(site)
            seen_sites.add(site)

        if duplicate_sites:
            self.logger.error(
                "The following ShopperTrak sites are duplicated: {}".format(
                    sorted(duplicate_sites)
                )
            )

    def check_redshift_missing_sites_alarm(self, redshift_sites, all_sites):
        """Log an error listing known sites that Redshift did not return.

        Args:
            redshift_sites: site ids returned by Redshift.
            all_sites: set of known ShopperTrak site ids.
        """
        missing_sites = all_sites.difference(redshift_sites)
        if missing_sites:
            self.logger.error(
                "The following ShopperTrak sites are missing: {}".format(
                    sorted(missing_sites)
                )
            )

    def check_redshift_extra_sites_alarm(self, redshift_sites, all_sites):
        """Log an error listing Redshift site ids that are not known sites.

        Args:
            redshift_sites: site ids returned by Redshift.
            all_sites: set of known ShopperTrak site ids.
        """
        extra_sites = set(redshift_sites).difference(all_sites)
        if extra_sites:
            self.logger.error(
                "The following unknown ShopperTrak site ids were found: {}".format(
                    sorted(extra_sites)
                )
            )

    def check_redshift_healthy_sites_alarm(self, redshift_healthy):
        """Log an error when fewer than 50% of the returned rows are healthy.

        Args:
            redshift_healthy: one 0/1 int per Redshift row (1 means healthy).
        """
        if not redshift_healthy:
            # With no rows there is no ratio to compute; report the problem
            # instead of raising ZeroDivisionError.
            self.logger.error(
                "No ShopperTrak sites were found, so site health could not be checked"
            )
            return

        healthy_fraction = sum(redshift_healthy) / len(redshift_healthy)
        if healthy_fraction < 0.5:
            self.logger.error(
                "Only {0:.2f}% of ShopperTrak sites were healthy".format(
                    healthy_fraction * 100
                )
            )
2 changes: 2 additions & 0 deletions config/devel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
ENVISIONWARE_DB_PORT: 3306
ENVISIONWARE_DB_NAME: lasttwodays
REDSHIFT_DB_NAME: dev
SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
SHOPPERTRAK_S3_RESOURCE: site_ids.json
LOG_LEVEL: info
SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGkwZwYJKoZIhvcNAQcGoFowWAIBADBTBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDCu20jxZpTC9cf9V8QIBEIAmt4TZJ7JuFQ1C845HxG8wAXzC7SFHkMNe4U6rKlD1twveXygfiQc=
SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
Expand Down
2 changes: 2 additions & 0 deletions config/production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
ENVISIONWARE_DB_PORT: 3306
ENVISIONWARE_DB_NAME: lasttwodays
REDSHIFT_DB_NAME: production
SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
SHOPPERTRAK_S3_RESOURCE: site_ids.json
LOG_LEVEL: info
SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGwwagYJKoZIhvcNAQcGoF0wWwIBADBWBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDFrV5IoeP0tL98V0zgIBEIApBnZ9IWKJ/s6F++zu0rOeWwfB+Kkwh4aFt68vD7jv4LaO0zOppTOPycA=
SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
Expand Down
2 changes: 2 additions & 0 deletions config/qa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
ENVISIONWARE_DB_PORT: 3306
ENVISIONWARE_DB_NAME: lasttwodays
REDSHIFT_DB_NAME: qa
SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
SHOPPERTRAK_S3_RESOURCE: site_ids.json
LOG_LEVEL: info
SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGwwagYJKoZIhvcNAQcGoF0wWwIBADBWBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDL6zq0QQYBhdW4rz8gIBEIApxucxIVAb1Ec4uHUAwxRZcvC8OAZxuj/oJAkhCPh8sPJJ08w3ECoXoKk=
SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
Expand Down
9 changes: 9 additions & 0 deletions helpers/query_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
"SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';"
)

# Template selecting each row's ShopperTrak site id and is_all_healthy flag
# from {table} where visits_date equals {date}.
_REDSHIFT_DAILY_LOCATION_VISITS_QUERY = (
    "SELECT shoppertrak_site_id, is_all_healthy FROM {table} "
    "WHERE visits_date = '{date}';"
)

# Template counting the fresh rows in {table} whose increment_start falls on
# the day {date}.
_REDSHIFT_LOCATION_VISITS_COUNT_QUERY = (
    "SELECT COUNT(id) FROM {table} "
    "WHERE increment_start::DATE = '{date}' AND is_fresh;"
)
Expand Down Expand Up @@ -192,6 +197,10 @@ def build_redshift_circ_trans_query(table, date_field, date):
)


def build_redshift_daily_location_visits_query(table, date):
    """Fill the daily location visits query template with a table and date."""
    query_params = {"table": table, "date": date}
    return _REDSHIFT_DAILY_LOCATION_VISITS_QUERY.format(**query_params)


def build_redshift_location_visits_count_query(table, date):
    """Fill the location visits count query template with a table and date."""
    query_params = {"table": table, "date": date}
    return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(**query_params)

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
nypl-py-utils[mysql-client,postgresql-client,redshift-client,config-helper]==1.4.0
nypl-py-utils[mysql-client,postgresql-client,redshift-client,s3-client,config-helper]==1.6.2
selenium>=4.10.0
125 changes: 125 additions & 0 deletions tests/alarms/models/test_daily_location_visits_alarms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import logging
import pytest

from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
from datetime import date


class TestDailyLocationVisitsAlarms:
    """Tests for the DailyLocationVisitsAlarms constructor and run_checks."""

    @pytest.fixture
    def test_instance(self, mocker):
        """Return an alarms instance whose S3 site cache is aa, bb, and cc."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.S3Client.fetch_cache",
            return_value=["aa", "bb", "cc"],
        )
        return DailyLocationVisitsAlarms(mocker.MagicMock())

    def test_init(self, mocker):
        """Check the suffix, test flag, and date attributes after construction."""
        daily_location_visits_alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
        assert daily_location_visits_alarms.redshift_suffix == "_test_redshift_db"
        assert daily_location_visits_alarms.run_added_tests
        assert daily_location_visits_alarms.yesterday_date == date(2023, 5, 31)
        assert daily_location_visits_alarms.yesterday == "2023-05-31"

    def test_run_checks_no_alarm(self, mocker, caplog):
        """Matching sites with two of three healthy must log no error."""
        daily_location_visits_alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
        # Replace the whole S3Client class so constructor args can be asserted.
        mock_s3_client = mocker.MagicMock()
        mock_s3_constructor = mocker.patch(
            "alarms.models.daily_location_visits_alarms.S3Client",
            return_value=mock_s3_client,
        )
        mock_s3_client.fetch_cache.return_value = ["aa", "bb", "cc"]

        mock_redshift_query = mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query",
            return_value="redshift query",
        )
        daily_location_visits_alarms.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", True],
            ["cc", False],
        )

        with caplog.at_level(logging.ERROR):
            daily_location_visits_alarms.run_checks()
        assert caplog.text == ""

        mock_s3_constructor.assert_called_once_with(
            "test_shoppertrak_s3_bucket", "test_shoppertrak_s3_resource"
        )
        mock_s3_client.fetch_cache.assert_called_once()
        mock_s3_client.close.assert_called_once()
        daily_location_visits_alarms.redshift_client.connect.assert_called_once()
        # 2023-05-02 is 29 days before the 2023-05-31 yesterday_date.
        mock_redshift_query.assert_called_once_with(
            "daily_location_visits_test_redshift_db", "2023-05-02"
        )
        daily_location_visits_alarms.redshift_client.execute_query.assert_called_once_with(
            "redshift query"
        )
        daily_location_visits_alarms.redshift_client.close_connection.assert_called_once()

    def test_run_checks_redshift_duplicate_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """A site id returned twice by Redshift must be reported as duplicated."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", True],
            ["bb", True],
            ["cc", False],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert ("The following ShopperTrak sites are duplicated: ['bb']") in caplog.text

    def test_run_checks_redshift_missing_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """A known site absent from the Redshift rows must be reported missing."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["cc", True],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert "The following ShopperTrak sites are missing: ['bb']" in caplog.text

    def test_run_checks_redshift_extra_sites_alarm(self, test_instance, mocker, caplog):
        """Unknown Redshift site ids must be reported in sorted order."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        # "ee" precedes "dd" here so the assertion also pins the sorted output.
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", True],
            ["cc", False],
            ["ee", True],
            ["dd", False],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert (
            "The following unknown ShopperTrak site ids were found: ['dd', 'ee']"
        ) in caplog.text

    def test_run_checks_redshift_healthy_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """One healthy site out of three (33.33%) must trigger the health alarm."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", False],
            ["cc", False],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert "Only 33.33% of ShopperTrak sites were healthy" in caplog.text
10 changes: 5 additions & 5 deletions tests/alarms/models/test_granular_location_visits_alarms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ def test_instance(self, mocker):
return GranularLocationVisitsAlarms(mocker.MagicMock())

def test_init(self, mocker):
location_visits_alarms = GranularLocationVisitsAlarms(mocker.MagicMock())
assert location_visits_alarms.redshift_suffix == "_test_redshift_db"
assert location_visits_alarms.run_added_tests
assert location_visits_alarms.yesterday_date == date(2023, 5, 31)
assert location_visits_alarms.yesterday == "2023-05-31"
gran_location_visits_alarms = GranularLocationVisitsAlarms(mocker.MagicMock())
assert gran_location_visits_alarms.redshift_suffix == "_test_redshift_db"
assert gran_location_visits_alarms.run_added_tests
assert gran_location_visits_alarms.yesterday_date == date(2023, 5, 31)
assert gran_location_visits_alarms.yesterday == "2023-05-31"

def test_run_checks_no_alarm(self, test_instance, mocker, caplog):
mock_redshift_count_query = mocker.patch(
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
"ENVISIONWARE_DB_PASSWORD": "test_envisionware_password",
"OVERDRIVE_USERNAME": "test_overdrive_username",
"OVERDRIVE_PASSWORD": "test_overdrive_password",
"SHOPPERTRAK_S3_BUCKET": "test_shoppertrak_s3_bucket",
"SHOPPERTRAK_S3_RESOURCE": "test_shoppertrak_s3_resource",
}


Expand Down

0 comments on commit b981f08

Please sign in to comment.