From 85e24e2aa3046a7b0df9c01a9bac7d028c637fe8 Mon Sep 17 00:00:00 2001
From: Ryan Eggens <81097013+RyEggGit@users.noreply.github.com>
Date: Wed, 10 Jan 2024 17:32:40 -0500
Subject: [PATCH] add tests and queries

---
 backend/database/queries/queries.py           | 21 +++++
 backend/scraper/run_scrape.py                 | 48 +++++-----
 .../websites/FiftyA/FiftyAIncidentParser.py   |  8 +-
 backend/tests/scraper/test_fiftya.py          | 18 +++-
 backend/tests/scraper/test_run_scrape.py      | 88 +++++++++++++++++++
 backend/tests/test_queries.py                 | 54 ++++++++++++
 6 files changed, 205 insertions(+), 32 deletions(-)
 create mode 100644 backend/database/queries/queries.py
 create mode 100644 backend/tests/test_queries.py

diff --git a/backend/database/queries/queries.py b/backend/database/queries/queries.py
new file mode 100644
index 000000000..d4c90185f
--- /dev/null
+++ b/backend/database/queries/queries.py
@@ -0,0 +1,21 @@
+from .. import Officer, StateID, Incident
+from sqlalchemy.orm import Session
+
+
+def officer_exists(db: Session, stateID: StateID) -> bool:
+    """Check whether an officer with this state ID (value and state) exists."""
+    # Separate criteria are ANDed in SQL; Python `and` would drop one of them
+    return (
+        db.query(Officer)
+        .join(StateID)
+        .filter(StateID.value == stateID.value, StateID.state == stateID.state)
+        .first()
+        is not None
+    )
+
+
+def incident_exists(db: Session, case_id: str) -> bool:
+    return (
+        db.query(Incident).filter(Incident.case_id == case_id).first()
+        is not None
+    )
diff --git a/backend/scraper/run_scrape.py b/backend/scraper/run_scrape.py
index 47b34c9ab..7db07d400 100644
--- a/backend/scraper/run_scrape.py
+++ b/backend/scraper/run_scrape.py
@@ -2,40 +2,32 @@
 from backend.scraper.websites.FiftyA.FiftyA import FiftyA
 from backend.scraper.websites.NYPD.Nypd import Nypd
 from backend.scraper.mixins.ScrapeCache import ScrapeCacheContainer, ScrapeCache
-from backend.database import Officer, Incident, StateID, db
+from backend.database import Officer, Incident, db
+from backend.database.queries.queries import officer_exists, incident_exists
 from typing import Union
-from sqlalchemy.orm import Session, scoped_session
-
-
-def officer_exists(
-    session: scoped_session[Session], state_id_value: str
-) -> bool:
-    return (
-        session.query(Officer)
-        .join(StateID)
-        .filter(StateID.value == state_id_value)
-        .first()
-        is not None
-    )
-
-
-def incident_exists(session: Session, case_id: str) -> bool:
-    return (
-        session.query(Incident).filter(Incident.case_id == case_id).first()
-        is not None
-    )
 
 
 def add_to_database(
     model: Union[Officer, Incident], cache: ScrapeCache, uid: str, table: str
 ):
+    """
+    Adds the given model to the database if it doesn't already exist, and stores
+    it in the cache.
+
+    Args:
+        model (Union[Officer, Incident]): The model to be added to the database.
+        cache (ScrapeCache): The cache object used to store the model.
+        uid (str): The unique identifier for the model.
+        table (str): The table name where the model should be stored.
+
+    Returns:
+        None
+    """
     logger = logging.Logger("scrape")
     if cache.get_json(uid, table):
         logger.info(f"{table} {uid} already in cache")
         return
 
-    # add the model to the database
-    # Check if the model already exists in the database
     model_exists: bool
     if table == "officer":
         model_exists = officer_exists(
@@ -49,7 +41,7 @@ def add_to_database(
         )
     else:
         raise ValueError(f"Invalid table {table}")
-    if model_exists:  # type: ignore
+    if model_exists:
         logger.info(f"{table} {uid} already in database")
         return
 
@@ -64,6 +56,14 @@ def add_to_database(
 
 
 def scrape(debug: bool = False):
+    """
+    Scrapes data from FiftyA and NYPD sources, merges the data, and adds it to
+    the database.
+
+    Args:
+        debug (bool, optional): Flag indicating whether to enable debug mode.
+            Defaults to False.
+    """
     logger = logging.Logger("scrape")
     logger.info("Starting scrape")
 
diff --git a/backend/scraper/websites/FiftyA/FiftyAIncidentParser.py b/backend/scraper/websites/FiftyA/FiftyAIncidentParser.py
index d36dd71b1..d97fdac6e 100644
--- a/backend/scraper/websites/FiftyA/FiftyAIncidentParser.py
+++ b/backend/scraper/websites/FiftyA/FiftyAIncidentParser.py
@@ -148,10 +148,6 @@ def parse_complaint(
         incident_location = self._get_location(details_text)
         precinct_number, precinct_name = self._get_precinct(details_text)
 
-        # table = soup.find('tbody')
-        # perps = soup.find_all('a', class_="name")
-        # perpetrators = list(set([perp.text for perp in perps]))
-
         incident = Incident()
         incident.date_record_created = datetime.now().strftime(self.TIME_FORMAT)
         incident.time_of_incident = datetime.strptime(
@@ -192,6 +188,10 @@ def parse_complaint(
         incident.source_details = source  # type: ignore
         incident.victims = victim  # type: ignore
         incident.use_of_force = force  # type: ignore
+
+        # table = soup.find('tbody')
+        # perps = soup.find_all('a', class_="name")
+        # perpetrators = list(set([perp.text for perp in perps]))
         # data = {}
         # data["victim"] = victim
         # data["perpetrators"] = list(set(officer_involved_badges))
diff --git a/backend/tests/scraper/test_fiftya.py b/backend/tests/scraper/test_fiftya.py
index 1b5bbf5b0..078675722 100644
--- a/backend/tests/scraper/test_fiftya.py
+++ b/backend/tests/scraper/test_fiftya.py
@@ -1,6 +1,7 @@
 import pytest
 from backend.scraper.websites.FiftyA.FiftyA import FiftyA
 from backend.database import Officer, Incident
+from unittest.mock import patch, Mock
 
 
 @pytest.fixture
@@ -23,9 +24,18 @@ def test_sample_list(fiftya: FiftyA):
     assert len(sampled_list) == min(num, len(lst))
 
 
-def test_find_officers_in_precincts(fiftya: FiftyA):
-    debug = False
-    officers = fiftya._find_officers_in_precincts(debug)
+@patch.object(FiftyA, "_find_officers")
+@patch.object(FiftyA, "find_urls")
+def test_find_officers_in_precincts(
+    mock_find_urls: Mock, mock__find_officers: Mock, fiftya: FiftyA
+):
+    mock_find_urls.return_value = ["/command/1", "/command/2", "/command/3"]
+    mock__find_officers.return_value = [
+        "/officer/1",
+        "/officer/2",
+        "/officer/3",
+    ]
+    officers = fiftya._find_officers_in_precincts(debug=False)
     assert isinstance(officers, list)
     assert all(isinstance(officer, str) for officer in officers)
 
@@ -49,7 +59,7 @@ def test_find_officer_profile_and_complaints(fiftya: FiftyA):
 
 
 def test_extract_data(fiftya: FiftyA):
-    officer_profiles, incidents = fiftya.extract_data(debug=False)
+    officer_profiles, incidents = fiftya.extract_data(debug=True)
     assert isinstance(officer_profiles, list)
     assert all(isinstance(profile, Officer) for profile in officer_profiles)
     assert isinstance(incidents, list)
diff --git a/backend/tests/scraper/test_run_scrape.py b/backend/tests/scraper/test_run_scrape.py
index e69de29bb..dcec8acf8 100644
--- a/backend/tests/scraper/test_run_scrape.py
+++ b/backend/tests/scraper/test_run_scrape.py
@@ -0,0 +1,88 @@
+from unittest.mock import patch, Mock
+import pytest
+from backend.scraper.run_scrape import add_to_database
+
+
+@pytest.fixture
+def cache():
+    cache = Mock()
+    cache.get_json.return_value = {"model": "data"}
+    return cache
+
+
+@pytest.fixture
+def model():
+    model = Mock()
+    model.__getstate__ = Mock(return_value={"model": "data"})
+    return model
+
+
+@pytest.fixture
+def uid():
+    return "123"
+
+
+@pytest.fixture
+def table():
+    return "officer"
+
+
+def test_add_to_database_existing_in_cache(
+    cache: Mock, model: Mock, uid: str, table: str
+):
+    # Calling the function
+    add_to_database(model, cache, uid, table)
+
+    # Assertions
+    cache.get_json.assert_called_once_with(uid, table)
+    model.create.assert_not_called()
+    cache.set_json.assert_not_called()
+
+
+@patch("backend.scraper.run_scrape.officer_exists")
+def test_add_to_database_existing_in_database(
+    mock_officer_exists: Mock,
+    cache: Mock,
+    model: Mock,
+    uid: str,
+    table: str,
+):
+    cache.get_json.return_value = None
+    mock_officer_exists.return_value = True
+
+    # Calling the function
+    add_to_database(model, cache, uid, table)
+
+    # Assertions
+    cache.get_json.assert_called_once_with(uid, table)
+    model.create.assert_not_called()
+    cache.set_json.assert_not_called()
+    mock_officer_exists.assert_called_once()
+
+
+@patch("backend.scraper.run_scrape.officer_exists")
+def test_add_to_database_new_model(
+    mock_officer_exists: Mock, cache: Mock, model: Mock, uid: str, table: str
+):
+    cache.get_json.return_value = None
+    mock_officer_exists.return_value = False
+
+    # Calling the function
+    add_to_database(model, cache, uid, table)
+
+    # Assertions
+    cache.get_json.assert_called_once_with(uid, table)
+    model.create.assert_called_once()
+    cache.set_json.assert_called_once_with(uid, model.__getstate__(), table)
+    mock_officer_exists.assert_called_once()
+
+
+def test_add_to_database_invalid_table(cache: Mock, model: Mock, uid: str):
+    table = "invalid_table"
+    # Force a cache miss so the table validation is actually reached
+    cache.get_json.return_value = None
+
+    # The call must raise; a silent return must not pass the test
+    with pytest.raises(ValueError) as exc_info:
+        add_to_database(model, cache, uid, table)
+    assert str(exc_info.value) == "Invalid table invalid_table"
diff --git a/backend/tests/test_queries.py b/backend/tests/test_queries.py
new file mode 100644
index 000000000..c544774bb
--- /dev/null
+++ b/backend/tests/test_queries.py
@@ -0,0 +1,54 @@
+from backend.database.queries.queries import officer_exists, incident_exists
+from backend.database import Officer, StateID, Incident
+from typing import Any
+
+
+def test_officer_exists(db_session: Any):
+    # Create a test officer with a state ID value
+    state_id_value = "ABC123"
+    officer = Officer(**{"first_name": "Test Officer"})
+    state_id = StateID(**{"value": state_id_value, "state": "NY"})
+    officer.stateId = state_id  # type: ignore
+    db_session.add(officer)
+    db_session.commit()
+
+    # test manually that the officer exists
+    assert (
+        db_session.query(Officer)
+        .join(StateID)
+        .filter(
+            StateID.value == state_id.value, StateID.state == state_id.state
+        )
+        .first()
+        is not None
+    )
+
+    # Test that the officer exists
+    assert officer_exists(db_session, state_id)
+
+    # Test that a non-existing officer returns False
+    assert not officer_exists(
+        db_session, StateID(**{"value": "DEF456", "state": "NY"})
+    )
+
+
+def test_incident_exists(db_session: Any):
+    # Create a test incident with a case ID value
+    case_id_value = "123456"
+    incident = Incident(**{"case_id": case_id_value})
+    db_session.add(incident)
+    db_session.commit()
+
+    # test manually that the incident exists
+    assert (
+        db_session.query(Incident)
+        .filter(Incident.case_id == case_id_value)
+        .first()
+        is not None
+    )
+
+    # Test that the incident exists
+    assert incident_exists(db_session, case_id_value)
+
+    # Test that a non-existing incident returns False
+    assert not incident_exists(db_session, "654321")