Skip to content

Commit

Permalink
add tests and queries
Browse files Browse the repository at this point in the history
  • Loading branch information
RyEggGit committed Jan 10, 2024
1 parent b1f08a6 commit 85e24e2
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 32 deletions.
21 changes: 21 additions & 0 deletions backend/database/queries/queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from .. import Officer, StateID, Incident
from sqlalchemy.orm import Session


def officer_exists(db: Session, stateID: StateID) -> bool:
    """
    Checks whether an officer with the given state ID is already stored.

    Args:
        db (Session): The database session used to run the query.
        stateID (StateID): The state ID whose ``value`` and ``state`` must
            both match a StateID row joined to an officer.

    Returns:
        bool: True if at least one matching officer exists, False otherwise.
    """
    # Pass the criteria as separate arguments so that SQLAlchemy combines
    # them with a SQL AND. Joining the two comparisons with Python's `and`
    # evaluates the truthiness of the first expression instead and silently
    # drops one of the two conditions from the generated query.
    match = (
        db.query(Officer)
        .join(StateID)
        .filter(
            StateID.value == stateID.value,
            StateID.state == stateID.state,
        )
        .first()
    )
    return match is not None


def incident_exists(db: Session, case_id: str) -> bool:
    """
    Checks whether an incident with the given case ID is already stored.

    Args:
        db (Session): The database session used to run the query.
        case_id (str): The case ID to look up.

    Returns:
        bool: True if a matching incident exists, False otherwise.
    """
    matching_incidents = db.query(Incident).filter(Incident.case_id == case_id)
    first_match = matching_incidents.first()
    return first_match is not None
48 changes: 24 additions & 24 deletions backend/scraper/run_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,32 @@
from backend.scraper.websites.FiftyA.FiftyA import FiftyA
from backend.scraper.websites.NYPD.Nypd import Nypd
from backend.scraper.mixins.ScrapeCache import ScrapeCacheContainer, ScrapeCache
from backend.database import Officer, Incident, StateID, db
from backend.database import Officer, Incident, db
from backend.database.queries.queries import officer_exists, incident_exists
from typing import Union
from sqlalchemy.orm import Session, scoped_session


def officer_exists(
    session: scoped_session[Session], state_id_value: str
) -> bool:
    """
    Checks whether an officer with the given state ID value is already stored.

    Args:
        session (scoped_session[Session]): The session used to run the query.
        state_id_value (str): The state ID value to look up.

    Returns:
        bool: True if a matching officer exists, False otherwise.
    """
    officers_with_state_id = (
        session.query(Officer)
        .join(StateID)
        .filter(StateID.value == state_id_value)
    )
    first_match = officers_with_state_id.first()
    return first_match is not None


def incident_exists(session: Session, case_id: str) -> bool:
    """
    Checks whether an incident with the given case ID is already stored.

    Args:
        session (Session): The session used to run the query.
        case_id (str): The case ID to look up.

    Returns:
        bool: True if a matching incident exists, False otherwise.
    """
    matching_incidents = session.query(Incident).filter(
        Incident.case_id == case_id
    )
    first_match = matching_incidents.first()
    return first_match is not None


def add_to_database(
model: Union[Officer, Incident], cache: ScrapeCache, uid: str, table: str
):
"""
Adds the given model to the database if it doesn't already exist, and stores
it in the cache.
Args:
model (Union[Officer, Incident]): The model to be added to the database.
cache (ScrapeCache): The cache object used to store the model.
uid (str): The unique identifier for the model.
table (str): The table name where the model should be stored.
Returns:
None
"""
logger = logging.Logger("scrape")
if cache.get_json(uid, table):
logger.info(f"{table} {uid} already in cache")
return

# add the model to the database
# Check if the model already exists in the database
model_exists: bool
if table == "officer":
model_exists = officer_exists(
Expand All @@ -49,7 +41,7 @@ def add_to_database(
)
else:
raise ValueError(f"Invalid table {table}")
if model_exists: # type: ignore
if model_exists:
logger.info(f"{table} {uid} already in database")
return

Expand All @@ -64,6 +56,14 @@ def add_to_database(


def scrape(debug: bool = False):
"""
Scrapes data from FiftyA and NYPD sources, merges the data, and adds it to
the database.
Args:
debug (bool, optional): Flag indicating whether to enable debug mode.
Defaults to False.
"""
logger = logging.Logger("scrape")
logger.info("Starting scrape")

Expand Down
8 changes: 4 additions & 4 deletions backend/scraper/websites/FiftyA/FiftyAIncidentParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,6 @@ def parse_complaint(
incident_location = self._get_location(details_text)
precinct_number, precinct_name = self._get_precinct(details_text)

# table = soup.find('tbody')
# perps = soup.find_all('a', class_="name")
# perpetrators = list(set([perp.text for perp in perps]))

incident = Incident()
incident.date_record_created = datetime.now().strftime(self.TIME_FORMAT)
incident.time_of_incident = datetime.strptime(
Expand Down Expand Up @@ -192,6 +188,10 @@ def parse_complaint(
incident.source_details = source # type: ignore
incident.victims = victim # type: ignore
incident.use_of_force = force # type: ignore

# table = soup.find('tbody')
# perps = soup.find_all('a', class_="name")
# perpetrators = list(set([perp.text for perp in perps]))
# data = {}
# data["victim"] = victim
# data["perpetrators"] = list(set(officer_involved_badges))
Expand Down
18 changes: 14 additions & 4 deletions backend/tests/scraper/test_fiftya.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
from backend.scraper.websites.FiftyA.FiftyA import FiftyA
from backend.database import Officer, Incident
from unittest.mock import patch, Mock


@pytest.fixture
Expand All @@ -23,9 +24,18 @@ def test_sample_list(fiftya: FiftyA):
assert len(sampled_list) == min(num, len(lst))


def test_find_officers_in_precincts(fiftya: FiftyA):
debug = False
officers = fiftya._find_officers_in_precincts(debug)
@patch.object(FiftyA, "_find_officers")
@patch.object(FiftyA, "find_urls")
def test_find_officers_in_precincts(
    mock_find_urls: Mock, mock__find_officers: Mock, fiftya: FiftyA
):
    """
    _find_officers_in_precincts returns a list of strings when the URL
    discovery and per-precinct officer lookup are both stubbed out.
    """
    # Arrange: stub the two patched FiftyA methods with canned results
    command_urls = ["/command/1", "/command/2", "/command/3"]
    officer_urls = ["/officer/1", "/officer/2", "/officer/3"]
    mock_find_urls.return_value = command_urls
    mock__find_officers.return_value = officer_urls

    # Act
    officers = fiftya._find_officers_in_precincts(debug=False)

    # Assert
    assert isinstance(officers, list)
    assert all(isinstance(officer, str) for officer in officers)

Expand All @@ -49,7 +59,7 @@ def test_find_officer_profile_and_complaints(fiftya: FiftyA):


def test_extract_data(fiftya: FiftyA):
officer_profiles, incidents = fiftya.extract_data(debug=False)
officer_profiles, incidents = fiftya.extract_data(debug=True)
assert isinstance(officer_profiles, list)
assert all(isinstance(profile, Officer) for profile in officer_profiles)
assert isinstance(incidents, list)
Expand Down
88 changes: 88 additions & 0 deletions backend/tests/scraper/test_run_scrape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from unittest.mock import patch, Mock
import pytest
from backend.scraper.run_scrape import add_to_database


@pytest.fixture
def cache():
    """
    Provides a mock cache whose get_json lookup reports a hit by default.
    """
    mock_cache = Mock()
    mock_cache.get_json.return_value = {"model": "data"}
    return mock_cache


@pytest.fixture
def model():
    """
    Provides a mock model whose __getstate__ returns a fixed dictionary.
    """
    mock_model = Mock()
    mock_model.__getstate__ = Mock(return_value={"model": "data"})
    return mock_model


@pytest.fixture
def uid():
    """
    Provides the unique identifier passed to add_to_database in these tests.
    """
    identifier = "123"
    return identifier


@pytest.fixture
def table():
    """
    Provides the default table name used by these tests.
    """
    table_name = "officer"
    return table_name


def test_add_to_database_existing_in_cache(
    cache: Mock, model: Mock, uid: str, table: str
):
    """
    A cache hit short-circuits add_to_database: the model is not created and
    nothing is written back to the cache.
    """
    # Act: the cache fixture already reports the entry as present
    add_to_database(model, cache, uid, table)

    # Assert: only the cache lookup took place
    cache.get_json.assert_called_once_with(uid, table)
    model.create.assert_not_called()
    cache.set_json.assert_not_called()


@patch("backend.scraper.run_scrape.officer_exists")
def test_add_to_database_existing_in_database(
    mock_officer_exists: Mock,
    cache: Mock,
    model: Mock,
    uid: str,
    table: str,
):
    """
    On a cache miss, a model that is already in the database is neither
    created again nor written to the cache.
    """
    # Arrange: cache miss, but the database lookup reports a match
    cache.get_json.return_value = None
    mock_officer_exists.return_value = True

    # Act
    add_to_database(model, cache, uid, table)

    # Assert: both lookups ran once, and nothing was created or cached
    cache.get_json.assert_called_once_with(uid, table)
    model.create.assert_not_called()
    cache.set_json.assert_not_called()
    mock_officer_exists.assert_called_once()


@patch("backend.scraper.run_scrape.officer_exists")
def test_add_to_database_new_model(
    mock_officer_exists: Mock, cache: Mock, model: Mock, uid: str, table: str
):
    """
    A model found in neither the cache nor the database is created and then
    stored in the cache under its uid and table.
    """
    # Arrange: miss in the cache and miss in the database
    cache.get_json.return_value = None
    mock_officer_exists.return_value = False

    # Act
    add_to_database(model, cache, uid, table)

    # Assert: the model was created once and its state was cached
    cache.get_json.assert_called_once_with(uid, table)
    model.create.assert_called_once()
    cache.set_json.assert_called_once_with(uid, model.__getstate__(), table)
    mock_officer_exists.assert_called_once()


def test_add_to_database_invalid_table(cache: Mock, model: Mock, uid: str):
    """
    add_to_database raises ValueError when given an unknown table name.
    """
    table = "invalid_table"
    # The cache fixture reports a hit by default, which makes add_to_database
    # return before it ever looks at the table name. Force a cache miss so
    # the table validation is actually reached.
    cache.get_json.return_value = None

    # pytest.raises fails the test if no exception is raised; a bare
    # try/except would pass silently in that case.
    with pytest.raises(ValueError) as exc_info:
        add_to_database(model, cache, uid, table)

    assert str(exc_info.value) == "Invalid table invalid_table"
54 changes: 54 additions & 0 deletions backend/tests/test_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from backend.database.queries.queries import officer_exists, incident_exists
from backend.database import Officer, StateID, Incident
from typing import Any


def test_officer_exists(db_session: Any):
    """
    officer_exists finds an officer stored with a known state ID and reports
    False for a state ID value that was never stored.
    """
    # Create a test officer with a state ID value
    state_id_value = "ABC123"
    officer = Officer(**{"first_name": "Test Officer"})
    state_id = StateID(**{"value": state_id_value, "state": "NY"})
    officer.stateId = state_id  # type: ignore
    db_session.add(officer)
    db_session.commit()

    # Test manually that the officer exists. The two criteria are passed as
    # separate filter() arguments so both end up in the SQL; joining them
    # with Python's `and` would silently drop one of them.
    assert (
        db_session.query(Officer)
        .join(StateID)
        .filter(
            StateID.value == state_id.value,
            StateID.state == state_id.state,
        )
        .first()
        is not None
    )

    # Test that the officer exists
    assert officer_exists(db_session, state_id)

    # Test that a non-existing officer returns False
    assert not officer_exists(
        db_session, StateID(**{"value": "DEF456", "state": "NY"})
    )


def test_incident_exists(db_session: Any):
    """
    incident_exists finds an incident stored with a known case ID and reports
    False for a case ID that was never stored.
    """
    # Arrange: persist an incident with a known case ID
    case_id_value = "123456"
    incident = Incident(**{"case_id": case_id_value})
    db_session.add(incident)
    db_session.commit()

    # Sanity check: a hand-written query finds the stored incident
    stored_incident = (
        db_session.query(Incident)
        .filter(Incident.case_id == case_id_value)
        .first()
    )
    assert stored_incident is not None

    # The helper finds the stored incident
    assert incident_exists(db_session, case_id_value)

    # The helper returns False for an unknown case ID
    assert not incident_exists(db_session, "654321")

0 comments on commit 85e24e2

Please sign in to comment.