Skip to content

Commit

Permalink
Handle invalid JSONPath with KeyError, Exceptions, and add test cases (
Browse files Browse the repository at this point in the history
…#4007)

* Handle invalid JSONPath with KeyError, general Exceptions, and add test cases

* Change comment to correct value

* Update log line to be a warning instead of an exception
  • Loading branch information
wssheldon authored and metroid-samus committed Nov 29, 2023
1 parent 1c56d63 commit 1ca6513
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 8 deletions.
29 changes: 21 additions & 8 deletions src/dispatch/entity/service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timedelta
import logging
from typing import Generator, Optional, Sequence, Union, NewType, NamedTuple
import re

Expand All @@ -16,6 +17,9 @@
from dispatch.signal.models import Signal, SignalInstance


log = logging.getLogger(__name__)


def get(*, db_session: Session, entity_id: int) -> Optional[Entity]:
"""Gets a entity by its id."""
return db_session.query(Entity).filter(Entity.id == entity_id).one_or_none()
Expand Down Expand Up @@ -255,7 +259,7 @@ def find_entities(
list[Entity]: A list of entities found in the SignalInstance.
"""

def _find_entites_by_regex(
def _find_entities_by_regex(
val: Union[dict, str, list],
signal_instance: SignalInstance,
entity_type_pairs: list[EntityTypePair],
Expand Down Expand Up @@ -287,7 +291,7 @@ def _find_entites_by_regex(
>>> signal_instance = SignalInstance(raw={"text": "John Doe was born on 1987-05-12."})
>>> entities = list(_find_entites_by_regex(signal_instance.raw, signal_instance, entity_type_pairs))
>>> entities = list(_find_entities_by_regex(signal_instance.raw, signal_instance, entity_type_pairs))
>>> entities[0].value
'John Doe'
Expand All @@ -301,7 +305,7 @@ def _find_entites_by_regex(
# If the value is a dictionary, search its key-value pairs recursively
if isinstance(val, dict):
for _, subval in val.items():
yield from _find_entites_by_regex(
yield from _find_entities_by_regex(
subval,
signal_instance,
entity_type_pairs,
Expand All @@ -310,7 +314,7 @@ def _find_entites_by_regex(
# If the value is a list, search its items recursively
elif isinstance(val, list):
for item in val:
yield from _find_entites_by_regex(
yield from _find_entities_by_regex(
item,
signal_instance,
entity_type_pairs,
Expand Down Expand Up @@ -360,9 +364,18 @@ def _find_entities_by_regex_and_jsonpath_expression(
entity_type=entity_type,
project=signal_instance.project,
)
except jsonpath_ng.JSONPathError:
# field not found in signal_instance.raw
pass
except KeyError:
log.warning(
f"Unable to extract entity {str(jpath)} is not a valid JSONPath for Instance {signal_instance.id}."
f"A KeyError usually occurs when the JSONPath includes a list index lookup against a dictionary value."
f" Example: dictionary[0].value"
)
continue
except Exception as e: # Add this to handle general exceptions
log.exception(
f"An error occurred while extracting entity {str(jpath)} from Instance {signal_instance.id}: {str(e)}"
)
continue

# Create a list of (entity type, regular expression, jpath) tuples
entity_type_pairs = [
Expand All @@ -386,7 +399,7 @@ def _find_entities_by_regex_and_jsonpath_expression(
entities = [
entity
for _, val in signal_instance.raw.items()
for entity in _find_entites_by_regex(val, signal_instance, filtered_entity_type_pairs)
for entity in _find_entities_by_regex(val, signal_instance, filtered_entity_type_pairs)
]

entities.extend(
Expand Down
65 changes: 65 additions & 0 deletions tests/entity/test_entity_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dispatch.entity_type.models import EntityType
from dispatch.entity import service as entity_service
from tests.factories import SignalInstanceFactory


def test_get(session, entity):
Expand Down Expand Up @@ -164,3 +165,67 @@ def test_find_entities_with_no_regex_or_field(session, signal_instance, project)
]
entities = entity_service.find_entities(session, signal_instance, entity_types)
assert len(entities) == 0


def test_find_entities_handles_key_error(session, signal_instance, project):
# Define an entity type that will cause a KeyError due to incorrect JSONPath usage
# The JSONPath expression tries to access a dictionary with an index, which is invalid
entity_type_with_invalid_jsonpath = EntityType(
name="EntityType with Invalid JSONPath",
jpath="dictionary[0].value",
regular_expression=None,
project=project,
)

# Override the signal_instance fixture to include problematic 'dictionary' field
# that is a dictionary, not a list
signal_instance = SignalInstanceFactory(
raw={
"id": "4893bde0-f8bc-4472-a7dc-8b44b26b2198",
"dictionary": {
"value": "pompompurin",
},
}
)

# Attempt to find entities using the entity type with the invalid JSONPath expression
entities = entity_service.find_entities(
session, signal_instance, [entity_type_with_invalid_jsonpath]
)

# The service should handle the KeyError and not return any entities for the invalid JSONPath
assert len(entities) == 0


def test_find_entities_multiple_entity_types(session, signal_instance, project):
# A test that checks if the function correctly processes multiple entity types, some valid and some invalid.
entity_type_valid = EntityType(
name="EntityType with Valid JSONPath and Regex",
jpath="dictionary.value",
regular_expression=None,
project=project,
)

entity_type_invalid_jsonpath = EntityType(
name="EntityType with Invalid JSONPath",
jpath="dictionary[0].value",
regular_expression=None,
project=project,
)

signal_instance = SignalInstanceFactory(
raw={
"id": "4893bde0-f8bc-4472-a7dc-8b44b26b2198",
"dictionary": {
"value": "pompompurin",
},
}
)

entities = entity_service.find_entities(
session, signal_instance, [entity_type_valid, entity_type_invalid_jsonpath]
)

# The service should find one entity with valid JSONPath and Regex and ignore the invalid one
assert len(entities) == 1
assert entities[0].value == "pompompurin"

0 comments on commit 1ca6513

Please sign in to comment.