diff --git a/src/dispatch/entity/service.py b/src/dispatch/entity/service.py index 2065689d4bbf..4e5c8aa507ef 100644 --- a/src/dispatch/entity/service.py +++ b/src/dispatch/entity/service.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +import logging from typing import Generator, Optional, Sequence, Union, NewType, NamedTuple import re @@ -16,6 +17,9 @@ from dispatch.signal.models import Signal, SignalInstance +log = logging.getLogger(__name__) + + def get(*, db_session: Session, entity_id: int) -> Optional[Entity]: """Gets a entity by its id.""" return db_session.query(Entity).filter(Entity.id == entity_id).one_or_none() @@ -255,7 +259,7 @@ def find_entities( list[Entity]: A list of entities found in the SignalInstance. """ - def _find_entites_by_regex( + def _find_entities_by_regex( val: Union[dict, str, list], signal_instance: SignalInstance, entity_type_pairs: list[EntityTypePair], @@ -287,7 +291,7 @@ def _find_entites_by_regex( >>> signal_instance = SignalInstance(raw={"text": "John Doe was born on 1987-05-12."}) - >>> entities = list(_find_entites_by_regex(signal_instance.raw, signal_instance, entity_type_pairs)) + >>> entities = list(_find_entities_by_regex(signal_instance.raw, signal_instance, entity_type_pairs)) >>> entities[0].value 'John Doe' @@ -301,7 +305,7 @@ def _find_entites_by_regex( # If the value is a dictionary, search its key-value pairs recursively if isinstance(val, dict): for _, subval in val.items(): - yield from _find_entites_by_regex( + yield from _find_entities_by_regex( subval, signal_instance, entity_type_pairs, @@ -310,7 +314,7 @@ def _find_entites_by_regex( # If the value is a list, search its items recursively elif isinstance(val, list): for item in val: - yield from _find_entites_by_regex( + yield from _find_entities_by_regex( item, signal_instance, entity_type_pairs, @@ -360,9 +364,18 @@ def _find_entities_by_regex_and_jsonpath_expression( entity_type=entity_type, project=signal_instance.project, ) - except jsonpath_ng.JSONPathError: - # field not found in signal_instance.raw - pass + except KeyError: + log.warning( + f"Unable to extract entity {str(jpath)} is not a valid JSONPath for Instance {signal_instance.id}." + f"A KeyError usually occurs when the JSONPath includes a list index lookup against a dictionary value." + f" Example: dictionary[0].value" + ) + continue + except Exception as e: # Add this to handle general exceptions + log.exception( + f"An error occurred while extracting entity {str(jpath)} from Instance {signal_instance.id}: {str(e)}" + ) + continue # Create a list of (entity type, regular expression, jpath) tuples entity_type_pairs = [ @@ -386,7 +399,7 @@ def _find_entities_by_regex_and_jsonpath_expression( entities = [ entity for _, val in signal_instance.raw.items() - for entity in _find_entites_by_regex(val, signal_instance, filtered_entity_type_pairs) + for entity in _find_entities_by_regex(val, signal_instance, filtered_entity_type_pairs) ] entities.extend( diff --git a/tests/entity/test_entity_service.py b/tests/entity/test_entity_service.py index 5b6fde84a2e9..3122260784b0 100644 --- a/tests/entity/test_entity_service.py +++ b/tests/entity/test_entity_service.py @@ -1,5 +1,6 @@ from dispatch.entity_type.models import EntityType from dispatch.entity import service as entity_service +from tests.factories import SignalInstanceFactory def test_get(session, entity): @@ -164,3 +165,67 @@ def test_find_entities_with_no_regex_or_field(session, signal_instance, project) ] entities = entity_service.find_entities(session, signal_instance, entity_types) assert len(entities) == 0 + + +def test_find_entities_handles_key_error(session, signal_instance, project): + # Define an entity type that will cause a KeyError due to incorrect JSONPath usage + # The JSONPath expression tries to access a dictionary with an index, which is invalid + entity_type_with_invalid_jsonpath = EntityType( + name="EntityType with Invalid JSONPath", + jpath="dictionary[0].value", + regular_expression=None, + project=project, + ) + + # Override the signal_instance fixture to include problematic 'dictionary' field + # that is a dictionary, not a list + signal_instance = SignalInstanceFactory( + raw={ + "id": "4893bde0-f8bc-4472-a7dc-8b44b26b2198", + "dictionary": { + "value": "pompompurin", + }, + } + ) + + # Attempt to find entities using the entity type with the invalid JSONPath expression + entities = entity_service.find_entities( + session, signal_instance, [entity_type_with_invalid_jsonpath] + ) + + # The service should handle the KeyError and not return any entities for the invalid JSONPath + assert len(entities) == 0 + + +def test_find_entities_multiple_entity_types(session, signal_instance, project): + # A test that checks if the function correctly processes multiple entity types, some valid and some invalid. + entity_type_valid = EntityType( + name="EntityType with Valid JSONPath and Regex", + jpath="dictionary.value", + regular_expression=None, + project=project, + ) + + entity_type_invalid_jsonpath = EntityType( + name="EntityType with Invalid JSONPath", + jpath="dictionary[0].value", + regular_expression=None, + project=project, + ) + + signal_instance = SignalInstanceFactory( + raw={ + "id": "4893bde0-f8bc-4472-a7dc-8b44b26b2198", + "dictionary": { + "value": "pompompurin", + }, + } + ) + + entities = entity_service.find_entities( + session, signal_instance, [entity_type_valid, entity_type_invalid_jsonpath] + ) + + # The service should find one entity with valid JSONPath and Regex and ignore the invalid one + assert len(entities) == 1 + assert entities[0].value == "pompompurin"