From 206c6b2f644a52849c0b3662b64d2128dce0e4c5 Mon Sep 17 00:00:00 2001 From: Arik Gortsunian Date: Thu, 2 Jan 2025 10:19:41 +0200 Subject: [PATCH] [Core] JQ Mapping log enrichment - empty values support (#1285) # Description What - Added a test and support for the use case of empty-string identifier/blueprint values, and added a debug log to validate the identifier and blueprint values. Why - There are 2 main use cases we need to log details about: null, missing and/or misconfigured values, AND empty strings. An empty string is not a "wrong / misconfigured" value for JQ, but it is not a valid identifier. The current logs were missing these details and were not clear enough. Also added an example of how to assert on logs. How - Added tests and validations following log investigations. ``` raw_results: list[dict[Any, Any]] = [ {"foo": "", "bar": "bluePrintMapped"}, {"foo": "identifierMapped", "bar": ""}, ] result = await mocked_processor._parse_items(mapping, raw_results) assert "identifier" not in result.misonfigured_entity_keys assert "blueprint" not in result.misonfigured_entity_keys raw_results = [ {"foo": "identifierMapped", "bar": None}, {"foo": None, "bar": ""}, ] result = await mocked_processor._parse_items(mapping, raw_results) assert result.misonfigured_entity_keys == { "identifier": ".foo", "blueprint": ".bar", } ``` The tests now pass values with empty strings and None, validate the logs, and make sure the relevant details are output, with an additional counter for the empty values. For example, in a batch of 100 results with 65 empty values, only 35 are expected to be upserted. 
new logs text example (picture below): ``` port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py::TestJQEntityProcessor::test_parse_items_empty_required 2025-01-01 15:54:09.083 | INFO | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:256 - Parsing 2 raw results into entities 2025-01-01 15:54:09.083 | DEBUG | port_ocean.utils.queue_utils:_start_processor_worker:21 - Processing async task 2025-01-01 15:54:09.084 | DEBUG | port_ocean.utils.queue_utils:_start_processor_worker:21 - Processing async task 2025-01-01 15:54:09.088 | DEBUG | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:267 - Finished parsing raw results into entities with 0 errors. errors: [] 2025-01-01 15:54:09.089 | INFO | port_ocean.core.handlers.entity_processor.jq_entity_processor:_log_mapping_issues_identified:88 - 2 transformations of batch failed due to empty values 2025-01-01 15:54:09.089 | INFO | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:256 - Parsing 2 raw results into entities 2025-01-01 15:54:09.089 | DEBUG | port_ocean.utils.queue_utils:_start_processor_worker:21 - Processing async task 2025-01-01 15:54:09.089 | DEBUG | port_ocean.utils.queue_utils:_start_processor_worker:21 - Processing async task 2025-01-01 15:54:09.090 | DEBUG | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:267 - Finished parsing raw results into entities with 0 errors. 
errors: [] 2025-01-01 15:54:09.090 | DEBUG | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:298 - Transformation failed, values verification for identifier: identifierMapped, for blueprint: None 2025-01-01 15:54:09.090 | INFO | port_ocean.core.handlers.entity_processor.jq_entity_processor:_log_mapping_issues_identified:84 - Unable to find valid data for: {'blueprint': '.bar', 'identifier': '.foo'} (null, missing, or misconfigured) 2025-01-01 15:54:09.090 | INFO | port_ocean.core.handlers.entity_processor.jq_entity_processor:_log_mapping_issues_identified:88 - 1 transformations of batch failed due to empty values ``` ![Screenshot 2025-01-01 at 15 54 16](https://github.com/user-attachments/assets/d3f886a2-5d81-416c-81bd-3ed772c35b50) ## Type of change - [x] Bug fix (non-breaking change which fixes an issue) --------- Co-authored-by: Arik Gortsunian --- CHANGELOG.md | 8 ++++ .../entity_processor/jq_entity_processor.py | 40 ++++++++++++++--- .../test_jq_entity_processor.py | 43 +++++++++++++++++++ pyproject.toml | 2 +- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c662071bec..270dff009c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## 0.17.3 (2024-12-31) + + +### Bug Fixes + +- Added support for empty values for JQ mapping logs +- Added tests to assert for proper response when JQ is missmapped or values are empty + ## 0.17.2 (2024-12-31) diff --git a/port_ocean/core/handlers/entity_processor/jq_entity_processor.py b/port_ocean/core/handlers/entity_processor/jq_entity_processor.py index 408621ebe8..ac95d04b8a 100644 --- a/port_ocean/core/handlers/entity_processor/jq_entity_processor.py +++ b/port_ocean/core/handlers/entity_processor/jq_entity_processor.py @@ -3,7 +3,6 @@ from dataclasses import dataclass, field from functools import lru_cache from typing import Any, Optional - import jq # type: ignore from loguru import logger @@ -66,6 +65,22 @@ def inner() -> Any: return inner + @staticmethod + def _notify_mapping_issues( + entity_misconfigurations: dict[str, str], + missing_required_fields: bool, + entity_mapping_fault_counter: int, + ) -> None: + + if len(entity_misconfigurations) > 0: + logger.info( + f"Unable to find valid data for: {entity_misconfigurations} (null, missing, or misconfigured)" + ) + if missing_required_fields: + logger.info( + f"{entity_mapping_fault_counter} transformations of batch failed due to empty values" + ) + async def _search(self, data: dict[str, Any], pattern: str) -> Any: try: loop = asyncio.get_event_loop() @@ -252,9 +267,12 @@ async def _parse_items( examples_to_send: list[dict[str, Any]] = [] entity_misconfigurations: dict[str, str] = {} missing_required_fields: bool = False + entity_mapping_fault_counter: int = 0 + for result in calculated_entities_results: if len(result.misconfigurations) > 0: entity_misconfigurations |= result.misconfigurations + if result.entity.get("identifier") and result.entity.get("blueprint"): parsed_entity = Entity.parse_obj(result.entity) if result.did_entity_pass_selector: @@ -268,10 +286,22 @@ async def _parse_items( failed_entities.append(parsed_entity) else: missing_required_fields = True - if len(entity_misconfigurations) > 
0: - logger.info( - f"The mapping resulted with invalid values for{" identifier, blueprint," if missing_required_fields else " "} properties. Mapping result: {entity_misconfigurations}" - ) + if (result.entity.get("identifier") == "") or ( + result.entity.get("blueprint") == "" + ): + entity_mapping_fault_counter += 1 + else: + logger.debug( + f"Mapping failed, values verification for identifier: {result.entity.get("identifier")}, \ + for blueprint: {result.entity.get("blueprint")}" + ) + + self._notify_mapping_issues( + entity_misconfigurations, + missing_required_fields, + entity_mapping_fault_counter, + ) + if ( not calculated_entities_results and raw_results diff --git a/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py b/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py index 1e24c8d7c0..2a0010931b 100644 --- a/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +++ b/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py @@ -1,6 +1,8 @@ from typing import Any from unittest.mock import AsyncMock, Mock +from loguru import logger import pytest +from io import StringIO from port_ocean.context.ocean import PortOceanContext from port_ocean.core.handlers.entity_processor.jq_entity_processor import ( @@ -303,3 +305,44 @@ async def test_parse_items_wrong_mapping( "url": ".foobar", "defaultBranch": ".bar.baz", } + + async def test_parse_items_empty_required( + self, mocked_processor: JQEntityProcessor + ) -> None: + stream = StringIO() + sink_id = logger.add(stream, level="DEBUG") + + mapping = Mock() + mapping.port.entity.mappings.dict.return_value = { + "identifier": ".foo", + "blueprint": ".bar", + } + mapping.port.items_to_parse = None + mapping.selector.query = "true" + raw_results: list[dict[Any, Any]] = [ + {"foo": "", "bar": "bluePrintMapped"}, + {"foo": "identifierMapped", "bar": ""}, + ] + result = await mocked_processor._parse_items(mapping, raw_results) + assert 
"identifier" not in result.misonfigured_entity_keys + assert "blueprint" not in result.misonfigured_entity_keys + + raw_results = [ + {"foo": "identifierMapped", "bar": None}, + {"foo": None, "bar": ""}, + ] + result = await mocked_processor._parse_items(mapping, raw_results) + assert result.misonfigured_entity_keys == { + "identifier": ".foo", + "blueprint": ".bar", + } + + logger.remove(sink_id) + logs_captured = stream.getvalue() + + assert "2 transformations of batch failed due to empty values" in logs_captured + assert ( + "{'blueprint': '.bar', 'identifier': '.foo'} (null, missing, or misconfigured)" + in logs_captured + ) + assert "1 transformations of batch failed due to empty values" in logs_captured diff --git a/pyproject.toml b/pyproject.toml index af28af2262..a3cb130b83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "port-ocean" -version = "0.17.2" +version = "0.17.3" description = "Port Ocean is a CLI tool for managing your Port projects." readme = "README.md" homepage = "https://app.getport.io"