From c322f96c1ac2d7a5334352961d42a908fffced55 Mon Sep 17 00:00:00 2001 From: Johan Schreurs Date: Wed, 26 Apr 2023 19:58:48 +0200 Subject: [PATCH] Reverting changes that made processes metadata merge report more tolerant for incorrect defaults. This was not reliable yet, and we would need a cleaner solution. First make the tests work again. Issue #105 --- src/openeo_aggregator/metadata/merging.py | 168 ++-------------------- src/openeo_aggregator/utils.py | 4 +- tests/test_views.py | 16 +-- 3 files changed, 20 insertions(+), 168 deletions(-) diff --git a/src/openeo_aggregator/metadata/merging.py b/src/openeo_aggregator/metadata/merging.py index ba519d1e..32910c8b 100644 --- a/src/openeo_aggregator/metadata/merging.py +++ b/src/openeo_aggregator/metadata/merging.py @@ -281,47 +281,6 @@ class ProcessMetadataMerger: def __init__(self, report: Callable = DEFAULT_REPORTER.report): self.report = report - self._tolerant_defaults = True - """Be tolerant to unnecessary values for fields in type schema's. - - Mostly, if the field is explicitely None were the default is False and - basically that field should have been empty, then treat the field as - its default value of False. - - For example: - - - [ ] **https://openeo-dev.eodc.eu/openeo/1.1.0/ : absolute** (merging.py:454): Parameter 'x' field 'schema' value differs from merged. - - merged `{'type': ['number', 'null']}` - - value `{'type': ['number', 'null'], 'subtype': None, 'pattern': None, 'enum': None, 'minimum': None, 'maximum': None, 'minItems': 0.0, 'maxItems': None, 'items': None, 'deprecated': None, 'parameters': None, 'returns': None}` - - JSON diff: - - --- merged - +++ https://openeo-dev.eodc.eu/openeo/1.1.0/ - @@ -1,4 +1,15 @@ - { - + "deprecated": null, - + "enum": null, - + "items": null, - + "maxItems": null, - + "maximum": null, - + "minItems": 0.0, - + "minimum": null, - + "parameters": null, - + "pattern": null, - + "returns": null, - + "subtype": null, - "type": [ - "number", - "null" - - All of these fields with a + sign should not have been present and will be treated as their default: - deprecated: False - enum: Not present, this is not an enum type - maxItems: not present / 0 - minItems: should have been the integer value 0, not float 0.0 - subtype": None, could have left it out. - """ - def merge_processes_metadata( self, processes_per_backend: Dict[str, Dict[str, dict]] ) -> Dict[str, dict]: @@ -389,12 +348,8 @@ def merge_process_metadata(self, by_backend: Dict[str, dict]) -> dict: merged["deprecated"] = any(getter.get("deprecated")) merged["experimental"] = any(getter.get("experimental")) - merged["examples"] = getter.concat( - "examples", skip_duplicates=True, none_means_empty=self._tolerant_defaults - ) - merged["links"] = getter.concat( - "links", skip_duplicates=True, none_means_empty=self._tolerant_defaults - ) + merged["examples"] = getter.concat("examples", skip_duplicates=True) + merged["links"] = getter.concat("links", skip_duplicates=True) return merged @@ -433,23 +388,14 @@ def _merge_process_parameters( # TODO: real merge instead of taking first? merged = [] merged_params_by_name = {} - for backend_id, process_metadata in sorted(by_backend.items()): + # for backend_id, process_metadata in sorted(by_backend.items()): + for backend_id, process_metadata in by_backend.items(): params = process_metadata.get("parameters", []) if params: - # normalizer = ProcessParameterNormalizer( - # strip_description=False, - # add_optionals=False, - # report=functools.partial( - # self.report, backend_id=backend_id, process_id=process_id - # ), - # ) normalizer = ProcessParameterNormalizer( strip_description=False, - add_optionals=True, - tolerant_defaults=self._tolerant_defaults, - report=functools.partial( - self.report, backend_id=backend_id, process_id=process_id - ), + add_optionals=False, + report=functools.partial(self.report, backend_id=backend_id, process_id=process_id), ) merged = normalizer.normalize_parameters(params) @@ -459,14 +405,13 @@ def _merge_process_parameters( break # Check other parameter listings against merged - for backend_id, process_metadata in sorted(by_backend.items()): + # for backend_id, process_metadata in sorted(by_backend.items()): + for backend_id, process_metadata in by_backend.items(): params = process_metadata.get("parameters", []) params_by_name = self._get_parameters_by_name( parameters=params, backend_id=backend_id, process_id=process_id ) - missing_parameters = sorted( - set(merged_params_by_name).difference(params_by_name) - ) + missing_parameters = set(merged_params_by_name).difference(params_by_name) if missing_parameters: self.report( "Missing parameters.", @@ -474,9 +419,7 @@ def _merge_process_parameters( process_id=process_id, missing_parameters=missing_parameters, ) - extra_parameters = sorted( - set(params_by_name).difference(merged_params_by_name) - ) + extra_parameters = set(params_by_name).difference(merged_params_by_name) if extra_parameters: self.report( "Extra parameters (not in merged listing).", @@ -488,7 +431,6 @@ def _merge_process_parameters( normalizer = ProcessParameterNormalizer( strip_description=True, add_optionals=True, - tolerant_defaults=self._tolerant_defaults, report=functools.partial( self.report, backend_id=backend_id, process_id=process_id ), @@ -497,19 +439,10 @@ def _merge_process_parameters( merged_params_by_name[name] ) other_param = normalizer.normalize_parameter(params_by_name[name]) - fields_with_default_false = ["optional", "deprecated", "experimental"] for field in merged_param.keys(): merged_value = merged_param[field] other_value = other_param[field] if merged_value != other_value: - if ( - self._tolerant_defaults - and field in fields_with_default_false - ): - if (merged_value is False and other_value is None) or ( - merged_value is None and other_value is False - ): - continue self.report( f"Parameter {name!r} field {field!r} value differs from merged.", backend_id=backend_id, @@ -538,12 +471,9 @@ def _merge_process_returns( getter = MultiDictGetter(by_backend.values()) # TODO: real merge instead of taking first schema as "merged" schema? merged = getter.first("returns", {"schema": {}}) - if self._tolerant_defaults: - merged = remove_defaults_from_schema(merged) for backend_id, process_metadata in by_backend.items(): other_returns = process_metadata.get("returns", {"schema": {}}) - merged = remove_defaults_from_schema(other_returns) if ignore_description(other_returns) != ignore_description(merged): self.report( f"Returns schema is different from merged.", @@ -565,8 +495,6 @@ def _merge_process_exceptions(self, by_backend: Dict[str, dict]): if isinstance(exceptions, dict): # TODO: take value from first backend instead of last one here? merged.update(exceptions) - elif exceptions is None: - continue else: self.report( f"Invalid process exceptions listing", @@ -600,18 +528,16 @@ class ProcessParameterNormalizer: e.g. for comparison purposes. """ - __slots__ = ["strip_description", "add_optionals", "tolerant_defaults", "report"] + __slots__ = ["strip_description", "add_optionals", "report"] def __init__( self, strip_description: bool = False, add_optionals: bool = True, - tolerant_defaults: bool = False, report: Callable = DEFAULT_REPORTER.report, ): self.strip_description = strip_description self.add_optionals = add_optionals - self.tolerant_defaults = tolerant_defaults self.report = report def normalize_parameter(self, param: dict) -> dict: @@ -644,12 +570,7 @@ def normalize_parameter(self, param: dict) -> dict: normalized["default"] = param.get("default", None) # Recurse into sub-process graphs under "schema" to normalize nested parameters - if self.tolerant_defaults: - normalized["schema"] = self.normalize_recursively( - remove_defaults_from_schema(normalized["schema"]) - ) - else: - normalized["schema"] = self.normalize_recursively(normalized["schema"]) + normalized["schema"] = self.normalize_recursively(normalized["schema"]) return normalized @@ -679,68 +600,3 @@ def normalize_recursively(self, x: Any) -> Any: return [self.normalize_recursively(v) for v in x] else: return x - - -# def remove_defaults_from_schema(schema): -# cleaned_schema = {} -# schema_defaults = { -# "type": None, -# "subtype": None, -# "deprecated": None, -# "enum": None, -# "items": None, -# "maxItems": None, -# "maximum": None, -# "minItems": 0.0, -# "minimum": None, -# "parameters": None, -# "pattern": None, -# "returns": None, -# } -# for key, value in schema.items(): -# if key not in schema_defaults: -# cleaned_schema[key] = value -# elif value != schema_defaults.get(key): -# cleaned_schema[key] = value -# return cleaned_schema - - -def remove_defaults_from_schema(schema): - cleaned_schema = {} - - def map_to_correct_default(key, value): - conversion = { - "type": {False: None}, - "subtype": {False: None}, - "deprecated": {None: False}, - } - if key in conversion: - if value is conversion.get(key): - return conversion[key][value] - - return value - - defaults_to_remove = { - "type": None, - "subtype": None, - "enum": None, - "items": None, - "maxItems": None, - "maximum": None, - "minItems": 0.0, - "minimum": None, - "parameters": None, - "pattern": None, - "returns": None, - } - - for key, value in schema.items(): - if key not in defaults_to_remove: - cleaned_schema[key] = value - elif value != defaults_to_remove.get(key): - cleaned_schema[key] = value - - cleaned_schema = { - k: map_to_correct_default(k, v) for k, v in cleaned_schema.items() - } - return cleaned_schema diff --git a/src/openeo_aggregator/utils.py b/src/openeo_aggregator/utils.py index 055b0613..f92a7f1a 100644 --- a/src/openeo_aggregator/utils.py +++ b/src/openeo_aggregator/utils.py @@ -56,14 +56,12 @@ def has_key(self, key: str) -> bool: def available_keys(self, keys: List[str]) -> List[str]: return [k for k in keys if self.has_key(k)] - def concat(self, key: str, skip_duplicates=False, none_means_empty=False) -> list: + def concat(self, key: str, skip_duplicates=False) -> list: """ Concatenate all lists/tuples at given `key (optionally skipping duplicate items in the process) """ result = [] for items in self.get(key): - if none_means_empty: - items = [] if isinstance(items, (list, tuple)): for item in items: if skip_duplicates and item in result: diff --git a/tests/test_views.py b/tests/test_views.py index 8562569c..c12d47dd 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -539,12 +539,9 @@ def test_processes_basic(self, api100, requests_mock, backend1, backend2): True, [ { - "id": "multiply", - "description": "multiply", - "parameters": [ - {"name": "x", "schema": {}, "description": "x"}, - {"name": "y", "schema": {}, "description": "y"}, - ], + "id": "mean", + "description": "mean", + "parameters": [{"name": "data", "schema": {}, "description": "data"}], "returns": {"schema": {}}, "federation:backends": ["b2"], 'deprecated': False, @@ -553,10 +550,11 @@ def test_processes_basic(self, api100, requests_mock, backend1, backend2): 'links': [] }, { - "id": "mean", - "description": "mean", + "id": "multiply", + "description": "multiply", "parameters": [ - {"name": "data", "schema": {}, "description": "data"} + {"name": "x", "schema": {}, "description": "x"}, + {"name": "y", "schema": {}, "description": "y"}, ], "returns": {"schema": {}}, "federation:backends": ["b2"],