
Commit

Reverting changes that made processes metadata merge report more tolerant for incorrect defaults.

This was not reliable yet, and we would need a cleaner solution.
First make the tests work again.

Issue #105
JohanKJSchreurs committed Apr 27, 2023
1 parent 2eb28e2 commit c322f96
Showing 3 changed files with 20 additions and 168 deletions.
168 changes: 12 additions & 156 deletions src/openeo_aggregator/metadata/merging.py
@@ -281,47 +281,6 @@ class ProcessMetadataMerger:
def __init__(self, report: Callable = DEFAULT_REPORTER.report):
self.report = report

self._tolerant_defaults = True
"""Be tolerant to unnecessary values for fields in type schema's.
Mostly, if the field is explicitely None were the default is False and
basically that field should have been empty, then treat the field as
its default value of False.
For example:
- [ ] **https://openeo-dev.eodc.eu/openeo/1.1.0/ : absolute** (merging.py:454): Parameter 'x' field 'schema' value differs from merged.
- merged `{'type': ['number', 'null']}`
- value `{'type': ['number', 'null'], 'subtype': None, 'pattern': None, 'enum': None, 'minimum': None, 'maximum': None, 'minItems': 0.0, 'maxItems': None, 'items': None, 'deprecated': None, 'parameters': None, 'returns': None}`
- JSON diff:
--- merged
+++ https://openeo-dev.eodc.eu/openeo/1.1.0/
@@ -1,4 +1,15 @@
{
+ "deprecated": null,
+ "enum": null,
+ "items": null,
+ "maxItems": null,
+ "maximum": null,
+ "minItems": 0.0,
+ "minimum": null,
+ "parameters": null,
+ "pattern": null,
+ "returns": null,
+ "subtype": null,
"type": [
"number",
"null"
All of the fields marked with a + sign should not have been present and will be treated as their defaults:
deprecated: False
enum: not present, since this is not an enum type
maxItems: not present / 0
minItems: should have been the integer value 0, not the float 0.0
subtype: None, could have been left out.
"""

def merge_processes_metadata(
self, processes_per_backend: Dict[str, Dict[str, dict]]
) -> Dict[str, dict]:
@@ -389,12 +348,8 @@ def merge_process_metadata(self, by_backend: Dict[str, dict]) -> dict:

merged["deprecated"] = any(getter.get("deprecated"))
merged["experimental"] = any(getter.get("experimental"))
merged["examples"] = getter.concat(
"examples", skip_duplicates=True, none_means_empty=self._tolerant_defaults
)
merged["links"] = getter.concat(
"links", skip_duplicates=True, none_means_empty=self._tolerant_defaults
)
merged["examples"] = getter.concat("examples", skip_duplicates=True)
merged["links"] = getter.concat("links", skip_duplicates=True)

return merged

@@ -433,23 +388,14 @@ def _merge_process_parameters(
# TODO: real merge instead of taking first?
merged = []
merged_params_by_name = {}
for backend_id, process_metadata in sorted(by_backend.items()):
# for backend_id, process_metadata in sorted(by_backend.items()):
for backend_id, process_metadata in by_backend.items():
params = process_metadata.get("parameters", [])
if params:
# normalizer = ProcessParameterNormalizer(
# strip_description=False,
# add_optionals=False,
# report=functools.partial(
# self.report, backend_id=backend_id, process_id=process_id
# ),
# )
normalizer = ProcessParameterNormalizer(
strip_description=False,
add_optionals=True,
tolerant_defaults=self._tolerant_defaults,
report=functools.partial(
self.report, backend_id=backend_id, process_id=process_id
),
add_optionals=False,
report=functools.partial(self.report, backend_id=backend_id, process_id=process_id),
)
merged = normalizer.normalize_parameters(params)

@@ -459,24 +405,21 @@ def _merge_process_parameters(
break

# Check other parameter listings against merged
for backend_id, process_metadata in sorted(by_backend.items()):
# for backend_id, process_metadata in sorted(by_backend.items()):
for backend_id, process_metadata in by_backend.items():
params = process_metadata.get("parameters", [])
params_by_name = self._get_parameters_by_name(
parameters=params, backend_id=backend_id, process_id=process_id
)
missing_parameters = sorted(
set(merged_params_by_name).difference(params_by_name)
)
missing_parameters = set(merged_params_by_name).difference(params_by_name)
if missing_parameters:
self.report(
"Missing parameters.",
backend_id=backend_id,
process_id=process_id,
missing_parameters=missing_parameters,
)
extra_parameters = sorted(
set(params_by_name).difference(merged_params_by_name)
)
extra_parameters = set(params_by_name).difference(merged_params_by_name)
if extra_parameters:
self.report(
"Extra parameters (not in merged listing).",
@@ -488,7 +431,6 @@ def _merge_process_parameters(
normalizer = ProcessParameterNormalizer(
strip_description=True,
add_optionals=True,
tolerant_defaults=self._tolerant_defaults,
report=functools.partial(
self.report, backend_id=backend_id, process_id=process_id
),
@@ -497,19 +439,10 @@ def _merge_process_parameters(
merged_params_by_name[name]
)
other_param = normalizer.normalize_parameter(params_by_name[name])
fields_with_default_false = ["optional", "deprecated", "experimental"]
for field in merged_param.keys():
merged_value = merged_param[field]
other_value = other_param[field]
if merged_value != other_value:
if (
self._tolerant_defaults
and field in fields_with_default_false
):
if (merged_value is False and other_value is None) or (
merged_value is None and other_value is False
):
continue
self.report(
f"Parameter {name!r} field {field!r} value differs from merged.",
backend_id=backend_id,
@@ -538,12 +471,9 @@ def _merge_process_returns(
getter = MultiDictGetter(by_backend.values())
# TODO: real merge instead of taking first schema as "merged" schema?
merged = getter.first("returns", {"schema": {}})
if self._tolerant_defaults:
merged = remove_defaults_from_schema(merged)

for backend_id, process_metadata in by_backend.items():
other_returns = process_metadata.get("returns", {"schema": {}})
merged = remove_defaults_from_schema(other_returns)
if ignore_description(other_returns) != ignore_description(merged):
self.report(
f"Returns schema is different from merged.",
@@ -565,8 +495,6 @@ def _merge_process_exceptions(self, by_backend: Dict[str, dict]):
if isinstance(exceptions, dict):
# TODO: take value from first backend instead of last one here?
merged.update(exceptions)
elif exceptions is None:
continue
else:
self.report(
f"Invalid process exceptions listing",
@@ -600,18 +528,16 @@ class ProcessParameterNormalizer:
e.g. for comparison purposes.
"""

__slots__ = ["strip_description", "add_optionals", "tolerant_defaults", "report"]
__slots__ = ["strip_description", "add_optionals", "report"]

def __init__(
self,
strip_description: bool = False,
add_optionals: bool = True,
tolerant_defaults: bool = False,
report: Callable = DEFAULT_REPORTER.report,
):
self.strip_description = strip_description
self.add_optionals = add_optionals
self.tolerant_defaults = tolerant_defaults
self.report = report

def normalize_parameter(self, param: dict) -> dict:
@@ -644,12 +570,7 @@ def normalize_parameter(self, param: dict) -> dict:
normalized["default"] = param.get("default", None)

# Recurse into sub-process graphs under "schema" to normalize nested parameters
if self.tolerant_defaults:
normalized["schema"] = self.normalize_recursively(
remove_defaults_from_schema(normalized["schema"])
)
else:
normalized["schema"] = self.normalize_recursively(normalized["schema"])
normalized["schema"] = self.normalize_recursively(normalized["schema"])

return normalized

Expand Down Expand Up @@ -679,68 +600,3 @@ def normalize_recursively(self, x: Any) -> Any:
return [self.normalize_recursively(v) for v in x]
else:
return x


# def remove_defaults_from_schema(schema):
# cleaned_schema = {}
# schema_defaults = {
# "type": None,
# "subtype": None,
# "deprecated": None,
# "enum": None,
# "items": None,
# "maxItems": None,
# "maximum": None,
# "minItems": 0.0,
# "minimum": None,
# "parameters": None,
# "pattern": None,
# "returns": None,
# }
# for key, value in schema.items():
# if key not in schema_defaults:
# cleaned_schema[key] = value
# elif value != schema_defaults.get(key):
# cleaned_schema[key] = value
# return cleaned_schema


def remove_defaults_from_schema(schema):
cleaned_schema = {}

def map_to_correct_default(key, value):
conversion = {
"type": {False: None},
"subtype": {False: None},
"deprecated": {None: False},
}
if key in conversion:
if value is conversion.get(key):
return conversion[key][value]

return value

defaults_to_remove = {
"type": None,
"subtype": None,
"enum": None,
"items": None,
"maxItems": None,
"maximum": None,
"minItems": 0.0,
"minimum": None,
"parameters": None,
"pattern": None,
"returns": None,
}

for key, value in schema.items():
if key not in defaults_to_remove:
cleaned_schema[key] = value
elif value != defaults_to_remove.get(key):
cleaned_schema[key] = value

cleaned_schema = {
k: map_to_correct_default(k, v) for k, v in cleaned_schema.items()
}
return cleaned_schema
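
For illustration, a minimal usage sketch (not part of the repository) of the deleted remove_defaults_from_schema helper, assuming the definition shown in the diff above is in scope; the keys and redundant default values mirror the docstring example near the top of this diff:

# Sketch only: how the deleted remove_defaults_from_schema helper treats a
# schema that spells out redundant defaults (keys taken from the docstring
# example above). Assumes the function definition from the diff is in scope.
schema = {
    "type": ["number", "null"],  # real content, differs from the listed default (None): kept
    "subtype": None,             # equals the listed default (None): dropped
    "minItems": 0.0,             # equals the listed default (0.0): dropped
    "maxItems": None,            # equals the listed default (None): dropped
}
cleaned = remove_defaults_from_schema(schema)
assert cleaned == {"type": ["number", "null"]}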
4 changes: 1 addition & 3 deletions src/openeo_aggregator/utils.py
@@ -56,14 +56,12 @@ def has_key(self, key: str) -> bool:
def available_keys(self, keys: List[str]) -> List[str]:
return [k for k in keys if self.has_key(k)]

def concat(self, key: str, skip_duplicates=False, none_means_empty=False) -> list:
def concat(self, key: str, skip_duplicates=False) -> list:
"""
Concatenate all lists/tuples at the given `key` (optionally skipping duplicate items in the process).
"""
result = []
for items in self.get(key):
if none_means_empty:
items = []
if isinstance(items, (list, tuple)):
for item in items:
if skip_duplicates and item in result:
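
For illustration, a minimal usage sketch (not part of the repository) of the restored concat(key, skip_duplicates) signature; the input documents are hypothetical, and the constructor call mirrors MultiDictGetter(by_backend.values()) as used in merging.py above:

# Sketch only: concatenating the "links" lists of two hypothetical backend
# documents; with skip_duplicates=True the shared entry appears once.
getter = MultiDictGetter([
    {"links": [{"href": "https://b1.test/doc"}]},
    {"links": [{"href": "https://b1.test/doc"}, {"href": "https://b2.test/doc"}]},
])
merged_links = getter.concat("links", skip_duplicates=True)
# merged_links == [{"href": "https://b1.test/doc"}, {"href": "https://b2.test/doc"}]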
16 changes: 7 additions & 9 deletions tests/test_views.py
@@ -539,12 +539,9 @@ def test_processes_basic(self, api100, requests_mock, backend1, backend2):
True,
[
{
"id": "multiply",
"description": "multiply",
"parameters": [
{"name": "x", "schema": {}, "description": "x"},
{"name": "y", "schema": {}, "description": "y"},
],
"id": "mean",
"description": "mean",
"parameters": [{"name": "data", "schema": {}, "description": "data"}],
"returns": {"schema": {}},
"federation:backends": ["b2"],
'deprecated': False,
@@ -553,10 +550,11 @@ def test_processes_basic(self, api100, requests_mock, backend1, backend2):
'links': []
},
{
"id": "mean",
"description": "mean",
"id": "multiply",
"description": "multiply",
"parameters": [
{"name": "data", "schema": {}, "description": "data"}
{"name": "x", "schema": {}, "description": "x"},
{"name": "y", "schema": {}, "description": "y"},
],
"returns": {"schema": {}},
"federation:backends": ["b2"],
