Skip to content

Commit

Permalink
issue #76 replace merge_dict_values with merge_lists_skip_duplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
JeroenVerstraelen committed Oct 11, 2022
1 parent 69c1811 commit a546cb8
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 43 deletions.
14 changes: 6 additions & 8 deletions src/openeo_aggregator/metadata/models/stac_summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import attr

from openeo_aggregator.metadata.models.statistics import Statistics
from openeo_aggregator.metadata.utils import merge_dict_values
from openeo_aggregator.metadata.utils import merge_lists_skip_duplicates

T = TypeVar("T", bound="StacSummaries")

Expand Down Expand Up @@ -125,15 +125,13 @@ def merge_all(
additional_properties = [(cid, x.additional_properties) for cid, x in summaries_list]
# Calculate the unique summary names.
unique_summary_names: Set[str] = functools.reduce(lambda a, b: a.union(b), (d.keys() for _, d in additional_properties), set())

merged_addition_properties = {}
for summary_name in unique_summary_names:
if summary_name in ["constellation", "platform", "instruments"]:
merged_addition_properties[summary_name] = merge_dict_values(
additional_properties, summary_name, [list], report)
elif summary_name.startswith("sar:") or summary_name.startswith("sat:"):
merged_addition_properties[summary_name] = merge_dict_values(
additional_properties, summary_name, [list], report)
if (summary_name in ["constellation", "platform", "instruments"] or
summary_name.startswith("sar:") or summary_name.startswith("sat:")):
summary_lists = [d.get(summary_name, []) for _, d in additional_properties]
merged_addition_properties[summary_name] = merge_lists_skip_duplicates(summary_lists)
else:
backends = [cid for cid, d in additional_properties if summary_name in d]
report(f"{backends}: Unhandled merging of StacSummaries for summary_name: {summary_name!r}", "warning")
Expand Down
45 changes: 10 additions & 35 deletions src/openeo_aggregator/metadata/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,40 +9,15 @@ def __bool__(self) -> bool:
UNSET: Unset = Unset()


def merge_lists_skip_duplicates(lists: List[List[Any]]) -> List[Any]:
    """
    Merge multiple lists into one, but only keep unique values.

    Values are kept in order of first appearance across the given lists.
    Uniqueness is decided by list membership (``in``), so unhashable
    values such as dicts or nested lists are handled as well.

    :param lists: list of lists to merge
    :return: merged list
    """
    merged = []
    for values in lists:
        for value in values:
            # Deliberately a list membership test instead of a set lookup:
            # a set would raise TypeError on unhashable values.
            if value not in merged:
                merged.append(value)
    return merged

0 comments on commit a546cb8

Please sign in to comment.