From a598171ef01d6bf10e866890b0a753f0a8d70a6b Mon Sep 17 00:00:00 2001 From: Carson Davis Date: Tue, 3 Dec 2024 11:05:00 -0600 Subject: [PATCH] improve processing of temporal extent to include ranges as well as single dates --- scripts/ej/cmr_processing.py | 18 +++++---- scripts/ej/test_cmr_processing.py | 61 +++++++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/scripts/ej/cmr_processing.py b/scripts/ej/cmr_processing.py index ce9252d8..6b6c77ab 100644 --- a/scripts/ej/cmr_processing.py +++ b/scripts/ej/cmr_processing.py @@ -69,26 +69,28 @@ def _check_temporal_range(self, range_datetime: dict) -> tuple[datetime, datetim return begin_date, end_date def _process_temporal_extents(self) -> TemporalInfo: - """Process all temporal information.""" temporal_extents = self.umm.get("TemporalExtents", []) latest_end_date = None total_duration = 0 - single_date_times = [] + all_temporal_strings = [] for extent in temporal_extents: - single_date_times.extend(extent.get("SingleDateTimes", [])) - range_datetimes = extent.get("RangeDateTimes", []) + # Process single dates + all_temporal_strings.extend(extent.get("SingleDateTimes", [])) - for range_dt in range_datetimes: + # Process range dates + for range_dt in extent.get("RangeDateTimes", []): try: begin_date, end_date = self._check_temporal_range(range_dt) + range_str = f"{range_dt['BeginningDateTime']} - {range_dt['EndingDateTime']}" + all_temporal_strings.append(range_str) + if latest_end_date is None or end_date > latest_end_date: latest_end_date = end_date total_duration += (end_date - begin_date).days except (KeyError, ValueError): continue - # Fix: Extract Value and Unit correctly from the TemporalResolution dictionary temporal_resolution_dict = temporal_extents[0].get("TemporalResolution", {}) if temporal_extents else {} resolution_value = temporal_resolution_dict.get("Value", "") resolution_unit = temporal_resolution_dict.get("Unit", "") @@ -96,9 +98,9 @@ def _process_temporal_extents(self) -> TemporalInfo: return TemporalInfo( latest_end_date=latest_end_date, total_duration=total_duration, - resolution=str(resolution_value), # Convert to string in case it's a number + resolution=str(resolution_value), resolution_unit=resolution_unit, - single_date_times=single_date_times, + single_date_times=sorted(all_temporal_strings), ) def _process_spatial_info(self) -> SpatialInfo: diff --git a/scripts/ej/test_cmr_processing.py b/scripts/ej/test_cmr_processing.py index 3a552118..56b00b56 100644 --- a/scripts/ej/test_cmr_processing.py +++ b/scripts/ej/test_cmr_processing.py @@ -21,13 +21,12 @@ def cmr_dataset(self): def test_full_dataset_processing(self, cmr_dataset): """Test that all properties can be extracted from real data without errors""" - # Test all property accessors assert cmr_dataset.dataset_name == "2000 Pilot Environmental Sustainability Index (ESI)" assert cmr_dataset.description.startswith("The 2000 Pilot Environmental Sustainability Index") assert cmr_dataset.limitations == "None" assert cmr_dataset.format == "PDF" - assert cmr_dataset.temporal_extent == "" # No SingleDateTimes in example - assert cmr_dataset.intended_use == "Path A" # ProcessingLevel is 4 + assert cmr_dataset.temporal_extent == "1978-01-01T00:00:00.000Z - 1999-12-31T00:00:00.000Z" + assert cmr_dataset.intended_use == "Path A" assert cmr_dataset.source_link == "https://doi.org/10.7927/H4NK3BZJ" assert "Long temporal extent" in cmr_dataset.strengths assert "No recent data available" in cmr_dataset.weaknesses @@ -133,6 +132,62 @@ def test_missing_temporal_data(self): assert dataset.temporal_info.latest_end_date is None assert dataset.temporal_resolution == "" + def test_single_date_only(self): + data = { + "meta": {}, + "umm": {"TemporalExtents": [{"SingleDateTimes": ["2020-01-01T00:00:00.000Z", "2020-06-01T00:00:00.000Z"]}]}, + } + dataset = CmrDataset(data) + assert dataset.temporal_extent == "2020-01-01T00:00:00.000Z, 2020-06-01T00:00:00.000Z" + + def test_range_date_only(self): + data = { + "meta": {}, + "umm": { + "TemporalExtents": [ + { + "RangeDateTimes": [ + { + "BeginningDateTime": "2020-01-01T00:00:00.000Z", + "EndingDateTime": "2020-12-31T23:59:59.999Z", + }, + { + "BeginningDateTime": "2021-01-01T00:00:00.000Z", + "EndingDateTime": "2021-12-31T23:59:59.999Z", + }, + ] + } + ] + }, + } + dataset = CmrDataset(data) + assert ( + dataset.temporal_extent + == "2020-01-01T00:00:00.000Z - 2020-12-31T23:59:59.999Z, 2021-01-01T00:00:00.000Z - 2021-12-31T23:59:59.999Z" # noqa + ) + + def test_combined_single_and_range_dates(self): + data = { + "meta": {}, + "umm": { + "TemporalExtents": [ + { + "SingleDateTimes": ["2020-01-01T00:00:00.000Z"], + "RangeDateTimes": [ + { + "BeginningDateTime": "2021-01-01T00:00:00.000Z", + "EndingDateTime": "2021-12-31T23:59:59.999Z", + } + ], + } + ] + }, + } + dataset = CmrDataset(data) + assert ( + dataset.temporal_extent == "2020-01-01T00:00:00.000Z, 2021-01-01T00:00:00.000Z - 2021-12-31T23:59:59.999Z" + ) + class TestSpatialProcessing: """Unit tests for spatial information processing"""