Skip to content

Commit

Permalink
improve processing of temporal extent to include ranges as well as single dates
Browse files Browse the repository at this point in the history
  • Loading branch information
CarsonDavis committed Dec 3, 2024
1 parent 2e6cc06 commit a598171
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 11 deletions.
18 changes: 10 additions & 8 deletions scripts/ej/cmr_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,36 +69,38 @@ def _check_temporal_range(self, range_datetime: dict) -> tuple[datetime, datetim
return begin_date, end_date

def _process_temporal_extents(self) -> TemporalInfo:
"""Process all temporal information."""
temporal_extents = self.umm.get("TemporalExtents", [])
latest_end_date = None
total_duration = 0
single_date_times = []
all_temporal_strings = []

for extent in temporal_extents:
single_date_times.extend(extent.get("SingleDateTimes", []))
range_datetimes = extent.get("RangeDateTimes", [])
# Process single dates
all_temporal_strings.extend(extent.get("SingleDateTimes", []))

for range_dt in range_datetimes:
# Process range dates
for range_dt in extent.get("RangeDateTimes", []):
try:
begin_date, end_date = self._check_temporal_range(range_dt)
range_str = f"{range_dt['BeginningDateTime']} - {range_dt['EndingDateTime']}"
all_temporal_strings.append(range_str)

if latest_end_date is None or end_date > latest_end_date:
latest_end_date = end_date
total_duration += (end_date - begin_date).days
except (KeyError, ValueError):
continue

# Fix: Extract Value and Unit correctly from the TemporalResolution dictionary
temporal_resolution_dict = temporal_extents[0].get("TemporalResolution", {}) if temporal_extents else {}
resolution_value = temporal_resolution_dict.get("Value", "")
resolution_unit = temporal_resolution_dict.get("Unit", "")

return TemporalInfo(
latest_end_date=latest_end_date,
total_duration=total_duration,
resolution=str(resolution_value), # Convert to string in case it's a number
resolution=str(resolution_value),
resolution_unit=resolution_unit,
single_date_times=single_date_times,
single_date_times=sorted(all_temporal_strings),
)

def _process_spatial_info(self) -> SpatialInfo:
Expand Down
61 changes: 58 additions & 3 deletions scripts/ej/test_cmr_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ def cmr_dataset(self):

def test_full_dataset_processing(self, cmr_dataset):
"""Test that all properties can be extracted from real data without errors"""
# Test all property accessors
assert cmr_dataset.dataset_name == "2000 Pilot Environmental Sustainability Index (ESI)"
assert cmr_dataset.description.startswith("The 2000 Pilot Environmental Sustainability Index")
assert cmr_dataset.limitations == "None"
assert cmr_dataset.format == "PDF"
assert cmr_dataset.temporal_extent == "" # No SingleDateTimes in example
assert cmr_dataset.intended_use == "Path A" # ProcessingLevel is 4
assert cmr_dataset.temporal_extent == "1978-01-01T00:00:00.000Z - 1999-12-31T00:00:00.000Z"
assert cmr_dataset.intended_use == "Path A"
assert cmr_dataset.source_link == "https://doi.org/10.7927/H4NK3BZJ"
assert "Long temporal extent" in cmr_dataset.strengths
assert "No recent data available" in cmr_dataset.weaknesses
Expand Down Expand Up @@ -133,6 +132,62 @@ def test_missing_temporal_data(self):
assert dataset.temporal_info.latest_end_date is None
assert dataset.temporal_resolution == ""

def test_single_date_only(self):
    """With only SingleDateTimes supplied, temporal_extent lists them comma-separated."""
    extent = {"SingleDateTimes": ["2020-01-01T00:00:00.000Z", "2020-06-01T00:00:00.000Z"]}
    payload = {"meta": {}, "umm": {"TemporalExtents": [extent]}}

    result = CmrDataset(payload).temporal_extent

    assert result == "2020-01-01T00:00:00.000Z, 2020-06-01T00:00:00.000Z"

def test_range_date_only(self):
    """With only RangeDateTimes supplied, every range is rendered as begin - end, comma-separated."""
    range_2020 = {
        "BeginningDateTime": "2020-01-01T00:00:00.000Z",
        "EndingDateTime": "2020-12-31T23:59:59.999Z",
    }
    range_2021 = {
        "BeginningDateTime": "2021-01-01T00:00:00.000Z",
        "EndingDateTime": "2021-12-31T23:59:59.999Z",
    }
    extent = {"RangeDateTimes": [range_2020, range_2021]}
    payload = {"meta": {}, "umm": {"TemporalExtents": [extent]}}

    expected = "2020-01-01T00:00:00.000Z - 2020-12-31T23:59:59.999Z, 2021-01-01T00:00:00.000Z - 2021-12-31T23:59:59.999Z"  # noqa

    assert CmrDataset(payload).temporal_extent == expected

def test_combined_single_and_range_dates(self):
    """An extent holding one single date and one range yields both entries in temporal_extent."""
    year_2021 = {
        "BeginningDateTime": "2021-01-01T00:00:00.000Z",
        "EndingDateTime": "2021-12-31T23:59:59.999Z",
    }
    extent = {
        "SingleDateTimes": ["2020-01-01T00:00:00.000Z"],
        "RangeDateTimes": [year_2021],
    }
    payload = {"meta": {}, "umm": {"TemporalExtents": [extent]}}

    expected = "2020-01-01T00:00:00.000Z, 2021-01-01T00:00:00.000Z - 2021-12-31T23:59:59.999Z"

    assert CmrDataset(payload).temporal_extent == expected


class TestSpatialProcessing:
"""Unit tests for spatial information processing"""
Expand Down

0 comments on commit a598171

Please sign in to comment.