diff --git a/earthaccess/search.py b/earthaccess/search.py
index 4ac8cb61..d95deaaf 100644
--- a/earthaccess/search.py
+++ b/earthaccess/search.py
@@ -2,7 +2,7 @@
 from inspect import getmembers, ismethod
 from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
-import dateutil.parser as parser  # type: ignore
+import dateutil.parser  # type: ignore
 from cmr import CollectionQuery, GranuleQuery  # type: ignore
 from requests import exceptions, session
 
@@ -11,6 +11,50 @@
 from .results import DataCollection, DataGranule
 
 
+def _normalize_datetime(raw: Union[None, str, dt.date]) -> Union[None, dt.datetime]:
+    """Convert a date-like value into a timezone-aware UTC datetime.
+
+    Strings are parsed with dateutil, a date becomes midnight of that day, and
+    naive values are assumed to already be in UTC.
+
+    Parameters:
+        raw: a str, date or datetime to normalize; None and "" mean "no date"
+
+    Returns:
+        The equivalent UTC datetime, or None when no date was given.
+
+    Raises:
+        TypeError: if raw is neither a str nor a date object.
+        ValueError: if raw cannot be parsed or converted to UTC.
+    """
+    # the cmr.*Query.temporal method will convert None to an empty string
+    if raw is None or raw == "":
+        return None
+    # the cmr.*Query.temporal method will convert a utc dt.datetime to the
+    # correct ISO 8601 string without additional attempts to parse
+    if isinstance(raw, str):
+        # handle string by parsing with default
+        default = dt.datetime(1, 1, 1, tzinfo=dt.timezone.utc)
+        normalized = dateutil.parser.parse(raw, default=default)
+    elif not isinstance(raw, dt.datetime):
+        # handle dt.date by converting to utc dt.datetime
+        try:
+            normalized = dt.datetime.combine(raw, dt.time(tzinfo=dt.timezone.utc))
+        except TypeError:
+            msg = f"Dates must be a date object or str, not {raw.__class__.__name__}."
+            raise TypeError(msg) from None
+    else:
+        # handle aware dt.datetime and naive dt.datetime by assuming utc
+        normalized = raw if raw.tzinfo else raw.replace(tzinfo=dt.timezone.utc)
+    # convert timezone aware dt.datetime to a utc dt.datetime
+    try:
+        normalized = normalized.astimezone(dt.timezone.utc)
+    except Exception:
+        msg = f"Provided date {raw} is not valid."
+        raise ValueError(msg) from None
+    return normalized
+
+
 class DataCollections(CollectionQuery):
     """
     ???+ Info
@@ -313,35 +357,23 @@ def get(self, limit: int = 2000) -> list:
 
     def temporal(
         self,
-        date_from: Optional[Union[str, dt.datetime]] = None,
-        date_to: Optional[Union[str, dt.datetime]] = None,
+        date_from: Optional[Union[str, dt.date]] = None,
+        date_to: Optional[Union[str, dt.date]] = None,
         exclude_boundary: bool = False,
     ) -> Type[CollectionQuery]:
         """Filter by an open or closed date range. Dates can be provided as datetime objects
-        or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls
-        to this method before calling execute().
+        or ISO 8601 strings. Multiple ranges can be provided by successive method calls.
 
         Parameters:
-            date_from (String or Datetime object): earliest date of temporal range
-            date_to (String or Datetime object): latest date of temporal range
-            exclude_boundary (Boolean): whether or not to exclude the date_from/to in the matched range.
+            date_from: earliest date of temporal range
+            date_to: latest date of temporal range
+            exclude_boundary: whether or not to exclude the date_from/to in the matched range.
         """
-        DEFAULT = dt.datetime(1979, 1, 1)
-        if date_from is not None and not isinstance(date_from, dt.datetime):
-            try:
-                date_from = parser.parse(date_from, default=DEFAULT).isoformat() + "Z"
-            except Exception:
-                print("The provided start date was not recognized")
-                date_from = ""
-
-        if date_to is not None and not isinstance(date_to, dt.datetime):
-            try:
-                date_to = parser.parse(date_to, default=DEFAULT).isoformat() + "Z"
-            except Exception:
-                print("The provided end date was not recognized")
-                date_to = ""
-
-        super().temporal(date_from, date_to, exclude_boundary)
+        super().temporal(
+            _normalize_datetime(date_from),
+            _normalize_datetime(date_to),
+            exclude_boundary,
+        )
         return self
 
 
@@ -680,35 +712,23 @@ def debug(self, debug: bool = True) -> Type[GranuleQuery]:
 
     def temporal(
         self,
-        date_from: Optional[Union[str, dt.datetime]] = None,
-        date_to: Optional[Union[str, dt.datetime]] = None,
+        date_from: Optional[Union[str, dt.date]] = None,
+        date_to: Optional[Union[str, dt.date]] = None,
         exclude_boundary: bool = False,
     ) -> Type[GranuleQuery]:
-        """Filter by an open or closed date range.
-        Dates can be provided as a datetime objects or ISO 8601 formatted strings. Multiple
-        ranges can be provided by successive calls to this method before calling execute().
+        """Filter by an open or closed date range. Dates can be provided as datetime objects
+        or ISO 8601 strings. Multiple ranges can be provided by successive method calls.
 
         Parameters:
             date_from: earliest date of temporal range
             date_to: latest date of temporal range
-            exclude_boundary: whether to exclude the date_from/to in the matched range
+            exclude_boundary: whether or not to exclude the date_from/to in the matched range.
         """
-        DEFAULT = dt.datetime(1979, 1, 1)
-        if date_from is not None and not isinstance(date_from, dt.datetime):
-            try:
-                date_from = parser.parse(date_from, default=DEFAULT).isoformat() + "Z"
-            except Exception:
-                print("The provided start date was not recognized")
-                date_from = ""
-
-        if date_to is not None and not isinstance(date_to, dt.datetime):
-            try:
-                date_to = parser.parse(date_to, default=DEFAULT).isoformat() + "Z"
-            except Exception:
-                print("The provided end date was not recognized")
-                date_to = ""
-
-        super().temporal(date_from, date_to, exclude_boundary)
+        super().temporal(
+            _normalize_datetime(date_from),
+            _normalize_datetime(date_to),
+            exclude_boundary,
+        )
         return self
 
     def version(self, version: str = "") -> Type[GranuleQuery]:
diff --git a/tests/unit/test_granule_queries.py b/tests/unit/test_granule_queries.py
index 33280810..edfe16b0 100644
--- a/tests/unit/test_granule_queries.py
+++ b/tests/unit/test_granule_queries.py
@@ -13,12 +13,14 @@
         dt.datetime(2021, 2, 2),
         "2021-02-01T00:00:00Z,2021-02-02T00:00:00Z",
     ),
+    ("1999-02-01 06:00:00Z", "2009-01-01", "1999-02-01T06:00:00Z,2009-01-01T00:00:00Z"),
 ]
 
 invalid_single_dates = [
-    ("2001-12-45", "2001-12-21", None),
-    ("2021w1", "", None),
-    ("2999-02-01", "2009-01-01", None),
+    ("2001-12-45", "2001-12-21", ValueError),
+    ("2021w1", "", ValueError),
+    ("2999-02-01", "2009-01-01", ValueError),
+    (123, "2009-01-01", TypeError),
 ]
 
 
@@ -35,14 +37,12 @@ def test_query_can_parse_single_dates(start, end, expected):
     assert granules.params["temporal"][0] == expected
 
 
-@pytest.mark.parametrize("start,end,expected", invalid_single_dates)
-def test_query_can_handle_invalid_dates(start, end, expected):
+@pytest.mark.parametrize("start,end,exception", invalid_single_dates)
+def test_query_can_handle_invalid_dates(start, end, exception):
     granules = DataGranules().short_name("MODIS")
-    try:
+    with pytest.raises(exception):
         granules = granules.temporal(start, end)
-    except Exception as e:
-        assert isinstance(e, ValueError)
-        assert "temporal" not in granules.params
+    assert "temporal" not in granules.params
 
 
 @pytest.mark.parametrize("bbox,expected", bbox_queries)
diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py
new file mode 100644
index 00000000..68457125
--- /dev/null
+++ b/tests/unit/test_search.py
@@ -0,0 +1,48 @@
+from datetime import date, datetime, time, timedelta, timezone
+
+import pytest
+from dateutil.parser import ParserError
+from earthaccess import search
+from numpy import datetime64
+from pandas import Timestamp
+
+handled_dates = [
+    ("", None),
+    (None, None),
+    ("2024", "2024-01-01T00:00:00Z"),
+    ("2024-02", "2024-02-01T00:00:00Z"),
+    ("2024-02-03T10", "2024-02-03T10:00:00Z"),
+    ("2024-02-03T10:08:54", "2024-02-03T10:08:54Z"),
+    ("2024-02-03T10:08:54Z", "2024-02-03T10:08:54Z"),
+    ("2024-02-03T10:08:54+00:00", "2024-02-03T10:08:54Z"),
+    ("2024-02-03T10:08:54-09:00", "2024-02-03T19:08:54Z"),
+    (date(1985, 10, 19), "1985-10-19T00:00:00Z"),
+    (datetime(1985, 10, 19, 12), "1985-10-19T12:00:00Z"),
+    (datetime(1985, 10, 19, 12, 24), "1985-10-19T12:24:00Z"),
+    (datetime(1985, 10, 19, 12, 24, tzinfo=timezone.utc), "1985-10-19T12:24:00Z"),
+    (
+        datetime(1985, 10, 19, 12, 24, tzinfo=timezone(timedelta(hours=-9))),
+        "1985-10-19T21:24:00Z",
+    ),
+    (Timestamp("1985-10-19"), "1985-10-19T00:00:00Z"),
+    ("foobar", ParserError("Unknown string format: foobar")),
+    (time(0, 0), TypeError("Dates must be a date object or str, not time.")),
+    (
+        datetime64(0, "ns"),
+        TypeError("Dates must be a date object or str, not datetime64."),
+    ),
+    (Timestamp(""), ValueError("Provided date NaT is not valid.")),
+]
+
+
+@pytest.mark.parametrize("raw,expected", handled_dates)
+def test__normalize_datetime(raw, expected):
+    if isinstance(expected, Exception):
+        with pytest.raises(type(expected), match=str(expected)):
+            _ = search._normalize_datetime(raw)
+    elif expected:
+        assert (
+            search._normalize_datetime(raw).strftime("%Y-%m-%dT%H:%M:%SZ") == expected
+        )
+    else:
+        assert search._normalize_datetime(raw) is None