From 471e919dbd7abc382065b476a60e3b09cfead750 Mon Sep 17 00:00:00 2001 From: Notger Heinz Date: Mon, 18 Apr 2022 17:30:42 +0200 Subject: [PATCH] Added test for correctness of generated datetime-intervals based on filenames --- tests/disabled_utils.py | 6 +++--- tests/test_download.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/tests/disabled_utils.py b/tests/disabled_utils.py index dcc6d2c8..231dc523 100644 --- a/tests/disabled_utils.py +++ b/tests/disabled_utils.py @@ -78,14 +78,14 @@ def test_save_dataset_to_zarr(self): # noqa D102 # test function but save_dataset_to_zarr depends on a dataset being loaded, # we have to reload the dataset here. This means that this test can theoretically # fail for two reasons: Either the data-loading failed, or the data-saving failed. - cloudmask_dataset = load_cloudmask_to_dataset( - Path(self.cloud_mask_filename), temp_directory=Path(os.getcwd()), area="UK" + rss_dataset, _ = load_native_to_dataset( + Path(self.rss_filename), temp_directory=Path(os.getcwd()), area="UK" ) zarr_path = os.path.join(os.getcwd(), "tmp.zarr") save_dataset_to_zarr( - cloudmask_dataset, + rss_dataset, zarr_path=zarr_path, compressor_name="bz2", zarr_mode="w", diff --git a/tests/test_download.py b/tests/test_download.py index e947ce92..2621d5ba 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -4,7 +4,11 @@ import pandas as pd -from satip.download import _determine_datetimes_to_download_files, download_eumetsat_data +from satip.download import ( + _determine_datetimes_to_download_files, + _get_missing_datetimes_from_list_of_files, + download_eumetsat_data, +) from satip.utils import format_dt_str @@ -49,3 +53,32 @@ def test_determine_datetime_to_download_files(self): self.assertEqual(datetimes[0][1], pd.to_datetime("2020-03-09 11:58:00")) self.assertEqual(datetimes[1][0], pd.to_datetime("2020-03-09 11:59:00")) self.assertEqual(datetimes[1][1], pd.to_datetime("2020-03-10 
11:58:00")) + + def test_get_missing_datetimes_from_list_of_files(self): + """Tests padding of datetimes if files present are missing data for given days.""" + # Assume we only have two files present, as something went very wrong + # with the download. + filenames = [ + "MSG3-SEVI-MSG15-0100-NA-20190308114036.810000000Z-NA.nat", + "MSG3-SEVI-MSG15-0100-NA-20220308133711.810000000Z-NA.nat", + ] + + # We then expect the function to pad this by adding missing timeranges + # to fill up everything from the beginning of the first day to the timestamp + # of the first file (case A), then everything between both files (case B) + # and finally fill up the rest until the end of the second day (case C). + expected_timestamps = [ + (pd.to_datetime("2019-03-08 00:00:00"), pd.to_datetime("2019-03-08 11:40:00")), + (pd.to_datetime("2019-03-08 11:40:00"), pd.to_datetime("2022-03-08 13:37:00")), + (pd.to_datetime("2022-03-08 13:37:00"), pd.to_datetime("2022-03-08 23:58:00")), + ] + + # Generate the list of missing datetime interval boundaries: + res = _get_missing_datetimes_from_list_of_files(filenames) + + for i, interval in enumerate(expected_timestamps): + for b, boundary in enumerate(interval): + # Note that the function returns datetime-objects, but we defined the expected + # values as pd-datetime. Though their str-repr is different, they still pass + # the equality-check when compared for same dates. + self.assertEqual(boundary, res[i][b])