diff --git a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py index 92aa10be5435..114af4b56b6e 100644 --- a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py +++ b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py @@ -1,6 +1,6 @@ """Load a snapshot and create a meadow dataset.""" -import io +import tempfile import zipfile import geopandas as gpd @@ -25,16 +25,19 @@ def _load_data_array(snap: Snapshot) -> xr.DataArray: log.info("load_data_array.start") - # Load data from snapshot. with zipfile.ZipFile(snap.path, "r") as zip_file: - # Iterate through all files in the zip archive for file_info in zip_file.infolist(): - with zip_file.open(file_info) as file: - file_content = file.read() - # Create an in-memory bytes file and load the dataset - with io.BytesIO(file_content) as memfile: - da = xr.open_dataset(memfile).load() # .load() ensures data is eagerly loaded + if file_info.filename.endswith((".grb", ".grib")): # Filter GRIB files + with zip_file.open(file_info) as file: + file_content = file.read() + # Write to a temporary file + with tempfile.NamedTemporaryFile(delete=True, suffix=".grib") as tmp_file: + tmp_file.write(file_content) + tmp_file.flush() # Ensure all data is written + + # Load the GRIB file using xarray and cfgrib + da = xr.open_dataset(tmp_file.name, engine="cfgrib").load() # Convert temperature from Kelvin to Celsius. da = da["t2m"] - 273.15 @@ -137,8 +140,7 @@ def run(dest_dir: str) -> None: f"It wasn't possible to extract temperature data for {len(small_countries)} small countries as they are too small for the resolution of the Copernicus data." ) # Define the start and end dates - da["valid_time"] = pd.to_datetime(da["valid_time"].astype(str), format="ISO8601") - print(da["valid_time"]) + da["valid_time"] = xr.DataArray(pd.to_datetime(da["valid_time"].values), dims=da["valid_time"].dims) # Now you can access the 'dt' accessor start_time = da["valid_time"].min().dt.date.astype(str).item() diff --git a/snapshots/climate/2024-12-05/surface_temperature.py b/snapshots/climate/2024-12-05/surface_temperature.py index bd1f036f9d5e..2b439d6bd479 100644 --- a/snapshots/climate/2024-12-05/surface_temperature.py +++ b/snapshots/climate/2024-12-05/surface_temperature.py @@ -27,7 +27,7 @@ def main(upload: bool) -> None: # Create a new snapshot. snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/surface_temperature.zip") - snap + # Save data as a compressed temporary file. with tempfile.TemporaryDirectory() as temp_dir: output_file = Path(temp_dir) / "era5_monthly_t2m_eur.nc" @@ -42,7 +42,7 @@ def main(upload: bool) -> None: "month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"], "time": "00:00", "area": [90, -180, -90, 180], - "data_format": "netcdf", + "data_format": "grib", "download_format": "zip", } diff --git a/snapshots/climate/2024-12-05/surface_temperature.zip.dvc b/snapshots/climate/2024-12-05/surface_temperature.zip.dvc index 4e136913e454..2f22db8e0bae 100644 --- a/snapshots/climate/2024-12-05/surface_temperature.zip.dvc +++ b/snapshots/climate/2024-12-05/surface_temperature.zip.dvc @@ -21,6 +21,6 @@ meta: name: Copernicus License url: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview outs: - - md5: 63aa98ba0272c654be00eaa4779c253d - size: 1396486999 + - md5: 8fffb8e0ed6edc22b681587769a54b4e + size: 1709315816 path: surface_temperature.zip