Skip to content

Commit

Permalink
change import to grib format
Browse files (browse the repository at this point in the history)
  • Loading branch information
veronikasamborska1994 committed Dec 20, 2024
1 parent e870b4a commit 4fb15de
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 14 deletions.
22 changes: 12 additions & 10 deletions etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
"""Load a snapshot and create a meadow dataset."""

import io
import tempfile
import zipfile

import geopandas as gpd
Expand All @@ -25,16 +25,19 @@

def _load_data_array(snap: Snapshot) -> xr.DataArray:
log.info("load_data_array.start")
# Load data from snapshot.
with zipfile.ZipFile(snap.path, "r") as zip_file:
# Iterate through all files in the zip archive
for file_info in zip_file.infolist():
with zip_file.open(file_info) as file:
file_content = file.read()
# Create an in-memory bytes file and load the dataset
with io.BytesIO(file_content) as memfile:
da = xr.open_dataset(memfile).load() # .load() ensures data is eagerly loaded
if file_info.filename.endswith((".grb", ".grib")): # Filter GRIB files
with zip_file.open(file_info) as file:
file_content = file.read()

# Write to a temporary file
with tempfile.NamedTemporaryFile(delete=True, suffix=".grib") as tmp_file:
tmp_file.write(file_content)
tmp_file.flush() # Ensure all data is written

# Load the GRIB file using xarray and cfgrib
da = xr.open_dataset(tmp_file.name, engine="cfgrib").load()
# Convert temperature from Kelvin to Celsius.
da = da["t2m"] - 273.15

Expand Down Expand Up @@ -137,8 +140,7 @@ def run(dest_dir: str) -> None:
f"It wasn't possible to extract temperature data for {len(small_countries)} small countries as they are too small for the resolution of the Copernicus data."
)
# Define the start and end dates
da["valid_time"] = pd.to_datetime(da["valid_time"].astype(str), format="ISO8601")
print(da["valid_time"])
da["valid_time"] = xr.DataArray(pd.to_datetime(da["valid_time"].values), dims=da["valid_time"].dims)

# Now you can access the 'dt' accessor
start_time = da["valid_time"].min().dt.date.astype(str).item()
Expand Down
4 changes: 2 additions & 2 deletions snapshots/climate/2024-12-05/surface_temperature.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,7 +27,7 @@
def main(upload: bool) -> None:
# Create a new snapshot.
snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/surface_temperature.zip")
snap

# Save data as a compressed temporary file.
with tempfile.TemporaryDirectory() as temp_dir:
output_file = Path(temp_dir) / "era5_monthly_t2m_eur.nc"
Expand All @@ -42,7 +42,7 @@ def main(upload: bool) -> None:
"month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"],
"time": "00:00",
"area": [90, -180, -90, 180],
"data_format": "netcdf",
"data_format": "grib",
"download_format": "zip",
}

Expand Down
4 changes: 2 additions & 2 deletions snapshots/climate/2024-12-05/surface_temperature.zip.dvc
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,6 +21,6 @@ meta:
name: Copernicus License
url: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview
outs:
- md5: 63aa98ba0272c654be00eaa4779c253d
size: 1396486999
- md5: 8fffb8e0ed6edc22b681587769a54b4e
size: 1709315816
path: surface_temperature.zip

0 comments on commit 4fb15de

Please sign in to comment.