From 48df2c7f9dd5b0bffcc9d9ecfd9722e9aee05fd5 Mon Sep 17 00:00:00 2001 From: veronikasamborska1994 <32176660+veronikasamborska1994@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:02:31 +0000 Subject: [PATCH 1/4] climate: era5 surface temperature data December 2024 --- dag/climate.yml | 2 +- .../climate/2023-12-20/surface_temperature.py | 7 ++- .../climate/2024-11-19/total_precipitation.py | 2 +- .../climate/2024-12-05/surface_temperature.py | 56 +++++++++++++++++++ .../2024-12-05/surface_temperature.zip.dvc | 26 +++++++++ 5 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 snapshots/climate/2024-12-05/surface_temperature.py create mode 100644 snapshots/climate/2024-12-05/surface_temperature.zip.dvc diff --git a/dag/climate.yml b/dag/climate.yml index 84dd5b43073..4436974e2b2 100644 --- a/dag/climate.yml +++ b/dag/climate.yml @@ -32,7 +32,7 @@ steps: # Copernicus Climate Change Service - Surface temperature. # data://meadow/climate/2023-12-20/surface_temperature: - - snapshot://climate/2024-11-05/surface_temperature.zip + - snapshot://climate/2024-12-05/surface_temperature.zip - snapshot://countries/2023-12-27/world_bank.zip data://garden/climate/2023-12-20/surface_temperature: - data://meadow/climate/2023-12-20/surface_temperature diff --git a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py index 6e0fa1f3be6..92aa10be543 100644 --- a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py +++ b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py @@ -137,11 +137,12 @@ def run(dest_dir: str) -> None: f"It wasn't possible to extract temperature data for {len(small_countries)} small countries as they are too small for the resolution of the Copernicus data." 
) # Define the start and end dates - da["date"] = pd.to_datetime(da["date"].astype(str), format="%Y%m%d") + da["valid_time"] = pd.to_datetime(da["valid_time"].astype(str), format="ISO8601") + print(da["valid_time"]) # Now you can access the 'dt' accessor - start_time = da["date"].min().dt.date.astype(str).item() - end_time = da["date"].max().dt.date.astype(str).item() + start_time = da["valid_time"].min().dt.date.astype(str).item() + end_time = da["valid_time"].max().dt.date.astype(str).item() # Generate a date range from start_time to end_time with monthly frequency month_middles = pd.date_range(start=start_time, end=end_time, freq="MS") + pd.offsets.Day(14) diff --git a/snapshots/climate/2024-11-19/total_precipitation.py b/snapshots/climate/2024-11-19/total_precipitation.py index 261639c7b02..05c119573ea 100644 --- a/snapshots/climate/2024-11-19/total_precipitation.py +++ b/snapshots/climate/2024-11-19/total_precipitation.py @@ -1,4 +1,4 @@ -"""Script to create a snapshot of the monthly averaged surface temperature data from 1950 to present from the Copernicus Climate Change Service. +"""Script to create a snapshot of the precipitation data from 1950 to present from the Copernicus Climate Change Service. The script assumes that the data is available on the CDS API. Instructions on how to access the API on a Mac are here: https://confluence.ecmwf.int/display/CKB/How+to+install+and+use+CDS+API+on+macOS diff --git a/snapshots/climate/2024-12-05/surface_temperature.py b/snapshots/climate/2024-12-05/surface_temperature.py new file mode 100644 index 00000000000..bd1f036f9d5 --- /dev/null +++ b/snapshots/climate/2024-12-05/surface_temperature.py @@ -0,0 +1,56 @@ +"""Script to create a snapshot of the monthly averaged surface temperature data from 1940 to present from the Copernicus Climate Change Service. + +The script assumes that the data is available on the CDS API. 
+Instructions on how to access the API on a Mac are here: https://confluence.ecmwf.int/display/CKB/How+to+install+and+use+CDS+API+on+macOS + +More information on how to access the data is here: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview + +The data is downloaded as a NetCDF file. Tutorials for using the Copernicus API are here and work with the NETCDF format are here: https://ecmwf-projects.github.io/copernicus-training-c3s/cds-tutorial.html +""" + + +import tempfile +from pathlib import Path + +# CDS API +import cdsapi +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/surface_temperature.zip") + snap + # Save data as a compressed temporary file. + with tempfile.TemporaryDirectory() as temp_dir: + output_file = Path(temp_dir) / "era5_monthly_t2m_eur.nc" + + client = cdsapi.Client() + + dataset = "reanalysis-era5-single-levels-monthly-means" + request = { + "product_type": ["monthly_averaged_reanalysis"], + "variable": ["2m_temperature"], + "year": [str(year) for year in range(1940, 2025)], + "month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"], + "time": "00:00", + "area": [90, -180, -90, 180], + "data_format": "netcdf", + "download_format": "zip", + } + + client.retrieve(dataset, request, output_file) + + # Upload snapshot. 
+ snap.create_snapshot(filename=output_file, upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/climate/2024-12-05/surface_temperature.zip.dvc b/snapshots/climate/2024-12-05/surface_temperature.zip.dvc new file mode 100644 index 00000000000..4e136913e45 --- /dev/null +++ b/snapshots/climate/2024-12-05/surface_temperature.zip.dvc @@ -0,0 +1,26 @@ +meta: + origin: + title_snapshot: ERA5 Monthly Averaged Data on Single Levels from 1940 to Present - Monthly Averages of 2m Surface Temperature + title: ERA5 monthly averaged data on single levels from 1940 to present + description: |- + ERA5 is the latest climate reanalysis produced by ECMWF, providing hourly data on many atmospheric, land-surface and sea-state parameters together with estimates of uncertainty. + + ERA5 data are available in the Climate Data Store on regular latitude-longitude grids at 0.25° x 0.25° resolution, with atmospheric parameters on 37 pressure levels. + + ERA5 is available from 1940 and continues to be extended forward in time, with daily updates being made available 5 days behind real time + + Initial release data, i.e., data no more than three months behind real time, are called ERA5T. + producer: Contains modified Copernicus Climate Change Service information + version_producer: 2 + citation_full: |- + Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 monthly averaged data on single levels from 1940 to present. 
Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.f17050d7 (Accessed on 05-Dec-2024) + url_main: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview + date_accessed: 2024-12-05 + date_published: 2019-12-04 + license: + name: Copernicus License + url: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview +outs: + - md5: 63aa98ba0272c654be00eaa4779c253d + size: 1396486999 + path: surface_temperature.zip From 2992a91eeb0a370acc6663dda6ed75d7359be7c5 Mon Sep 17 00:00:00 2001 From: veronikasamborska1994 <32176660+veronikasamborska1994@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:03:10 +0100 Subject: [PATCH 2/4] change import to grib format --- .../climate/2023-12-20/surface_temperature.py | 22 ++++++++++--------- .../climate/2024-12-05/surface_temperature.py | 4 ++-- .../2024-12-05/surface_temperature.zip.dvc | 4 ++-- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py index 92aa10be543..114af4b56b6 100644 --- a/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py +++ b/etl/steps/data/meadow/climate/2023-12-20/surface_temperature.py @@ -1,6 +1,6 @@ """Load a snapshot and create a meadow dataset.""" -import io +import tempfile import zipfile import geopandas as gpd @@ -25,16 +25,19 @@ def _load_data_array(snap: Snapshot) -> xr.DataArray: log.info("load_data_array.start") - # Load data from snapshot. 
with zipfile.ZipFile(snap.path, "r") as zip_file: - # Iterate through all files in the zip archive for file_info in zip_file.infolist(): - with zip_file.open(file_info) as file: - file_content = file.read() - # Create an in-memory bytes file and load the dataset - with io.BytesIO(file_content) as memfile: - da = xr.open_dataset(memfile).load() # .load() ensures data is eagerly loaded + if file_info.filename.endswith((".grb", ".grib")): # Filter GRIB files + with zip_file.open(file_info) as file: + file_content = file.read() + # Write to a temporary file + with tempfile.NamedTemporaryFile(delete=True, suffix=".grib") as tmp_file: + tmp_file.write(file_content) + tmp_file.flush() # Ensure all data is written + + # Load the GRIB file using xarray and cfgrib + da = xr.open_dataset(tmp_file.name, engine="cfgrib").load() # Convert temperature from Kelvin to Celsius. da = da["t2m"] - 273.15 @@ -137,8 +140,7 @@ def run(dest_dir: str) -> None: f"It wasn't possible to extract temperature data for {len(small_countries)} small countries as they are too small for the resolution of the Copernicus data." ) # Define the start and end dates - da["valid_time"] = pd.to_datetime(da["valid_time"].astype(str), format="ISO8601") - print(da["valid_time"]) + da["valid_time"] = xr.DataArray(pd.to_datetime(da["valid_time"].values), dims=da["valid_time"].dims) # Now you can access the 'dt' accessor start_time = da["valid_time"].min().dt.date.astype(str).item() diff --git a/snapshots/climate/2024-12-05/surface_temperature.py b/snapshots/climate/2024-12-05/surface_temperature.py index bd1f036f9d5..2b439d6bd47 100644 --- a/snapshots/climate/2024-12-05/surface_temperature.py +++ b/snapshots/climate/2024-12-05/surface_temperature.py @@ -27,7 +27,7 @@ def main(upload: bool) -> None: # Create a new snapshot. snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/surface_temperature.zip") - snap + # Save data as a compressed temporary file. 
with tempfile.TemporaryDirectory() as temp_dir: output_file = Path(temp_dir) / "era5_monthly_t2m_eur.nc" @@ -42,7 +42,7 @@ def main(upload: bool) -> None: "month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"], "time": "00:00", "area": [90, -180, -90, 180], - "data_format": "netcdf", + "data_format": "grib", "download_format": "zip", } diff --git a/snapshots/climate/2024-12-05/surface_temperature.zip.dvc b/snapshots/climate/2024-12-05/surface_temperature.zip.dvc index 4e136913e45..2f22db8e0ba 100644 --- a/snapshots/climate/2024-12-05/surface_temperature.zip.dvc +++ b/snapshots/climate/2024-12-05/surface_temperature.zip.dvc @@ -21,6 +21,6 @@ meta: name: Copernicus License url: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview outs: - - md5: 63aa98ba0272c654be00eaa4779c253d - size: 1396486999 + - md5: 8fffb8e0ed6edc22b681587769a54b4e + size: 1709315816 path: surface_temperature.zip From e2839c527f1a2ecf8af1ef79bb10c7d469e1f185 Mon Sep 17 00:00:00 2001 From: veronikasamborska1994 <32176660+veronikasamborska1994@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:07:51 +0100 Subject: [PATCH 3/4] Update surface_temperature.py --- snapshots/climate/2024-12-05/surface_temperature.py | 1 - 1 file changed, 1 deletion(-) diff --git a/snapshots/climate/2024-12-05/surface_temperature.py b/snapshots/climate/2024-12-05/surface_temperature.py index 2b439d6bd47..dcb09400160 100644 --- a/snapshots/climate/2024-12-05/surface_temperature.py +++ b/snapshots/climate/2024-12-05/surface_temperature.py @@ -8,7 +8,6 @@ The data is downloaded as a NetCDF file. 
Tutorials for using the Copernicus API are here and work with the NETCDF format are here: https://ecmwf-projects.github.io/copernicus-training-c3s/cds-tutorial.html """ - import tempfile from pathlib import Path From 2c2cd4dd2ac51e924b69f720dc7dc3f8b200489b Mon Sep 17 00:00:00 2001 From: veronikasamborska1994 <32176660+veronikasamborska1994@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:03:50 +0100 Subject: [PATCH 4/4] add cfgrib --- pyproject.toml | 1 + uv.lock | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 16272913362..3370d48e57b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,7 @@ dependencies = [ "geopy>=2.4.1", "py7zr>=0.22.0", "pyreadr>=0.5.2", + "cfgrib>=0.9.15.0", ] [tool.uv.sources] diff --git a/uv.lock b/uv.lock index da42e2623a0..16fd545ea6e 100644 --- a/uv.lock +++ b/uv.lock @@ -578,6 +578,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, ] +[[package]] +name = "cfgrib" +version = "0.9.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "click" }, + { name = "eccodes" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3e/3b/0ccbbc67866a4a2df570d6bf0f53d6d22220c44e1f3684455b5eae298936/cfgrib-0.9.15.0.tar.gz", hash = "sha256:d455034e19b9560a75d008ba9d09b2d4e65762adfb2e911f28b841f4b9c6b47f", size = 6511752 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/d7/96b4209c99f1fd6c19f502cebe8c91983c23331c380f3f521250f268ae8c/cfgrib-0.9.15.0-py3-none-any.whl", hash = "sha256:469cfd25dc173863795e596263b3b6b5ea1402b1715f2b7b1d4b995b40b32c18", size = 48908 }, +] + [[package]] name = "chardet" version = "5.2.0" @@ -974,6 +989,33 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/62/c0/c7048a1c3039f506b67eb538ff1ce91ac43de52527fb51d46bffb82307b4/earthengine_api-1.4.3-py3-none-any.whl", hash = "sha256:3a6a0de67ec3da63dabbb420eb4d2fe63ca8697d5cc9a0a3d59fc3968498542a", size = 457345 }, ] +[[package]] +name = "eccodes" +version = "2.39.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "cffi" }, + { name = "findlibs" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/fa/4a1216ef706152bb99694b84cd09cc5883314c410409dbc2ec92273d0883/eccodes-2.39.1.tar.gz", hash = "sha256:c9e4695e1cbfa3a85c548cd6a3073e2277d71413529a89b0ea62b0f299416e20", size = 2267042 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/85/b39e4e37b1444233212c707a16aa317152b2f5dc1100ce9814aa9a1f0b27/eccodes-2.39.1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:64e71f850a7dbead9ed54549b17e112682a9affb39d98043634fe42946cb973f", size = 6489138 }, + { url = "https://files.pythonhosted.org/packages/10/89/aaab32be9690c26e98bddf9eaaf5bb25c9cba8d8921f7dfc6261b8ef3916/eccodes-2.39.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:a6fc372fdbb980b0097a7bb236eac10fc8c4f6f6924243632a726f7d6f998742", size = 6584396 }, + { url = "https://files.pythonhosted.org/packages/26/04/139afa90f7c2399b2c2e5d1c0ca05d3c098c31171660cad2e0dee8b68722/eccodes-2.39.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cb1d72405490ea54a5ccb0e6165f06e0c3615ea2c65590c65799829b077f4900", size = 7331755 }, + { url = "https://files.pythonhosted.org/packages/d5/30/c8ea5426601c9d38e815e76de4c93bedf7e398ad3f3fb2b3c1d6029b8f53/eccodes-2.39.1-cp310-cp310-win_amd64.whl", hash = "sha256:d31cdff7ba720b515d7916935c3b9eae5fb502427ee43042cd50cc8e315e4088", size = 6157552 }, + { url = "https://files.pythonhosted.org/packages/4b/53/4eb2b769947178c7ee1382bbc1c69dae73f22061d8cac38801f2730f60c5/eccodes-2.39.1-cp311-cp311-macosx_13_0_arm64.whl", hash = 
"sha256:774b8d558e0ccdfb2cc26e77b65436d76cb2872d911b5c46ba4c81b4a7687406", size = 6489143 }, + { url = "https://files.pythonhosted.org/packages/ec/46/be1c165fb16bacdef5bc8cf453ef52ba0cc4a2005b91b58ef82e47197654/eccodes-2.39.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:6dc9cff3670e06854230f8e5b9a527505616a47dde41dcd302cb1b762391fd38", size = 6584397 }, + { url = "https://files.pythonhosted.org/packages/2f/ca/bc3e79e9447b894bda141075f3b9e041501da07d0d687b6abbed50828091/eccodes-2.39.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:491fd109210695ac72424b9d01d6d6c1a5aca58b2ad718ac573df6b23095da63", size = 7331771 }, + { url = "https://files.pythonhosted.org/packages/76/90/ed94954b344b407ad009c2c67b82e420bdf9c174e1f1e46863dd11e96734/eccodes-2.39.1-cp311-cp311-win_amd64.whl", hash = "sha256:6639f386fc3e23e1590ebc80c3926195c3bbde12b85e75146537b6eee1e05073", size = 6157557 }, + { url = "https://files.pythonhosted.org/packages/57/7f/dc0e17fe646978cdeda7b2f7fb7dd73b5512d57feb6421ff76c3f374968c/eccodes-2.39.1-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:754406f57d0e3ac36db8b7d7873ba905d9f8d0fb0f52372b5b0fef39b3ecc72c", size = 6489141 }, + { url = "https://files.pythonhosted.org/packages/cd/3c/16ab784ddb56457a20ee46572574c7a920df0ec1b7ddbd60fe911cf532c0/eccodes-2.39.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:0f933cc52ce0a5ab84d094b218fa1914cbe23426b834c1dfd198bb45a034577b", size = 6584395 }, + { url = "https://files.pythonhosted.org/packages/4b/d5/8978a6c1c1c3db2c27ad06b314a32bca9b8221baac2d2fa6260868512bae/eccodes-2.39.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2f62ce4c33184c463b02826add35b693680e13aa9023b89bb9ccf7964f14f317", size = 7331954 }, + { url = "https://files.pythonhosted.org/packages/8b/86/1f9d8e65cd50110291bb66bc181a018c69d969e59e13c5c42f30a8ed6552/eccodes-2.39.1-cp312-cp312-win_amd64.whl", hash = "sha256:f50595f76c8a7ba3514cdf49f5974f0a739c55d390a950ed549261693c084d27", size = 6157556 }, + { url = 
"https://files.pythonhosted.org/packages/5a/cf/835903713f8d0f0298afc871fdb8a2965ed7365f89e194a49c3a9218bd77/eccodes-2.39.1-py3-none-any.whl", hash = "sha256:8faa901c5e681e51f490ae037437501780272a23855d93cc4acd70c74b05ffde", size = 43226 }, +] + [[package]] name = "entrypoints" version = "0.4" @@ -999,6 +1041,7 @@ source = { editable = "." } dependencies = [ { name = "bugsnag" }, { name = "cdsapi" }, + { name = "cfgrib" }, { name = "click" }, { name = "deprecated" }, { name = "earthengine-api" }, @@ -1119,6 +1162,7 @@ dev = [ requires-dist = [ { name = "bugsnag", specifier = ">=4.2.1" }, { name = "cdsapi", specifier = ">=0.7.0" }, + { name = "cfgrib", specifier = ">=0.9.15.0" }, { name = "click", specifier = ">=8.0.1" }, { name = "deprecated", specifier = ">=1.2.14" }, { name = "earthengine-api", specifier = ">=0.1.411" }, @@ -1314,6 +1358,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, ] +[[package]] +name = "findlibs" +version = "0.0.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/be/6c72ef9d990cd25fe3dd97ebe9d77a859f7d27b7273e62ad750846d207ee/findlibs-0.0.5.tar.gz", hash = "sha256:7a801571e999d0ee83f9b92cbb598c21f861ee26ca9dba74cea8958ba4335e7e", size = 6581 } + [[package]] name = "fonttools" version = "4.55.3"