Commit
Update notebooks to work with changes to dask chunking, adapt to new Raven code conventions (#395)

Fixes 1/2 of Ouranosinc/pavics-jupyter-env-issues#12.

For the new Jupyter env in Ouranosinc/PAVICS-e2e-workflow-tests#137.

FYI @tlvu 

### Changes

- Updates the date handling of datasets so that chunking with `dask` is properly performed (see the sketch below).
- Minor fixes to comments (formatting, typos).
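
For context, a minimal, self-contained sketch of the date-handling and chunking pattern the notebooks now follow. The toy dataset and the `t2m` variable name are placeholders for the CMIP6/ERA5 data actually read in notebook 08, and `cftime` plus `dask` are assumed to be installed (as they are in the Jupyter env):

```python
import numpy as np
import xarray as xr

# Toy daily dataset standing in for the CMIP6 data read in notebook 08;
# the random values and the "t2m" name are placeholders.
time = xr.cftime_range("1980-01-01 12:00", periods=10, freq="D", calendar="noleap")
ds = xr.Dataset({"t2m": ("time", np.random.rand(10))}, coords={"time": time})

# Normalize the calendar so the time axis becomes numpy.datetime64.
ds = ds.convert_calendar("standard")

# Move each timestamp to midnight of its day so dates line up across datasets.
ds = ds.assign_coords(time=ds.time.dt.floor("D"))

# Rechunk with per-dimension keyword syntax and a single chunk along time,
# replacing the positional .chunk(-1, -1, -1) calls shown in the diff below.
t2m = ds["t2m"].chunk(time=-1)
```

In short, chunk sizes are now given per dimension by name, and the time coordinate is normalized (standard calendar, midnight timestamps) before the dask-backed operations, which is what makes the chunking behave as expected.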
tlvu authored Nov 14, 2024
2 parents 88bfc6f + c06bf1f commit fb74e38
Showing 10 changed files with 211 additions and 200 deletions.
21 changes: 12 additions & 9 deletions .github/workflows/main.yml
@@ -52,13 +52,13 @@ jobs:
fail-fast: false
matrix:
os: [ 'ubuntu-latest' ] # 'macos-latest' disabled until a new build of raven-hydro is available
python-version:
- "3.9"
- "3.10"
- "3.11"
- "3.12"
python-version: [ "3.9", "3.11", "3.12" ]
tox-env: [ 'false' ]
# - "3.13" # not yet supported by dependencies
upstream-branch: [ 'main' ]
include:
- os: 'ubuntu-latest'
python-version: '3.10'
tox-env: 'py3.10-coveralls-upstream'
steps:
- name: Harden Runner
uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1
@@ -113,7 +113,11 @@ jobs:
python3 -m pip install --require-hashes -r CI/requirements_ci.txt
- name: Test with tox and report coverage
run: |
python3 -m tox
if [ "${{ matrix.tox-env }}" != "false" ]; then
python3 -m tox -e ${{ matrix.tox-env }}
else
python3 -m tox -e py${{ matrix.python-version }}-coveralls
fi
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COVERALLS_FLAG_NAME: run-Python${{ matrix.python-version }}-${{ matrix.os }}
@@ -128,8 +132,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
os: [ "ubuntu-latest" ]
# - macos-latest # disabled until a new build of raven-hydro is available
# - windows-latest # disabled until xesmf is available
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
98 changes: 52 additions & 46 deletions docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
@@ -85,10 +85,10 @@
},
"outputs": [],
"source": [
"# We get the basin contour for testing on a server. You can replace the getfile method by a string containing the path\n",
"# to your own geojson\n",
"# We get the basin contour for testing on a server.\n",
"# You can replace the getfile method by a string containing the path to your own geojson.\n",
"\n",
"# Get basin contour\n",
"# Get basin contour.\n",
"basin_contour = get_file(\"notebook_inputs/input.geojson\")\n",
"\n",
"reference_start_day = dt.datetime(1980, 12, 31)\n",
@@ -158,15 +158,16 @@
},
"outputs": [],
"source": [
"# Prepare the filesystem that allows reading data. Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
"# Prepare the filesystem that allows reading data.\n",
"# Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
"fsCMIP = gcsfs.GCSFileSystem(token=\"anon\", access=\"read_only\")\n",
"\n",
"# Get the catalog info from the pangeo dataset, which basically is a list of links to the various products.\n",
"# Get the catalog info from the PANGEO dataset, which basically is a list of links to the various products.\n",
"col = intake.open_esm_datastore(\n",
" \"https://storage.googleapis.com/cmip6/pangeo-cmip6.json\"\n",
")\n",
"\n",
"# Print the contents of the catalog, so we can see the classification system\n",
"# Print the contents of the catalog, so we can see the classification system.\n",
"display(col)"
]
},
@@ -185,7 +186,8 @@
},
"outputs": [],
"source": [
"# Get the list of models. Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
"# Get the list of models.\n",
"# Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
"list(col.df.source_id.unique())"
]
},
@@ -225,11 +227,10 @@
" member_id=\"r1i1p1f1\",\n",
" source_id=climate_model,\n",
")\n",
"col_subset = col.search(\n",
" require_all_on=[\"source_id\"], **query\n",
") # Command that will return the filtered list\n",
"# Return the filtered list.\n",
"col_subset = col.search(require_all_on=[\"source_id\"], **query)\n",
"\n",
"# Show the filtered list:\n",
"# Show the filtered list.\n",
"display(col_subset.df)"
]
},
@@ -249,7 +250,7 @@
},
"outputs": [],
"source": [
"# Get the object locator object\n",
"# Get the object locator object.\n",
"mapper = fsCMIP.get_mapper(col_subset.df.zstore[0])"
]
},
@@ -277,17 +278,20 @@
},
"outputs": [],
"source": [
"# Get the CMIP6 data from Google Cloud and read it in memory using xarray. This is done via \"lazy loading\" and is not actually reading the data in memory\n",
"# yet, but is keeping track of what it will need to get, eventually.\n",
"# Get the CMIP6 data from Google Cloud and read it in memory using xarray.\n",
"# This is done via \"lazy loading\" and is not actually reading the data in memory yet, but is keeping track of what it will need to get, eventually.\n",
"ds = xr.open_zarr(mapper, consolidated=True)\n",
"\n",
"# Convert to numpy.datetime64 object to be compatbile\n",
"if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
" ds = ds.convert_calendar(\"standard\")\n",
"# Convert to numpy.datetime64 object for compatibility.\n",
"ds = ds.convert_calendar(\"standard\")\n",
"\n",
"# Extract only the dates that we really want. Again, this is done via lazy loading, and is not actually using memory at this point.\n",
"# Extract only the dates that we really want.\n",
"# Again, this is done via lazy loading, and is not actually using memory at this point.\n",
"ds = ds.sel(time=slice(reference_start_day, reference_end_day))\n",
"\n",
"# Set the date to the midnight of the given day.\n",
"ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
"\n",
"# Use the clisops subsetting tools to extract the data for the watershed boundaries and take the spatial average\n",
"ds = average.average_shape(ds, basin_contour)\n",
"\n",
@@ -322,14 +326,16 @@
" with xr.set_options(keep_attrs=True):\n",
" ds = xr.open_zarr(mapper, consolidated=True)\n",
"\n",
" # Convert to numpy.datetime64 object to be compatbile\n",
" if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
" ds = ds.convert_calendar(\"standard\")\n",
" # Convert to numpy.datetime64 object for compatibility.\n",
" ds = ds.convert_calendar(\"standard\")\n",
"\n",
" # Set the date to the midnight of the given day.\n",
" ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
"\n",
" # Compute average over region\n",
" # Compute the average over region.\n",
" out = average.average_shape(ds.sel(time=slice(start, end)), geometry)\n",
"\n",
" # Convert geometry variables into attributes\n",
" # Convert geometry variables into attributes.\n",
" attrs = {\n",
" key: out[key].values.item()\n",
" for key in out.coords\n",
@@ -431,9 +437,15 @@
" ERA5_reference = subset.subset_shape(\n",
" ds.sel(time=slice(reference_start_day, reference_end_day)), basin_contour\n",
" ).mean({\"latitude\", \"longitude\"})\n",
" ERA5_tmin = ERA5_reference[\"t2m\"].resample(time=\"1D\").min().chunk(-1, -1, -1)\n",
" ERA5_tmax = ERA5_reference[\"t2m\"].resample(time=\"1D\").max().chunk(-1, -1, -1)\n",
" ERA5_pr = ERA5_reference[\"tp\"].resample(time=\"1D\").sum().chunk(-1, -1, -1)"
" ERA5_tmin = (\n",
" ERA5_reference.t2m.resample(time=\"1D\")\n",
" .min()\n",
" .chunk(\n",
" time=-1,\n",
" )\n",
" )\n",
" ERA5_tmax = ERA5_reference.t2m.resample(time=\"1D\").max().chunk(time=-1)\n",
" ERA5_pr = ERA5_reference.tp.resample(time=\"1D\").sum().chunk(time=-1)"
]
},
{
@@ -455,8 +467,8 @@
},
"outputs": [],
"source": [
"# Here we need to make sure that our units are all in the correct format. You can play around with the tools we've seen thus far to explore the units\n",
"# and make sure everything is consistent.\n",
"# Here we need to make sure that our units are all in the correct format.\n",
"# You can play around with the tools we've seen thus far to explore the units and make sure everything is consistent.\n",
"\n",
"# Let's start with precipitation:\n",
"ERA5_pr = xclim.core.units.convert_units_to(ERA5_pr, \"mm\", context=\"hydro\")\n",
@@ -497,25 +509,25 @@
},
"outputs": [],
"source": [
"# Use xclim utilities (sbda) to give information on the type of window used for the bias correction.\n",
"# Use xclim utilities (SDBA) to give information on the type of window used for the bias correction.\n",
"group_month_window = sdba.utils.Grouper(\"time.dayofyear\", window=15)\n",
"\n",
"# This is an adjusting function. It builds the tool that will perform the corrections.\n",
"Adjustment = sdba.DetrendedQuantileMapping.train(\n",
" ref=ERA5_pr, hist=historical_pr, nquantiles=50, kind=\"+\", group=group_month_window\n",
")\n",
"\n",
"# Apply the correction factors on the reference period\n",
"# Apply the correction factors on the reference period.\n",
"corrected_ref_precip = Adjustment.adjust(historical_pr, interp=\"linear\")\n",
"\n",
"# Apply the correction factors on the future period\n",
"# Apply the correction factors on the future period.\n",
"corrected_fut_precip = Adjustment.adjust(future_pr, interp=\"linear\")\n",
"\n",
"# Ensure that the precipitation is non-negative, which can happen with some climate models\n",
"# Ensure that the precipitation is non-negative, which can happen with some climate models.\n",
"corrected_ref_precip = corrected_ref_precip.where(corrected_ref_precip > 0, 0)\n",
"corrected_fut_precip = corrected_fut_precip.where(corrected_fut_precip > 0, 0)\n",
"\n",
"# Train the model to find the correction factors for the maximum temperature (tasmax) data\n",
"# Train the model to find the correction factors for the maximum temperature (tasmax) data.\n",
"Adjustment = sdba.DetrendedQuantileMapping.train(\n",
" ref=ERA5_tmax,\n",
" hist=historical_tasmax,\n",
@@ -524,13 +536,13 @@
" group=group_month_window,\n",
")\n",
"\n",
"# Apply the correction factors on the reference period\n",
"# Apply the correction factors on the reference period.\n",
"corrected_ref_tasmax = Adjustment.adjust(historical_tasmax, interp=\"linear\")\n",
"\n",
"# Apply the correction factors on the future period\n",
"# Apply the correction factors on the future period.\n",
"corrected_fut_tasmax = Adjustment.adjust(future_tasmax, interp=\"linear\")\n",
"\n",
"# Train the model to find the correction factors for the minimum temperature (tasmin) data\n",
"# Train the model to find the correction factors for the minimum temperature (tasmin) data.\n",
"Adjustment = sdba.DetrendedQuantileMapping.train(\n",
" ref=ERA5_tmin,\n",
" hist=historical_tasmin,\n",
@@ -561,7 +573,8 @@
},
"outputs": [],
"source": [
"# Convert the reference corrected data into netCDF file. We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
"# Convert the reference corrected data into netCDF file.\n",
"# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
"ref_dataset = xr.merge(\n",
" [\n",
" corrected_ref_precip.to_dataset(name=\"pr\"),\n",
Expand All @@ -570,11 +583,11 @@
" ]\n",
")\n",
"\n",
"# Write to temporary folder\n",
"# Write to temporary folder.\n",
"fn_ref = tmp / \"reference_dataset.nc\"\n",
"ref_dataset.to_netcdf(fn_ref)\n",
"\n",
"# Convert the future corrected data into netCDF file\n",
"# Convert the future corrected data into netCDF file.\n",
"fut_dataset = xr.merge(\n",
" [\n",
" corrected_fut_precip.to_dataset(name=\"pr\"),\n",
@@ -610,13 +623,6 @@
"# Compare it to the future precipitation without bias-correction.\n",
"future_pr.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
11 changes: 2 additions & 9 deletions docs/notebooks/10_Data_assimilation.ipynb
@@ -156,14 +156,7 @@
" adj=\"MULTIPLICATIVE\",\n",
" ),\n",
" rc.ForcingPerturbation(\n",
" forcing=\"TEMP_MAX\",\n",
" dist=\"DIST_NORMAL\",\n",
" p1=0.0,\n",
" p2=2.0,\n",
" adj=\"ADDITIVE\",\n",
" ),\n",
" rc.ForcingPerturbation(\n",
" forcing=\"TEMP_MIN\",\n",
" forcing=\"TEMP_AVE\",\n",
" dist=\"DIST_NORMAL\",\n",
" p1=0.0,\n",
" p2=2.0,\n",
@@ -456,7 +449,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.12.5"
}
},
"nbformat": 4,
(The remaining 7 changed files are not shown.)
