diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml index ceed3e6..0bd44e2 100644 --- a/examples/key_metrics/config.yml +++ b/examples/key_metrics/config.yml @@ -54,9 +54,9 @@ timeseries: case_name: 'b.e23_alpha17f.BLT1850.ne30_t232.092' atm: - vars: ['ACTNI', 'ACTNL', 'ACTREI', 'ACTREL', 'AODDUST'] - derive_vars: [] # {'PRECT':['PRECL', 'PRECC'], 'RESTOM':['FLNT', 'FSNT']} - hist_str: 'h0' + vars: ['PSL'] + derive_vars: [] + hist_str: 'h0a' start_years: [1] end_years: [100] level: 'lev' @@ -99,7 +99,7 @@ compute_notebooks: # parameters are specified. Several examples of different # types of notebooks are provided. - # The first key (here simple_no_params_nb) is the name of the + # The first key (here infrastructure) is the name of the # notebook from nb_path_root, minus the .ipynb infrastructure: @@ -107,13 +107,14 @@ compute_notebooks: parameter_groups: none: {} -# atm: -# adf_quick_run: -# parameter_groups: -# none: -# adf_path: ../../../externals/ADF -# config_path: . -# config_fil_str: "config_f.cam6_3_119.FLTHIST_ne30.r328_gamma0.33_soae.001.yaml" + atm: + nmse_PSL: + parameter_groups: + none: + regridded_output: True + validation_path: '/glade/campaign/cesm/development/cross-wg/diagnostic_framework/nmse_validation/fv0.9x1.25' + start_date: '0001-01-01' + end_date: '0101-01-01' glc: LIWG_SMB_diagnostic: @@ -181,13 +182,12 @@ book_toc: # Parts group notebooks into different sections in the Jupyter book # table of contents, so you can organize different parts of your project. + # Each chapter is the name of one of the notebooks that you executed + # in compute_notebooks above, also without .ipynb - # - caption: Atmosphere - - # # Each chapter is the name of one of the notebooks that you executed - # # in compute_notebooks above, also without .ipynb - # chapters: - # - file: atm/adf_quick_run + - caption: Atmosphere + chapters: + - file: atm/nmse_PSL # - caption: Ocean # chapters: diff --git a/examples/nblibrary/atm/averaging_utils.py b/examples/nblibrary/atm/averaging_utils.py new file mode 100644 index 0000000..b98625b --- /dev/null +++ b/examples/nblibrary/atm/averaging_utils.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import xarray as xr + +dpseas = {"DJF": 90, "MAM": 92, "JJA": 92, "SON": 91} + + +def seasonal_climatology_weighted(dat): + """Calculate seasonal and annual average climatologies""" + days_in_month = dat.time.dt.days_in_month + mons = dat.time.dt.month + wgts = mons.copy(deep=True) + wgts = xr.where( + (mons == 1) | (mons == 2) | (mons == 12), + days_in_month / dpseas["DJF"], + wgts, + ) + wgts = xr.where( + (mons == 3) | (mons == 4) | (mons == 5), + days_in_month / dpseas["MAM"], + wgts, + ) + wgts = xr.where( + (mons == 6) | (mons == 7) | (mons == 8), + days_in_month / dpseas["JJA"], + wgts, + ) + wgts = xr.where( + (mons == 9) | (mons == 10) | (mons == 11), + days_in_month / dpseas["SON"], + wgts, + ) + datw = dat * wgts + + wgts_am = days_in_month / 365.0 + datw_am = dat * wgts_am + + ds_season = ( + datw.rolling(min_periods=3, center=True, time=3).sum().dropna("time", how="all") + ) + dat_djf = ds_season.where(ds_season.time.dt.month == 1, drop=True).mean("time") + dat_mam = ds_season.where(ds_season.time.dt.month == 4, drop=True).mean("time") + dat_jja = ds_season.where(ds_season.time.dt.month == 7, drop=True).mean("time") + dat_son = ds_season.where(ds_season.time.dt.month == 10, drop=True).mean("time") + dat_am = datw_am.groupby("time.year").sum("time") + dat_am = dat_am.mean("year") + + dat_djf = dat_djf.rename("DJF") + dat_mam = dat_mam.rename("MAM") + dat_jja = dat_jja.rename("JJA") + dat_son = dat_son.rename("SON") + dat_am = dat_am.rename("AM") + + return xr.merge([dat_djf, dat_mam, dat_jja, dat_son, dat_am]) diff --git a/examples/nblibrary/atm/nmse_PSL.ipynb b/examples/nblibrary/atm/nmse_PSL.ipynb new file mode 100644 index 0000000..93d14c2 --- /dev/null +++ b/examples/nblibrary/atm/nmse_PSL.ipynb @@ -0,0 +1,443 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3f230d52-dca7-4ce4-98cc-6267fc04893d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Normalized Mean Square Error\n", + "\n", + "This notebook computes the normalized mean square error of atmospheric surface pressure.\n", + "It is compared to ERA5 observations, as well as the CESM2 large ensemble and CMIP6 model output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2292c691-9bd9-44d2-8a3f-cb90dbe2e383", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import glob\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import xarray as xr\n", + "\n", + "from nmse_utils import nmse\n", + "from averaging_utils import seasonal_climatology_weighted" + ] + }, + { + "cell_type": "markdown", + "id": "9d67416c-a2d4-403b-85f4-647aa0a816eb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Parameters\n", + "\n", + "These variables are set in `config.yml`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7486e94-e493-4369-9767-90eb15c0ac3a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "CESM_output_dir = \"\"\n", + "case_name = \"\"\n", + "start_date = \"\"\n", + "end_date = \"\"\n", + "validation_path = \"\"\n", + "regridded_output = False" + ] + }, + { + "cell_type": "markdown", + "id": "74c7803f-a8c5-445d-9233-0aa2663c58bd", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Read in the current case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccca8e3a-a52f-4202-9704-9d4470eda984", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def fix_time_dim(dat):\n", + " \"\"\"CESM2 output sets time as the end of the averaging interval (e.g. January average is midnight on February 1st);\n", + " This function sets the time dimension to the midpoint of the averaging interval.\n", + " Note that CESM3 output sets time to the midpoint already, so this function should not change CESM3 data.\"\"\"\n", + " if \"time\" not in dat.dims:\n", + " return dat\n", + " if \"bounds\" not in dat.time.attrs:\n", + " return dat\n", + " time_bounds_avg = dat[dat.time.attrs[\"bounds\"]].mean(\"nbnd\")\n", + " time_bounds_avg.attrs = dat.time.attrs\n", + " dat = dat.assign_coords({\"time\": time_bounds_avg})\n", + " return xr.decode_cf(dat)\n", + "\n", + "\n", + "if regridded_output:\n", + " file_path = f\"{CESM_output_dir}/{case_name}/atm/proc/tseries/regrid\"\n", + "else:\n", + " file_path = f\"{CESM_output_dir}/{case_name}/atm/proc/tseries\"\n", + "\n", + "dat = (\n", + " fix_time_dim(xr.open_mfdataset(f\"{file_path}/*PSL*.nc\", decode_times=False))\n", + " .sel(time=slice(start_date, end_date))\n", + " .PSL\n", + " / 100.0\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "073a2ad0-81e6-4817-9024-4b9b718fabb4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# --Compute seasonal and annual means\n", + "dat = seasonal_climatology_weighted(dat).load()" + ] + }, + { + "cell_type": "markdown", + "id": "e0527e3e-cd26-46b5-8c1e-08882109e12e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Read in validation data and other CMIP models for comparison (precomputed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ff152b1-2168-4a0d-826b-cf8d11f66ab7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Ensure all validation datasets have the same coordinates as the ERA5 data\n", + "# (Avoid round-off level differences since all data should be on the same grid)\n", + "lon = dat.lon.data\n", + "lat = dat.lat.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "126e65b3-2b8c-400c-af02-2ad0b0f82e6e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# ---ERA5\n", + "era5 = xr.open_dataset(f\"{validation_path}/PSL_ERA5.nc\").assign_coords(\n", + " {\"lon\": lon, \"lat\": lat}\n", + ")\n", + "era5 = era5 / 100.0 # convert to hPa\n", + "\n", + "# ---CESM2\n", + "lens2 = xr.open_dataset(f\"{validation_path}/PSL_LENS2.nc\").assign_coords(\n", + " {\"lon\": lon, \"lat\": lat}\n", + ")\n", + "lens2 = lens2 / 100.0 # convert to hPa\n", + "\n", + "# ---CMIP6\n", + "modelfiles = sorted(glob.glob(f\"{validation_path}/CMIP6/*.nc\"))\n", + "datcmip6 = [\n", + " xr.open_dataset(ifile).assign_coords({\"lon\": lon, \"lat\": lat}).mean(\"M\")\n", + " for ifile in modelfiles\n", + "]\n", + "datcmip6 = xr.concat(datcmip6, dim=\"model\")\n", + "datcmip6 = datcmip6 / 100.0" + ] + }, + { + "cell_type": "markdown", + "id": "22cc331d-413c-4a87-bd89-812ad118cf8c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Compute the NMSE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6857717d-7514-45b5-ba33-a774f38b7c3e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "nmse_dat = []\n", + "nmse_cesm2 = []\n", + "nmse_cmip6 = []\n", + "for ivar in era5.data_vars:\n", + " nmse_dat.append(nmse(era5[ivar], dat[ivar]))\n", + " nmse_cesm2.append(nmse(era5[ivar], lens2[ivar]))\n", + " nmse_cmip6.append(nmse(era5[ivar], datcmip6[ivar]))\n", + "nmse_dat = xr.merge(nmse_dat)\n", + "nmse_cesm2 = xr.merge(nmse_cesm2)\n", + "nmse_cmip6 = xr.merge(nmse_cmip6)" + ] + }, + { + "cell_type": "markdown", + "id": "1014f119-fc3f-428b-99ca-ab9de700148d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### Set up the plot panel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53494900-0145-4ab2-85b8-5ed6ae347892", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def plotnmse(fig, cmip6, cesm2, cesm3, x1, x2, y1, y2, titlestr):\n", + " ax = fig.add_axes([x1, y1, x2 - x1, y2 - y1])\n", + "\n", + " cmip6 = cmip6.sortby(cmip6, ascending=False)\n", + " binedges = np.arange(0, cmip6.size, 1)\n", + " ax.bar(\n", + " binedges,\n", + " cmip6,\n", + " width=1,\n", + " bottom=0,\n", + " edgecolor=\"black\",\n", + " color=\"gray\",\n", + " label=\"CMIP6\",\n", + " )\n", + "\n", + " ax.plot(cmip6.size + 1, cesm3, \"o\", color=\"blue\", label=\"THIS RUN\")\n", + "\n", + " ax.fill_between(\n", + " np.arange(0, cmip6.size + 3, 1) - 0.5,\n", + " np.arange(0, cmip6.size + 3, 1) * 0 + np.array(cesm2.min()),\n", + " np.arange(0, cmip6.size + 3, 1) * 0 + np.array(cesm2.max()),\n", + " color=\"salmon\",\n", + " alpha=0.5,\n", + " label=\"LENS2\",\n", + " )\n", + "\n", + " ax.set_xlim(-0.5, cmip6.size + 2 - 0.5)\n", + " ax.set_xticks([])\n", + " ax.set_ylabel(\"NMSE\", fontsize=14)\n", + " ax.set_title(titlestr, fontsize=16)\n", + "\n", + " ax.legend()\n", + "\n", + " return ax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56b4cd99-a27e-4f28-86c2-8013e7c7bc78", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(16, 16))\n", + "\n", + "ax = plotnmse(\n", + " fig,\n", + " nmse_cmip6.AM,\n", + " nmse_cesm2.AM,\n", + " nmse_dat.AM,\n", + " 0.3,\n", + " 0.7,\n", + " 0.77,\n", + " 0.93,\n", + " \"NMSE, SLP, AM\",\n", + ")\n", + "ax = plotnmse(\n", + " fig,\n", + " nmse_cmip6.DJF,\n", + " nmse_cesm2.DJF,\n", + " nmse_dat.DJF,\n", + " 0.05,\n", + " 0.45,\n", + " 0.57,\n", + " 0.72,\n", + " \"NMSE, SLP, DJF\",\n", + ")\n", + "ax = plotnmse(\n", + " fig,\n", + " nmse_cmip6.MAM,\n", + " nmse_cesm2.MAM,\n", + " nmse_dat.MAM,\n", + " 0.55,\n", + " 0.95,\n", + " 0.57,\n", + " 0.72,\n", + " \"NMSE, SLP, MAM\",\n", + ")\n", + "ax = plotnmse(\n", + " fig,\n", + " nmse_cmip6.JJA,\n", + " nmse_cesm2.JJA,\n", + " nmse_dat.JJA,\n", + " 0.05,\n", + " 0.45,\n", + " 0.37,\n", + " 0.52,\n", + " \"NMSE, SLP, JJA\",\n", + ")\n", + "ax = plotnmse(\n", + " fig,\n", + " nmse_cmip6.SON,\n", + " nmse_cesm2.SON,\n", + " nmse_dat.SON,\n", + " 0.55,\n", + " 0.95,\n", + " 0.37,\n", + " 0.52,\n", + " \"NMSE, SLP, SON\",\n", + ")\n", + "\n", + "fig.text(\n", + " 0.5,\n", + " 0.99,\n", + " \"THIS RUN = \" + case_name + \" \" + start_date + \" to \" + end_date,\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " fontsize=14,\n", + " color=\"royalblue\",\n", + ")\n", + "fig.text(\n", + " 0.5,\n", + " 0.975,\n", + " \"Other runs = 1979-01-01 to 2023-12-31\",\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " fontsize=14,\n", + ")\n", + "fig.text(\n", + " 0.5,\n", + " 0.96,\n", + " \"Validation data = ERA5 1979-01-01 to 2023-12-31\",\n", + " ha=\"center\",\n", + " va=\"center\",\n", + " fontsize=14,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:cupid-analysis]", + "language": "python", + "name": "conda-env-cupid-analysis-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/nblibrary/atm/nmse_utils.py b/examples/nblibrary/atm/nmse_utils.py new file mode 100644 index 0000000..efd1ec2 --- /dev/null +++ b/examples/nblibrary/atm/nmse_utils.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import numpy as np + + +def nmse(obs, mod): + """Calculate the normalized mean squared error metric of Williamson (1995) + "Skill Scores from the AMIP Simulations". + nmse = overbar( (z_m - z_a)**2 ) / overbar( (z_a')**2 ) + where overbar is the weighted spatial average and prime is the deviation + from that + """ + + # get the weights and weight by zero if the model or obs is nan + w = np.cos(np.deg2rad(obs.lat)) + w = w.expand_dims({"lon": obs.lon}, axis=1) + w = w.where(~(np.isnan(obs) | np.isnan(mod)), 0) + obs = obs.where(w != 0, 0) + mod = mod.where(w != 0, 0) + + # numerator + num = (mod - obs) ** 2.0 + numw = num.weighted(w) + numwm = numw.mean(["lon", "lat"]) + + # denomenator + obsw = obs.weighted(w) + obswm = obsw.mean(["lon", "lat"]) + obsprime = obs - obswm + obsprime2 = obsprime**2.0 + obsprime2w = obsprime2.weighted(w) + obsprime2wm = obsprime2w.mean(["lon", "lat"]) + + nmse = numwm / obsprime2wm + + return nmse diff --git a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb index 64a356c..fc23c64 100644 --- a/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb +++ b/examples/nblibrary/glc/LIWG_SMB_diagnostic.ipynb @@ -3,7 +3,13 @@ { "cell_type": "markdown", "id": "e45c723f-aa6f-4db4-a197-492132cc8156", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "# Land ice SMB model comparison\n", "This notebook compares the downscaled output of surface mass balance (SMB) over the Greenland ice sheet (GrIS) to the regional model MAR. In what follows, we interchangeably call the MAR data \"observation\".\n", @@ -62,17 +68,17 @@ "source": [ "# Parameter Defaults\n", "\n", - "CESM_output_dir = \"/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing\"\n", - "case_name = \"b.e23_alpha17f.BLT1850.ne30_t232.092\" # case name\n", - "climo_nyears = 40 # number of years to compute the climatology\n", - "last_year = 101\n", + "CESM_output_dir = \"\"\n", + "case_name = \"\" # case name\n", + "climo_nyears = 0 # number of years to compute the climatology\n", + "last_year = 0\n", "\n", "base_case_output_dir = CESM_output_dir\n", "base_case_name = None\n", "base_last_year = last_year\n", "\n", - "obs_path = \"/glade/u/home/gunterl/obs_diagnostic_cesm\" # path to observed dataset\n", - "obs_name = \"GrIS_MARv3.12_climo_1960_1999.nc\"" + "obs_path = \"\" # directory containing observed dataset\n", + "obs_name = \"\" # file name for observed dataset" ] }, {