Commit d39b65c

RondeauG committed Sep 12, 2023
1 parent e87629d commit d39b65c
Showing 2 changed files with 94 additions and 101 deletions.
171 changes: 93 additions & 78 deletions docs/notebooks/2_getting_started.ipynb
@@ -108,7 +108,7 @@
"id": "65f7dfcc",
"metadata": {},
"source": [
"The result of `search_data_catalog` is a dictionary with one entry per unique ID. Note that a unique ID can be associated to multiple *intake datasets*, as is the case here, because `intake-esm` groups catalog lines per *id - domain - processing_level - xrfeq*. In this cse, *tas* and *sftlf* would be detected as different datasets without `search_data_catalogs`."
"The result of `search_data_catalog` is a dictionary with one entry per unique ID. Note that a unique ID can be associated to multiple *intake datasets*, as is the case here, because `intake-esm` groups catalog lines per *id - domain - processing_level - xrfeq*."
]
},
{
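As a side note on the paragraph above, here is a minimal sketch of how the dictionary returned by `search_data_catalogs` is typically consumed. The catalog path, the `variables_and_freqs` request and the search criteria below are illustrative assumptions, not values from this notebook; only the `catalog` and `periods` arguments of `extract_dataset` mirror the cell shown in this diff.

```python
import xscen as xs

# Illustrative only: the catalog path and the search criteria are placeholders.
cat_sim = xs.search_data_catalogs(
    data_catalogs=["path/to/catalog.json"],            # hypothetical catalog
    variables_and_freqs={"tas": "D", "sftlf": "fx"},   # hypothetical request
    other_search_criteria={"experiment": ["ssp245"]},  # hypothetical criteria
)

# One entry per unique ID; each value is a sub-catalog grouping every matching
# catalog line, since intake-esm groups per id - domain - processing_level - xrfreq.
for ds_id, sub_catalog in cat_sim.items():
    ds_dict = xs.extract_dataset(
        catalog=sub_catalog,
        periods=[2001, 2002],
    )
```

Grouping by unique ID is what lets a fixed field like *sftlf* travel with the time-dependent variables of the same simulation instead of being treated as a separate dataset.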
@@ -244,7 +244,7 @@
"ds_dict = xs.extract_dataset(\n",
" catalog=cat_sim[\"CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp245_r1i1p1f1_example-region\"],\n",
" variables_and_freqs=variables_and_freqs,\n",
" periods=[2001, 2003],\n",
" periods=[2001, 2002],\n",
" region=region,\n",
" xr_open_kwargs=xr_open_kwargs,\n",
" xr_combine_kwargs=xr_combine_kwargs,\n",
@@ -289,36 +289,7 @@
},
"outputs": [],
"source": [
"from xscen.testing import datablock_3d, fake_data\n",
"\n",
"for ds in ds_dict.values():\n",
" if \"tas\" in ds.data_vars:\n",
" # Since the sample files are very small, we'll create fake data covering a longer time period\n",
" data = fake_data(\n",
" nyears=71,\n",
" ny=len(ds.lat),\n",
" nx=len(ds.lon),\n",
" rand_type=\"tas\",\n",
" seed=0,\n",
" amplitude=15,\n",
" offset=2,\n",
" )\n",
" attrs = ds.attrs\n",
" ds = datablock_3d(\n",
" data,\n",
" \"tas\",\n",
" \"lon\",\n",
" -75,\n",
" \"lat\",\n",
" 45,\n",
" x_step=1,\n",
" y_step=1.5,\n",
" start=\"1/1/1981\",\n",
" freq=\"D\",\n",
" as_dataset=True,\n",
" )\n",
" ds.attrs = attrs\n",
"\n",
" filename = str(\n",
" output_folder\n",
" / f\"{ds.attrs['cat:id']}.{ds.attrs['cat:domain']}.{ds.attrs['cat:processing_level']}.{ds.attrs['cat:frequency']}.zarr\"\n",
@@ -404,32 +375,6 @@
" )\n",
"\n",
" for ds in dset_dict.values():\n",
" if \"tas\" in ds.data_vars:\n",
" # Since the sample files are very small, we'll create fake data covering a longer time period\n",
" data = fake_data(\n",
" nyears=71,\n",
" ny=len(ds.lat),\n",
" nx=len(ds.lon),\n",
" rand_type=\"tas\",\n",
" seed=list(cat_sim.keys()).index(key),\n",
" amplitude=15,\n",
" offset=2,\n",
" )\n",
" attrs = ds.attrs\n",
" ds = datablock_3d(\n",
" data,\n",
" \"tas\",\n",
" \"lon\",\n",
" -75,\n",
" \"lat\",\n",
" 45,\n",
" x_step=1,\n",
" y_step=1.5,\n",
" start=\"1/1/1981\",\n",
" freq=\"D\",\n",
" as_dataset=True,\n",
" )\n",
" ds.attrs = attrs\n",
" filename = str(\n",
" output_folder\n",
" / f\"{ds.attrs['cat:id']}.{ds.attrs['cat:domain']}.{ds.attrs['cat:processing_level']}.{ds.attrs['cat:frequency']}.zarr\"\n",
@@ -441,6 +386,93 @@
" pcat.update_from_ds(ds=ds, path=filename, info_dict={\"format\": \"zarr\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "113b9cfa-d237-43d2-9c4e-9ecd5969372e",
"metadata": {
"nbsphinx": "hidden"
},
"outputs": [],
"source": [
"# This is a hidden cell. Since the sample files are very small, we'll create fake data covering a longer time period and highjack the previously saved files.\n",
"\n",
"import shutil\n",
"\n",
"from xscen.testing import datablock_3d, fake_data\n",
"\n",
"ds_dict = pcat.search(processing_level=\"extracted\", variable=\"tas\").to_dataset_dict()\n",
"for key, ds in ds_dict.items():\n",
" attrs = ds.attrs\n",
" filename = pcat.search(id=ds.attrs[\"cat:id\"], variable=\"tas\").df.path.iloc[0]\n",
"\n",
" shutil.rmtree(filename)\n",
"\n",
" data = fake_data(\n",
" nyears=71,\n",
" ny=len(ds.lat),\n",
" nx=len(ds.lon),\n",
" rand_type=\"tas\",\n",
" seed=sorted(list(ds_dict.keys())).index(key),\n",
" amplitude=15,\n",
" offset=2,\n",
" )\n",
" ds = datablock_3d(\n",
" data,\n",
" \"tas\",\n",
" \"lon\",\n",
" -75,\n",
" \"lat\",\n",
" 45,\n",
" x_step=1,\n",
" y_step=1.5,\n",
" start=\"1/1/1981\",\n",
" freq=\"D\",\n",
" as_dataset=True,\n",
" )\n",
" ds.attrs = attrs\n",
" ds.attrs[\"cat:date_start\"] = \"1981-01-01\"\n",
" ds.attrs[\"cat:date_end\"] = \"2050-01-01\"\n",
" chunks = xs.io.estimate_chunks(ds, dims=[\"time\"], target_mb=50)\n",
" xs.save_to_zarr(ds, filename, rechunk=chunks, mode=\"o\")\n",
"\n",
" pcat.update_from_ds(ds=ds, path=filename, info_dict={\"format\": \"zarr\"})\n",
"\n",
"# For this tutorial, we'll also create a fake reference dataset\n",
"data = fake_data(\n",
" nyears=31, ny=5, nx=4, rand_type=\"tas\", seed=42, amplitude=25, offset=-2\n",
")\n",
"ds_ref = datablock_3d(\n",
" data,\n",
" \"tas\",\n",
" \"lon\",\n",
" -74.875,\n",
" \"lat\",\n",
" 45.275,\n",
" x_step=0.25,\n",
" y_step=0.55,\n",
" start=\"1/1/1981\",\n",
" freq=\"D\",\n",
" as_dataset=True,\n",
")\n",
"ds_ref.attrs = attrs\n",
"ds_ref.attrs[\"cat:date_start\"] = \"1981-01-01\"\n",
"ds_ref.attrs[\"cat:date_end\"] = \"2010-01-01\"\n",
"ds_ref.attrs[\"cat:source\"] = \"ERA5-Land\"\n",
"ds_ref.attrs[\"cat:institution\"] = \"ECMWF\"\n",
"ds_ref.attrs[\"cat:domain\"] = \"finer-grid\"\n",
"ds_ref.attrs[\"cat:activity\"] = None\n",
"ds_ref.attrs[\"cat:experiment\"] = None\n",
"ds_ref.attrs[\"cat:member\"] = None\n",
"ds_ref.attrs[\"cat:mip_era\"] = None\n",
"ds_ref.attrs[\"cat:path\"] = None\n",
"ds_ref.attrs[\"cat:id\"] = xs.catalog.generate_id(ds_ref)[0]\n",
"\n",
"filename = filename.replace(Path(filename).stem, ds_ref.attrs[\"cat:id\"])\n",
"xs.save_to_zarr(ds_ref, filename, rechunk=chunks, mode=\"o\")\n",
"pcat.update_from_ds(ds=ds_ref, path=filename, info_dict={\"format\": \"zarr\"})"
]
},
{
"cell_type": "markdown",
"id": "9b2040b2",
@@ -644,7 +676,7 @@
"# to_dataset_dict() is called to cast the search results as xr.Dataset objects\n",
"# frequency=\"^(?!fx$).*$\" is used to exclude fixed fields from the results\n",
"ds_dict = pcat.search(\n",
" processing_level=\"extracted\", frequency=\"^(?!fx$).*$\"\n",
" processing_level=\"extracted\", frequency=\"^(?!fx$).*$\", domain=\"example-region\"\n",
").to_dataset_dict()\n",
"\n",
"mask_args = {\n",
@@ -798,29 +830,12 @@
"source": [
"ds_dict = pcat.search(processing_level=\"regridded\").to_dataset_dict()\n",
"\n",
"# # Open the reference dataset, in this case ERA5-Land\n",
"ds_ref = pcat.search(processing_level=\"extracted\", source=\"ERA5-Land\").to_dataset()\n",
"\n",
"# Currently, only a single variable can be bias adjusted at a time\n",
"variables = [\"tas\"]\n",
"for v in variables:\n",
" # For this tutorial, we'll create a fake reference dataset\n",
" data = fake_data(\n",
" nyears=31, ny=5, nx=4, rand_type=\"tas\", seed=42, amplitude=25, offset=-2\n",
" )\n",
" ds_ref = datablock_3d(\n",
" data,\n",
" \"tas\",\n",
" \"lon\",\n",
" -75,\n",
" \"lat\",\n",
" 45,\n",
" x_step=1,\n",
" y_step=1.5,\n",
" start=\"1/1/2001\",\n",
" freq=\"D\",\n",
" as_dataset=True,\n",
" )\n",
" ds_ref[\"lon\"] = ds_regrid[\"lon\"]\n",
" ds_ref[\"lat\"] = ds_regrid[\"lat\"]\n",
"\n",
" for ds in ds_dict.values():\n",
" # Train\n",
" ds_train = xs.train(\n",
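For readers following the bias-adjustment cell that is cut off above, here is a rough sketch of the train/adjust pattern it uses. The keyword names, training period and adjustment periods are assumptions for illustration; check them against the `xscen` documentation rather than treating this as the notebook's exact code.

```python
import xscen as xs

for ds_sim in ds_dict.values():
    # Train on the period where the reference and the simulation overlap
    # (argument names below are assumed, not taken from this commit).
    ds_train = xs.train(
        dref=ds_ref,              # reference dataset, e.g. ERA5-Land
        dhist=ds_sim,             # historical simulation
        var=["tas"],              # a single variable at a time
        period=["1981", "2010"],  # assumed training period
    )

    # Apply the trained adjustment over the full simulation period.
    ds_adj = xs.adjust(
        dtrain=ds_train,
        dsim=ds_sim,
        periods=["1981", "2050"],  # assumed adjustment periods
    )
```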
24 changes: 1 addition & 23 deletions docs/notebooks/3_diagnostics.ipynb
@@ -19,7 +19,6 @@
"\n",
"output_folder = Path().absolute() / \"_data\"\n",
"\n",
"\n",
"# Create a project Catalog\n",
"project = {\n",
" \"title\": \"example-diagnostics\",\n",
@@ -241,28 +240,7 @@
"outputs": [],
"source": [
"# load input\n",
"# For this tutorial, we'll create a fake reference dataset\n",
"from xscen.testing import datablock_3d, fake_data\n",
"\n",
"data = fake_data(\n",
" nyears=31, ny=5, nx=4, rand_type=\"tas\", seed=42, amplitude=25, offset=-2\n",
")\n",
"dref = datablock_3d(\n",
" data,\n",
" \"tas\",\n",
" \"lon\",\n",
" -75,\n",
" \"lat\",\n",
" 45,\n",
" x_step=1,\n",
" y_step=1.5,\n",
" start=\"1/1/2001\",\n",
" freq=\"D\",\n",
" as_dataset=True,\n",
")\n",
"dref[\"lon\"] = ds[\"lon\"]\n",
"dref[\"lat\"] = ds[\"lat\"]\n",
"dref.attrs = {\"cat:id\": \"reference\", \"cat:domain\": \"finer-grid\"}\n",
"dref = gettingStarted_cat.search(source=\"ERA5-Land\").to_dataset()\n",
"\n",
"# calculate properties and measures\n",
"prop_ref, _ = xs.properties_and_measures(\n",