From 28aea00ad4c5dac3bc0ae83e8e40c4381b9290d9 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 4 Aug 2023 13:19:05 -0400 Subject: [PATCH 01/23] first tests --- tests/test_extract.py | 182 ++++++++++++++++++++++++++++++++++++++++++ xscen/extract.py | 16 +++- 2 files changed, 195 insertions(+), 3 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index da5f3780..2a6c7007 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -6,6 +6,188 @@ import xscen as xs +from .conftest import notebooks + + +class TestSearchDataCatalogs: + small_cat = xs.DataCatalog(notebooks / "samples" / "tutorial-catalog.json") + big_cat = xs.DataCatalog(notebooks / "samples" / "pangeo-cmip6.json") + + @pytest.mark.parametrize( + "variables_and_freqs, other_arg", + [ + ({"tas": "D"}, None), + ({"sftlf": "fx"}, "other"), + ({"tas": "D", "sftlf": "fx"}, "exclusion"), + ({"tas": "D", "sftlf": "fx"}, "other2"), + ], + ) + def test_basic(self, variables_and_freqs, other_arg): + if "other" in other_arg: + other_arg = "other" + # fdsfs TODO find nothing + else: + out = xs.search_data_catalogs( + data_catalogs=self.small_cat, + variables_and_freqs=variables_and_freqs, + other_search_criteria={"experiment": ["ssp370"]} + if other_arg == "other" + else None, + exclusions={"member": "r2.*"} if other_arg == "exclusion" else None, + ) + assert len(out) == 5 if other_arg is None else 1 if other_arg == "other" else 4 + + @pytest.mark.parametrize( + "periods, coverage_kwargs", + [ + ([["2020", "2030"], ["2035", "2040"]], None), + ([["1900", "2030"], ["2035", "2040"]], None), + ([["2020", "2080"]], None), + ([["2020", "2080"]], {"coverage": 0.5}), + ], + ) + def test_periods(self, periods, coverage_kwargs): + out = xs.search_data_catalogs( + data_catalogs=self.small_cat, + variables_and_freqs={"tas": "D"}, + periods=periods, + coverage_kwargs=coverage_kwargs, + ) + assert len(out) == ( + 5 + if ((periods[0] == ["2020", "2030"]) or coverage_kwargs is not None) + else 0 + ) + + def test_ids(self): + out = xs.search_data_catalogs( + data_catalogs=deepcopy(self.small_cat), + variables_and_freqs={"tas": "D"}, + id_columns=["source"], + ) + assert len(out) == 1 + assert len(out["NorESM2-MM"].df) == 5 + + # TODO: missing id + + @pytest.mark.parametrize("allow_resampling", [True, False]) + def test_allow_resampling(self, allow_resampling): + out = xs.search_data_catalogs( + data_catalogs=deepcopy(self.small_cat), + variables_and_freqs={"tas": "YS"}, + allow_resampling=allow_resampling, + ) + assert len(out) == (5 if allow_resampling else 0) + + @pytest.mark.parametrize( + "restrict_warming_level", + [ + True, + {"ignore_member": True}, + {"wl": 2}, + {"wl": 3}, + {"wl": 2, "ignore_member": True}, + ], + ) + def test_warminglevel(self, restrict_warming_level): + out = xs.search_data_catalogs( + data_catalogs=self.small_cat, + variables_and_freqs={"tas": "D"}, + restrict_warming_level=restrict_warming_level, + ) + assert ( + out == 5 + if restrict_warming_level == {"ignore_member": True} + else 4 + if ( + (restrict_warming_level is True) + or (restrict_warming_level == {"wl": 2, "ignore_member": True}) + ) + else 3 + if restrict_warming_level == {"wl": 2} + else 2 + ) + + @pytest.mark.parametrize("restrict_resolution", [None, "finest"]) + def test_restrict_resolution(self, restrict_resolution): + out = xs.search_data_catalogs( + data_catalogs=self.big_cat, + variables_and_freqs={"tas": "D"}, + other_search_criteria={ + "institution": ["NOAA-GFDL"], + "experiment": ["ssp585"], + }, + restrict_resolution=restrict_resolution, + ) + assert len(out) == 3 if restrict_resolution is None else 2 + + @pytest.mark.parametrize("restrict_members", [None, {"ordered": 5}]) + def test_restrict_members(self, restrict_members): + out = xs.search_data_catalogs( + data_catalogs=self.big_cat, + variables_and_freqs={"tas": "D"}, + other_search_criteria={"source": ["CanESM5"], "experiment": ["ssp585"]}, + restrict_members=restrict_members, + ) + assert len(out) == (50 if restrict_members is None else 5) + if restrict_members is not None: + assert all( + o in out.keys() + for o in [ + "ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p2f1_gn", + "ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p1f1_gn", + "ScenarioMIP_CCCma_CanESM5_ssp585_r2i1p1f1_gn", + "ScenarioMIP_CCCma_CanESM5_ssp585_r2i1p2f1_gn", + "ScenarioMIP_CCCma_CanESM5_ssp585_r3i1p1f1_gn", + ] + ) + + assert ( + len( + xs.search_data_catalogs( + data_catalogs=self.big_cat, + variables_and_freqs={"tas": "D"}, + other_search_criteria={ + "institution": ["NOAA-GFDL"], + "experiment": ["ssp585"], + }, + restrict_members=restrict_members, + ) + ) + == 3 + ) + + @pytest.mark.parametrize("allow_conversion", [True, False]) + def test_allow_conversion(self, allow_conversion): + out = xs.search_data_catalogs( + data_catalogs=self.big_cat, + variables_and_freqs={"evspsblpot": "D"}, + other_search_criteria={ + "institution": ["NOAA-GFDL"], + "experiment": ["ssp585"], + }, + allow_conversion=allow_conversion, + ) + assert len(out) == (3 if allow_conversion else 0) + if allow_conversion: + assert all( + v in out[list(out.keys())[0]].unique("variable") + for v in ["tasmin", "tasmax"] + ) + assert "evspsblpot" not in out[list(out.keys())[0]].unique("variable") + + def test_no_match(self): + pass + + def test_input_types(self): + pass + + def test_match_histfut(self): + pass + + def test_fx(self): + pass + class TestGetWarmingLevel: def test_list(self): diff --git a/xscen/extract.py b/xscen/extract.py index aaf5afd4..e7c7646b 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -851,7 +851,7 @@ def get_warming_level( tas_baseline_period : list [start, end] of the base period. The warming is calculated with respect to it. The default is ["1850", "1900"]. ignore_member : bool - Only used for Datasets. Decides whether to ignore the member when searching for the model run in tas_csv. + Decides whether to ignore the member when searching for the model run in tas_csv. tas_csv : str Path to a csv of annual global mean temperature with a row for each year and a column for each dataset. If None, it will default to data/IPCC_annual_global_tas.csv which was built from @@ -902,6 +902,8 @@ def get_warming_level( info["experiment"], info["member"], ) = real.split("_") + if ignore_member: + info["member"] = ".*" elif isinstance(real, dict) and set(real.keys()).issuperset( (set(FIELDS) - {"member"}) if ignore_member else FIELDS ): @@ -1354,7 +1356,7 @@ def _restrict_wl(df, restrictions: dict): # open csv annual_tas = pd.read_csv(tas_csv, index_col="year") - if restrictions["ignore_member"]: + if restrictions["ignore_member"] and "wl" not in restrictions: df["csv_name"] = df["mip_era"].str.cat( [df["source"], df["experiment"]], sep="_" ) @@ -1365,7 +1367,15 @@ def _restrict_wl(df, restrictions: dict): ) csv_source = list(annual_tas.columns[1:]) - to_keep = df["csv_name"].isin(csv_source) + if "wl" in restrictions: + to_keep = pd.Series( + [ + get_warming_level(x, **restrictions)[0] is not None + for x in df["csv_name"] + ] + ) + else: + to_keep = df["csv_name"].isin(csv_source) removed = pd.unique(df[~to_keep]["id"]) df = df[to_keep] From 8283fd09c1e75b8736e32395584330792c9cd809 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 14 Aug 2023 15:37:24 -0400 Subject: [PATCH 02/23] more tests --- tests/test_extract.py | 57 +++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index 2a6c7007..9c799a07 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -19,22 +19,17 @@ class TestSearchDataCatalogs: ({"tas": "D"}, None), ({"sftlf": "fx"}, "other"), ({"tas": "D", "sftlf": "fx"}, "exclusion"), - ({"tas": "D", "sftlf": "fx"}, "other2"), ], ) def test_basic(self, variables_and_freqs, other_arg): - if "other" in other_arg: - other_arg = "other" - # fdsfs TODO find nothing - else: - out = xs.search_data_catalogs( - data_catalogs=self.small_cat, - variables_and_freqs=variables_and_freqs, - other_search_criteria={"experiment": ["ssp370"]} - if other_arg == "other" - else None, - exclusions={"member": "r2.*"} if other_arg == "exclusion" else None, - ) + out = xs.search_data_catalogs( + data_catalogs=self.small_cat, + variables_and_freqs=variables_and_freqs, + other_search_criteria={"experiment": ["ssp370"]} + if other_arg == "other" + else None, + exclusions={"member": "r2.*"} if other_arg == "exclusion" else None, + ) assert len(out) == 5 if other_arg is None else 1 if other_arg == "other" else 4 @pytest.mark.parametrize( @@ -177,13 +172,45 @@ def test_allow_conversion(self, allow_conversion): assert "evspsblpot" not in out[list(out.keys())[0]].unique("variable") def test_no_match(self): - pass + out = xs.search_data_catalogs( + data_catalogs=self.small_cat, + variables_and_freqs={"tas": "YS"}, + allow_resampling=False, + ) + assert isinstance(out, dict) + assert len(out) == 0 def test_input_types(self): pass def test_match_histfut(self): - pass + out = xs.search_data_catalogs( + data_catalogs=self.big_cat, + variables_and_freqs={"tas": "D"}, + other_search_criteria={"experiment": "ssp585", "source": "CanESM5"}, + restrict_members={"ordered": 1}, + match_hist_and_fut=True, + ) + assert ( + str( + sorted( + out["ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p1f1_gn"].unique( + "date_start" + ) + )[0] + ) + == "1985-01-01 00:00:00" + ) + assert ( + str( + sorted( + out["ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p1f1_gn"].unique( + "date_start" + ) + )[1] + ) + == "2015-01-01 00:00:00" + ) def test_fx(self): pass From 0b163ee701dd26bdf92774299e19c4df5fb966aa Mon Sep 17 00:00:00 2001 From: RondeauG Date: Wed, 23 Aug 2023 16:50:41 -0400 Subject: [PATCH 03/23] active repo --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 149cce2b..b89e0424 100644 --- a/README.rst +++ b/README.rst @@ -70,6 +70,6 @@ This package was created with Cookiecutter_ and the `Ouranosinc/cookiecutter-pyp :target: https://pypi.python.org/pypi/xscen :alt: Supported Python Versions -.. |status| image:: https://www.repostatus.org/badges/latest/wip.svg - :target: https://www.repostatus.org/#wip - :alt: Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public. +.. |status| image:: https://www.repostatus.org/badges/latest/active.svg + :target: https://www.repostatus.org/#active + :alt: Active The project has reached a stable, usable state and is being actively developed. From ff59e17fa360c59cd7e9d54fcad448639f97fc71 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 24 Aug 2023 16:23:44 -0400 Subject: [PATCH 04/23] more tests WIP --- tests/test_extract.py | 39 ++++++++++++++++++++++++++++++++++++++- xscen/extract.py | 4 ++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index 9c799a07..91f0681d 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -63,7 +63,26 @@ def test_ids(self): assert len(out) == 1 assert len(out["NorESM2-MM"].df) == 5 - # TODO: missing id + # Missing id + small_cat = deepcopy(self.small_cat) + small_cat.esmcat._df.loc[ + small_cat.esmcat._df.id + == "CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_example-region", + "id", + ] = None + assert ( + "CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_example-region" + not in small_cat.esmcat._df.id.values + ) + out = xs.search_data_catalogs( + data_catalogs=deepcopy(self.small_cat), + variables_and_freqs={"tas": "D"}, + ) + assert len(out) == 5 + assert ( + "CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_example-region" + in out.keys() + ) @pytest.mark.parametrize("allow_resampling", [True, False]) def test_allow_resampling(self, allow_resampling): @@ -179,8 +198,26 @@ def test_no_match(self): ) assert isinstance(out, dict) assert len(out) == 0 + out = xs.search_data_catalogs( + data_catalogs=self.small_cat, + variables_and_freqs={"tas": "D"}, + other_search_criteria={"experiment": "not_real"}, + ) + assert isinstance(out, dict) + assert len(out) == 0 def test_input_types(self): + # data_catalogs_1 = notebooks / "samples" / "tutorial-catalog.json" + # data_catalogs_2 = notebooks / "samples" / "pangeo-cmip6.json" + # out = xs.search_data_catalogs( + # data_catalogs=data_catalogs_1, + # variables_and_freqs={"tas": "D"}, + # other_search_criteria={ + # "experiment": "ssp585", + # "source": "NorESM.*", + # "member": "r1i1p1f1", + # }, + # ) pass def test_match_histfut(self): diff --git a/xscen/extract.py b/xscen/extract.py index 7d1c0e36..b2d132c4 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -587,7 +587,7 @@ def search_data_catalogs( } # Cast paths to single item list - if isinstance(data_catalogs, (str, Path)): + if isinstance(data_catalogs, (str, os.PathLike)): data_catalogs = [data_catalogs] # Prepare a unique catalog to search from, with the DerivedCat added if required @@ -608,7 +608,7 @@ def search_data_catalogs( **cat_kwargs, ) elif isinstance(data_catalogs, list) and all( - isinstance(dc, str) for dc in data_catalogs + isinstance(dc, (str, os.PathLike)) for dc in data_catalogs ): data_catalogs = [ DataCatalog(path) if path.endswith(".json") else DataCatalog.from_df(path) From 5bc059a86f07132a2496a726f62dbc8e75065b91 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 29 Aug 2023 14:20:22 -0400 Subject: [PATCH 05/23] more tests --- tests/test_extract.py | 123 ++++++++++++++++++++++++++++-------------- xscen/extract.py | 85 +++++++++++++++-------------- 2 files changed, 126 insertions(+), 82 deletions(-) diff --git a/tests/test_extract.py b/tests/test_extract.py index 91f0681d..4306ac66 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -1,6 +1,7 @@ from copy import deepcopy import numpy as np +import pandas as pd import pytest from xclim.testing.helpers import test_timeseries as timeseries @@ -63,27 +64,6 @@ def test_ids(self): assert len(out) == 1 assert len(out["NorESM2-MM"].df) == 5 - # Missing id - small_cat = deepcopy(self.small_cat) - small_cat.esmcat._df.loc[ - small_cat.esmcat._df.id - == "CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_example-region", - "id", - ] = None - assert ( - "CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_example-region" - not in small_cat.esmcat._df.id.values - ) - out = xs.search_data_catalogs( - data_catalogs=deepcopy(self.small_cat), - variables_and_freqs={"tas": "D"}, - ) - assert len(out) == 5 - assert ( - "CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_example-region" - in out.keys() - ) - @pytest.mark.parametrize("allow_resampling", [True, False]) def test_allow_resampling(self, allow_resampling): out = xs.search_data_catalogs( @@ -122,18 +102,47 @@ def test_warminglevel(self, restrict_warming_level): else 2 ) - @pytest.mark.parametrize("restrict_resolution", [None, "finest"]) + @pytest.mark.parametrize("restrict_resolution", [None, "finest", "coarsest"]) def test_restrict_resolution(self, restrict_resolution): + big_cat = deepcopy(self.big_cat) + for i in range(2): + new_line = deepcopy(big_cat.df.iloc[0]) + new_line["mip_era"] = "CMIP5" + new_line["activity"] = "CORDEX" + new_line["institution"] = "CCCma" + new_line["driving_model"] = "CanESM2" + new_line["source"] = "CRCM5" + new_line["experiment"] = "rcp85" + new_line["member"] = "r1i1p1" + new_line["domain"] = "NAM-22" if i == 0 else "NAM-11" + new_line["frequency"] = "day" + new_line["xrfreq"] = "D" + new_line["variable"] = ("tas",) + new_line["id"] = xs.catalog.generate_id(new_line.to_frame().T).iloc[0] + + big_cat.esmcat._df = pd.concat( + [big_cat.df, new_line.to_frame().T], ignore_index=True + ) + out = xs.search_data_catalogs( - data_catalogs=self.big_cat, + data_catalogs=big_cat, variables_and_freqs={"tas": "D"}, other_search_criteria={ - "institution": ["NOAA-GFDL"], - "experiment": ["ssp585"], + "source": ["GFDL-CM4", "CRCM5"], + "experiment": ["ssp585", "rcp85"], }, restrict_resolution=restrict_resolution, ) - assert len(out) == 3 if restrict_resolution is None else 2 + if restrict_resolution is None: + assert len(out) == 4 + elif restrict_resolution == "finest": + assert len(out) == 2 + assert any("NAM-11" in x for x in out) + assert any("_gr1" in x for x in out) + elif restrict_resolution == "coarsest": + assert len(out) == 2 + assert any("NAM-22" in x for x in out) + assert any("_gr2" in x for x in out) @pytest.mark.parametrize("restrict_members", [None, {"ordered": 5}]) def test_restrict_members(self, restrict_members): @@ -207,18 +216,29 @@ def test_no_match(self): assert len(out) == 0 def test_input_types(self): - # data_catalogs_1 = notebooks / "samples" / "tutorial-catalog.json" - # data_catalogs_2 = notebooks / "samples" / "pangeo-cmip6.json" - # out = xs.search_data_catalogs( - # data_catalogs=data_catalogs_1, - # variables_and_freqs={"tas": "D"}, - # other_search_criteria={ - # "experiment": "ssp585", - # "source": "NorESM.*", - # "member": "r1i1p1f1", - # }, - # ) - pass + data_catalogs_1 = notebooks / "samples" / "tutorial-catalog.json" + data_catalogs_2 = notebooks / "samples" / "pangeo-cmip6.json" + + assert ( + xs.search_data_catalogs( + data_catalogs=[data_catalogs_1, data_catalogs_2], + variables_and_freqs={"tas": "D"}, + other_search_criteria={ + "experiment": "ssp585", + "source": "NorESM.*", + "member": "r1i1p1f1", + }, + ).keys() + == xs.search_data_catalogs( + data_catalogs=[self.small_cat, data_catalogs_2], + variables_and_freqs={"tas": "D"}, + other_search_criteria={ + "experiment": "ssp585", + "source": "NorESM.*", + "member": "r1i1p1f1", + }, + ).keys() + ) def test_match_histfut(self): out = xs.search_data_catalogs( @@ -250,7 +270,32 @@ def test_match_histfut(self): ) def test_fx(self): - pass + small_cat = deepcopy(self.small_cat) + new_line = deepcopy(small_cat.df.iloc[0]) + new_line["id"] = new_line["id"].replace( + new_line["experiment"], "another_experiment" + ) + new_line["experiment"] = "another_experiment" + small_cat.esmcat._df = pd.concat( + [small_cat.df, new_line.to_frame().T], ignore_index=True + ) + + with pytest.warns( + UserWarning, + match="doesn't have the fixed field sftlf, but it can be acquired from ", + ): + out = xs.search_data_catalogs( + data_catalogs=small_cat, + variables_and_freqs={"sftlf": "fx"}, + other_search_criteria={"experiment": "another_experiment"}, + ) + assert len(out) == 1 + np.testing.assert_array_equal( + out[ + "CMIP6_ScenarioMIP_NCC_NorESM2-MM_another_experiment_r1i1p1f1_example-region" + ].df["experiment"], + "another_experiment", + ) class TestGetWarmingLevel: diff --git a/xscen/extract.py b/xscen/extract.py index b2d132c4..e9e5f168 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -586,43 +586,31 @@ def search_data_catalogs( "registry": registry_from_module(load_xclim_module(conversion_yaml)) } - # Cast paths to single item list - if isinstance(data_catalogs, (str, os.PathLike)): + # Cast single items to a list + if isinstance(data_catalogs, (str, os.PathLike, DataCatalog)): data_catalogs = [data_catalogs] + # Open the catalogs given as paths + for i, dc in enumerate(data_catalogs): + if isinstance(dc, (str, os.PathLike)): + data_catalogs[i] = ( + DataCatalog(dc, **cat_kwargs) + if Path(dc).suffix == ".json" + else DataCatalog.from_df(dc) + ) - # Prepare a unique catalog to search from, with the DerivedCat added if required - if isinstance(data_catalogs, DataCatalog): - catalog = DataCatalog( - {"esmcat": data_catalogs.esmcat.dict(), "df": data_catalogs.df}, - **cat_kwargs, - ) - data_catalogs = [catalog] # simply for a meaningful logging line - elif isinstance(data_catalogs, list) and all( + if not isinstance(data_catalogs, list) or not all( isinstance(dc, DataCatalog) for dc in data_catalogs ): - catalog = DataCatalog( - { - "esmcat": data_catalogs[0].esmcat.dict(), - "df": pd.concat([dc.df for dc in data_catalogs], ignore_index=True), - }, - **cat_kwargs, - ) - elif isinstance(data_catalogs, list) and all( - isinstance(dc, (str, os.PathLike)) for dc in data_catalogs - ): - data_catalogs = [ - DataCatalog(path) if path.endswith(".json") else DataCatalog.from_df(path) - for path in data_catalogs - ] - catalog = DataCatalog( - { - "esmcat": data_catalogs[0].esmcat.dict(), - "df": pd.concat([dc.df for dc in data_catalogs], ignore_index=True), - }, - **cat_kwargs, - ) - else: raise ValueError("Catalogs type not recognized.") + + # Prepare a unique catalog to search from, with the DerivedCat added if required + catalog = DataCatalog( + { + "esmcat": data_catalogs[0].esmcat.dict(), + "df": pd.concat([dc.df for dc in data_catalogs], ignore_index=True), + }, + **cat_kwargs, + ) logger.info(f"Catalog opened: {catalog} from {len(data_catalogs)} files.") if match_hist_and_fut: @@ -630,17 +618,18 @@ def search_data_catalogs( catalog = _dispatch_historical_to_future(catalog, id_columns) # Cut entries that do not match search criteria - if other_search_criteria: - catalog = catalog.search(**other_search_criteria) - logger.info( - f"{len(catalog.df)} assets matched the criteria : {other_search_criteria}." - ) if exclusions: ex = catalog.search(**exclusions) catalog.esmcat._df = pd.concat([catalog.df, ex.df]).drop_duplicates(keep=False) logger.info( f"Removing {len(ex.df)} assets based on exclusion dict : {exclusions}." ) + full_catalog = deepcopy(catalog) # Used for searching for fixed fields + if other_search_criteria: + catalog = catalog.search(**other_search_criteria) + logger.info( + f"{len(catalog.df)} assets matched the criteria : {other_search_criteria}." + ) if restrict_warming_level: if isinstance(restrict_warming_level, bool): restrict_warming_level = {} @@ -654,11 +643,16 @@ def search_data_catalogs( # Recreate id from user specifications catalog.df["id"] = ids else: - # Only fill in the missing IDs + # Only fill in the missing IDs. + # Unreachable line if 'id' is in the aggregation control columns, but this is a safety measure. catalog.df["id"] = catalog.df["id"].fillna(ids) if catalog.df.empty: - logger.warning("Found no match corresponding to the 'other' search criteria.") + warnings.warn( + "Found no match corresponding to the search criteria.", + UserWarning, + stacklevel=1, + ) return {} coverage_kwargs = coverage_kwargs or {} @@ -687,11 +681,14 @@ def search_data_catalogs( scat_id = { i: scat.df[i].iloc[0] for i in id_columns or ID_COLUMNS - if i in scat.df.columns + if ( + (i in scat.df.columns) + and (not pd.isnull(scat.df[i].iloc[0])) + ) } scat_id.pop("experiment", None) scat_id.pop("member", None) - varcat = catalog.search( + varcat = full_catalog.search( **scat_id, xrfreq=xrfreq, variable=var_id, @@ -700,8 +697,10 @@ def search_data_catalogs( if len(varcat) > 1: varcat.esmcat._df = varcat.df.iloc[[0]] if len(varcat) == 1: - logger.warning( - f"Dataset {sim_id} doesn't have the fixed field {var_id}, but it can be acquired from {varcat.df['id'].iloc[0]}." + warnings.warn( + f"Dataset {sim_id} doesn't have the fixed field {var_id}, but it can be acquired from {varcat.df['id'].iloc[0]}.", + UserWarning, + stacklevel=1, ) for i in {"member", "experiment", "id"}.intersection( varcat.df.columns From c13dccaac3d4e5f5e53fc28dbc045d8634e58767 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 29 Aug 2023 17:15:37 -0400 Subject: [PATCH 06/23] fix tests for smaller catalog --- tests/conftest.py | 2 +- tests/test_extract.py | 179 ++++++++++++++++++------------------------ xscen/extract.py | 5 +- 3 files changed, 79 insertions(+), 107 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 96781d5b..6050ebb3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ def remove_data_folder(): request.addfinalizer(remove_data_folder) -@pytest.mark.requires_docs +@pytest.mark.requires_netcdf @pytest.fixture(scope="session") def samplecat(): """Generate a sample catalog with the tutorial netCDFs.""" diff --git a/tests/test_extract.py b/tests/test_extract.py index 4306ac66..983f6454 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -3,110 +3,101 @@ import numpy as np import pandas as pd import pytest +from conftest import notebooks from xclim.testing.helpers import test_timeseries as timeseries import xscen as xs -from .conftest import notebooks - class TestSearchDataCatalogs: - small_cat = xs.DataCatalog(notebooks / "samples" / "tutorial-catalog.json") - big_cat = xs.DataCatalog(notebooks / "samples" / "pangeo-cmip6.json") + cat = xs.DataCatalog(notebooks / "samples" / "pangeo-cmip6.json") @pytest.mark.parametrize( "variables_and_freqs, other_arg", [ - ({"tas": "D"}, None), + ({"tasmin": "D"}, None), ({"sftlf": "fx"}, "other"), - ({"tas": "D", "sftlf": "fx"}, "exclusion"), + ({"tasmin": "D", "sftlf": "fx"}, "exclusion"), ], ) def test_basic(self, variables_and_freqs, other_arg): out = xs.search_data_catalogs( - data_catalogs=self.small_cat, + data_catalogs=self.cat, variables_and_freqs=variables_and_freqs, - other_search_criteria={"experiment": ["ssp370"]} + other_search_criteria={"experiment": ["ssp585"]} if other_arg == "other" else None, exclusions={"member": "r2.*"} if other_arg == "exclusion" else None, ) - assert len(out) == 5 if other_arg is None else 1 if other_arg == "other" else 4 + assert len(out) == 13 if other_arg is None else 2 if other_arg == "other" else 6 @pytest.mark.parametrize( "periods, coverage_kwargs", [ ([["2020", "2030"], ["2035", "2040"]], None), ([["1900", "2030"], ["2035", "2040"]], None), - ([["2020", "2080"]], None), - ([["2020", "2080"]], {"coverage": 0.5}), + ([["2020", "2130"]], {"coverage": 0.70}), ], ) def test_periods(self, periods, coverage_kwargs): out = xs.search_data_catalogs( - data_catalogs=self.small_cat, - variables_and_freqs={"tas": "D"}, + data_catalogs=self.cat, + variables_and_freqs={"tasmin": "D"}, periods=periods, coverage_kwargs=coverage_kwargs, ) - assert len(out) == ( - 5 - if ((periods[0] == ["2020", "2030"]) or coverage_kwargs is not None) - else 0 - ) + assert len(out) == (0 if periods[0] == ["1900", "2030"] else 5) def test_ids(self): out = xs.search_data_catalogs( - data_catalogs=deepcopy(self.small_cat), - variables_and_freqs={"tas": "D"}, + data_catalogs=deepcopy(self.cat), + variables_and_freqs={"tasmin": "D"}, id_columns=["source"], ) - assert len(out) == 1 + assert len(out) == 3 assert len(out["NorESM2-MM"].df) == 5 @pytest.mark.parametrize("allow_resampling", [True, False]) def test_allow_resampling(self, allow_resampling): out = xs.search_data_catalogs( - data_catalogs=deepcopy(self.small_cat), - variables_and_freqs={"tas": "YS"}, + data_catalogs=deepcopy(self.cat), + variables_and_freqs={"tasmin": "YS"}, allow_resampling=allow_resampling, ) - assert len(out) == (5 if allow_resampling else 0) + assert len(out) == (13 if allow_resampling else 0) @pytest.mark.parametrize( "restrict_warming_level", [ True, - {"ignore_member": True}, - {"wl": 2}, - {"wl": 3}, {"wl": 2, "ignore_member": True}, + {"wl": 4}, ], ) def test_warminglevel(self, restrict_warming_level): + cat = deepcopy(self.cat) + new_line = deepcopy(cat.df.iloc[13]) + new_line["experiment"] = "ssp245" + new_line["id"] = xs.catalog.generate_id(new_line.to_frame().T).iloc[0] + cat.esmcat._df = pd.concat([cat.df, new_line.to_frame().T], ignore_index=True) + out = xs.search_data_catalogs( - data_catalogs=self.small_cat, - variables_and_freqs={"tas": "D"}, + data_catalogs=cat, + variables_and_freqs={"tasmax": "D"}, restrict_warming_level=restrict_warming_level, ) - assert ( - out == 5 - if restrict_warming_level == {"ignore_member": True} - else 4 - if ( - (restrict_warming_level is True) - or (restrict_warming_level == {"wl": 2, "ignore_member": True}) - ) - else 3 - if restrict_warming_level == {"wl": 2} - else 2 - ) + if isinstance(restrict_warming_level, bool): + assert len(out) == 5 + elif restrict_warming_level == {"wl": 2, "ignore_member": True}: + assert len(out) == 5 + elif restrict_warming_level == {"wl": 4}: + assert len(out) == 2 @pytest.mark.parametrize("restrict_resolution", [None, "finest", "coarsest"]) def test_restrict_resolution(self, restrict_resolution): - big_cat = deepcopy(self.big_cat) + cat = deepcopy(self.cat) for i in range(2): - new_line = deepcopy(big_cat.df.iloc[0]) + new_line = deepcopy(cat.df.iloc[0]) new_line["mip_era"] = "CMIP5" new_line["activity"] = "CORDEX" new_line["institution"] = "CCCma" @@ -117,16 +108,16 @@ def test_restrict_resolution(self, restrict_resolution): new_line["domain"] = "NAM-22" if i == 0 else "NAM-11" new_line["frequency"] = "day" new_line["xrfreq"] = "D" - new_line["variable"] = ("tas",) + new_line["variable"] = ("tasmin",) new_line["id"] = xs.catalog.generate_id(new_line.to_frame().T).iloc[0] - big_cat.esmcat._df = pd.concat( - [big_cat.df, new_line.to_frame().T], ignore_index=True + cat.esmcat._df = pd.concat( + [cat.df, new_line.to_frame().T], ignore_index=True ) out = xs.search_data_catalogs( - data_catalogs=big_cat, - variables_and_freqs={"tas": "D"}, + data_catalogs=cat, + variables_and_freqs={"tasmin": "D"}, other_search_criteria={ "source": ["GFDL-CM4", "CRCM5"], "experiment": ["ssp585", "rcp85"], @@ -144,46 +135,48 @@ def test_restrict_resolution(self, restrict_resolution): assert any("NAM-22" in x for x in out) assert any("_gr2" in x for x in out) - @pytest.mark.parametrize("restrict_members", [None, {"ordered": 5}]) + @pytest.mark.parametrize("restrict_members", [None, {"ordered": 2}]) def test_restrict_members(self, restrict_members): out = xs.search_data_catalogs( - data_catalogs=self.big_cat, - variables_and_freqs={"tas": "D"}, - other_search_criteria={"source": ["CanESM5"], "experiment": ["ssp585"]}, + data_catalogs=self.cat, + variables_and_freqs={"tasmin": "D"}, + other_search_criteria={ + "source": ["NorESM2-LM"], + "experiment": ["historical"], + }, restrict_members=restrict_members, ) - assert len(out) == (50 if restrict_members is None else 5) + assert len(out) == (3 if restrict_members is None else 2) if restrict_members is not None: assert all( o in out.keys() for o in [ - "ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p2f1_gn", - "ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p1f1_gn", - "ScenarioMIP_CCCma_CanESM5_ssp585_r2i1p1f1_gn", - "ScenarioMIP_CCCma_CanESM5_ssp585_r2i1p2f1_gn", - "ScenarioMIP_CCCma_CanESM5_ssp585_r3i1p1f1_gn", + "CMIP_NCC_NorESM2-LM_historical_r1i1p1f1_gn", + "CMIP_NCC_NorESM2-LM_historical_r2i1p1f1_gn", ] ) + # Make sure that those with fewer members are still returned assert ( len( xs.search_data_catalogs( - data_catalogs=self.big_cat, - variables_and_freqs={"tas": "D"}, + data_catalogs=self.cat, + variables_and_freqs={"tasmin": "D"}, other_search_criteria={ - "institution": ["NOAA-GFDL"], + "source": ["GFDL-CM4"], "experiment": ["ssp585"], + "domain": "gr1", }, restrict_members=restrict_members, ) ) - == 3 + == 1 ) @pytest.mark.parametrize("allow_conversion", [True, False]) def test_allow_conversion(self, allow_conversion): out = xs.search_data_catalogs( - data_catalogs=self.big_cat, + data_catalogs=self.cat, variables_and_freqs={"evspsblpot": "D"}, other_search_criteria={ "institution": ["NOAA-GFDL"], @@ -191,37 +184,36 @@ def test_allow_conversion(self, allow_conversion): }, allow_conversion=allow_conversion, ) - assert len(out) == (3 if allow_conversion else 0) + assert len(out) == (2 if allow_conversion else 0) if allow_conversion: assert all( v in out[list(out.keys())[0]].unique("variable") for v in ["tasmin", "tasmax"] ) - assert "evspsblpot" not in out[list(out.keys())[0]].unique("variable") + assert "tas" not in out[list(out.keys())[0]].unique("variable") def test_no_match(self): out = xs.search_data_catalogs( - data_catalogs=self.small_cat, + data_catalogs=self.cat, variables_and_freqs={"tas": "YS"}, allow_resampling=False, ) assert isinstance(out, dict) assert len(out) == 0 out = xs.search_data_catalogs( - data_catalogs=self.small_cat, + data_catalogs=self.cat, variables_and_freqs={"tas": "D"}, other_search_criteria={"experiment": "not_real"}, ) assert isinstance(out, dict) assert len(out) == 0 - def test_input_types(self): - data_catalogs_1 = notebooks / "samples" / "tutorial-catalog.json" + def test_input_types(self, samplecat): data_catalogs_2 = notebooks / "samples" / "pangeo-cmip6.json" assert ( xs.search_data_catalogs( - data_catalogs=[data_catalogs_1, data_catalogs_2], + data_catalogs=[samplecat, data_catalogs_2], variables_and_freqs={"tas": "D"}, other_search_criteria={ "experiment": "ssp585", @@ -230,7 +222,7 @@ def test_input_types(self): }, ).keys() == xs.search_data_catalogs( - data_catalogs=[self.small_cat, data_catalogs_2], + data_catalogs=[samplecat, self.cat], variables_and_freqs={"tas": "D"}, other_search_criteria={ "experiment": "ssp585", @@ -242,58 +234,37 @@ def test_input_types(self): def test_match_histfut(self): out = xs.search_data_catalogs( - data_catalogs=self.big_cat, - variables_and_freqs={"tas": "D"}, - other_search_criteria={"experiment": "ssp585", "source": "CanESM5"}, - restrict_members={"ordered": 1}, + data_catalogs=self.cat, + variables_and_freqs={"tasmin": "D"}, + other_search_criteria={"experiment": "ssp585", "source": "GFDL-CM4"}, match_hist_and_fut=True, ) - assert ( - str( - sorted( - out["ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p1f1_gn"].unique( - "date_start" - ) - )[0] - ) - == "1985-01-01 00:00:00" - ) - assert ( - str( - sorted( - out["ScenarioMIP_CCCma_CanESM5_ssp585_r1i1p1f1_gn"].unique( - "date_start" - ) - )[1] - ) - == "2015-01-01 00:00:00" - ) + k = list(out.keys())[0] + assert str(sorted(out[k].unique("date_start"))[0]) == "1985-01-01 00:00:00" + assert str(sorted(out[k].unique("date_start"))[1]) == "2015-01-01 00:00:00" def test_fx(self): - small_cat = deepcopy(self.small_cat) - new_line = deepcopy(small_cat.df.iloc[0]) + cat = deepcopy(self.cat) + new_line = deepcopy(cat.df.iloc[0]) new_line["id"] = new_line["id"].replace( new_line["experiment"], "another_experiment" ) new_line["experiment"] = "another_experiment" - small_cat.esmcat._df = pd.concat( - [small_cat.df, new_line.to_frame().T], ignore_index=True - ) + cat.esmcat._df = pd.concat([cat.df, new_line.to_frame().T], ignore_index=True) with pytest.warns( UserWarning, match="doesn't have the fixed field sftlf, but it can be acquired from ", ): out = xs.search_data_catalogs( - data_catalogs=small_cat, + data_catalogs=cat, variables_and_freqs={"sftlf": "fx"}, other_search_criteria={"experiment": "another_experiment"}, ) assert len(out) == 1 + k = list(out.keys())[0] np.testing.assert_array_equal( - out[ - "CMIP6_ScenarioMIP_NCC_NorESM2-MM_another_experiment_r1i1p1f1_example-region" - ].df["experiment"], + out[k].df["experiment"], "another_experiment", ) diff --git a/xscen/extract.py b/xscen/extract.py index e9e5f168..6e5c4fb6 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -552,9 +552,10 @@ def search_data_catalogs( Currently only supports {"ordered": int} format. restrict_warming_level : bool, dict Used to restrict the results only to datasets that exist in the csv used to compute warming levels in `subset_warming_level`. - If True, this will only keep the datasets that have a mip_era, source, experiment - and member combination that exist in the csv. This does not guarantees that a given warming level will be reached, only that the datasets have corresponding columns in the csv. + If True, this will only keep the datasets that have a mip_era, source, experiment and member combination that exist in the csv. + This does not guarantee that a given warming level will be reached, only that the datasets have corresponding columns in the csv. More option can be added by passing a dictionary instead of a boolean. + If {'wl': float}, it will only keep the datasets that reach a warming level that is equal or greater than the given value. If {'ignore_member':True}, it will disregard the member when trying to match the dataset to a column. If {tas_csv: Path_to_csv}, it will use an alternative csv instead of the default one provided by xscen. From cdc0cf8692cd84ed0d317bbfdd8eb8ea19da546d Mon Sep 17 00:00:00 2001 From: RondeauG Date: Wed, 30 Aug 2023 09:16:00 -0400 Subject: [PATCH 07/23] upd History --- HISTORY.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index daa865b4..9db73c8a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,30 @@ History ======= +v0.8.0 (unreleased) +------------------- +Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`). + +Announcements +^^^^^^^^^^^^^ +* N/A + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* Added the ability to search for simulations that reach a given warming level. (:pull:`251`). + +Breaking changes +^^^^^^^^^^^^^^^^ +* N/A + +Bug fixes +^^^^^^^^^ +* Fixed a bug in ``xs.search_data_catalogs`` when searching for fixed fields and specific experiments/members. (:pull:`251`). + +Internal changes +^^^^^^^^^^^^^^^^ +* Continued work on adding tests. (:pull:`251`). + v0.7.1 (2023-08-23) ------------------- * Update dependencies by removing ``pygeos``, pinning ``shapely>=2`` and ``intake-esm>=2023.07.07`` as well as other small fixes to the environment files. (:pull:`243`). From e6edb6d7facf38f8139d33ad7a78878c148f1745 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 8 Sep 2023 15:24:17 -0400 Subject: [PATCH 08/23] add an example in notebooks --- docs/notebooks/1_catalog.ipynb | 6 ++++-- xscen/extract.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/notebooks/1_catalog.ipynb b/docs/notebooks/1_catalog.ipynb index 458deba8..fc8ee4be 100644 --- a/docs/notebooks/1_catalog.ipynb +++ b/docs/notebooks/1_catalog.ipynb @@ -254,7 +254,7 @@ "- `allow_conversion` is used to allow searching for calculable variables, in the case where the requested variable would not be available.\n", "- `restrict_resolution` is used to limit the results to the finest or coarsest resolution available for each source.\n", "- `restrict_members` is used to limit the results to a maximum number of realizations for each source.\n", - "- `restrict_warming_level` is used to limit the results to only datasets that are present in the csv used for calculating warming levels.\n", + "- `restrict_warming_level` is used to limit the results to only datasets that are present in the csv used for calculating warming levels and/or that reach a given warming level.\n", "\n", "Note that compared to `search`, the result of `search_data_catalog` is a dictionary with one entry per unique ID. A given unique ID might contain multiple datasets as per `intake-esm`'s definition, because it groups catalog lines per *id - domain - processing_level - xrfreq*. Thus, it would separate model data that exists at different frequencies.\n", "\n", @@ -390,7 +390,9 @@ " data_catalogs=[f\"{Path().absolute()}/samples/pangeo-cmip6.json\"],\n", " variables_and_freqs=variables_and_freqs,\n", " match_hist_and_fut=True,\n", - " restrict_warming_level=True, # In this case all models exist in our database, so nothing gets eliminated.\n", + " restrict_warming_level={\n", + " \"wl\": 2\n", + " }, # SSP126 gets eliminated, since it doesn't reach +2°C by 2100.\n", ")\n", "\n", "cat_sim" diff --git a/xscen/extract.py b/xscen/extract.py index 6e5c4fb6..c0eb00a6 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -555,9 +555,10 @@ def search_data_catalogs( If True, this will only keep the datasets that have a mip_era, source, experiment and member combination that exist in the csv. This does not guarantee that a given warming level will be reached, only that the datasets have corresponding columns in the csv. More option can be added by passing a dictionary instead of a boolean. - If {'wl': float}, it will only keep the datasets that reach a warming level that is equal or greater than the given value. If {'ignore_member':True}, it will disregard the member when trying to match the dataset to a column. If {tas_csv: Path_to_csv}, it will use an alternative csv instead of the default one provided by xscen. + If 'wl' is a provided key, then `xs.get_warming_level` will be called and only datasets that reach the given warming level will be kept. + This can be combined with other arguments of the function, for example {'wl': 1.5, 'window': 30}. Notes ----- From fe7120644aa7db248de04bf66eac6c88afb56b33 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 8 Sep 2023 15:33:35 -0400 Subject: [PATCH 09/23] fix tests for xclim 0.45 --- tests/test_diagnostics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py index e605e767..f4ab3e3d 100644 --- a/tests/test_diagnostics.py +++ b/tests/test_diagnostics.py @@ -126,7 +126,7 @@ def test_variables(self): xs.diagnostics.health_checks(ds, variables_and_units={"tas": "degC"}) with pytest.warns( UserWarning, - match="Data units kelvin are not compatible with requested 1 millimeter.", + match="Data units kelvin are not compatible with requested", ): xs.diagnostics.health_checks(ds, variables_and_units={"tas": "mm"}) From 4d2b706f8a43f98889c9b6c35709ed3f6e2a8284 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 8 Sep 2023 15:35:02 -0400 Subject: [PATCH 10/23] fix tests for xclim 0.45 --- tests/test_diagnostics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py index f4ab3e3d..401c44e6 100644 --- a/tests/test_diagnostics.py +++ b/tests/test_diagnostics.py @@ -126,7 +126,7 @@ def test_variables(self): xs.diagnostics.health_checks(ds, variables_and_units={"tas": "degC"}) with pytest.warns( UserWarning, - match="Data units kelvin are not compatible with requested", + match="Data units kelvin are not compatible with requested mm.", ): xs.diagnostics.health_checks(ds, variables_and_units={"tas": "mm"}) From e4edc935085da07c83c138f3cb8d8a30d90256d0 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 8 Sep 2023 15:51:27 -0400 Subject: [PATCH 11/23] fix pre-commit --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c72afae0..d82123ad 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,6 +19,7 @@ repos: - id: check-json - id: pretty-format-json args: [ '--autofix', '--no-ensure-ascii', '--no-sort-keys' ] + exclude: .ipynb - id: check-yaml args: [ '--allow-multiple-documents' ] exclude: conda/xscen/meta.yaml From d777d14fc85414e2df33916c40ab39d11c0537e1 Mon Sep 17 00:00:00 2001 From: RondeauG <38501935+RondeauG@users.noreply.github.com> Date: Mon, 11 Sep 2023 13:21:51 -0400 Subject: [PATCH 12/23] Update docs/notebooks/1_catalog.ipynb --- docs/notebooks/1_catalog.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/1_catalog.ipynb b/docs/notebooks/1_catalog.ipynb index fc8ee4be..063b89ee 100644 --- a/docs/notebooks/1_catalog.ipynb +++ b/docs/notebooks/1_catalog.ipynb @@ -254,7 +254,7 @@ "- `allow_conversion` is used to allow searching for calculable variables, in the case where the requested variable would not be available.\n", "- `restrict_resolution` is used to limit the results to the finest or coarsest resolution available for each source.\n", "- `restrict_members` is used to limit the results to a maximum number of realizations for each source.\n", - "- `restrict_warming_level` is used to limit the results to only datasets that are present in the csv used for calculating warming levels and/or that reach a given warming level.\n", + "- `restrict_warming_level` is used to limit the results to only datasets that are present in the csv used for calculating warming levels. You can also pass a dict to verify that a given warming level is reached.\n", "\n", "Note that compared to `search`, the result of `search_data_catalog` is a dictionary with one entry per unique ID. A given unique ID might contain multiple datasets as per `intake-esm`'s definition, because it groups catalog lines per *id - domain - processing_level - xrfreq*. Thus, it would separate model data that exists at different frequencies.\n", "\n", From a1cfdc5ebf03864297435ddb783c3ecc0a4d1ba6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Sep 2023 07:18:10 +0000 Subject: [PATCH 13/23] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 23.7.0 → 23.9.1](https://github.com/psf/black/compare/23.7.0...23.9.1) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d82123ad..db526fa5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,7 +28,7 @@ repos: hooks: - id: rst-inline-touching-normal - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 23.9.1 hooks: - id: black exclude: ^docs/ From c1e5624dbdd65b44d9d809ac472d6123702d58b0 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 12 Sep 2023 09:05:29 -0400 Subject: [PATCH 14/23] Update .pre-commit-config.yaml use black pre-commit mirror --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index db526fa5..a9cb6631 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: rev: v1.10.0 hooks: - id: rst-inline-touching-normal - - repo: https://github.com/psf/black + - repo: https://github.com/psf/black-pre-commit-mirror rev: 23.9.1 hooks: - id: black From f189b60ab010d97393b0b83053a1d1dc93872e46 Mon Sep 17 00:00:00 2001 From: "bumpversion[bot]" Date: Wed, 13 Sep 2023 14:54:12 +0000 Subject: [PATCH 15/23] =?UTF-8?q?Bump=20version:=200.7.4-beta=20=E2=86=92?= =?UTF-8?q?=200.7.5-beta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cruft.json | 2 +- setup.cfg | 2 +- setup.py | 2 +- tests/test_xscen.py | 2 +- xscen/__init__.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.cruft.json b/.cruft.json index 919e8b66..f67b403f 100644 --- a/.cruft.json +++ b/.cruft.json @@ -11,7 +11,7 @@ "project_slug": "xscen", "project_short_description": "A climate change scenario-building analysis framework, built with xclim/xarray.", "pypi_username": "RondeauG", - "version": "0.7.4-beta", + "version": "0.7.5-beta", "use_pytest": "y", "use_black": "y", "add_pyup_badge": "n", diff --git a/setup.cfg b/setup.cfg index 93bcc285..004df997 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.7.4-beta +current_version = 0.7.5-beta commit = True tag = False parse = (?P\d+)\.(?P\d+).(?P\d+)(\-(?P[a-z]+))? diff --git a/setup.py b/setup.py index 9bd97c95..3d49e3ca 100644 --- a/setup.py +++ b/setup.py @@ -80,6 +80,6 @@ test_suite="tests", extras_require={"dev": dev_requirements}, url="https://github.com/Ouranosinc/xscen", - version="0.7.4-beta", + version="0.7.5-beta", zip_safe=False, ) diff --git a/tests/test_xscen.py b/tests/test_xscen.py index 74ac8c37..5bbcbc69 100644 --- a/tests/test_xscen.py +++ b/tests/test_xscen.py @@ -28,4 +28,4 @@ def test_package_metadata(self): contents = f.read() assert """Gabriel Rondeau-Genesse""" in contents assert '__email__ = "rondeau-genesse.gabriel@ouranos.ca"' in contents - assert '__version__ = "0.7.4-beta"' in contents + assert '__version__ = "0.7.5-beta"' in contents diff --git a/xscen/__init__.py b/xscen/__init__.py index ea232cc3..7e61e09c 100644 --- a/xscen/__init__.py +++ b/xscen/__init__.py @@ -52,7 +52,7 @@ __author__ = """Gabriel Rondeau-Genesse""" __email__ = "rondeau-genesse.gabriel@ouranos.ca" -__version__ = "0.7.4-beta" +__version__ = "0.7.5-beta" # monkeypatch so that warnings.warn() doesn't mention itself From 6c8a9a7a631c327c1123951e35e8a095ad747a8c Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 13 Sep 2023 11:44:16 -0400 Subject: [PATCH 16/23] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9cb6631..2dfe235a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: rev: v3.10.1 hooks: - id: pyupgrade - args: [--py39-plus] + args: [ '--py39-plus' ] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: @@ -53,7 +53,7 @@ repos: rev: v0.3.8 hooks: - id: blackdoc - additional_dependencies: [ 'black==23.3.0' ] + additional_dependencies: [ 'black==23.9.1' ] exclude: config.py - repo: https://github.com/adrienverge/yamllint.git rev: v1.32.0 From e101e1850b3161bff138e293d9bbd9e12e2bfc70 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:43:57 -0400 Subject: [PATCH 17/23] conditional running of notebooks in ReadTheDocs depending on target output --- docs/conf.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index c0eeccfd..7dee3b41 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -83,10 +83,19 @@ # "branch", # "external", # ]: -# elif os.getenv("READTHEDOCS_VERSION_NAME") in ["latest", "stable"]: -# nbsphinx_execute = "always" -# else: -# nbsphinx_execute = "auto" +# warnings.warn("Not executing notebooks.") +# nbsphinx_execute = "never" + +if os.getenv("READTHEDOCS_VERSION_NAME") in ["latest", "stable"] or os.getenv( + "READTHEDOCS_VERSION_TYPE" +) in ["tag"]: + if os.getenv("READTHEDOCS_OUTPUT") in ["pdf"]: + warnings.warn("Generating PDF version. Not executing notebooks.") + nbsphinx_execute = "never" + else: + nbsphinx_execute = "always" +else: + nbsphinx_execute = "auto" # To avoid having to install these and burst memory limit on ReadTheDocs. # autodoc_mock_imports = [ From 9749cf9b1372880b2988b8335ce7b728415fc28d Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:49:36 -0400 Subject: [PATCH 18/23] conditional running of notebooks in ReadTheDocs depending on target output, but less buggy --- docs/conf.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 7dee3b41..0bdec6a2 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -68,16 +68,22 @@ autosectionlabel_maxdepth = 2 autosummary_generate = True -nbsphinx_execute = "always" -# To avoid running notebooks on linkcheck +nbsphinx_execute = "always" +# To avoid running notebooks on linkcheck and when building PDF. try: skip_notebooks = int(os.getenv("SKIP_NOTEBOOKS")) except TypeError: skip_notebooks = False if skip_notebooks: - warnings.warn("Not executing notebooks.") + warnings.warn("SKIP_NOTEBOOKS is set. Not executing notebooks.") nbsphinx_execute = "never" +elif os.getenv("READTHEDOCS_VERSION_NAME") in ["latest", "stable"] or os.getenv( + "READTHEDOCS_VERSION_TYPE" +) in ["tag"]: + if os.getenv("READTHEDOCS_OUTPUT") in ["pdf"]: + warnings.warn("Generating PDF version. Not executing notebooks.") + nbsphinx_execute = "never" # if skip_notebooks or os.getenv("READTHEDOCS_VERSION_TYPE") in [ # "branch", From 8e2da4dc0771116a76d8874c297425f581af375c Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:52:35 -0400 Subject: [PATCH 19/23] proper year range --- docs/conf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 0bdec6a2..9fd8c3cc 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,6 +21,7 @@ import os import sys import warnings +from datetime import datetime from pathlib import Path sys.path.insert(0, os.path.abspath('..')) @@ -157,8 +158,8 @@ master_doc = 'index' # General information about the project. -project = 'xscen' -copyright = "2022, Ouranos Inc., Gabriel Rondeau-Genesse, and contributors" +project = "xscen" +copyright = f"2022-{datetime.now().year}, Ouranos Inc., Gabriel Rondeau-Genesse, and contributors" author = "Gabriel Rondeau-Genesse" # The version info for the project you're documenting, acts as replacement From f234740fbe6f83be62aa46e3b030021f2ce3f6ad Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:52:55 -0400 Subject: [PATCH 20/23] blacken --- docs/conf.py | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 9fd8c3cc..310d2dfe 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,12 +24,12 @@ from datetime import datetime from pathlib import Path -sys.path.insert(0, os.path.abspath('..')) -if os.environ.get('READTHEDOCS') and 'ESMFMKFILE' not in os.environ: +sys.path.insert(0, os.path.abspath("..")) +if os.environ.get("READTHEDOCS") and "ESMFMKFILE" not in os.environ: # RTD doesn't activate the env, and esmpy depends on a env var set there # We assume the `os` package is in {ENV}/lib/pythonX.X/os.py # See conda-forge/esmf-feedstock#91 and readthedocs/readthedocs.org#4067 - os.environ['ESMFMKFILE'] = str(Path(os.__file__).parent.parent / 'esmf.mk') + os.environ["ESMFMKFILE"] = str(Path(os.__file__).parent.parent / "esmf.mk") import xscen # noqa import xarray # noqa @@ -54,7 +54,7 @@ "sphinx.ext.viewcode", "nbsphinx", "sphinx_codeautolink", - "sphinx_copybutton" + "sphinx_copybutton", ] # To ensure that underlined fields (e.g. `_field`) are shown in the docs. @@ -152,10 +152,10 @@ # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = ['.rst'] +source_suffix = [".rst"] # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. project = "xscen" @@ -188,7 +188,7 @@ ] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -217,13 +217,13 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # -- Options for HTMLHelp output --------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'xscendoc' +htmlhelp_basename = "xscendoc" # -- Options for LaTeX output ------------------------------------------ @@ -232,15 +232,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -250,9 +247,13 @@ # (source start file, target name, title, author, documentclass # [howto, manual, or own class]). latex_documents = [ - (master_doc, 'xscen.tex', - 'xscen Documentation', - 'Gabriel Rondeau-Genesse', 'manual'), + ( + master_doc, + "xscen.tex", + "xscen Documentation", + "Gabriel Rondeau-Genesse", + "manual", + ), ] @@ -260,11 +261,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'xscen', - 'xscen Documentation', - [author], 1) -] +man_pages = [(master_doc, "xscen", "xscen Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------- @@ -273,10 +270,13 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'xscen', - 'xscen Documentation', - author, - 'xscen', - 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "xscen", + "xscen Documentation", + author, + "xscen", + "One line description of project.", + "Miscellaneous", + ), ] From d69891e671b2890ec02c066f10c480a5d0b13dd8 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Sep 2023 13:58:30 -0400 Subject: [PATCH 21/23] remove double-commit --- docs/conf.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 310d2dfe..dcee2304 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -93,17 +93,6 @@ # warnings.warn("Not executing notebooks.") # nbsphinx_execute = "never" -if os.getenv("READTHEDOCS_VERSION_NAME") in ["latest", "stable"] or os.getenv( - "READTHEDOCS_VERSION_TYPE" -) in ["tag"]: - if os.getenv("READTHEDOCS_OUTPUT") in ["pdf"]: - warnings.warn("Generating PDF version. Not executing notebooks.") - nbsphinx_execute = "never" - else: - nbsphinx_execute = "always" -else: - nbsphinx_execute = "auto" - # To avoid having to install these and burst memory limit on ReadTheDocs. # autodoc_mock_imports = [ # "cartopy", From 268051657079cb54e24e717ad1264e58cb5be9bf Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Sep 2023 14:37:05 -0400 Subject: [PATCH 22/23] update HISTORY.rst --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index 9d4c6423..4bab7fbf 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -21,6 +21,7 @@ Breaking changes Bug fixes ^^^^^^^^^ * Fixed a bug in ``xs.search_data_catalogs`` when searching for fixed fields and specific experiments/members. (:pull:`251`). +* Fixed a bug in the documentation build configuration that prevented stable/latest and tagged documentation builds from resolving on ReadTheDocs. (:pull:`256`). Internal changes ^^^^^^^^^^^^^^^^ From 74b4dc10dc5537f95ef5f26388625b53fc85c337 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 14 Sep 2023 14:56:43 -0400 Subject: [PATCH 23/23] document SKIP_NOTEBOOKS --- CONTRIBUTING.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 5a67da34..170e53d7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -111,6 +111,12 @@ Ready to contribute? Here's how to set up `xscen` for local development. $ cd docs/ $ make html +.. note:: + + When building the documentation, the default behaviour is to evaluate notebooks ('nbsphinx_execute = "always"'), rather than simply parse the content ('nbsphinx_execute = "never"'). Due to their complexity, this can sometimes be a very computationally demanding task and should only be performed when necessary (i.e.: when the notebooks have been modified). + + In order to speed up documentation builds, setting a value for the environment variable "SKIP_NOTEBOOKS" (e.g. "$ export SKIP_NOTEBOOKS=1") will prevent the notebooks from being evaluated on all subsequent "$ tox -e docs" or "$ make docs" invocations. + 8. Submit a pull request through the GitHub website. Pull Request Guidelines