diff --git a/HISTORY.rst b/HISTORY.rst index 2849ba51..9281407b 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -30,6 +30,7 @@ Bug fixes * Fixed a bug in ``xs.search_data_catalogs`` when searching for fixed fields and specific experiments/members. (:pull:`251`). * Fixed a bug in the documentation build configuration that prevented stable/latest and tagged documentation builds from resolving on ReadTheDocs. (:pull:`256`). * Fixed ``get_warming_level`` to avoid incomplete matches. (:pull:`269`). +* ``search_data_catalogs`` now eliminates anything that matches any entry in ``exclusions``. (:issue:`275`, :pull:`280`). Internal changes ^^^^^^^^^^^^^^^^ diff --git a/tests/test_extract.py b/tests/test_extract.py index cd36f34a..940bf3a9 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -28,9 +28,11 @@ def test_basic(self, variables_and_freqs, other_arg): other_search_criteria={"experiment": ["ssp585"]} if other_arg == "other" else None, - exclusions={"member": "r2.*"} if other_arg == "exclusion" else None, + exclusions={"member": "r2.*", "domain": ["gr2"]} + if other_arg == "exclusion" + else None, ) - assert len(out) == 13 if other_arg is None else 2 if other_arg == "other" else 6 + assert len(out) == 13 if other_arg is None else 2 if other_arg == "other" else 4 @pytest.mark.parametrize( "periods, coverage_kwargs", diff --git a/xscen/extract.py b/xscen/extract.py index 8edea223..96a9f6df 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -616,7 +616,7 @@ def search_data_catalogs( You can also pass 'require_all_on: list(columns_name)' in order to only return results that correspond to all other criteria across the listed columns. More details available at https://intake-esm.readthedocs.io/en/stable/how-to/enforce-search-query-criteria-via-require-all-on.html . exclusions : dict, optional - Same as other_search_criteria, but for eliminating results. + Same as other_search_criteria, but for eliminating results. 
Any result that matches any of the exclusions will be removed. match_hist_and_fut: bool, optional If True, historical and future simulations will be combined into the same line, and search results lacking one of them will be rejected. periods : list @@ -712,11 +712,14 @@ def search_data_catalogs( # Cut entries that do not match search criteria if exclusions: - ex = catalog.search(**exclusions) - catalog.esmcat._df = pd.concat([catalog.df, ex.df]).drop_duplicates(keep=False) - logger.info( - f"Removing {len(ex.df)} assets based on exclusion dict : {exclusions}." - ) + for k in exclusions.keys(): + ex = catalog.search(**{k: exclusions[k]}) + catalog.esmcat._df = pd.concat([catalog.df, ex.df]).drop_duplicates( + keep=False + ) + logger.info( + f"Removing {len(ex.df)} assets based on exclusion dict '{k}': {exclusions[k]}." + ) full_catalog = deepcopy(catalog) # Used for searching for fixed fields if other_search_criteria: catalog = catalog.search(**other_search_criteria)