diff --git a/stitches/fx_data.py b/stitches/fx_data.py index 0b1ce613..82da4bfb 100644 --- a/stitches/fx_data.py +++ b/stitches/fx_data.py @@ -6,11 +6,16 @@ def get_lat_name(ds): - """Get the name for the latitude values (could be either lat or latitude). + """Get the name for the latitude values in an xarray dataset. - :param ds: xarray dataset of CMIP data. + This function searches for latitude coordinates in the dataset, + which could be named either 'lat' or 'latitude'. - :return: the string name for the latitude variable. + :param ds: The dataset from which to retrieve the latitude coordinate name. + :type ds: xarray.Dataset + :returns: The name of the latitude variable. + :rtype: str + :raises RuntimeError: If no latitude coordinate is found in the dataset. """ for lat_name in ["lat", "latitude"]: if lat_name in ds.coords: @@ -19,11 +24,13 @@ def get_lat_name(ds): def global_mean(ds): - """Get the weighted global mean for a variable. - - :param ds: xarray dataset of CMIP data. + """ + Calculate the weighted global mean for a variable in an xarray dataset. - :return: xarray dataset of the weighted global mean. + :param ds: The xarray dataset of CMIP data. + :type ds: xarray.Dataset + :returns: The xarray dataset of the weighted global mean. + :rtype: xarray.Dataset """ lat = ds[get_lat_name(ds)] weight = np.cos(np.deg2rad(lat)) @@ -33,11 +40,11 @@ def global_mean(ds): def get_ds_meta(ds): - """Get the meta data information from the xarray data set. - - :param ds: xarray dataset of CMIP data. + """ + Get the metadata information from an xarray dataset. - :return: pandas dataset of MIP information. + :param ds: xarray dataset of CMIP data. + :return: pandas DataFrame of MIP information. 
""" v = ds.variable_id diff --git a/stitches/fx_match.py b/stitches/fx_match.py index ac18eed7..96174b66 100644 --- a/stitches/fx_match.py +++ b/stitches/fx_match.py @@ -8,20 +8,22 @@ # Internal fx def internal_dist(fx_pt, dx_pt, archivedata, tol=0): - """This function calculates the euclidean distance between the target values (fx and dx) - and the archive values contained in the data frame. It will be used to help select which - of the archive values best matches the target values. To ensure a consistent unit across - all dimensions of the space, dx is updated to be windowsize*dx so that it has units of - degC. This results in a distance metric (Euclidean/l2) in units of degC. - Could _very_ easily make that choice of unit consistency optional via arg and if-statement. - - :param fx_pt: a single value of the target fx value - :param dx_pt: a single value of the target dx value - :param archivedata: a data frame of the archive fx and dx values - :param archivedata: a data frame of the archive fx and dx values - :param tol: a tolerance for the neighborhood of matching. defaults to 0 degC - only the nearest-neighbor is returned - - :return: a data frame with the target data and the corresponding matched archive data. + """ + Calculate the Euclidean distance between target and archive values. + + This function calculates the Euclidean distance between the target values (fx and dx) + and the archive values contained in the dataframe. It is used to select which + archive values best match the target values. To ensure consistent units across + all dimensions, dx is updated to be windowsize*dx with units of degC, resulting + in a distance metric (Euclidean/l2) in units of degC. The choice of unit consistency + could be made optional via an argument and if-statement. + + :param fx_pt: A single value of the target fx value. + :param dx_pt: A single value of the target dx value. + :param archivedata: A dataframe of the archive fx and dx values. 
+ :param tol: A tolerance for the neighborhood of matching; defaults to 0 degC, + returning only the nearest neighbor. + :return: A dataframe with the target data and the corresponding matched archive data. """ # Check the inputs @@ -73,11 +75,11 @@ def internal_dist(fx_pt, dx_pt, archivedata, tol=0): # Internal fx def shuffle_function(dt): - """Randomly shuffle the deck, this should help with the matching process. - - :param dt: a data of archive values that will be used in the matching process. + """ + Randomly shuffle the deck to assist with the matching process. - :return: a randomly ordered data frame. + :param dt: A DataFrame of archive values used in the matching process. + :return: A DataFrame with rows in random order. """ nrow = dt.shape[0] out = dt.sample(nrow, replace=False) @@ -87,15 +89,18 @@ def shuffle_function(dt): # Internal fx def drop_hist_false_duplicates(matched_data): - """A helper function to remove false duplicate matches in the historical period. For - example, target 1850 gets 1872 data from realization 13 of SSP126 and SSP585. - The metadata of these archive values are different, but the actual data - values are identical because we just pasted in the same historical data to - every Experiment. So this function keeps only the first match. + """ + Remove false duplicate matches in the historical period. - :param matched_data: pandas object returned from match_neighborhood. + This function is used to remove false duplicate matches in the historical period. + For example, if the target year 1850 gets data from 1872 from realization 13 of + SSP126 and SSP585, the metadata of these archive values are different, but the + actual data values are identical because the same historical data was pasted into + every experiment. This function keeps only the first match. 
- :return: a data frame of matched data with the same structure as the input, with false duplicates in the historical period dropped + :param matched_data: pandas DataFrame returned from match_neighborhood. + :return: DataFrame with the same structure as the input, with false duplicates + in the historical period dropped. """ # Subset the idealized runs, since these are not concatenated with the historical time series @@ -227,19 +232,21 @@ def drop_hist_false_duplicates(matched_data): def match_neighborhood( target_data, archive_data, tol: float = 0, drop_hist_duplicates: bool = True ): - """This function takes data frames of target and archive data and calculates the euclidean distance between the target values (fx and dx) and the archive values. - - :param target_data: a data frame of the target fx and dx values - - :param archive_data: a data frame of the archive fx and dx values - - :param tol: a tolerance for the neighborhood of matching. defaults to 0 degC - only the nearest-neighbor is returned - :type tol: float - - :param drop_hist_duplicates: a Boolean True/False that defaults to True to determine whether to consider historical values across SSP scenarios to be duplicates and therefore all but one dropped from matching (True) or to be distinct points for matching (False). - :type drop_hist_duplicates: bool - - :return: a data frame with the target data and the corresponding matched archive data. + """ + Calculate the Euclidean distance between target and archive data. + + This function takes data frames of target and archive data and calculates the + Euclidean distance between the target values (fx and dx) and the archive values. + + :param target_data: Data frame of the target fx and dx values. + :param archive_data: Data frame of the archive fx and dx values. + :param tol: Tolerance for the neighborhood of matching. Defaults to 0 degC, + meaning only the nearest-neighbor is returned. Must be a float. 
+ :param drop_hist_duplicates: Determines whether to consider historical values + across SSP scenarios as duplicates (True) and drop all but one from matching, + or to consider them as distinct points for matching (False). Defaults to True. + :type drop_hist_duplicates: bool + :return: Data frame with the target data and the corresponding matched archive data. """ # Check the inputs of the functions if util.nrow(target_data) <= 0: diff --git a/stitches/fx_pangeo.py b/stitches/fx_pangeo.py index 3b33e81b..9bd3e062 100644 --- a/stitches/fx_pangeo.py +++ b/stitches/fx_pangeo.py @@ -7,9 +7,13 @@ def fetch_pangeo_table(): - """Get a copy of the pangeo archive contents + """ + Fetch the Pangeo CMIP6 archive table of contents as a pandas DataFrame. + + Retrieve a copy of the Pangeo CMIP6 archive contents, which includes information + about the available models, sources, experiments, ensembles, and more. - :return: a pandas data frame containing information about the model, source, experiment, ensemble and so on that is available for download on pangeo. + :return: A pandas DataFrame with details on the datasets available for download from Pangeo. """ # The url path that contains to the pangeo archive table of contents. @@ -20,12 +24,12 @@ def fetch_pangeo_table(): def fetch_nc(zstore: str): - """Extract data for a single file. - - :param zstore: str of the location of the cmip6 data file on pangeo. - :type zstore: str + """ + Extract data for a single file from Pangeo. - :return: an xarray containing cmip6 data downloaded from pangeo. + :param zstore: The location of the CMIP6 data file on Pangeo. + :type zstore: str + :return: An xarray Dataset containing CMIP6 data downloaded from Pangeo. 
""" ds = xr.open_zarr(fsspec.get_mapper(zstore)) ds.sortby("time") diff --git a/stitches/fx_processing.py b/stitches/fx_processing.py index 2124bdf4..18ec08e9 100644 --- a/stitches/fx_processing.py +++ b/stitches/fx_processing.py @@ -11,13 +11,14 @@ def calculate_rolling_mean(data, size): - """ " - Calculate the rolling mean for the data frame with a user defined size centered window. - :param data: A data frame of the cmip absolute temperature - :type data: pandas.core.frame.DataFrame - :param size: An integer value for the size of the window to use when calculating the rolling mean - :type size: int - :return: A pandas data frame of the smoothed time series (rolling mean applied) + """ + Calculate the rolling mean for the data frame with a user-defined size centered window. + + :param data: A data frame of the CMIP absolute temperature. + :type data: pandas.core.frame.DataFrame + :param size: An integer value for the size of the window to use when calculating the rolling mean. + :type size: int + :return: A pandas data frame of the smoothed time series with the rolling mean applied. """ # Check inputs util.check_columns( @@ -61,15 +62,20 @@ def calculate_rolling_mean(data, size): def chunk_ts(df, n, base_chunk=0): - """Format a data frame into an array of data frames containing data for n-sized years of successive data. - :param df: data frame of climate data to chunk into different periods - :type df: pandas DataFrame - :param n: the size of the windows to chunk into separate periods - :type n: int - :param base_chunk: a helper argument for creating all of the staggered chunks, defaults to 0 (original behavior) - :type base_chunk: int - :return: pandas DataFrame identical to df with the addition of a chunk column + """ + Format a data frame into an array of data frames with n-sized years of successive data. + This function takes a data frame of climate data and chunks it into separate periods, + each containing data for a span of `n` years. 
It adds a 'chunk' column to the data frame + to indicate the period each row belongs to. + + :param df: Data frame of climate data to chunk into different periods. + :type df: pandas.DataFrame + :param n: The size of the windows to chunk into separate periods. + :type n: int + :param base_chunk: A helper argument for creating staggered chunks, defaults to 0 (original behavior). + :type base_chunk: int + :return: A pandas DataFrame identical to `df` with the addition of a 'chunk' column. """ # Check inputs @@ -102,11 +108,13 @@ def chunk_ts(df, n, base_chunk=0): def get_chunk_info(df): - """Determine the value and the rate of change for each chunk. - :param df: data frame of climate data chunked into different periods - :type df: pandas DataFrame - :return: pandas DataFrame of the chunk information, the start and end years as well as the chunk value (fx) - and the chunk rate of change (dx). + """ + Determine the value and the rate of change for each chunk. + + :param df: Data frame of climate data chunked into different periods. + :type df: pandas.DataFrame + :return: A pandas DataFrame with the chunk information, including the start and end years, the chunk value (fx), + and the chunk rate of change (dx). """ # Check the inputs @@ -189,19 +197,20 @@ def get_chunk_info(df): def subset_archive(staggered_archive, end_yr_vector): - """ Take a staggered archive with chunked data for a 9 year window following - each year in 1850-2100 and subset to the entries with `end_yr` in - `end_yr_vector`. - :param staggered_archive: A formatted archive with chunked data starting - in each year - :type df: pandas DataFrame - - :param end_yr_vector: vector of end_yrs want to subset the archive to. - - - :return: pandas DataFrame of the subsetted archive, same format just fewer - entries - """ + """ + Subset a staggered archive to entries with `end_yr` in `end_yr_vector`. 
+ + This function takes a staggered archive with chunked data for a 9-year window + following each year in 1850-2100 and subsets it to the entries with `end_yr` + in `end_yr_vector`. + + :param staggered_archive: A formatted archive with chunked data starting in each year. + :type staggered_archive: pandas.DataFrame + :param end_yr_vector: Vector of end years to subset the archive to. + :type end_yr_vector: list or similar iterable + :return: A pandas DataFrame of the subsetted archive, same format but fewer entries. + :rtype: pandas.DataFrame + """ out = staggered_archive[staggered_archive['end_yr'].isin(end_yr_vector)].reset_index(drop=True).copy() return out diff --git a/stitches/fx_recipe.py b/stitches/fx_recipe.py index 4a70a3fa..05604e18 100644 --- a/stitches/fx_recipe.py +++ b/stitches/fx_recipe.py @@ -12,13 +12,17 @@ def get_num_perms(matched_data): - """A function to give you the number of potential permutations from a - matched set of data. Ie Taking in the the results of `match_neighborhood(target, archive)`. + """ + Calculate the number of potential permutations from matched data. + + This function takes the results of `match_neighborhood(target, archive)` and + determines the total number of potential permutations of the matches that cover + the period 1850-2100 in the matched_data dataframe. It also provides a breakdown + of how many matches are in each period of the target data. - :param matched_data: data output from match_neighborhood. - :return: A list with two entries. First, the total number of potential permutations of the - matches that cover 1850-2100 of the target data in the matched_data dataframe. The second, a data frame with - the break down of how many matches are in each period of the target data + :param matched_data: The data output from match_neighborhood. + :return: A list with two entries: the total number of potential permutations, and + a dataframe with the breakdown of matches per period. 
""" # Check inputs util.check_columns( @@ -77,21 +81,21 @@ def get_num_perms(matched_data): def remove_duplicates(md, archive): - """A function that makes sure that within a single given matched recipe that - there each archive point used is unique. When two target tgav windows in - the trajectory match to the same archive window, the target window with - smaller Euclidean distance keeps the match, and the other target window - gets re-matched with its nearest-neighbor match from a new archive, the - previous one with all matched points removed. - - :param md: A data frame with results of matching for a single - tgav recipe. Either because match_neighborhood was - used specifically to return NN or because the multiple - matches have been permuted into new recipes and then - split with this function being applied to each recipe. - :param archive: data frame object consisting of the tas archive to use - for re-matching duplicate points. - :return: data frame with same structure as raw matched, with duplicate matches replaced. + """ + Ensure each archive point in a matched recipe is unique. + + When two target tgav windows in the trajectory match to the same archive window, + the target window with the smaller Euclidean distance retains the match. The other + target window is re-matched with its nearest-neighbor from a new archive, which + excludes all previously matched points. + + :param md: A data frame with the results of matching for a single tgav recipe. + This can be from match_neighborhood specifically returning NN, or from + multiple matches permuted into new recipes and then split, with this + function applied to each recipe. + :param archive: A data frame consisting of the tas archive for re-matching. + :return: A data frame with the same structure as the raw matched data, but with + duplicate matches replaced. 
""" if len(md["target_year"].unique()) < util.nrow(md): raise TypeError( @@ -247,21 +251,27 @@ def remove_duplicates(md, archive): def permute_stitching_recipes( N_matches: int, matched_data, archive, optional=None, testing: bool = False ): - """A function to sample from input `matched_data` (the the results of `match_neighborhood(target, archive, tol)` to produce permutations of possible stitching recipes that will match the target data. + """ + Sample from `matched_data` to produce permutations of stitching recipes. + + This function samples from `matched_data` (the results of `match_neighborhood(target, archive, tol)`) + to produce permutations of possible stitching recipes that will match the target data. - :param N_matches: a int to the maximum number of matches per target data - :type N_matches: int + :param N_matches: The maximum number of matches per target data. + :type N_matches: int - :param matched_data: data output from match_neighborhood. + :param matched_data: Data output from `match_neighborhood`. - :param archive: the archive data to use for re-matching duplicate points + :param archive: The archive data to use for re-matching duplicate points. - :param optional: a previous output of this function that contains a list of already created recipes to avoid re-making (this is not implemented). + :param optional: A previous output of this function that contains a list of already created recipes + to avoid re-making (this is not implemented). - :param testing: Boolean True/False. Defaults to False. When True, the behavior can be reliably replicated without setting global seeds. - :type testing: bool + :param testing: When True, the behavior can be reliably replicated without setting global seeds. + Defaults to False. + :type testing: bool - :return: data frame with same structure as raw matched, with duplicate matches replaced. + :return: A data frame with the same structure as the raw matched data, with duplicate matches replaced. 
""" # Check inputs util.check_columns( @@ -667,13 +677,15 @@ def permute_stitching_recipes( def handle_transition_periods(rp): - """Go through the recipe and when there is a transition period, aka the archive years span both the - historical and future scenarios go through and insert in an extra period so that they don't do - this over lap any more. + """ + Handle transition periods in the recipe data frame. - :param rp: a data frame of the recipe. + This function processes the recipe data frame to ensure that there are no overlapping + historical and future experiments during transition periods. It inserts extra periods + to separate historical and future scenarios. - :return: a data frame of of the recipe with no over lapping historical/future experiments, this is now ready to join with pangeo information. + :param rp: A data frame of the recipe. + :return: A data frame of the recipe with separated historical/future experiments, ready to join with Pangeo information. """ util.check_columns( rp, @@ -827,18 +839,17 @@ def internal_func(x): def handle_final_period(rp): - """Go through a recipe and ensure that all of the periods have the same archive - and target period length, if not update to reflect the target period length. - Otherwise you'll end up with extra years in the stitched data. This is really - only an issue for the final period of target data because sometimes that period is somewhat short. - OR if the normal sized target window gets matched to the final period of data from one - of the archive matches. Since the final period is typically only one year shorter than the - full window target period in this case, we simply repeat the final archive year to get - enough matches. - - :param rp: a data frame of the recipe. - - :return: a recipe data frame that has target and archive periods of the same length. + """ + Ensure all periods in a recipe have matching target and archive lengths. 
+ + This function processes a recipe data frame to ensure that each period has the + same length for both the target and archive. This is particularly important for + the final period of target data, which may be shorter than expected. If the target + window is matched to the final period of archive data, the final archive year is + repeated to provide a sufficient number of matches. + + :param rp: A data frame of the recipe. + :return: A recipe data frame with target and archive periods of equal length. """ # Define an internal function that checks row by row if we are working @@ -908,14 +919,13 @@ def internal_func(x): def generate_gridded_recipe(messy_recipe, res: str = "mon"): - """Using a messy recipe create the recipe that can be used in the stitching process. - - :param messy_recipe: a data frame generated by the permute_recipes - - :param res: string mon or day - :type res: str + """ + Create a recipe for the stitching process using a messy recipe. - :return: a recipe data frame + :param messy_recipe: A data frame generated by the permute_recipes function. + :param res: The resolution of the recipe, either 'mon' for monthly or 'day' for daily. + :type res: str + :return: A data frame formatted as a recipe for stitching. """ # Check inputs util.check_columns( @@ -1009,29 +1019,24 @@ def make_recipe( non_tas_variables: [str] = None, reproducible: bool = False, ): - """Generate a stitching recipe from target and archive data. - - :param target_data: a pandas data frame of climate information to emulate. - - :param archive_data: a pandas data frame of temperature data to use as the archive to match on. 
- - :param N_matches: a int to the maximum number of matches per target data - :type N_matches: int - - :param res: str of 'mon' or 'day' to indicate the resolution of the stitched data - :type res: str - - :param tol: float value indicating the tolerance to use in the matching process, default set to 0.1 - :type tol: float - - :param non_tas_variables: a list of variables other than tas to stitch together, when using the default set to None only tas will be stitched together. - :type non_tas_variables: [str] - - :param reproducible: Boolean True/False. Defaults to False. If True, the call to permute_stitching_recipes() uses the testing=True argument so that the behavior can be reliably replicated without setting global seeds. - :type reproducible: bool - - - :return: pandas data frame of a formatted recipe + """ + Generate a stitching recipe from target and archive data. + + :param target_data: A pandas DataFrame of climate information to emulate. + :param archive_data: A pandas DataFrame of temperature data to use as the archive to match on. + :param N_matches: The maximum number of matches per target data. + :param res: Resolution of the stitched data, either 'mon' or 'day'. + :param tol: Tolerance used in the matching process, default is 0.1. + :param non_tas_variables: List of variables other than tas to stitch together; defaults to None, which stitches tas only. + :param reproducible: If True, ensures reproducible behavior by using the testing=True argument in permute_stitching_recipes(); defaults to False. + + :type N_matches: int + :type res: str + :type tol: float + :type non_tas_variables: list[str] + :type reproducible: bool + + :return: A pandas DataFrame of a formatted recipe. """ # Check the inputs diff --git a/stitches/fx_stitch.py b/stitches/fx_stitch.py index 5d42d76e..7de0baf2 100644 --- a/stitches/fx_stitch.py +++ b/stitches/fx_stitch.py @@ -11,9 +11,11 @@ def find_zfiles(rp): - """Determine which cmip files must be downloaded from pangeo. 
- :param rp: data frame of the recipes - :return: numpy.ndarray array of the gs:// files to pull from pangeo + """ + Determine which CMIP files must be downloaded from Pangeo. + + :param rp: Data frame of the recipes. + :return: Numpy ndarray of the gs:// files to pull from Pangeo. """ # Figure out which columns contain the string file @@ -23,9 +25,11 @@ def find_zfiles(rp): def find_var_cols(x): - """Determine which variables that are going to be downloaded. - :param x: pandas data frame of the stitches recipe - :return: a list of the variables that are going to be written out to the netcdf files. + """ + Determine the variables to be downloaded. + + :param x: pandas DataFrame of the stitches recipe. + :return: List of variables to be written to the NetCDF files. """ # Parse out the variable name so that we can use it @@ -40,13 +44,15 @@ def find_var_cols(x): def get_netcdf_values(i, dl, rp, fl, name): - """Extract the archive values from the list of downloaded cmip data - :param i: int index of the row of the recipe data frame - :param dl: list of xarray cmip files - :param rp: data frame of the recipe - :param fl: list of the cmip files - :param name: name of the variable file that is going to be processed. - :return: a slice of xarray (not sure confident on the technical term) + """ + Extract archive values from a list of downloaded CMIP data. + + :param i: Index of the row in the recipe data frame. + :param dl: List of xarray datasets containing CMIP files. + :param rp: DataFrame of the recipe. + :param fl: List of CMIP file paths. + :param name: Name of the variable file to process. + :return: A slice of xarray data (unsure about the technical term). """ file = rp[name][i] @@ -100,12 +106,14 @@ def get_netcdf_values(i, dl, rp, fl, name): def get_var_info(rp, dl, fl, name): - """Extract the cmip variable attribute information. 
- :param rp: data frame of the recipes - :param dl: list of the data files - :param fl: list of the data file names - :param name: string of the column containing the variable file name from rp - :return: pandas dataframe of the variable meta data + """ + Extract the CMIP variable attribute information. + + :param rp: Data frame of the recipes. + :param dl: List of the data files. + :param fl: List of the data file names. + :param name: String of the column containing the variable file name from rp. + :return: Pandas dataframe of the variable meta data. """ util.check_columns(rp, {name}) file = rp[name][0] @@ -120,12 +128,14 @@ def get_var_info(rp, dl, fl, name): def get_atts(rp, dl, fl, name): - """Extract the cmip variable attribute information. - :param rp: data frame of the recipes - :param dl: list of the data files - :param fl: list of the data file names - :param name: string of the column containing the variable files to process - :return: dict object containing the cmip variable information + """ + Extract the CMIP variable attribute information. + + :param rp: Data frame of the recipes. + :param dl: List of the data files. + :param fl: List of the data file names. + :param name: String of the column containing the variable file name from rp. + :return: Dict object containing the CMIP variable information. """ file = rp[name][0] index = int(np.where(fl == file)[0]) @@ -138,11 +148,13 @@ def get_atts(rp, dl, fl, name): def internal_stitch(rp, dl, fl): - """Stitch a single recipe into netcdf outputs - :param dl: list of xarray cmip files - :param rp: data frame of the recipe - :param fl: list of the cmip files - :return: a list of the data arrays for the stitched products of the different variables. + """ + Stitch a single recipe into netCDF outputs. + + :param dl: List of xarray CMIP files. + :param rp: DataFrame of the recipe. + :param fl: List of the CMIP file names. + :return: List of the data arrays for the stitched products of the different variables. 
""" rp = rp.sort_values(by=["stitching_id", "target_start_yr"]).copy() @@ -228,14 +240,13 @@ def internal_stitch(rp, dl, fl): def gridded_stitching(out_dir: str, rp): - """Stitch the gridded netcdfs for variables contained in recipe file and save. - - :param out_dir: string directory location where to write the netcdf files to - :type out_dir: str - - :param rp: data frame of the recipe including variables to stitch + """ + Stitch the gridded NetCDFs for variables contained in the recipe file and save them. - :return: a list of the netcdf files paths + :param out_dir: Directory location where to write the NetCDF files. + :type out_dir: str + :param rp: DataFrame of the recipe including variables to stitch. + :return: List of the NetCDF file paths. """ flag = os.path.isdir(out_dir) @@ -355,16 +366,15 @@ def gridded_stitching(out_dir: str, rp): return f -# end gridded stitching function - - def gmat_internal_stitch(row, data): - """Select data from a tas archive based on a single row in a recipe data frame, this - function is used to iterate over an entire recipe to do the stitching. + """ + Select data from a tas archive based on a single row in a recipe data frame. + + This function is used to iterate over an entire recipe to do the stitching. - :param row: pandas.core.series.Series a row entry of a fully formatted recipe - :param data: pandas.core.frame.DataFrame containing the tas values to be stitched together - :return: pandas.core.frame.DataFrame of tas values + :param row: A row entry of a fully formatted recipe as a pandas Series. + :param data: A DataFrame containing the tas values to be stitched together. + :return: A DataFrame of tas values. """ years = list(range(int(row["target_start_yr"]), int(row["target_end_yr"]) + 1)) select_years = list( @@ -396,11 +406,11 @@ def gmat_internal_stitch(row, data): def gmat_stitching(rp): - """Based on a recipe data frame stitch together a time series of global tas data. 
- - :param rp: pandas DataFrame - a fully formatted recipe data frame. + """ + Stitch together a time series of global tas data based on a recipe data frame. - :return: pandas DataFrame of stitched together tas data. + :param rp: A fully formatted recipe data frame as a pandas DataFrame. + :return: A pandas DataFrame of stitched together tas data. """ # Check inputs. @@ -489,4 +499,5 @@ def gmat_stitching(rp): final_output = final_output.reset_index(drop=True).copy() final_output = final_output.sort_values(["stitching_id", "year"]).copy() final_output = final_output.reset_index(drop=True).copy() + return final_output diff --git a/stitches/fx_util.py b/stitches/fx_util.py index e23fa343..b3d6d9ca 100644 --- a/stitches/fx_util.py +++ b/stitches/fx_util.py @@ -7,12 +7,12 @@ def combine_df(df1, df2): - """Join the data frames together. - - :param df1: pandas data frame 1. - :param df2: pandas data frame 2. + """ + Join two pandas data frames into a single data frame. - :return: a single pandas data frame. + :param df1: First pandas DataFrame. + :param df2: Second pandas DataFrame. + :return: A single pandas DataFrame resulting from the joining of df1 and df2. """ incommon = df1.columns.intersection(df2.columns) if len(incommon) > 0: @@ -28,13 +28,13 @@ def combine_df(df1, df2): def list_files(d): - """Return the absolute path for all of the files in a single directory with the exception of - .DS_Store files. - - - :param d: str name of a directory. + """ + Return the absolute path for all files in a directory, excluding .DS_Store files. - :return: a list of the files + :param d: Name of the directory. + :type d: str + :returns: List of file paths. + :rtype: list """ files = os.listdir(d) ofiles = [] @@ -46,13 +46,17 @@ def list_files(d): def selstr(a, start, stop): - """Select elements of a string from an array. - - :param a: array containing a string. - :param start: int referring to the first character index to select. 
- :param stop: int referring to the last character index to select. - - :return: array of strings + """ + Select elements of a string from start to stop index. + + :param a: Array containing a string. + :type a: str + :param start: First character index to select. + :type start: int + :param stop: Last character index to select. + :type stop: int + :returns: Array of strings. + :rtype: list """ if type(a) not in [str]: raise TypeError("a: must be a single string") @@ -65,12 +69,14 @@ def selstr(a, start, stop): def check_columns(data, names): - """Check to see if a data frame has all of the required columns. - - :param data: pd data - :param names: set of the required names + """ + Check if a DataFrame contains all required columns. - :return: an error message if there is a column is missing + :param data: DataFrame to check. + :type data: pd.DataFrame + :param names: Set of required column names. + :type names: set + :raises TypeError: If `names` is not a set or if required columns are missing. """ col_names = set(data.columns) @@ -82,24 +88,28 @@ def check_columns(data, names): def nrow(df): - """Return the number of rows - - :param df: pd data + """ + Return the number of rows in the data frame. - :return: an integer value that corresponds the number of rows in the data frame. + :param df: DataFrame to count rows for. + :type df: pd.DataFrame + :return: Number of rows in the data frame. + :rtype: int """ return df.shape[0] def remove_obs_from_match(md, rm): - """Return an updated matched data frame. The idea being that this function could be - useful to prevent envelope collapse between generated and target ensembles + """ + Return an updated matched data frame to prevent envelope collapse. - :param md: pd data - :param rm: pd data + This function is useful for preventing envelope collapse between + generated and target ensembles by removing observations from the match. - :return: data frame + :param md: Matched data as a pandas DataFrame. 
+ :param rm: Data to remove as a pandas DataFrame. + :return: Updated matched data frame as a pandas DataFrame. """ rm = rm[ [ @@ -146,16 +156,17 @@ def remove_obs_from_match(md, rm): def anti_join(x, y, bycols): - """Return a pd.DataFrame of the rows in x that do not appear in Table y. - Maintains only the columns of x with their names (but maybe a different - order?) - Adapted from https://towardsdatascience.com/masteriadsf-246b4c16daaf#74c6 + """ + Return a DataFrame of the rows in `x` that do not appear in `y`. - :param x: pd.DataFrame object - :param y: pd.DataFrame object - :param bycols: list-like; columns to do the anti-join on + This function maintains only the columns of `x` with their original names, + potentially in a different order. It performs an anti-join operation based + on the specified columns. - :return: pd.DataFrame object + :param x: DataFrame to be filtered. + :param y: DataFrame to filter against. + :param bycols: Columns to perform the anti-join on. + :return: A DataFrame containing the filtered result. """ # Check the inputs check_columns(x, set(bycols)) @@ -193,11 +204,12 @@ def anti_join(x, y, bycols): def load_data_files(subdir): - """Read in a list of data frames. - - :param subdir: pd.DataFrame str for a sub directory that exists + """ + Read in a list of data frames from a specified subdirectory. - :return: pd.DataFrame object + :param subdir: Subdirectory from which to load data files. + :type subdir: str + :return: A single pandas DataFrame object containing concatenated data from all files. """ # Make sure the sub directory exists. 
path = resources.files("stitches") / subdir diff --git a/stitches/generate_package_data.py b/stitches/generate_package_data.py index d0d2d8ce..1c674a00 100644 --- a/stitches/generate_package_data.py +++ b/stitches/generate_package_data.py @@ -5,11 +5,24 @@ def generate_pkg_data(smoothing_window=9, chunk_window=9, add_staggered=False, anomaly_startYr=1995, anomaly_endYr=2014): - """ Generate all of the internal package data for stitches, the tas archive, - matching archive, & the table of pangeo files. + """ + Generate all internal package data for stitches. + + This function creates the tas archive, matching archive, and the table of + pangeo files. It generates all of the CSV files included in the prebuilt + stitches package and may produce temporary files during the process. - :return: Nothing, running this function should in addition to temporary files - generate all of the csv files that are included in the prebuilt stitches package. + :param smoothing_window: The smoothing window size. + :type smoothing_window: int + :param chunk_window: The chunk window size. + :type chunk_window: int + :param add_staggered: Flag to add staggered output. + :type add_staggered: bool + :param anomaly_startYr: The start year for anomaly calculation. + :type anomaly_startYr: int + :param anomaly_endYr: The end year for anomaly calculation. + :type anomaly_endYr: int + :return: None """ # This takes several hours to run. 
@@ -23,4 +36,5 @@ def generate_pkg_data(smoothing_window=9, chunk_window=9, add_staggered=False, ) mk_pangeo() - return None \ No newline at end of file + return None + \ No newline at end of file diff --git a/stitches/install_pkgdata.py b/stitches/install_pkgdata.py index 30a888a2..19a2210b 100644 --- a/stitches/install_pkgdata.py +++ b/stitches/install_pkgdata.py @@ -11,14 +11,13 @@ class InstallPackageData: - """Download and unpack example data minted on Zenodo that matches the current installed + """ + Download and unpack example data minted on Zenodo that matches the current installed stitches distribution. - :param data_dir: Optional, Full path oto the directory you wish to store the data in. Default - os to install it in the data directory of the package. - - :type data_dir: str - + :param data_dir: Optional. Full path to the directory where you wish to store the data. + If not specified, the data will be installed in the data directory of the package. + :type data_dir: str """ # URL for DOI minted example data hosted on Zenodo @@ -99,13 +98,17 @@ def fetch_zenodo(self): def install_package_data(data_dir: str = None): - """Download and unpack Zenodo-minted stitches package data that matches the current installed - stitches distribution. + """ + Download and unpack Zenodo-minted stitches package data. + + This function matches the current installed stitches distribution and unpacks + the data into the specified directory or the default data directory of the package. - :param data_dir: Optional. Full path to the directory you wish to store the data in. Default is to install it in data directory of the package. - :type data_dir: str + :param data_dir: Optional. Full path to the directory to store the data. + Default is the data directory of the package. + :type data_dir: str - :return: Nothing, write a file out to package data. 
+ :return: None """ zen = InstallPackageData(data_dir=data_dir) diff --git a/stitches/make_matching_archive.py b/stitches/make_matching_archive.py index 3c855ef6..0a16f099 100644 --- a/stitches/make_matching_archive.py +++ b/stitches/make_matching_archive.py @@ -13,20 +13,24 @@ def make_matching_archive( smoothing_window: int = 9, chunk_window: int = 9, add_staggered: bool = False ): """ - The function that creates the archive of rate of change (dx) and mean (fx) values for - from the CMIP6 archive, these the the values that will be using in the matching portion - of the stitching pipeline. + Create an archive of rate of change (dx) and mean (fx) values. - :param smoothing_window: int default set to 9, the size of the smoothing window to be applied to the ts. - :type smoothing_window: int + This function processes the CMIP6 archive to produce values used in the + matching portion of the stitching pipeline. - :param chunk_window: int default set to 9, the size of the chunks of data to summarize with dx & fx. - :type chunk_window: int + :param smoothing_window: The size of the smoothing window to be applied to the time series. + Defaults to 9. + :type smoothing_window: int - :param add_staggered: boolean default set to False. If True, the staggered windows will be added to the archive. - :type add_staggered: bool + :param chunk_window: The size of the chunks of data to summarize with dx & fx. + Defaults to 9. + :type chunk_window: int - :return: str location of the matching archive file. + :param add_staggered: If True, staggered windows will be added to the archive. + Defaults to False. + :type add_staggered: bool + + :return: The file location of the matching archive. """ # Start by loading all of the tas files. 
raw_data = util.load_data_files("data/tas-data") diff --git a/stitches/make_pangeo_table.py b/stitches/make_pangeo_table.py index 2305445d..efb8efde 100644 --- a/stitches/make_pangeo_table.py +++ b/stitches/make_pangeo_table.py @@ -8,10 +8,12 @@ def make_pangeo_table(): """ - The function that makes a copy of the files that are available on pangeo that have corresponding files - in the the matching archive, this will be used in the stitching process. + Create a copy of the Pangeo files that have corresponding entries in the matching archive. - :return: Nothing, write a file out to package data. + This function is used in the stitching process to ensure that only relevant Pangeo files + are considered. It writes out a file to the package data directory. + + :return: None """ # Using the information about what experiment/ensemble/models that are available for matching. archive_path = resources.files("stitches") / "data" / "matching_archive.csv" @@ -63,11 +65,12 @@ def make_pangeo_table(): def make_pangeo_comparison(): """ - A function that makes a copy of the entire pangeo archive. This will be used in - testing to check to see if there has been an update to the pangeo archive, if there - is then may suggest updating the internal package data. + Create a copy of the entire Pangeo archive for testing. + + This function is used to check for updates in the Pangeo archive. If an update is + detected, it may suggest updating the internal package data. - :return: Nothing, write a file out to package data. + :return: None. Writes a file to package data. """ dat = pangeo.fetch_pangeo_table() diff --git a/stitches/make_tas_archive.py b/stitches/make_tas_archive.py index 9d283b74..d577aace 100644 --- a/stitches/make_tas_archive.py +++ b/stitches/make_tas_archive.py @@ -61,12 +61,14 @@ def rbind(dat1, dat2): def get_global_tas(path): """ - Calculate the weighted annual global mean temp. + Calculate the weighted annual global mean temperature. 
- :param path: a zstore path to the CMIP6 files stored on pangeo. - :type path: str + This function computes the weighted annual global mean surface air temperature + from CMIP6 files stored on Pangeo. - :return: str path to the location of file containing the weighted global mean. + :param path: A Zarr store path to the CMIP6 files. + :type path: str + :return: Path to the file containing the weighted global mean temperature. """ temp_dir = resources.files("stitches") / "data" / "temp-data" @@ -103,16 +105,19 @@ def get_global_tas(path): def calculate_anomaly(data, startYr=1995, endYr=2014): """ - Convert the temp data from absolute into an anomaly relative to a reference period. - - :param data: A data frame of the cmip absolute temperature - :type data: pandas.core.frame.DataFrame - :param startYr: The first year of the reference period, default set to 1995 corresponding to the IPCC defined reference period. - :type startYr: int - :param endYr: The final year of the reference period, default set to 2014 corresponding to the IPCC defined reference period. - :type endYr: int - - :return: A pandas data frame of cmip tgav as anomalies relative to a time-averaged value from a reference period, default uses a reference period form 1995-2014 + Convert CMIP absolute temperature data to anomalies relative to a reference period. + + This function transforms a DataFrame containing CMIP absolute temperature + data into a DataFrame of temperature anomalies. Anomalies are calculated + relative to a time-averaged value from a specified reference period. + + :param data: A DataFrame of the CMIP absolute temperature. + :type data: pandas.core.frame.DataFrame + :param startYr: The first year of the reference period. Defaults to 1995. + :type startYr: int + :param endYr: The final year of the reference period. Defaults to 2014. + :type endYr: int + :return: A DataFrame of CMIP temperature anomalies relative to the reference period. 
""" # Inputs @@ -151,14 +156,15 @@ def calculate_anomaly(data, startYr=1995, endYr=2014): def paste_historical_data(input_data): - """ " - Paste the appropriate historical data into each future scenario so that SSP585 realization 1, for - example, has the appropriate data from 1850-2100. + """ + Paste historical data into each future scenario. - :param input_data: A data frame of the cmip absolute temperature - :type input_data: pandas.core.frame.DataFrame + This function appends the appropriate historical data to each future scenario, + ensuring that, for example, SSP585 realization 1 contains data from 1850-2100. - :return: A pandas data frame of the smoothed time series (rolling mean applied) + :param input_data: A DataFrame of the CMIP absolute temperature. + :type input_data: pandas.core.frame.DataFrame + :return: A DataFrame of the smoothed time series with a rolling mean applied. """ # Relabel the historical values so that there is a continuous rolling mean between the @@ -206,9 +212,18 @@ def paste_historical_data(input_data): def make_tas_archive(anomaly_startYr=1995, anomaly_endYr=2014): """ - The function that creates the archive from Pangeo-hosted CMIP6 data. - - :return: Array of the tas files created. + Create the archive from Pangeo-hosted CMIP6 data. + + This function processes CMIP6 data hosted on Pangeo to create an archive of + temperature anomaly files. It calculates anomalies based on a specified reference + period. + + :param anomaly_startYr: Start year of the reference period for anomaly calculation. + :type anomaly_startYr: int + :param anomaly_endYr: End year of the reference period for anomaly calculation. + :type anomaly_endYr: int + :return: List of paths to the created tas files. + :rtype: list """ # Get the pangeo table of contents. 
df = pangeo.fetch_pangeo_table() @@ -445,4 +460,5 @@ def make_tas_archive(anomaly_startYr=1995, anomaly_endYr=2014): group.to_csv(path, index=False) print("Global tas data complete") + return files diff --git a/stitches/package_data.py b/stitches/package_data.py index 028787a6..62626a01 100644 --- a/stitches/package_data.py +++ b/stitches/package_data.py @@ -7,14 +7,14 @@ def fetch_quickstarter_data(variable: str) -> xr.Dataset: - """Get a quickstarter NetCDF dataset as an xarray object. - - :param variable: Target variable name. - :type variable: str + """ + Fetch a quickstarter NetCDF dataset as an xarray object. - :return: Xarray Dataset for example data - :rtype: xr.Dataset + :param variable: Target variable name. + :type variable: str + :return: Xarray Dataset for example data. + :rtype: xr.Dataset """ variable_lower = variable.casefold() diff --git a/stitches/tests/test_fx_recipe.py b/stitches/tests/test_fx_recipe.py index 95699841..e1429401 100644 --- a/stitches/tests/test_fx_recipe.py +++ b/stitches/tests/test_fx_recipe.py @@ -14,11 +14,12 @@ class TestRecipe(unittest.TestCase): - # ################################################### - # some test data - # ################################################### - # real ESM data that know will have some duplicates, etc to test with. - # Easier than trying to make complete + """ + A collection of unittests for testing the recipe functionality within the + stitches package. This includes tests for duplicate removal, permutation + of stitching recipes, and other recipe-related functions. + """ + TARGET_DATA = pd.DataFrame( data={ "ensemble": ["r1i1p1f1"] * 28, @@ -469,6 +470,12 @@ class TestRecipe(unittest.TestCase): # ################################################### def test_get_num_perms(self): + """ + Test the `get_num_perms` function to ensure it returns the correct number of permutations. 
+ + This test verifies that the function returns a list of permutations for the match data, + and that the list contains the expected columns. + """ # Read in the match test data. path = resources.files("stitches") / "tests" / "test-match_w_dup.csv" match_data = pd.read_csv(path) @@ -506,7 +513,12 @@ def test_get_num_perms(self): self.assertEqual(len(out), 2, "Test get_num_perms") def test_remove_duplicates(self): - """Test to make sure the remove_duplicates function if working correctly.""" + """ + Test the remove_duplicates function for correct operation. + + Ensures that the function correctly identifies and removes duplicate + entries from the dataset. + """ # Initial match data md = match_neighborhood( @@ -545,6 +557,7 @@ def test_remove_duplicates(self): ) def test_permute_stitching_recipes(self): + """Test the permute_stitching_recipes function for correct operation.""" # With tol < 0.17, the test data can only support one collapse free # recipe. So a message will be printed to that effect messy_rp1 = permute_stitching_recipes( diff --git a/stitches/tests/test_install_data.py b/stitches/tests/test_install_data.py index 6ae575d7..36ecacf5 100644 --- a/stitches/tests/test_install_data.py +++ b/stitches/tests/test_install_data.py @@ -5,13 +5,16 @@ class TestInstallRawData(unittest.TestCase): + """Tests for verifying the installation of raw data.""" + def test_instantiate(self): + """Test instantiation of InstallPackageData with a fake data directory.""" zen = sd.InstallPackageData(data_dir="fake") - # ensure default version is set + # Ensure default version is set self.assertEqual(str, type(zen.DEFAULT_VERSION)) - # ensure urls present for current version + # Ensure URLs are present for current version self.assertTrue(stitches.__version__ in zen.DATA_VERSION_URLS) diff --git a/stitches/tests/test_match.py b/stitches/tests/test_match.py index 36fae7ba..435fecce 100644 --- a/stitches/tests/test_match.py +++ b/stitches/tests/test_match.py @@ -14,7 +14,12 @@ class 
TestMatch(unittest.TestCase): def test_match_fxns(self): - """Testing the `match_neighborhood` functions""" + """ + Test the `match_neighborhood` function. + + This test ensures that the `match_neighborhood` function + operates correctly. + """ # Read in some made up target data. path = resources.files("stitches") / "tests" / "test-target_dat.csv" data = pd.read_csv(path) diff --git a/stitches/tests/test_pangeo.py b/stitches/tests/test_pangeo.py index 50d38be3..625dc8bc 100644 --- a/stitches/tests/test_pangeo.py +++ b/stitches/tests/test_pangeo.py @@ -7,10 +7,16 @@ class TestPangeo(unittest.TestCase): - # flag for run 'all' or for 'ci' + """ + A test case for Pangeo-related functions. + + This test class is used to run tests for functions that interact with the Pangeo data archive. + It includes a flag to run tests for continuous integration or for all cases. + """ RUN = "ci" def test_pangeo_fn(self): + """Test Pangeo-related functions for continuous integration or full cases.""" if TestPangeo.RUN == "ci": self.assertEqual(0, 0) else: diff --git a/stitches/tests/test_stitch.py b/stitches/tests/test_stitch.py index afae0cbf..3352f3d3 100644 --- a/stitches/tests/test_stitch.py +++ b/stitches/tests/test_stitch.py @@ -17,7 +17,13 @@ class TestStitch(unittest.TestCase): - # flag for run all or for ci + """ + Unit tests for stitching functions in the `stitches` package. + + This class provides a set of tests to ensure the correct functionality + of the stitching functions, which are used to combine different climate + model outputs into a single coherent dataset. 
+ """ RUN = "ci" # This is an example recipe that will be used to test the stitching functions @@ -40,6 +46,7 @@ class TestStitch(unittest.TestCase): ) def test_find_var_cols(self): + """Test the `find_var_cols` function for identifying variable columns.""" o = pd.DataFrame(data={"tas": [1, 2], "col2": [3, 4]}) self.assertEqual(len(find_var_cols(o)), 0) @@ -50,6 +57,12 @@ def test_find_var_cols(self): self.assertEqual(len(find_var_cols(o)), 2) def test_find_zfiles(self): + """ + Test the `find_zfiles` function to ensure it correctly identifies Zarr store file paths. + + This test verifies that the `find_zfiles` function correctly identifies file paths + for Zarr store files within a given DataFrame. + """ d = pd.DataFrame(data={"tas": [1, 2], "col2": [3, 4], "year": [1, 2]}) self.assertEqual(len(find_zfiles(d)), 0) @@ -76,7 +89,11 @@ def test_find_zfiles(self): self.assertTrue(len(file_list) != nrow(d)) def test_gmat_stitching(self): - # Check the output returned by gmat_stitching + """ + Test the output returned by `gmat_stitching`. + + This test checks the type and structure of the output to ensure it meets expected formats. + """ out = gmat_stitching(self.MY_RP) self.assertEqual(type(out), pd.core.frame.DataFrame) @@ -112,6 +129,13 @@ def test_gmat_stitching(self): gmat_stitching(rp) def test_gridded_related(self): + """ + Test functions related to gridded data stitching. + + This test suite covers the functionality of gridded data stitching, + ensuring that the output is consistent and errors are raised when + expected. 
+ """ if TestStitch.RUN == "ci": self.assertEqual(0, 0) else: diff --git a/stitches/tests/test_util.py b/stitches/tests/test_util.py index f16e53b4..6dd45198 100644 --- a/stitches/tests/test_util.py +++ b/stitches/tests/test_util.py @@ -17,12 +17,14 @@ class TestUtil(unittest.TestCase): + """Unit tests for utility functions in the `stitches` package.""" TABLE_A = pd.DataFrame(data={"col1": [1, 2, 3, 4], "col2": [3, 4, 5, 6]}) TABLE_B = pd.DataFrame(data={"col1": [3, 4, 9], "col2": [5, 6, 10]}) TABLE_C = pd.DataFrame(data={"col1": ["a", "b"], "col2": [4, 4]}) TABLE_D = pd.DataFrame(data={"col3": ["a", "b", "c"], "col4": [3, 4, 5]}) def test_check_columns(self): + """Test the check_columns function for proper type checking.""" self.assertTrue( isinstance(check_columns(self.TABLE_A, set(self.TABLE_A)), type(None)) ) @@ -32,11 +34,13 @@ def test_check_columns(self): check_columns(self.TABLE_A, {"fake"}) def test_nrow(self): + """Test the `nrow` function to ensure it returns the correct number of rows.""" self.assertTrue(nrow(self.TABLE_A), 4) doubble_A = pd.concat([self.TABLE_A, self.TABLE_A]) self.assertTrue(nrow(doubble_A), nrow(self.TABLE_A) * 2) def test_selstr(self): + """Test the `selstr` function for substring extraction.""" self.assertEqual(selstr("abcd", 0, 2), "ab") self.assertEqual(selstr("abcd", 0, 1), "a") with self.assertRaises(TypeError): @@ -45,6 +49,13 @@ def test_selstr(self): selstr({"abcd", "abcd"}, 1, 2) def test_anti_join(self): + """ + Test the `anti_join` function to ensure it returns the correct DataFrame. + + This test verifies that the `anti_join` function correctly returns a DataFrame + that contains only the rows from the first input DataFrame that do not have + matching key values in the second input DataFrame. 
+ """ tableA = pd.DataFrame( np.random.rand(4, 3), pd.Index(list("abcd"), name="Key"), ["A", "B", "C"] ).reset_index() @@ -58,13 +69,16 @@ def test_anti_join(self): self.assertEqual(tableD.shape, (2, 4), "unexpected shape returned by anti_join") def test_combine_df(self): + """Test the `combine_df` function for DataFrame combination.""" TABLE_E = combine_df(self.TABLE_C, self.TABLE_D) self.assertEqual(nrow(TABLE_E), nrow(self.TABLE_C) * nrow(self.TABLE_D)) with self.assertRaises(TypeError): combine_df(self.TABLE_C, self.TABLE_C) def test_file_fxns(self): - # Test the list files and load files function + """ + Test the `list_files` and `load_files` functions. + """ # Make sure that all of the files returned by the list_files function are all true. test_dir = resources.files("stitches") / "tests"