Merge pull request #83 from JGCRI/dev-acs-jossrevisions
ACS - JOSS revisions for adding units and updated quickstart
crvernon authored Dec 4, 2023
2 parents 59e0725 + 882fed9 commit 1255e65
Showing 10 changed files with 387 additions and 179 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -16,14 +16,14 @@ To install for use, run the following:
python -m pip install git+https://github.com/JGCRI/stitches.git
```

-To install pre-built run the following:
+To install package data that has already been pre-processed run the following:
```python
import stitches

stitches.install_package_data()
```

-For users that would like to generate the run the following:
+For users who would like to generate the package data locally, run the following:

```python
import stitches
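(The snippet above is cut off by the diff view. Based on the `generate_pkg_data` export added to `stitches/__init__.py` in this same commit, the full call is presumably the following sketch:)

```python
import stitches

# Presumed completion of the truncated README snippet: the function name comes
# from the new export in stitches/__init__.py below. Per the docstring in
# generate_package_data.py, regenerating the archive takes several hours.
stitches.generate_pkg_data()
```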
4 changes: 3 additions & 1 deletion docs/source/reference/contributing.rst
@@ -3,6 +3,8 @@ Contributing to **stitches**

Whether you find a typo in the documentation, find a bug, or want to develop functionality that you think will make **stitches** more robust, you are welcome to contribute!

+We welcome third-party patches, which are essential for advancing the science and architecture of STITCHES.
+But there are a few guidelines that we ask contributors to follow, guidelines that ease the maintainers' organizational and logistical duties, while encouraging development by others. All contributors agree to abide by the code of conduct.

Opening issues
______________
@@ -40,7 +42,7 @@ The following is the recommended workflow for contributing to **stitches**:
3. Add your recommended changes and ensure all tests pass, then commit your changes:

-Ensure your tests pass locally before pushing to your remote branch where GitHub actions will launch CI services to build the package, run the test suite, and evaluate code coverage. To do this, ensure that ``pytest`` has been installed then navigate to the root of your cloned directory (e.g., <my-path>/stitches) and simply execute ``pytest`` in the terminal.
+Ensure your tests pass locally before pushing to your remote branch where GitHub actions will launch CI services to build the package, run the test suite, and evaluate code coverage. To do this, ensure that ``pytest`` has been installed then navigate to the root of your cloned directory (e.g., <my-path>/stitches) and simply execute ``pytest`` in the terminal.

.. code-block:: bash
516 changes: 349 additions & 167 deletions notebooks/stitches-quickstart.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions stitches/__init__.py
@@ -7,5 +7,6 @@
from .make_pangeo_table import make_pangeo_comparison, make_pangeo_table
from .make_tas_archive import make_tas_archive
from .package_data import *
+from .generate_package_data import generate_pkg_data

__version__ = "0.10.0"
Binary file modified stitches/data/.DS_Store
Binary file not shown.
20 changes: 19 additions & 1 deletion stitches/fx_recipe.py
@@ -3,6 +3,9 @@

import os

+import numpy as np
+import pkg_resources
+
import pandas as pd
import pkg_resources

@@ -1067,7 +1070,22 @@ def make_recipe(
if not type(N_matches) is int:
raise TypeError("N_matches: must be an integer")
if not type(tol) is float:
raise TypeError("tol: must be a float")
raise TypeError(f"tol: must be a float")


if (target_data['unit'].unique() != archive_data['unit'].unique()):
raise TypeError(f"units of target and archive data do not match")

# pull off the unit so we have it
unit = target_data['unit'].unique().copy()


# drop the units from each dataframe so matching functions don't need updates
target_data = target_data[['experiment', 'variable', 'ensemble', 'model', 'start_yr',
'end_yr', 'year', 'fx', 'dx']].copy()
archive_data = archive_data[['experiment', 'variable', 'ensemble', 'model', 'start_yr',
'end_yr', 'year', 'fx', 'dx']].copy()


# If there are non tas variables to be stitched, subset the archive to limit
# the coverage to only the entries with the complete coverage.
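A minimal sketch of what the new guard in `make_recipe` does, using toy frames in place of the real target/archive chunk tables (the column values here are illustrative):

```python
import pandas as pd

# Toy stand-ins for the target and archive tables passed to make_recipe();
# each is assumed to carry exactly one unit in its 'unit' column.
target_data = pd.DataFrame({"unit": ["K", "K"], "dx": [0.10, 0.12]})
archive_data = pd.DataFrame({"unit": ["degC", "degC"], "dx": [0.09, 0.11]})

try:
    # unique() returns a length-1 array per table, so the comparison yields a
    # single boolean and mismatched units fail fast before any matching runs.
    if target_data["unit"].unique() != archive_data["unit"].unique():
        raise TypeError("units of target and archive data do not match")
except TypeError as err:
    print(err)  # units of target and archive data do not match
```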
9 changes: 5 additions & 4 deletions stitches/generate_package_data.py
@@ -3,16 +3,17 @@
import stitches.make_tas_archive as mk_tas


-def generate_pkg_data(smoothing_window=9, chunk_window=9, add_staggered=False):
-"""Generate all of the internal package data for stitches, the tas archive,
+def generate_pkg_data(smoothing_window=9, chunk_window=9, add_staggered=False,
+                      anomaly_startYr=1995, anomaly_endYr=2014):
+""" Generate all of the internal package data for stitches, the tas archive,
matching archive, & the table of pangeo files.
:return: Nothing, running this function should in addition to temporary files
generate all of the csv files that are included in the prebuilt stitches package.
"""

# This takes several hours to run.
-mk_tas.make_tas_archive()
+mk_tas(anomaly_startYr=anomaly_startYr, anomaly_endYr=anomaly_endYr)

# These two functions run quickly.
mk_match(
@@ -22,4 +23,4 @@ def generate_pkg_data(smoothing_window=9, chunk_window=9, add_staggered=False):
)
mk_pangeo()

-return None
+return None
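Taken together with the `make_tas_archive` changes below, the new keyword arguments let callers choose the anomaly reference period. A usage sketch with the defaults spelled out:

```python
import stitches

# All five keywords exist in the new signature; 1995-2014 are the defaults,
# matching the previously hard-coded reference period. This regenerates the
# package data locally and, per the docstring, takes several hours to run.
stitches.generate_pkg_data(
    smoothing_window=9,
    chunk_window=9,
    add_staggered=False,
    anomaly_startYr=1995,
    anomaly_endYr=2014,
)
```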
4 changes: 3 additions & 1 deletion stitches/make_matching_archive.py
@@ -47,7 +47,7 @@ def make_matching_archive(
["model", "experiment", "ensemble", "year", "variable", "value"]
]
data = data.reset_index(drop=True).copy()
group_by = ["model", "experiment", "ensemble", "variable"]
group_by = ['model', 'experiment', 'ensemble', 'variable', 'unit']
out = []
for key, d in data.groupby(group_by):
dat = d.reset_index(drop=True).copy()
@@ -73,6 +73,7 @@
else:
dd = prep.chunk_ts(df=dat, n=chunk_window)
rslt = prep.get_chunk_info(dd)
+rslt['unit'] = (np.repeat(d['unit'].unique(), len(rslt))).copy()
out.append(rslt)
# end if-else
# end of the for loop
@@ -103,6 +104,7 @@
else:
dd = prep.chunk_ts(df=dat, n=chunk_window, base_chunk=offset)
rslt = prep.get_chunk_info(dd)
+rslt['unit'] = (np.repeat(d['unit'].unique(), len(rslt))).copy()
out.append(rslt)
# end if-else
# end of the for loop over (model-experiment-ensemble-variable) combos
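The `np.repeat` line added in both loops simply broadcasts the group's single unit string across every row of the chunk-info table returned by `prep.get_chunk_info`; a toy illustration:

```python
import numpy as np
import pandas as pd

# d stands in for one (model, experiment, ensemble, variable, unit) group and
# rslt for the chunk-info table returned by prep.get_chunk_info().
d = pd.DataFrame({"unit": ["K", "K", "K"], "value": [0.1, 0.2, 0.3]})
rslt = pd.DataFrame({"start_yr": [1850, 1859], "end_yr": [1858, 1867]})

# d['unit'].unique() is a length-1 array, so np.repeat tiles it len(rslt) times.
rslt["unit"] = np.repeat(d["unit"].unique(), len(rslt))
print(rslt)
#    start_yr  end_yr unit
# 0      1850    1858    K
# 1      1859    1867    K
```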
8 changes: 5 additions & 3 deletions stitches/make_tas_archive.py
@@ -70,7 +70,7 @@ def get_global_tas(path):
:return: str path to the location of file containing the weighted global mean.
"""
-temp_dir = pkg_resources.resource_filename("stitches", "data")
+temp_dir = pkg_resources.resource_filename('stitches', 'data/temp-data')

if not os.path.isdir(temp_dir):
os.mkdir(temp_dir)
@@ -205,7 +205,7 @@ def paste_historical_data(input_data):
return d


-def make_tas_archive():
+def make_tas_archive(anomaly_startYr=1995, anomaly_endYr=2014):
"""
The function that creates the archive from Pangeo-hosted CMIP6 data.
@@ -364,7 +364,9 @@ def make_tas_archive():
#
# In this section convert from absolute value to an anomaly & concatenate the historical data
# with the future scenarios.
-data_anomaly = calculate_anomaly(cleaned_data).copy()
+data_anomaly = calculate_anomaly(cleaned_data,
+                                 startYr= anomaly_startYr,
+                                 endYr= anomaly_endYr).copy()
data = paste_historical_data(data_anomaly)
data = data.sort_values(by=["variable", "experiment", "ensemble", "model", "year"])
data = data[
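`calculate_anomaly` itself is not shown in this diff; the following is a hedged sketch of what its `startYr`/`endYr` arguments plausibly control. The function body, grouping columns, and the `_sketch` name are assumptions for illustration, not the package's actual implementation:

```python
import pandas as pd


def calculate_anomaly_sketch(data: pd.DataFrame, startYr: int = 1995, endYr: int = 2014) -> pd.DataFrame:
    """Hypothetical stand-in for calculate_anomaly(): convert absolute values
    to anomalies relative to each series' startYr-endYr reference-period mean."""
    group_by = ["model", "experiment", "ensemble", "variable"]  # assumed grouping
    # Mean of each series over the reference window, e.g. 1995-2014.
    ref = (
        data[data["year"].between(startYr, endYr)]
        .groupby(group_by)["value"]
        .mean()
        .reset_index()
        .rename(columns={"value": "ref_mean"})
    )
    # Subtract the reference mean from every year of the matching series.
    out = data.merge(ref, on=group_by, how="left")
    out["value"] = out["value"] - out["ref_mean"]
    return out.drop(columns="ref_mean")
```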
