From cefcb45d544827465d84685a0c56d762052f4218 Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Wed, 28 Jun 2023 11:23:52 -0400 Subject: [PATCH 1/4] bump version for minor release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4018da7..027dbce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "astartes" -version = "1.0.3" +version = "1.1.0" authors = [ { name = "Jackson Burns", email = "jwburns@mit.edu" }, { name = "Himaghna Bhattacharjee", email = "himaghna@udel.edu" }, From a57a9e7a7f30922bad214e1852cfc50ab5858a17 Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Wed, 28 Jun 2023 11:37:27 -0400 Subject: [PATCH 2/4] simplify import for new table generation function, clarify m1 comment --- README.md | 10 +++++++--- astartes/utils/__init__.py | 7 +++++++ astartes/utils/{utils.py => user_utils.py} | 16 ++++++++++++---- test/unit/utils/test_utils.py | 2 +- 4 files changed, 27 insertions(+), 8 deletions(-) rename astartes/utils/{utils.py => user_utils.py} (94%) diff --git a/README.md b/README.md index 5636312..e948978 100644 --- a/README.md +++ b/README.md @@ -126,15 +126,19 @@ Configuration options for the featurization scheme can be found in the documenta To that end, the default behavior of `astartes` is to use `42` as the random seed and _always_ set it. Running `astartes` with the default settings will always produce the exact same results. We have verified this behavior on Debian Ubuntu, Windows, and Intel Macs from Python versions 3.7 through 3.11 (with appropriate dependencies for each version). -We are limited in our ability to test on M1 Macs, but from our limited manual testing we achieve perfect reproducbility in all cases _except occasionally_ with `KMeans` on Apple silicon. 
It has produced _slightly_ different results between platforms regardless of `random_state`, with up to two clusters being assigned differently resulting in data splits which are >99% identical. `astartes` is still consistent between runs on the same platform in all cases. -## Evaluate the impact of splitting algorithms +> **Note** +> We are limited in our ability to test on M1 Macs, but from our limited manual testing we achieve perfect reproducibility in all cases _except occasionally_ with `KMeans` on Apple silicon. +It has produced _slightly_ different results between platforms regardless of `random_state`, with up to two clusters being assigned differently resulting in data splits which are >99% identical. +`astartes` is still consistent between runs on the same platform in all cases, and other samplers are not impacted by this apparent bug. + +## Evaluate the Impact of Splitting Algorithms The `generate_regression_results_dict` function allows users to quickly evaluate the impact of different splitting techniques on any model supported by `sklearn`. All results are stored in a dictionary format and can be displayed in a neatly formatted table using the optional `print_results` argument. ``` from sklearn.svm import LinearSVR -from astartes.utils.utils import generate_regression_results_dict +from astartes.utils import generate_regression_results_dict sklearn_model = LinearSVR() results_dict = generate_regression_results_dict( diff --git a/astartes/utils/__init__.py b/astartes/utils/__init__.py index e69de29..141f516 100644 --- a/astartes/utils/__init__.py +++ b/astartes/utils/__init__.py @@ -0,0 +1,7 @@ +# import functions from this directory's contents so that users can import +# them with `from astartes.utils import *` +# internally, we do NOT do this to make the imports more explicit, i.e. 
+# `from astartes.utils.exceptions import *` +from .user_utils import generate_regression_results_dict + +__all__ = ["generate_regression_results_dict"] diff --git a/astartes/utils/utils.py b/astartes/utils/user_utils.py similarity index 94% rename from astartes/utils/utils.py rename to astartes/utils/user_utils.py index 8e65f7a..41ff343 100644 --- a/astartes/utils/utils.py +++ b/astartes/utils/user_utils.py @@ -2,8 +2,7 @@ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score from tabulate import tabulate -from astartes import train_val_test_split -from astartes.utils.exceptions import InvalidModelTypeError +import astartes def generate_regression_results_dict( @@ -57,7 +56,9 @@ def generate_regression_results_dict( } """ if not isinstance(sklearn_model, sklearn.base.BaseEstimator): - raise InvalidModelTypeError("Model must be an sklearn model") + raise astartes.utils.exceptions.InvalidModelTypeError( + "Model must be an sklearn model" + ) final_dict = {} for sampler in samplers: @@ -80,7 +81,14 @@ def generate_regression_results_dict( } # obtain indices - _, _, _, train_indices, val_indices, test_indices = train_val_test_split( + ( + _, + _, + _, + train_indices, + val_indices, + test_indices, + ) = astartes.train_val_test_split( X, train_size=train_size, val_size=val_size, diff --git a/test/unit/utils/test_utils.py b/test/unit/utils/test_utils.py index bad81d4..2c6664d 100644 --- a/test/unit/utils/test_utils.py +++ b/test/unit/utils/test_utils.py @@ -5,7 +5,7 @@ from astartes.samplers.interpolation import Random from astartes.utils.exceptions import InvalidModelTypeError -from astartes.utils.utils import generate_regression_results_dict +from astartes.utils import generate_regression_results_dict class Test_utils(unittest.TestCase): From ef10cf1a9c661da33eb7571fa8de8442be65d85f Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Wed, 28 Jun 2023 11:58:21 -0400 Subject: [PATCH 3/4] provision with micromamba action step deprecated, switch to 
new one --- .github/workflows/ipynb_ci.yml | 12 +++++++----- .github/workflows/reproduce_paper.yml | 12 +++++++----- .github/workflows/run_tests.yml | 12 +++++++----- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ipynb_ci.yml b/.github/workflows/ipynb_ci.yml index 012c840..df91d2e 100644 --- a/.github/workflows/ipynb_ci.yml +++ b/.github/workflows/ipynb_ci.yml @@ -21,13 +21,15 @@ jobs: name: Check ${{ matrix.nb-file }} Notebook Execution steps: - uses: actions/checkout@v3 - - uses: mamba-org/provision-with-micromamba@main + - uses: mamba-org/setup-micromamba@main with: - environment-file: false environment-name: temp - channels: defaults,conda-forge - channel-priority: flexible - extra-specs: | + condarc: | + channels: + - defaults + - conda-forge + channel_priority: flexible + create-args: | python=3.11 - name: Install dependencies run: | diff --git a/.github/workflows/reproduce_paper.yml b/.github/workflows/reproduce_paper.yml index 35d10b5..fc264c6 100644 --- a/.github/workflows/reproduce_paper.yml +++ b/.github/workflows/reproduce_paper.yml @@ -25,13 +25,15 @@ jobs: name: Reproduce Paper Data Splits steps: - uses: actions/checkout@v3 - - uses: mamba-org/provision-with-micromamba@main + - uses: mamba-org/setup-micromamba@main with: - environment-file: false environment-name: temp - channels: defaults,conda-forge - channel-priority: flexible - extra-specs: | + condarc: | + channels: + - defaults + - conda-forge + channel_priority: flexible + create-args: | python=3.11 - name: Install Dependencies run: | diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index b5da95f..a306924 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -28,13 +28,15 @@ jobs: name: ${{ matrix.os }} Python ${{ matrix.python-version }} Subtest steps: - uses: actions/checkout@v3 - - uses: mamba-org/provision-with-micromamba@main + - uses: mamba-org/setup-micromamba@main with: - environment-file: 
false environment-name: temp - channels: defaults,conda-forge - channel-priority: flexible - extra-specs: | + condarc: | + channels: + - defaults + - conda-forge + channel_priority: flexible + create-args: | python=${{ matrix.python-version }} - name: Install Dependencies run: | From c2e2846f3ab24a66ca72962e71cea1dcc4a3a880 Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Wed, 28 Jun 2023 12:02:54 -0400 Subject: [PATCH 4/4] fix import order in test utils --- test/unit/utils/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/utils/test_utils.py b/test/unit/utils/test_utils.py index 2c6664d..3412ca1 100644 --- a/test/unit/utils/test_utils.py +++ b/test/unit/utils/test_utils.py @@ -4,8 +4,8 @@ from sklearn.svm import LinearSVR from astartes.samplers.interpolation import Random -from astartes.utils.exceptions import InvalidModelTypeError from astartes.utils import generate_regression_results_dict +from astartes.utils.exceptions import InvalidModelTypeError class Test_utils(unittest.TestCase):