From 74f2f5337558b725a1ae54439c754f587aa8b871 Mon Sep 17 00:00:00 2001
From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Fri, 20 Dec 2024 13:25:48 -0600
Subject: [PATCH 1/4] CLN: Update deprecated pre-commit format

Auto-updated by pre-commit.
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 659986caa..c3a402350 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -45,7 +45,7 @@ repos:
     hooks:
       - id: end-of-file-fixer
         exclude: (.txt|^docs/JOSS1|^docs/JOSS2|^examples/data/)
-        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+        stages: [pre-commit, pre-merge-commit, pre-push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
       - id: trailing-whitespace
-        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
+        stages: [pre-commit, pre-merge-commit, pre-push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
         exclude: (.txt|^docs/JOSS1|^docs/JOSS2|^examples/data/)

From 42e12e7b89ca5f4e822f1ca99018f017277b7e24 Mon Sep 17 00:00:00 2001
From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Fri, 20 Dec 2024 14:15:04 -0600
Subject: [PATCH 2/4] fix: Allow pickling of SR3 and subordinate classes

This change adds a test parametrization to specifically check SR3
pickling.

Design-wise, this change shifts away from some of the safety in the
argument specifying the regularizer. Weighted regularizers are now the
same in all ways as nonweighted (scalar-weighted) ones. They already
had the same implementation; they just had extra guard code. Weight
shape is still checked to match the data shape in order to prevent
broadcasting, but using a scalar weight on a "weighted" regularizer is
now allowed.
---
 pysindy/utils/base.py                   | 136 +++++++++++++-----
 test/test_optimizers/debug.py           |  20 ++++
 test/test_optimizers/test_optimizers.py |   4 +-
 test/utils/test_utils.py                |  14 +--
 4 files changed, 99 insertions(+), 75 deletions(-)
 create mode 100644 test/test_optimizers/debug.py

diff --git a/pysindy/utils/base.py b/pysindy/utils/base.py
index 95cbf40a1..a2ddc8401 100644
--- a/pysindy/utils/base.py
+++ b/pysindy/utils/base.py
@@ -1,4 +1,5 @@
 import warnings
+from functools import wraps
 from typing import Callable
 from typing import Sequence
 from typing import Union
@@ -152,17 +153,19 @@ def reorder_constraints(arr, n_features, output_order="feature"):
     return arr.reshape(starting_shape).transpose([0, 2, 1]).reshape((n_constraints, -1))
 
 
-def _validate_prox_and_reg_inputs(func, regularization):
+def _validate_prox_and_reg_inputs(func):
+    """Add guard code to ensure weight and argument have compatible shape/type
+
+    Decorates prox and regularization functions.
+    """
+
+    @wraps(func)
     def wrapper(x, regularization_weight):
-        if regularization[:8] == "weighted":
-            if not isinstance(regularization_weight, np.ndarray):
-                raise ValueError(
-                    f"'regularization_weight' must be an array of shape {x.shape}."
-                )
+        if isinstance(regularization_weight, np.ndarray):
             weight_shape = regularization_weight.shape
             if weight_shape != x.shape:
                 raise ValueError(
-                    f"Invalid shape for 'regularization_weight':"
+                    f"Invalid shape for 'regularization_weight': "
                     f"{weight_shape}. Must be the same shape as x: {x.shape}."
                 )
         elif not isinstance(regularization_weight, (int, float)):
@@ -190,36 +193,66 @@ def get_prox(
     and returns an array of the same shape
     """
 
-    def prox_l0(
-        x: NDArray[np.float64],
-        regularization_weight: Union[float, NDArray[np.float64]],
-    ):
-        threshold = np.sqrt(2 * regularization_weight)
-        return x * (np.abs(x) > threshold)
+    prox = {
+        "l0": _prox_l0,
+        "weighted_l0": _prox_l0,
+        "l1": _prox_l1,
+        "weighted_l1": _prox_l1,
+        "l2": _prox_l2,
+        "weighted_l2": _prox_l2,
+    }
+    regularization = regularization.lower()
+    return prox[regularization]
 
-    def prox_l1(
-        x: NDArray[np.float64],
-        regularization_weight: Union[float, NDArray[np.float64]],
-    ):
-        return np.sign(x) * np.maximum(np.abs(x) - regularization_weight, 0)
 
+@_validate_prox_and_reg_inputs
+def _prox_l0(
+    x: NDArray[np.float64],
+    regularization_weight: Union[float, NDArray[np.float64]],
+):
+    threshold = np.sqrt(2 * regularization_weight)
+    return x * (np.abs(x) > threshold)
 
-    def prox_l2(
-        x: NDArray[np.float64],
-        regularization_weight: Union[float, NDArray[np.float64]],
-    ):
-        return x / (1 + 2 * regularization_weight)
 
-    prox = {
-        "l0": prox_l0,
-        "weighted_l0": prox_l0,
-        "l1": prox_l1,
-        "weighted_l1": prox_l1,
-        "l2": prox_l2,
-        "weighted_l2": prox_l2,
-    }
-    regularization = regularization.lower()
-    return _validate_prox_and_reg_inputs(prox[regularization], regularization)
+@_validate_prox_and_reg_inputs
+def _prox_l1(
+    x: NDArray[np.float64],
+    regularization_weight: Union[float, NDArray[np.float64]],
+):
+
+    return np.sign(x) * np.maximum(np.abs(x) - regularization_weight, 0)
+
+
+@_validate_prox_and_reg_inputs
+def _prox_l2(
+    x: NDArray[np.float64],
+    regularization_weight: Union[float, NDArray[np.float64]],
+):
+    return x / (1 + 2 * regularization_weight)
+
+
+@_validate_prox_and_reg_inputs
+def _regularization_l0(
+    x: NDArray[np.float64],
+    regularization_weight: Union[float, NDArray[np.float64]],
+):
+    return np.sum(regularization_weight * (x != 0))
+
+
+@_validate_prox_and_reg_inputs
+def _regularization_l1(
+    x: NDArray[np.float64],
+    regularization_weight: Union[float, NDArray[np.float64]],
+):
+    return np.sum(regularization_weight * np.abs(x))
+
+
+@_validate_prox_and_reg_inputs
+def _regularization_l2(
+    x: NDArray[np.float64],
+    regularization_weight: Union[float, NDArray[np.float64]],
+):
+    return np.sum(regularization_weight * x**2)
 
 
 def get_regularization(
@@ -238,39 +271,16 @@ def get_regularization(
     and returns a float
     """
 
-    def regularization_l0(
-        x: NDArray[np.float64],
-        regularization_weight: Union[float, NDArray[np.float64]],
-    ):
-
-        return np.sum(regularization_weight * (x != 0))
-
-    def regularization_l1(
-        x: NDArray[np.float64],
-        regularization_weight: Union[float, NDArray[np.float64]],
-    ):
-
-        return np.sum(regularization_weight * np.abs(x))
-
-    def regularization_l2(
-        x: NDArray[np.float64],
-        regularization_weight: Union[float, NDArray[np.float64]],
-    ):
-
-        return np.sum(regularization_weight * x**2)
-
     regularization_fn = {
-        "l0": regularization_l0,
-        "weighted_l0": regularization_l0,
-        "l1": regularization_l1,
-        "weighted_l1": regularization_l1,
-        "l2": regularization_l2,
-        "weighted_l2": regularization_l2,
+        "l0": _regularization_l0,
+        "weighted_l0": _regularization_l0,
+        "l1": _regularization_l1,
+        "weighted_l1": _regularization_l1,
+        "l2": _regularization_l2,
+        "weighted_l2": _regularization_l2,
     }
     regularization = regularization.lower()
-    return _validate_prox_and_reg_inputs(
-        regularization_fn[regularization], regularization
-    )
+    return regularization_fn[regularization]
 
 
 def capped_simplex_projection(trimming_array, trimming_fraction):
diff --git a/test/test_optimizers/debug.py b/test/test_optimizers/debug.py
new file mode 100644
index 000000000..01901a2dc
--- /dev/null
+++ b/test/test_optimizers/debug.py
@@ -0,0 +1,20 @@
+import pickle
+from functools import wraps
+
+
+def foo(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        print(f"Called {func}")
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+@foo
+def bar(a, b):
+    print(a + b)
+
+
+bars = pickle.dumps(bar)
+barl = pickle.loads(bars)
diff --git a/test/test_optimizers/test_optimizers.py b/test/test_optimizers/test_optimizers.py
index 3ba9bf28a..fb2102f65 100644
--- a/test/test_optimizers/test_optimizers.py
+++ b/test/test_optimizers/test_optimizers.py
@@ -1183,12 +1183,14 @@ def test_remove_and_decrement():
     (
         (MIOSR, {"target_sparsity": 7}),
         (SBR, {"num_warmup": 10, "num_samples": 10}),
+        (SR3, {}),
+        (TrappingSR3, {"_n_tgts": 3, "_include_bias": True}),
     ),
 )
 def test_pickle(data_lorenz, opt_cls, opt_args):
     x, t = data_lorenz
     y = PolynomialLibrary(degree=2).fit_transform(x)
-    opt = opt_cls(**opt_args).fit(x, y)
+    opt = opt_cls(**opt_args).fit(y, x)
     expected = opt.coef_
     new_opt = pickle.loads(pickle.dumps(opt))
     result = new_opt.coef_
diff --git a/test/utils/test_utils.py b/test/utils/test_utils.py
index bfd5054d5..a70e44d45 100644
--- a/test/utils/test_utils.py
+++ b/test/utils/test_utils.py
@@ -101,17 +101,9 @@ def test_get_prox_and_regularization_bad_shape(regularization, lam):
     prox(data, lam)
 
 
-@pytest.mark.parametrize(
-    "regularization", ["weighted_l0", "weighted_l1", "weighted_l2"]
-)
-@pytest.mark.parametrize(
-    "lam",
-    [
-        np.array([[1, 2]]),
-        1,
-    ],
-)
-def test_get_weighted_prox_and_regularization_bad_shape(regularization, lam):
+@pytest.mark.parametrize("regularization", ["l0", "l1", "l2"])
+def test_get_weighted_prox_and_regularization_bad_shape(regularization):
+    lam = np.array([[1, 2]])
     data = np.array([[-2, 5]]).T
     reg = get_regularization(regularization)
     with pytest.raises(ValueError):

From f60cba9c84a352b711dc724723ee29dba6e546bb Mon Sep 17 00:00:00 2001
From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Fri, 20 Dec 2024 14:22:25 -0600
Subject: [PATCH 3/4] BLD: Prevent Scikit-learn 1.6.0

This version introduced the method __sklearn_tags__(), but it currently
seems broken. Many tests fail with:

    env\Lib\site-packages\sklearn\utils\_tags.py:396: in get_tags
        tags = estimator.__sklearn_tags__()
    E   AttributeError: 'super' object has no attribute '__sklearn_tags__'
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index cd3603a73..18f6b6fe6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ classifiers = [
 readme = "README.rst"
 dependencies = [
     "jax>=0.4,<0.5",
-    "scikit-learn>=1.1, !=1.5.0",
+    "scikit-learn>=1.1, !=1.5.0, !=1.6.0",
     "derivative>=0.6.2",
     "typing_extensions",
 ]

From 8b8f0a5cb8e9854a7a320b69c30343ae28e3a9b9 Mon Sep 17 00:00:00 2001
From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Sat, 11 Jan 2025 17:10:45 -0800
Subject: [PATCH 4/4] bug (ssr): Be explicit when setting the shape.

Previously, sklearn.linear_model.ridge_regression would always give a
2D array when given 2D inputs. A change in scikit-learn 1.6 now
squeezes that extra dimension when there is only one target for the
regression.

Also, remove the extra debug.py that snuck into the previous commit.
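
A sketch of the difference this guards against (the shapes here are
illustrative, not pysindy's test data; ridge_regression is
scikit-learn's public function):

    import numpy as np
    from sklearn.linear_model import ridge_regression

    x = np.random.rand(10, 3)  # 10 samples, 3 features
    y = np.random.rand(10, 1)  # a single regression target, kept 2D
    coef = ridge_regression(x, y, 0.05)
    # scikit-learn < 1.6: coef.shape == (1, 3)
    # scikit-learn 1.6:   coef.shape == (3,), the target axis is squeezed
    coef = np.atleast_2d(coef)  # (1, 3) either way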
---
 pysindy/optimizers/ssr.py     | 20 +++++++++++++++++++-
 test/test_optimizers/debug.py | 20 --------------------
 2 files changed, 19 insertions(+), 21 deletions(-)
 delete mode 100644 test/test_optimizers/debug.py

diff --git a/pysindy/optimizers/ssr.py b/pysindy/optimizers/ssr.py
index 9308298f2..39ce1b5ca 100644
--- a/pysindy/optimizers/ssr.py
+++ b/pysindy/optimizers/ssr.py
@@ -1,8 +1,20 @@
+from typing import cast
+from typing import NewType
+from typing import TypeVar
+
 import numpy as np
+from numpy.typing import NBitBase
 from sklearn.linear_model import ridge_regression
 
 from .base import BaseOptimizer
 
+Rows = TypeVar("Rows", covariant=True, bound=int)
+Cols = TypeVar("Cols", covariant=True, bound=int)
+Float2D = np.ndarray[tuple[Rows, Cols], np.dtype[np.floating[NBitBase]]]
+Features = NewType("Features", int)
+Targets = NewType("Targets", int)
+Samples = NewType("Samples", int)
+
 
 class SSR(BaseOptimizer):
     """Stepwise sparse regression (SSR) greedy algorithm.
@@ -157,10 +169,13 @@ def _model_residual(self, x, y, coef, inds):
         cc[total_ind] = 0.0
         return cc, total_ind
 
-    def _regress(self, x, y):
+    def _regress(
+        self, x: Float2D[Samples, Features], y: Float2D[Samples, Targets]
+    ) -> Float2D[Targets, Features]:
         """Perform the ridge regression"""
         kw = self.ridge_kw or {}
         coef = ridge_regression(x, y, self.alpha, **kw)
+        coef = np.atleast_2d(coef)  # type: ignore
         self.iters += 1
         return coef
 
@@ -168,6 +183,9 @@ def _reduce(self, x, y):
         """Performs at most ``self.max_iter`` iterations of the SSR greedy
        algorithm.
        """
+        # Until static typing grows, use cast
+        x = cast(Float2D[Samples, Features], x)
+        y = cast(Float2D[Samples, Targets], y)
         n_samples, n_features = x.shape
         n_targets = y.shape[1]
         cond_num = np.linalg.cond(x)
diff --git a/test/test_optimizers/debug.py b/test/test_optimizers/debug.py
deleted file mode 100644
index 01901a2dc..000000000
--- a/test/test_optimizers/debug.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import pickle
-from functools import wraps
-
-
-def foo(func):
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        print(f"Called {func}")
-        return func(*args, **kwargs)
-
-    return wrapper
-
-
-@foo
-def bar(a, b):
-    print(a + b)
-
-
-bars = pickle.dumps(bar)
-barl = pickle.loads(bars)
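
For reference, the pickling rule behind PATCH 2 and the debug.py scratch
file above: pickle serializes a function by its module-level qualified
name. A closure returned by a factory (the old shape of get_prox and
get_regularization) therefore cannot be pickled, while a module-level
function wrapped by a functools.wraps decorator can. A minimal sketch
follows; the names validate, prox_toplevel, and make_prox are
illustrative, not pysindy API:

    import pickle
    from functools import wraps


    def validate(func):
        # Module-level decorator, in the style of _validate_prox_and_reg_inputs
        @wraps(func)  # copies __qualname__, so pickle can look the wrapper up
        def wrapper(x):
            return func(x)

        return wrapper


    @validate
    def prox_toplevel(x):  # importable by name, hence picklable
        return x


    def make_prox():  # factory returning a closure, the old pattern
        def prox(x):
            return x

        return prox


    pickle.loads(pickle.dumps(prox_toplevel))  # round-trips fine
    pickle.dumps(make_prox())  # fails: can't pickle a local object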