From 71bb4312dcd01cbb19d012ae129943fb84f37fae Mon Sep 17 00:00:00 2001
From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Wed, 16 Oct 2024 14:24:47 -0700
Subject: [PATCH 1/2] tst: Extract column normalization for much faster testing

---
 pysindy/_typing.py                      |  7 +++++
 pysindy/optimizers/base.py              | 16 +++++++----
 test/test_optimizers/test_optimizers.py | 36 +++++++------------------
 3 files changed, 28 insertions(+), 31 deletions(-)
 create mode 100644 pysindy/_typing.py

diff --git a/pysindy/_typing.py b/pysindy/_typing.py
new file mode 100644
index 000000000..1ec0c446c
--- /dev/null
+++ b/pysindy/_typing.py
@@ -0,0 +1,7 @@
+import numpy as np
+import numpy.typing as npt
+
+# In Python 3.12+, prefer the `type` statement:
+# https://docs.python.org/3/reference/simple_stmts.html#the-type-statement
+NpFlt = np.floating[npt.NBitBase]
+Float2D = np.ndarray[tuple[int, int], np.dtype[NpFlt]]
diff --git a/pysindy/optimizers/base.py b/pysindy/optimizers/base.py
index 01526f452..ea4eef3ff 100644
--- a/pysindy/optimizers/base.py
+++ b/pysindy/optimizers/base.py
@@ -14,6 +14,7 @@
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.validation import check_X_y
 
+from .._typing import Float2D
 from ..utils import AxesArray
 from ..utils import drop_nan_samples
 
@@ -178,8 +179,7 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws):
         x_normed = np.copy(x)
         if self.normalize_columns:
-            reg = 1 / np.linalg.norm(x, 2, axis=0)
-            x_normed = x * reg
+            feat_norms, x_normed = _normalize_features(x_normed)
 
         if self.initial_guess is None:
             self.coef_ = np.linalg.lstsq(x_normed, y, rcond=None)[0].T
@@ -203,11 +203,11 @@
 
         # Rescale coefficients to original units
         if self.normalize_columns:
-            self.coef_ = np.multiply(reg, self.coef_)
+            self.coef_ = self.coef_ / feat_norms
             if hasattr(self, "coef_full_"):
-                self.coef_full_ = np.multiply(reg, self.coef_full_)
+                self.coef_full_ = self.coef_full_ / feat_norms
             for i in range(np.shape(self.history_)[0]):
-                self.history_[i] = np.multiply(reg, self.history_[i])
+                self.history_[i] = self.history_[i] / feat_norms
 
         self._set_intercept(X_offset, y_offset, X_scale)
         return self
@@ -395,3 +395,9 @@
         x_dot_new = np.take(x_dot, rand_inds, axis=x.ax_sample)
 
     return x_new, x_dot_new
+
+
+def _normalize_features(x: Float2D) -> tuple[np.ndarray, Float2D]:
+    "Calculate the 2-norm of each column and rescale the columns to unit length"
+    lengths = np.linalg.norm(x, 2, axis=0)
+    return lengths, x / lengths
diff --git a/test/test_optimizers/test_optimizers.py b/test/test_optimizers/test_optimizers.py
index 0623f0f80..8423e60b7 100644
--- a/test/test_optimizers/test_optimizers.py
+++ b/test/test_optimizers/test_optimizers.py
@@ -35,6 +35,7 @@
 from pysindy.optimizers import STLSQ
 from pysindy.optimizers import TrappingSR3
 from pysindy.optimizers import WrappedOptimizer
+from pysindy.optimizers.base import _normalize_features
 from pysindy.optimizers.ssr import _ind_inflection
 from pysindy.optimizers.stlsq import _remove_and_decrement
 from pysindy.utils import supports_multiple_targets
@@ -1034,33 +1035,16 @@ def test_inequality_constraints_reqs():
     )
 
 
-@pytest.mark.parametrize(
-    "optimizer",
-    [
-        STLSQ,
-        SSR,
-        FROLS,
-        SR3,
-        ConstrainedSR3,
-        StableLinearSR3,
-        TrappingSR3,
-        MIOSR,
-        SBR,
-    ],
-)
-def test_normalize_columns(data_derivative_1d, optimizer):
+def test_normalize_columns(data_derivative_1d):
     x, x_dot = data_derivative_1d
-    if len(x.shape) == 1:
-        x = x.reshape(-1, 1)
-    opt = optimizer(normalize_columns=True)
-    opt, x = _align_optimizer_and_1dfeatures(opt, x)
-    opt.fit(x, x_dot)
-    check_is_fitted(opt)
-    assert opt.complexity >= 0
-    if len(x_dot.shape) > 1:
-        assert opt.coef_.shape == (x.shape[1], x_dot.shape[1])
-    else:
-        assert opt.coef_.shape == (1, x.shape[1])
+    x = np.reshape(x, (-1, 1))
+    x_dot = np.reshape(x_dot, (-1, 1))
+    cols = np.hstack((x, x_dot))
+    norm, ncols = _normalize_features(cols)
+    result = np.linalg.norm(ncols, axis=0)
+    expected = [1.0, 1.0]
+    np.testing.assert_allclose(result, expected)
+    np.testing.assert_allclose(ncols * norm, cols)
 
 
 @pytest.mark.parametrize(
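The helper extracted in this patch is pure array math, so the invariant the rewritten test asserts can be checked in isolation. A minimal sketch in plain NumPy (independent of pysindy; the random data and variable names are illustrative): dividing each column by its 2-norm yields unit-norm columns, and multiplying the norms back recovers the original matrix.

    import numpy as np

    rng = np.random.default_rng(seed=0)
    x = rng.normal(size=(100, 3))  # stand-in feature matrix, one column per feature

    # Same computation as the _normalize_features helper added above
    lengths = np.linalg.norm(x, 2, axis=0)  # one 2-norm per column
    x_normed = x / lengths                  # division broadcasts across rows

    # Every normalized column has unit length ...
    assert np.allclose(np.linalg.norm(x_normed, axis=0), 1.0)
    # ... and scaling back by the norms recovers the original matrix
    assert np.allclose(x_normed * lengths, x)

Testing this invariant directly, instead of fitting nine parametrized optimizers end to end, is where the speedup in the test suite comes from.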
From 4b15699593c8ae4201067d86625ec6e995f5ece6 Mon Sep 17 00:00:00 2001
From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Wed, 16 Oct 2024 14:35:20 -0700
Subject: [PATCH 2/2] tst: Speed up another SBR test

---
 test/test_optimizers/test_optimizers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_optimizers/test_optimizers.py b/test/test_optimizers/test_optimizers.py
index 8423e60b7..3ba9bf28a 100644
--- a/test/test_optimizers/test_optimizers.py
+++ b/test/test_optimizers/test_optimizers.py
@@ -126,7 +126,7 @@ def data(request):
         ElasticNet(fit_intercept=False),
         DummyLinearModel(),
         MIOSR(),
-        SBR(),
+        SBR(num_warmup=10, num_samples=10),
     ],
     ids=lambda param: type(param),
 )
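For context on the one-line change above: SBR is a Bayesian optimizer whose fit draws MCMC samples, so num_warmup and num_samples bound its runtime. A minimal smoke-test sketch, assuming a pysindy checkout with this series applied and SBR's sampling backend (numpyro) installed; the synthetic data and the expected coefficient are illustrative, not part of the patch:

    import numpy as np
    from pysindy.optimizers import SBR

    # Tiny synthetic regression: the target depends only on the first feature
    rng = np.random.default_rng(seed=0)
    x = rng.normal(size=(50, 2))
    y = 2.0 * x[:, [0]] + 0.01 * rng.normal(size=(50, 1))

    # 10 warmup draws + 10 posterior draws: plenty for a smoke test,
    # far too few for real inference, and much faster than the defaults
    opt = SBR(num_warmup=10, num_samples=10)
    opt.fit(x, y)
    print(opt.coef_)  # shape (1, 2); the first entry should land near 2.0,
                      # though with so few samples the estimate is noisy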