From b8faec0fd8717f9fa7d670bacc240f80836af76d Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 26 May 2024 10:28:38 -0700 Subject: [PATCH 01/11] feat (trapping): change initialization to match master remove evolve_w, objective_history --- pysindy/optimizers/trapping_sr3.py | 77 ++++++++++++------------------ 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index aa2b0b1bb..5166e141c 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -68,9 +68,6 @@ class TrappingSR3(SR3): Parameters ---------- - evolve_w : bool, optional (default True) - If false, don't update w and just minimize over (m, A) - threshold : float, optional (default 0.1) Determines the strength of the regularization. When the regularization function R is the L0 norm, the regularization @@ -246,7 +243,6 @@ class TrappingSR3(SR3): def __init__( self, - evolve_w=True, threshold=0.1, eps_solver=1e-7, inequality_constraints=False, @@ -330,7 +326,6 @@ def __init__( raise ValueError("tol and tol_m must be positive") self.mod_matrix = mod_matrix - self.evolve_w = evolve_w self.eps_solver = eps_solver self.inequality_constraints = inequality_constraints self.m0 = m0 @@ -350,24 +345,17 @@ def __init__( self.PW_history_ = [] self.PWeigs_history_ = [] self.history_ = [] - self.objective_history = objective_history self.unbias = False self.verbose_cvxpy = verbose_cvxpy self.use_constraints = (constraint_lhs is not None) and ( constraint_rhs is not None ) - if inequality_constraints: - if not evolve_w: - raise ValueError( - "Use of inequality constraints requires solving for xi " - " (evolve_w=True)." - ) - if not self.use_constraints: - raise ValueError( - "Use of inequality constraints requires constraint_rhs " - "and constraint_lhs " - "variables to be passed to the Optimizer class." - ) + if inequality_constraints and not self.use_constraints: + raise ValueError( + "Use of inequality constraints requires constraint_rhs " + "and constraint_lhs " + "variables to be passed to the Optimizer class." 
+ ) if self.use_constraints: if constraint_order not in ("feature", "target"): @@ -788,34 +776,31 @@ def _reduce(self, x, y): # update w coef_prev = coef_sparse - if self.evolve_w: - if (self.threshold > 0.0) or self.inequality_constraints: - coef_sparse = self._solve_sparse_relax_and_split( - r, n_features, x_expanded, y, Pmatrix, A, coef_prev - ) - else: - # if threshold = 0, there is analytic expression - # for the solve over the coefficients, - # which is coded up here separately - pTp = np.dot(Pmatrix.T, Pmatrix) - # notice reshaping PQ here requires fortran-ordering - PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) - PQ = np.reshape(PQ, (r * r * r, r * n_features), "F") - PQTPQ = np.dot(PQ.T, PQ) - PQ = np.reshape(self.PQ_, (r, r, r, r * n_features), "F") - PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) - PQ_ep = ( - PQ - + np.transpose(PQ, [1, 2, 0, 3]) - + np.transpose(PQ, [2, 0, 1, 3]) - ) - PQ_ep = np.reshape(PQ_ep, (r * r * r, r * n_features), "F") - PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) - H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta - P_transpose_A = np.dot(Pmatrix.T, A.flatten()) - coef_sparse = self._solve_nonsparse_relax_and_split( - H, xTy, P_transpose_A, coef_prev - ) + if (self.threshold > 0.0) or self.inequality_constraints: + coef_sparse = self._solve_sparse_relax_and_split( + r, n_features, x_expanded, y, Pmatrix, A, coef_prev + ) + else: + # if threshold = 0, there is analytic expression + # for the solve over the coefficients, + # which is coded up here separately + pTp = np.dot(Pmatrix.T, Pmatrix) + # notice reshaping PQ here requires fortran-ordering + PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) + PQ = np.reshape(PQ, (r * r * r, r * n_features), "F") + PQTPQ = np.dot(PQ.T, PQ) + PQ = np.reshape(self.PQ_, (r, r, r, r * n_features), "F") + PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) + PQ_ep = ( + PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) + ) + PQ_ep = np.reshape(PQ_ep, (r * r * r, r * n_features), "F") + PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) + H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta + P_transpose_A = np.dot(Pmatrix.T, A.flatten()) + coef_sparse = self._solve_nonsparse_relax_and_split( + H, xTy, P_transpose_A, coef_prev + ) # If problem over xi becomes infeasible, break out of the loop if coef_sparse is None: From c69e247651decfddbddcfe3d8d1ba8deb51e8d2b Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 26 May 2024 12:19:53 -0700 Subject: [PATCH 02/11] cln: align simple initialization and admin work with master * move underscore attributes to _reduce (their presence is used by scikit-learn's check_fitted()) * add explicit _n_tgts, _include_bias, and _interaction_only to init * subclass from ConstrainedSR3 * Add "method" kwarg to control global/local (this will hopefully be removed in the future) --- pysindy/optimizers/trapping_sr3.py | 212 ++++++++++++++++------------- 1 file changed, 116 insertions(+), 96 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 5166e141c..4e83dbfdf 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -5,6 +5,8 @@ from math import comb from typing import cast from typing import NewType +from typing import Optional +from typing import Union import cvxpy as cp import numpy as np @@ -14,7 +16,7 @@ from ..feature_library.polynomial_library import PolynomialLibrary 
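Aside on the commit message above: the "check_fitted()" it mentions presumably refers to ``sklearn.utils.validation.check_is_fitted``, which treats any attribute ending in a single underscore as evidence that ``fit`` has run — hence moving the trailing-underscore attributes out of ``__init__`` and into ``_reduce``/``fit``. A minimal sketch of that behaviour, outside this patch and with a made-up estimator:

    from sklearn.base import BaseEstimator
    from sklearn.exceptions import NotFittedError
    from sklearn.utils.validation import check_is_fitted

    class TinyEstimator(BaseEstimator):
        def fit(self, X, y=None):
            self.coef_ = 0.0  # trailing-underscore attribute created only during fit
            return self

    est = TinyEstimator()
    try:
        check_is_fitted(est)  # no fitted attributes yet -> NotFittedError
    except NotFittedError:
        pass
    check_is_fitted(est.fit(None))  # passes once fit() has set ``coef_``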
from ..utils import reorder_constraints -from .sr3 import SR3 +from .constrained_sr3 import ConstrainedSR3 AnyFloat = np.dtype[np.floating[NBitBase]] Int1D = np.ndarray[tuple[int], np.dtype[np.int_]] @@ -27,7 +29,7 @@ NTarget = NewType("NTarget", int) -class TrappingSR3(SR3): +class TrappingSR3(ConstrainedSR3): """ Generalized trapping variant of sparse relaxed regularized regression. This optimizer can be used to identify quadratically nonlinear systems with @@ -243,130 +245,148 @@ class TrappingSR3(SR3): def __init__( self, - threshold=0.1, - eps_solver=1e-7, - inequality_constraints=False, - eta=None, - alpha=None, - beta=None, - mod_matrix=None, - alpha_A=None, - alpha_m=None, - gamma=-0.1, - tol=1e-5, - tol_m=1e-5, - thresholder="l1", - thresholds=None, - max_iter=30, - accel=False, - normalize_columns=False, - fit_intercept=False, - copy_X=True, - m0=None, - A0=None, - objective_history=None, - constraint_lhs=None, - constraint_rhs=None, - constraint_order="target", - verbose=False, - verbose_cvxpy=False, + *, + _n_tgts: int = None, + _include_bias: bool = False, + _interaction_only: bool = False, + method: str = "global", + eta: Union[float, None] = None, + eps_solver: float = 1e-7, + alpha: Optional[float] = None, + beta: Optional[float] = None, + mod_matrix: Optional[NDArray] = None, + alpha_A: Union[float, None] = None, + alpha_m: Union[float, None] = None, + gamma: float = -0.1, + tol_m: float = 1e-5, + thresholder: str = "l1", + accel: bool = False, + m0: Union[NDArray, None] = None, + A0: Union[NDArray, None] = None, + **kwargs, ): - super(TrappingSR3, self).__init__( - threshold=threshold, - max_iter=max_iter, - normalize_columns=normalize_columns, - fit_intercept=fit_intercept, - copy_X=copy_X, - thresholder=thresholder, - thresholds=thresholds, - verbose=verbose, + # n_tgts, constraints, etc are data-dependent parameters and belong in + # _reduce/fit (). The following is a hack until we refactor how + # constraints are applied in ConstrainedSR3 and MIOSR + self._include_bias = _include_bias + self._interaction_only = _interaction_only + self._n_tgts = _n_tgts + if _n_tgts is None: + warnings.warn( + "Trapping Optimizer initialized without _n_tgts. 
It will likely" + " be unable to fit data" + ) + _n_tgts = 1 + if method == "global": + if hasattr(kwargs, "constraint_separation_index"): + constraint_separation_index = kwargs["constraint_separation_index"] + elif kwargs.get("inequality_constraints", False): + constraint_separation_index = kwargs["constraint_lhs"].shape[0] + else: + constraint_separation_index = 0 + constraint_rhs, constraint_lhs = _make_constraints( + _n_tgts, include_bias=_include_bias + ) + constraint_order = kwargs.pop("constraint_order", "feature") + if constraint_order == "target": + constraint_lhs = np.transpose(constraint_lhs, [0, 2, 1]) + constraint_lhs = np.reshape(constraint_lhs, (constraint_lhs.shape[0], -1)) + try: + constraint_lhs = np.concatenate( + (kwargs.pop("constraint_lhs"), constraint_lhs), 0 + ) + constraint_rhs = np.concatenate( + (kwargs.pop("constraint_rhs"), constraint_rhs), 0 + ) + except KeyError: + pass + + super().__init__( + constraint_lhs=constraint_lhs, + constraint_rhs=constraint_rhs, + constraint_separation_index=constraint_separation_index, + constraint_order=constraint_order, + equality_constraints=True, + thresholder=thresholder, + **kwargs, + ) + elif method == "local": + super().__init__(thresholder=thresholder, **kwargs) + else: + raise ValueError(f"Can either use 'global' or 'local' method, not {method}") + + self.mod_matrix = mod_matrix + self.eps_solver = eps_solver + self.m0 = m0 + self.A0 = A0 + self.alpha_A = alpha_A + self.alpha_m = alpha_m + self.eta = eta + self.alpha = alpha + self.beta = beta + self.gamma = gamma + self.tol_m = tol_m + self.accel = accel + self.unbias = False + self.use_constraints = (constraint_lhs is not None) and ( + constraint_rhs is not None ) - if thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): + self.__post_init_guard() + + def __post_init_guard(self): + """Conduct initialization post-init, as required by scikitlearn API""" + if self.thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): raise ValueError("Regularizer must be (weighted) L1 or L2") - if eta is None: + if self.eta is None: warnings.warn( "eta was not set, so defaulting to eta = 1e20 " "with alpha_m = 1e-2 * eta, alpha_A = eta. Here eta is so " "large that the stability term in the optimization " "will be ignored." ) - eta = 1e20 - alpha_m = 1e18 - alpha_A = 1e20 + self.eta = 1e20 + self.alpha_m = 1e18 + self.alpha_A = 1e20 else: - if alpha_m is None: - alpha_m = eta * 1e-2 - if alpha_A is None: - alpha_A = eta - if eta <= 0: + if self.alpha_m is None: + self.alpha_m = self.eta * 1e-2 + if self.alpha_A is None: + self.alpha_A = self.eta + if self.eta <= 0: raise ValueError("eta must be positive") - if alpha is None: - alpha = 1e20 + if self.alpha is None: + self.alpha = 1e20 warnings.warn( "alpha was not set, so defaulting to alpha = 1e20 " "which is so" "large that the ||Qijk|| term in the optimization " "will be essentially ignored." ) - if beta is None: - beta = 1e20 + if self.beta is None: + self.beta = 1e20 warnings.warn( "beta was not set, so defaulting to beta = 1e20 " "which is so" "large that the ||Qijk + Qjik + Qkij|| " "term in the optimization will be essentially ignored." 
) - if alpha_m < 0 or alpha_m > eta: + + if self.alpha_m < 0 or self.alpha_m > self.eta: raise ValueError("0 <= alpha_m <= eta") - if alpha_A < 0 or alpha_A > eta: + if self.alpha_A < 0 or self.alpha_A > self.eta: raise ValueError("0 <= alpha_A <= eta") - if gamma >= 0: + if self.gamma >= 0: raise ValueError("gamma must be negative") - if tol <= 0 or tol_m <= 0 or eps_solver <= 0: + if self.tol <= 0 or self.tol_m <= 0 or self.eps_solver <= 0: raise ValueError("tol and tol_m must be positive") - - self.mod_matrix = mod_matrix - self.eps_solver = eps_solver - self.inequality_constraints = inequality_constraints - self.m0 = m0 - self.A0 = A0 - self.alpha_A = alpha_A - self.alpha_m = alpha_m - self.eta = eta - self.alpha = alpha - self.beta = beta - self.gamma = gamma - self.tol = tol - self.tol_m = tol_m - self.accel = accel - self.A_history_ = [] - self.m_history_ = [] - self.p_history_ = [] - self.PW_history_ = [] - self.PWeigs_history_ = [] - self.history_ = [] - self.unbias = False - self.verbose_cvxpy = verbose_cvxpy - self.use_constraints = (constraint_lhs is not None) and ( - constraint_rhs is not None - ) - if inequality_constraints and not self.use_constraints: + if self.inequality_constraints and self.threshold == 0.0: raise ValueError( - "Use of inequality constraints requires constraint_rhs " - "and constraint_lhs " - "variables to be passed to the Optimizer class." + "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False." ) - if self.use_constraints: - if constraint_order not in ("feature", "target"): - raise ValueError( - "constraint_order must be either 'feature' or 'target'" - ) - - self.constraint_lhs = constraint_lhs - self.constraint_rhs = constraint_rhs - self.unbias = False - self.constraint_order = constraint_order + def set_params(self, **kwargs): + super().set_params(**kwargs) + self.__post_init_guard() @staticmethod def _build_PC(polyterms: list[tuple[int, Int1D]]) -> Float3D: From afa25dbf8e7841289fb151b0cff71925db850396 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 26 May 2024 12:51:49 -0700 Subject: [PATCH 03/11] cln: offload some of _solve_sparse_relax_and_split() to ConstrainedSR3 This pulls in the changes to ConstrainedSR3 from trapping_resolve --- pysindy/optimizers/constrained_sr3.py | 149 +++++++++++++++----------- pysindy/optimizers/trapping_sr3.py | 138 +++++++++--------------- 2 files changed, 139 insertions(+), 148 deletions(-) diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index f00445b9a..eb20cf159 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -1,4 +1,7 @@ import warnings +from copy import deepcopy +from typing import Optional +from typing import Tuple try: import cvxpy as cp @@ -39,6 +42,9 @@ class ConstrainedSR3(SR3): to learn parsimonious physics-informed models from data." IEEE Access 8 (2020): 169259-169271. + Zheng, Peng, et al. "A unified framework for sparse relaxed + regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423. + Parameters ---------- threshold : float, optional (default 0.1) @@ -66,14 +72,10 @@ class ConstrainedSR3(SR3): max_iter : int, optional (default 30) Maximum iterations of the optimization algorithm. - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. 
- constraint_lhs : numpy ndarray, optional (default None) Shape should be (n_constraints, n_features * n_targets), - The left hand side matrix C of Cw <= d. - There should be one row per constraint. + The left hand side matrix C of Cw <= d (Or Cw = d for equality + constraints). There should be one row per constraint. constraint_rhs : numpy ndarray, shape (n_constraints,), optional (default None) The right hand side vector d of Cw <= d. @@ -97,9 +99,6 @@ class ConstrainedSR3(SR3): is deprecated in sklearn versions >= 1.0 and will be removed. Note that this parameter is incompatible with the constraints! - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - initial_guess : np.ndarray, optional (default None) Shape should be (n_features) or (n_targets, n_features). Initial guess for coefficients ``coef_``, (v in the mathematical equations) @@ -128,6 +127,10 @@ class ConstrainedSR3(SR3): output should be verbose or not. Only relevant for optimizers that use the CVXPY package in some capabity. + unbias: bool (default False) + See base class for definition. Most options are incompatible + with unbiasing. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -138,11 +141,15 @@ class ConstrainedSR3(SR3): Weight vector(s) that are not subjected to the regularization. This is the w in the objective function. - unbias : boolean - Whether to perform an extra step of unregularized linear regression - to unbias the coefficients for the identified support. - ``unbias`` is automatically set to False if a constraint is used and - is otherwise left uninitialized. + history_ : list + History of sparse coefficients. ``history_[k]`` contains the + sparse coefficients (v in the optimization objective function) + at iteration k. + + objective_history_ : list + History of the value of the objective at each step. Note that + the trapping SINDy problem is nonconvex, meaning that this value + may increase and decrease as the algorithm works. """ def __init__( @@ -158,17 +165,17 @@ def __init__( constraint_rhs=None, constraint_order="target", normalize_columns=False, - fit_intercept=False, copy_X=True, initial_guess=None, thresholds=None, equality_constraints=False, inequality_constraints=False, - constraint_separation_index=0, + constraint_separation_index: Optional[bool] = None, verbose=False, verbose_cvxpy=False, + unbias=False, ): - super(ConstrainedSR3, self).__init__( + super().__init__( threshold=threshold, nu=nu, tol=tol, @@ -178,10 +185,10 @@ def __init__( trimming_step_size=trimming_step_size, max_iter=max_iter, initial_guess=initial_guess, - fit_intercept=fit_intercept, copy_X=copy_X, normalize_columns=normalize_columns, verbose=verbose, + unbias=unbias, ) self.verbose_cvxpy = verbose_cvxpy @@ -189,7 +196,7 @@ def __init__( self.constraint_lhs = constraint_lhs self.constraint_rhs = constraint_rhs self.constraint_order = constraint_order - self.use_constraints = (constraint_lhs is not None) and ( + self.use_constraints = (constraint_lhs is not None) or ( constraint_rhs is not None ) @@ -203,15 +210,18 @@ def __init__( " but user did not specify if the constraints were equality or" " inequality constraints. Assuming equality constraints." 
) - self.equality_constraints = True + equality_constraints = True if self.use_constraints: if constraint_order not in ("feature", "target"): raise ValueError( "constraint_order must be either 'feature' or 'target'" ) - - self.unbias = False + if unbias: + raise ValueError( + "Constraints are incompatible with an unbiasing step. Set" + " unbias=False" + ) if inequality_constraints and not cvxpy_flag: raise ValueError( @@ -235,6 +245,16 @@ def __init__( ) self.inequality_constraints = inequality_constraints self.equality_constraints = equality_constraints + if self.use_constraints and constraint_separation_index is None: + if self.inequality_constraints and not self.equality_constraints: + constraint_separation_index = len(constraint_lhs) + elif self.equality_constraints and not self.inequality_constraints: + constraint_separation_index = 0 + else: + raise ValueError( + "If passing both inequality and equality constraints, must specify" + " constraint_separation_index." + ) self.constraint_separation_index = constraint_separation_index def _update_full_coef_constraints(self, H, x_transpose_y, coef_sparse): @@ -251,62 +271,66 @@ def _update_full_coef_constraints(self, H, x_transpose_y, coef_sparse): rhs = rhs.reshape(g.shape) return inv1.dot(rhs) - def _update_coef_cvxpy(self, x, y, coef_sparse): - xi = cp.Variable(coef_sparse.shape[0] * coef_sparse.shape[1]) - cost = cp.sum_squares(x @ xi - y.flatten()) + def _create_var_and_part_cost( + self, var_len: int, x_expanded: np.ndarray, y: np.ndarray + ) -> Tuple[cp.Variable, cp.Expression]: + xi = cp.Variable(var_len) + cost = cp.sum_squares(x_expanded @ xi - y.flatten()) if self.thresholder.lower() == "l1": cost = cost + self.threshold * cp.norm1(xi) elif self.thresholder.lower() == "weighted_l1": cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) + cost = cost + self.threshold * cp.norm2(xi) ** 2 elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) + cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 + return xi, cost + + def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): if self.use_constraints: - if self.inequality_constraints and self.equality_constraints: - # Process inequality constraints then equality constraints - prob = cp.Problem( - cp.Minimize(cost), - [ - self.constraint_lhs[: self.constraint_separation_index, :] @ xi - <= self.constraint_rhs[: self.constraint_separation_index], - self.constraint_lhs[self.constraint_separation_index :, :] @ xi - == self.constraint_rhs[self.constraint_separation_index :], - ], - ) - elif self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], + constraints = [] + if self.equality_constraints: + constraints.append( + self.constraint_lhs[self.constraint_separation_index :, :] @ xi + == self.constraint_rhs[self.constraint_separation_index :], ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], + if self.inequality_constraints: + constraints.append( + self.constraint_lhs[: self.constraint_separation_index, :] @ xi + <= self.constraint_rhs[: self.constraint_separation_index] ) + prob = cp.Problem(cp.Minimize(cost), constraints) else: - prob = cp.Problem(cp.Minimize(cost)) + cp.Problem(cp.Minimize(cost)) - # default solver is OSQP here but switches to ECOS for L2 + prob_clone = deepcopy(prob) + # default solver is SCS/OSQP 
here but switches to ECOS for L2 try: prob.solve( max_iter=self.max_iter, - eps_abs=self.tol, - eps_rel=self.tol, + eps_abs=tol, + eps_rel=tol, verbose=self.verbose_cvxpy, ) # Annoying error coming from L2 norm switching to use the ECOS # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. - except TypeError: + except (TypeError, ValueError): try: - prob.solve(abstol=self.tol, reltol=self.tol, verbose=self.verbose_cvxpy) + prob = prob_clone + prob.solve(max_iters=self.max_iter, verbose=self.verbose_cvxpy) + xi = prob.variables()[0] except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + warnings.warn("Solver failed, setting coefs to zeros") + xi.value = np.zeros(var_len) except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + try: + prob = prob_clone + prob.solve(max_iter=self.max_iter, verbose=self.verbose_cvxpy) + xi = prob.variables()[0] + except cp.error.SolverError: + warnings.warn("Solver failed, setting coefs to zeros") + xi.value = np.zeros(var_len) if xi.value is None: warnings.warn( @@ -315,7 +339,7 @@ def _update_coef_cvxpy(self, x, y, coef_sparse): ConvergenceWarning, ) return None - coef_new = (xi.value).reshape(coef_sparse.shape) + coef_new = (xi.value).reshape(coef_prev.shape) return coef_new def _update_sparse_coef(self, coef_full): @@ -422,7 +446,11 @@ def _reduce(self, x, y): objective_history = [] if self.inequality_constraints: - coef_sparse = self._update_coef_cvxpy(x_expanded, y, coef_sparse) + var_len = coef_sparse.shape[0] * coef_sparse.shape[1] + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + coef_sparse = self._update_coef_cvxpy( + xi, cost, var_len, coef_sparse, self.tol + ) objective_history.append(self._objective(x, y, 0, coef_full, coef_sparse)) else: for k in range(self.max_iter): @@ -461,9 +489,8 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"ConstrainedSR3 did not converge after {self.max_iter}" + " iterations.", ConvergenceWarning, ) if self.use_constraints and self.constraint_order.lower() == "target": diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 4e83dbfdf..0e769aac8 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -14,6 +14,7 @@ from numpy.typing import NDArray from sklearn.exceptions import ConvergenceWarning +from ..feature_library.polynomial_library import n_poly_features from ..feature_library.polynomial_library import PolynomialLibrary from ..utils import reorder_constraints from .constrained_sr3 import ConstrainedSR3 @@ -310,8 +311,10 @@ def __init__( thresholder=thresholder, **kwargs, ) + self.method = "global" elif method == "local": super().__init__(thresholder=thresholder, **kwargs) + self.method = "local" else: raise ValueError(f"Can either use 'global' or 'local' method, not {method}") @@ -585,75 +588,23 @@ def _objective(self, x, y, coef_sparse, A, PW, k): ) return R2 + stability_term + L1 + alpha_term + beta_term - def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev): + def _update_coef_sparse_rs( + self, r, N, var_len, x_expanded, y, Pmatrix, A, coef_prev + ): """Solve coefficient update with CVXPY if threshold != 0""" - xi = cp.Variable(N * r) - cost = 
cp.sum_squares(x_expanded @ xi - y.flatten()) - if self.thresholder.lower() == "l1": - cost = cost + self.threshold * cp.norm1(xi) - elif self.thresholder.lower() == "weighted_l1": - cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) - elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) ** 2 - elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta # new terms minimizing quadratic piece ||P^Q @ xi||_2^2 - Q = np.reshape(self.PQ_, (r * r * r, N * r), "F") - cost = cost + cp.sum_squares(Q @ xi) / self.alpha - Q = np.reshape(self.PQ_, (r, r, r, N * r), "F") - Q_ep = Q + np.transpose(Q, [1, 2, 0, 3]) + np.transpose(Q, [2, 0, 1, 3]) - Q_ep = np.reshape(Q_ep, (r * r * r, N * r), "F") - cost = cost + cp.sum_squares(Q_ep @ xi) / self.beta - - # Constraints - if self.use_constraints: - if self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], - ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], - ) - else: - prob = cp.Problem(cp.Minimize(cost)) - - # default solver is OSQP here but switches to ECOS for L2 - try: - prob.solve( - eps_abs=self.eps_solver, - eps_rel=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - # Annoying error coming from L2 norm switching to use the ECOS - # solver, which uses "max_iters" instead of "max_iter", and - # similar semantic changes for the other variables. - except TypeError: - try: - prob.solve( - abstol=self.eps_solver, - reltol=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) - - if xi.value is None: - warnings.warn( - "Infeasible solve, increase/decrease eta", - ConvergenceWarning, - ) - return None - coef_sparse = (xi.value).reshape(coef_prev.shape) - return coef_sparse + if self.method == "local": + Q = np.reshape(self.PQ_, (r * r * r, N * r), "F") + cost = cost + cp.sum_squares(Q @ xi) / self.alpha + Q = np.reshape(self.PQ_, (r, r, r, N * r), "F") + Q_ep = Q + np.transpose(Q, [1, 2, 0, 3]) + np.transpose(Q, [2, 0, 1, 3]) + Q_ep = np.reshape(Q_ep, (r * r * r, N * r), "F") + cost = cost + cp.sum_squares(Q_ep @ xi) / self.beta + + return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): """ @@ -711,16 +662,23 @@ def _reduce(self, x, y): TrappingSR3 algorithm. Assumes initial guess for coefficients is stored in ``self.coef_``. 
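For context on the refactor above: ``_update_coef_sparse_rs`` now only adds the trapping-specific terms to the partial cost built by ``_create_var_and_part_cost`` and delegates the solve to ``ConstrainedSR3._update_coef_cvxpy``. A self-contained sketch of that kind of objective, using made-up shapes and random data in place of the real projection tensors:

    import cvxpy as cp
    import numpy as np

    rng = np.random.default_rng(0)
    n_samples, n_tgts, n_features = 100, 3, 9
    var_len = n_tgts * n_features
    x_expanded = rng.normal(size=(n_samples * n_tgts, var_len))  # block-diagonal design in the real code
    y = rng.normal(size=(n_samples, n_tgts))
    Pmatrix = rng.normal(size=(n_tgts * n_tgts, var_len))        # stands in for the reshaped P tensor
    A = np.diag(-0.1 * np.ones(n_tgts))
    eta, threshold = 1.0e2, 0.1

    xi = cp.Variable(var_len)
    # data-fidelity plus L1 sparsity, as in _create_var_and_part_cost
    cost = cp.sum_squares(x_expanded @ xi - y.flatten()) + threshold * cp.norm1(xi)
    # relax-and-split stability term ||P xi - A||^2 / eta, as in _update_coef_sparse_rs
    cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / eta
    prob = cp.Problem(cp.Minimize(cost))
    prob.solve()
    coef = xi.value.reshape((n_tgts, n_features))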
""" - - n_samples, n_features = x.shape - self.n_features = n_features - r = y.shape[1] - N = n_features # int((r ** 2 + 3 * r) / 2.0) - if N > int((r**2 + 3 * r) / 2.0): - self._include_bias = True + self.A_history_ = [] + self.m_history_ = [] + self.p_history_ = [] + self.PW_history_ = [] + self.PWeigs_history_ = [] + self.history_ = [] + n_samples, n_tgts = y.shape + n_features = n_poly_features( + n_tgts, + 2, + include_bias=self._include_bias, + interaction_only=self._interaction_only, + ) + var_len = n_features * n_tgts if self.mod_matrix is None: - self.mod_matrix = np.eye(r) + self.mod_matrix = np.eye(n_tgts) # Define PL, PQ, PT and PM tensors, only relevant if the stability term in # trapping SINDy is turned on. @@ -731,7 +689,7 @@ def _reduce(self, x, y): self.PQ_, self.PT_, self.PM_, - ) = self._set_Ptensors(r) + ) = self._set_Ptensors(n_tgts) # Set initial coefficients if self.use_constraints and self.constraint_order.lower() == "target": @@ -760,9 +718,9 @@ def _reduce(self, x, y): if self.A0 is not None: A = self.A0 elif np.any(self.PM_ != 0.0): - A = np.diag(self.gamma * np.ones(r)) + A = np.diag(self.gamma * np.ones(n_tgts)) else: - A = np.diag(np.zeros(r)) + A = np.diag(np.zeros(n_tgts)) self.A_history_.append(A) # initial guess for m @@ -770,14 +728,14 @@ def _reduce(self, x, y): m = self.m0 else: np.random.seed(1) - m = (np.random.rand(r) - np.ones(r)) * 2 + m = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 self.m_history_.append(m) # Precompute some objects for optimization - x_expanded = np.zeros((n_samples, r, n_features, r)) - for i in range(r): + x_expanded = np.zeros((n_samples, n_tgts, n_features, n_tgts)) + for i in range(n_tgts): x_expanded[:, i, :, i] = x - x_expanded = np.reshape(x_expanded, (n_samples * r, r * n_features)) + x_expanded = np.reshape(x_expanded, (n_samples * n_tgts, n_tgts * n_features)) xTx = np.dot(x_expanded.T, x_expanded) xTy = np.dot(x_expanded.T, y.flatten()) @@ -792,13 +750,13 @@ def _reduce(self, x, y): # update P tensor from the newest m mPM = np.tensordot(self.PM_, m, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - Pmatrix = p.reshape(r * r, r * n_features) + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) # update w coef_prev = coef_sparse if (self.threshold > 0.0) or self.inequality_constraints: - coef_sparse = self._solve_sparse_relax_and_split( - r, n_features, x_expanded, y, Pmatrix, A, coef_prev + coef_sparse = self._update_coef_sparse_rs( + n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev ) else: # if threshold = 0, there is analytic expression @@ -807,14 +765,20 @@ def _reduce(self, x, y): pTp = np.dot(Pmatrix.T, Pmatrix) # notice reshaping PQ here requires fortran-ordering PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) - PQ = np.reshape(PQ, (r * r * r, r * n_features), "F") + PQ = np.reshape( + PQ, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" + ) PQTPQ = np.dot(PQ.T, PQ) - PQ = np.reshape(self.PQ_, (r, r, r, r * n_features), "F") + PQ = np.reshape( + self.PQ_, (n_tgts, n_tgts, n_tgts, n_tgts * n_features), "F" + ) PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) PQ_ep = ( PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) ) - PQ_ep = np.reshape(PQ_ep, (r * r * r, r * n_features), "F") + PQ_ep = np.reshape( + PQ_ep, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" + ) PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta P_transpose_A = np.dot(Pmatrix.T, 
A.flatten()) @@ -829,7 +793,7 @@ def _reduce(self, x, y): # Now solve optimization for m and A m_prev, m, A, tk_prev = self._solve_m_relax_and_split( - r, n_features, m_prev, m, A, coef_sparse, tk_prev + n_tgts, n_features, m_prev, m, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop From f463b18ab29f5e98c0e6a1de045b2188a0423dc3 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 29 May 2024 17:39:41 -0700 Subject: [PATCH 04/11] Backport polynomial library fixes to trapping_extended --- pysindy/feature_library/polynomial_library.py | 258 ++++++++---------- 1 file changed, 120 insertions(+), 138 deletions(-) diff --git a/pysindy/feature_library/polynomial_library.py b/pysindy/feature_library/polynomial_library.py index 602f6eac2..221a7a6dc 100644 --- a/pysindy/feature_library/polynomial_library.py +++ b/pysindy/feature_library/polynomial_library.py @@ -1,12 +1,12 @@ from itertools import chain -from itertools import combinations -from itertools import combinations_with_replacement as combinations_w_r +from math import comb +from typing import Iterator +from typing import Tuple import numpy as np +from numpy.typing import NDArray from scipy import sparse -from sklearn import __version__ from sklearn.preprocessing import PolynomialFeatures -from sklearn.preprocessing._csr_polynomial_expansion import _csr_polynomial_expansion from sklearn.utils.validation import check_is_fitted from ..utils import AxesArray @@ -45,22 +45,13 @@ class PolynomialLibrary(PolynomialFeatures, BaseFeatureLibrary): Order of output array in the dense case. 'F' order is faster to compute, but may slow down subsequent estimators. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. - Attributes ---------- powers_ : array, shape (n_output_features, n_input_features) powers_[i, j] is the exponent of the jth input in the ith output. - n_input_features_ : int + n_features_in_ : int The total number of input features. - WARNING: This is deprecated in scikit-learn version 1.0 and higher so - we check the sklearn.__version__ and switch to n_features_in if needed. n_output_features_ : int The total number of output features. 
This number is computed by @@ -74,68 +65,68 @@ def __init__( interaction_only=False, include_bias=True, order="C", - library_ensemble=False, - ensemble_indices=[0], ): - super(PolynomialLibrary, self).__init__( + super().__init__( degree=degree, interaction_only=interaction_only, include_bias=include_bias, order=order, ) - BaseFeatureLibrary.__init__( - self, library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) - if degree < 0 or not isinstance(degree, int): - raise ValueError("degree must be a nonnegative integer") - if (not include_interaction) and interaction_only: - raise ValueError( - "Can't have include_interaction be False and interaction_only" - " be True" - ) self.include_interaction = include_interaction @staticmethod def _combinations( - n_features, degree, include_interaction, interaction_only, include_bias - ): - comb = combinations if interaction_only else combinations_w_r - start = int(not include_bias) + n_features: int, + degree: int, + include_interaction: bool, + interaction_only: bool, + include_bias: bool, + ) -> Iterator[Tuple[int, ...]]: + """ + Create selection tuples of input indexes for each polynomail term + + Selection tuple iterates the input indexes present in a single term. + For example, (x+y+1)^2 would be in iterator of the tuples: + (), (0,), (1,), (0, 0), (0, 1), (1, 1) + 1 x y x^2 x*y y^2 + + Order of terms is preserved regardless of include_interation. + """ if not include_interaction: - if include_bias: - return chain( - [()], - chain.from_iterable( - combinations_w_r([j], i) - for i in range(1, degree + 1) - for j in range(n_features) - ), - ) - else: - return chain.from_iterable( - combinations_w_r([j], i) - for i in range(1, degree + 1) - for j in range(n_features) - ) - return chain.from_iterable( - comb(range(n_features), i) for i in range(start, degree + 1) + return chain( + [()] if include_bias else [], + ( + exponent * (feat_idx,) + for exponent in range(1, degree + 1) + for feat_idx in range(n_features) + ), + ) + return PolynomialFeatures._combinations( + n_features=n_features, + min_degree=int(not include_bias), + max_degree=degree, + interaction_only=interaction_only, + include_bias=include_bias, ) @property - def powers_(self): + def powers_(self) -> NDArray[np.int_]: + """ + The exponents of the polynomial as an array of shape + (n_features_out, n_features_in), where each item is the exponent of the + jth input variable in the ith polynomial term. + """ check_is_fitted(self) - if float(__version__[:3]) >= 1.0: - n_features = self.n_features_in_ - else: - n_features = self.n_input_features_ combinations = self._combinations( - n_features, - self.degree, - self.include_interaction, - self.interaction_only, - self.include_bias, + n_features=self.n_features_in_, + degree=self.degree, + include_interaction=self.include_interaction, + interaction_only=self.interaction_only, + include_bias=self.include_bias, + ) + return np.vstack( + [np.bincount(c, minlength=self.n_features_in_) for c in combinations] ) - return np.vstack([np.bincount(c, minlength=n_features) for c in combinations]) def get_feature_names(self, input_features=None): """Return feature names for output features. 
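The selection-tuple convention documented above lines up directly with the ``powers_`` property; a small, hypothetical two-feature quadratic library makes the correspondence concrete (expected output shown in comments):

    import numpy as np
    from pysindy.feature_library import PolynomialLibrary

    lib = PolynomialLibrary(degree=2, include_bias=True).fit(np.zeros((1, 2)))
    print(lib.get_feature_names(["x", "y"]))
    # should give: ['1', 'x', 'y', 'x^2', 'x y', 'y^2']
    print(lib.powers_)
    # one row of exponents per term, i.e. [0 0], [1 0], [0 1], [2 0], [1 1], [0 2]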
@@ -182,6 +173,13 @@ def fit(self, x_full, y=None): ------- self : instance """ + if self.degree < 0 or not isinstance(self.degree, int): + raise ValueError("degree must be a nonnegative integer") + if (not self.include_interaction) and self.interaction_only: + raise ValueError( + "Can't have include_interaction be False and interaction_only" + " be True" + ) n_features = x_full[0].shape[x_full[0].ax_coord] combinations = self._combinations( n_features, @@ -190,10 +188,7 @@ def fit(self, x_full, y=None): self.interaction_only, self.include_bias, ) - if float(__version__[:3]) >= 1.0: - self.n_features_in_ = n_features - else: - self.n_input_features_ = n_features + self.n_features_in_ = n_features self.n_output_features_ = sum(1 for _ in combinations) return self @@ -203,19 +198,8 @@ def transform(self, x_full): Parameters ---------- - x : array-like or CSR/CSC sparse matrix, shape (n_samples, n_features) + x_full : {array-like, sparse matrix} of shape (n_samples, n_features) The data to transform, row by row. - Prefer CSR over CSC for sparse input (for speed), but CSC is - required if the degree is 4 or higher. If the degree is less than - 4 and the input format is CSC, it will be converted to CSR, have - its polynomial features generated, then converted back to CSC. - If the degree is 2 or 3, the method described in "Leveraging - Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices - Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is - used, which is much faster than the method used on CSC input. For - this reason, a CSC input will be converted to CSR, and the output - will be converted back to CSC prior to being returned, hence the - preference of CSR. Returns ------- @@ -231,72 +215,70 @@ def transform(self, x_full): if sparse.issparse(x) and x.format not in ["csr", "csc"]: # create new with correct sparse axes = comprehend_axes(x) - x = x.asformat("csr") + x = x.asformat("csc") wrap_axes(axes, x) - - n_samples = x.shape[x.ax_time] n_features = x.shape[x.ax_coord] - if float(__version__[:3]) >= 1.0: - if n_features != self.n_features_in_: - raise ValueError("x shape does not match training shape") - else: - if n_features != self.n_input_features_: - raise ValueError("x shape does not match training shape") - - if sparse.isspmatrix_csr(x): - if self.degree > 3: - return sparse.csr_matrix(self.transform(x.tocsc())) - to_stack = [] - if self.include_bias: - to_stack.append(np.ones(shape=(n_samples, 1), dtype=x.dtype)) - to_stack.append(x) - for deg in range(2, self.degree + 1): - xp_next = _csr_polynomial_expansion( - x.data, - x.indices, - x.indptr, - x.shape[1], - self.interaction_only, - deg, - ) - if xp_next is None: - break - to_stack.append(xp_next) - xp = sparse.hstack(to_stack, format="csr") - elif sparse.isspmatrix_csc(x) and self.degree < 4: - return sparse.csc_matrix(self.transform(x.tocsr())) + if n_features != self.n_features_in_: + raise ValueError("x shape does not match training shape") + + combinations = self._combinations( + n_features, + self.degree, + self.include_interaction, + self.interaction_only, + self.include_bias, + ) + if sparse.isspmatrix(x): + columns = [] + for combo in combinations: + if combo: + out_col = 1 + for col_idx in combo: + out_col = x[..., col_idx].multiply(out_col) + columns.append(out_col) + else: + bias = sparse.csc_matrix(np.ones((x.shape[0], 1))) + columns.append(bias) + xp = sparse.hstack(columns, dtype=x.dtype).tocsc() else: - combinations = self._combinations( - n_features, - self.degree, - 
self.include_interaction, - self.interaction_only, - self.include_bias, + xp = AxesArray( + np.empty( + (*x.shape[:-1], self.n_output_features_), + dtype=x.dtype, + order=self.order, + ), + x.axes, ) - if sparse.isspmatrix(x): - columns = [] - for comb in combinations: - if comb: - out_col = 1 - for col_idx in comb: - out_col = x[..., col_idx].multiply(out_col) - columns.append(out_col) - else: - bias = sparse.csc_matrix(np.ones((x.shape[0], 1))) - columns.append(bias) - xp = sparse.hstack(columns, dtype=x.dtype).tocsc() - else: - xp = AxesArray( - np.empty( - (*x.shape[:-1], self.n_output_features_), - dtype=x.dtype, - order=self.order, - ), - x.__dict__, - ) - for i, comb in enumerate(combinations): - xp[..., i] = x[..., comb].prod(-1) + for i, combo in enumerate(combinations): + xp[..., i] = x[..., combo].prod(-1) xp_full = xp_full + [xp] - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full + + +def n_poly_features( + n_in_feat: int, + degree: int, + include_bias: bool = False, + include_interation: bool = True, + interaction_only: bool = False, +) -> int: + """Calculate number of polynomial features + + Args: + n_in_feat: number of input features, e.g. 3 for x, y, z + degree: polynomial degree, e.g. 2 for quadratic + include_bias: whether to include a constant term + include_interaction: whether to include terms mixing multiple inputs + interaction_only: whether to omit terms of x_m * x_n^p for p > 1 + """ + if not include_interation and interaction_only: + raise ValueError("Cannot set interaction only if include_interaction is False") + n_feat = include_bias + if not include_interation: + return n_feat + n_in_feat * degree + for deg in range(1, degree + 1): + if interaction_only: + n_feat += comb(n_in_feat, deg) + else: + n_feat += comb(n_in_feat + deg - 1, deg) + return n_feat From 3436aedd89f51228a9fe139c8895f6e31463ce4f Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 29 May 2024 17:50:18 -0700 Subject: [PATCH 05/11] Backport changes to BaseOptimizer, SR3, and ConstrainedSR3 --- .github/workflows/main.yml | 2 +- pysindy/optimizers/base.py | 98 +++++++++++++++++---------- pysindy/optimizers/constrained_sr3.py | 2 - pysindy/optimizers/sr3.py | 26 +++---- 4 files changed, 78 insertions(+), 50 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c6bde5a64..afa7b169c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -36,7 +36,7 @@ jobs: sudo apt-get update -y sudo apt-get install pandoc sudo apt-get update -y - pip install -e .[docs] + pip install -e .[docs,cvxpy,miosr,sbr] - name: Build docs run: | cd docs diff --git a/pysindy/optimizers/base.py b/pysindy/optimizers/base.py index 377f8a171..01526f452 100644 --- a/pysindy/optimizers/base.py +++ b/pysindy/optimizers/base.py @@ -15,6 +15,7 @@ from sklearn.utils.validation import check_X_y from ..utils import AxesArray +from ..utils import drop_nan_samples def _rescale_data(X, y, sample_weight): @@ -45,14 +46,9 @@ class BaseOptimizer(LinearRegression, ComplexityMixin): Parameters ---------- - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. - normalize_columns : boolean, optional (default False) Normalize the columns of x (the SINDy library terms) before regression - by dividing by the L2-norm. 
Note that the 'normalize' option in sklearn
-        is deprecated in sklearn versions >= 1.0 and will be removed.
+        by dividing by the L2-norm.
 
     copy_X : boolean, optional (default True)
         If True, X will be copied; else, it may be overwritten.
@@ -62,13 +58,24 @@ class BaseOptimizer(LinearRegression, ComplexityMixin):
         Initial guess for coefficients ``coef_``.
         If None, the initial guess is obtained via a least-squares fit.
 
+    unbias: Whether to perform an extra step of unregularized linear
+        regression to unbias the coefficients for the identified
+        support. If an optimizer (``self.optimizer``) applies any type
+        of regularization, that regularization may bias coefficients,
+        improving the conditioning of the problem but harming the
+        quality of the fit. Setting ``unbias==True`` enables an extra
+        step wherein unregularized linear regression is applied, but
+        only for the coefficients in the support identified by the
+        optimizer. This helps to remove the bias introduced by
+        regularization.
+
     Attributes
     ----------
     coef_ : array, shape (n_features,) or (n_targets, n_features)
         Weight vector(s).
 
     ind_ : array, shape (n_features,) or (n_targets, n_features)
-        Array of 0s and 1s indicating which coefficients of the
+        Array of bools indicating which coefficients of the
         weight vector have not been masked out.
 
     history_ : list
@@ -85,21 +92,29 @@ def __init__(
         self,
         max_iter=20,
         normalize_columns=False,
-        fit_intercept=False,
         initial_guess=None,
         copy_X=True,
+        unbias: bool = True,
     ):
-        super(BaseOptimizer, self).__init__(fit_intercept=fit_intercept, copy_X=copy_X)
-
-        if max_iter <= 0:
-            raise ValueError("max_iter must be positive")
-
+        super().__init__(fit_intercept=False, copy_X=copy_X)
         self.max_iter = max_iter
         self.iters = 0
-        if np.ndim(initial_guess) == 1:
-            initial_guess = initial_guess.reshape(1, -1)
         self.initial_guess = initial_guess
         self.normalize_columns = normalize_columns
+        self.unbias = unbias
+        self.__post_init_guard()
+
+    # See name mangling rules for double underscore rationale
+    def __post_init_guard(self):
+        """Conduct initialization post-init, as required by the scikit-learn API."""
+        if np.ndim(self.initial_guess) == 1:
+            self.initial_guess = self.initial_guess.reshape(1, -1)
+        if self.max_iter <= 0:
+            raise ValueError("max_iter must be positive")
+
+    def set_params(self, **kwargs):
+        super().set_params(**kwargs)
+        self.__post_init_guard()
 
     # Force subclasses to implement this
     @abc.abstractmethod
@@ -107,7 +122,8 @@ def _reduce(self):
         """
         Carry out the bulk of the work of the fit function.
 
-        Subclass implementations MUST update self.coef_.
+        Subclass implementations MUST update self.coef_ as shape
+        (n_targets, n_inputs).
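The ``unbias`` option documented above (and implemented in ``_unbias`` later in this patch) amounts to an ordinary least-squares refit restricted to the support the regularized solve selected. A rough stand-alone illustration with synthetic data; every name here is invented for the sketch:

    import numpy as np
    from sklearn.linear_model import LinearRegression

    rng = np.random.default_rng(0)
    x = rng.normal(size=(200, 5))
    y = x[:, :2] @ np.array([[1.5], [-0.7]]) + 0.01 * rng.normal(size=(200, 1))

    coef_biased = np.array([[1.2, -0.5, 0.0, 0.0, 0.0]])  # pretend regularized result
    ind = np.abs(coef_biased) > 1e-14                      # support mask, analogous to ind_
    coef_unbiased = np.zeros_like(coef_biased)
    for i in range(ind.shape[0]):
        if np.any(ind[i]):
            # unregularized refit on the selected columns only
            coef_unbiased[i, ind[i]] = (
                LinearRegression(fit_intercept=False).fit(x[:, ind[i]], y[:, i]).coef_
            )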
""" raise NotImplementedError @@ -134,12 +150,16 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws): ------- self : returns an instance of self """ + x_ = AxesArray(np.asarray(x_), {"ax_sample": 0, "ax_coord": 1}) + y_axes = {"ax_sample": 0} if y.ndim == 1 else {"ax_sample": 0, "ax_coord": 1} + y = AxesArray(np.asarray(y), y_axes) + x_, y = drop_nan_samples(x_, y) x_, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=True) x, y, X_offset, y_offset, X_scale = _preprocess_data( x_, y, - fit_intercept=self.fit_intercept, + fit_intercept=False, copy=self.copy_X, sample_weight=sample_weight, ) @@ -178,6 +198,9 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws): self._reduce(x_normed, y, **reduce_kws) self.ind_ = np.abs(self.coef_) > 1e-14 + if self.unbias: + self._unbias(x_normed, y) + # Rescale coefficients to original units if self.normalize_columns: self.coef_ = np.multiply(reg, self.coef_) @@ -189,6 +212,17 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws): self._set_intercept(X_offset, y_offset, X_scale) return self + def _unbias(self, x: np.ndarray, y: np.ndarray): + coef = np.zeros((y.shape[1], x.shape[1])) + for i in range(self.ind_.shape[0]): + if np.any(self.ind_[i]): + coef[i, self.ind_[i]] = ( + LinearRegression(fit_intercept=False) + .fit(x[:, self.ind_[i]], y[:, i]) + .coef_ + ) + self.coef_ = coef + class EnsembleOptimizer(BaseOptimizer): """Wrapper class for ensembling methods. @@ -249,12 +283,6 @@ class EnsembleOptimizer(BaseOptimizer): coef_full_ : array, shape (n_features,) or (n_targets, n_features) Weight vector(s) that are not subjected to the regularization. This is the w in the objective function. - - unbias : boolean - Whether to perform an extra step of unregularized linear regression - to unbias the coefficients for the identified support. - ``unbias`` is automatically set to False if a constraint is used and - is otherwise left uninitialized. """ def __init__( @@ -271,9 +299,8 @@ def __init__( if not hasattr(opt, "initial_guess"): opt.initial_guess = None - super(EnsembleOptimizer, self).__init__( + super().__init__( max_iter=opt.max_iter, - fit_intercept=opt.fit_intercept, initial_guess=opt.initial_guess, copy_X=opt.copy_X, ) @@ -281,19 +308,19 @@ def __init__( raise ValueError( "If not ensembling data or library terms, use another optimizer" ) - if n_subset is not None and n_subset <= 0: - raise ValueError("n_subset must be a positive integer if bagging") - if n_candidates_to_drop is not None and n_candidates_to_drop <= 0: + if bagging and n_subset is not None and n_subset < 1: + raise ValueError("n_subset must be a positive integer or None if bagging") + if library_ensemble and ( + n_candidates_to_drop is None or n_candidates_to_drop < 1 + ): raise ValueError( "n_candidates_to_drop must be a positive integer if ensembling library" ) - self.opt = opt - if n_models is None or n_models == 0: - warnings.warn( - "n_models must be a positive integer. Explicitly initialized to zero" - " or None, defaulting to 20." 
+ if n_models < 1: + raise ValueError( + "n_candidates_to_drop must be a positive integer if ensembling library" ) - n_models = 20 + self.opt = opt self.n_models = n_models self.n_subset = n_subset self.bagging = bagging @@ -302,6 +329,7 @@ def __init__( self.replace = replace self.n_candidates_to_drop = n_candidates_to_drop self.coef_list = [] + self.unbias = False def _reduce(self, x: AxesArray, y: np.ndarray) -> None: x = AxesArray(np.asarray(x), {"ax_sample": 0, "ax_coord": 1}) @@ -316,7 +344,7 @@ def _reduce(self, x: AxesArray, y: np.ndarray) -> None: else: n_subset = self.n_subset - n_features = x.shape[x.ax_coord] + n_features = x.n_coord if self.library_ensemble and self.n_candidates_to_drop > n_features: warnings.warn( "n_candidates_to_drop larger than number of features. Cannot " diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index eb20cf159..300bce897 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -300,8 +300,6 @@ def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): <= self.constraint_rhs[: self.constraint_separation_index] ) prob = cp.Problem(cp.Minimize(cost), constraints) - else: - cp.Problem(cp.Minimize(cost)) prob_clone = deepcopy(prob) # default solver is SCS/OSQP here but switches to ECOS for L2 diff --git a/pysindy/optimizers/sr3.py b/pysindy/optimizers/sr3.py index 658ad7fa0..58db5ef5c 100644 --- a/pysindy/optimizers/sr3.py +++ b/pysindy/optimizers/sr3.py @@ -74,10 +74,6 @@ class SR3(BaseOptimizer): Initial guess for coefficients ``coef_``. If None, least-squares is used to obtain an initial guess. - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. - normalize_columns : boolean, optional (default False) Normalize the columns of x (the SINDy library terms) before regression by dividing by the L2-norm. Note that the 'normalize' option in sklearn @@ -102,6 +98,10 @@ class SR3(BaseOptimizer): If True, prints out the different error terms every max_iter / 10 iterations. + unbias: bool (default False) + See base class for definition. Most options are incompatible + with unbiasing. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -147,18 +147,18 @@ def __init__( trimming_fraction=0.0, trimming_step_size=1.0, max_iter=30, - fit_intercept=False, copy_X=True, initial_guess=None, normalize_columns=False, verbose=False, + unbias=False, ): super(SR3, self).__init__( max_iter=max_iter, initial_guess=initial_guess, - fit_intercept=fit_intercept, copy_X=copy_X, normalize_columns=normalize_columns, + unbias=unbias, ) if threshold < 0: @@ -169,6 +169,11 @@ def __init__( raise ValueError("tol must be positive") if (trimming_fraction < 0) or (trimming_fraction > 1): raise ValueError("trimming fraction must be between 0 and 1") + if trimming_fraction > 0 and unbias: + raise ValueError( + "Unbiasing counteracts some of the effects of trimming. 
Either set" + " unbias=False or trimming_fraction=0.0" + ) if thresholder.lower() not in ( "l0", "l1", @@ -337,8 +342,8 @@ def _reduce(self, x, y): coef_sparse = self.coef_.T n_samples, n_features = x.shape + coef_full = coef_sparse.copy() if self.use_trimming: - coef_full = coef_sparse.copy() trimming_array = np.repeat(1.0 - self.trimming_fraction, n_samples) self.history_trimming_ = [trimming_array] else: @@ -359,8 +364,7 @@ def _reduce(self, x, y): "Total Error: |y-Xw|^2 + |w-u|^2/v + R(u)", ] print( - "{: >10} ... {: >10} ... {: >10} ... {: >10}" - " ... {: >10}".format(*row) + "{: >10} ... {: >10} ... {: >10} ... {: >10} ... {: >10}".format(*row) ) objective_history = [] @@ -388,9 +392,7 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"SR3 did not converge after {self.max_iter} iterations.", ConvergenceWarning, ) self.coef_ = coef_sparse.T From 2d141d90252813bf4fc1bab7fc6d6ac742ac2009 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 29 May 2024 18:52:51 -0700 Subject: [PATCH 06/11] CLN: specify nonsparse rs steps as "if local trapping" --- .gitattributes | 4 +++ pysindy/optimizers/trapping_sr3.py | 49 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/.gitattributes b/.gitattributes index ac65e8e5a..c12963a6d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,7 @@ # Interpret Jupyter notebooks as Python *.ipynb linguist-language=Python + +*.ipynb diff=jupyternotebook + +*.ipynb merge=jupyternotebook diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 0e769aac8..f7dc9c365 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -606,6 +606,31 @@ def _update_coef_sparse_rs( return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) + def _update_coef_nonsparse_rs( + self, n_tgts, n_features, Pmatrix, A, coef_prev, xTx, xTy + ): + pTp = np.dot(Pmatrix.T, Pmatrix) + H = xTx + pTp / self.eta + P_transpose_A = np.dot(Pmatrix.T, A.flatten()) + + if self.method == "local": + # notice reshaping PQ here requires fortran-ordering + PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) + PQ = np.reshape(PQ, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F") + PQTPQ = np.dot(PQ.T, PQ) + PQ = np.reshape( + self.PQ_, (n_tgts, n_tgts, n_tgts, n_tgts * n_features), "F" + ) + PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) + PQ_ep = PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) + PQ_ep = np.reshape( + PQ_ep, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" + ) + PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) + H += PQTPQ / self.alpha + PQTPQ_ep / self.beta + + return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) + def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): """ If using the relaxation formulation of trapping SINDy, solves the @@ -762,28 +787,8 @@ def _reduce(self, x, y): # if threshold = 0, there is analytic expression # for the solve over the coefficients, # which is coded up here separately - pTp = np.dot(Pmatrix.T, Pmatrix) - # notice reshaping PQ here requires fortran-ordering - PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) - PQ = np.reshape( - PQ, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" - ) - PQTPQ = np.dot(PQ.T, PQ) - PQ = np.reshape( - self.PQ_, (n_tgts, n_tgts, n_tgts, 
n_tgts * n_features), "F" - ) - PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) - PQ_ep = ( - PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) - ) - PQ_ep = np.reshape( - PQ_ep, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" - ) - PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) - H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta - P_transpose_A = np.dot(Pmatrix.T, A.flatten()) - coef_sparse = self._solve_nonsparse_relax_and_split( - H, xTy, P_transpose_A, coef_prev + coef_sparse = self._update_coef_nonsparse_rs( + Pmatrix, A, coef_prev, xTx, xTy ) # If problem over xi becomes infeasible, break out of the loop From 02b5a8ad100c7648d2e4aa6789de5b069cd27db7 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Thu, 30 May 2024 11:49:40 -0700 Subject: [PATCH 07/11] CLN: Align a few bits and pieces to trapping-resolve --- pysindy/optimizers/trapping_sr3.py | 179 ++++++++++------------------- test/conftest.py | 1 - 2 files changed, 60 insertions(+), 120 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index f7dc9c365..4afb9a49e 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -71,127 +71,71 @@ class TrappingSR3(ConstrainedSR3): Parameters ---------- - threshold : float, optional (default 0.1) - Determines the strength of the regularization. When the - regularization function R is the L0 norm, the regularization - is equivalent to performing hard thresholding, and lambda - is chosen to threshold at the value given by this parameter. - This is equivalent to choosing lambda = threshold^2 / (2 * nu). - - eta : float, optional (default 1.0e20) - Determines the strength of the stability term ||Pw-A||^2 in the + eta : + Determines the strength of the stability term :math:`||Pw-A||^2` in the optimization. The default value is very large so that the algorithm default is to ignore the stability term. In this limit, this should be approximately equivalent to the ConstrainedSR3 method. - alpha_m : float, optional (default eta * 0.1) - Determines the step size in the prox-gradient descent over m. - For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. - Typically 0.01 * eta <= alpha_m <= 0.1 * eta. - - alpha_A : float, optional (default eta) - Determines the step size in the prox-gradient descent over A. - For convergence, need alpha_A <= eta, so typically - alpha_A = eta is used. + eps_solver : + If threshold != 0, this specifies the error tolerance in the + CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. - alpha : float, optional (default 1.0e20) - Determines the strength of the local stability term ||Qijk||^2 in the - optimization. The default value is very large so that the + alpha: + Determines the strength of the local stability term :math:`||Qijk||^2` + in the optimization. The default value (1e20) is very large so that the algorithm default is to ignore this term. - beta : float, optional (default 1.0e20) + beta: Determines the strength of the local stability term - ||Qijk + Qjik + Qkij||^2 in the + :math:`||Qijk + Qjik + Qkij||^2` in the optimization. The default value is very large so that the algorithm default is to ignore this term. - gamma : float, optional (default 0.1) + mod_matrix: + Lyapunov matrix. Trapping theorems apply to energy + :math:`\\propto \\dot y \\cdot y`, but also to any + :math:`\\propto \\dot y P \\cdot y` for Lyapunov matrix :math:`P`. + Defaults to the identity matrix. 
+ + alpha_A : + Determines the step size in the prox-gradient descent over A. + For convergence, need alpha_A <= eta, so typically + alpha_A = eta is used. + + alpha_m : + Determines the step size in the prox-gradient descent over m. + For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. + Typically 0.01 * eta <= alpha_m <= 0.1 * eta. (default eta * 0.1) + + gamma : Determines the negative interval that matrix A is projected onto. For most applications gamma = 0.1 - 1.0 works pretty well. - tol : float, optional (default 1e-5) - Tolerance used for determining convergence of the optimization - algorithm over w. - - tol_m : float, optional (default 1e-5) + tol_m : Tolerance used for determining convergence of the optimization algorithm over m. - thresholder : string, optional (default 'L1') + thresholder : Regularization function to use. For current trapping SINDy, only the L1 and L2 norms are implemented. Note that other convex norms could be straightforwardly implemented, but L0 requires - reformulation because of nonconvexity. - - thresholds : np.ndarray, shape (n_targets, n_features), optional \ - (default None) - Array of thresholds for each library function coefficient. - Each row corresponds to a measurement variable and each column - to a function from the feature library. - Recall that SINDy seeks a matrix :math:`\\Xi` such that - :math:`\\dot{X} \\approx \\Theta(X)\\Xi`. - ``thresholds[i, j]`` should specify the threshold to be used for the - (j + 1, i + 1) entry of :math:`\\Xi`. That is to say it should give the - threshold to be used for the (j + 1)st library function in the equation - for the (i + 1)st measurement variable. - - eps_solver : float, optional (default 1.0e-7) - If threshold != 0, this specifies the error tolerance in the - CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. + reformulation because of nonconvexity. (default 'L1') - inequality_constraints : bool, optional (default False) - If True, CVXPY methods are used. - - max_iter : int, optional (default 30) - Maximum iterations of the optimization algorithm. - - accel : bool, optional (default False) + accel : Whether or not to use accelerated prox-gradient descent for (m, A). + (default False) - m0 : np.ndarray, shape (n_targets), optional (default None) - Initial guess for vector m in the optimization. Otherwise - each component of m is randomly initialized in [-1, 1]. - - A0 : np.ndarray, shape (n_targets, n_targets), optional (default None) - Initial guess for vector A in the optimization. Otherwise - A is initialized as A = diag(gamma). + m0 : + Initial guess for trap center in the optimization. Default None + initializes vector elements randomly in [-1, 1]. shape (n_targets) - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. - - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - - normalize_columns : boolean, optional (default False) - Normalize the columns of x (the SINDy library terms) before regression - by dividing by the L2-norm. Note that the 'normalize' option in sklearn - is deprecated in sklearn versions >= 1.0 and will be removed. - - verbose : bool, optional (default False) - If True, prints out the different error terms every iteration. - - verbose_cvxpy : bool, optional (default False) - Boolean flag which is passed to CVXPY solve function to indicate if - output should be verbose or not. 
Only relevant for optimizers that - use the CVXPY package in some capabity. + A0 : + Initial guess for vector A in the optimization. Shape (n_targets, n_targets) + Default None, meaning A is initialized as A = diag(gamma). Attributes ---------- - coef_ : array, shape (n_features,) or (n_targets, n_features) - Regularized weight vector(s). This is the v in the objective - function. - - history_ : list - History of sparse coefficients. ``history_[k]`` contains the - sparse coefficients (v in the optimization objective function) - at iteration k. - - objective_history_ : list - History of the value of the objective at each step. Note that - the trapping SINDy problem is nonconvex, meaning that this value - may increase and decrease as the algorithm works. - A_history_ : list History of the auxiliary variable A that approximates diag(PW). @@ -224,6 +168,11 @@ class TrappingSR3(ConstrainedSR3): Transpose of 1st dimension and 2nd dimension of quadratic coefficient part of the P matrix in ||Pw - A||^2 + objective_history_ : list + History of the value of the objective at each step. Note that + the trapping SINDy problem is nonconvex, meaning that this value + may increase and decrease as the algorithm works. + Examples -------- >>> import numpy as np @@ -330,10 +279,6 @@ def __init__( self.gamma = gamma self.tol_m = tol_m self.accel = accel - self.unbias = False - self.use_constraints = (constraint_lhs is not None) and ( - constraint_rhs is not None - ) self.__post_init_guard() def __post_init_guard(self): @@ -383,9 +328,9 @@ def __post_init_guard(self): if self.tol <= 0 or self.tol_m <= 0 or self.eps_solver <= 0: raise ValueError("tol and tol_m must be positive") if self.inequality_constraints and self.threshold == 0.0: - raise ValueError( - "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False." - ) + raise ValueError("Inequality constraints requires threshold!=0") + if self.mod_matrix is None: + self.mod_matrix = np.eye(self._n_tgts) def set_params(self, **kwargs): super().set_params(**kwargs) @@ -490,7 +435,7 @@ def _build_PQ(polyterms: list[tuple[int, Int1D]]) -> Float5D: def _set_Ptensors( self, n_targets: int - ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[Float3D, Float4D, Float4D, Float5D, Float5D, Float5D]: """Make the projection tensors used for the algorithm.""" lib = PolynomialLibrary(2, include_bias=self._include_bias).fit( np.zeros((1, n_targets)) @@ -502,7 +447,7 @@ def _set_Ptensors( PQ_tensor = self._build_PQ(polyterms) PT_tensor = PQ_tensor.transpose([1, 0, 2, 3, 4]) # PM is the sum of PQ and PQ which projects out the sum of Qijk and Qjik - PM_tensor = PQ_tensor + PT_tensor + PM_tensor = cast(Float5D, PQ_tensor + PT_tensor) return PC_tensor, PL_tensor_unsym, PL_tensor, PQ_tensor, PT_tensor, PM_tensor @@ -702,9 +647,6 @@ def _reduce(self, x, y): ) var_len = n_features * n_tgts - if self.mod_matrix is None: - self.mod_matrix = np.eye(n_tgts) - # Define PL, PQ, PT and PM tensors, only relevant if the stability term in # trapping SINDy is turned on. 
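# A short sketch, relying only on pysindy's public PolynomialLibrary, of the exponent
# bookkeeping that _build_PQ and the other projection-tensor builders consume: after
# fitting, each row of powers_ gives the exponent of every target in one quadratic
# library feature (the variable names below are illustrative).
import numpy as np
from pysindy.feature_library import PolynomialLibrary

lib = PolynomialLibrary(2, include_bias=False).fit(np.zeros((1, 3)))
polyterms = list(enumerate(lib.powers_))
for feature_index, exponents in polyterms:
    print(feature_index, exponents)     # e.g. (3, [2 0 0]) is the x0**2 feature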
( @@ -750,11 +692,11 @@ def _reduce(self, x, y): # initial guess for m if self.m0 is not None: - m = self.m0 + trap_ctr = self.m0 else: np.random.seed(1) - m = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 - self.m_history_.append(m) + trap_ctr = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 + self.m_history_.append(trap_ctr) # Precompute some objects for optimization x_expanded = np.zeros((n_samples, n_tgts, n_features, n_tgts)) @@ -766,14 +708,14 @@ def _reduce(self, x, y): # if using acceleration tk_prev = 1 - m_prev = m + m_prev = trap_ctr # Begin optimization loop objective_history = [] for k in range(self.max_iter): # update P tensor from the newest m - mPM = np.tensordot(self.PM_, m, axes=([2], [0])) + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) @@ -797,24 +739,24 @@ def _reduce(self, x, y): break # Now solve optimization for m and A - m_prev, m, A, tk_prev = self._solve_m_relax_and_split( - n_tgts, n_features, m_prev, m, A, coef_sparse, tk_prev + m_prev, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( + n_tgts, n_features, m_prev, trap_ctr, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop - if m is None: - m = m_prev + if trap_ctr is None: + trap_ctr = m_prev break self.history_.append(coef_sparse.T) PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) # (m,A) update finished, append the result - self.m_history_.append(m) + self.m_history_.append(trap_ctr) self.A_history_.append(A) eigvals, eigvecs = np.linalg.eig(PW) self.PW_history_.append(PW) self.PWeigs_history_.append(np.sort(eigvals)) - mPM = np.tensordot(self.PM_, m, axes=([2], [0])) + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) self.p_history_.append(p) @@ -825,9 +767,8 @@ def _reduce(self, x, y): self._m_convergence_criterion() < self.tol_m and self._convergence_criterion() < self.tol ): - # Could not (further) select important features break - if k == self.max_iter - 1: + else: warnings.warn( "TrappingSR3._reduce did not converge after {} iters.".format( self.max_iter diff --git a/test/conftest.py b/test/conftest.py index 33e9e1b06..b63d344d0 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -70,7 +70,6 @@ def data_lorenz(): @pytest.fixture def data_multiple_trajectories(): - n_points = [100, 200, 500] initial_conditions = [ [8, 27, -7], From ecdd8008383ff15af386375159642213a021ee24 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Fri, 31 May 2024 17:57:08 -0700 Subject: [PATCH 08/11] fix: some function calls ALso align some comments and organization with trapping-resolve --- pysindy/optimizers/trapping_sr3.py | 40 +++++++++++++----------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 4afb9a49e..6a83697f5 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -79,7 +79,7 @@ class TrappingSR3(ConstrainedSR3): eps_solver : If threshold != 0, this specifies the error tolerance in the - CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. + CVXPY (OSQP) solve. 
Default 1.0e-7 alpha: Determines the strength of the local stability term :math:`||Qijk||^2` @@ -331,6 +331,8 @@ def __post_init_guard(self): raise ValueError("Inequality constraints requires threshold!=0") if self.mod_matrix is None: self.mod_matrix = np.eye(self._n_tgts) + if self.A0 is None: + self.A0 = np.diag(self.gamma * np.ones(self._n_tgts)) def set_params(self, **kwargs): super().set_params(**kwargs) @@ -534,7 +536,7 @@ def _objective(self, x, y, coef_sparse, A, PW, k): return R2 + stability_term + L1 + alpha_term + beta_term def _update_coef_sparse_rs( - self, r, N, var_len, x_expanded, y, Pmatrix, A, coef_prev + self, n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev ): """Solve coefficient update with CVXPY if threshold != 0""" xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) @@ -542,11 +544,15 @@ def _update_coef_sparse_rs( # new terms minimizing quadratic piece ||P^Q @ xi||_2^2 if self.method == "local": - Q = np.reshape(self.PQ_, (r * r * r, N * r), "F") + Q = np.reshape( + self.PQ_, (n_tgts * n_tgts * n_tgts, n_features * n_tgts), "F" + ) cost = cost + cp.sum_squares(Q @ xi) / self.alpha - Q = np.reshape(self.PQ_, (r, r, r, N * r), "F") + Q = np.reshape(self.PQ_, (n_tgts, n_tgts, n_tgts, n_features * n_tgts), "F") Q_ep = Q + np.transpose(Q, [1, 2, 0, 3]) + np.transpose(Q, [2, 0, 1, 3]) - Q_ep = np.reshape(Q_ep, (r * r * r, N * r), "F") + Q_ep = np.reshape( + Q_ep, (n_tgts * n_tgts * n_tgts, n_features * n_tgts), "F" + ) cost = cost + cp.sum_squares(Q_ep @ xi) / self.beta return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) @@ -647,8 +653,6 @@ def _reduce(self, x, y): ) var_len = n_features * n_tgts - # Define PL, PQ, PT and PM tensors, only relevant if the stability term in - # trapping SINDy is turned on. ( self.PC_, self.PL_unsym_, @@ -681,13 +685,7 @@ def _reduce(self, x, y): " ... {: >8} ... {: >10} ... 
{: >8}".format(*row) ) - # initial A - if self.A0 is not None: - A = self.A0 - elif np.any(self.PM_ != 0.0): - A = np.diag(self.gamma * np.ones(n_tgts)) - else: - A = np.diag(np.zeros(n_tgts)) + A = self.A0 self.A_history_.append(A) # initial guess for m @@ -708,7 +706,7 @@ def _reduce(self, x, y): # if using acceleration tk_prev = 1 - m_prev = trap_ctr + trap_prev_ctr = trap_ctr # Begin optimization loop objective_history = [] @@ -726,11 +724,8 @@ def _reduce(self, x, y): n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev ) else: - # if threshold = 0, there is analytic expression - # for the solve over the coefficients, - # which is coded up here separately coef_sparse = self._update_coef_nonsparse_rs( - Pmatrix, A, coef_prev, xTx, xTy + n_tgts, n_features, Pmatrix, A, coef_prev, xTx, xTy ) # If problem over xi becomes infeasible, break out of the loop @@ -738,14 +733,13 @@ def _reduce(self, x, y): coef_sparse = coef_prev break - # Now solve optimization for m and A - m_prev, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( - n_tgts, n_features, m_prev, trap_ctr, A, coef_sparse, tk_prev + trap_prev_ctr, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( + n_tgts, n_features, trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop if trap_ctr is None: - trap_ctr = m_prev + trap_ctr = trap_prev_ctr break self.history_.append(coef_sparse.T) PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) From 0254032593d693c4667d132f9baa389533ca5877 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sat, 1 Jun 2024 22:23:36 -0700 Subject: [PATCH 09/11] Add option to make PMatrix using global stability --- pysindy/optimizers/trapping_sr3.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 6a83697f5..150339691 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -582,7 +582,7 @@ def _update_coef_nonsparse_rs( return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) - def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): + def _solve_m_relax_and_split(self, m_prev, m, A, coef_sparse, tk_previous): """ If using the relaxation formulation of trapping SINDy, solves the (m, A) algorithm update. 
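# A self-contained shape sketch (random tensors, illustrative sizes, not library code)
# of the P-matrix update the next hunk performs: the trap center is contracted into the
# quadratic projector, combined with the linear projector, and flattened for the solve.
import numpy as np

n_tgts, n_features = 3, 9
PL = np.random.rand(n_tgts, n_tgts, n_tgts, n_features)           # linear projector
PQ = np.random.rand(n_tgts, n_tgts, n_tgts, n_tgts, n_features)   # quadratic projector
PM = PQ + PQ.transpose([1, 0, 2, 3, 4])                           # symmetrized quadratic projector
mod_matrix = np.eye(n_tgts)                                       # Lyapunov matrix
trap_ctr = np.random.rand(n_tgts)                                 # current trap center m

# "global" branch: p = PL - m . PQ
mPQ = np.tensordot(trap_ctr, PQ, axes=([0], [0]))                 # -> (n_tgts, n_tgts, n_tgts, n_features)
p_global = PL - mPQ
# "local" branch: p = mod_matrix . (PL + PM . m)
mPM = np.tensordot(PM, trap_ctr, axes=([2], [0]))                 # -> (n_tgts, n_tgts, n_tgts, n_features)
p_local = np.tensordot(mod_matrix, PL + mPM, axes=([1], [0]))

Pmatrix = p_global.reshape(n_tgts * n_tgts, n_tgts * n_features)  # same reshape for either branch
print(Pmatrix.shape)                                              # (9, 27)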
@@ -711,13 +711,16 @@ def _reduce(self, x, y): # Begin optimization loop objective_history = [] for k in range(self.max_iter): + # update P tensor from the newest trap center + if self.method == "global": + mPQ = np.tensordot(trap_ctr, self.PQ_, axes=([0], [0])) + p = self.PL_ - mPQ + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) + else: + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) + p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) - # update P tensor from the newest m - mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) - p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) - - # update w coef_prev = coef_sparse if (self.threshold > 0.0) or self.inequality_constraints: coef_sparse = self._update_coef_sparse_rs( @@ -734,7 +737,7 @@ def _reduce(self, x, y): break trap_prev_ctr, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( - n_tgts, n_features, trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev + trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop From 70fbb67a11f77ca6b9948c9b63d7d4193f73eda0 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sat, 1 Jun 2024 22:59:06 -0700 Subject: [PATCH 10/11] update objective to align with trapping-resolve --- pysindy/optimizers/trapping_sr3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 150339691..0333ee841 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -533,7 +533,10 @@ def _objective(self, x, y, coef_sparse, A, PW, k): "{0:5d} ... {1:8.3e} ... {2:8.3e} ... {3:8.2e}" " ... {4:8.2e} ... {5:8.2e} ... 
{6:8.2e}".format(*row) ) - return R2 + stability_term + L1 + alpha_term + beta_term + if self.method == "global": + return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 + else: + return R2 + stability_term + L1 + alpha_term + beta_term def _update_coef_sparse_rs( self, n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev From 44269653831dca11540a04205db48e4b062ae818 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 2 Jun 2024 16:12:06 -0700 Subject: [PATCH 11/11] cln: Split global/local math in _solve_m_relax_and_split Also: move m0 initialization to __init__, fix _objective calc --- pysindy/optimizers/trapping_sr3.py | 90 +++++++++++++++++++----------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 0333ee841..941aeb3e3 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -21,6 +21,7 @@ AnyFloat = np.dtype[np.floating[NBitBase]] Int1D = np.ndarray[tuple[int], np.dtype[np.int_]] +Float1D = np.ndarray[tuple[int], AnyFloat] Float2D = np.ndarray[tuple[int, int], AnyFloat] Float3D = np.ndarray[tuple[int, int, int], AnyFloat] Float4D = np.ndarray[tuple[int, int, int, int], AnyFloat] @@ -333,6 +334,9 @@ def __post_init_guard(self): self.mod_matrix = np.eye(self._n_tgts) if self.A0 is None: self.A0 = np.diag(self.gamma * np.ones(self._n_tgts)) + if self.m0 is None: + np.random.seed(1) + self.m0 = (np.random.rand(self._n_tgts) - np.ones(self._n_tgts)) * 2 def set_params(self, **kwargs): super().set_params(**kwargs) @@ -533,10 +537,10 @@ def _objective(self, x, y, coef_sparse, A, PW, k): "{0:5d} ... {1:8.3e} ... {2:8.3e} ... {3:8.2e}" " ... {4:8.2e} ... {5:8.2e} ... {6:8.2e}".format(*row) ) - if self.method == "global": - return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 - else: - return R2 + stability_term + L1 + alpha_term + beta_term + obj = R2 + stability_term + L1 + if self.method == "local": + obj += alpha_term + beta_term + return obj def _update_coef_sparse_rs( self, n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev @@ -585,35 +589,65 @@ def _update_coef_nonsparse_rs( return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) - def _solve_m_relax_and_split(self, m_prev, m, A, coef_sparse, tk_previous): - """ - If using the relaxation formulation of trapping SINDy, solves the - (m, A) algorithm update. + def _solve_m_relax_and_split( + self, + trap_ctr_prev: Float1D, + trap_ctr: Float1D, + A: Float2D, + coef_sparse: np.ndarray[tuple[NFeat, NTarget], AnyFloat], + tk_previous: float, + ) -> tuple[Float1D, Float2D, float]: + """Solves the (m, A) algorithm update. 
+ + TODO: explain the optimization this solves, add good names to variables, + and refactor/indirect the if global/local trapping conditionals + + Returns the new trap center (m), the new A, and the new acceleration weight """ # prox-grad for (A, m) # Accelerated prox gradient descent + # Calculate projection matrix from Quad terms to As if self.accel: tk = (1 + np.sqrt(1 + 4 * tk_previous**2)) / 2.0 - m_partial = m + (tk_previous - 1.0) / tk * (m - m_prev) + m_partial = trap_ctr + (tk_previous - 1.0) / tk * (trap_ctr - trap_ctr_prev) tk_previous = tk - mPM = np.tensordot(self.PM_, m_partial, axes=([2], [0])) + if self.method == "global": + mPQ = np.tensordot(m_partial, self.PQ_, axes=([0], [0])) + else: + mPM = np.tensordot(self.PM_, m_partial, axes=([2], [0])) else: - mPM = np.tensordot(self.PM_, m, axes=([2], [0])) - p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) - PMW = np.tensordot(self.PM_, coef_sparse, axes=([4, 3], [0, 1])) - PMW = np.tensordot(self.mod_matrix, PMW, axes=([1], [0])) + if self.method == "global": + mPQ = np.tensordot(trap_ctr, self.PQ_, axes=([0], [0])) + else: + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) + # Calculate As and its quad term components + if self.method == "global": + p = self.PL_ - mPQ + PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) + PQW = np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1])) + else: + p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) + PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) + PMW = np.tensordot(self.PM_, coef_sparse, axes=([4, 3], [0, 1])) + PMW = np.tensordot(self.mod_matrix, PMW, axes=([1], [0])) + # Calculate error in quadratic balance, and adjust trap center A_b = (A - PW) / self.eta - PMT_PW = np.tensordot(PMW, A_b, axes=([2, 1], [0, 1])) - if self.accel: - m_new = m_partial - self.alpha_m * PMT_PW + if self.method == "global": + PQWT_PW = np.tensordot(PQW, A_b, axes=([2, 1], [0, 1])) + if self.accel: + trap_new = m_partial - self.alpha_m * PQWT_PW + else: + trap_new = trap_ctr_prev - self.alpha_m * PQWT_PW else: - m_new = m_prev - self.alpha_m * PMT_PW - m_current = m_new + PMT_PW = np.tensordot(PMW, A_b, axes=([2, 1], [0, 1])) + if self.accel: + trap_new = m_partial - self.alpha_m * PMT_PW + else: + trap_new = trap_ctr_prev - self.alpha_m * PMT_PW # Update A A_new = self._update_A(A - self.alpha_A * A_b, PW) - return m_current, m_new, A_new, tk_previous + return trap_new, A_new, tk_previous def _solve_nonsparse_relax_and_split(self, H, xTy, P_transpose_A, coef_prev): """Update for the coefficients if threshold = 0.""" @@ -668,7 +702,7 @@ def _reduce(self, x, y): # Set initial coefficients if self.use_constraints and self.constraint_order.lower() == "target": self.constraint_lhs = reorder_constraints( - self.constraint_lhs, n_features, output_order="target" + self.constraint_lhs, n_features, output_order="feature" ) coef_sparse: np.ndarray[tuple[NFeat, NTarget], AnyFloat] = self.coef_.T @@ -690,13 +724,7 @@ def _reduce(self, x, y): A = self.A0 self.A_history_.append(A) - - # initial guess for m - if self.m0 is not None: - trap_ctr = self.m0 - else: - np.random.seed(1) - trap_ctr = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 + trap_ctr = self.m0 self.m_history_.append(trap_ctr) # Precompute some objects for optimization @@ -723,6 +751,7 @@ def _reduce(self, x, y): mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) 
Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) + self.p_history_.append(p) coef_prev = coef_sparse if (self.threshold > 0.0) or self.inequality_constraints: @@ -756,9 +785,6 @@ def _reduce(self, x, y): eigvals, eigvecs = np.linalg.eig(PW) self.PW_history_.append(PW) self.PWeigs_history_.append(np.sort(eigvals)) - mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) - p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - self.p_history_.append(p) # update objective objective_history.append(self._objective(x, y, coef_sparse, A, PW, k))
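# A minimal sketch (illustrative shapes and names, not the library's internals) of the
# threshold == 0 coefficient update (_update_coef_nonsparse_rs) called in the loop
# above: with the sparsity penalty off, the relax-and-split objective in w is
# quadratic, so
#     w = (X^T X + P^T P / eta)^{-1} (X^T y + P^T vec(A) / eta)
# is the exact minimizer; the local method only adds the two PQ-derived blocks to H.
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_tgts, n_features = 50, 3, 9
X = rng.standard_normal((n_samples, n_features))
y = rng.standard_normal((n_samples, n_tgts))
P = rng.standard_normal((n_tgts * n_tgts, n_tgts * n_features))   # stand-in for Pmatrix
A = -0.1 * np.eye(n_tgts)                                         # stand-in for the auxiliary A
eta = 1.0e3

xTx = np.kron(np.eye(n_tgts), X.T @ X)        # per-target least-squares blocks
xTy = (X.T @ y).flatten("F")                  # targets stacked one after another
H = xTx + P.T @ P / eta
rhs = xTy + P.T @ A.flatten() / eta
w = np.linalg.solve(H, rhs)                   # flattened (n_features * n_tgts,) coefficients
print(w.reshape((n_features, n_tgts), order="F").shape)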