From b8faec0fd8717f9fa7d670bacc240f80836af76d Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 26 May 2024 10:28:38 -0700 Subject: [PATCH 01/11] feat (trapping): change initialization to match master remove evolve_w, objective_history --- pysindy/optimizers/trapping_sr3.py | 77 ++++++++++++------------------ 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index aa2b0b1bb..5166e141c 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -68,9 +68,6 @@ class TrappingSR3(SR3): Parameters ---------- - evolve_w : bool, optional (default True) - If false, don't update w and just minimize over (m, A) - threshold : float, optional (default 0.1) Determines the strength of the regularization. When the regularization function R is the L0 norm, the regularization @@ -246,7 +243,6 @@ class TrappingSR3(SR3): def __init__( self, - evolve_w=True, threshold=0.1, eps_solver=1e-7, inequality_constraints=False, @@ -330,7 +326,6 @@ def __init__( raise ValueError("tol and tol_m must be positive") self.mod_matrix = mod_matrix - self.evolve_w = evolve_w self.eps_solver = eps_solver self.inequality_constraints = inequality_constraints self.m0 = m0 @@ -350,24 +345,17 @@ def __init__( self.PW_history_ = [] self.PWeigs_history_ = [] self.history_ = [] - self.objective_history = objective_history self.unbias = False self.verbose_cvxpy = verbose_cvxpy self.use_constraints = (constraint_lhs is not None) and ( constraint_rhs is not None ) - if inequality_constraints: - if not evolve_w: - raise ValueError( - "Use of inequality constraints requires solving for xi " - " (evolve_w=True)." - ) - if not self.use_constraints: - raise ValueError( - "Use of inequality constraints requires constraint_rhs " - "and constraint_lhs " - "variables to be passed to the Optimizer class." - ) + if inequality_constraints and not self.use_constraints: + raise ValueError( + "Use of inequality constraints requires constraint_rhs " + "and constraint_lhs " + "variables to be passed to the Optimizer class." 
+ ) if self.use_constraints: if constraint_order not in ("feature", "target"): @@ -788,34 +776,31 @@ def _reduce(self, x, y): # update w coef_prev = coef_sparse - if self.evolve_w: - if (self.threshold > 0.0) or self.inequality_constraints: - coef_sparse = self._solve_sparse_relax_and_split( - r, n_features, x_expanded, y, Pmatrix, A, coef_prev - ) - else: - # if threshold = 0, there is analytic expression - # for the solve over the coefficients, - # which is coded up here separately - pTp = np.dot(Pmatrix.T, Pmatrix) - # notice reshaping PQ here requires fortran-ordering - PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) - PQ = np.reshape(PQ, (r * r * r, r * n_features), "F") - PQTPQ = np.dot(PQ.T, PQ) - PQ = np.reshape(self.PQ_, (r, r, r, r * n_features), "F") - PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) - PQ_ep = ( - PQ - + np.transpose(PQ, [1, 2, 0, 3]) - + np.transpose(PQ, [2, 0, 1, 3]) - ) - PQ_ep = np.reshape(PQ_ep, (r * r * r, r * n_features), "F") - PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) - H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta - P_transpose_A = np.dot(Pmatrix.T, A.flatten()) - coef_sparse = self._solve_nonsparse_relax_and_split( - H, xTy, P_transpose_A, coef_prev - ) + if (self.threshold > 0.0) or self.inequality_constraints: + coef_sparse = self._solve_sparse_relax_and_split( + r, n_features, x_expanded, y, Pmatrix, A, coef_prev + ) + else: + # if threshold = 0, there is analytic expression + # for the solve over the coefficients, + # which is coded up here separately + pTp = np.dot(Pmatrix.T, Pmatrix) + # notice reshaping PQ here requires fortran-ordering + PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) + PQ = np.reshape(PQ, (r * r * r, r * n_features), "F") + PQTPQ = np.dot(PQ.T, PQ) + PQ = np.reshape(self.PQ_, (r, r, r, r * n_features), "F") + PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) + PQ_ep = ( + PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) + ) + PQ_ep = np.reshape(PQ_ep, (r * r * r, r * n_features), "F") + PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) + H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta + P_transpose_A = np.dot(Pmatrix.T, A.flatten()) + coef_sparse = self._solve_nonsparse_relax_and_split( + H, xTy, P_transpose_A, coef_prev + ) # If problem over xi becomes infeasible, break out of the loop if coef_sparse is None: From c69e247651decfddbddcfe3d8d1ba8deb51e8d2b Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 26 May 2024 12:19:53 -0700 Subject: [PATCH 02/11] cln: align simple initialization and admin work with master * move underscore attributes to _reduce (their presence is used by scikit-learn's check_fitted()) * add explicit _n_tgts, _include_bias, and _interaction_only to init * subclass from ConstrainedSR3 * Add "method" kwarg to control global/local (this will hopefully be removed in the future) --- pysindy/optimizers/trapping_sr3.py | 212 ++++++++++++++++------------- 1 file changed, 116 insertions(+), 96 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 5166e141c..4e83dbfdf 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -5,6 +5,8 @@ from math import comb from typing import cast from typing import NewType +from typing import Optional +from typing import Union import cvxpy as cp import numpy as np @@ -14,7 +16,7 @@ from ..feature_library.polynomial_library import PolynomialLibrary 
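Aside on the commit message above: the "check_fitted()" it mentions presumably refers to ``sklearn.utils.validation.check_is_fitted``, which treats any attribute ending in a single underscore as evidence that ``fit`` has run — hence moving the trailing-underscore attributes out of ``__init__`` and into ``_reduce``/``fit``. A minimal sketch of that behaviour, outside this patch and with a made-up estimator:

    from sklearn.base import BaseEstimator
    from sklearn.exceptions import NotFittedError
    from sklearn.utils.validation import check_is_fitted

    class TinyEstimator(BaseEstimator):
        def fit(self, X, y=None):
            self.coef_ = 0.0  # trailing-underscore attribute created only during fit
            return self

    est = TinyEstimator()
    try:
        check_is_fitted(est)  # no fitted attributes yet -> NotFittedError
    except NotFittedError:
        pass
    check_is_fitted(est.fit(None))  # passes once fit() has set ``coef_``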
from ..utils import reorder_constraints -from .sr3 import SR3 +from .constrained_sr3 import ConstrainedSR3 AnyFloat = np.dtype[np.floating[NBitBase]] Int1D = np.ndarray[tuple[int], np.dtype[np.int_]] @@ -27,7 +29,7 @@ NTarget = NewType("NTarget", int) -class TrappingSR3(SR3): +class TrappingSR3(ConstrainedSR3): """ Generalized trapping variant of sparse relaxed regularized regression. This optimizer can be used to identify quadratically nonlinear systems with @@ -243,130 +245,148 @@ class TrappingSR3(SR3): def __init__( self, - threshold=0.1, - eps_solver=1e-7, - inequality_constraints=False, - eta=None, - alpha=None, - beta=None, - mod_matrix=None, - alpha_A=None, - alpha_m=None, - gamma=-0.1, - tol=1e-5, - tol_m=1e-5, - thresholder="l1", - thresholds=None, - max_iter=30, - accel=False, - normalize_columns=False, - fit_intercept=False, - copy_X=True, - m0=None, - A0=None, - objective_history=None, - constraint_lhs=None, - constraint_rhs=None, - constraint_order="target", - verbose=False, - verbose_cvxpy=False, + *, + _n_tgts: int = None, + _include_bias: bool = False, + _interaction_only: bool = False, + method: str = "global", + eta: Union[float, None] = None, + eps_solver: float = 1e-7, + alpha: Optional[float] = None, + beta: Optional[float] = None, + mod_matrix: Optional[NDArray] = None, + alpha_A: Union[float, None] = None, + alpha_m: Union[float, None] = None, + gamma: float = -0.1, + tol_m: float = 1e-5, + thresholder: str = "l1", + accel: bool = False, + m0: Union[NDArray, None] = None, + A0: Union[NDArray, None] = None, + **kwargs, ): - super(TrappingSR3, self).__init__( - threshold=threshold, - max_iter=max_iter, - normalize_columns=normalize_columns, - fit_intercept=fit_intercept, - copy_X=copy_X, - thresholder=thresholder, - thresholds=thresholds, - verbose=verbose, + # n_tgts, constraints, etc are data-dependent parameters and belong in + # _reduce/fit (). The following is a hack until we refactor how + # constraints are applied in ConstrainedSR3 and MIOSR + self._include_bias = _include_bias + self._interaction_only = _interaction_only + self._n_tgts = _n_tgts + if _n_tgts is None: + warnings.warn( + "Trapping Optimizer initialized without _n_tgts. 
It will likely" + " be unable to fit data" + ) + _n_tgts = 1 + if method == "global": + if hasattr(kwargs, "constraint_separation_index"): + constraint_separation_index = kwargs["constraint_separation_index"] + elif kwargs.get("inequality_constraints", False): + constraint_separation_index = kwargs["constraint_lhs"].shape[0] + else: + constraint_separation_index = 0 + constraint_rhs, constraint_lhs = _make_constraints( + _n_tgts, include_bias=_include_bias + ) + constraint_order = kwargs.pop("constraint_order", "feature") + if constraint_order == "target": + constraint_lhs = np.transpose(constraint_lhs, [0, 2, 1]) + constraint_lhs = np.reshape(constraint_lhs, (constraint_lhs.shape[0], -1)) + try: + constraint_lhs = np.concatenate( + (kwargs.pop("constraint_lhs"), constraint_lhs), 0 + ) + constraint_rhs = np.concatenate( + (kwargs.pop("constraint_rhs"), constraint_rhs), 0 + ) + except KeyError: + pass + + super().__init__( + constraint_lhs=constraint_lhs, + constraint_rhs=constraint_rhs, + constraint_separation_index=constraint_separation_index, + constraint_order=constraint_order, + equality_constraints=True, + thresholder=thresholder, + **kwargs, + ) + elif method == "local": + super().__init__(thresholder=thresholder, **kwargs) + else: + raise ValueError(f"Can either use 'global' or 'local' method, not {method}") + + self.mod_matrix = mod_matrix + self.eps_solver = eps_solver + self.m0 = m0 + self.A0 = A0 + self.alpha_A = alpha_A + self.alpha_m = alpha_m + self.eta = eta + self.alpha = alpha + self.beta = beta + self.gamma = gamma + self.tol_m = tol_m + self.accel = accel + self.unbias = False + self.use_constraints = (constraint_lhs is not None) and ( + constraint_rhs is not None ) - if thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): + self.__post_init_guard() + + def __post_init_guard(self): + """Conduct initialization post-init, as required by scikitlearn API""" + if self.thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): raise ValueError("Regularizer must be (weighted) L1 or L2") - if eta is None: + if self.eta is None: warnings.warn( "eta was not set, so defaulting to eta = 1e20 " "with alpha_m = 1e-2 * eta, alpha_A = eta. Here eta is so " "large that the stability term in the optimization " "will be ignored." ) - eta = 1e20 - alpha_m = 1e18 - alpha_A = 1e20 + self.eta = 1e20 + self.alpha_m = 1e18 + self.alpha_A = 1e20 else: - if alpha_m is None: - alpha_m = eta * 1e-2 - if alpha_A is None: - alpha_A = eta - if eta <= 0: + if self.alpha_m is None: + self.alpha_m = self.eta * 1e-2 + if self.alpha_A is None: + self.alpha_A = self.eta + if self.eta <= 0: raise ValueError("eta must be positive") - if alpha is None: - alpha = 1e20 + if self.alpha is None: + self.alpha = 1e20 warnings.warn( "alpha was not set, so defaulting to alpha = 1e20 " "which is so" "large that the ||Qijk|| term in the optimization " "will be essentially ignored." ) - if beta is None: - beta = 1e20 + if self.beta is None: + self.beta = 1e20 warnings.warn( "beta was not set, so defaulting to beta = 1e20 " "which is so" "large that the ||Qijk + Qjik + Qkij|| " "term in the optimization will be essentially ignored." 
) - if alpha_m < 0 or alpha_m > eta: + + if self.alpha_m < 0 or self.alpha_m > self.eta: raise ValueError("0 <= alpha_m <= eta") - if alpha_A < 0 or alpha_A > eta: + if self.alpha_A < 0 or self.alpha_A > self.eta: raise ValueError("0 <= alpha_A <= eta") - if gamma >= 0: + if self.gamma >= 0: raise ValueError("gamma must be negative") - if tol <= 0 or tol_m <= 0 or eps_solver <= 0: + if self.tol <= 0 or self.tol_m <= 0 or self.eps_solver <= 0: raise ValueError("tol and tol_m must be positive") - - self.mod_matrix = mod_matrix - self.eps_solver = eps_solver - self.inequality_constraints = inequality_constraints - self.m0 = m0 - self.A0 = A0 - self.alpha_A = alpha_A - self.alpha_m = alpha_m - self.eta = eta - self.alpha = alpha - self.beta = beta - self.gamma = gamma - self.tol = tol - self.tol_m = tol_m - self.accel = accel - self.A_history_ = [] - self.m_history_ = [] - self.p_history_ = [] - self.PW_history_ = [] - self.PWeigs_history_ = [] - self.history_ = [] - self.unbias = False - self.verbose_cvxpy = verbose_cvxpy - self.use_constraints = (constraint_lhs is not None) and ( - constraint_rhs is not None - ) - if inequality_constraints and not self.use_constraints: + if self.inequality_constraints and self.threshold == 0.0: raise ValueError( - "Use of inequality constraints requires constraint_rhs " - "and constraint_lhs " - "variables to be passed to the Optimizer class." + "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False." ) - if self.use_constraints: - if constraint_order not in ("feature", "target"): - raise ValueError( - "constraint_order must be either 'feature' or 'target'" - ) - - self.constraint_lhs = constraint_lhs - self.constraint_rhs = constraint_rhs - self.unbias = False - self.constraint_order = constraint_order + def set_params(self, **kwargs): + super().set_params(**kwargs) + self.__post_init_guard() @staticmethod def _build_PC(polyterms: list[tuple[int, Int1D]]) -> Float3D: From afa25dbf8e7841289fb151b0cff71925db850396 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 26 May 2024 12:51:49 -0700 Subject: [PATCH 03/11] cln: offload some of _solve_sparse_relax_and_split() to ConstrainedSR3 This pulls in the changes to ConstrainedSR3 from trapping_resolve --- pysindy/optimizers/constrained_sr3.py | 149 +++++++++++++++----------- pysindy/optimizers/trapping_sr3.py | 138 +++++++++--------------- 2 files changed, 139 insertions(+), 148 deletions(-) diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index f00445b9a..eb20cf159 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -1,4 +1,7 @@ import warnings +from copy import deepcopy +from typing import Optional +from typing import Tuple try: import cvxpy as cp @@ -39,6 +42,9 @@ class ConstrainedSR3(SR3): to learn parsimonious physics-informed models from data." IEEE Access 8 (2020): 169259-169271. + Zheng, Peng, et al. "A unified framework for sparse relaxed + regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423. + Parameters ---------- threshold : float, optional (default 0.1) @@ -66,14 +72,10 @@ class ConstrainedSR3(SR3): max_iter : int, optional (default 30) Maximum iterations of the optimization algorithm. - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. 
- constraint_lhs : numpy ndarray, optional (default None) Shape should be (n_constraints, n_features * n_targets), - The left hand side matrix C of Cw <= d. - There should be one row per constraint. + The left hand side matrix C of Cw <= d (Or Cw = d for equality + constraints). There should be one row per constraint. constraint_rhs : numpy ndarray, shape (n_constraints,), optional (default None) The right hand side vector d of Cw <= d. @@ -97,9 +99,6 @@ class ConstrainedSR3(SR3): is deprecated in sklearn versions >= 1.0 and will be removed. Note that this parameter is incompatible with the constraints! - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - initial_guess : np.ndarray, optional (default None) Shape should be (n_features) or (n_targets, n_features). Initial guess for coefficients ``coef_``, (v in the mathematical equations) @@ -128,6 +127,10 @@ class ConstrainedSR3(SR3): output should be verbose or not. Only relevant for optimizers that use the CVXPY package in some capabity. + unbias: bool (default False) + See base class for definition. Most options are incompatible + with unbiasing. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -138,11 +141,15 @@ class ConstrainedSR3(SR3): Weight vector(s) that are not subjected to the regularization. This is the w in the objective function. - unbias : boolean - Whether to perform an extra step of unregularized linear regression - to unbias the coefficients for the identified support. - ``unbias`` is automatically set to False if a constraint is used and - is otherwise left uninitialized. + history_ : list + History of sparse coefficients. ``history_[k]`` contains the + sparse coefficients (v in the optimization objective function) + at iteration k. + + objective_history_ : list + History of the value of the objective at each step. Note that + the trapping SINDy problem is nonconvex, meaning that this value + may increase and decrease as the algorithm works. """ def __init__( @@ -158,17 +165,17 @@ def __init__( constraint_rhs=None, constraint_order="target", normalize_columns=False, - fit_intercept=False, copy_X=True, initial_guess=None, thresholds=None, equality_constraints=False, inequality_constraints=False, - constraint_separation_index=0, + constraint_separation_index: Optional[bool] = None, verbose=False, verbose_cvxpy=False, + unbias=False, ): - super(ConstrainedSR3, self).__init__( + super().__init__( threshold=threshold, nu=nu, tol=tol, @@ -178,10 +185,10 @@ def __init__( trimming_step_size=trimming_step_size, max_iter=max_iter, initial_guess=initial_guess, - fit_intercept=fit_intercept, copy_X=copy_X, normalize_columns=normalize_columns, verbose=verbose, + unbias=unbias, ) self.verbose_cvxpy = verbose_cvxpy @@ -189,7 +196,7 @@ def __init__( self.constraint_lhs = constraint_lhs self.constraint_rhs = constraint_rhs self.constraint_order = constraint_order - self.use_constraints = (constraint_lhs is not None) and ( + self.use_constraints = (constraint_lhs is not None) or ( constraint_rhs is not None ) @@ -203,15 +210,18 @@ def __init__( " but user did not specify if the constraints were equality or" " inequality constraints. Assuming equality constraints." 
) - self.equality_constraints = True + equality_constraints = True if self.use_constraints: if constraint_order not in ("feature", "target"): raise ValueError( "constraint_order must be either 'feature' or 'target'" ) - - self.unbias = False + if unbias: + raise ValueError( + "Constraints are incompatible with an unbiasing step. Set" + " unbias=False" + ) if inequality_constraints and not cvxpy_flag: raise ValueError( @@ -235,6 +245,16 @@ def __init__( ) self.inequality_constraints = inequality_constraints self.equality_constraints = equality_constraints + if self.use_constraints and constraint_separation_index is None: + if self.inequality_constraints and not self.equality_constraints: + constraint_separation_index = len(constraint_lhs) + elif self.equality_constraints and not self.inequality_constraints: + constraint_separation_index = 0 + else: + raise ValueError( + "If passing both inequality and equality constraints, must specify" + " constraint_separation_index." + ) self.constraint_separation_index = constraint_separation_index def _update_full_coef_constraints(self, H, x_transpose_y, coef_sparse): @@ -251,62 +271,66 @@ def _update_full_coef_constraints(self, H, x_transpose_y, coef_sparse): rhs = rhs.reshape(g.shape) return inv1.dot(rhs) - def _update_coef_cvxpy(self, x, y, coef_sparse): - xi = cp.Variable(coef_sparse.shape[0] * coef_sparse.shape[1]) - cost = cp.sum_squares(x @ xi - y.flatten()) + def _create_var_and_part_cost( + self, var_len: int, x_expanded: np.ndarray, y: np.ndarray + ) -> Tuple[cp.Variable, cp.Expression]: + xi = cp.Variable(var_len) + cost = cp.sum_squares(x_expanded @ xi - y.flatten()) if self.thresholder.lower() == "l1": cost = cost + self.threshold * cp.norm1(xi) elif self.thresholder.lower() == "weighted_l1": cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) + cost = cost + self.threshold * cp.norm2(xi) ** 2 elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) + cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 + return xi, cost + + def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): if self.use_constraints: - if self.inequality_constraints and self.equality_constraints: - # Process inequality constraints then equality constraints - prob = cp.Problem( - cp.Minimize(cost), - [ - self.constraint_lhs[: self.constraint_separation_index, :] @ xi - <= self.constraint_rhs[: self.constraint_separation_index], - self.constraint_lhs[self.constraint_separation_index :, :] @ xi - == self.constraint_rhs[self.constraint_separation_index :], - ], - ) - elif self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], + constraints = [] + if self.equality_constraints: + constraints.append( + self.constraint_lhs[self.constraint_separation_index :, :] @ xi + == self.constraint_rhs[self.constraint_separation_index :], ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], + if self.inequality_constraints: + constraints.append( + self.constraint_lhs[: self.constraint_separation_index, :] @ xi + <= self.constraint_rhs[: self.constraint_separation_index] ) + prob = cp.Problem(cp.Minimize(cost), constraints) else: - prob = cp.Problem(cp.Minimize(cost)) + cp.Problem(cp.Minimize(cost)) - # default solver is OSQP here but switches to ECOS for L2 + prob_clone = deepcopy(prob) + # default solver is SCS/OSQP 
here but switches to ECOS for L2 try: prob.solve( max_iter=self.max_iter, - eps_abs=self.tol, - eps_rel=self.tol, + eps_abs=tol, + eps_rel=tol, verbose=self.verbose_cvxpy, ) # Annoying error coming from L2 norm switching to use the ECOS # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. - except TypeError: + except (TypeError, ValueError): try: - prob.solve(abstol=self.tol, reltol=self.tol, verbose=self.verbose_cvxpy) + prob = prob_clone + prob.solve(max_iters=self.max_iter, verbose=self.verbose_cvxpy) + xi = prob.variables()[0] except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + warnings.warn("Solver failed, setting coefs to zeros") + xi.value = np.zeros(var_len) except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + try: + prob = prob_clone + prob.solve(max_iter=self.max_iter, verbose=self.verbose_cvxpy) + xi = prob.variables()[0] + except cp.error.SolverError: + warnings.warn("Solver failed, setting coefs to zeros") + xi.value = np.zeros(var_len) if xi.value is None: warnings.warn( @@ -315,7 +339,7 @@ def _update_coef_cvxpy(self, x, y, coef_sparse): ConvergenceWarning, ) return None - coef_new = (xi.value).reshape(coef_sparse.shape) + coef_new = (xi.value).reshape(coef_prev.shape) return coef_new def _update_sparse_coef(self, coef_full): @@ -422,7 +446,11 @@ def _reduce(self, x, y): objective_history = [] if self.inequality_constraints: - coef_sparse = self._update_coef_cvxpy(x_expanded, y, coef_sparse) + var_len = coef_sparse.shape[0] * coef_sparse.shape[1] + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + coef_sparse = self._update_coef_cvxpy( + xi, cost, var_len, coef_sparse, self.tol + ) objective_history.append(self._objective(x, y, 0, coef_full, coef_sparse)) else: for k in range(self.max_iter): @@ -461,9 +489,8 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"ConstrainedSR3 did not converge after {self.max_iter}" + " iterations.", ConvergenceWarning, ) if self.use_constraints and self.constraint_order.lower() == "target": diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 4e83dbfdf..0e769aac8 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -14,6 +14,7 @@ from numpy.typing import NDArray from sklearn.exceptions import ConvergenceWarning +from ..feature_library.polynomial_library import n_poly_features from ..feature_library.polynomial_library import PolynomialLibrary from ..utils import reorder_constraints from .constrained_sr3 import ConstrainedSR3 @@ -310,8 +311,10 @@ def __init__( thresholder=thresholder, **kwargs, ) + self.method = "global" elif method == "local": super().__init__(thresholder=thresholder, **kwargs) + self.method = "local" else: raise ValueError(f"Can either use 'global' or 'local' method, not {method}") @@ -585,75 +588,23 @@ def _objective(self, x, y, coef_sparse, A, PW, k): ) return R2 + stability_term + L1 + alpha_term + beta_term - def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev): + def _update_coef_sparse_rs( + self, r, N, var_len, x_expanded, y, Pmatrix, A, coef_prev + ): """Solve coefficient update with CVXPY if threshold != 0""" - xi = cp.Variable(N * r) - cost = 
cp.sum_squares(x_expanded @ xi - y.flatten()) - if self.thresholder.lower() == "l1": - cost = cost + self.threshold * cp.norm1(xi) - elif self.thresholder.lower() == "weighted_l1": - cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) - elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) ** 2 - elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta # new terms minimizing quadratic piece ||P^Q @ xi||_2^2 - Q = np.reshape(self.PQ_, (r * r * r, N * r), "F") - cost = cost + cp.sum_squares(Q @ xi) / self.alpha - Q = np.reshape(self.PQ_, (r, r, r, N * r), "F") - Q_ep = Q + np.transpose(Q, [1, 2, 0, 3]) + np.transpose(Q, [2, 0, 1, 3]) - Q_ep = np.reshape(Q_ep, (r * r * r, N * r), "F") - cost = cost + cp.sum_squares(Q_ep @ xi) / self.beta - - # Constraints - if self.use_constraints: - if self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], - ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], - ) - else: - prob = cp.Problem(cp.Minimize(cost)) - - # default solver is OSQP here but switches to ECOS for L2 - try: - prob.solve( - eps_abs=self.eps_solver, - eps_rel=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - # Annoying error coming from L2 norm switching to use the ECOS - # solver, which uses "max_iters" instead of "max_iter", and - # similar semantic changes for the other variables. - except TypeError: - try: - prob.solve( - abstol=self.eps_solver, - reltol=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) - - if xi.value is None: - warnings.warn( - "Infeasible solve, increase/decrease eta", - ConvergenceWarning, - ) - return None - coef_sparse = (xi.value).reshape(coef_prev.shape) - return coef_sparse + if self.method == "local": + Q = np.reshape(self.PQ_, (r * r * r, N * r), "F") + cost = cost + cp.sum_squares(Q @ xi) / self.alpha + Q = np.reshape(self.PQ_, (r, r, r, N * r), "F") + Q_ep = Q + np.transpose(Q, [1, 2, 0, 3]) + np.transpose(Q, [2, 0, 1, 3]) + Q_ep = np.reshape(Q_ep, (r * r * r, N * r), "F") + cost = cost + cp.sum_squares(Q_ep @ xi) / self.beta + + return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): """ @@ -711,16 +662,23 @@ def _reduce(self, x, y): TrappingSR3 algorithm. Assumes initial guess for coefficients is stored in ``self.coef_``. 
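For context on the refactor above: ``_update_coef_sparse_rs`` now only adds the trapping-specific terms to the partial cost built by ``_create_var_and_part_cost`` and delegates the solve to ``ConstrainedSR3._update_coef_cvxpy``. A self-contained sketch of that kind of objective, using made-up shapes and random data in place of the real projection tensors:

    import cvxpy as cp
    import numpy as np

    rng = np.random.default_rng(0)
    n_samples, n_tgts, n_features = 100, 3, 9
    var_len = n_tgts * n_features
    x_expanded = rng.normal(size=(n_samples * n_tgts, var_len))  # block-diagonal design in the real code
    y = rng.normal(size=(n_samples, n_tgts))
    Pmatrix = rng.normal(size=(n_tgts * n_tgts, var_len))        # stands in for the reshaped P tensor
    A = np.diag(-0.1 * np.ones(n_tgts))
    eta, threshold = 1.0e2, 0.1

    xi = cp.Variable(var_len)
    # data-fidelity plus L1 sparsity, as in _create_var_and_part_cost
    cost = cp.sum_squares(x_expanded @ xi - y.flatten()) + threshold * cp.norm1(xi)
    # relax-and-split stability term ||P xi - A||^2 / eta, as in _update_coef_sparse_rs
    cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / eta
    prob = cp.Problem(cp.Minimize(cost))
    prob.solve()
    coef = xi.value.reshape((n_tgts, n_features))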
""" - - n_samples, n_features = x.shape - self.n_features = n_features - r = y.shape[1] - N = n_features # int((r ** 2 + 3 * r) / 2.0) - if N > int((r**2 + 3 * r) / 2.0): - self._include_bias = True + self.A_history_ = [] + self.m_history_ = [] + self.p_history_ = [] + self.PW_history_ = [] + self.PWeigs_history_ = [] + self.history_ = [] + n_samples, n_tgts = y.shape + n_features = n_poly_features( + n_tgts, + 2, + include_bias=self._include_bias, + interaction_only=self._interaction_only, + ) + var_len = n_features * n_tgts if self.mod_matrix is None: - self.mod_matrix = np.eye(r) + self.mod_matrix = np.eye(n_tgts) # Define PL, PQ, PT and PM tensors, only relevant if the stability term in # trapping SINDy is turned on. @@ -731,7 +689,7 @@ def _reduce(self, x, y): self.PQ_, self.PT_, self.PM_, - ) = self._set_Ptensors(r) + ) = self._set_Ptensors(n_tgts) # Set initial coefficients if self.use_constraints and self.constraint_order.lower() == "target": @@ -760,9 +718,9 @@ def _reduce(self, x, y): if self.A0 is not None: A = self.A0 elif np.any(self.PM_ != 0.0): - A = np.diag(self.gamma * np.ones(r)) + A = np.diag(self.gamma * np.ones(n_tgts)) else: - A = np.diag(np.zeros(r)) + A = np.diag(np.zeros(n_tgts)) self.A_history_.append(A) # initial guess for m @@ -770,14 +728,14 @@ def _reduce(self, x, y): m = self.m0 else: np.random.seed(1) - m = (np.random.rand(r) - np.ones(r)) * 2 + m = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 self.m_history_.append(m) # Precompute some objects for optimization - x_expanded = np.zeros((n_samples, r, n_features, r)) - for i in range(r): + x_expanded = np.zeros((n_samples, n_tgts, n_features, n_tgts)) + for i in range(n_tgts): x_expanded[:, i, :, i] = x - x_expanded = np.reshape(x_expanded, (n_samples * r, r * n_features)) + x_expanded = np.reshape(x_expanded, (n_samples * n_tgts, n_tgts * n_features)) xTx = np.dot(x_expanded.T, x_expanded) xTy = np.dot(x_expanded.T, y.flatten()) @@ -792,13 +750,13 @@ def _reduce(self, x, y): # update P tensor from the newest m mPM = np.tensordot(self.PM_, m, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - Pmatrix = p.reshape(r * r, r * n_features) + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) # update w coef_prev = coef_sparse if (self.threshold > 0.0) or self.inequality_constraints: - coef_sparse = self._solve_sparse_relax_and_split( - r, n_features, x_expanded, y, Pmatrix, A, coef_prev + coef_sparse = self._update_coef_sparse_rs( + n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev ) else: # if threshold = 0, there is analytic expression @@ -807,14 +765,20 @@ def _reduce(self, x, y): pTp = np.dot(Pmatrix.T, Pmatrix) # notice reshaping PQ here requires fortran-ordering PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) - PQ = np.reshape(PQ, (r * r * r, r * n_features), "F") + PQ = np.reshape( + PQ, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" + ) PQTPQ = np.dot(PQ.T, PQ) - PQ = np.reshape(self.PQ_, (r, r, r, r * n_features), "F") + PQ = np.reshape( + self.PQ_, (n_tgts, n_tgts, n_tgts, n_tgts * n_features), "F" + ) PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) PQ_ep = ( PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) ) - PQ_ep = np.reshape(PQ_ep, (r * r * r, r * n_features), "F") + PQ_ep = np.reshape( + PQ_ep, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" + ) PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta P_transpose_A = np.dot(Pmatrix.T, 
A.flatten()) @@ -829,7 +793,7 @@ def _reduce(self, x, y): # Now solve optimization for m and A m_prev, m, A, tk_prev = self._solve_m_relax_and_split( - r, n_features, m_prev, m, A, coef_sparse, tk_prev + n_tgts, n_features, m_prev, m, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop From f463b18ab29f5e98c0e6a1de045b2188a0423dc3 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 29 May 2024 17:39:41 -0700 Subject: [PATCH 04/11] Backport polynomial library fixes to trapping_extended --- pysindy/feature_library/polynomial_library.py | 258 ++++++++---------- 1 file changed, 120 insertions(+), 138 deletions(-) diff --git a/pysindy/feature_library/polynomial_library.py b/pysindy/feature_library/polynomial_library.py index 602f6eac2..221a7a6dc 100644 --- a/pysindy/feature_library/polynomial_library.py +++ b/pysindy/feature_library/polynomial_library.py @@ -1,12 +1,12 @@ from itertools import chain -from itertools import combinations -from itertools import combinations_with_replacement as combinations_w_r +from math import comb +from typing import Iterator +from typing import Tuple import numpy as np +from numpy.typing import NDArray from scipy import sparse -from sklearn import __version__ from sklearn.preprocessing import PolynomialFeatures -from sklearn.preprocessing._csr_polynomial_expansion import _csr_polynomial_expansion from sklearn.utils.validation import check_is_fitted from ..utils import AxesArray @@ -45,22 +45,13 @@ class PolynomialLibrary(PolynomialFeatures, BaseFeatureLibrary): Order of output array in the dense case. 'F' order is faster to compute, but may slow down subsequent estimators. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. - Attributes ---------- powers_ : array, shape (n_output_features, n_input_features) powers_[i, j] is the exponent of the jth input in the ith output. - n_input_features_ : int + n_features_in_ : int The total number of input features. - WARNING: This is deprecated in scikit-learn version 1.0 and higher so - we check the sklearn.__version__ and switch to n_features_in if needed. n_output_features_ : int The total number of output features. 
This number is computed by @@ -74,68 +65,68 @@ def __init__( interaction_only=False, include_bias=True, order="C", - library_ensemble=False, - ensemble_indices=[0], ): - super(PolynomialLibrary, self).__init__( + super().__init__( degree=degree, interaction_only=interaction_only, include_bias=include_bias, order=order, ) - BaseFeatureLibrary.__init__( - self, library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) - if degree < 0 or not isinstance(degree, int): - raise ValueError("degree must be a nonnegative integer") - if (not include_interaction) and interaction_only: - raise ValueError( - "Can't have include_interaction be False and interaction_only" - " be True" - ) self.include_interaction = include_interaction @staticmethod def _combinations( - n_features, degree, include_interaction, interaction_only, include_bias - ): - comb = combinations if interaction_only else combinations_w_r - start = int(not include_bias) + n_features: int, + degree: int, + include_interaction: bool, + interaction_only: bool, + include_bias: bool, + ) -> Iterator[Tuple[int, ...]]: + """ + Create selection tuples of input indexes for each polynomail term + + Selection tuple iterates the input indexes present in a single term. + For example, (x+y+1)^2 would be in iterator of the tuples: + (), (0,), (1,), (0, 0), (0, 1), (1, 1) + 1 x y x^2 x*y y^2 + + Order of terms is preserved regardless of include_interation. + """ if not include_interaction: - if include_bias: - return chain( - [()], - chain.from_iterable( - combinations_w_r([j], i) - for i in range(1, degree + 1) - for j in range(n_features) - ), - ) - else: - return chain.from_iterable( - combinations_w_r([j], i) - for i in range(1, degree + 1) - for j in range(n_features) - ) - return chain.from_iterable( - comb(range(n_features), i) for i in range(start, degree + 1) + return chain( + [()] if include_bias else [], + ( + exponent * (feat_idx,) + for exponent in range(1, degree + 1) + for feat_idx in range(n_features) + ), + ) + return PolynomialFeatures._combinations( + n_features=n_features, + min_degree=int(not include_bias), + max_degree=degree, + interaction_only=interaction_only, + include_bias=include_bias, ) @property - def powers_(self): + def powers_(self) -> NDArray[np.int_]: + """ + The exponents of the polynomial as an array of shape + (n_features_out, n_features_in), where each item is the exponent of the + jth input variable in the ith polynomial term. + """ check_is_fitted(self) - if float(__version__[:3]) >= 1.0: - n_features = self.n_features_in_ - else: - n_features = self.n_input_features_ combinations = self._combinations( - n_features, - self.degree, - self.include_interaction, - self.interaction_only, - self.include_bias, + n_features=self.n_features_in_, + degree=self.degree, + include_interaction=self.include_interaction, + interaction_only=self.interaction_only, + include_bias=self.include_bias, + ) + return np.vstack( + [np.bincount(c, minlength=self.n_features_in_) for c in combinations] ) - return np.vstack([np.bincount(c, minlength=n_features) for c in combinations]) def get_feature_names(self, input_features=None): """Return feature names for output features. 
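The selection-tuple convention documented above lines up directly with the ``powers_`` property; a small, hypothetical two-feature quadratic library makes the correspondence concrete (expected output shown in comments):

    import numpy as np
    from pysindy.feature_library import PolynomialLibrary

    lib = PolynomialLibrary(degree=2, include_bias=True).fit(np.zeros((1, 2)))
    print(lib.get_feature_names(["x", "y"]))
    # should give: ['1', 'x', 'y', 'x^2', 'x y', 'y^2']
    print(lib.powers_)
    # one row of exponents per term, i.e. [0 0], [1 0], [0 1], [2 0], [1 1], [0 2]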
@@ -182,6 +173,13 @@ def fit(self, x_full, y=None): ------- self : instance """ + if self.degree < 0 or not isinstance(self.degree, int): + raise ValueError("degree must be a nonnegative integer") + if (not self.include_interaction) and self.interaction_only: + raise ValueError( + "Can't have include_interaction be False and interaction_only" + " be True" + ) n_features = x_full[0].shape[x_full[0].ax_coord] combinations = self._combinations( n_features, @@ -190,10 +188,7 @@ def fit(self, x_full, y=None): self.interaction_only, self.include_bias, ) - if float(__version__[:3]) >= 1.0: - self.n_features_in_ = n_features - else: - self.n_input_features_ = n_features + self.n_features_in_ = n_features self.n_output_features_ = sum(1 for _ in combinations) return self @@ -203,19 +198,8 @@ def transform(self, x_full): Parameters ---------- - x : array-like or CSR/CSC sparse matrix, shape (n_samples, n_features) + x_full : {array-like, sparse matrix} of shape (n_samples, n_features) The data to transform, row by row. - Prefer CSR over CSC for sparse input (for speed), but CSC is - required if the degree is 4 or higher. If the degree is less than - 4 and the input format is CSC, it will be converted to CSR, have - its polynomial features generated, then converted back to CSC. - If the degree is 2 or 3, the method described in "Leveraging - Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices - Using K-Simplex Numbers" by Andrew Nystrom and John Hughes is - used, which is much faster than the method used on CSC input. For - this reason, a CSC input will be converted to CSR, and the output - will be converted back to CSC prior to being returned, hence the - preference of CSR. Returns ------- @@ -231,72 +215,70 @@ def transform(self, x_full): if sparse.issparse(x) and x.format not in ["csr", "csc"]: # create new with correct sparse axes = comprehend_axes(x) - x = x.asformat("csr") + x = x.asformat("csc") wrap_axes(axes, x) - - n_samples = x.shape[x.ax_time] n_features = x.shape[x.ax_coord] - if float(__version__[:3]) >= 1.0: - if n_features != self.n_features_in_: - raise ValueError("x shape does not match training shape") - else: - if n_features != self.n_input_features_: - raise ValueError("x shape does not match training shape") - - if sparse.isspmatrix_csr(x): - if self.degree > 3: - return sparse.csr_matrix(self.transform(x.tocsc())) - to_stack = [] - if self.include_bias: - to_stack.append(np.ones(shape=(n_samples, 1), dtype=x.dtype)) - to_stack.append(x) - for deg in range(2, self.degree + 1): - xp_next = _csr_polynomial_expansion( - x.data, - x.indices, - x.indptr, - x.shape[1], - self.interaction_only, - deg, - ) - if xp_next is None: - break - to_stack.append(xp_next) - xp = sparse.hstack(to_stack, format="csr") - elif sparse.isspmatrix_csc(x) and self.degree < 4: - return sparse.csc_matrix(self.transform(x.tocsr())) + if n_features != self.n_features_in_: + raise ValueError("x shape does not match training shape") + + combinations = self._combinations( + n_features, + self.degree, + self.include_interaction, + self.interaction_only, + self.include_bias, + ) + if sparse.isspmatrix(x): + columns = [] + for combo in combinations: + if combo: + out_col = 1 + for col_idx in combo: + out_col = x[..., col_idx].multiply(out_col) + columns.append(out_col) + else: + bias = sparse.csc_matrix(np.ones((x.shape[0], 1))) + columns.append(bias) + xp = sparse.hstack(columns, dtype=x.dtype).tocsc() else: - combinations = self._combinations( - n_features, - self.degree, - 
self.include_interaction, - self.interaction_only, - self.include_bias, + xp = AxesArray( + np.empty( + (*x.shape[:-1], self.n_output_features_), + dtype=x.dtype, + order=self.order, + ), + x.axes, ) - if sparse.isspmatrix(x): - columns = [] - for comb in combinations: - if comb: - out_col = 1 - for col_idx in comb: - out_col = x[..., col_idx].multiply(out_col) - columns.append(out_col) - else: - bias = sparse.csc_matrix(np.ones((x.shape[0], 1))) - columns.append(bias) - xp = sparse.hstack(columns, dtype=x.dtype).tocsc() - else: - xp = AxesArray( - np.empty( - (*x.shape[:-1], self.n_output_features_), - dtype=x.dtype, - order=self.order, - ), - x.__dict__, - ) - for i, comb in enumerate(combinations): - xp[..., i] = x[..., comb].prod(-1) + for i, combo in enumerate(combinations): + xp[..., i] = x[..., combo].prod(-1) xp_full = xp_full + [xp] - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full + + +def n_poly_features( + n_in_feat: int, + degree: int, + include_bias: bool = False, + include_interation: bool = True, + interaction_only: bool = False, +) -> int: + """Calculate number of polynomial features + + Args: + n_in_feat: number of input features, e.g. 3 for x, y, z + degree: polynomial degree, e.g. 2 for quadratic + include_bias: whether to include a constant term + include_interaction: whether to include terms mixing multiple inputs + interaction_only: whether to omit terms of x_m * x_n^p for p > 1 + """ + if not include_interation and interaction_only: + raise ValueError("Cannot set interaction only if include_interaction is False") + n_feat = include_bias + if not include_interation: + return n_feat + n_in_feat * degree + for deg in range(1, degree + 1): + if interaction_only: + n_feat += comb(n_in_feat, deg) + else: + n_feat += comb(n_in_feat + deg - 1, deg) + return n_feat From 3436aedd89f51228a9fe139c8895f6e31463ce4f Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 29 May 2024 17:50:18 -0700 Subject: [PATCH 05/11] Backport changes to BaseOptimizer, SR3, and ConstrainedSR3 --- .github/workflows/main.yml | 2 +- pysindy/optimizers/base.py | 98 +++++++++++++++++---------- pysindy/optimizers/constrained_sr3.py | 2 - pysindy/optimizers/sr3.py | 26 +++---- 4 files changed, 78 insertions(+), 50 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c6bde5a64..afa7b169c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -36,7 +36,7 @@ jobs: sudo apt-get update -y sudo apt-get install pandoc sudo apt-get update -y - pip install -e .[docs] + pip install -e .[docs,cvxpy,miosr,sbr] - name: Build docs run: | cd docs diff --git a/pysindy/optimizers/base.py b/pysindy/optimizers/base.py index 377f8a171..01526f452 100644 --- a/pysindy/optimizers/base.py +++ b/pysindy/optimizers/base.py @@ -15,6 +15,7 @@ from sklearn.utils.validation import check_X_y from ..utils import AxesArray +from ..utils import drop_nan_samples def _rescale_data(X, y, sample_weight): @@ -45,14 +46,9 @@ class BaseOptimizer(LinearRegression, ComplexityMixin): Parameters ---------- - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. - normalize_columns : boolean, optional (default False) Normalize the columns of x (the SINDy library terms) before regression - by dividing by the L2-norm. 
Note that the 'normalize' option in sklearn
-        is deprecated in sklearn versions >= 1.0 and will be removed.
+        by dividing by the L2-norm.
 
     copy_X : boolean, optional (default True)
         If True, X will be copied; else, it may be overwritten.
@@ -62,13 +58,24 @@ class BaseOptimizer(LinearRegression, ComplexityMixin):
         Initial guess for coefficients ``coef_``.
         If None, the initial guess is obtained via a least-squares fit.
 
+    unbias: Whether to perform an extra step of unregularized linear
+        regression to unbias the coefficients for the identified
+        support. If an optimizer (``self.optimizer``) applies any type
+        of regularization, that regularization may bias coefficients,
+        improving the conditioning of the problem but harming the
+        quality of the fit. Setting ``unbias==True`` enables an extra
+        step wherein unregularized linear regression is applied, but
+        only for the coefficients in the support identified by the
+        optimizer. This helps to remove the bias introduced by
+        regularization.
+
     Attributes
     ----------
     coef_ : array, shape (n_features,) or (n_targets, n_features)
         Weight vector(s).
 
     ind_ : array, shape (n_features,) or (n_targets, n_features)
-        Array of 0s and 1s indicating which coefficients of the
+        Array of bools indicating which coefficients of the
         weight vector have not been masked out.
 
     history_ : list
@@ -85,21 +92,29 @@ def __init__(
         self,
         max_iter=20,
         normalize_columns=False,
-        fit_intercept=False,
         initial_guess=None,
         copy_X=True,
+        unbias: bool = True,
     ):
-        super(BaseOptimizer, self).__init__(fit_intercept=fit_intercept, copy_X=copy_X)
-
-        if max_iter <= 0:
-            raise ValueError("max_iter must be positive")
-
+        super().__init__(fit_intercept=False, copy_X=copy_X)
         self.max_iter = max_iter
         self.iters = 0
-        if np.ndim(initial_guess) == 1:
-            initial_guess = initial_guess.reshape(1, -1)
         self.initial_guess = initial_guess
         self.normalize_columns = normalize_columns
+        self.unbias = unbias
+        self.__post_init_guard()
+
+    # See name mangling rules for double underscore rationale
+    def __post_init_guard(self):
+        """Conduct initialization post-init, as required by the scikit-learn API."""
+        if np.ndim(self.initial_guess) == 1:
+            self.initial_guess = self.initial_guess.reshape(1, -1)
+        if self.max_iter <= 0:
+            raise ValueError("max_iter must be positive")
+
+    def set_params(self, **kwargs):
+        super().set_params(**kwargs)
+        self.__post_init_guard()
 
     # Force subclasses to implement this
     @abc.abstractmethod
@@ -107,7 +122,8 @@ def _reduce(self):
         """
         Carry out the bulk of the work of the fit function.
 
-        Subclass implementations MUST update self.coef_.
+        Subclass implementations MUST update self.coef_ as shape
+        (n_targets, n_inputs).
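The ``unbias`` option documented above (and implemented in ``_unbias`` later in this patch) amounts to an ordinary least-squares refit restricted to the support the regularized solve selected. A rough stand-alone illustration with synthetic data; every name here is invented for the sketch:

    import numpy as np
    from sklearn.linear_model import LinearRegression

    rng = np.random.default_rng(0)
    x = rng.normal(size=(200, 5))
    y = x[:, :2] @ np.array([[1.5], [-0.7]]) + 0.01 * rng.normal(size=(200, 1))

    coef_biased = np.array([[1.2, -0.5, 0.0, 0.0, 0.0]])  # pretend regularized result
    ind = np.abs(coef_biased) > 1e-14                      # support mask, analogous to ind_
    coef_unbiased = np.zeros_like(coef_biased)
    for i in range(ind.shape[0]):
        if np.any(ind[i]):
            # unregularized refit on the selected columns only
            coef_unbiased[i, ind[i]] = (
                LinearRegression(fit_intercept=False).fit(x[:, ind[i]], y[:, i]).coef_
            )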
""" raise NotImplementedError @@ -134,12 +150,16 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws): ------- self : returns an instance of self """ + x_ = AxesArray(np.asarray(x_), {"ax_sample": 0, "ax_coord": 1}) + y_axes = {"ax_sample": 0} if y.ndim == 1 else {"ax_sample": 0, "ax_coord": 1} + y = AxesArray(np.asarray(y), y_axes) + x_, y = drop_nan_samples(x_, y) x_, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=True) x, y, X_offset, y_offset, X_scale = _preprocess_data( x_, y, - fit_intercept=self.fit_intercept, + fit_intercept=False, copy=self.copy_X, sample_weight=sample_weight, ) @@ -178,6 +198,9 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws): self._reduce(x_normed, y, **reduce_kws) self.ind_ = np.abs(self.coef_) > 1e-14 + if self.unbias: + self._unbias(x_normed, y) + # Rescale coefficients to original units if self.normalize_columns: self.coef_ = np.multiply(reg, self.coef_) @@ -189,6 +212,17 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws): self._set_intercept(X_offset, y_offset, X_scale) return self + def _unbias(self, x: np.ndarray, y: np.ndarray): + coef = np.zeros((y.shape[1], x.shape[1])) + for i in range(self.ind_.shape[0]): + if np.any(self.ind_[i]): + coef[i, self.ind_[i]] = ( + LinearRegression(fit_intercept=False) + .fit(x[:, self.ind_[i]], y[:, i]) + .coef_ + ) + self.coef_ = coef + class EnsembleOptimizer(BaseOptimizer): """Wrapper class for ensembling methods. @@ -249,12 +283,6 @@ class EnsembleOptimizer(BaseOptimizer): coef_full_ : array, shape (n_features,) or (n_targets, n_features) Weight vector(s) that are not subjected to the regularization. This is the w in the objective function. - - unbias : boolean - Whether to perform an extra step of unregularized linear regression - to unbias the coefficients for the identified support. - ``unbias`` is automatically set to False if a constraint is used and - is otherwise left uninitialized. """ def __init__( @@ -271,9 +299,8 @@ def __init__( if not hasattr(opt, "initial_guess"): opt.initial_guess = None - super(EnsembleOptimizer, self).__init__( + super().__init__( max_iter=opt.max_iter, - fit_intercept=opt.fit_intercept, initial_guess=opt.initial_guess, copy_X=opt.copy_X, ) @@ -281,19 +308,19 @@ def __init__( raise ValueError( "If not ensembling data or library terms, use another optimizer" ) - if n_subset is not None and n_subset <= 0: - raise ValueError("n_subset must be a positive integer if bagging") - if n_candidates_to_drop is not None and n_candidates_to_drop <= 0: + if bagging and n_subset is not None and n_subset < 1: + raise ValueError("n_subset must be a positive integer or None if bagging") + if library_ensemble and ( + n_candidates_to_drop is None or n_candidates_to_drop < 1 + ): raise ValueError( "n_candidates_to_drop must be a positive integer if ensembling library" ) - self.opt = opt - if n_models is None or n_models == 0: - warnings.warn( - "n_models must be a positive integer. Explicitly initialized to zero" - " or None, defaulting to 20." 
+ if n_models < 1: + raise ValueError( + "n_candidates_to_drop must be a positive integer if ensembling library" ) - n_models = 20 + self.opt = opt self.n_models = n_models self.n_subset = n_subset self.bagging = bagging @@ -302,6 +329,7 @@ def __init__( self.replace = replace self.n_candidates_to_drop = n_candidates_to_drop self.coef_list = [] + self.unbias = False def _reduce(self, x: AxesArray, y: np.ndarray) -> None: x = AxesArray(np.asarray(x), {"ax_sample": 0, "ax_coord": 1}) @@ -316,7 +344,7 @@ def _reduce(self, x: AxesArray, y: np.ndarray) -> None: else: n_subset = self.n_subset - n_features = x.shape[x.ax_coord] + n_features = x.n_coord if self.library_ensemble and self.n_candidates_to_drop > n_features: warnings.warn( "n_candidates_to_drop larger than number of features. Cannot " diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index eb20cf159..300bce897 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -300,8 +300,6 @@ def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): <= self.constraint_rhs[: self.constraint_separation_index] ) prob = cp.Problem(cp.Minimize(cost), constraints) - else: - cp.Problem(cp.Minimize(cost)) prob_clone = deepcopy(prob) # default solver is SCS/OSQP here but switches to ECOS for L2 diff --git a/pysindy/optimizers/sr3.py b/pysindy/optimizers/sr3.py index 658ad7fa0..58db5ef5c 100644 --- a/pysindy/optimizers/sr3.py +++ b/pysindy/optimizers/sr3.py @@ -74,10 +74,6 @@ class SR3(BaseOptimizer): Initial guess for coefficients ``coef_``. If None, least-squares is used to obtain an initial guess. - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. - normalize_columns : boolean, optional (default False) Normalize the columns of x (the SINDy library terms) before regression by dividing by the L2-norm. Note that the 'normalize' option in sklearn @@ -102,6 +98,10 @@ class SR3(BaseOptimizer): If True, prints out the different error terms every max_iter / 10 iterations. + unbias: bool (default False) + See base class for definition. Most options are incompatible + with unbiasing. + Attributes ---------- coef_ : array, shape (n_features,) or (n_targets, n_features) @@ -147,18 +147,18 @@ def __init__( trimming_fraction=0.0, trimming_step_size=1.0, max_iter=30, - fit_intercept=False, copy_X=True, initial_guess=None, normalize_columns=False, verbose=False, + unbias=False, ): super(SR3, self).__init__( max_iter=max_iter, initial_guess=initial_guess, - fit_intercept=fit_intercept, copy_X=copy_X, normalize_columns=normalize_columns, + unbias=unbias, ) if threshold < 0: @@ -169,6 +169,11 @@ def __init__( raise ValueError("tol must be positive") if (trimming_fraction < 0) or (trimming_fraction > 1): raise ValueError("trimming fraction must be between 0 and 1") + if trimming_fraction > 0 and unbias: + raise ValueError( + "Unbiasing counteracts some of the effects of trimming. 
Either set" + " unbias=False or trimming_fraction=0.0" + ) if thresholder.lower() not in ( "l0", "l1", @@ -337,8 +342,8 @@ def _reduce(self, x, y): coef_sparse = self.coef_.T n_samples, n_features = x.shape + coef_full = coef_sparse.copy() if self.use_trimming: - coef_full = coef_sparse.copy() trimming_array = np.repeat(1.0 - self.trimming_fraction, n_samples) self.history_trimming_ = [trimming_array] else: @@ -359,8 +364,7 @@ def _reduce(self, x, y): "Total Error: |y-Xw|^2 + |w-u|^2/v + R(u)", ] print( - "{: >10} ... {: >10} ... {: >10} ... {: >10}" - " ... {: >10}".format(*row) + "{: >10} ... {: >10} ... {: >10} ... {: >10} ... {: >10}".format(*row) ) objective_history = [] @@ -388,9 +392,7 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"SR3 did not converge after {self.max_iter} iterations.", ConvergenceWarning, ) self.coef_ = coef_sparse.T From 2d141d90252813bf4fc1bab7fc6d6ac742ac2009 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 29 May 2024 18:52:51 -0700 Subject: [PATCH 06/11] CLN: specify nonsparse rs steps as "if local trapping" --- .gitattributes | 4 +++ pysindy/optimizers/trapping_sr3.py | 49 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/.gitattributes b/.gitattributes index ac65e8e5a..c12963a6d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,7 @@ # Interpret Jupyter notebooks as Python *.ipynb linguist-language=Python + +*.ipynb diff=jupyternotebook + +*.ipynb merge=jupyternotebook diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 0e769aac8..f7dc9c365 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -606,6 +606,31 @@ def _update_coef_sparse_rs( return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) + def _update_coef_nonsparse_rs( + self, n_tgts, n_features, Pmatrix, A, coef_prev, xTx, xTy + ): + pTp = np.dot(Pmatrix.T, Pmatrix) + H = xTx + pTp / self.eta + P_transpose_A = np.dot(Pmatrix.T, A.flatten()) + + if self.method == "local": + # notice reshaping PQ here requires fortran-ordering + PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) + PQ = np.reshape(PQ, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F") + PQTPQ = np.dot(PQ.T, PQ) + PQ = np.reshape( + self.PQ_, (n_tgts, n_tgts, n_tgts, n_tgts * n_features), "F" + ) + PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) + PQ_ep = PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) + PQ_ep = np.reshape( + PQ_ep, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" + ) + PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) + H += PQTPQ / self.alpha + PQTPQ_ep / self.beta + + return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) + def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): """ If using the relaxation formulation of trapping SINDy, solves the @@ -762,28 +787,8 @@ def _reduce(self, x, y): # if threshold = 0, there is analytic expression # for the solve over the coefficients, # which is coded up here separately - pTp = np.dot(Pmatrix.T, Pmatrix) - # notice reshaping PQ here requires fortran-ordering - PQ = np.tensordot(self.mod_matrix, self.PQ_, axes=([1], [0])) - PQ = np.reshape( - PQ, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" - ) - PQTPQ = np.dot(PQ.T, PQ) - PQ = np.reshape( - self.PQ_, (n_tgts, n_tgts, n_tgts, 
n_tgts * n_features), "F" - ) - PQ = np.tensordot(self.mod_matrix, PQ, axes=([1], [0])) - PQ_ep = ( - PQ + np.transpose(PQ, [1, 2, 0, 3]) + np.transpose(PQ, [2, 0, 1, 3]) - ) - PQ_ep = np.reshape( - PQ_ep, (n_tgts * n_tgts * n_tgts, n_tgts * n_features), "F" - ) - PQTPQ_ep = np.dot(PQ_ep.T, PQ_ep) - H = xTx + pTp / self.eta + PQTPQ / self.alpha + PQTPQ_ep / self.beta - P_transpose_A = np.dot(Pmatrix.T, A.flatten()) - coef_sparse = self._solve_nonsparse_relax_and_split( - H, xTy, P_transpose_A, coef_prev + coef_sparse = self._update_coef_nonsparse_rs( + Pmatrix, A, coef_prev, xTx, xTy ) # If problem over xi becomes infeasible, break out of the loop From 02b5a8ad100c7648d2e4aa6789de5b069cd27db7 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Thu, 30 May 2024 11:49:40 -0700 Subject: [PATCH 07/11] CLN: Align a few bits and pieces to trapping-resolve --- pysindy/optimizers/trapping_sr3.py | 179 ++++++++++------------------- test/conftest.py | 1 - 2 files changed, 60 insertions(+), 120 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index f7dc9c365..4afb9a49e 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -71,127 +71,71 @@ class TrappingSR3(ConstrainedSR3): Parameters ---------- - threshold : float, optional (default 0.1) - Determines the strength of the regularization. When the - regularization function R is the L0 norm, the regularization - is equivalent to performing hard thresholding, and lambda - is chosen to threshold at the value given by this parameter. - This is equivalent to choosing lambda = threshold^2 / (2 * nu). - - eta : float, optional (default 1.0e20) - Determines the strength of the stability term ||Pw-A||^2 in the + eta : + Determines the strength of the stability term :math:`||Pw-A||^2` in the optimization. The default value is very large so that the algorithm default is to ignore the stability term. In this limit, this should be approximately equivalent to the ConstrainedSR3 method. - alpha_m : float, optional (default eta * 0.1) - Determines the step size in the prox-gradient descent over m. - For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. - Typically 0.01 * eta <= alpha_m <= 0.1 * eta. - - alpha_A : float, optional (default eta) - Determines the step size in the prox-gradient descent over A. - For convergence, need alpha_A <= eta, so typically - alpha_A = eta is used. + eps_solver : + If threshold != 0, this specifies the error tolerance in the + CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. - alpha : float, optional (default 1.0e20) - Determines the strength of the local stability term ||Qijk||^2 in the - optimization. The default value is very large so that the + alpha: + Determines the strength of the local stability term :math:`||Qijk||^2` + in the optimization. The default value (1e20) is very large so that the algorithm default is to ignore this term. - beta : float, optional (default 1.0e20) + beta: Determines the strength of the local stability term - ||Qijk + Qjik + Qkij||^2 in the + :math:`||Qijk + Qjik + Qkij||^2` in the optimization. The default value is very large so that the algorithm default is to ignore this term. - gamma : float, optional (default 0.1) + mod_matrix: + Lyapunov matrix. Trapping theorems apply to energy + :math:`\\propto \\dot y \\cdot y`, but also to any + :math:`\\propto \\dot y P \\cdot y` for Lyapunov matrix :math:`P`. + Defaults to the identity matrix. 
+ + alpha_A : + Determines the step size in the prox-gradient descent over A. + For convergence, need alpha_A <= eta, so typically + alpha_A = eta is used. + + alpha_m : + Determines the step size in the prox-gradient descent over m. + For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. + Typically 0.01 * eta <= alpha_m <= 0.1 * eta. (default eta * 0.1) + + gamma : Determines the negative interval that matrix A is projected onto. For most applications gamma = 0.1 - 1.0 works pretty well. - tol : float, optional (default 1e-5) - Tolerance used for determining convergence of the optimization - algorithm over w. - - tol_m : float, optional (default 1e-5) + tol_m : Tolerance used for determining convergence of the optimization algorithm over m. - thresholder : string, optional (default 'L1') + thresholder : Regularization function to use. For current trapping SINDy, only the L1 and L2 norms are implemented. Note that other convex norms could be straightforwardly implemented, but L0 requires - reformulation because of nonconvexity. - - thresholds : np.ndarray, shape (n_targets, n_features), optional \ - (default None) - Array of thresholds for each library function coefficient. - Each row corresponds to a measurement variable and each column - to a function from the feature library. - Recall that SINDy seeks a matrix :math:`\\Xi` such that - :math:`\\dot{X} \\approx \\Theta(X)\\Xi`. - ``thresholds[i, j]`` should specify the threshold to be used for the - (j + 1, i + 1) entry of :math:`\\Xi`. That is to say it should give the - threshold to be used for the (j + 1)st library function in the equation - for the (i + 1)st measurement variable. - - eps_solver : float, optional (default 1.0e-7) - If threshold != 0, this specifies the error tolerance in the - CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. + reformulation because of nonconvexity. (default 'L1') - inequality_constraints : bool, optional (default False) - If True, CVXPY methods are used. - - max_iter : int, optional (default 30) - Maximum iterations of the optimization algorithm. - - accel : bool, optional (default False) + accel : Whether or not to use accelerated prox-gradient descent for (m, A). + (default False) - m0 : np.ndarray, shape (n_targets), optional (default None) - Initial guess for vector m in the optimization. Otherwise - each component of m is randomly initialized in [-1, 1]. - - A0 : np.ndarray, shape (n_targets, n_targets), optional (default None) - Initial guess for vector A in the optimization. Otherwise - A is initialized as A = diag(gamma). + m0 : + Initial guess for trap center in the optimization. Default None + initializes vector elements randomly in [-1, 1]. shape (n_targets) - fit_intercept : boolean, optional (default False) - Whether to calculate the intercept for this model. If set to false, no - intercept will be used in calculations. - - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - - normalize_columns : boolean, optional (default False) - Normalize the columns of x (the SINDy library terms) before regression - by dividing by the L2-norm. Note that the 'normalize' option in sklearn - is deprecated in sklearn versions >= 1.0 and will be removed. - - verbose : bool, optional (default False) - If True, prints out the different error terms every iteration. - - verbose_cvxpy : bool, optional (default False) - Boolean flag which is passed to CVXPY solve function to indicate if - output should be verbose or not. 
Only relevant for optimizers that - use the CVXPY package in some capabity. + A0 : + Initial guess for vector A in the optimization. Shape (n_targets, n_targets) + Default None, meaning A is initialized as A = diag(gamma). Attributes ---------- - coef_ : array, shape (n_features,) or (n_targets, n_features) - Regularized weight vector(s). This is the v in the objective - function. - - history_ : list - History of sparse coefficients. ``history_[k]`` contains the - sparse coefficients (v in the optimization objective function) - at iteration k. - - objective_history_ : list - History of the value of the objective at each step. Note that - the trapping SINDy problem is nonconvex, meaning that this value - may increase and decrease as the algorithm works. - A_history_ : list History of the auxiliary variable A that approximates diag(PW). @@ -224,6 +168,11 @@ class TrappingSR3(ConstrainedSR3): Transpose of 1st dimension and 2nd dimension of quadratic coefficient part of the P matrix in ||Pw - A||^2 + objective_history_ : list + History of the value of the objective at each step. Note that + the trapping SINDy problem is nonconvex, meaning that this value + may increase and decrease as the algorithm works. + Examples -------- >>> import numpy as np @@ -330,10 +279,6 @@ def __init__( self.gamma = gamma self.tol_m = tol_m self.accel = accel - self.unbias = False - self.use_constraints = (constraint_lhs is not None) and ( - constraint_rhs is not None - ) self.__post_init_guard() def __post_init_guard(self): @@ -383,9 +328,9 @@ def __post_init_guard(self): if self.tol <= 0 or self.tol_m <= 0 or self.eps_solver <= 0: raise ValueError("tol and tol_m must be positive") if self.inequality_constraints and self.threshold == 0.0: - raise ValueError( - "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False." - ) + raise ValueError("Inequality constraints requires threshold!=0") + if self.mod_matrix is None: + self.mod_matrix = np.eye(self._n_tgts) def set_params(self, **kwargs): super().set_params(**kwargs) @@ -490,7 +435,7 @@ def _build_PQ(polyterms: list[tuple[int, Int1D]]) -> Float5D: def _set_Ptensors( self, n_targets: int - ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[Float3D, Float4D, Float4D, Float5D, Float5D, Float5D]: """Make the projection tensors used for the algorithm.""" lib = PolynomialLibrary(2, include_bias=self._include_bias).fit( np.zeros((1, n_targets)) @@ -502,7 +447,7 @@ def _set_Ptensors( PQ_tensor = self._build_PQ(polyterms) PT_tensor = PQ_tensor.transpose([1, 0, 2, 3, 4]) # PM is the sum of PQ and PQ which projects out the sum of Qijk and Qjik - PM_tensor = PQ_tensor + PT_tensor + PM_tensor = cast(Float5D, PQ_tensor + PT_tensor) return PC_tensor, PL_tensor_unsym, PL_tensor, PQ_tensor, PT_tensor, PM_tensor @@ -702,9 +647,6 @@ def _reduce(self, x, y): ) var_len = n_features * n_tgts - if self.mod_matrix is None: - self.mod_matrix = np.eye(n_tgts) - # Define PL, PQ, PT and PM tensors, only relevant if the stability term in # trapping SINDy is turned on. 
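# A short sketch, relying only on pysindy's public PolynomialLibrary, of the exponent
# bookkeeping that _build_PQ and the other projection-tensor builders consume: after
# fitting, each row of powers_ gives the exponent of every target in one quadratic
# library feature (the variable names below are illustrative).
import numpy as np
from pysindy.feature_library import PolynomialLibrary

lib = PolynomialLibrary(2, include_bias=False).fit(np.zeros((1, 3)))
polyterms = list(enumerate(lib.powers_))
for feature_index, exponents in polyterms:
    print(feature_index, exponents)     # e.g. (3, [2 0 0]) is the x0**2 feature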
( @@ -750,11 +692,11 @@ def _reduce(self, x, y): # initial guess for m if self.m0 is not None: - m = self.m0 + trap_ctr = self.m0 else: np.random.seed(1) - m = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 - self.m_history_.append(m) + trap_ctr = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 + self.m_history_.append(trap_ctr) # Precompute some objects for optimization x_expanded = np.zeros((n_samples, n_tgts, n_features, n_tgts)) @@ -766,14 +708,14 @@ def _reduce(self, x, y): # if using acceleration tk_prev = 1 - m_prev = m + m_prev = trap_ctr # Begin optimization loop objective_history = [] for k in range(self.max_iter): # update P tensor from the newest m - mPM = np.tensordot(self.PM_, m, axes=([2], [0])) + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) @@ -797,24 +739,24 @@ def _reduce(self, x, y): break # Now solve optimization for m and A - m_prev, m, A, tk_prev = self._solve_m_relax_and_split( - n_tgts, n_features, m_prev, m, A, coef_sparse, tk_prev + m_prev, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( + n_tgts, n_features, m_prev, trap_ctr, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop - if m is None: - m = m_prev + if trap_ctr is None: + trap_ctr = m_prev break self.history_.append(coef_sparse.T) PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) # (m,A) update finished, append the result - self.m_history_.append(m) + self.m_history_.append(trap_ctr) self.A_history_.append(A) eigvals, eigvecs = np.linalg.eig(PW) self.PW_history_.append(PW) self.PWeigs_history_.append(np.sort(eigvals)) - mPM = np.tensordot(self.PM_, m, axes=([2], [0])) + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) self.p_history_.append(p) @@ -825,9 +767,8 @@ def _reduce(self, x, y): self._m_convergence_criterion() < self.tol_m and self._convergence_criterion() < self.tol ): - # Could not (further) select important features break - if k == self.max_iter - 1: + else: warnings.warn( "TrappingSR3._reduce did not converge after {} iters.".format( self.max_iter diff --git a/test/conftest.py b/test/conftest.py index 33e9e1b06..b63d344d0 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -70,7 +70,6 @@ def data_lorenz(): @pytest.fixture def data_multiple_trajectories(): - n_points = [100, 200, 500] initial_conditions = [ [8, 27, -7], From ecdd8008383ff15af386375159642213a021ee24 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Fri, 31 May 2024 17:57:08 -0700 Subject: [PATCH 08/11] fix: some function calls ALso align some comments and organization with trapping-resolve --- pysindy/optimizers/trapping_sr3.py | 40 +++++++++++++----------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 4afb9a49e..6a83697f5 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -79,7 +79,7 @@ class TrappingSR3(ConstrainedSR3): eps_solver : If threshold != 0, this specifies the error tolerance in the - CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. + CVXPY (OSQP) solve. 
Default 1.0e-7 alpha: Determines the strength of the local stability term :math:`||Qijk||^2` @@ -331,6 +331,8 @@ def __post_init_guard(self): raise ValueError("Inequality constraints requires threshold!=0") if self.mod_matrix is None: self.mod_matrix = np.eye(self._n_tgts) + if self.A0 is None: + self.A0 = np.diag(self.gamma * np.ones(self._n_tgts)) def set_params(self, **kwargs): super().set_params(**kwargs) @@ -534,7 +536,7 @@ def _objective(self, x, y, coef_sparse, A, PW, k): return R2 + stability_term + L1 + alpha_term + beta_term def _update_coef_sparse_rs( - self, r, N, var_len, x_expanded, y, Pmatrix, A, coef_prev + self, n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev ): """Solve coefficient update with CVXPY if threshold != 0""" xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) @@ -542,11 +544,15 @@ def _update_coef_sparse_rs( # new terms minimizing quadratic piece ||P^Q @ xi||_2^2 if self.method == "local": - Q = np.reshape(self.PQ_, (r * r * r, N * r), "F") + Q = np.reshape( + self.PQ_, (n_tgts * n_tgts * n_tgts, n_features * n_tgts), "F" + ) cost = cost + cp.sum_squares(Q @ xi) / self.alpha - Q = np.reshape(self.PQ_, (r, r, r, N * r), "F") + Q = np.reshape(self.PQ_, (n_tgts, n_tgts, n_tgts, n_features * n_tgts), "F") Q_ep = Q + np.transpose(Q, [1, 2, 0, 3]) + np.transpose(Q, [2, 0, 1, 3]) - Q_ep = np.reshape(Q_ep, (r * r * r, N * r), "F") + Q_ep = np.reshape( + Q_ep, (n_tgts * n_tgts * n_tgts, n_features * n_tgts), "F" + ) cost = cost + cp.sum_squares(Q_ep @ xi) / self.beta return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) @@ -647,8 +653,6 @@ def _reduce(self, x, y): ) var_len = n_features * n_tgts - # Define PL, PQ, PT and PM tensors, only relevant if the stability term in - # trapping SINDy is turned on. ( self.PC_, self.PL_unsym_, @@ -681,13 +685,7 @@ def _reduce(self, x, y): " ... {: >8} ... {: >10} ... 
{: >8}".format(*row) ) - # initial A - if self.A0 is not None: - A = self.A0 - elif np.any(self.PM_ != 0.0): - A = np.diag(self.gamma * np.ones(n_tgts)) - else: - A = np.diag(np.zeros(n_tgts)) + A = self.A0 self.A_history_.append(A) # initial guess for m @@ -708,7 +706,7 @@ def _reduce(self, x, y): # if using acceleration tk_prev = 1 - m_prev = trap_ctr + trap_prev_ctr = trap_ctr # Begin optimization loop objective_history = [] @@ -726,11 +724,8 @@ def _reduce(self, x, y): n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev ) else: - # if threshold = 0, there is analytic expression - # for the solve over the coefficients, - # which is coded up here separately coef_sparse = self._update_coef_nonsparse_rs( - Pmatrix, A, coef_prev, xTx, xTy + n_tgts, n_features, Pmatrix, A, coef_prev, xTx, xTy ) # If problem over xi becomes infeasible, break out of the loop @@ -738,14 +733,13 @@ def _reduce(self, x, y): coef_sparse = coef_prev break - # Now solve optimization for m and A - m_prev, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( - n_tgts, n_features, m_prev, trap_ctr, A, coef_sparse, tk_prev + trap_prev_ctr, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( + n_tgts, n_features, trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop if trap_ctr is None: - trap_ctr = m_prev + trap_ctr = trap_prev_ctr break self.history_.append(coef_sparse.T) PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) From 0254032593d693c4667d132f9baa389533ca5877 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sat, 1 Jun 2024 22:23:36 -0700 Subject: [PATCH 09/11] Add option to make PMatrix using global stability --- pysindy/optimizers/trapping_sr3.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 6a83697f5..150339691 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -582,7 +582,7 @@ def _update_coef_nonsparse_rs( return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) - def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): + def _solve_m_relax_and_split(self, m_prev, m, A, coef_sparse, tk_previous): """ If using the relaxation formulation of trapping SINDy, solves the (m, A) algorithm update. 
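# A self-contained shape sketch (random tensors, illustrative sizes, not library code)
# of the P-matrix update the next hunk performs: the trap center is contracted into the
# quadratic projector, combined with the linear projector, and flattened for the solve.
import numpy as np

n_tgts, n_features = 3, 9
PL = np.random.rand(n_tgts, n_tgts, n_tgts, n_features)           # linear projector
PQ = np.random.rand(n_tgts, n_tgts, n_tgts, n_tgts, n_features)   # quadratic projector
PM = PQ + PQ.transpose([1, 0, 2, 3, 4])                           # symmetrized quadratic projector
mod_matrix = np.eye(n_tgts)                                       # Lyapunov matrix
trap_ctr = np.random.rand(n_tgts)                                 # current trap center m

# "global" branch: p = PL - m . PQ
mPQ = np.tensordot(trap_ctr, PQ, axes=([0], [0]))                 # -> (n_tgts, n_tgts, n_tgts, n_features)
p_global = PL - mPQ
# "local" branch: p = mod_matrix . (PL + PM . m)
mPM = np.tensordot(PM, trap_ctr, axes=([2], [0]))                 # -> (n_tgts, n_tgts, n_tgts, n_features)
p_local = np.tensordot(mod_matrix, PL + mPM, axes=([1], [0]))

Pmatrix = p_global.reshape(n_tgts * n_tgts, n_tgts * n_features)  # same reshape for either branch
print(Pmatrix.shape)                                              # (9, 27)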
@@ -711,13 +711,16 @@ def _reduce(self, x, y): # Begin optimization loop objective_history = [] for k in range(self.max_iter): + # update P tensor from the newest trap center + if self.method == "global": + mPQ = np.tensordot(trap_ctr, self.PQ_, axes=([0], [0])) + p = self.PL_ - mPQ + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) + else: + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) + p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) - # update P tensor from the newest m - mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) - p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) - - # update w coef_prev = coef_sparse if (self.threshold > 0.0) or self.inequality_constraints: coef_sparse = self._update_coef_sparse_rs( @@ -734,7 +737,7 @@ def _reduce(self, x, y): break trap_prev_ctr, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( - n_tgts, n_features, trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev + trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop From 70fbb67a11f77ca6b9948c9b63d7d4193f73eda0 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sat, 1 Jun 2024 22:59:06 -0700 Subject: [PATCH 10/11] update objective to align with trapping-resolve --- pysindy/optimizers/trapping_sr3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 150339691..0333ee841 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -533,7 +533,10 @@ def _objective(self, x, y, coef_sparse, A, PW, k): "{0:5d} ... {1:8.3e} ... {2:8.3e} ... {3:8.2e}" " ... {4:8.2e} ... {5:8.2e} ... 
{6:8.2e}".format(*row) ) - return R2 + stability_term + L1 + alpha_term + beta_term + if self.method == "global": + return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 + else: + return R2 + stability_term + L1 + alpha_term + beta_term def _update_coef_sparse_rs( self, n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev From 44269653831dca11540a04205db48e4b062ae818 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Sun, 2 Jun 2024 16:12:06 -0700 Subject: [PATCH 11/11] cln: Split global/local math in _solve_m_relax_and_split Also: move m0 initialization to __init__, fix _objective calc --- pysindy/optimizers/trapping_sr3.py | 90 +++++++++++++++++++----------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 0333ee841..941aeb3e3 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -21,6 +21,7 @@ AnyFloat = np.dtype[np.floating[NBitBase]] Int1D = np.ndarray[tuple[int], np.dtype[np.int_]] +Float1D = np.ndarray[tuple[int], AnyFloat] Float2D = np.ndarray[tuple[int, int], AnyFloat] Float3D = np.ndarray[tuple[int, int, int], AnyFloat] Float4D = np.ndarray[tuple[int, int, int, int], AnyFloat] @@ -333,6 +334,9 @@ def __post_init_guard(self): self.mod_matrix = np.eye(self._n_tgts) if self.A0 is None: self.A0 = np.diag(self.gamma * np.ones(self._n_tgts)) + if self.m0 is None: + np.random.seed(1) + self.m0 = (np.random.rand(self._n_tgts) - np.ones(self._n_tgts)) * 2 def set_params(self, **kwargs): super().set_params(**kwargs) @@ -533,10 +537,10 @@ def _objective(self, x, y, coef_sparse, A, PW, k): "{0:5d} ... {1:8.3e} ... {2:8.3e} ... {3:8.2e}" " ... {4:8.2e} ... {5:8.2e} ... {6:8.2e}".format(*row) ) - if self.method == "global": - return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 - else: - return R2 + stability_term + L1 + alpha_term + beta_term + obj = R2 + stability_term + L1 + if self.method == "local": + obj += alpha_term + beta_term + return obj def _update_coef_sparse_rs( self, n_tgts, n_features, var_len, x_expanded, y, Pmatrix, A, coef_prev @@ -585,35 +589,65 @@ def _update_coef_nonsparse_rs( return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) - def _solve_m_relax_and_split(self, m_prev, m, A, coef_sparse, tk_previous): - """ - If using the relaxation formulation of trapping SINDy, solves the - (m, A) algorithm update. + def _solve_m_relax_and_split( + self, + trap_ctr_prev: Float1D, + trap_ctr: Float1D, + A: Float2D, + coef_sparse: np.ndarray[tuple[NFeat, NTarget], AnyFloat], + tk_previous: float, + ) -> tuple[Float1D, Float2D, float]: + """Solves the (m, A) algorithm update. 
+ + TODO: explain the optimization this solves, add good names to variables, + and refactor/indirect the if global/local trapping conditionals + + Returns the new trap center (m), the new A, and the new acceleration weight """ # prox-grad for (A, m) # Accelerated prox gradient descent + # Calculate projection matrix from Quad terms to As if self.accel: tk = (1 + np.sqrt(1 + 4 * tk_previous**2)) / 2.0 - m_partial = m + (tk_previous - 1.0) / tk * (m - m_prev) + m_partial = trap_ctr + (tk_previous - 1.0) / tk * (trap_ctr - trap_ctr_prev) tk_previous = tk - mPM = np.tensordot(self.PM_, m_partial, axes=([2], [0])) + if self.method == "global": + mPQ = np.tensordot(m_partial, self.PQ_, axes=([0], [0])) + else: + mPM = np.tensordot(self.PM_, m_partial, axes=([2], [0])) else: - mPM = np.tensordot(self.PM_, m, axes=([2], [0])) - p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) - PMW = np.tensordot(self.PM_, coef_sparse, axes=([4, 3], [0, 1])) - PMW = np.tensordot(self.mod_matrix, PMW, axes=([1], [0])) + if self.method == "global": + mPQ = np.tensordot(trap_ctr, self.PQ_, axes=([0], [0])) + else: + mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) + # Calculate As and its quad term components + if self.method == "global": + p = self.PL_ - mPQ + PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) + PQW = np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1])) + else: + p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) + PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) + PMW = np.tensordot(self.PM_, coef_sparse, axes=([4, 3], [0, 1])) + PMW = np.tensordot(self.mod_matrix, PMW, axes=([1], [0])) + # Calculate error in quadratic balance, and adjust trap center A_b = (A - PW) / self.eta - PMT_PW = np.tensordot(PMW, A_b, axes=([2, 1], [0, 1])) - if self.accel: - m_new = m_partial - self.alpha_m * PMT_PW + if self.method == "global": + PQWT_PW = np.tensordot(PQW, A_b, axes=([2, 1], [0, 1])) + if self.accel: + trap_new = m_partial - self.alpha_m * PQWT_PW + else: + trap_new = trap_ctr_prev - self.alpha_m * PQWT_PW else: - m_new = m_prev - self.alpha_m * PMT_PW - m_current = m_new + PMT_PW = np.tensordot(PMW, A_b, axes=([2, 1], [0, 1])) + if self.accel: + trap_new = m_partial - self.alpha_m * PMT_PW + else: + trap_new = trap_ctr_prev - self.alpha_m * PMT_PW # Update A A_new = self._update_A(A - self.alpha_A * A_b, PW) - return m_current, m_new, A_new, tk_previous + return trap_new, A_new, tk_previous def _solve_nonsparse_relax_and_split(self, H, xTy, P_transpose_A, coef_prev): """Update for the coefficients if threshold = 0.""" @@ -668,7 +702,7 @@ def _reduce(self, x, y): # Set initial coefficients if self.use_constraints and self.constraint_order.lower() == "target": self.constraint_lhs = reorder_constraints( - self.constraint_lhs, n_features, output_order="target" + self.constraint_lhs, n_features, output_order="feature" ) coef_sparse: np.ndarray[tuple[NFeat, NTarget], AnyFloat] = self.coef_.T @@ -690,13 +724,7 @@ def _reduce(self, x, y): A = self.A0 self.A_history_.append(A) - - # initial guess for m - if self.m0 is not None: - trap_ctr = self.m0 - else: - np.random.seed(1) - trap_ctr = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 + trap_ctr = self.m0 self.m_history_.append(trap_ctr) # Precompute some objects for optimization @@ -723,6 +751,7 @@ def _reduce(self, x, y): mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) 
Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) + self.p_history_.append(p) coef_prev = coef_sparse if (self.threshold > 0.0) or self.inequality_constraints: @@ -756,9 +785,6 @@ def _reduce(self, x, y): eigvals, eigvecs = np.linalg.eig(PW) self.PW_history_.append(PW) self.PWeigs_history_.append(np.sort(eigvals)) - mPM = np.tensordot(self.PM_, trap_ctr, axes=([2], [0])) - p = np.tensordot(self.mod_matrix, self.PL_ + mPM, axes=([1], [0])) - self.p_history_.append(p) # update objective objective_history.append(self._objective(x, y, coef_sparse, A, PW, k))
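# A minimal sketch (illustrative shapes and names, not the library's internals) of the
# threshold == 0 coefficient update (_update_coef_nonsparse_rs) called in the loop
# above: with the sparsity penalty off, the relax-and-split objective in w is
# quadratic, so
#     w = (X^T X + P^T P / eta)^{-1} (X^T y + P^T vec(A) / eta)
# is the exact minimizer; the local method only adds the two PQ-derived blocks to H.
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_tgts, n_features = 50, 3, 9
X = rng.standard_normal((n_samples, n_features))
y = rng.standard_normal((n_samples, n_tgts))
P = rng.standard_normal((n_tgts * n_tgts, n_tgts * n_features))   # stand-in for Pmatrix
A = -0.1 * np.eye(n_tgts)                                         # stand-in for the auxiliary A
eta = 1.0e3

xTx = np.kron(np.eye(n_tgts), X.T @ X)        # per-target least-squares blocks
xTy = (X.T @ y).flatten("F")                  # targets stacked one after another
H = xTx + P.T @ P / eta
rhs = xTy + P.T @ A.flatten() / eta
w = np.linalg.solve(H, rhs)                   # flattened (n_features * n_tgts,) coefficients
print(w.reshape((n_features, n_tgts), order="F").shape)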