diff --git a/src/mpcrl/agents/lstd_dpg.py b/src/mpcrl/agents/lstd_dpg.py
index 0b86567..dbe2123 100644
--- a/src/mpcrl/agents/lstd_dpg.py
+++ b/src/mpcrl/agents/lstd_dpg.py
@@ -246,8 +246,8 @@ def train_one_episode(
                 # computed with the solution of unpertubed MPC (i.e., sol_opt).
                 # According to Gros and Zanon [2], it is hinted that the perturbed
                 # solution should be used instead (sol).
-                exploration = (action - action_opt).full()
-                sol_vals = sol_opt.all_vals.full()
+                exploration = np.asarray((action - action_opt).elements())
+                sol_vals = np.asarray(sol_opt.all_vals.elements())
                 self._rollout.append((state, exploration, cost, state_new, sol_vals))
             else:
                 status = f"{sol.status}/{sol_opt.status}"
@@ -310,8 +310,7 @@ def _init_sensitivity(self, linsolver: str) -> Callable[[cs.DM, int], np.ndarray
         # >2 dims, so dpidtheta gets squished in the 3rd dim and needs reshaping
         def func(sol_values: cs.DM, N: int) -> np.ndarray:
             return (
-                sensitivity(sol_values.T)
-                .full()
+                np.ascontiguousarray(sensitivity(sol_values.T).elements())
                 .reshape(ntheta, na, N, order="F")
                 .transpose((2, 0, 1))
             )
@@ -320,19 +319,19 @@ def func(sol_values: cs.DM, N: int) -> np.ndarray:
 
     def _consolidate_rollout_into_memory(self) -> None:
         """Internal utility to compact current rollout into a single item in memory."""
-        # convert to arrays
+        # convert rollout to arrays and clear it
         N, S, E, L, vals = _consolidate_rollout(self._rollout, self._V.ns, self._V.na)
+        self._rollout.clear()
 
         # compute Phi, dpidtheta, Psi, and CAFA weight v
-        Phi = np.ascontiguousarray(self._Phi(S.T).full().T)
+        Phi = np.ascontiguousarray(self._Phi(S.T).elements()).reshape(N + 1, -1)
         dpidtheta = self._sensitivity(vals, N)
         Psi = (dpidtheta @ E).reshape(N, dpidtheta.shape[1])
         R = self.ridge_regression_regularization
         v = _compute_cafa_weight_v(Phi, L, self.discount_factor, R)
 
-        # save to memory and clear rollout
+        # save to experience
         self.store_experience((L, Phi, Psi, dpidtheta, v))
-        self._rollout.clear()
 
         if self.policy_performances is not None:
             self.policy_performances.append(L.sum())
@@ -354,7 +353,7 @@ def _consolidate_rollout(
         S[i] = s.reshape(-1)
         E[i] = e
         L[i] = cost
-        sol_vals[i] = sol_val.reshape(-1)
+        sol_vals[i] = sol_val
     S[-1] = rollout[-1][3].reshape(-1)
     return N, S, E, L, sol_vals
 
diff --git a/src/mpcrl/agents/lstd_q_learning.py b/src/mpcrl/agents/lstd_q_learning.py
index f7ffbea..23a76b8 100644
--- a/src/mpcrl/agents/lstd_q_learning.py
+++ b/src/mpcrl/agents/lstd_q_learning.py
@@ -161,7 +161,7 @@ def update(self) -> Optional[str]:
             hessians.append(H)
         gradient = np.mean(gradients, 0)
         hessian = np.mean(hessians, 0) if self.hessian_type != "none" else None
-        return self._do_gradient_update(gradient.reshape(-1), hessian)
+        return self._do_gradient_update(gradient, hessian)
 
     def train_one_episode(
         self,
@@ -237,7 +237,7 @@ def _init_sensitivity(
         # wrap to conveniently return numpy arrays
         def func(sol_values: cs.DM) -> tuple[np.ndarray, np.ndarray]:
             dQ, ddQ = sensitivity(sol_values)
-            return dQ.full().reshape(-1, 1), ddQ.full()
+            return np.asarray(dQ.elements()), ddQ.toarray()
 
         return func
 
@@ -252,7 +252,11 @@ def _try_store_experience(
             dQ, ddQ = self._sensitivity(sol_values)
             td_error = cost + self.discount_factor * solV.f - solQ.f
             g = -td_error * dQ
-            H = (dQ @ dQ.T - td_error * ddQ) if self.hessian_type != "none" else np.nan
+            H = (
+                (np.multiply.outer(dQ, dQ) - td_error * ddQ)
+                if self.hessian_type != "none"
+                else np.nan
+            )
             self.store_experience((g, H))
             success = True
         else:
diff --git a/src/mpcrl/optim/adam.py b/src/mpcrl/optim/adam.py
index bad1f4e..93beba0 100644
--- a/src/mpcrl/optim/adam.py
+++ b/src/mpcrl/optim/adam.py
@@ -122,7 +122,7 @@ def _first_order_update(
             return theta + dtheta, None
         lbx, ubx = self._get_update_bounds(theta)
         sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
-        dtheta = sol["x"].full().reshape(-1)
+        dtheta = np.asarray(sol["x"].elements())
         stats = solver.stats()
         return theta + dtheta, None if stats["success"] else stats["return_status"]
 
diff --git a/src/mpcrl/optim/gradient_descent.py b/src/mpcrl/optim/gradient_descent.py
index 041fdca..646cbae 100644
--- a/src/mpcrl/optim/gradient_descent.py
+++ b/src/mpcrl/optim/gradient_descent.py
@@ -94,7 +94,7 @@ def _first_order_update(
             return theta + dtheta, None
         lbx, ubx = self._get_update_bounds(theta)
         sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
-        dtheta = sol["x"].full().reshape(-1)
+        dtheta = np.asarray(sol["x"].elements())
         stats = solver.stats()
         return theta + dtheta, None if stats["success"] else stats["return_status"]
 
diff --git a/src/mpcrl/optim/newton_method.py b/src/mpcrl/optim/newton_method.py
index 40a1106..52676b0 100644
--- a/src/mpcrl/optim/newton_method.py
+++ b/src/mpcrl/optim/newton_method.py
@@ -93,7 +93,7 @@ def _second_order_update(
         )
         lbx, ubx = self._get_update_bounds(theta)
         sol = solver(h=H, g=G, lbx=lbx, ubx=ubx)
-        dtheta = sol["x"].full().reshape(-1)
+        dtheta = np.asarray(sol["x"].elements())
         stats = solver.stats()
         return theta + dtheta, None if stats["success"] else stats["return_status"]
 
diff --git a/src/mpcrl/optim/rmsprop.py b/src/mpcrl/optim/rmsprop.py
index defffbc..d33f082 100644
--- a/src/mpcrl/optim/rmsprop.py
+++ b/src/mpcrl/optim/rmsprop.py
@@ -112,7 +112,7 @@ def _first_order_update(
             return theta + dtheta, None
         lbx, ubx = self._get_update_bounds(theta)
         sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
-        dtheta = sol["x"].full().reshape(-1)
+        dtheta = np.asarray(sol["x"].elements())
         stats = solver.stats()
         return theta + dtheta, None if stats["success"] else stats["return_status"]
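
A minimal sketch (not part of the patch; the variable names are illustrative and casadi/numpy are assumed to be importable) of the CasADi-DM-to-NumPy conversion this patch standardizes on, i.e. replacing .full().reshape(-1) with np.asarray(... .elements()):

    import casadi as cs
    import numpy as np

    x = cs.DM([1.0, 2.0, 3.0])            # 3x1 CasADi column vector
    flat_old = x.full().reshape(-1)       # dense (3, 1) ndarray flattened to shape (3,)
    flat_new = np.asarray(x.elements())   # list of floats converted to shape (3,)
    assert np.array_equal(flat_old, flat_new)

For column vectors such as the QP solution sol["x"] the two forms coincide; for genuine matrices, elements() flattens in column-major order, which is why the explicit order="F" reshape is kept in _init_sensitivity above.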