Skip to content

Commit

Permalink
replaced full with faster elements+asarray or toarray
Browse files Browse the repository at this point in the history
  • Loading branch information
FilippoAiraldi committed Oct 25, 2023
1 parent 33b469d commit 18d783d
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 16 deletions.
17 changes: 8 additions & 9 deletions src/mpcrl/agents/lstd_dpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ def train_one_episode(
# computed with the solution of unperturbed MPC (i.e., sol_opt).
# According to Gros and Zanon [2], it is hinted that the perturbed
# solution should be used instead (sol).
exploration = (action - action_opt).full()
sol_vals = sol_opt.all_vals.full()
exploration = np.asarray((action - action_opt).elements())
sol_vals = np.asarray(sol_opt.all_vals.elements())
self._rollout.append((state, exploration, cost, state_new, sol_vals))
else:
status = f"{sol.status}/{sol_opt.status}"
Expand Down Expand Up @@ -310,8 +310,7 @@ def _init_sensitivity(self, linsolver: str) -> Callable[[cs.DM, int], np.ndarray
# >2 dims, so dpidtheta gets squished in the 3rd dim and needs reshaping
def func(sol_values: cs.DM, N: int) -> np.ndarray:
return (
sensitivity(sol_values.T)
.full()
np.ascontiguousarray(sensitivity(sol_values.T).elements())
.reshape(ntheta, na, N, order="F")
.transpose((2, 0, 1))
)
Expand All @@ -320,19 +319,19 @@ def func(sol_values: cs.DM, N: int) -> np.ndarray:

def _consolidate_rollout_into_memory(self) -> None:
"""Internal utility to compact current rollout into a single item in memory."""
# convert to arrays
# convert rollout to arrays and clear it
N, S, E, L, vals = _consolidate_rollout(self._rollout, self._V.ns, self._V.na)
self._rollout.clear()

# compute Phi, dpidtheta, Psi, and CAFA weight v
Phi = np.ascontiguousarray(self._Phi(S.T).full().T)
Phi = np.ascontiguousarray(self._Phi(S.T).elements()).reshape(N + 1, -1)
dpidtheta = self._sensitivity(vals, N)
Psi = (dpidtheta @ E).reshape(N, dpidtheta.shape[1])
R = self.ridge_regression_regularization
v = _compute_cafa_weight_v(Phi, L, self.discount_factor, R)

# save to memory and clear rollout
# save to experience
self.store_experience((L, Phi, Psi, dpidtheta, v))
self._rollout.clear()
if self.policy_performances is not None:
self.policy_performances.append(L.sum())

Expand All @@ -354,7 +353,7 @@ def _consolidate_rollout(
S[i] = s.reshape(-1)
E[i] = e
L[i] = cost
sol_vals[i] = sol_val.reshape(-1)
sol_vals[i] = sol_val
S[-1] = rollout[-1][3].reshape(-1)
return N, S, E, L, sol_vals

Expand Down
10 changes: 7 additions & 3 deletions src/mpcrl/agents/lstd_q_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def update(self) -> Optional[str]:
hessians.append(H)
gradient = np.mean(gradients, 0)
hessian = np.mean(hessians, 0) if self.hessian_type != "none" else None
return self._do_gradient_update(gradient.reshape(-1), hessian)
return self._do_gradient_update(gradient, hessian)

def train_one_episode(
self,
Expand Down Expand Up @@ -237,7 +237,7 @@ def _init_sensitivity(
# wrap to conveniently return numpy arrays
def func(sol_values: cs.DM) -> tuple[np.ndarray, np.ndarray]:
dQ, ddQ = sensitivity(sol_values)
return dQ.full().reshape(-1, 1), ddQ.full()
return np.asarray(dQ.elements()), ddQ.toarray()

return func

Expand All @@ -252,7 +252,11 @@ def _try_store_experience(
dQ, ddQ = self._sensitivity(sol_values)
td_error = cost + self.discount_factor * solV.f - solQ.f
g = -td_error * dQ
H = (dQ @ dQ.T - td_error * ddQ) if self.hessian_type != "none" else np.nan
H = (
(np.multiply.outer(dQ, dQ) - td_error * ddQ)
if self.hessian_type != "none"
else np.nan
)
self.store_experience((g, H))
success = True
else:
Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/adam.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _first_order_update(
return theta + dtheta, None
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/gradient_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def _first_order_update(
return theta + dtheta, None
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/newton_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _second_order_update(
)
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=H, g=G, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/rmsprop.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _first_order_update(
return theta + dtheta, None
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down

0 comments on commit 18d783d

Please sign in to comment.