Skip to content

Commit

Permalink
replaced full with faster elements+asarray or toarray
Browse files Browse the repository at this point in the history
  • Loading branch information
FilippoAiraldi committed Oct 25, 2023
1 parent 33b469d commit 18d783d
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 16 deletions.
17 changes: 8 additions & 9 deletions src/mpcrl/agents/lstd_dpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ def train_one_episode(
# computed with the solution of unperturbed MPC (i.e., sol_opt).
# According to Gros and Zanon [2], it is hinted that the perturbed
# solution should be used instead (sol).
exploration = (action - action_opt).full()
sol_vals = sol_opt.all_vals.full()
exploration = np.asarray((action - action_opt).elements())
sol_vals = np.asarray(sol_opt.all_vals.elements())
self._rollout.append((state, exploration, cost, state_new, sol_vals))
else:
status = f"{sol.status}/{sol_opt.status}"
Expand Down Expand Up @@ -310,8 +310,7 @@ def _init_sensitivity(self, linsolver: str) -> Callable[[cs.DM, int], np.ndarray
# >2 dims, so dpidtheta gets squished in the 3rd dim and needs reshaping
def func(sol_values: cs.DM, N: int) -> np.ndarray:
return (
sensitivity(sol_values.T)
.full()
np.ascontiguousarray(sensitivity(sol_values.T).elements())
.reshape(ntheta, na, N, order="F")
.transpose((2, 0, 1))
)
Expand All @@ -320,19 +319,19 @@ def func(sol_values: cs.DM, N: int) -> np.ndarray:

def _consolidate_rollout_into_memory(self) -> None:
"""Internal utility to compact current rollout into a single item in memory."""
# convert to arrays
# convert rollout to arrays and clear it
N, S, E, L, vals = _consolidate_rollout(self._rollout, self._V.ns, self._V.na)
self._rollout.clear()

# compute Phi, dpidtheta, Psi, and CAFA weight v
Phi = np.ascontiguousarray(self._Phi(S.T).full().T)
Phi = np.ascontiguousarray(self._Phi(S.T).elements()).reshape(N + 1, -1)
dpidtheta = self._sensitivity(vals, N)
Psi = (dpidtheta @ E).reshape(N, dpidtheta.shape[1])
R = self.ridge_regression_regularization
v = _compute_cafa_weight_v(Phi, L, self.discount_factor, R)

# save to memory and clear rollout
# save to experience
self.store_experience((L, Phi, Psi, dpidtheta, v))
self._rollout.clear()
if self.policy_performances is not None:
self.policy_performances.append(L.sum())

Expand All @@ -354,7 +353,7 @@ def _consolidate_rollout(
S[i] = s.reshape(-1)
E[i] = e
L[i] = cost
sol_vals[i] = sol_val.reshape(-1)
sol_vals[i] = sol_val
S[-1] = rollout[-1][3].reshape(-1)
return N, S, E, L, sol_vals

Expand Down
10 changes: 7 additions & 3 deletions src/mpcrl/agents/lstd_q_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def update(self) -> Optional[str]:
hessians.append(H)
gradient = np.mean(gradients, 0)
hessian = np.mean(hessians, 0) if self.hessian_type != "none" else None
return self._do_gradient_update(gradient.reshape(-1), hessian)
return self._do_gradient_update(gradient, hessian)

def train_one_episode(
self,
Expand Down Expand Up @@ -237,7 +237,7 @@ def _init_sensitivity(
# wrap to conveniently return numpy arrays
def func(sol_values: cs.DM) -> tuple[np.ndarray, np.ndarray]:
dQ, ddQ = sensitivity(sol_values)
return dQ.full().reshape(-1, 1), ddQ.full()
return np.asarray(dQ.elements()), ddQ.toarray()

return func

Expand All @@ -252,7 +252,11 @@ def _try_store_experience(
dQ, ddQ = self._sensitivity(sol_values)
td_error = cost + self.discount_factor * solV.f - solQ.f
g = -td_error * dQ
H = (dQ @ dQ.T - td_error * ddQ) if self.hessian_type != "none" else np.nan
H = (
(np.multiply.outer(dQ, dQ) - td_error * ddQ)
if self.hessian_type != "none"
else np.nan
)
self.store_experience((g, H))
success = True
else:
Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/adam.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _first_order_update(
return theta + dtheta, None
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/gradient_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def _first_order_update(
return theta + dtheta, None
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/newton_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _second_order_update(
)
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=H, g=G, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down
2 changes: 1 addition & 1 deletion src/mpcrl/optim/rmsprop.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _first_order_update(
return theta + dtheta, None
lbx, ubx = self._get_update_bounds(theta)
sol = solver(h=cs.DM.eye(theta.shape[0]), g=-dtheta, lbx=lbx, ubx=ubx)
dtheta = sol["x"].full().reshape(-1)
dtheta = np.asarray(sol["x"].elements())
stats = solver.stats()
return theta + dtheta, None if stats["success"] else stats["return_status"]

Expand Down

0 comments on commit 18d783d

Please sign in to comment.