Skip to content

Commit

Permalink
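Small update to address a NumPy deprecation warning: in `get_stage_cost`, the `float(...)` conversion of the array-valued cost expression is replaced with `.item()`.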
Browse files Browse the repository at this point in the history
  • Loading branch information
FilippoAiraldi committed Apr 12, 2024
1 parent e5ae7bf commit b8ba3e5
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 15 deletions.
13 changes: 8 additions & 5 deletions examples/dpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,14 @@ def reset(
def get_stage_cost(self, state: npt.NDArray[np.floating], action: float) -> float:
"""Computes the stage cost `L(s,a)`."""
lb, ub = self.x_bnd
return 0.5 * float(
np.square(state).sum()
+ 0.5 * action**2
+ self.w.T @ np.maximum(0, lb - state)
+ self.w.T @ np.maximum(0, state - ub)
return (
0.5
* (
np.square(state).sum()
+ 0.5 * action**2
+ self.w.T @ np.maximum(0, lb - state)
+ self.w.T @ np.maximum(0, state - ub)
).item()
)

def step(
Expand Down
13 changes: 8 additions & 5 deletions examples/q_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,14 @@ def reset(
def get_stage_cost(self, state: npt.NDArray[np.floating], action: float) -> float:
"""Computes the stage cost `L(s,a)`."""
lb, ub = self.x_bnd
return 0.5 * float(
np.square(state).sum()
+ 0.5 * action**2
+ self.w.T @ np.maximum(0, lb - state)
+ self.w.T @ np.maximum(0, state - ub)
return (
0.5
* (
np.square(state).sum()
+ 0.5 * action**2
+ self.w.T @ np.maximum(0, lb - state)
+ self.w.T @ np.maximum(0, state - ub)
).item()
)

def step(
Expand Down
13 changes: 8 additions & 5 deletions examples/q_learning_offpolicy.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,14 @@ def reset(
def get_stage_cost(self, state: npt.NDArray[np.floating], action: float) -> float:
"""Computes the stage cost `L(s,a)`."""
lb, ub = self.x_bnd
return 0.5 * float(
np.square(state).sum()
+ 0.5 * action**2
+ self.w.T @ np.maximum(0, lb - state)
+ self.w.T @ np.maximum(0, state - ub)
return (
0.5
* (
np.square(state).sum()
+ 0.5 * action**2
+ self.w.T @ np.maximum(0, lb - state)
+ self.w.T @ np.maximum(0, state - ub)
).item()
)

def step(
Expand Down

0 comments on commit b8ba3e5

Please sign in to comment.