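Small update due to a NumPy deprecation warning: the stage-cost computation no longer calls `float(...)` on an array expression and instead extracts the scalar with `.item()`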

FilippoAiraldi · Apr 12, 2024 · b8ba3e5
1 parent e5ae7bf
commit b8ba3e5
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 15 deletions.
diff --git a/examples/dpg.py b/examples/dpg.py
@@ -58,11 +58,14 @@ def reset(
     def get_stage_cost(self, state: npt.NDArray[np.floating], action: float) -> float:
         """Computes the stage cost `L(s,a)`."""
         lb, ub = self.x_bnd
-        return 0.5 * float(
-            np.square(state).sum()
-            + 0.5 * action**2
-            + self.w.T @ np.maximum(0, lb - state)
-            + self.w.T @ np.maximum(0, state - ub)
+        return (
+            0.5
+            * (
+                np.square(state).sum()
+                + 0.5 * action**2
+                + self.w.T @ np.maximum(0, lb - state)
+                + self.w.T @ np.maximum(0, state - ub)
+            ).item()
         )
 
     def step(

diff --git a/examples/q_learning.py b/examples/q_learning.py
@@ -52,11 +52,14 @@ def reset(
     def get_stage_cost(self, state: npt.NDArray[np.floating], action: float) -> float:
         """Computes the stage cost `L(s,a)`."""
         lb, ub = self.x_bnd
-        return 0.5 * float(
-            np.square(state).sum()
-            + 0.5 * action**2
-            + self.w.T @ np.maximum(0, lb - state)
-            + self.w.T @ np.maximum(0, state - ub)
+        return (
+            0.5
+            * (
+                np.square(state).sum()
+                + 0.5 * action**2
+                + self.w.T @ np.maximum(0, lb - state)
+                + self.w.T @ np.maximum(0, state - ub)
+            ).item()
         )
 
     def step(

diff --git a/examples/q_learning_offpolicy.py b/examples/q_learning_offpolicy.py
@@ -54,11 +54,14 @@ def reset(
     def get_stage_cost(self, state: npt.NDArray[np.floating], action: float) -> float:
         """Computes the stage cost `L(s,a)`."""
         lb, ub = self.x_bnd
-        return 0.5 * float(
-            np.square(state).sum()
-            + 0.5 * action**2
-            + self.w.T @ np.maximum(0, lb - state)
-            + self.w.T @ np.maximum(0, state - ub)
+        return (
+            0.5
+            * (
+                np.square(state).sum()
+                + 0.5 * action**2
+                + self.w.T @ np.maximum(0, lb - state)
+                + self.w.T @ np.maximum(0, state - ub)
+            ).item()
         )
 
     def step(