diff --git a/README.md b/README.md
index cc66866..b802296 100644
--- a/README.md
+++ b/README.md
@@ -39,8 +39,8 @@ All codes have been saved as a .ipynb file and a .html file in the same director
 | 10 | [LunarLanderContinuous-v2](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_ClosedForm.html) | SACwA [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_SACwA_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_SACwA_torch.html) |
 | 11 | [BipedalWalker-v3](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ClosedForm.html) | [ES](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ES.html), [ARS](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ARS.html) |
 | 12 | [PongNoFrameskip-v4](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_ClosedForm.html) | CategoricalDQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_CategoricalDQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_CategoricalDQN_torch.html), QR-DQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_QRDQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_QRDQN_torch.html), IQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_IQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_IQN_torch.html) |
-| 13 | [BernoulliMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) |
-| 13 | [GaussianMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) |
+| 13 | [BernoulliMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv-v0_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv-v0_demo.html) |
+| 13 | [GaussianMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) |
 | 14 | [TicTacToe-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_ExhaustiveSearch.html) | AlphaZero [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_AlphaZero_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_AlphaZero_torch.html)  |
 | 15 note | [HumanoidBulletEnv-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_ClosedForm_demo.html) | BehaviorClone [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_BC_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_BC_torch.html), GAIL [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_GAILPPO_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_GAILPPO_torch.html) |
 | 16 | [Tiger-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/Tiger-v0_ClosedForm.html) | [VI](https://zhiqingxiao.github.io/rl-book/en2023/code/Tiger-v0_Plan_demo.html)
diff --git a/en2023/README.md b/en2023/README.md
index 7f7f3e2..15567ad 100644
--- a/en2023/README.md
+++ b/en2023/README.md
@@ -66,8 +66,8 @@ List view: [link](https://github.com/zhiqingxiao/rl-book/blob/master/en2023/code
 | 10 | [LunarLanderContinuous-v2](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_ClosedForm.html) | SACwA [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_SACwA_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_SACwA_torch.html) |
 | 11 | [BipedalWalker-v3](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ClosedForm.html) | [ES](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ES.html), [ARS](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ARS.html) |
 | 12 | [PongNoFrameskip-v4](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_ClosedForm.html) | CategoricalDQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_CategoricalDQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_CategoricalDQN_torch.html), QR-DQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_QRDQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_QRDQN_torch.html), IQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_IQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_IQN_torch.html) |
-| 13 | [BernoulliMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) |
-| 13 | [GaussianMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) |
+| 13 | [BernoulliMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv-v0_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv-v0_demo.html) |
+| 13 | [GaussianMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) |
 | 14 | [TicTacToe-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_ExhaustiveSearch.html) | AlphaZero [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_AlphaZero_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_AlphaZero_torch.html)  |
 | 15 note | [HumanoidBulletEnv-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_ClosedForm_demo.html) | BehaviorClone [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_BC_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_BC_torch.html), GAIL [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_GAILPPO_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_GAILPPO_torch.html) |
 | 16 | [Tiger-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/Tiger-v0_ClosedForm.html) | [VI](https://zhiqingxiao.github.io/rl-book/en2023/code/Tiger-v0_Plan_demo.html)
diff --git a/en2023/code/BernoulliMABEnv_demo.html b/en2023/code/BernoulliMABEnv-v0_demo.html
similarity index 100%
rename from en2023/code/BernoulliMABEnv_demo.html
rename to en2023/code/BernoulliMABEnv-v0_demo.html
diff --git a/en2023/code/BernoulliMABEnv_demo.ipynb b/en2023/code/BernoulliMABEnv-v0_demo.ipynb
similarity index 100%
rename from en2023/code/BernoulliMABEnv_demo.ipynb
rename to en2023/code/BernoulliMABEnv-v0_demo.ipynb
diff --git a/zh2023/README.md b/zh2023/README.md
index 1972940..7a4ca21 100644
--- a/zh2023/README.md
+++ b/zh2023/README.md
@@ -32,8 +32,8 @@
 | 10 | [LunarLanderContinuous-v2](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_ClosedForm.html) | SACwA [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_SACwA_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/LunarLanderContinuous-v2_SACwA_torch.html) |
 | 11 | [BipedalWalker-v3](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ClosedForm.html) | [ES](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ES.html), [ARS](https://zhiqingxiao.github.io/rl-book/en2023/code/BipedalWalker-v3_ARS.html) |
 | 12 | [PongNoFrameskip-v4](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_ClosedForm.html) | CategoricalDQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_CategoricalDQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_CategoricalDQN_torch.html), QR-DQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_QRDQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_QRDQN_torch.html), IQN [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_IQN_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/PongNoFrameskip-v4_IQN_torch.html) |
-| 13 | [BernoulliMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) |
-| 13 | [GaussianMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) |
+| 13 | [BernoulliMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv-v0_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/BernoulliMABEnv-v0_demo.html) |
+| 13 | [GaussianMAB-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) | [UCB](https://zhiqingxiao.github.io/rl-book/en2023/code/GaussianMABEnv_demo.html) |
 | 14 | [TicTacToe-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_ExhaustiveSearch.html) | AlphaZero [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_AlphaZero_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/TicTacToe-v0_AlphaZero_torch.html)  |
 | 15 注 | [HumanoidBulletEnv-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_ClosedForm_demo.html) | BehaviorClone [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_BC_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_BC_torch.html), GAIL [tf](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_GAILPPO_tf.html) [torch](https://zhiqingxiao.github.io/rl-book/en2023/code/HumanoidBulletEnv-v0_GAILPPO_torch.html) |
 | 16 | [Tiger-v0](https://zhiqingxiao.github.io/rl-book/en2023/code/Tiger-v0_ClosedForm.html) | [VI](https://zhiqingxiao.github.io/rl-book/en2023/code/Tiger-v0_Plan_demo.html)
diff --git a/zh2023/errata/202307.md b/zh2023/errata/202307.md
index 737c024..079dfc9 100644
--- a/zh2023/errata/202307.md
+++ b/zh2023/errata/202307.md
@@ -50,7 +50,27 @@ $\rho_{t+1:t+n-1}=\frac{\Pr_\pi\left[R_{t+1},\mathsfit{S}_{t+1},\mathsfit{A}_{t+
 $\rho_{t+1:t+n-1}=\frac{\Pr_\pi\left[R_{t+1},\mathsfit{S}_{t+1},\mathsfit{A}_{t+1},\ldots,\mathsfit{S}_{t+n}\mid\mathsfit{S}_t,\mathsfit{A}_t\right]}{\Pr_b\left[R_{t+1},\mathsfit{S}_{t+1},\mathsfit{A}_{t+1},\ldots,\mathsfit{S}_{t+n}\mid\mathsfit{S}_t,\mathsfit{A}_t\right]}=\prod\limits_{\tau=t+1}^{t+n-1}{\frac{\pi\left(\mathsfit{A}_\tau\mid\mathsfit{S}_\tau\right)}{b\left(\mathsfit{A}_\tau\mid\mathsfit{S}_\tau\right)}}$
 
 
-## 第288页代码10-2里的`step()`函数
+## 第177页最后一行
+
+$\gamma^2\mathrm{E}_{\pi\left(\boldsymbol\theta\right)}\left[\nabla{v_{\pi\left(\boldsymbol\theta\right)}}\left(\mathsfit{S}_1\right)\right]$
+
+#### 改为
+
+$\gamma^2\mathrm{E}_{\pi\left(\boldsymbol\theta\right)}\left[\nabla{v_{\pi\left(\boldsymbol\theta\right)}}\left(\mathsfit{S}_2\right)\right]$
+
+
+## 第288页代码10-2
+
+```python
+    def step(self, observation, reward, terminated):
+        position, velocity = observation
+        if position > -4 * velocity or position < 13 * velocity - 0.6:
+            force = 1.
+        else:
+            force = -1.
+        action = np.array([force,])
+        return action
+```
 
 #### 改为
 
@@ -59,8 +79,7 @@ $\rho_{t+1:t+n-1}=\frac{\Pr_\pi\left[R_{t+1},\mathsfit{S}_{t+1},\mathsfit{A}_{t+
         x, y, v_x, v_y, angle, v_angle, contact_left, \
                 contact_right = observation
 
-        if contact_left or contact_right:
-                # legs have contact
+        if contact_left or contact_right:  # 腿接触了
             f_y = -10. * v_y - 1.
             f_angle = 0.
         else: