From dbb49829b891ed075d5c0be4f5d97eabdc4bbd10 Mon Sep 17 00:00:00 2001
From: flybird11111 <1829166702@qq.com>
Date: Wed, 11 Sep 2024 09:02:10 +0000
Subject: [PATCH] docs: add doc-test-command markers and tag Booster code fences as python

---
 docs/source/en/concepts/paradigms_of_parallelism.md      | 1 +
 docs/source/en/features/sequence_parallelism.md          | 2 +-
 docs/source/zh-Hans/concepts/paradigms_of_parallelism.md | 1 +
 docs/source/zh-Hans/features/sequence_parallelism.md     | 2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/concepts/paradigms_of_parallelism.md b/docs/source/en/concepts/paradigms_of_parallelism.md
index 6289e9bfd9d2..80f48e44a5dc 100644
--- a/docs/source/en/concepts/paradigms_of_parallelism.md
+++ b/docs/source/en/concepts/paradigms_of_parallelism.md
@@ -140,3 +140,4 @@ Related paper:
 - [ZeRO-Offload: Democratizing Billion-Scale Model Training](https://arxiv.org/abs/2101.06840)
 - [ZeRO-Infinity: Breaking the GPU Memory Wall for Extreme Scale Deep Learning](https://arxiv.org/abs/2104.07857)
 - [PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management](https://arxiv.org/abs/2108.05818)
+<!-- doc-test-command: echo  -->
diff --git a/docs/source/en/features/sequence_parallelism.md b/docs/source/en/features/sequence_parallelism.md
index de277e1b4eac..70fd2eb10970 100644
--- a/docs/source/en/features/sequence_parallelism.md
+++ b/docs/source/en/features/sequence_parallelism.md
@@ -126,7 +126,7 @@ plugin = HybridParallelPlugin(
         )
 ```
 #### Using Booster
-```
+```python
 booster = Booster(plugin=plugin)
 dataloader = plugin.prepare_dataloader(dataset, batch_size=args.batch_size, shuffle=True, drop_last=True, seed=42)
 model, optimizer, _, dataloader, _ = booster.boost(model, optimizer, dataloader=dataloader)
diff --git a/docs/source/zh-Hans/concepts/paradigms_of_parallelism.md b/docs/source/zh-Hans/concepts/paradigms_of_parallelism.md
index 1ad65261bd49..b24349d0689c 100755
--- a/docs/source/zh-Hans/concepts/paradigms_of_parallelism.md
+++ b/docs/source/zh-Hans/concepts/paradigms_of_parallelism.md
@@ -109,3 +109,4 @@ ring attention思路类似于flash attention，每个GPU只计算一个局部的
 - [ZeRO-Offload: Democratizing Billion-Scale Model Training](https://arxiv.org/abs/2101.06840)
 - [ZeRO-Infinity: Breaking the GPU Memory Wall for Extreme Scale Deep Learning](https://arxiv.org/abs/2104.07857)
 - [PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management](https://arxiv.org/abs/2108.05818)
+<!-- doc-test-command: echo  -->
diff --git a/docs/source/zh-Hans/features/sequence_parallelism.md b/docs/source/zh-Hans/features/sequence_parallelism.md
index f5a080c1ebde..534035cb5abf 100644
--- a/docs/source/zh-Hans/features/sequence_parallelism.md
+++ b/docs/source/zh-Hans/features/sequence_parallelism.md
@@ -125,7 +125,7 @@ plugin = HybridParallelPlugin(
         )
 ```
 #### 使用booster
-```
+```python
 booster = Booster(plugin=plugin)
 dataloader = plugin.prepare_dataloader(dataset, batch_size=args.batch_size, shuffle=True, drop_last=True, seed=42)
 model, optimizer, _, dataloader, _ = booster.boost(model, optimizer, dataloader=dataloader)