diff --git a/.github/workflows/test_fx_automatic_parallel.yml b/.github/workflows/test_fx_automatic_parallel.yml
index d745b8c724..4b1cc21952 100644
--- a/.github/workflows/test_fx_automatic_parallel.yml
+++ b/.github/workflows/test_fx_automatic_parallel.yml
@@ -33,7 +33,8 @@ jobs:
     container:
       image: ${{ matrix.config.image }}
       options: --mount type=tmpfs,destination=/tmp --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
-
+    env:
+      NCCL_DEBUG: INFO
     defaults:
       run:
         shell: bash
diff --git a/optimum/fx/parallelization/passes.py b/optimum/fx/parallelization/passes.py
index d4d563d5b6..6574f5e883 100644
--- a/optimum/fx/parallelization/passes.py
+++ b/optimum/fx/parallelization/passes.py
@@ -426,6 +426,7 @@ def handle_embedding(node: Node, ctx: ParallelExecutionCtx) -> None:
         if key in layer_cache:
             new_mod = layer_cache[key]
         else:
+            assert ctx.compile_times == 0, "illegal path for recompilation"
             new_mod = VocabParallelEmbedding(ctx, mod)
             layer_cache[key] = new_mod
         setattr(parent_mod, field, new_mod)