Commit f1d4167

[moe] removed openmoe-coupled code and rectify mixstral code (hpcaite…
FrankLeeeee authored and ver217 committed May 29, 2024
1 parent 023ea13 commit f1d4167
Showing 18 changed files with 15 additions and 1,573 deletions.
applications/ColossalMoE/infer.py (4 changes: 2 additions & 2 deletions)
@@ -2,15 +2,15 @@
 
 import torch
 import torch.distributed as dist
-from colossal_moe.models.mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO
-from colossal_moe.models.mixtral_policy import MixtralForCausalLMPolicy
+from mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO
 from transformers import AutoTokenizer
 from transformers.models.mixtral import MixtralConfig, MixtralForCausalLM
 
 import colossalai
 from colossalai.booster import Booster
 from colossalai.booster.plugin.moe_hybrid_parallel_plugin import MoeHybridParallelPlugin
 from colossalai.cluster import DistCoordinator
+from colossalai.shardformer.policies.mixtral import MixtralForCausalLMPolicy
 
 
 def parse_args():
applications/ColossalMoE/infer.sh (3 changes: 2 additions & 1 deletion)
@@ -1,5 +1,6 @@
 NUM_GPU=2
-MODEL="mistralai/Mixtral-8x7B-v0.1"
+# MODEL="mistralai/Mixtral-8x7B-v0.1"
+MODEL="mistralai/Mixtral-8x7B-Instruct-v0.1"
 
 # ep
 torchrun --standalone --nproc_per_node $NUM_GPU infer.py \
applications/ColossalMoE/tests/test_mixtral_layer.py (2 changes: 1 addition & 1 deletion)
@@ -3,13 +3,13 @@
 import pytest
 import torch
 import torch.distributed as dist
-from colossal_moe.models.mixtral_layer import EPMixtralSparseMoeBlock
 from torch.testing import assert_close
 from transformers.models.mixtral.configuration_mixtral import MixtralConfig
 from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
 
 import colossalai
 from colossalai.moe import MOE_MANAGER
+from colossalai.shardformer.modeling.mixtral import EPMixtralSparseMoeBlock
 from colossalai.testing.utils import spawn
 
 tokens, n_experts = 7, 4
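Taken together, the two hunks above relocate the Mixtral-specific pieces from the removed colossal_moe package into ColossalAI proper. A minimal sketch of the import paths downstream code would use after this commit (the paths are copied from the diff; everything else is illustrative):

# New locations after this commit; the old colossal_moe.models.* modules are gone.
from colossalai.shardformer.policies.mixtral import MixtralForCausalLMPolicy
from colossalai.shardformer.modeling.mixtral import EPMixtralSparseMoeBlock

# The Mixtral checkpoint IO is now a plain local module next to infer.py/train.py
# in applications/ColossalMoE, imported without the colossal_moe prefix.
from mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO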
applications/ColossalMoE/tests/test_moe_checkpoint.py (4 changes: 1 addition & 3 deletions)
@@ -3,8 +3,7 @@
 import pytest
 import torch
 import torch.distributed as dist
-from colossal_moe.models.mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO
-from colossal_moe.models.mixtral_policy import MixtralForCausalLMPolicy
+from mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO
 from torch.optim import Adam
 from transformers.models.mixtral.configuration_mixtral import MixtralConfig
 from transformers.models.mixtral.modeling_mixtral import MixtralForCausalLM
@@ -81,7 +80,6 @@ def check_mixtral_moe_layer():
         tp_size=1,
         pp_size=2,
         ep_size=2,
-        custom_policy=MixtralForCausalLMPolicy(),
         checkpoint_io=MixtralMoEHybridParallelCheckpointIO,
         microbatch_size=1,
         zero_stage=1,
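The second hunk drops the explicit custom_policy argument, presumably because the plugin can now resolve the Mixtral policy on its own. A minimal sketch of the resulting plugin construction, mirroring the argument values visible in the test above (the Booster wiring is the usual ColossalAI pattern and is shown only for context):

from mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO

from colossalai.booster import Booster
from colossalai.booster.plugin.moe_hybrid_parallel_plugin import MoeHybridParallelPlugin

# No custom_policy any more; the Mixtral checkpoint IO class is passed directly.
plugin = MoeHybridParallelPlugin(
    tp_size=1,
    pp_size=2,
    ep_size=2,
    checkpoint_io=MixtralMoEHybridParallelCheckpointIO,
    microbatch_size=1,
    zero_stage=1,
)
booster = Booster(plugin=plugin)  # model and optimizer are then wrapped via booster.boost(...)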
applications/ColossalMoE/train.py (6 changes: 2 additions & 4 deletions)
@@ -2,13 +2,12 @@
 
 import torch
 import torch.distributed as dist
-from colossal_moe.models.mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO
-from colossal_moe.models.mixtral_policy import MixtralForCausalLMPolicy
-from colossal_moe.utils import load_checkpoint, move_to_cuda, save_checkpoint
+from mixtral_checkpoint import MixtralMoEHybridParallelCheckpointIO
 from torch.utils.data import Dataset
 from tqdm import tqdm
 from transformers import AutoTokenizer
 from transformers.models.mixtral import MixtralForCausalLM
+from utils import load_checkpoint, move_to_cuda, save_checkpoint
 
 import colossalai
 from colossalai.booster import Booster
@@ -155,7 +154,6 @@ def main():
         pp_size=args.pp_size,
         ep_size=args.ep_size,
         microbatch_size=args.microbatch_size,
-        custom_policy=MixtralForCausalLMPolicy(),
         enable_fused_normalization=args.use_layernorm_kernel,
         enable_jit_fused=args.use_kernel,
         precision=args.precision,
colossalai/moe/__init__.py (13 changes: 0 additions & 13 deletions)
@@ -1,20 +1,7 @@
 from .checkpoint import MoECheckpointIO
-from .experts import MLPExperts
-from .layers import SparseMLP, apply_load_balance
 from .manager import MOE_MANAGER
-from .routers import MoeRouter, Top1Router, Top2Router, TopKRouter
-from .utils import NormalNoiseGenerator, UniformNoiseGenerator
 
 __all__ = [
-    "MLPExperts",
-    "MoeRouter",
-    "Top1Router",
-    "Top2Router",
-    "TopKRouter",
-    "NormalNoiseGenerator",
-    "UniformNoiseGenerator",
-    "SparseMLP",
     "MoECheckpointIO",
     "MOE_MANAGER",
-    "apply_load_balance",
 ]
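Code that still imports the trimmed exports will now fail at import time rather than at call time. A purely illustrative guard for projects that need to tolerate both layouts (the fallback behaviour is an assumption, not something this commit provides):

# Illustrative only: SparseMLP and the routers are removed by this commit, not relocated.
try:
    from colossalai.moe import SparseMLP  # pre-f1d4167 layout
except ImportError:
    SparseMLP = None  # the openmoe-coupled MoE layers are gone in the new layout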
colossalai/moe/experts.py (161 changes: 0 additions & 161 deletions)

This file was deleted.

(Diffs for the remaining changed files were not loaded on this page.)