From 9d9c6f8189c9ebfd00ed7c9e145ae92ac0a92ac4 Mon Sep 17 00:00:00 2001
From: ver217
Date: Tue, 29 Aug 2023 14:46:13 +0800
Subject: [PATCH] [legacy] move gpc to legacy

---
 colossalai/amp/naive_amp/_fp16_optimizer.py | 2 +-
 colossalai/amp/naive_amp/naive_amp.py | 2 +-
 colossalai/amp/torch_amp/_grad_scaler.py | 2 +-
 colossalai/cli/benchmark/benchmark.py | 2 +-
 colossalai/communication/collective.py | 2 +-
 colossalai/communication/p2p.py | 2 +-
 colossalai/communication/p2p_v2.py | 2 +-
 colossalai/communication/ring.py | 2 +-
 colossalai/communication/utils.py | 2 +-
 .../_data_parallel_gradient_handler.py | 4 +-
 .../gradient_handler/_moe_gradient_handler.py | 4 +-
 .../_pipeline_parallel_gradient_handler.py | 2 +-
 .../_sequence_parallel_gradient_handler.py | 4 +-
 .../engine/schedule/_pipeline_schedule.py | 2 +-
 .../engine/schedule/_pipeline_schedule_v2.py | 2 +-
 colossalai/initialize.py | 2 +-
 colossalai/legacy/context/moe_context.py | 4 +-
 colossalai/legacy/context/random/_helper.py | 2 +-
 colossalai/{ => legacy}/core.py | 0
 colossalai/nn/layer/base_layer.py | 2 +-
 colossalai/nn/layer/parallel_1d/_operation.py | 10 +-
 colossalai/nn/layer/parallel_1d/_utils.py | 5 +-
 colossalai/nn/layer/parallel_1d/layers.py | 2 +-
 colossalai/nn/layer/parallel_2d/_operation.py | 2 +-
 colossalai/nn/layer/parallel_2d/_utils.py | 2 +-
 colossalai/nn/layer/parallel_2d/layers.py | 2 +-
 .../nn/layer/parallel_2p5d/_operation.py | 2 +-
 colossalai/nn/layer/parallel_2p5d/_utils.py | 2 +-
 colossalai/nn/layer/parallel_2p5d/layers.py | 2 +-
 colossalai/nn/layer/parallel_3d/_operation.py | 2 +-
 colossalai/nn/layer/parallel_3d/_utils.py | 2 +-
 colossalai/nn/layer/parallel_3d/layers.py | 2 +-
 .../nn/layer/parallel_sequence/_operation.py | 2 +-
 .../nn/layer/parallel_sequence/layers.py | 2 +-
 .../nn/layer/wrapper/pipeline_wrapper.py | 2 +-
 colossalai/nn/loss/loss_1d.py | 2 +-
 colossalai/nn/loss/loss_2d.py | 2 +-
 colossalai/nn/loss/loss_2p5d.py | 2 +-
 colossalai/nn/loss/loss_3d.py | 11 +-
 colossalai/pipeline/pipelinable.py | 2 +-
 colossalai/trainer/hooks/_log_hook.py | 2 +-
 colossalai/trainer/hooks/_metric_hook.py | 2 +-
 colossalai/utils/checkpointing.py | 2 +-
 colossalai/utils/common.py | 2 +-
 .../data_sampler/data_parallel_sampler.py | 2 +-
 colossalai/utils/memory.py | 2 +-
 colossalai/utils/moe.py | 2 +-
 .../utils/profiler/legacy/prof_utils.py | 263 +++++++++---------
 .../zero/legacy/init_ctx/init_context.py | 2 +-
 .../legacy/sharded_model/sharded_model_v2.py | 2 +-
 .../legacy/sharded_optim/sharded_optim_v2.py | 2 +-
 .../roberta/pretraining/pretrain_utils.py | 13 +-
 .../roberta/pretraining/run_pretraining.py | 2 +-
 .../roberta/pretraining/utils/exp_util.py | 4 +-
 .../dreambooth/train_dreambooth_colossalai.py | 2 +-
 .../train_dreambooth_colossalai_lora.py | 2 +-
 .../auto_parallel/auto_parallel_with_gpt.py | 2 +-
 examples/language/gpt/titans/model/embed.py | 2 +-
 examples/language/gpt/titans/model/gpt1d.py | 2 +-
 .../gpt/titans/model/pipeline_gpt1d.py | 2 +-
 examples/language/gpt/titans/train_gpt.py | 2 +-
 .../auto_parallel_with_resnet.py | 2 +-
 examples/tutorial/hybrid_parallel/train.py | 2 +-
 .../tutorial/large_batch_optimizer/train.py | 2 +-
 examples/tutorial/opt/opt/context.py | 2 +-
 examples/tutorial/opt/opt/run_clm.py | 2 +-
 .../sequence_parallel/data/__init__.py | 2 +-
 .../sequence_parallel/data/bert_helper.py | 2 +-
 .../data/datasets/bert_dataset.py | 2 +-
 .../data/datasets/data_samplers.py | 2 +-
 .../data/tokenizer/tokenizer.py | 2 +-
 .../sequence_parallel/loss_func/bert_loss.py | 2 +-
 .../tutorial/sequence_parallel/model/bert.py | 2 +-
 .../sequence_parallel/model/layers/head.py | 2 +-
 .../model/layers/preprocess.py | 2 +-
 examples/tutorial/sequence_parallel/train.py | 2 +-
 .../test_C_solver_consistency.py | 6 +-
 .../test_ckpt_torchvision.py | 2 +-
 .../test_autochunk_alphafold_utils.py | 2 +-
 .../test_autochunk_diffuser_utils.py | 2 +-
 .../test_autochunk_vit_utils.py | 2 +-
 tests/test_cluster/test_process_group_mesh.py | 2 +-
 .../test_comm/test_boardcast_send_recv_v2.py | 2 +-
 tests/test_comm/test_comm.py | 2 +-
 tests/test_comm/test_object_list_p2p.py | 2 +-
 tests/test_comm/test_object_list_p2p_v2.py | 2 +-
 tests/test_context/test_hybrid_parallel.py | 2 +-
 tests/test_data/test_data_parallel_sampler.py | 2 +-
 .../test_deterministic_dataloader.py | 2 +-
 .../test_cifar_with_data_pipeline_tensor.py | 2 +-
 tests/test_device/test_init_logical_pg.py | 2 +-
 tests/test_engine/test_engine.py | 2 +-
 .../test_engine/test_gradient_accumluation.py | 2 +-
 .../test_activation_checkpoint_codegen.py | 14 +-
 ...st_nested_activation_checkpoint_codegen.py | 22 +-
 .../test_codegen/test_offload_codegen.py | 34 +--
 tests/test_fx/test_parallel_1d.py | 2 +-
 .../test_1d/checks_1d/check_layer_1d.py | 2 +-
 tests/test_layers/test_1d/test_1d.py | 2 +-
 .../test_2d/checks_2d/check_layer_2d.py | 2 +-
 .../test_2d/checks_2d/check_operation_2d.py | 2 +-
 tests/test_layers/test_2d/test_2d.py | 2 +-
 .../test_2p5d/checks_2p5d/check_layer_2p5d.py | 2 +-
 .../checks_2p5d/check_operation_2p5d.py | 2 +-
 tests/test_layers/test_2p5d/test_2p5d.py | 2 +-
 .../test_3d/checks_3d/check_layer_3d.py | 2 +-
 tests/test_layers/test_3d/test_3d.py | 2 +-
 .../checks_seq/check_layer_seq.py | 2 +-
 .../test_sequence/test_sequence.py | 2 +-
 tests/test_moe/test_kernel.py | 2 +-
 tests/test_tensor/common_utils/_utils.py | 2 +-
 tests/test_tensor/test_comm_spec_apply.py | 2 +-
 .../test_dtensor/test_comm_spec.py | 2 +-
 tests/test_tensor/test_mix_gather.py | 2 +-
 tests/test_trainer/test_pipeline/test_p2p.py | 2 +-
 .../test_pipeline/test_pipeline_schedule.py | 2 +-
 .../test_trainer_with_pipe_schedule.py | 2 +-
 .../test_checkpoint/test_checkpoint_1d.py | 2 +-
 .../test_checkpoint/test_checkpoint_2d.py | 2 +-
 .../test_checkpoint/test_checkpoint_2p5d.py | 2 +-
 .../test_checkpoint/test_checkpoint_3d.py | 2 +-
 121 files changed, 306 insertions(+), 304 deletions(-)
 rename colossalai/{ => legacy}/core.py (100%)

diff --git a/colossalai/amp/naive_amp/_fp16_optimizer.py b/colossalai/amp/naive_amp/_fp16_optimizer.py
index 2f4f0ebc8743..48f25743ad76 100644
--- a/colossalai/amp/naive_amp/_fp16_optimizer.py
+++ b/colossalai/amp/naive_amp/_fp16_optimizer.py
@@ -6,9 +6,9 @@
 from torch.distributed import ProcessGroup
 from torch.optim import Optimizer
-from colossalai.core import global_context as gpc
 from colossalai.kernel.op_builder import FusedOptimBuilder
 from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.logging import get_dist_logger
 from colossalai.utils import clip_grad_norm_fp32, copy_tensor_parallel_attributes, multi_tensor_applier
diff --git a/colossalai/amp/naive_amp/naive_amp.py b/colossalai/amp/naive_amp/naive_amp.py
index 4650de336c75..ef39fce009b6 100644
--- a/colossalai/amp/naive_amp/naive_amp.py
+++ b/colossalai/amp/naive_amp/naive_amp.py
@@ -11,8 +11,8 @@
 from torch.distributed import ReduceOp
 from torch.optim import Optimizer
-from colossalai.core import global_context as gpc
 from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import
global_context as gpc from colossalai.nn.optimizer import ColossalaiOptimizer from ._fp16_optimizer import FP16Optimizer diff --git a/colossalai/amp/torch_amp/_grad_scaler.py b/colossalai/amp/torch_amp/_grad_scaler.py index 84547c39a6b9..543dac6ab5ef 100644 --- a/colossalai/amp/torch_amp/_grad_scaler.py +++ b/colossalai/amp/torch_amp/_grad_scaler.py @@ -13,8 +13,8 @@ from packaging import version from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc class _MultiDeviceReplicator(object): diff --git a/colossalai/cli/benchmark/benchmark.py b/colossalai/cli/benchmark/benchmark.py index 8bb782480f69..379aa9334687 100644 --- a/colossalai/cli/benchmark/benchmark.py +++ b/colossalai/cli/benchmark/benchmark.py @@ -6,9 +6,9 @@ import colossalai from colossalai.cli.benchmark.utils import find_all_configs, get_batch_data, profile_model -from colossalai.core import global_context as gpc from colossalai.legacy.context import Config from colossalai.legacy.context.random import reset_seeds +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers, get_dist_logger from colossalai.testing import free_port from colossalai.utils import MultiTimer diff --git a/colossalai/communication/collective.py b/colossalai/communication/collective.py index d49fb82607ad..7471188226f0 100644 --- a/colossalai/communication/collective.py +++ b/colossalai/communication/collective.py @@ -6,8 +6,8 @@ from torch import Tensor from torch.distributed import ReduceOp -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc _all_gather_func = dist._all_gather_base \ if "all_gather_into_tensor" not in dir(dist) else dist.all_gather_into_tensor diff --git a/colossalai/communication/p2p.py b/colossalai/communication/p2p.py index 16325105622e..e3f9108ab840 100644 --- a/colossalai/communication/p2p.py +++ b/colossalai/communication/p2p.py @@ -8,8 +8,8 @@ import torch import torch.distributed as dist -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.utils import get_current_device from .utils import gather_split_1d_tensor, split_tensor_into_1d_equal_chunks diff --git a/colossalai/communication/p2p_v2.py b/colossalai/communication/p2p_v2.py index b573e04be9fb..66af214950f2 100644 --- a/colossalai/communication/p2p_v2.py +++ b/colossalai/communication/p2p_v2.py @@ -10,8 +10,8 @@ from torch.distributed import ProcessGroupNCCL from torch.distributed import distributed_c10d as c10d -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc TensorShape = Union[torch.Size, List[int], Tuple[int]] _pg_manager = {} diff --git a/colossalai/communication/ring.py b/colossalai/communication/ring.py index 444d33b917f1..e80192fb578d 100644 --- a/colossalai/communication/ring.py +++ b/colossalai/communication/ring.py @@ -3,8 +3,8 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.utils import get_current_device, 
synchronize diff --git a/colossalai/communication/utils.py b/colossalai/communication/utils.py index 5a930a339940..7e3dcf1e9820 100644 --- a/colossalai/communication/utils.py +++ b/colossalai/communication/utils.py @@ -3,8 +3,8 @@ import torch import torch.distributed as dist -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.utils import get_current_device TensorShape = Union[torch.Size, List[int], Tuple[int]] diff --git a/colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py b/colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py index 5cc7169c5a9f..20d877fd34a9 100644 --- a/colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py +++ b/colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py @@ -1,7 +1,7 @@ -from colossalai.core import global_context as gpc +from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.registry import GRADIENT_HANDLER -from ...context.parallel_mode import ParallelMode from ._base_gradient_handler import BaseGradientHandler from .utils import bucket_allreduce diff --git a/colossalai/engine/gradient_handler/_moe_gradient_handler.py b/colossalai/engine/gradient_handler/_moe_gradient_handler.py index 9820297a9fb0..fe50887c2709 100644 --- a/colossalai/engine/gradient_handler/_moe_gradient_handler.py +++ b/colossalai/engine/gradient_handler/_moe_gradient_handler.py @@ -1,9 +1,9 @@ -from colossalai.core import global_context as gpc +from colossalai.legacy.context import ParallelMode from colossalai.legacy.context.moe_context import MOE_CONTEXT +from colossalai.legacy.core import global_context as gpc from colossalai.registry import GRADIENT_HANDLER from colossalai.utils.moe import get_moe_epsize_param_dict -from ...context.parallel_mode import ParallelMode from ._base_gradient_handler import BaseGradientHandler from .utils import bucket_allreduce diff --git a/colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py b/colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py index 5b49a9c0360d..601c4a908104 100644 --- a/colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py +++ b/colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py @@ -6,7 +6,7 @@ import torch.distributed as dist from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors -from colossalai.core import global_context as gpc +from colossalai.legacy.core import global_context as gpc from colossalai.registry import GRADIENT_HANDLER from ._base_gradient_handler import BaseGradientHandler diff --git a/colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py b/colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py index ea4f0fbb1c71..de951c3225c6 100644 --- a/colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py +++ b/colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py @@ -1,7 +1,7 @@ -from colossalai.core import global_context as gpc +from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.registry import GRADIENT_HANDLER -from ...context.parallel_mode import ParallelMode from ._base_gradient_handler import BaseGradientHandler from .utils import bucket_allreduce diff --git 
a/colossalai/engine/schedule/_pipeline_schedule.py b/colossalai/engine/schedule/_pipeline_schedule.py index 3a87dcdbf270..5cc52d60aa57 100644 --- a/colossalai/engine/schedule/_pipeline_schedule.py +++ b/colossalai/engine/schedule/_pipeline_schedule.py @@ -8,8 +8,8 @@ import colossalai.communication as comm from colossalai.amp.naive_amp import NaiveAMPModel -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.utils import switch_virtual_pipeline_parallel_rank from colossalai.utils.cuda import get_current_device diff --git a/colossalai/engine/schedule/_pipeline_schedule_v2.py b/colossalai/engine/schedule/_pipeline_schedule_v2.py index 5134b9c2bf3a..a1f8cd56eeae 100644 --- a/colossalai/engine/schedule/_pipeline_schedule_v2.py +++ b/colossalai/engine/schedule/_pipeline_schedule_v2.py @@ -7,8 +7,8 @@ import colossalai.communication.p2p_v2 as comm from colossalai import engine -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.utils.cuda import get_current_device from ._pipeline_schedule import PipelineSchedule diff --git a/colossalai/initialize.py b/colossalai/initialize.py index 5a51be6fdfd8..60878fde1770 100644 --- a/colossalai/initialize.py +++ b/colossalai/initialize.py @@ -20,7 +20,6 @@ from colossalai.amp import AMP_TYPE, convert_to_amp from colossalai.amp.naive_amp import NaiveAMPModel from colossalai.builder.builder import build_gradient_handler -from colossalai.core import global_context as gpc from colossalai.engine import Engine from colossalai.engine.gradient_accumulation import accumulate_gradient from colossalai.engine.schedule import ( @@ -31,6 +30,7 @@ ) from colossalai.legacy.context import Config, ConfigException, ParallelMode from colossalai.legacy.context.moe_context import MOE_CONTEXT +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn.optimizer.colossalai_optimizer import ColossalaiOptimizer from colossalai.utils import ( diff --git a/colossalai/legacy/context/moe_context.py b/colossalai/legacy/context/moe_context.py index 552f8ce6111f..d52d4df6261e 100644 --- a/colossalai/legacy/context/moe_context.py +++ b/colossalai/legacy/context/moe_context.py @@ -9,7 +9,7 @@ def _check_sanity(): - from colossalai.core import global_context as gpc + from colossalai.legacy.core import global_context as gpc if gpc.tensor_parallel_size > 1 or gpc.pipeline_parallel_size > 1: raise NotImplementedError("Moe is not compatible with tensor or " "pipeline parallel at present.") @@ -61,7 +61,7 @@ def setup(self, seed: int, use_kernel_optim: bool = True): self.world_size = dist.get_world_size() - from colossalai.core import global_context as gpc + from colossalai.legacy.core import global_context as gpc self.max_ep_size = gpc.config.get('max_ep_size', self.world_size) assert self.world_size % self.max_ep_size == 0, \ "Maximum expert parallel size must be a factor of the number of GPUs" diff --git a/colossalai/legacy/context/random/_helper.py b/colossalai/legacy/context/random/_helper.py index d0e71346c3f0..4b5d5ef2fe55 100644 --- a/colossalai/legacy/context/random/_helper.py +++ b/colossalai/legacy/context/random/_helper.py @@ -161,7 +161,7 @@ def wrapper(*args, **kwargs): def moe_set_seed(seed): if 
torch.cuda.is_available(): - from colossalai.core import global_context as gpc + from colossalai.legacy.core import global_context as gpc global_rank = gpc.get_global_rank() diff_seed = seed + global_rank add_seed(ParallelMode.TENSOR, diff_seed, True) diff --git a/colossalai/core.py b/colossalai/legacy/core.py similarity index 100% rename from colossalai/core.py rename to colossalai/legacy/core.py diff --git a/colossalai/nn/layer/base_layer.py b/colossalai/nn/layer/base_layer.py index 809fb95803de..01fd9b3e8943 100644 --- a/colossalai/nn/layer/base_layer.py +++ b/colossalai/nn/layer/base_layer.py @@ -5,8 +5,8 @@ import torch.nn as nn -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc class ParallelLayer(nn.Module): diff --git a/colossalai/nn/layer/parallel_1d/_operation.py b/colossalai/nn/layer/parallel_1d/_operation.py index 300baf9c12ba..f0295481cb61 100644 --- a/colossalai/nn/layer/parallel_1d/_operation.py +++ b/colossalai/nn/layer/parallel_1d/_operation.py @@ -1,7 +1,7 @@ import torch import torch.distributed as dist -from colossalai.core import global_context as gpc +from colossalai.legacy.core import global_context as gpc try: import fused_mix_prec_layer_norm_cuda @@ -40,10 +40,10 @@ def backward(ctx, grad_output): input_, weight_, bias_, mean, invvar = ctx.saved_tensors grad_input = grad_weight = grad_bias = None grad_input, grad_weight, grad_bias \ - = fused_mix_prec_layer_norm_cuda.backward_affine( - grad_output.contiguous(), mean, invvar, - input_, ctx.normalized_shape, - weight_, bias_, ctx.eps) + = fused_mix_prec_layer_norm_cuda.backward_affine( + grad_output.contiguous(), mean, invvar, + input_, ctx.normalized_shape, + weight_, bias_, ctx.eps) return grad_input, grad_weight, grad_bias, None, None diff --git a/colossalai/nn/layer/parallel_1d/_utils.py b/colossalai/nn/layer/parallel_1d/_utils.py index 1212d595635d..f4d7a8b9c79c 100644 --- a/colossalai/nn/layer/parallel_1d/_utils.py +++ b/colossalai/nn/layer/parallel_1d/_utils.py @@ -3,8 +3,9 @@ import torch import torch.distributed as dist -from colossalai.core import global_context as gpc + from colossalai.global_variables import tensor_parallel_env as env +from colossalai.legacy.core import global_context as gpc from ..utils import divide @@ -124,7 +125,7 @@ def backward(ctx, grad_output): class _SplitForwardGatherBackward(torch.autograd.Function): """ Split the input and keep only the corresponding chuck to the rank. - + Args: input_: input matrix. parallel_mode: parallel mode. 
diff --git a/colossalai/nn/layer/parallel_1d/layers.py b/colossalai/nn/layer/parallel_1d/layers.py index 9bafddb330de..5c46c565f571 100644 --- a/colossalai/nn/layer/parallel_1d/layers.py +++ b/colossalai/nn/layer/parallel_1d/layers.py @@ -11,10 +11,10 @@ from torch.nn.parameter import Parameter from colossalai.communication import broadcast -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.kernel import LayerNorm from colossalai.legacy.context import ParallelMode, seed +from colossalai.legacy.core import global_context as gpc from colossalai.nn import init as init from colossalai.registry import LAYERS from colossalai.utils.checkpointing import ( diff --git a/colossalai/nn/layer/parallel_2d/_operation.py b/colossalai/nn/layer/parallel_2d/_operation.py index b1f92a7f7045..2d257f7e0878 100644 --- a/colossalai/nn/layer/parallel_2d/_operation.py +++ b/colossalai/nn/layer/parallel_2d/_operation.py @@ -6,9 +6,9 @@ from torch.cuda.amp import custom_bwd, custom_fwd from colossalai.communication.collective import all_gather, all_reduce, reduce, reduce_scatter -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.utils import get_current_device diff --git a/colossalai/nn/layer/parallel_2d/_utils.py b/colossalai/nn/layer/parallel_2d/_utils.py index 517cc611582e..599030a81198 100644 --- a/colossalai/nn/layer/parallel_2d/_utils.py +++ b/colossalai/nn/layer/parallel_2d/_utils.py @@ -1,6 +1,6 @@ -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc def get_summa_dim_from_env() -> int: diff --git a/colossalai/nn/layer/parallel_2d/layers.py b/colossalai/nn/layer/parallel_2d/layers.py index d51a03ba35fe..af6c63934bba 100644 --- a/colossalai/nn/layer/parallel_2d/layers.py +++ b/colossalai/nn/layer/parallel_2d/layers.py @@ -9,9 +9,9 @@ from torch.nn import Parameter from colossalai.communication import broadcast -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context import ParallelMode, seed +from colossalai.legacy.core import global_context as gpc from colossalai.nn import init as init from colossalai.registry import LAYERS from colossalai.utils.checkpointing import gather_tensor_parallel_state_dict, partition_tensor_parallel_state_dict diff --git a/colossalai/nn/layer/parallel_2p5d/_operation.py b/colossalai/nn/layer/parallel_2p5d/_operation.py index 4371bccc431c..237e2f6337e0 100644 --- a/colossalai/nn/layer/parallel_2p5d/_operation.py +++ b/colossalai/nn/layer/parallel_2p5d/_operation.py @@ -6,8 +6,8 @@ from torch.cuda.amp import custom_bwd, custom_fwd from colossalai.communication.collective import all_gather, all_reduce, reduce_scatter -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.utils import get_current_device diff --git a/colossalai/nn/layer/parallel_2p5d/_utils.py b/colossalai/nn/layer/parallel_2p5d/_utils.py index f8098abbd3eb..2fde1a551775 100644 --- a/colossalai/nn/layer/parallel_2p5d/_utils.py +++ 
b/colossalai/nn/layer/parallel_2p5d/_utils.py @@ -1,6 +1,6 @@ -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc def get_tesseract_dim_dep_from_env(): diff --git a/colossalai/nn/layer/parallel_2p5d/layers.py b/colossalai/nn/layer/parallel_2p5d/layers.py index 129d48a96559..a7b516b0adef 100644 --- a/colossalai/nn/layer/parallel_2p5d/layers.py +++ b/colossalai/nn/layer/parallel_2p5d/layers.py @@ -9,9 +9,9 @@ from torch.nn import Parameter from colossalai.communication import broadcast -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context import ParallelMode, seed +from colossalai.legacy.core import global_context as gpc from colossalai.nn import init as init from colossalai.registry import LAYERS from colossalai.utils.checkpointing import ( diff --git a/colossalai/nn/layer/parallel_3d/_operation.py b/colossalai/nn/layer/parallel_3d/_operation.py index 7ccd301708f5..5b7ada7341de 100755 --- a/colossalai/nn/layer/parallel_3d/_operation.py +++ b/colossalai/nn/layer/parallel_3d/_operation.py @@ -9,8 +9,8 @@ from colossalai.communication import all_gather, all_reduce, broadcast, reduce, reduce_scatter from colossalai.constants import INPUT_GROUP_3D, WEIGHT_GROUP_3D -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from ._utils import get_parallel_mode_from_env, push_async_grad diff --git a/colossalai/nn/layer/parallel_3d/_utils.py b/colossalai/nn/layer/parallel_3d/_utils.py index 364191a79f88..d6cb641a6f94 100644 --- a/colossalai/nn/layer/parallel_3d/_utils.py +++ b/colossalai/nn/layer/parallel_3d/_utils.py @@ -5,8 +5,8 @@ from torch import Tensor from colossalai.constants import INPUT_GROUP_3D, INPUT_X_WEIGHT_3D, OUTPUT_GROUP_3D, OUTPUT_X_WEIGHT_3D, WEIGHT_GROUP_3D -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env +from colossalai.legacy.core import global_context as gpc def get_depth_from_env() -> int: diff --git a/colossalai/nn/layer/parallel_3d/layers.py b/colossalai/nn/layer/parallel_3d/layers.py index 0ec30b108ff1..67136ad30942 100644 --- a/colossalai/nn/layer/parallel_3d/layers.py +++ b/colossalai/nn/layer/parallel_3d/layers.py @@ -10,9 +10,9 @@ from colossalai.communication import all_reduce, broadcast from colossalai.constants import INPUT_GROUP_3D, INPUT_X_WEIGHT_3D, OUTPUT_GROUP_3D, OUTPUT_X_WEIGHT_3D, WEIGHT_GROUP_3D -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context import ParallelMode, seed +from colossalai.legacy.core import global_context as gpc from colossalai.nn import init as init from colossalai.nn.layer.base_layer import ParallelLayer from colossalai.registry import LAYERS diff --git a/colossalai/nn/layer/parallel_sequence/_operation.py b/colossalai/nn/layer/parallel_sequence/_operation.py index d2eb14bf85a5..9b8d6b7dc87b 100644 --- a/colossalai/nn/layer/parallel_sequence/_operation.py +++ b/colossalai/nn/layer/parallel_sequence/_operation.py @@ -6,8 +6,8 @@ from torch.cuda.amp import custom_bwd, custom_fwd from colossalai.communication import ring_forward -from colossalai.core import global_context as gpc from 
colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.parallel_sequence._utils import _calc_current_device_range, _calc_incoming_device_range from colossalai.utils import get_current_device diff --git a/colossalai/nn/layer/parallel_sequence/layers.py b/colossalai/nn/layer/parallel_sequence/layers.py index e3d2217d2791..e19e1f121f6b 100644 --- a/colossalai/nn/layer/parallel_sequence/layers.py +++ b/colossalai/nn/layer/parallel_sequence/layers.py @@ -9,11 +9,11 @@ from torch.nn import Parameter import colossalai -from colossalai.core import global_context as gpc from colossalai.kernel import FusedScaleMaskSoftmax from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType from colossalai.legacy.context import seed from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.parallel_sequence._operation import RingAV, RingQK from colossalai.registry import LAYERS diff --git a/colossalai/nn/layer/wrapper/pipeline_wrapper.py b/colossalai/nn/layer/wrapper/pipeline_wrapper.py index 42a72fee747f..ec19d1b707d8 100644 --- a/colossalai/nn/layer/wrapper/pipeline_wrapper.py +++ b/colossalai/nn/layer/wrapper/pipeline_wrapper.py @@ -3,8 +3,8 @@ import torch.distributed as dist import torch.nn as nn -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc class PipelineSharedModuleWrapper: diff --git a/colossalai/nn/loss/loss_1d.py b/colossalai/nn/loss/loss_1d.py index 52dc783dd486..8258bcf15fc9 100644 --- a/colossalai/nn/loss/loss_1d.py +++ b/colossalai/nn/loss/loss_1d.py @@ -3,8 +3,8 @@ from torch.cuda.amp import custom_bwd, custom_fwd from torch.nn.modules.loss import _Loss -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.registry import LOSSES diff --git a/colossalai/nn/loss/loss_2d.py b/colossalai/nn/loss/loss_2d.py index 9db50f77e281..472dc1e658d8 100644 --- a/colossalai/nn/loss/loss_2d.py +++ b/colossalai/nn/loss/loss_2d.py @@ -4,8 +4,8 @@ from torch.nn.functional import cross_entropy from torch.nn.modules.loss import _Loss -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.parallel_2d import reduce_by_batch_2d, split_batch_2d from colossalai.nn.layer.parallel_2d._utils import assert_summa_initialization from colossalai.registry import LOSSES diff --git a/colossalai/nn/loss/loss_2p5d.py b/colossalai/nn/loss/loss_2p5d.py index c45de0a3c1e2..44b4bfbbf2af 100644 --- a/colossalai/nn/loss/loss_2p5d.py +++ b/colossalai/nn/loss/loss_2p5d.py @@ -4,8 +4,8 @@ from torch.nn.functional import cross_entropy from torch.nn.modules.loss import _Loss -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.parallel_2p5d import reduce_by_batch_2p5d, split_batch_2p5d from colossalai.nn.layer.parallel_2p5d._utils import assert_tesseract_initialization from colossalai.registry import LOSSES diff --git a/colossalai/nn/loss/loss_3d.py b/colossalai/nn/loss/loss_3d.py index f27d57ad6c99..1b0199ba1e59 100644 --- a/colossalai/nn/loss/loss_3d.py +++ 
b/colossalai/nn/loss/loss_3d.py @@ -1,14 +1,15 @@ import torch import torch.distributed as dist -from colossalai.constants import INPUT_GROUP_3D, WEIGHT_GROUP_3D, OUTPUT_GROUP_3D -from colossalai.core import global_context as gpc +from torch.cuda.amp import custom_bwd, custom_fwd +from torch.nn.functional import cross_entropy +from torch.nn.modules.loss import _Loss + +from colossalai.constants import INPUT_GROUP_3D, OUTPUT_GROUP_3D, WEIGHT_GROUP_3D +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.parallel_3d import reduce_by_batch_3d, split_tensor_3d from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env from colossalai.registry import LOSSES from colossalai.utils import get_current_device -from torch.cuda.amp import custom_bwd, custom_fwd -from torch.nn.functional import cross_entropy -from torch.nn.modules.loss import _Loss @LOSSES.register_module diff --git a/colossalai/pipeline/pipelinable.py b/colossalai/pipeline/pipelinable.py index 6ab6d2b6861d..f38f07b4339d 100644 --- a/colossalai/pipeline/pipelinable.py +++ b/colossalai/pipeline/pipelinable.py @@ -2,8 +2,8 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.utils import CheckpointModule from colossalai.tensor import ColoParameter from colossalai.utils.model.utils import InsertPostInitMethodToModuleSubClasses diff --git a/colossalai/trainer/hooks/_log_hook.py b/colossalai/trainer/hooks/_log_hook.py index 97dd5c1b8fdd..8cc43aae98c1 100644 --- a/colossalai/trainer/hooks/_log_hook.py +++ b/colossalai/trainer/hooks/_log_hook.py @@ -5,8 +5,8 @@ import os.path as osp from typing import List -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import DistributedLogger from colossalai.registry import HOOKS from colossalai.trainer.hooks._metric_hook import ThroughputMetric diff --git a/colossalai/trainer/hooks/_metric_hook.py b/colossalai/trainer/hooks/_metric_hook.py index c373e43867bd..328bd669cb91 100644 --- a/colossalai/trainer/hooks/_metric_hook.py +++ b/colossalai/trainer/hooks/_metric_hook.py @@ -8,8 +8,8 @@ import torch.distributed as dist from colossalai.communication import all_reduce -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.registry import HOOKS from colossalai.utils import get_current_device, is_no_pp_or_last_stage diff --git a/colossalai/utils/checkpointing.py b/colossalai/utils/checkpointing.py index 311e9be250aa..0af09cdba414 100644 --- a/colossalai/utils/checkpointing.py +++ b/colossalai/utils/checkpointing.py @@ -5,8 +5,8 @@ import torch.distributed as dist from colossalai.constants import IS_TENSOR_PARALLEL -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc try: from torch.nn.modules.module import _EXTRA_STATE_KEY_SUFFIX diff --git a/colossalai/utils/common.py b/colossalai/utils/common.py index c1091f4b66e7..da438333ad79 100644 --- a/colossalai/utils/common.py +++ b/colossalai/utils/common.py @@ -16,9 +16,9 @@ from torch.nn.parameter import Parameter from colossalai.constants import IS_TENSOR_PARALLEL, NUM_PARTITIONS, 
TENSOR_PARALLEL_ATTRIBUTES -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.tensor import ColoParameter, ProcessGroup from .multi_tensor_apply import multi_tensor_applier diff --git a/colossalai/utils/data_sampler/data_parallel_sampler.py b/colossalai/utils/data_sampler/data_parallel_sampler.py index 90d3de0712f4..1991b27ccf62 100644 --- a/colossalai/utils/data_sampler/data_parallel_sampler.py +++ b/colossalai/utils/data_sampler/data_parallel_sampler.py @@ -10,8 +10,8 @@ import torch from torch.utils.data import DataLoader, Dataset, Sampler -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.registry import DATA_SAMPLERS T_co = TypeVar('T_co', covariant=True) diff --git a/colossalai/utils/memory.py b/colossalai/utils/memory.py index 1f3664a656c6..7e903c05ada2 100644 --- a/colossalai/utils/memory.py +++ b/colossalai/utils/memory.py @@ -5,8 +5,8 @@ import torch from packaging import version -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.utils import get_current_device diff --git a/colossalai/utils/moe.py b/colossalai/utils/moe.py index b697611b3d87..7cf4602827da 100644 --- a/colossalai/utils/moe.py +++ b/colossalai/utils/moe.py @@ -3,9 +3,9 @@ import torch.distributed as dist import torch.nn as nn -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode from colossalai.legacy.context.moe_context import MOE_CONTEXT +from colossalai.legacy.core import global_context as gpc from .common import is_using_ddp diff --git a/colossalai/utils/profiler/legacy/prof_utils.py b/colossalai/utils/profiler/legacy/prof_utils.py index 2f7eee827651..9b948c9ec1cd 100644 --- a/colossalai/utils/profiler/legacy/prof_utils.py +++ b/colossalai/utils/profiler/legacy/prof_utils.py @@ -1,131 +1,132 @@ -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Union, List -from colossalai.core import global_context as gpc - - -# copied from high version pytorch to support low version -def _format_time(time_us): - """Defines how to format time in FunctionEvent""" - US_IN_SECOND = 1000.0 * 1000.0 - US_IN_MS = 1000.0 - if time_us >= US_IN_SECOND: - return '{:.3f}s'.format(time_us / US_IN_SECOND) - if time_us >= US_IN_MS: - return '{:.3f}ms'.format(time_us / US_IN_MS) - return '{:.3f}us'.format(time_us) - - -# copied from high version pytorch to support low version -def _format_memory(nbytes): - """Returns a formatted memory size string""" - KB = 1024 - MB = 1024 * KB - GB = 1024 * MB - if (abs(nbytes) >= GB): - return '{:.2f} GB'.format(nbytes * 1.0 / GB) - elif (abs(nbytes) >= MB): - return '{:.2f} MB'.format(nbytes * 1.0 / MB) - elif (abs(nbytes) >= KB): - return '{:.2f} KB'.format(nbytes * 1.0 / KB) - else: - return str(nbytes) + ' B' - - -def _format_bandwidth(volume: float or int, time_us: int): - sec_div_mb = (1000.0 / 1024.0)**2 - mb_per_sec = volume / time_us * sec_div_mb - - if mb_per_sec >= 1024.0: - return '{:.3f} GB/s'.format(mb_per_sec / 1024.0) - else: - return '{:.3f} MB/s'.format(mb_per_sec) - - -class 
BaseProfiler(ABC): - - def __init__(self, profiler_name: str, priority: int): - self.name = profiler_name - self.priority = priority - - @abstractmethod - def enable(self): - pass - - @abstractmethod - def disable(self): - pass - - @abstractmethod - def to_tensorboard(self, writer): - pass - - @abstractmethod - def to_file(self, filename: Path): - pass - - @abstractmethod - def show(self): - pass - - -class ProfilerContext(object): - """Profiler context manager - - Usage:: - - world_size = 4 - inputs = torch.randn(10, 10, dtype=torch.float32, device=get_current_device()) - outputs = torch.empty(world_size, 10, 10, dtype=torch.float32, device=get_current_device()) - outputs_list = list(torch.chunk(outputs, chunks=world_size, dim=0)) - - cc_prof = CommProfiler() - - with ProfilerContext([cc_prof]) as prof: - op = dist.all_reduce(inputs, async_op=True) - dist.all_gather(outputs_list, inputs) - op.wait() - dist.reduce_scatter(inputs, outputs_list) - dist.broadcast(inputs, 0) - dist.reduce(inputs, 0) - - prof.show() - """ - - def __init__(self, profilers: List[BaseProfiler] = None, enable: bool = True): - self.enable = enable - self.profilers = sorted(profilers, key=lambda prof: prof.priority) - - def __enter__(self): - if self.enable: - for prof in self.profilers: - prof.enable() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.enable: - for prof in self.profilers: - prof.disable() - - def to_tensorboard(self, writer): - from torch.utils.tensorboard import SummaryWriter - - assert isinstance(writer, SummaryWriter), \ - f'torch.utils.tensorboard.SummaryWriter is required, but found {type(writer)}.' - - for prof in self.profilers: - prof.to_tensorboard(writer) - - def to_file(self, log_dir: Union[str, Path]): - if isinstance(log_dir, str): - log_dir = Path(log_dir) - - if not log_dir.exists(): - log_dir.mkdir(parents=True, exist_ok=True) - for prof in self.profilers: - log_file = log_dir.joinpath(f'{prof.name}_rank_{gpc.get_global_rank()}.log') - prof.to_file(log_file) - - def show(self): - for prof in self.profilers: - prof.show() +from abc import ABC, abstractmethod +from pathlib import Path +from typing import List, Union + +from colossalai.legacy.core import global_context as gpc + + +# copied from high version pytorch to support low version +def _format_time(time_us): + """Defines how to format time in FunctionEvent""" + US_IN_SECOND = 1000.0 * 1000.0 + US_IN_MS = 1000.0 + if time_us >= US_IN_SECOND: + return '{:.3f}s'.format(time_us / US_IN_SECOND) + if time_us >= US_IN_MS: + return '{:.3f}ms'.format(time_us / US_IN_MS) + return '{:.3f}us'.format(time_us) + + +# copied from high version pytorch to support low version +def _format_memory(nbytes): + """Returns a formatted memory size string""" + KB = 1024 + MB = 1024 * KB + GB = 1024 * MB + if (abs(nbytes) >= GB): + return '{:.2f} GB'.format(nbytes * 1.0 / GB) + elif (abs(nbytes) >= MB): + return '{:.2f} MB'.format(nbytes * 1.0 / MB) + elif (abs(nbytes) >= KB): + return '{:.2f} KB'.format(nbytes * 1.0 / KB) + else: + return str(nbytes) + ' B' + + +def _format_bandwidth(volume: float or int, time_us: int): + sec_div_mb = (1000.0 / 1024.0)**2 + mb_per_sec = volume / time_us * sec_div_mb + + if mb_per_sec >= 1024.0: + return '{:.3f} GB/s'.format(mb_per_sec / 1024.0) + else: + return '{:.3f} MB/s'.format(mb_per_sec) + + +class BaseProfiler(ABC): + + def __init__(self, profiler_name: str, priority: int): + self.name = profiler_name + self.priority = priority + + @abstractmethod + def enable(self): + pass + + 
@abstractmethod + def disable(self): + pass + + @abstractmethod + def to_tensorboard(self, writer): + pass + + @abstractmethod + def to_file(self, filename: Path): + pass + + @abstractmethod + def show(self): + pass + + +class ProfilerContext(object): + """Profiler context manager + + Usage:: + + world_size = 4 + inputs = torch.randn(10, 10, dtype=torch.float32, device=get_current_device()) + outputs = torch.empty(world_size, 10, 10, dtype=torch.float32, device=get_current_device()) + outputs_list = list(torch.chunk(outputs, chunks=world_size, dim=0)) + + cc_prof = CommProfiler() + + with ProfilerContext([cc_prof]) as prof: + op = dist.all_reduce(inputs, async_op=True) + dist.all_gather(outputs_list, inputs) + op.wait() + dist.reduce_scatter(inputs, outputs_list) + dist.broadcast(inputs, 0) + dist.reduce(inputs, 0) + + prof.show() + """ + + def __init__(self, profilers: List[BaseProfiler] = None, enable: bool = True): + self.enable = enable + self.profilers = sorted(profilers, key=lambda prof: prof.priority) + + def __enter__(self): + if self.enable: + for prof in self.profilers: + prof.enable() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.enable: + for prof in self.profilers: + prof.disable() + + def to_tensorboard(self, writer): + from torch.utils.tensorboard import SummaryWriter + + assert isinstance(writer, SummaryWriter), \ + f'torch.utils.tensorboard.SummaryWriter is required, but found {type(writer)}.' + + for prof in self.profilers: + prof.to_tensorboard(writer) + + def to_file(self, log_dir: Union[str, Path]): + if isinstance(log_dir, str): + log_dir = Path(log_dir) + + if not log_dir.exists(): + log_dir.mkdir(parents=True, exist_ok=True) + for prof in self.profilers: + log_file = log_dir.joinpath(f'{prof.name}_rank_{gpc.get_global_rank()}.log') + prof.to_file(log_file) + + def show(self): + for prof in self.profilers: + prof.show() diff --git a/colossalai/zero/legacy/init_ctx/init_context.py b/colossalai/zero/legacy/init_ctx/init_context.py index 91c8c5271e4c..0525a928f77e 100644 --- a/colossalai/zero/legacy/init_ctx/init_context.py +++ b/colossalai/zero/legacy/init_ctx/init_context.py @@ -8,9 +8,9 @@ import torch.distributed as dist import torch.nn as nn -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode from colossalai.legacy.context.singleton_meta import SingletonMeta +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.utils.model.utils import InsertPostInitMethodToModuleSubClasses from colossalai.zero.legacy.shard_utils import BaseShardStrategy diff --git a/colossalai/zero/legacy/sharded_model/sharded_model_v2.py b/colossalai/zero/legacy/sharded_model/sharded_model_v2.py index 04a38154dfe8..c9ba8ba64591 100644 --- a/colossalai/zero/legacy/sharded_model/sharded_model_v2.py +++ b/colossalai/zero/legacy/sharded_model/sharded_model_v2.py @@ -11,8 +11,8 @@ from torch.distributed import ProcessGroup from torch.nn.parameter import Parameter -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.utils import disposable, get_current_device from colossalai.utils.memory import colo_device_memory_capacity diff --git a/colossalai/zero/legacy/sharded_optim/sharded_optim_v2.py b/colossalai/zero/legacy/sharded_optim/sharded_optim_v2.py index 
7c6e12ef174a..bbdc7edd56de 100644 --- a/colossalai/zero/legacy/sharded_optim/sharded_optim_v2.py +++ b/colossalai/zero/legacy/sharded_optim/sharded_optim_v2.py @@ -12,8 +12,8 @@ from torch.optim import Optimizer from colossalai.amp.naive_amp.grad_scaler import DynamicGradScaler -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn.optimizer import ColossalaiOptimizer from colossalai.zero.legacy.gemini.stateful_tensor import StatefulTensor, TensorState diff --git a/examples/community/roberta/pretraining/pretrain_utils.py b/examples/community/roberta/pretraining/pretrain_utils.py index cea6ac2c36e5..2f869508f0e1 100644 --- a/examples/community/roberta/pretraining/pretrain_utils.py +++ b/examples/community/roberta/pretraining/pretrain_utils.py @@ -1,9 +1,13 @@ import logging import os import sys +from collections import OrderedDict import torch +import torch.nn as nn import transformers +from model.bert import BertForMaskedLM +from model.deberta_v2 import DebertaV2ForMaskedLM from torch.optim import AdamW from transformers import ( AutoModelForMaskedLM, @@ -16,16 +20,11 @@ get_linear_schedule_with_warmup, ) -from colossalai.core import global_context as gpc +from colossalai.legacy.core import global_context as gpc from colossalai.nn.lr_scheduler import LinearWarmupLR from colossalai.nn.optimizer import FusedAdam, HybridAdam sys.path.append(os.getcwd()) -from collections import OrderedDict - -import torch.nn as nn -from model.bert import BertForMaskedLM -from model.deberta_v2 import DebertaV2ForMaskedLM __all__ = ['get_model', 'get_optimizer', 'get_lr_scheduler', 'get_dataloader_for_pretraining'] @@ -118,7 +117,7 @@ def save_ckpt(model, optimizer, lr_scheduler, path, epoch, shard, global_step): checkpoint['epoch'] = epoch checkpoint['shard'] = shard checkpoint['global_step'] = global_step - model_state = model.state_dict() #each process must run model.state_dict() + model_state = model.state_dict() # each process must run model.state_dict() if gpc.get_global_rank() == 0: torch.save(checkpoint, optimizer_lr_path) torch.save(model_state, model_path) diff --git a/examples/community/roberta/pretraining/run_pretraining.py b/examples/community/roberta/pretraining/run_pretraining.py index 43a571649645..0f25fc5c14d3 100644 --- a/examples/community/roberta/pretraining/run_pretraining.py +++ b/examples/community/roberta/pretraining/run_pretraining.py @@ -16,8 +16,8 @@ from utils.logger import Logger import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn.parallel import GeminiDDP, zero_model_wrapper, zero_optim_wrapper from colossalai.tensor import ColoParameter, ComputePattern, ComputeSpec, ProcessGroup, ReplicaSpec, ShardSpec from colossalai.utils import get_current_device diff --git a/examples/community/roberta/pretraining/utils/exp_util.py b/examples/community/roberta/pretraining/utils/exp_util.py index 4a2c9d8a47ad..75d244134057 100644 --- a/examples/community/roberta/pretraining/utils/exp_util.py +++ b/examples/community/roberta/pretraining/utils/exp_util.py @@ -5,7 +5,7 @@ import psutil import torch -from colossalai.core import global_context as gpc +from colossalai.legacy.core import global_context as gpc def logging(s, log_path, print_=True, log_=True): @@ -77,7 +77,7 @@ 
def throughput_calculator(numel, args, config, iteration_time, total_iterations, elapsed_time_per_iter = iteration_time / total_iterations samples_per_second = batch_size / elapsed_time_per_iter - #flops calculator + # flops calculator hidden_size = config.hidden_size num_layers = config.num_hidden_layers vocab_size = config.vocab_size diff --git a/examples/images/dreambooth/train_dreambooth_colossalai.py b/examples/images/dreambooth/train_dreambooth_colossalai.py index 2f480d30a861..5b830cc8a8a3 100644 --- a/examples/images/dreambooth/train_dreambooth_colossalai.py +++ b/examples/images/dreambooth/train_dreambooth_colossalai.py @@ -21,8 +21,8 @@ import colossalai from colossalai.booster import Booster from colossalai.booster.plugin import GeminiPlugin, LowLevelZeroPlugin, TorchDDPPlugin -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers, get_dist_logger from colossalai.nn.optimizer import HybridAdam from colossalai.utils import get_current_device diff --git a/examples/images/dreambooth/train_dreambooth_colossalai_lora.py b/examples/images/dreambooth/train_dreambooth_colossalai_lora.py index 36972e74b6c2..654bce36ccb7 100644 --- a/examples/images/dreambooth/train_dreambooth_colossalai_lora.py +++ b/examples/images/dreambooth/train_dreambooth_colossalai_lora.py @@ -23,8 +23,8 @@ import colossalai from colossalai.booster import Booster from colossalai.booster.plugin import GeminiPlugin, LowLevelZeroPlugin, TorchDDPPlugin -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers, get_dist_logger from colossalai.nn.optimizer import HybridAdam from colossalai.utils import get_current_device diff --git a/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py b/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py index e331fc8fcf10..84b02633e775 100644 --- a/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py +++ b/examples/language/gpt/experiments/auto_parallel/auto_parallel_with_gpt.py @@ -7,8 +7,8 @@ from gpt_modules import GPT2LMHeadModel, GPTLMLoss from colossalai.auto_parallel.tensor_shard.initialize import autoparallelize -from colossalai.core import global_context as gpc from colossalai.initialize import launch_from_torch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers, get_dist_logger BATCH_SIZE = 16 diff --git a/examples/language/gpt/titans/model/embed.py b/examples/language/gpt/titans/model/embed.py index 597db57ea639..ff8aa2205164 100644 --- a/examples/language/gpt/titans/model/embed.py +++ b/examples/language/gpt/titans/model/embed.py @@ -6,8 +6,8 @@ from torch.nn import functional as F from torch.nn.parameter import Parameter -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode, seed +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.base_layer import ParallelLayer from colossalai.nn.layer.parallel_1d._utils import gather_forward_split_backward, reduce_grad, reduce_input from colossalai.nn.layer.parallel_1d.layers import Linear1D_Row diff --git a/examples/language/gpt/titans/model/gpt1d.py 
b/examples/language/gpt/titans/model/gpt1d.py index 2edd03606b7d..e5be5533800b 100644 --- a/examples/language/gpt/titans/model/gpt1d.py +++ b/examples/language/gpt/titans/model/gpt1d.py @@ -9,8 +9,8 @@ from colossalai import kernel from colossalai import nn as col_nn -from colossalai.core import global_context as gpc from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer import Linear1D_Col, Linear1D_Row from colossalai.nn.layer.base_layer import ParallelLayer from colossalai.nn.layer.utils import ACT2FN, divide diff --git a/examples/language/gpt/titans/model/pipeline_gpt1d.py b/examples/language/gpt/titans/model/pipeline_gpt1d.py index 5b740ab09938..89ef03ac07b3 100644 --- a/examples/language/gpt/titans/model/pipeline_gpt1d.py +++ b/examples/language/gpt/titans/model/pipeline_gpt1d.py @@ -7,8 +7,8 @@ from colossalai import kernel from colossalai import nn as col_nn -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn.layer.wrapper import PipelineSharedModuleWrapper from colossalai.pipeline.utils import partition_uniform diff --git a/examples/language/gpt/titans/train_gpt.py b/examples/language/gpt/titans/train_gpt.py index a5c014cc4ace..521823092d2d 100644 --- a/examples/language/gpt/titans/train_gpt.py +++ b/examples/language/gpt/titans/train_gpt.py @@ -8,8 +8,8 @@ import colossalai import colossalai.utils as utils -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers, get_dist_logger from colossalai.nn import LinearWarmupLR from colossalai.trainer import Trainer, hooks diff --git a/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py b/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py index a6a9ad0a312c..33aa5990f7c1 100644 --- a/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py +++ b/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py @@ -4,8 +4,8 @@ import colossalai from colossalai.auto_parallel.tensor_shard.initialize import initialize_model -from colossalai.core import global_context as gpc from colossalai.device.device_mesh import DeviceMesh +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn.lr_scheduler import CosineAnnealingLR diff --git a/examples/tutorial/hybrid_parallel/train.py b/examples/tutorial/hybrid_parallel/train.py index c09299a74533..255a8c544de6 100644 --- a/examples/tutorial/hybrid_parallel/train.py +++ b/examples/tutorial/hybrid_parallel/train.py @@ -5,8 +5,8 @@ from tqdm import tqdm import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn import CrossEntropyLoss from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR diff --git a/examples/tutorial/large_batch_optimizer/train.py b/examples/tutorial/large_batch_optimizer/train.py index 35e54582f494..6ebd8d68083d 100644 --- a/examples/tutorial/large_batch_optimizer/train.py +++ b/examples/tutorial/large_batch_optimizer/train.py @@ -4,7 +4,7 @@ from tqdm import 
tqdm import colossalai -from colossalai.core import global_context as gpc +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR from colossalai.nn.optimizer import Lamb, Lars diff --git a/examples/tutorial/opt/opt/context.py b/examples/tutorial/opt/opt/context.py index 2229864ce5f5..dfcd3b382d3c 100644 --- a/examples/tutorial/opt/opt/context.py +++ b/examples/tutorial/opt/opt/context.py @@ -1,7 +1,7 @@ import torch.distributed as dist -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc class barrier_context(): diff --git a/examples/tutorial/opt/opt/run_clm.py b/examples/tutorial/opt/opt/run_clm.py index 53ab89b37a72..c6be9b040896 100755 --- a/examples/tutorial/opt/opt/run_clm.py +++ b/examples/tutorial/opt/opt/run_clm.py @@ -51,8 +51,8 @@ from transformers.utils.versions import require_version import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers, get_dist_logger from colossalai.nn.optimizer import HybridAdam from colossalai.tensor import ProcessGroup diff --git a/examples/tutorial/sequence_parallel/data/__init__.py b/examples/tutorial/sequence_parallel/data/__init__.py index 22a8a2c11537..6fdf07ba5b69 100644 --- a/examples/tutorial/sequence_parallel/data/__init__.py +++ b/examples/tutorial/sequence_parallel/data/__init__.py @@ -1,8 +1,8 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode from colossalai.legacy.context.parallel_context import ParallelContext +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from .datasets.builder import build_train_valid_test_datasets diff --git a/examples/tutorial/sequence_parallel/data/bert_helper.py b/examples/tutorial/sequence_parallel/data/bert_helper.py index 9ff841fb5821..b65ca1e64f3c 100644 --- a/examples/tutorial/sequence_parallel/data/bert_helper.py +++ b/examples/tutorial/sequence_parallel/data/bert_helper.py @@ -1,7 +1,7 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc _MAX_DATA_DIM = 5 diff --git a/examples/tutorial/sequence_parallel/data/datasets/bert_dataset.py b/examples/tutorial/sequence_parallel/data/datasets/bert_dataset.py index 361b54a39cfc..70c1269122dc 100644 --- a/examples/tutorial/sequence_parallel/data/datasets/bert_dataset.py +++ b/examples/tutorial/sequence_parallel/data/datasets/bert_dataset.py @@ -21,8 +21,8 @@ import torch from torch.utils.data import Dataset -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from ..tokenizer import get_tokenizer diff --git a/examples/tutorial/sequence_parallel/data/datasets/data_samplers.py b/examples/tutorial/sequence_parallel/data/datasets/data_samplers.py index 64734ad4228a..b9c197c95ae3 100644 --- a/examples/tutorial/sequence_parallel/data/datasets/data_samplers.py +++ b/examples/tutorial/sequence_parallel/data/datasets/data_samplers.py @@ -18,8 +18,8 @@ import torch -from colossalai.core import 
global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc def build_pretraining_data_loader(dataset, consumed_samples, micro_batch_size, dataloader_type='single', num_workers=0): diff --git a/examples/tutorial/sequence_parallel/data/tokenizer/tokenizer.py b/examples/tutorial/sequence_parallel/data/tokenizer/tokenizer.py index fc1cb3989d29..ba832b5cdce9 100644 --- a/examples/tutorial/sequence_parallel/data/tokenizer/tokenizer.py +++ b/examples/tutorial/sequence_parallel/data/tokenizer/tokenizer.py @@ -16,8 +16,8 @@ from abc import ABC, abstractmethod -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from .bert_tokenization import FullTokenizer as FullBertTokenizer diff --git a/examples/tutorial/sequence_parallel/loss_func/bert_loss.py b/examples/tutorial/sequence_parallel/loss_func/bert_loss.py index 2f129e9fc6af..b3f2487a438b 100644 --- a/examples/tutorial/sequence_parallel/loss_func/bert_loss.py +++ b/examples/tutorial/sequence_parallel/loss_func/bert_loss.py @@ -3,8 +3,8 @@ import torch.nn as nn import torch.nn.functional as F -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from .cross_entropy import vocab_cross_entropy diff --git a/examples/tutorial/sequence_parallel/model/bert.py b/examples/tutorial/sequence_parallel/model/bert.py index 29b153228122..688a4c26fb68 100644 --- a/examples/tutorial/sequence_parallel/model/bert.py +++ b/examples/tutorial/sequence_parallel/model/bert.py @@ -3,10 +3,10 @@ import torch import torch.nn as nn -from colossalai.core import global_context as gpc from colossalai.kernel import LayerNorm from colossalai.legacy.context import ParallelMode from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn.layer.wrapper import PipelineSharedModuleWrapper from colossalai.pipeline.utils import partition_uniform diff --git a/examples/tutorial/sequence_parallel/model/layers/head.py b/examples/tutorial/sequence_parallel/model/layers/head.py index 51d4542f19c3..9e25157e1b40 100644 --- a/examples/tutorial/sequence_parallel/model/layers/head.py +++ b/examples/tutorial/sequence_parallel/model/layers/head.py @@ -4,9 +4,9 @@ from loss_func.cross_entropy import vocab_cross_entropy import colossalai -from colossalai.core import global_context as gpc from colossalai.kernel import LayerNorm from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from .embedding import VocabEmbedding from .linear import Linear diff --git a/examples/tutorial/sequence_parallel/model/layers/preprocess.py b/examples/tutorial/sequence_parallel/model/layers/preprocess.py index d48f3c155d72..dd66bfe13585 100644 --- a/examples/tutorial/sequence_parallel/model/layers/preprocess.py +++ b/examples/tutorial/sequence_parallel/model/layers/preprocess.py @@ -1,8 +1,8 @@ import torch import torch.nn as nn -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc class PreProcessor(nn.Module): diff --git a/examples/tutorial/sequence_parallel/train.py 
b/examples/tutorial/sequence_parallel/train.py index 19c58054f7b3..17270a27c9e9 100644 --- a/examples/tutorial/sequence_parallel/train.py +++ b/examples/tutorial/sequence_parallel/train.py @@ -9,10 +9,10 @@ import colossalai from colossalai.amp import AMP_TYPE -from colossalai.core import global_context as gpc from colossalai.engine.schedule import PipelineSchedule from colossalai.kernel import LayerNorm from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn.optimizer import FusedAdam from colossalai.utils import MultiTimer, is_using_pp diff --git a/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py b/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py index f184f64b35d0..73824b9e2eda 100644 --- a/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py +++ b/tests/test_auto_parallel/test_ckpt_solvers/test_C_solver_consistency.py @@ -6,12 +6,12 @@ import torchvision.models as tm import colossalai -from colossalai.core import global_context as gpc from colossalai.fx import ColoGraphModule, ColoTracer from colossalai.fx._compatibility import is_compatible_with_meta # from colossalai.fx.passes.algorithms import solver_rotor # from colossalai.fx.passes.algorithms.operation import Sequence from colossalai.fx.passes.meta_info_prop import MetaInfoProp +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn if is_compatible_with_meta(): @@ -55,14 +55,14 @@ def _run_C_solver_consistency_test(rank, world_size, port): for d in range(1, len(opt_python[0])): for i in range(len(opt_python[0]) - d): assert opt_python[m][i][i + d] == opt_C[m][i][i + d], \ - f"item ({m}, {i}, {i + d}) is not consistent with python version!\npython version: {opt_python[m][i][i + d]}\nC version: {opt_C[m][i][i + d]}" + f"item ({m}, {i}, {i + d}) is not consistent with python version!\npython version: {opt_python[m][i][i + d]}\nC version: {opt_C[m][i][i + d]}" sequence_python = sequence_python.list_operations() sequence_C = sequence_C.list_operations() # make sure the sequences are the same assert len(sequence_python) == len(sequence_C) and \ - all(python_op.__repr__() == C_op.__repr__() for (python_op, C_op) in zip(sequence_python, sequence_C)) + all(python_op.__repr__() == C_op.__repr__() for (python_op, C_op) in zip(sequence_python, sequence_C)) gpc.destroy() diff --git a/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py b/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py index db268b91d0a0..babdddfada18 100644 --- a/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py +++ b/tests/test_auto_parallel/test_ckpt_solvers/test_ckpt_torchvision.py @@ -8,12 +8,12 @@ from torch.fx import GraphModule import colossalai -from colossalai.core import global_context as gpc from colossalai.fx import ColoTracer from colossalai.fx._compatibility import is_compatible_with_meta from colossalai.fx.graph_module import ColoGraphModule # from colossalai.fx.passes.algorithms import chen_greedy, solver_rotor from colossalai.fx.passes.meta_info_prop import MetaInfoProp +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn if is_compatible_with_meta(): diff --git a/tests/test_autochunk/test_autochunk_alphafold/test_autochunk_alphafold_utils.py 
b/tests/test_autochunk/test_autochunk_alphafold/test_autochunk_alphafold_utils.py index 15610e2b50dc..593658fd1368 100644 --- a/tests/test_autochunk/test_autochunk_alphafold/test_autochunk_alphafold_utils.py +++ b/tests/test_autochunk/test_autochunk_alphafold/test_autochunk_alphafold_utils.py @@ -6,9 +6,9 @@ import colossalai from colossalai.autochunk.autochunk_codegen import AUTOCHUNK_AVAILABLE from colossalai.autochunk.utils import flat_list -from colossalai.core import global_context as gpc from colossalai.fx.graph_module import ColoGraphModule from colossalai.fx.passes.meta_info_prop import MetaInfoProp +from colossalai.legacy.core import global_context as gpc from colossalai.testing import free_port if AUTOCHUNK_AVAILABLE: diff --git a/tests/test_autochunk/test_autochunk_diffuser/test_autochunk_diffuser_utils.py b/tests/test_autochunk/test_autochunk_diffuser/test_autochunk_diffuser_utils.py index b6a792f5652c..264331a5fef0 100644 --- a/tests/test_autochunk/test_autochunk_diffuser/test_autochunk_diffuser_utils.py +++ b/tests/test_autochunk/test_autochunk_diffuser/test_autochunk_diffuser_utils.py @@ -5,9 +5,9 @@ import colossalai from colossalai.autochunk.autochunk_codegen import AUTOCHUNK_AVAILABLE -from colossalai.core import global_context as gpc from colossalai.fx.graph_module import ColoGraphModule from colossalai.fx.passes.meta_info_prop import MetaInfoProp +from colossalai.legacy.core import global_context as gpc if AUTOCHUNK_AVAILABLE: from colossalai.autochunk.autochunk_codegen import AutoChunkCodeGen diff --git a/tests/test_autochunk/test_autochunk_vit/test_autochunk_vit_utils.py b/tests/test_autochunk/test_autochunk_vit/test_autochunk_vit_utils.py index 3202318fb6d1..65d1e9c4d090 100644 --- a/tests/test_autochunk/test_autochunk_vit/test_autochunk_vit_utils.py +++ b/tests/test_autochunk/test_autochunk_vit/test_autochunk_vit_utils.py @@ -5,9 +5,9 @@ import colossalai from colossalai.autochunk.autochunk_codegen import AUTOCHUNK_AVAILABLE -from colossalai.core import global_context as gpc from colossalai.fx.graph_module import ColoGraphModule from colossalai.fx.passes.meta_info_prop import MetaInfoProp +from colossalai.legacy.core import global_context as gpc if AUTOCHUNK_AVAILABLE: from colossalai.autochunk.autochunk_codegen import AutoChunkCodeGen diff --git a/tests/test_cluster/test_process_group_mesh.py b/tests/test_cluster/test_process_group_mesh.py index 26f9f56f9bb4..4ed49c5f3c9b 100644 --- a/tests/test_cluster/test_process_group_mesh.py +++ b/tests/test_cluster/test_process_group_mesh.py @@ -7,8 +7,8 @@ def check_process_group_mesh_with_gpc(): - from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode + from colossalai.legacy.core import global_context as gpc DP_DIM, PP_DIM, TP_DIM = 0, 1, 2 pg_mesh = ProcessGroupMesh(1, 2, 2) diff --git a/tests/test_comm/test_boardcast_send_recv_v2.py b/tests/test_comm/test_boardcast_send_recv_v2.py index ea2659cd65e8..7d69e5d5510c 100644 --- a/tests/test_comm/test_boardcast_send_recv_v2.py +++ b/tests/test_comm/test_boardcast_send_recv_v2.py @@ -2,9 +2,9 @@ import torch from colossalai.communication.p2p_v2 import _recv_object, _send_object -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, spawn diff --git 
a/tests/test_comm/test_comm.py b/tests/test_comm/test_comm.py index 20b466766482..e852dc7f6cbb 100644 --- a/tests/test_comm/test_comm.py +++ b/tests/test_comm/test_comm.py @@ -3,9 +3,9 @@ import torch.distributed as dist from colossalai.communication import all_gather, all_reduce, reduce_scatter -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn from colossalai.utils import get_current_device diff --git a/tests/test_comm/test_object_list_p2p.py b/tests/test_comm/test_object_list_p2p.py index 4d3f8226904c..149917ff2de2 100644 --- a/tests/test_comm/test_object_list_p2p.py +++ b/tests/test_comm/test_object_list_p2p.py @@ -9,9 +9,9 @@ send_forward, send_forward_recv_backward, ) -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn CONFIG = dict(parallel=dict(pipeline=2)) diff --git a/tests/test_comm/test_object_list_p2p_v2.py b/tests/test_comm/test_object_list_p2p_v2.py index 5efbeb182c1d..60930612ebed 100644 --- a/tests/test_comm/test_object_list_p2p_v2.py +++ b/tests/test_comm/test_object_list_p2p_v2.py @@ -2,9 +2,9 @@ import torch from colossalai.communication.p2p_v2 import recv_backward, recv_forward, send_backward, send_forward -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, spawn diff --git a/tests/test_context/test_hybrid_parallel.py b/tests/test_context/test_hybrid_parallel.py index 00291c506c44..d165a59cd902 100644 --- a/tests/test_context/test_hybrid_parallel.py +++ b/tests/test_context/test_hybrid_parallel.py @@ -7,10 +7,10 @@ import torch from colossalai import launch -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as tp_env from colossalai.legacy.context import reset_seeds from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.testing import free_port, rerun_if_address_is_in_use, spawn CONFIG_PATH_LIST = list(Path(__file__).parent.glob('configs/*.py')) diff --git a/tests/test_data/test_data_parallel_sampler.py b/tests/test_data/test_data_parallel_sampler.py index c2f3aea1e480..5fac2d10d6db 100644 --- a/tests/test_data/test_data_parallel_sampler.py +++ b/tests/test_data/test_data_parallel_sampler.py @@ -10,8 +10,8 @@ from torchvision import datasets, transforms import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context import Config, ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn from colossalai.utils import get_dataloader diff --git a/tests/test_data/test_deterministic_dataloader.py b/tests/test_data/test_deterministic_dataloader.py index 9cb5c6f6fce0..9c528942bde9 100644 --- a/tests/test_data/test_deterministic_dataloader.py +++ b/tests/test_data/test_deterministic_dataloader.py @@ -10,8 +10,8 @@ from torchvision import 
datasets, transforms import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context import Config, ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn from colossalai.utils import get_dataloader diff --git a/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py b/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py index a8768e1437fe..d509c9b22a22 100644 --- a/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py +++ b/tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py @@ -8,8 +8,8 @@ import colossalai from colossalai.amp import AMP_TYPE -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.nn import CrossEntropyLoss from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR diff --git a/tests/test_device/test_init_logical_pg.py b/tests/test_device/test_init_logical_pg.py index 7c6339eff67e..706af9fa70ce 100644 --- a/tests/test_device/test_init_logical_pg.py +++ b/tests/test_device/test_init_logical_pg.py @@ -3,9 +3,9 @@ import torch.distributed as dist from torch.distributed import ReduceOp -from colossalai.core import global_context as gpc from colossalai.device.device_mesh import DeviceMesh from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn diff --git a/tests/test_engine/test_engine.py b/tests/test_engine/test_engine.py index 62493cf3712d..e5585e7088b4 100644 --- a/tests/test_engine/test_engine.py +++ b/tests/test_engine/test_engine.py @@ -2,7 +2,7 @@ import colossalai from colossalai.amp import AMP_TYPE -from colossalai.core import global_context as gpc +from colossalai.legacy.core import global_context as gpc from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn from tests.components_to_test.registry import non_distributed_component_funcs diff --git a/tests/test_engine/test_gradient_accumluation.py b/tests/test_engine/test_gradient_accumluation.py index 7783827c7c44..52104541e919 100644 --- a/tests/test_engine/test_gradient_accumluation.py +++ b/tests/test_engine/test_gradient_accumluation.py @@ -10,7 +10,7 @@ from torchvision.models import resnet18 import colossalai -from colossalai.core import global_context as gpc +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.testing import rerun_if_address_is_in_use, spawn from colossalai.utils import get_dataloader diff --git a/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py b/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py index bcac2ec426d9..88c18e4daaed 100644 --- a/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py +++ b/tests/test_fx/test_codegen/test_activation_checkpoint_codegen.py @@ -4,9 +4,9 @@ from torch.utils.checkpoint import checkpoint import colossalai -from colossalai.core import global_context as gpc from colossalai.fx import ColoTracer from colossalai.fx.graph_module import ColoGraphModule +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn try: @@ -104,9 +104,9 @@ def _run_act_ckpt_codegen(rank, 
world_size, port): # the offload option is correct code = graph.python_code('self').src assert 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, True, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_2, False, y, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_3, False, y, relu, use_reentrant=True)' in code + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, x, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_2, False, y, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_3, False, y, relu, use_reentrant=True)' in code # recompile and verify the outputs are consistent fx_out = gm(data1, data2) @@ -160,9 +160,9 @@ def _run_act_ckpt_python_code_torch11(rank, world_size, port): # the offload option is correct code = graph.python_code('self').src assert 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, True, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_2, False, y, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_3, False, y, relu, use_reentrant=True)' in code + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, x, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_2, False, y, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_3, False, y, relu, use_reentrant=True)' in code # recompile and verify the outputs are consistent fx_out = gm(data1, data2) diff --git a/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py b/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py index 5b327807a57b..56cc7c960016 100644 --- a/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py +++ b/tests/test_fx/test_codegen/test_nested_activation_checkpoint_codegen.py @@ -2,9 +2,9 @@ import torch import colossalai -from colossalai.core import global_context as gpc from colossalai.fx import ColoTracer from colossalai.fx.graph_module import ColoGraphModule +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn try: @@ -70,11 +70,11 @@ def _run_act_ckpt_codegen(rank, world_size, port): # assert checkpoint function will be generated and code = graph.python_code('self').src assert 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0, False, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_1, False, linear3, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_0, False, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_1, False, linear2, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, False, x, use_reentrant=False)' in code and \ - 
'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, linear4, use_reentrant=False)' in code + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_1, False, linear3, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_0, False, x, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_1, False, linear2, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, False, x, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, linear4, use_reentrant=False)' in code # recompile and verify the outputs are consistent fx_out = gm(data1) @@ -127,11 +127,11 @@ def _run_act_ckpt_python_code_torch11(rank, world_size, port): # assert checkpoint function will be generated and code = graph.python_code('self').src assert 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0, False, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_1, False, linear3, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_0, False, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_1, False, linear2, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, False, x, use_reentrant=False)' in code and \ - 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, linear4, use_reentrant=False)' in code + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_1, False, linear3, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_0, False, x, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0_0_1, False, linear2, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, False, x, use_reentrant=False)' in code and \ + 'colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_1, False, linear4, use_reentrant=False)' in code # recompile and verify the outputs are consistent fx_out = gm(data1) diff --git a/tests/test_fx/test_codegen/test_offload_codegen.py b/tests/test_fx/test_codegen/test_offload_codegen.py index c217b96586fe..a9ea28b0b133 100644 --- a/tests/test_fx/test_codegen/test_offload_codegen.py +++ b/tests/test_fx/test_codegen/test_offload_codegen.py @@ -5,9 +5,9 @@ from torch.fx import GraphModule import colossalai -from colossalai.core import global_context as gpc from colossalai.fx import ColoTracer from colossalai.fx.graph_module import ColoGraphModule +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn try: @@ -100,14 +100,14 @@ def _run_offload_codegen(rank, world_size, port): # assert we have all the components code = graph.python_code("self").src assert "def pack_hook_input(self, x):" in code and \ - "def unpack_hook(self, packed):" in code and \ - "def pack_hook_no_input(self, x):" in code and \ - "setattr(x, 'offload', True)" in code and \ - "setattr(linear3, 'offload', False)" in code and \ - "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_input, self.unpack_hook):" in code and \ - "with 
torch.autograd.graph.save_on_cpu(pin_memory=True):" in code and \ - "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_no_input, self.unpack_hook):" in code and \ - "colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, True, linear4, use_reentrant=False)" in code + "def unpack_hook(self, packed):" in code and \ + "def pack_hook_no_input(self, x):" in code and \ + "setattr(x, 'offload', True)" in code and \ + "setattr(linear3, 'offload', False)" in code and \ + "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_input, self.unpack_hook):" in code and \ + "with torch.autograd.graph.save_on_cpu(pin_memory=True):" in code and \ + "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_no_input, self.unpack_hook):" in code and \ + "colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, True, linear4, use_reentrant=False)" in code _test_fwd_and_bwd(model, gm, data) gpc.destroy() @@ -156,14 +156,14 @@ def _run_offload_codegen_torch11(rank, world_size, port): # assert we have all the components code = graph.python_code("self").src assert "def pack_hook_input(self, x):" in code and \ - "def unpack_hook(self, packed):" in code and \ - "def pack_hook_no_input(self, x):" in code and \ - "setattr(x, 'offload', True)" in code and \ - "setattr(linear3, 'offload', False)" in code and \ - "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_input, self.unpack_hook):" in code and \ - "with torch.autograd.graph.save_on_cpu(pin_memory=True):" in code and \ - "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_no_input, self.unpack_hook):" in code and \ - "colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, True, linear4, use_reentrant=False)" in code + "def unpack_hook(self, packed):" in code and \ + "def pack_hook_no_input(self, x):" in code and \ + "setattr(x, 'offload', True)" in code and \ + "setattr(linear3, 'offload', False)" in code and \ + "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_input, self.unpack_hook):" in code and \ + "with torch.autograd.graph.save_on_cpu(pin_memory=True):" in code and \ + "with torch.autograd.graph.saved_tensors_hooks(self.pack_hook_no_input, self.unpack_hook):" in code and \ + "colossalai.utils.activation_checkpoint.checkpoint(self.checkpoint_0, True, linear4, use_reentrant=False)" in code _test_fwd_and_bwd(model, gm, data) gpc.destroy() diff --git a/tests/test_fx/test_parallel_1d.py b/tests/test_fx/test_parallel_1d.py index 1044be7db1f4..29135b45f997 100644 --- a/tests/test_fx/test_parallel_1d.py +++ b/tests/test_fx/test_parallel_1d.py @@ -5,9 +5,9 @@ import torch from torch.fx import symbolic_trace -from colossalai.core import global_context as gpc from colossalai.fx.passes import column_shard_linear_pass from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import clear_cache_before_run, rerun_if_address_is_in_use, spawn diff --git a/tests/test_layers/test_1d/checks_1d/check_layer_1d.py b/tests/test_layers/test_1d/checks_1d/check_layer_1d.py index b97cd83c5785..db4aa79f1d9f 100644 --- a/tests/test_layers/test_1d/checks_1d/check_layer_1d.py +++ b/tests/test_layers/test_1d/checks_1d/check_layer_1d.py @@ -2,9 +2,9 @@ import torch.distributed as dist from torch.nn import Parameter -from colossalai.core import global_context as gpc from colossalai.global_variables import tensor_parallel_env as env from colossalai.legacy.context.parallel_mode 
import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn import ( Classifier1D, Embedding1D, diff --git a/tests/test_layers/test_1d/test_1d.py b/tests/test_layers/test_1d/test_1d.py index 891512542475..254457a88099 100644 --- a/tests/test_layers/test_1d/test_1d.py +++ b/tests/test_layers/test_1d/test_1d.py @@ -5,8 +5,8 @@ import torch from checks_1d.check_layer_1d import * -from colossalai.core import global_context as gpc from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, spawn diff --git a/tests/test_layers/test_2d/checks_2d/check_layer_2d.py b/tests/test_layers/test_2d/checks_2d/check_layer_2d.py index 2a067ed37f97..0fa6dff6323e 100644 --- a/tests/test_layers/test_2d/checks_2d/check_layer_2d.py +++ b/tests/test_layers/test_2d/checks_2d/check_layer_2d.py @@ -1,7 +1,7 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn import ( Classifier2D, CrossEntropyLoss2D, diff --git a/tests/test_layers/test_2d/checks_2d/check_operation_2d.py b/tests/test_layers/test_2d/checks_2d/check_operation_2d.py index 68f057ebf3ee..b64ba336fbb4 100644 --- a/tests/test_layers/test_2d/checks_2d/check_operation_2d.py +++ b/tests/test_layers/test_2d/checks_2d/check_operation_2d.py @@ -3,8 +3,8 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.parallel_2d._operation import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D from colossalai.utils import get_current_device, print_rank_0 diff --git a/tests/test_layers/test_2d/test_2d.py b/tests/test_layers/test_2d/test_2d.py index bcea5ce7b25d..f2d3cb945813 100644 --- a/tests/test_layers/test_2d/test_2d.py +++ b/tests/test_layers/test_2d/test_2d.py @@ -18,8 +18,8 @@ ) from checks_2d.check_operation_2d import check_AB, check_ABT, check_ATB -from colossalai.core import global_context as gpc from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, spawn diff --git a/tests/test_layers/test_2p5d/checks_2p5d/check_layer_2p5d.py b/tests/test_layers/test_2p5d/checks_2p5d/check_layer_2p5d.py index 6e5d34a04fb2..f88a6ea09aef 100644 --- a/tests/test_layers/test_2p5d/checks_2p5d/check_layer_2p5d.py +++ b/tests/test_layers/test_2p5d/checks_2p5d/check_layer_2p5d.py @@ -1,8 +1,8 @@ import torch from torch.nn import Parameter -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn import ( Classifier2p5D, CrossEntropyLoss2p5D, diff --git a/tests/test_layers/test_2p5d/checks_2p5d/check_operation_2p5d.py b/tests/test_layers/test_2p5d/checks_2p5d/check_operation_2p5d.py index 77fa139130a0..2f68b7656d8b 100644 --- a/tests/test_layers/test_2p5d/checks_2p5d/check_operation_2p5d.py +++ b/tests/test_layers/test_2p5d/checks_2p5d/check_operation_2p5d.py @@ -1,7 +1,7 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from 
colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_ABT_2p5D, Matmul_ATB_2p5D from colossalai.utils import get_current_device, print_rank_0 diff --git a/tests/test_layers/test_2p5d/test_2p5d.py b/tests/test_layers/test_2p5d/test_2p5d.py index 373d834d0032..84d9f9d6e606 100644 --- a/tests/test_layers/test_2p5d/test_2p5d.py +++ b/tests/test_layers/test_2p5d/test_2p5d.py @@ -3,8 +3,8 @@ from checks_2p5d.check_layer_2p5d import * from checks_2p5d.check_operation_2p5d import check_AB, check_ABT, check_ATB -from colossalai.core import global_context as gpc from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, spawn diff --git a/tests/test_layers/test_3d/checks_3d/check_layer_3d.py b/tests/test_layers/test_3d/checks_3d/check_layer_3d.py index e946a1f5912d..d6da13a6bc78 100644 --- a/tests/test_layers/test_3d/checks_3d/check_layer_3d.py +++ b/tests/test_layers/test_3d/checks_3d/check_layer_3d.py @@ -6,7 +6,7 @@ import torch from colossalai.constants import INPUT_GROUP_3D, OUTPUT_GROUP_3D, WEIGHT_GROUP_3D -from colossalai.core import global_context +from colossalai.legacy.core import global_context from colossalai.logging import get_dist_logger from colossalai.nn import ( Classifier3D, diff --git a/tests/test_layers/test_3d/test_3d.py b/tests/test_layers/test_3d/test_3d.py index fde71a4a0d26..c3b70710b6af 100644 --- a/tests/test_layers/test_3d/test_3d.py +++ b/tests/test_layers/test_3d/test_3d.py @@ -15,8 +15,8 @@ check_vocab_parallel_loss, ) -from colossalai.core import global_context as gpc from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn diff --git a/tests/test_layers/test_sequence/checks_seq/check_layer_seq.py b/tests/test_layers/test_sequence/checks_seq/check_layer_seq.py index b1194b3e24ae..3e1cc7f560cf 100644 --- a/tests/test_layers/test_sequence/checks_seq/check_layer_seq.py +++ b/tests/test_layers/test_sequence/checks_seq/check_layer_seq.py @@ -1,7 +1,7 @@ import torch -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.nn import TransformerSelfAttentionRing from colossalai.utils import get_current_device diff --git a/tests/test_layers/test_sequence/test_sequence.py b/tests/test_layers/test_sequence/test_sequence.py index 543993240adb..ab25fed98dca 100644 --- a/tests/test_layers/test_sequence/test_sequence.py +++ b/tests/test_layers/test_sequence/test_sequence.py @@ -3,8 +3,8 @@ import torch.distributed as dist import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn CONFIG = dict(parallel=dict(tensor=dict(size=4, mode='sequence'))) diff --git a/tests/test_moe/test_kernel.py b/tests/test_moe/test_kernel.py index 695884fea74e..b33c5d3adcaf 100644 --- a/tests/test_moe/test_kernel.py +++ b/tests/test_moe/test_kernel.py @@ -3,9 +3,9 @@ import torch.nn as nn import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context import 
ParallelMode from colossalai.legacy.context.moe_context import MOE_CONTEXT +from colossalai.legacy.core import global_context as gpc from colossalai.nn.layer.moe import Experts, MoeLayer, Top1Router, Top2Router from colossalai.testing import rerun_if_address_is_in_use, spawn from colossalai.utils import get_current_device diff --git a/tests/test_tensor/common_utils/_utils.py b/tests/test_tensor/common_utils/_utils.py index d625f3c28c29..513b2fc45e48 100644 --- a/tests/test_tensor/common_utils/_utils.py +++ b/tests/test_tensor/common_utils/_utils.py @@ -6,8 +6,8 @@ import torch.distributed as dist from torch.testing import assert_close -from colossalai.core import global_context as gpc from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.tensor import ComputePattern, ComputeSpec, ShardSpec diff --git a/tests/test_tensor/test_comm_spec_apply.py b/tests/test_tensor/test_comm_spec_apply.py index 2c68633aabc8..e4a5b88d16e4 100644 --- a/tests/test_tensor/test_comm_spec_apply.py +++ b/tests/test_tensor/test_comm_spec_apply.py @@ -1,9 +1,9 @@ import pytest import torch -from colossalai.core import global_context as gpc from colossalai.device.device_mesh import DeviceMesh from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.tensor.shape_consistency import CollectiveCommPattern, CommSpec from colossalai.tensor.sharding_spec import ShardingSpec diff --git a/tests/test_tensor/test_dtensor/test_comm_spec.py b/tests/test_tensor/test_dtensor/test_comm_spec.py index 95fcd2aaf8f3..6055f89e6d4c 100644 --- a/tests/test_tensor/test_dtensor/test_comm_spec.py +++ b/tests/test_tensor/test_dtensor/test_comm_spec.py @@ -1,9 +1,9 @@ import pytest import torch -from colossalai.core import global_context as gpc from colossalai.device.device_mesh import DeviceMesh from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.tensor.d_tensor.comm_spec import CollectiveCommPattern, CommSpec from colossalai.testing import rerun_if_address_is_in_use, spawn diff --git a/tests/test_tensor/test_mix_gather.py b/tests/test_tensor/test_mix_gather.py index 9122808eb5a3..c70ccea887a7 100644 --- a/tests/test_tensor/test_mix_gather.py +++ b/tests/test_tensor/test_mix_gather.py @@ -1,9 +1,9 @@ import pytest import torch -from colossalai.core import global_context as gpc from colossalai.device.device_mesh import DeviceMesh from colossalai.initialize import launch +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.tensor.shape_consistency import CollectiveCommPattern, CommSpec from colossalai.tensor.sharding_spec import ShardingSpec diff --git a/tests/test_trainer/test_pipeline/test_p2p.py b/tests/test_trainer/test_pipeline/test_p2p.py index fca36f8dd83b..12f6fdfad11f 100644 --- a/tests/test_trainer/test_pipeline/test_p2p.py +++ b/tests/test_trainer/test_pipeline/test_p2p.py @@ -15,9 +15,9 @@ send_forward_recv_backward, send_obj_meta, ) -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.testing import rerun_if_address_is_in_use, spawn from 
colossalai.utils import get_current_device diff --git a/tests/test_trainer/test_pipeline/test_pipeline_schedule.py b/tests/test_trainer/test_pipeline/test_pipeline_schedule.py index 340e77e3b4d0..f4645ad352c4 100644 --- a/tests/test_trainer/test_pipeline/test_pipeline_schedule.py +++ b/tests/test_trainer/test_pipeline/test_pipeline_schedule.py @@ -11,9 +11,9 @@ from torchvision.models import resnet18 import colossalai -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.testing import rerun_if_address_is_in_use, spawn from colossalai.utils import get_dataloader, print_rank_0 diff --git a/tests/test_trainer/test_trainer_with_pipe_schedule.py b/tests/test_trainer/test_trainer_with_pipe_schedule.py index bc88ec81765b..1d68e57f7a00 100644 --- a/tests/test_trainer/test_trainer_with_pipe_schedule.py +++ b/tests/test_trainer/test_trainer_with_pipe_schedule.py @@ -10,8 +10,8 @@ from torchvision.models import resnet18 import colossalai -from colossalai.core import global_context as gpc from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import get_dist_logger from colossalai.testing import rerun_if_address_is_in_use, spawn from colossalai.trainer import Trainer diff --git a/tests/test_utils/test_checkpoint/test_checkpoint_1d.py b/tests/test_utils/test_checkpoint/test_checkpoint_1d.py index 3f1d32eb2410..c0397b3b9f0b 100644 --- a/tests/test_utils/test_checkpoint/test_checkpoint_1d.py +++ b/tests/test_utils/test_checkpoint/test_checkpoint_1d.py @@ -8,9 +8,9 @@ import torch.nn as nn import colossalai.nn as col_nn -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn from colossalai.utils import is_using_pp diff --git a/tests/test_utils/test_checkpoint/test_checkpoint_2d.py b/tests/test_utils/test_checkpoint/test_checkpoint_2d.py index 450b0e5d661b..9b3e233692fa 100644 --- a/tests/test_utils/test_checkpoint/test_checkpoint_2d.py +++ b/tests/test_utils/test_checkpoint/test_checkpoint_2d.py @@ -8,9 +8,9 @@ import torch.nn as nn import colossalai.nn as col_nn -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn from colossalai.utils import is_using_pp diff --git a/tests/test_utils/test_checkpoint/test_checkpoint_2p5d.py b/tests/test_utils/test_checkpoint/test_checkpoint_2p5d.py index 3f4944128555..d07e568c4c50 100644 --- a/tests/test_utils/test_checkpoint/test_checkpoint_2p5d.py +++ b/tests/test_utils/test_checkpoint/test_checkpoint_2p5d.py @@ -8,9 +8,9 @@ import torch.nn as nn import colossalai.nn as col_nn -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging 
import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn from colossalai.utils import is_using_pp diff --git a/tests/test_utils/test_checkpoint/test_checkpoint_3d.py b/tests/test_utils/test_checkpoint/test_checkpoint_3d.py index 017fb9335ec2..a433910f16f6 100644 --- a/tests/test_utils/test_checkpoint/test_checkpoint_3d.py +++ b/tests/test_utils/test_checkpoint/test_checkpoint_3d.py @@ -8,9 +8,9 @@ import torch.nn as nn import colossalai.nn as col_nn -from colossalai.core import global_context as gpc from colossalai.initialize import launch from colossalai.legacy.context.parallel_mode import ParallelMode +from colossalai.legacy.core import global_context as gpc from colossalai.logging import disable_existing_loggers from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn from colossalai.utils import is_using_pp
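
Illustrative usage (not part of the patch): every hunk above applies the same mechanical change, replacing the import of the global parallel context from colossalai.core with colossalai.legacy.core while the object and its API stay the same. A minimal sketch of the post-move import path follows, assuming an installation that includes this change and that the context has already been initialized via colossalai launch; the helper name data_parallel_world_size is hypothetical and only for illustration.

# Hypothetical helper showing the post-patch import path.
# Assumes colossalai has been initialized (e.g. via colossalai.initialize.launch)
# before the context is queried.
from colossalai.legacy.context import ParallelMode
from colossalai.legacy.core import global_context as gpc  # was: from colossalai.core import global_context as gpc

def data_parallel_world_size() -> int:
    # Same ParallelContext API as before; only the module path has moved.
    return gpc.get_world_size(ParallelMode.DATA)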