[legacy] move builder and registry to legacy #4603

Merged
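The substance of this PR is an import-path move: the `colossalai.builder` and `colossalai.registry` packages are relocated under `colossalai.legacy`, and every call site in the diff is updated to match. As a quick orientation, the before/after pattern repeated throughout the files below looks like this; the registry names shown are the ones actually touched in this PR.

# Old import paths (removed throughout this PR):
# from colossalai.registry import DIST_GROUP_INITIALIZER, GRADIENT_HANDLER, HOOKS, LAYERS
# from colossalai.builder.builder import build_gradient_handler

# New import paths (same objects, now under the legacy namespace):
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER, GRADIENT_HANDLER, HOOKS, LAYERS
from colossalai.legacy.builder.builder import build_gradient_handler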
@@ -1,5 +1,5 @@
class Registry:
# TODO: refactor the registry classes used in colossalai.registry, colossalai.fx and here
# TODO: refactor the registry classes used in colossalai.legacy.registry, colossalai.fx and here

def __init__(self, name):
self.name = name
2 changes: 1 addition & 1 deletion colossalai/context/parallel_context.py
@@ -15,8 +15,8 @@
from colossalai.context.config import Config
from colossalai.context.singleton_meta import SingletonMeta
from colossalai.global_variables import tensor_parallel_env as env
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
from colossalai.logging import get_dist_logger
from colossalai.registry import DIST_GROUP_INITIALIZER

from .parallel_mode import ParallelMode
from .random import add_seed, get_seeds, set_mode
@@ -2,8 +2,9 @@
# -*- encoding: utf-8 -*-

import torch.distributed as dist

from colossalai.global_variables import tensor_parallel_env as env
from colossalai.registry import DIST_GROUP_INITIALIZER
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer
@@ -3,7 +3,7 @@
import torch.distributed as dist

from colossalai.global_variables import tensor_parallel_env as env
from colossalai.registry import DIST_GROUP_INITIALIZER
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer
@@ -4,9 +4,10 @@
import math

import torch.distributed as dist

from colossalai.context import Config
from colossalai.global_variables import tensor_parallel_env as env
from colossalai.registry import DIST_GROUP_INITIALIZER
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer
@@ -6,7 +6,7 @@
import torch.distributed as dist

from colossalai.global_variables import tensor_parallel_env as env
from colossalai.registry import DIST_GROUP_INITIALIZER
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer
@@ -3,7 +3,7 @@

from torch import distributed as dist

from colossalai.registry import DIST_GROUP_INITIALIZER
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer
@@ -2,9 +2,11 @@
# -*- encoding: utf-8 -*-

import torch.distributed as dist
from colossalai.registry import DIST_GROUP_INITIALIZER
from .process_group_initializer import ProcessGroupInitializer

from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer


@DIST_GROUP_INITIALIZER.register_module
@@ -3,7 +3,7 @@

from torch import distributed as dist

from colossalai.registry import DIST_GROUP_INITIALIZER
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer
@@ -2,7 +2,7 @@
# -*- encoding: utf-8 -*-
import torch.distributed as dist

from colossalai.registry import DIST_GROUP_INITIALIZER
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .initializer_tensor import Initializer_Tensor
@@ -3,9 +3,10 @@

import torch.distributed as dist

from colossalai.registry import DIST_GROUP_INITIALIZER
from .process_group_initializer import ProcessGroupInitializer
from colossalai.legacy.registry import DIST_GROUP_INITIALIZER

from ..parallel_mode import ParallelMode
from .process_group_initializer import ProcessGroupInitializer


@DIST_GROUP_INITIALIZER.register_module
2 changes: 1 addition & 1 deletion colossalai/initialize.py
@@ -17,10 +17,10 @@

from colossalai.amp import AMP_TYPE, convert_to_amp
from colossalai.amp.naive_amp import NaiveAMPModel
from colossalai.builder.builder import build_gradient_handler
from colossalai.context import Config, ConfigException, ParallelMode
from colossalai.context.moe_context import MOE_CONTEXT
from colossalai.core import global_context as gpc
from colossalai.legacy.builder.builder import build_gradient_handler
from colossalai.legacy.engine import Engine
from colossalai.legacy.engine.gradient_accumulation import accumulate_gradient
from colossalai.legacy.engine.schedule import (
File renamed without changes.
@@ -3,7 +3,7 @@

import inspect

from colossalai.registry import *
from colossalai.legacy.registry import *


def build_from_config(module, config: dict):
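The builder module above only changes its wildcard registry import; its body is collapsed in this view. As a rough sketch of what a helper with the signature `build_from_config(module, config: dict)` typically does (an assumption for illustration, not the file's actual code), it instantiates the given class with the config unpacked as keyword arguments:

# Minimal sketch, assuming build_from_config simply unpacks the config into the constructor;
# the real body in colossalai/legacy/builder/builder.py is collapsed in the diff above.
def build_from_config(module, config: dict):
    return module(**config)

# Hypothetical usage with a made-up layer class and config:
# layer = build_from_config(MyLayer, {'in_features': 16, 'out_features': 32})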
@@ -1,6 +1,6 @@
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.registry import GRADIENT_HANDLER
from colossalai.legacy.registry import GRADIENT_HANDLER

from ._base_gradient_handler import BaseGradientHandler
from .utils import bucket_allreduce
@@ -1,7 +1,7 @@
from colossalai.context.moe_context import MOE_CONTEXT
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.registry import GRADIENT_HANDLER
from colossalai.legacy.registry import GRADIENT_HANDLER
from colossalai.utils.moe import get_moe_epsize_param_dict

from ._base_gradient_handler import BaseGradientHandler
@@ -7,7 +7,7 @@
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

from colossalai.core import global_context as gpc
from colossalai.registry import GRADIENT_HANDLER
from colossalai.legacy.registry import GRADIENT_HANDLER

from ._base_gradient_handler import BaseGradientHandler

@@ -1,6 +1,6 @@
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.registry import GRADIENT_HANDLER
from colossalai.legacy.registry import GRADIENT_HANDLER

from ._base_gradient_handler import BaseGradientHandler
from .utils import bucket_allreduce
@@ -1,4 +1,4 @@
from colossalai.registry import GRADIENT_HANDLER
from colossalai.legacy.registry import GRADIENT_HANDLER

from ._base_gradient_handler import BaseGradientHandler

File renamed without changes.
@@ -6,7 +6,7 @@


class Registry:
"""This is a registry class used to register classes and modules so that a universal
"""This is a registry class used to register classes and modules so that a universal
object builder can be enabled.

Args:
@@ -42,7 +42,7 @@ def register_module(self, module_class):
return module_class

def get_module(self, module_name: str):
"""Retrieves a module with name `module_name` and returns the module if it has
"""Retrieves a module with name `module_name` and returns the module if it has
already been registered before.

Args:
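For readers tracking the registry change itself: the hunks above show that `Registry` exposes `register_module` as a class decorator and `get_module` for lookup by name, and the decorated initializers elsewhere in this diff (e.g. `@DIST_GROUP_INITIALIZER.register_module`) use exactly that pattern. Below is a minimal sketch under the new import path; `MyGradientHandler` is a made-up example class, not part of this PR, and the lookup-by-class-name behaviour is assumed from the docstring.

from colossalai.legacy.registry import GRADIENT_HANDLER

@GRADIENT_HANDLER.register_module
class MyGradientHandler:
    # Toy handler used only to illustrate registration; a real handler would
    # subclass the library's BaseGradientHandler.
    def handle_gradient(self):
        pass

# Retrieve the registered class by name and instantiate it.
handler_cls = GRADIENT_HANDLER.get_module('MyGradientHandler')
handler = handler_cls()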
2 changes: 1 addition & 1 deletion colossalai/legacy/trainer/hooks/_checkpoint_hook.py
@@ -2,9 +2,9 @@
# -*- encoding: utf-8 -*-
import torch

from colossalai.legacy.registry import HOOKS
from colossalai.legacy.trainer.hooks import BaseHook
from colossalai.logging import get_dist_logger
from colossalai.registry import HOOKS
from colossalai.utils.checkpointing import save_checkpoint

from ._lr_scheduler_hook import LRSchedulerHook
2 changes: 1 addition & 1 deletion colossalai/legacy/trainer/hooks/_log_hook.py
@@ -7,9 +7,9 @@

from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.legacy.registry import HOOKS
from colossalai.legacy.trainer.hooks._metric_hook import ThroughputMetric
from colossalai.logging import DistributedLogger
from colossalai.registry import HOOKS
from colossalai.utils import MultiTimer, is_dp_rank_0, is_no_pp_or_last_stage, is_tp_rank_0, report_memory_usage

from ._base_hook import BaseHook
2 changes: 1 addition & 1 deletion colossalai/legacy/trainer/hooks/_lr_scheduler_hook.py
@@ -1,6 +1,6 @@
from torch import Tensor

from colossalai.registry import HOOKS
from colossalai.legacy.registry import HOOKS

from ._metric_hook import LearningRateMetric, MetricHook

6 changes: 3 additions & 3 deletions colossalai/legacy/trainer/hooks/_metric_hook.py
@@ -10,7 +10,7 @@
from colossalai.communication import all_reduce
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.registry import HOOKS
from colossalai.legacy.registry import HOOKS
from colossalai.utils import get_current_device, is_no_pp_or_last_stage

from ._base_hook import BaseHook
@@ -356,7 +356,7 @@ def get_last_step_value(self) -> float:
self.last_step_num_samples *= gpc.get_world_size(ParallelMode.DATA)
else:
self.last_step_used_time = all_reduce(self.last_step_used_time, ParallelMode.DATA) / \
gpc.get_world_size(ParallelMode.DATA)
gpc.get_world_size(ParallelMode.DATA)
self.last_step_num_samples = all_reduce(self.last_step_num_samples, ParallelMode.DATA)

sample_per_sec = _format_number(self.last_step_num_samples / (self.last_step_used_time + 1e-12).item())
@@ -367,7 +367,7 @@ def get_last_step_info(self) -> str:
self.last_step_num_samples *= gpc.get_world_size(ParallelMode.DATA)
else:
self.last_step_used_time = all_reduce(self.last_step_used_time, ParallelMode.DATA) / \
gpc.get_world_size(ParallelMode.DATA)
gpc.get_world_size(ParallelMode.DATA)
self.last_step_num_samples = all_reduce(self.last_step_num_samples, ParallelMode.DATA)

sample_per_sec = _format_number(self.last_step_num_samples / (self.last_step_used_time + 1e-12).item())
2 changes: 1 addition & 1 deletion colossalai/nn/layer/parallel_1d/layers.py
@@ -15,8 +15,8 @@
from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env
from colossalai.kernel import LayerNorm
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init
from colossalai.registry import LAYERS
from colossalai.utils.checkpointing import (
broadcast_state_dict,
gather_tensor_parallel_state_dict,
19 changes: 14 additions & 5 deletions colossalai/nn/layer/parallel_2d/layers.py
@@ -5,21 +5,30 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn import Parameter

from colossalai.communication import broadcast
from colossalai.context import ParallelMode, seed
from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init
from colossalai.registry import LAYERS
from colossalai.utils.checkpointing import gather_tensor_parallel_state_dict, partition_tensor_parallel_state_dict
from colossalai.utils.cuda import get_current_device
from torch import Tensor
from torch.nn import Parameter

from ..base_layer import ParallelLayer
from ..utils import divide, set_tensor_parallel_attribute_by_partition, to_2tuple
from ._operation import (Matmul_AB_2D, Matmul_ABT_2D, add_bias_2d, all_gather_tensor_2d, classifier_2d, layernorm_2d,
reduce_scatter_tensor_2d, split_batch_2d)
from ._operation import (
Matmul_AB_2D,
Matmul_ABT_2D,
add_bias_2d,
all_gather_tensor_2d,
classifier_2d,
layernorm_2d,
reduce_scatter_tensor_2d,
split_batch_2d,
)
from ._utils import assert_summa_initialization, get_summa_dim_from_env


26 changes: 19 additions & 7 deletions colossalai/nn/layer/parallel_2p5d/layers.py
@@ -5,22 +5,34 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn import Parameter

from colossalai.communication import broadcast
from colossalai.context import ParallelMode, seed
from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init
from colossalai.registry import LAYERS
from colossalai.utils.checkpointing import (broadcast_state_dict, gather_tensor_parallel_state_dict,
partition_tensor_parallel_state_dict)
from colossalai.utils.checkpointing import (
broadcast_state_dict,
gather_tensor_parallel_state_dict,
partition_tensor_parallel_state_dict,
)
from colossalai.utils.cuda import get_current_device
from torch import Tensor
from torch.nn import Parameter

from ..base_layer import ParallelLayer
from ..utils import divide, set_tensor_parallel_attribute_by_partition, to_2tuple
from ._operation import (Matmul_AB_2p5D, Matmul_ABT_2p5D, add_bias_2p5d, all_gather_tensor_2p5d, classifier_2p5d,
layernorm_2p5d, reduce_scatter_tensor_2p5d, split_batch_2p5d)
from ._operation import (
Matmul_AB_2p5D,
Matmul_ABT_2p5D,
add_bias_2p5d,
all_gather_tensor_2p5d,
classifier_2p5d,
layernorm_2p5d,
reduce_scatter_tensor_2p5d,
split_batch_2p5d,
)
from ._utils import assert_tesseract_initialization, get_tesseract_dim_dep_from_env


2 changes: 1 addition & 1 deletion colossalai/nn/layer/parallel_3d/layers.py
@@ -13,9 +13,9 @@
from colossalai.context import ParallelMode, seed
from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init
from colossalai.nn.layer.base_layer import ParallelLayer
from colossalai.registry import LAYERS
from colossalai.utils.checkpointing import (
broadcast_state_dict,
gather_tensor_parallel_state_dict,
10 changes: 5 additions & 5 deletions colossalai/nn/layer/parallel_sequence/layers.py
@@ -2,20 +2,20 @@
# -*- encoding: utf-8 -*-

import math
import colossalai

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter

import colossalai
from colossalai.context import seed
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.nn.layer.parallel_sequence._operation import RingQK, RingAV
from colossalai.registry import LAYERS
from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType
from colossalai.kernel import FusedScaleMaskSoftmax
from colossalai.context import seed
from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType
from colossalai.legacy.registry import LAYERS
from colossalai.nn.layer.parallel_sequence._operation import RingAV, RingQK


@LAYERS.register_module
2 changes: 1 addition & 1 deletion colossalai/nn/layer/vanilla/layers.py
@@ -8,8 +8,8 @@
from torch.nn.parameter import Parameter

from colossalai.context import seed
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init
from colossalai.registry import LAYERS
from colossalai.utils.cuda import get_current_device

from ..utils import to_2tuple