Commit

[misc] resolve code factor issues
ver217 committed Aug 14, 2023
1 parent 60db2cc commit 6a03fa9
Showing 21 changed files with 32 additions and 206 deletions.
2 changes: 1 addition & 1 deletion colossalai/booster/booster.py
@@ -139,7 +139,7 @@ def backward(self, loss: torch.Tensor, optimizer: Optimizer) -> None:
loss (torch.Tensor): The loss to be backpropagated.
optimizer (Optimizer): The optimizer to be updated.
"""
-# TODO: implement this method with plugin
+# TODO(frank lee): implement this method with plugin
optimizer.backward(loss)

def execute_pipeline(self,
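For context, `Booster.backward(loss, optimizer)` currently just delegates to the wrapped optimizer; the TODO above tracks routing it through the active plugin instead. A minimal training-step sketch, assuming the usual ColossalAI Booster workflow (the plugin choice, toy model, and data below are illustrative, and the distributed environment is assumed to be initialized beforehand, e.g. via colossalai.launch_from_torch):

    import torch
    import torch.nn as nn
    from colossalai.booster import Booster
    from colossalai.booster.plugin import TorchDDPPlugin  # assumed plugin choice

    model = nn.Linear(16, 2)                              # toy model for illustration
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    booster = Booster(plugin=TorchDDPPlugin())
    # boost() wraps the raw objects so that backward/step can later be routed through the plugin
    model, optimizer, criterion, _, _ = booster.boost(model, optimizer, criterion)

    x, y = torch.randn(8, 16), torch.randint(0, 2, (8,))
    loss = criterion(model(x), y)
    booster.backward(loss, optimizer)                     # today this amounts to optimizer.backward(loss)
    optimizer.step()
    optimizer.zero_grad()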
2 changes: 0 additions & 2 deletions colossalai/shardformer/layer/utils.py
@@ -29,8 +29,6 @@ class Randomizer:
_INDEX = 0

def __init__(self, seed: int):
-# TODO: remove colossalai.context.random
-
self.seed = seed

# Handle CUDA rng state
8 changes: 4 additions & 4 deletions colossalai/shardformer/modeling/bert.py
@@ -57,7 +57,7 @@ def bert_model_forward(
hidden_states: Optional[torch.FloatTensor] = None, # this is from the previous stage
stage_index: Optional[List[int]] = None,
):
-# TODO: add explaination of the output here.
+# TODO(jianghai): add explaination of the output here.
r"""
encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
@@ -113,7 +113,7 @@ def bert_model_forward(
batch_size, seq_length = input_shape
device = hidden_states.device

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
@@ -272,7 +272,7 @@ def bert_for_pretraining_forward(
logger = logging.get_logger(__name__)

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai) left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
@@ -534,7 +534,7 @@ def bert_for_next_sentence_prediction_forward(
stage_index: Optional[List[int]] = None,
**kwargs,
):
-#-> Union[Tuple[torch.Tensor], NextSentencePredictorOutput]:
+# -> Union[Tuple[torch.Tensor], NextSentencePredictorOutput]:
r"""
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
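The TODOs renamed in this file and in the model files below all sit next to the same guard: pipeline-parallel forwards cannot yet collect per-stage attention maps, hidden states, or kv caches, so those options are forced off with a one-time warning instead of raising. A condensed sketch of that pattern (the helper name is illustrative, not part of the codebase):

    def disable_unsupported_pipeline_options(logger, output_attentions, output_hidden_states, use_cache):
        # Pipeline forwards cannot yet gather these per-stage outputs, so switch them off.
        if output_attentions:
            logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
            output_attentions = False
        if output_hidden_states:
            logger.warning_once('output_hidden_states=True is not supported for pipeline models at the moment.')
            output_hidden_states = False
        if use_cache:
            logger.warning_once('use_cache=True is not supported for pipeline models at the moment.')
            use_cache = False
        return output_attentions, output_hidden_states, use_cache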
12 changes: 6 additions & 6 deletions colossalai/shardformer/modeling/bloom.py
@@ -252,7 +252,7 @@ def custom_forward(*inputs):
# Add last hidden state
hidden_states = self.ln_f(hidden_states)

-# TODO: deal with all_hidden_states, all_self_attentions, presents
+# TODO(jianghai): deal with all_hidden_states, all_self_attentions, presents
if output_hidden_states:
all_hidden_states = all_hidden_states + (hidden_states,)

@@ -307,7 +307,7 @@ def bloom_for_causal_lm_forward(self: BloomForCausalLM,
raise ValueError(f"Got unexpected arguments: {deprecated_arguments}")

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
@@ -402,7 +402,7 @@ def bloom_for_sequence_classification_forward(

return_dict = return_dict if return_dict is not None else self.config.use_return_dict

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
@@ -431,7 +431,7 @@ def bloom_for_sequence_classification_forward(
all_cross_attentions = None
if stage_manager.is_last_stage():
batch_size = hidden_states.shape[0]
-#update batch size
+# update batch size
hidden_states = transformer_outputs[0]
logits = self.score(hidden_states)

@@ -525,7 +525,7 @@ def bloom_for_token_classification_forward(

return_dict = return_dict if return_dict is not None else self.config.use_return_dict

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
@@ -611,7 +611,7 @@ def bloom_for_question_answering_forward(
logger = logging.get_logger(__name__)

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
2 changes: 1 addition & 1 deletion colossalai/shardformer/modeling/chatglm.py
@@ -152,7 +152,7 @@ def chatglm_model_forward(
if output_hidden_states is not None else self.config.output_hidden_states)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if past_key_values:
logger.warning_once('Non-empty past_key_values is not supported for pipeline models at the moment.')
past_key_values = None
2 changes: 1 addition & 1 deletion colossalai/shardformer/modeling/gpt2.py
@@ -57,7 +57,7 @@ def gpt2_model_forward(
logger = logging.get_logger(__name__)

# Preprocess passed in arguments
-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(baizhou): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if past_key_values:
logger.warning_once('Non-empty past_key_values is not supported for pipeline models at the moment.')
past_key_values = None
6 changes: 3 additions & 3 deletions colossalai/shardformer/modeling/llama.py
@@ -65,7 +65,7 @@ def llama_model_forward(
seq_length_with_past = seq_length
past_key_values_length = 0

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
@@ -216,7 +216,7 @@ def llama_for_causal_lm_forward(
if output_hidden_states is not None else self.config.output_hidden_states)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
@@ -301,7 +301,7 @@ def llama_for_sequence_classification_forward(
logger = logging.get_logger(__name__)

return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(jianghai): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if output_attentions:
logger.warning_once('output_attentions=True is not supported for pipeline models at the moment.')
output_attentions = False
2 changes: 1 addition & 1 deletion colossalai/shardformer/modeling/opt.py
@@ -148,7 +148,7 @@ def opt_model_forward(
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...")
use_cache = False

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(baizhou): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if past_key_values:
logger.warning_once('Non-empty past_key_values is not supported for pipeline models at the moment.')
past_key_values = None
6 changes: 3 additions & 3 deletions colossalai/shardformer/modeling/t5.py
@@ -50,7 +50,7 @@ def t5_stack_forward(

logger = logging.get_logger(__name__)

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(baizhou): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if past_key_values:
logger.warning_once('Non-empty past_key_values is not supported for pipeline models at the moment.')
past_key_values = None
@@ -285,7 +285,7 @@ def t5_model_forward(

logger = logging.get_logger(__name__)

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(baizhou): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if past_key_values:
logger.warning_once('Non-empty past_key_values is not supported for pipeline models at the moment.')
past_key_values = None
@@ -422,7 +422,7 @@ def t5_for_conditional_generation_forward(

logger = logging.get_logger(__name__)

-# TODO: left the recording kv-value tensors as () or None type, this feature may be added in the future.
+# TODO(baizhou): left the recording kv-value tensors as () or None type, this feature may be added in the future.
if past_key_values:
logger.warning_once('Non-empty past_key_values is not supported for pipeline models at the moment.')
past_key_values = None
2 changes: 1 addition & 1 deletion colossalai/shardformer/modeling/vit.py
@@ -96,7 +96,7 @@ def pp_forward(
if pixel_values is None:
raise ValueError("You have to specify pixel_values")

-# TODO: maybe have a cleaner way to cast the input (from `ImageProcessor` side?)
+# TODO(FoolPlayer): maybe have a cleaner way to cast the input (from `ImageProcessor` side?)
expected_dtype = self.embeddings.patch_embeddings.projection.weight.dtype
if pixel_values.dtype != expected_dtype:
pixel_values = pixel_values.to(expected_dtype)
1 change: 0 additions & 1 deletion colossalai/shardformer/shard/shard_config.py
@@ -29,7 +29,6 @@ class ShardConfig:
enable_flash_attention: bool = False
enable_jit_fused: bool = False

-# TODO: add support for tensor parallel
# pipeline_parallel_size: int
# data_parallel_size: int
# tensor_parallel_mode: Literal['1d', '2d', '2.5d', '3d']
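For reference, ShardConfig is the dataclass that Shardformer policies read these switches from; the commented-out fields above are the still-unimplemented parallel-size knobs that the deleted TODO referred to. A minimal sketch of how the two flags shown in this hunk are typically passed in, assuming the public colossalai.shardformer API (the BERT model is a placeholder, and the snippet is expected to run only after the distributed environment has been initialized, since tensor parallelism needs a process group):

    from transformers import BertConfig, BertModel
    from colossalai.shardformer import ShardConfig, ShardFormer

    # only the two switches visible in this hunk are set; everything else keeps its default
    shard_config = ShardConfig(enable_flash_attention=True, enable_jit_fused=True)
    shard_former = ShardFormer(shard_config=shard_config)
    sharded_model, shared_params = shard_former.optimize(BertModel(BertConfig()))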
2 changes: 1 addition & 1 deletion tests/test_fx/test_tracer/test_hf_model/test_hf_gpt.py
@@ -15,7 +15,7 @@ def test_gpt():
for name, (model_fn, data_gen_fn, _, _, _) in sub_registry.items():
model = model_fn()

-# TODO: support the following models
+# TODO(ver217): support the following models
# 1. GPT2DoubleHeadsModel
# as they are not supported, let's skip them
if model.__class__.__name__ in ['GPT2DoubleHeadsModel', 'GPT2ForQuestionAnswering']:
@@ -27,7 +27,7 @@ def rearrange(tensor: torch.Tensor, dim: int):
return rearanged_tensor


-# TODO: solve lazy_init True is not working
+# TODO(FoolPlayer): solve lazy_init True is not working
@parameterize('lazy_init', [False])
def check_linear_conv_1d_col(lazy_init: bool):
ctx = LazyInitContext() if lazy_init else nullcontext()
171 changes: 0 additions & 171 deletions tests/test_shardformer/test_model/test_pure_pipeline.py

This file was deleted.
