diff --git a/internlm/model/modeling_internlm.py b/internlm/model/modeling_internlm.py index 8f25fea2..651a6292 100644 --- a/internlm/model/modeling_internlm.py +++ b/internlm/model/modeling_internlm.py @@ -130,7 +130,7 @@ def __init__( for _, param in self.mlp.named_parameters(): if gpc.get_world_size(ParallelMode.TENSOR) > 1: setattr(param, IS_TENSOR_PARALLEL, True) - + self.dropout2 = nn.Dropout(drop_rate) self.use_swiglu = use_swiglu self.use_scaled_init = use_scaled_init