diff --git a/llmfoundry/utils/checkpoint_conversion_helpers.py b/llmfoundry/utils/checkpoint_conversion_helpers.py
index 35e77eab6c..dafeec94e1 100644
--- a/llmfoundry/utils/checkpoint_conversion_helpers.py
+++ b/llmfoundry/utils/checkpoint_conversion_helpers.py
@@ -112,7 +112,8 @@ def load_tokenizer(
 
 
 def _write_zero_bias(weight_name: str, weight_file_path: str,
-                     bias_shape: Union[Tuple[int, ...], int]) -> None:
+                     bias_shape: Union[Tuple[int, ...],
+                                       int], np_data_type: np.dtype) -> None:
     """Write zeros for bias when converting MPT to FasterTransformer weights.
 
     MPT model might not have bias while FT expects bias.
@@ -121,6 +122,7 @@ def _write_zero_bias(weight_name: str, weight_file_path: str,
         weight_name (str): Name of the weight tensor.
         weight_file_path (str): Output path for storing the weight (NOT zero bias).
         bias_shape (Union[Tuple[int, ...], int]): Shape of the bias array.
+        np_data_type (np.dtype): The data type for bias.
     """
     if 'weight' not in weight_file_path:
         raise RuntimeError(
@@ -128,13 +130,14 @@ def _write_zero_bias(weight_name: str, weight_file_path: str,
         )
     log.debug(f'zero bias for weight: {weight_name}')
     bias_file_path = weight_file_path.replace('.weight', '.bias')
-    bias = np.zeros(bias_shape, dtype=np.float32)
+    bias = np.zeros(bias_shape, dtype=np_data_type)
     bias.tofile(bias_file_path)
 
 
 def _convert_weight_to_ft_each(save_dir: str, infer_gpu_num: int,
                                tensor_name: str, config: Dict[str, Any],
-                               data: np.ndarray) -> None:
+                               data: np.ndarray,
+                               np_weight_data_type: np.dtype) -> None:
     """Convert each MPT weight to a FasterTransformer compatible format.
 
     Args:
@@ -155,7 +158,9 @@ def _convert_weight_to_ft_each(save_dir: str, infer_gpu_num: int,
         save_path = os.path.join(save_dir, f'model.{tensor_name}.bin')
         data.tofile(save_path)
         if 'weight' in tensor_name and config['no_bias']:
-            _write_zero_bias(tensor_name, save_path, data.shape[-1])
+            _write_zero_bias(tensor_name, save_path, data.shape[-1],
+                             np_weight_data_type
+                            )  # pyright: ignore [reportGeneralTypeIssues]
 
     elif tensor_name.find('attention.dense.weight') != -1:
         assert data.shape == (
@@ -170,11 +175,13 @@ def _convert_weight_to_ft_each(save_dir: str, infer_gpu_num: int,
         if config['no_bias']:
             fake_weight_path = os.path.join(save_dir,
                                             f'model.{tensor_name}.bin')
-            _write_zero_bias(tensor_name, fake_weight_path, data.shape[-1])
+            _write_zero_bias(tensor_name, fake_weight_path, data.shape[-1],
+                             np_weight_data_type
+                            )  # pyright: ignore [reportGeneralTypeIssues]
 
     elif tensor_name.find('mlp.dense_4h_to_h.weight') != -1:
         assert data.shape == (
-            config['d_model'], config['mlp_ratio'] *
+            config['d_model'], config['expansion_ratio'] *
             config['d_model']), f'unexpected dim for {tensor_name}'
         # nn.Linear weights are transposed
         data = data.T
@@ -185,11 +192,13 @@ def _convert_weight_to_ft_each(save_dir: str, infer_gpu_num: int,
         if config['no_bias']:
             fake_weight_path = os.path.join(save_dir,
                                             f'model.{tensor_name}.bin')
-            _write_zero_bias(tensor_name, fake_weight_path, data.shape[-1])
+            _write_zero_bias(tensor_name, fake_weight_path, data.shape[-1],
+                             np_weight_data_type
+                            )  # pyright: ignore [reportGeneralTypeIssues]
 
     elif tensor_name.find('mlp.dense_h_to_4h.weight') != -1:
         assert data.shape == (
-            config['mlp_ratio'] * config['d_model'],
+            config['expansion_ratio'] * config['d_model'],
             config['d_model']), f'unexpected dim for {tensor_name}'
         # nn.Linear weights are transposed
         data = data.T
@@ -200,11 +209,12 @@ def _convert_weight_to_ft_each(save_dir: str, infer_gpu_num: int,
             split_vals[j].tofile(save_path)
             if config['no_bias']:
                 _write_zero_bias(tensor_name, save_path,
-                                 split_vals[j].shape[-1])
+                                 split_vals[j].shape[-1], np_weight_data_type
+                                )  # pyright: ignore [reportGeneralTypeIssues]
 
     elif tensor_name.find('mlp.dense_h_to_4h.bias') != -1:
         assert data.shape == (
-            config['mlp_ratio'] *
+            config['expansion_ratio'] *
             config['d_model'],), f'unexpected dim for {tensor_name}'
         split_vals = np.split(data, infer_gpu_num, axis=-1)
         for j in range(infer_gpu_num):
@@ -238,7 +248,9 @@ def _convert_weight_to_ft_each(save_dir: str, infer_gpu_num: int,
             split_vals[j].tofile(save_path)
             if config['no_bias']:
                 _write_zero_bias(tensor_name, save_path,
-                                 (3, split_vals[j].shape[-1]))
+                                 (3, split_vals[j].shape[-1]),
+                                 np_weight_data_type
+                                )  # pyright: ignore [reportGeneralTypeIssues]
 
     else:
         raise RuntimeError(f'Tensor with name {tensor_name} is not handled')
@@ -306,7 +318,12 @@ def convert_and_save_ft_weights(named_params: dict,
                                      'model.final_layernorm.weight.bin')
             data.tofile(save_path)
             if config['no_bias']:
-                _write_zero_bias(name, save_path, data.shape[-1])
+                _write_zero_bias(
+                    name,
+                    save_path,
+                    data.shape[-1],
+                    np_weight_data_type  # pyright: ignore [reportGeneralTypeIssues]
+                )
         elif name == 'transformer.lm_head.weight':
             data.tofile(os.path.join(save_dir, 'model.lm_head.weight.bin'))
         else:
@@ -315,5 +332,11 @@
             new_name = name.replace('transformer.blocks.',
                                     'layers.').replace(
                                         mpt_pattern, ft_pattern)
-            _convert_weight_to_ft_each(save_dir, infer_gpu_num,
-                                       new_name, config, data)
+            _convert_weight_to_ft_each(
+                save_dir,
+                infer_gpu_num,
+                new_name,
+                config,
+                data,
+                np_weight_data_type  # pyright: ignore [reportGeneralTypeIssues]
+            )
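
Why the dtype has to be threaded through every call site: FasterTransformer loads each `.bin` file as a raw, headerless buffer, so a zero-bias file written as `np.float32` is misread by an engine expecting `fp16` weights (twice the expected byte count, reinterpreted values). The sketch below is not part of the patch; it only illustrates the size mismatch this diff fixes. The `--weight_data_type`-style flag mapping and the 4096-element bias are hypothetical examples, not code from the repo.

import numpy as np

# Hypothetical mapping from a conversion script's weight-dtype flag to a
# NumPy dtype (an assumption; mirrors common FT converter CLIs).
np_weight_data_type = {'fp32': np.float32, 'fp16': np.float16}['fp16']

# Old behavior: zero bias always written as float32.
bias_old = np.zeros(4096, dtype=np.float32)           # 16384 bytes on disk

# New behavior: zero bias written in the same dtype as the weights.
bias_new = np.zeros(4096, dtype=np_weight_data_type)  # 8192 bytes on disk

# .tofile() writes raw bytes with no dtype metadata, so an fp16 engine
# reading the old file would see twice the expected element count with
# garbage values; matching dtypes keeps the files consistent.
assert bias_old.nbytes == 2 * bias_new.nbytes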