diff --git a/mergekit/_data/architectures/phi3-small.json b/mergekit/_data/architectures/phi3-small.json
new file mode 100644
index 00000000..7b3a1e80
--- /dev/null
+++ b/mergekit/_data/architectures/phi3-small.json
@@ -0,0 +1,68 @@
+{
+    "model_type": "phi3small",
+    "architectures": [
+        "Phi3SmallForCausalLM"
+    ],
+    "pre_weights": [
+        {
+            "name": "model.embed_tokens.weight",
+            "is_embed": true
+        }
+    ],
+    "post_weights": [
+        {
+            "name": "lm_head.weight",
+            "is_embed": true,
+            "aliases": [
+                "model.embed_tokens.weight"
+            ]
+        },
+        {
+            "name": "model.final_layernorm.weight"
+        },
+        {
+            "name": "model.final_layernorm.bias"
+        }
+    ],
+    "num_layers_config_key": "num_hidden_layers",
+    "layer_templates": {
+        "weights": [
+            {
+                "name": "model.layers.${layer_index}.input_layernorm.weight"
+            },
+            {
+                "name": "model.layers.${layer_index}.input_layernorm.bias"
+            },
+            {
+                "name": "model.layers.${layer_index}.post_attention_layernorm.weight"
+            },
+            {
+                "name": "model.layers.${layer_index}.post_attention_layernorm.bias"
+            },
+            {
+                "name": "model.layers.${layer_index}.self_attn.dense.weight"
+            },
+            {
+                "name": "model.layers.${layer_index}.self_attn.dense.bias"
+            },
+            {
+                "name": "model.layers.${layer_index}.self_attn.query_key_value.weight"
+            },
+            {
+                "name": "model.layers.${layer_index}.self_attn.query_key_value.bias"
+            },
+            {
+                "name": "model.layers.${layer_index}.mlp.up_proj.weight"
+            },
+            {
+                "name": "model.layers.${layer_index}.mlp.up_proj.bias"
+            },
+            {
+                "name": "model.layers.${layer_index}.mlp.down_proj.weight"
+            },
+            {
+                "name": "model.layers.${layer_index}.mlp.down_proj.bias"
+            }
+        ]
+    }
+}
diff --git a/mergekit/_data/architectures/phi3.json b/mergekit/_data/architectures/phi3.json
index 0e741af3..6c606b84 100644
--- a/mergekit/_data/architectures/phi3.json
+++ b/mergekit/_data/architectures/phi3.json
@@ -1,5 +1,5 @@
 {
-    "model_type": "phi",
+    "model_type": "phi3",
     "architectures": [
         "Phi3ForCausalLM"
     ],
@@ -22,28 +22,22 @@
     "layer_templates": {
         "weights": [
             {
-                "name": "model.layers.${layer_index}.input_layernorm.weight",
-                "is_embed": false
+                "name": "model.layers.${layer_index}.input_layernorm.weight"
             },
             {
-                "name": "model.layers.${layer_index}.post_attention_layernorm.weight",
-                "is_embed": false
+                "name": "model.layers.${layer_index}.post_attention_layernorm.weight"
             },
             {
-                "name": "model.layers.${layer_index}.self_attn.o_proj.weight",
-                "is_embed": false
+                "name": "model.layers.${layer_index}.self_attn.o_proj.weight"
             },
             {
-                "name": "model.layers.${layer_index}.self_attn.qkv_proj.weight",
-                "is_embed": false
+                "name": "model.layers.${layer_index}.self_attn.qkv_proj.weight"
             },
             {
-                "name": "model.layers.${layer_index}.mlp.gate_up_proj.weight",
-                "is_embed": false
+                "name": "model.layers.${layer_index}.mlp.gate_up_proj.weight"
             },
             {
-                "name": "model.layers.${layer_index}.mlp.down_proj.weight",
-                "is_embed": false
+                "name": "model.layers.${layer_index}.mlp.down_proj.weight"
             }
         ]
     }