Skip to content

Commit

Permalink
Add Phi3SmallForCausalLM (#405)
Browse files Browse the repository at this point in the history
  • Loading branch information
cg123 authored Aug 23, 2024
1 parent f086664 commit f17616f
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 13 deletions.
68 changes: 68 additions & 0 deletions mergekit/_data/architectures/phi3-small.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"model_type": "phi3small",
"architectures": [
"Phi3SmallForCausalLM"
],
"pre_weights": [
{
"name": "model.embed_tokens.weight",
"is_embed": true
}
],
"post_weights": [
{
"name": "lm_head.weight",
"is_embed": true,
"aliases": [
"model.embed_tokens.weight"
]
},
{
"name": "model.final_layernorm.weight"
},
{
"name": "model.final_layernorm.bias"
}
],
"num_layers_config_key": "num_hidden_layers",
"layer_templates": {
"weights": [
{
"name": "model.layers.${layer_index}.input_layernorm.weight"
},
{
"name": "model.layers.${layer_index}.input_layernorm.bias"
},
{
"name": "model.layers.${layer_index}.post_attention_layernorm.weight"
},
{
"name": "model.layers.${layer_index}.post_attention_layernorm.bias"
},
{
"name": "model.layers.${layer_index}.self_attn.dense.weight"
},
{
"name": "model.layers.${layer_index}.self_attn.dense.bias"
},
{
"name": "model.layers.${layer_index}.self_attn.query_key_value.weight"
},
{
"name": "model.layers.${layer_index}.self_attn.query_key_value.bias"
},
{
"name": "model.layers.${layer_index}.mlp.up_proj.weight"
},
{
"name": "model.layers.${layer_index}.mlp.up_proj.bias"
},
{
"name": "model.layers.${layer_index}.mlp.down_proj.weight"
},
{
"name": "model.layers.${layer_index}.mlp.down_proj.bias"
}
]
}
}
20 changes: 7 additions & 13 deletions mergekit/_data/architectures/phi3.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
- "model_type": "phi",
+ "model_type": "phi3",
"architectures": [
"Phi3ForCausalLM"
],
Expand All @@ -22,28 +22,22 @@
"layer_templates": {
"weights": [
{
- "name": "model.layers.${layer_index}.input_layernorm.weight",
- "is_embed": false
+ "name": "model.layers.${layer_index}.input_layernorm.weight"
},
{
- "name": "model.layers.${layer_index}.post_attention_layernorm.weight",
- "is_embed": false
+ "name": "model.layers.${layer_index}.post_attention_layernorm.weight"
},
{
- "name": "model.layers.${layer_index}.self_attn.o_proj.weight",
- "is_embed": false
+ "name": "model.layers.${layer_index}.self_attn.o_proj.weight"
},
{
- "name": "model.layers.${layer_index}.self_attn.qkv_proj.weight",
- "is_embed": false
+ "name": "model.layers.${layer_index}.self_attn.qkv_proj.weight"
},
{
- "name": "model.layers.${layer_index}.mlp.gate_up_proj.weight",
- "is_embed": false
+ "name": "model.layers.${layer_index}.mlp.gate_up_proj.weight"
},
{
- "name": "model.layers.${layer_index}.mlp.down_proj.weight",
- "is_embed": false
+ "name": "model.layers.${layer_index}.mlp.down_proj.weight"
}
]
}
Expand Down

0 comments on commit f17616f

Please sign in to comment.