From e63e4a66ccc6cae1c7306a9fdcf3118482a377c8 Mon Sep 17 00:00:00 2001 From: thxCode Date: Thu, 12 Dec 2024 10:39:58 +0800 Subject: [PATCH] refactor: embedding usage estimate Signed-off-by: thxCode --- file_estimate__llamacpp.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/file_estimate__llamacpp.go b/file_estimate__llamacpp.go index 7196760..2a3b0a3 100644 --- a/file_estimate__llamacpp.go +++ b/file_estimate__llamacpp.go @@ -359,16 +359,22 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...GGUFRunEstimateOption) (e LLaMAC ls := gf.Layers() ioLs, tfLs, _ := ls.Cut([]string{ + "position_embd.weight", "token_embd.weight", "token_embd_norm.weight", "token_embd_norm.bias", "token_types.weight", + "cls.bias", + "cls.weight", + "cls.output.bias", + "cls.output.weight", "output.weight", "output.bias", "output_norm.weight", "output_norm.bias", }) ipLs, opLs, _ := ioLs.Cut([]string{ + "position_embd.weight", "token_embd.weight", "token_embd_norm.weight", "token_embd_norm.bias", @@ -603,7 +609,7 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...GGUFRunEstimateOption) (e LLaMAC } } // Finally, get the usage of output layer. - if a.Type == "model" { + if a.Type == "model" && a.AttentionCausal { var outInc uint64 if a.Architecture == "mamba" { outInc += inpSMask + inpSSeq