From e63e4a66ccc6cae1c7306a9fdcf3118482a377c8 Mon Sep 17 00:00:00 2001
From: thxCode <thxcode0824@gmail.com>
Date: Thu, 12 Dec 2024 10:39:58 +0800
Subject: [PATCH] refactor: embedding usage estimate

Signed-off-by: thxCode <thxcode0824@gmail.com>
---
 file_estimate__llamacpp.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/file_estimate__llamacpp.go b/file_estimate__llamacpp.go
index 7196760..2a3b0a3 100644
--- a/file_estimate__llamacpp.go
+++ b/file_estimate__llamacpp.go
@@ -359,16 +359,22 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...GGUFRunEstimateOption) (e LLaMAC
 
 	ls := gf.Layers()
 	ioLs, tfLs, _ := ls.Cut([]string{
+		"position_embd.weight",
 		"token_embd.weight",
 		"token_embd_norm.weight",
 		"token_embd_norm.bias",
 		"token_types.weight",
+		"cls.bias",
+		"cls.weight",
+		"cls.output.bias",
+		"cls.output.weight",
 		"output.weight",
 		"output.bias",
 		"output_norm.weight",
 		"output_norm.bias",
 	})
 	ipLs, opLs, _ := ioLs.Cut([]string{
+		"position_embd.weight",
 		"token_embd.weight",
 		"token_embd_norm.weight",
 		"token_embd_norm.bias",
@@ -603,7 +609,7 @@ func (gf *GGUFFile) EstimateLLaMACppRun(opts ...GGUFRunEstimateOption) (e LLaMAC
 			}
 		}
 		// Finally, get the usage of output layer.
-		if a.Type == "model" {
+		if a.Type == "model" && a.AttentionCausal {
 			var outInc uint64
 			if a.Architecture == "mamba" {
 				outInc += inpSMask + inpSSeq