From 85cfcd00ef1273a13b028180588001df49fa9ba5 Mon Sep 17 00:00:00 2001 From: thxCode Date: Fri, 14 Jun 2024 19:25:31 +0800 Subject: [PATCH] docs: estimate Signed-off-by: thxCode --- file_estimate.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/file_estimate.go b/file_estimate.go index 9e37085..96bb9db 100644 --- a/file_estimate.go +++ b/file_estimate.go @@ -315,8 +315,10 @@ func (gf *GGUFFile) EstimateLLaMACppUsage(opts ...LLaMACppUsageEstimateOption) ( rs := l.Bytes() attnInc += rs } + // https://github.com/ggerganov/llama.cpp/blob/172c8256840ffd882ab9992ecedbb587d9b21f15/llama.cpp#L6986-L6992. rs := o.CacheKeyType.RowSizeOf([]uint64{uint64(a.AttentionKeyLength), nKV, a.AttentionHeadCountKV}) attnInc += rs + // https://github.com/ggerganov/llama.cpp/blob/172c8256840ffd882ab9992ecedbb587d9b21f15/llama.cpp#L7000-L7007. rs = o.CacheValueType.RowSizeOf([]uint64{uint64(a.AttentionValueLength), nKV, a.AttentionHeadCountKV}) attnInc += rs } else {