From 27ff1871b507e4f163d7fc6991915f6bb7057f92 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Wed, 23 Oct 2024 13:22:31 +0200
Subject: [PATCH] hotfix: fix flashllama

---
 .../models/custom_modeling/flash_llama_modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index e4ef36358c6..20841aeb7dd 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -692,7 +692,7 @@ def forward(
         logits, speculative_logits = self.lm_head(hidden_states)
 
         # Used in Granite
-        if not self.logits_scaled:
+        if self.logits_scaling is not None and not self.logits_scaled:
             logits /= self.logits_scaling
             if speculative_logits is not None:
                 speculative_logits /= self.logits_scaling
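
Note on the fix: the "# Used in Granite" comment and the new guard imply that
self.logits_scaling is only populated for Granite-style configs and stays None
on a plain Llama config, where the unguarded `logits /= self.logits_scaling`
would raise a TypeError on every forward pass. Below is a minimal runnable
sketch of the guarded pattern, assuming the divisor is read with
getattr(config, "logits_scaling", None) and that logits_scaled marks heads
whose weights already bake in the scaling; the class and config names are
illustrative, not the repository's actual API.

# Illustrative sketch (not part of the patch): why the None-guard matters.
from types import SimpleNamespace

import torch


class ScaledLMHead(torch.nn.Module):
    def __init__(self, hidden_size: int, vocab_size: int, config) -> None:
        super().__init__()
        self.head = torch.nn.Linear(hidden_size, vocab_size, bias=False)
        # Granite configs carry a logits divisor; plain Llama configs do
        # not, so this stays None and the division below must be skipped.
        self.logits_scaling = getattr(config, "logits_scaling", None)
        # True once the divisor has been folded into the head weights.
        self.logits_scaled = False

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        logits = self.head(hidden_states)
        # The hotfix's guard: without `is not None`, `logits /= None`
        # raises TypeError for every non-Granite model.
        if self.logits_scaling is not None and not self.logits_scaled:
            logits = logits / self.logits_scaling
        return logits


granite_head = ScaledLMHead(16, 32, SimpleNamespace(logits_scaling=8.0))
llama_head = ScaledLMHead(16, 32, SimpleNamespace())  # no scaling attribute
x = torch.randn(2, 16)
print(granite_head(x).shape, llama_head(x).shape)  # both paths now succeed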