Crop Tokens, Not Text (#1065)
Custom tokenizers might not skip special tokens during detokenization,
which can result in incorrect prompt cropping.

ChatGLM3's similarity score jumps from 0.89 to 1 after the fix.
apaniukov authored Oct 25, 2024
1 parent ea8d541 commit 6846052
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions tools/who_what_benchmark/whowhatbench/text_evaluator.py
@@ -204,8 +204,11 @@ def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question):
         inputs = self.tokenizer(prompt, return_tensors="pt")
 
         tokens = model.generate(**inputs, do_sample=False, max_new_tokens=max_new_tokens)
-        out = self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
-        return out[len(prompt) :] if crop_question else out
+
+        if crop_question:
+            tokens = tokens[:, inputs["input_ids"].shape[-1] :]
+
+        return self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
 
     gen_answer_fn = gen_answer_fn or default_gen_answer

