
fix token counting (#520)
gongy authored Dec 13, 2023
1 parent 0fce7f2 commit 907ba31
Showing 1 changed file with 3 additions and 2 deletions.
06_gpu_and_ml/vllm_mixtral.py: 5 changes (3 additions, 2 deletions)
@@ -136,16 +136,17 @@ async def completion_stream(self, user_question):
             sampling_params,
             request_id,
         )
-        index = 0
+        index, num_tokens = 0, 0
         async for output in result_generator:
             if "\ufffd" == output.outputs[0].text[-1]:
                 continue
             text_delta = output.outputs[0].text[index:]
             index = len(output.outputs[0].text)
+            num_tokens = len(output.outputs[0].token_ids)
 
             yield text_delta
 
-        print(f"Generated {index} tokens in {time.time() - t0:.2f}s")
+        print(f"Generated {num_tokens} tokens in {time.time() - t0:.2f}s")
 
 
 # ## Run the model
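
The underlying bug: in the streaming loop, index counts characters of the cumulative output text that have already been yielded, so printing it as a token count misreported the number of tokens. The fix tracks num_tokens from len(output.outputs[0].token_ids) instead. Below is a minimal, self-contained sketch of the corrected counting logic; the _CompletionOutput/_RequestOutput stubs and the fake generator only mimic the shape of vLLM's streaming outputs and are illustrative stand-ins, not the real engine API.

import asyncio
import time
from dataclasses import dataclass, field


# Stub types that only mimic the shape of vLLM's streaming RequestOutput:
# outputs[0].text is the cumulative generated text, outputs[0].token_ids the
# token ids produced so far. These are hypothetical stand-ins for this sketch.
@dataclass
class _CompletionOutput:
    text: str = ""
    token_ids: list[int] = field(default_factory=list)


@dataclass
class _RequestOutput:
    outputs: list[_CompletionOutput] = field(default_factory=list)


async def _fake_result_generator():
    # Each step yields the cumulative text and token ids so far, mirroring
    # how a streaming engine reports partial results.
    steps = [("Hello", [1]), ("Hello wor", [1, 2]), ("Hello world!", [1, 2, 3])]
    for text, token_ids in steps:
        yield _RequestOutput(outputs=[_CompletionOutput(text, token_ids)])


async def completion_stream():
    t0 = time.time()
    index, num_tokens = 0, 0  # characters already yielded vs. tokens generated
    async for output in _fake_result_generator():
        text = output.outputs[0].text
        if text and text[-1] == "\ufffd":
            continue  # skip outputs ending in a partial multi-byte character
        text_delta = text[index:]
        index = len(text)  # a character position, not a token count
        num_tokens = len(output.outputs[0].token_ids)  # the actual token count
        yield text_delta

    print(f"Generated {num_tokens} tokens in {time.time() - t0:.2f}s")


async def main():
    async for delta in completion_stream():
        print(delta, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(main())

Running the sketch streams the text deltas and reports 3 tokens, whereas index ends up at 12 (the length of "Hello world!"), which is the figure the old print statement would have reported.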
