
fix token counting (#520)
gongy authored Dec 13, 2023
1 parent 0fce7f2 commit 907ba31
Showing 1 changed file with 3 additions and 2 deletions.
06_gpu_and_ml/vllm_mixtral.py: 5 changes (3 additions, 2 deletions)
@@ -136,16 +136,17 @@ async def completion_stream(self, user_question):
             sampling_params,
             request_id,
         )
-        index = 0
+        index, num_tokens = 0, 0
         async for output in result_generator:
             if "\ufffd" == output.outputs[0].text[-1]:
                 continue
             text_delta = output.outputs[0].text[index:]
             index = len(output.outputs[0].text)
+            num_tokens = len(output.outputs[0].token_ids)
 
             yield text_delta
 
-        print(f"Generated {index} tokens in {time.time() - t0:.2f}s")
+        print(f"Generated {num_tokens} tokens in {time.time() - t0:.2f}s")
 
 
 # ## Run the model
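
The underlying bug: in the streaming loop, index counts characters of the cumulative output text that have already been yielded, so printing it as a token count misreported the number of tokens. The fix tracks num_tokens from len(output.outputs[0].token_ids) instead. Below is a minimal, self-contained sketch of the corrected counting logic; the _CompletionOutput/_RequestOutput stubs and the fake generator only mimic the shape of vLLM's streaming outputs and are illustrative stand-ins, not the real engine API.

import asyncio
import time
from dataclasses import dataclass, field


# Stub types that only mimic the shape of vLLM's streaming RequestOutput:
# outputs[0].text is the cumulative generated text, outputs[0].token_ids the
# token ids produced so far. These are hypothetical stand-ins for this sketch.
@dataclass
class _CompletionOutput:
    text: str = ""
    token_ids: list[int] = field(default_factory=list)


@dataclass
class _RequestOutput:
    outputs: list[_CompletionOutput] = field(default_factory=list)


async def _fake_result_generator():
    # Each step yields the cumulative text and token ids so far, mirroring
    # how a streaming engine reports partial results.
    steps = [("Hello", [1]), ("Hello wor", [1, 2]), ("Hello world!", [1, 2, 3])]
    for text, token_ids in steps:
        yield _RequestOutput(outputs=[_CompletionOutput(text, token_ids)])


async def completion_stream():
    t0 = time.time()
    index, num_tokens = 0, 0  # characters already yielded vs. tokens generated
    async for output in _fake_result_generator():
        text = output.outputs[0].text
        if text and text[-1] == "\ufffd":
            continue  # skip outputs ending in a partial multi-byte character
        text_delta = text[index:]
        index = len(text)  # a character position, not a token count
        num_tokens = len(output.outputs[0].token_ids)  # the actual token count
        yield text_delta

    print(f"Generated {num_tokens} tokens in {time.time() - t0:.2f}s")


async def main():
    async for delta in completion_stream():
        print(delta, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(main())

Running the sketch streams the text deltas and reports 3 tokens, whereas index ends up at 12 (the length of "Hello world!"), which is the figure the old print statement would have reported.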
