From 9b0574745791eab876a24956d58c4a7e0de740d1 Mon Sep 17 00:00:00 2001
From: wgzintel
Date: Fri, 17 May 2024 15:01:31 +0800
Subject: [PATCH 1/3] correct flan-t5 output size

---
 llm_bench/python/benchmark.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llm_bench/python/benchmark.py b/llm_bench/python/benchmark.py
index 8a9d754f65..cddd47bdef 100644
--- a/llm_bench/python/benchmark.py
+++ b/llm_bench/python/benchmark.py
@@ -122,6 +122,10 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
             generated_text_len = len(result[bs_idx]) - input_tokens[bs_idx].numel()
         else:
             generated_text_len = len(result[bs_idx])
+        # Encoder-decoder models expect the `decoder_input_ids` to start with a special token
+        # When counting the output length, subtract 1. The last token does not participate in inference.
+        if model.config.is_encoder_decoder and result[bs_idx][0] == model.config.decoder_start_token_id:
+            generated_text_len = generated_text_len -1
         num_tokens += generated_text_len
         if generated_text_len > max_gen_tokens:
             log.error('Output token size is over max output token size!')

From 77fc95c6446946774666788ba9b2ce42b533ccaa Mon Sep 17 00:00:00 2001
From: wgzintel
Date: Fri, 24 May 2024 10:26:49 +0800
Subject: [PATCH 2/3] format code

---
 llm_bench/python/benchmark.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llm_bench/python/benchmark.py b/llm_bench/python/benchmark.py
index ce5ba74cc7..794598f5f9 100644
--- a/llm_bench/python/benchmark.py
+++ b/llm_bench/python/benchmark.py
@@ -120,15 +120,15 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
     result_md5_list = []
     for bs_idx in range(args['batch_size']):
         if 'sum' not in args['model_name'] and result[bs_idx][:input_token_size].equal(input_tokens[bs_idx]):
-            generated_text_len = len(result[bs_idx]) - input_tokens[bs_idx].numel()
+            generated_token_size = len(result[bs_idx]) - input_tokens[bs_idx].numel()
         else:
-            generated_text_len = len(result[bs_idx])
+            generated_token_size = len(result[bs_idx])
         # Encoder-decoder models expect the `decoder_input_ids` to start with a special token
         # When counting the output length, subtract 1. The last token does not participate in inference.
         if model.config.is_encoder_decoder and result[bs_idx][0] == model.config.decoder_start_token_id:
-            generated_text_len = generated_text_len -1
-        num_tokens += generated_text_len
-        if generated_text_len > max_output_token_size:
+            generated_token_size = generated_token_size - 1
+        num_tokens += generated_token_size
+        if generated_token_size > max_output_token_size:
             log.error('Output token size is over max output token size!')
         result_text = generated_text[bs_idx]
         if args["output_dir"] is not None:

From d8ffa7fc88ecf48f88f9078fb538c7b7777cfbf0 Mon Sep 17 00:00:00 2001
From: wgzintel
Date: Mon, 27 May 2024 23:18:39 +0800
Subject: [PATCH 3/3] output token size should be same as infer count in greedy search

---
 llm_bench/python/benchmark.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llm_bench/python/benchmark.py b/llm_bench/python/benchmark.py
index 794598f5f9..5a850ae93b 100644
--- a/llm_bench/python/benchmark.py
+++ b/llm_bench/python/benchmark.py
@@ -142,6 +142,8 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
     if bench_hook is not None:
         tm_list = bench_hook.get_time_list()
         tm_infer_list = bench_hook.get_time_infer_list()
+        if args['num_beams'] == 1 and generated_token_size != len(tm_infer_list):
+            log.warning(f'Output token size({generated_token_size}) is not equal to infer count({len(tm_infer_list)})')
     iter_data = gen_iterate_data(
         num,
         input_token_size * args['batch_size'],
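
For context, the standalone sketch below (not part of the patch series) illustrates the counting rule the three commits converge on: for encoder-decoder models such as flan-t5, the prepended decoder start token is excluded from the output token count, and under greedy search (num_beams == 1) that count is expected to match the number of inference calls. FakeConfig and count_generated_tokens are hypothetical stand-ins for illustration, not names from benchmark.py.

    class FakeConfig:
        # Hypothetical stand-in for model.config of a flan-t5-style encoder-decoder model.
        is_encoder_decoder = True
        decoder_start_token_id = 0

    def count_generated_tokens(result_ids, config, num_infer_calls):
        """Sketch of the patched counting logic for one batch element."""
        generated_token_size = len(result_ids)
        if config.is_encoder_decoder and result_ids[0] == config.decoder_start_token_id:
            # The start token is supplied via `decoder_input_ids`, not produced by an inference step.
            generated_token_size -= 1
        if generated_token_size != num_infer_calls:
            # Greedy search emits one token per forward pass, so the counts should match.
            print(f'Output token size({generated_token_size}) is not equal to infer count({num_infer_calls})')
        return generated_token_size

    # 5 ids in the decoder output, of which the leading 0 is the start token:
    # 4 generated tokens, matching 4 decoder forward passes.
    print(count_generated_tokens([0, 71, 24, 9, 1], FakeConfig(), 4))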