Skip to content

Commit

Permalink
#0: Change default max context to 64k
Browse files (browse the repository at this point in the history)
  • Loading branch information
mtairum committed Dec 3, 2024
1 parent 6359e2e commit 0f486cb
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions models/demos/llama3/demo/demo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -844,15 +844,15 @@ def run_llama3_demo(
{"temperature": 0, "top_p": 0.08}, # sampling_params (argmax)
),
( # Max length mode run - Single user, long prompt (adapted to the model being used and architecture)
"models/demos/llama3/demo/input_data_long_128k.json", # input_prompts
"models/demos/llama3/demo/input_data_long_64k.json", # input_prompts
True, # instruct mode
1, # repeat_batches
128 * 1024, # max_seq_len
1, # batch_size
200, # max_generated_tokens
True, # paged_attention
{"page_block_size": 64, "page_max_num_blocks": 2048}, # page_params # TODO This will be serviced by vLLM
{"temperature": 0, "top_p": 0.08}, # sampling_params (argmax)
{"temperature": 0.6, "top_p": 0.08}, # sampling_params (top-p)
),
],
ids=[
Expand Down Expand Up @@ -894,8 +894,8 @@ def test_llama_demo(
is_ci_env,
reset_seeds,
):
if is_ci_env and "long" in input_prompts:
pytest.skip("Do not run the 'max-length test on CI to reduce load")
if is_ci_env and ("long" in input_prompts or optimizations == LlamaOptimizations.accuracy):
pytest.skip("Do not run the 'long-context' or accuracy tests on CI to reduce load")

mesh_device.enable_async(True)

Expand Down

0 comments on commit 0f486cb

Please sign in to comment.