From 457bcd0b850b41928f8d0fab2816749030902b6e Mon Sep 17 00:00:00 2001 From: mtairum Date: Fri, 19 Jul 2024 17:38:00 +0000 Subject: [PATCH] #5337: Refactored all Mistral demo and test scripts to use CI fixture --- models/demos/wormhole/mistral7b/demo/demo.py | 38 ++++--------- .../mistral7b/demo/demo_with_prefill.py | 38 ++++--------- .../mistral7b/tests/test_mistral_attention.py | 25 ++++----- .../tests/test_mistral_attention_prefill.py | 25 ++++----- .../mistral7b/tests/test_mistral_decoder.py | 25 ++++----- .../tests/test_mistral_decoder_prefill.py | 25 ++++----- .../mistral7b/tests/test_mistral_embedding.py | 21 ++++---- .../mistral7b/tests/test_mistral_mlp.py | 19 +++---- .../mistral7b/tests/test_mistral_model.py | 37 ++++--------- .../tests/test_mistral_model_prefill.py | 53 +++++++------------ .../mistral7b/tests/test_mistral_perf.py | 45 ++++------------ .../mistral7b/tests/test_mistral_rms_norm.py | 19 +++---- .../mistral7b/tests/test_mistral_torch.py | 13 ++++- 13 files changed, 147 insertions(+), 236 deletions(-) diff --git a/models/demos/wormhole/mistral7b/demo/demo.py b/models/demos/wormhole/mistral7b/demo/demo.py index 5559052f829c..34e79bef05e9 100644 --- a/models/demos/wormhole/mistral7b/demo/demo.py +++ b/models/demos/wormhole/mistral7b/demo/demo.py @@ -7,14 +7,6 @@ from time import time from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["WH_ARCH_YAML"] = "wormhole_b0_80_arch_eth_dispatch.yaml" - import ttnn import pytest from models.demos.wormhole.mistral7b.tt.mistral_common import ( @@ -26,7 +18,6 @@ ) from models.demos.wormhole.mistral7b.tt.mistral_model import TtTransformer from models.demos.wormhole.mistral7b.tt.mistral_embedding import TtMistralEmbedding -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.tokenizer import Tokenizer @@ -95,6 +86,15 @@ def preprocess_inputs(input_prompts, tokenizer, model_args, dtype, embd, instruc def run_mistral_demo(user_input, batch_size, device, instruct_mode, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + assert batch_size == 32, "Batch size must be 32" embed_on_device = False @@ -269,8 +269,6 @@ def run_mistral_demo(user_input, batch_size, device, instruct_mode, is_ci_env): logger.info("[User {}] {}".format(user, text)) -# Avoid running this test when in CI -@pytest.mark.skipif(os.getenv("CI") == "true", reason="Non-CI tests") @pytest.mark.parametrize( "input_prompts, instruct_weights", [ @@ -280,23 +278,9 @@ def run_mistral_demo(user_input, batch_size, device, instruct_mode, is_ci_env): ids=["general_weights", "instruct_weights"], ) def test_mistral7B_demo(device, use_program_cache, input_prompts, instruct_weights, is_ci_env): - return run_mistral_demo( - user_input=input_prompts, batch_size=32, device=device, instruct_mode=instruct_weights, is_ci_env=is_ci_env - ) + if is_ci_env and instruct_weights == False: + pytest.skip("CI demo test only runs instruct weights (to reduce CI pipeline load)") - -# CI only runs general-weights demo -@pytest.mark.skipif(not os.getenv("CI") == "true", reason="CI-only test") -@pytest.mark.parametrize( - "input_prompts, instruct_weights", - [ - ("models/demos/wormhole/mistral7b/demo/input_data.json", False), - ], - ids=[ - "general_weights", - ], -) -def test_mistral7B_demo_CI(device, use_program_cache, input_prompts, instruct_weights, is_ci_env): return run_mistral_demo( user_input=input_prompts, batch_size=32, device=device, instruct_mode=instruct_weights, is_ci_env=is_ci_env ) diff --git a/models/demos/wormhole/mistral7b/demo/demo_with_prefill.py b/models/demos/wormhole/mistral7b/demo/demo_with_prefill.py index 5db811c81d34..83f029289af3 100644 --- a/models/demos/wormhole/mistral7b/demo/demo_with_prefill.py +++ b/models/demos/wormhole/mistral7b/demo/demo_with_prefill.py @@ -7,14 +7,6 @@ from time import time from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["WH_ARCH_YAML"] = "wormhole_b0_80_arch_eth_dispatch.yaml" - import ttnn import pytest from models.demos.wormhole.mistral7b.tt.mistral_common import ( @@ -29,7 +21,6 @@ ) from models.demos.wormhole.mistral7b.tt.mistral_model import TtTransformer from models.demos.wormhole.mistral7b.tt.mistral_embedding import TtMistralEmbedding -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.tokenizer import Tokenizer @@ -136,6 +127,15 @@ def preprocess_inputs_prefill(input_prompts, tokenizer, model_args, dtype, embd, def run_mistral_demo(user_input, batch_size, device, instruct_mode, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + assert batch_size == 32, "Batch size must be 32" embed_on_device = False @@ -348,8 +348,6 @@ def run_mistral_demo(user_input, batch_size, device, instruct_mode, is_ci_env): logger.info("[User {}] {}".format(user, text)) -# Avoid running this test when in CI -@pytest.mark.skipif(os.getenv("CI") == "true", reason="Non-CI tests") @pytest.mark.parametrize( "input_prompts, instruct_weights", [ @@ -359,23 +357,9 @@ def run_mistral_demo(user_input, batch_size, device, instruct_mode, is_ci_env): ids=["general_weights", "instruct_weights"], ) def test_mistral7B_demo(device, use_program_cache, input_prompts, instruct_weights, is_ci_env): - return run_mistral_demo( - user_input=input_prompts, batch_size=32, device=device, instruct_mode=instruct_weights, is_ci_env=is_ci_env - ) + if is_ci_env and instruct_weights == False: + pytest.skip("CI demo test only runs instruct weights (to reduce CI pipeline load)") - -# CI only runs general-weights demo -@pytest.mark.skipif(not os.getenv("CI") == "true", reason="CI-only test") -@pytest.mark.parametrize( - "input_prompts, instruct_weights", - [ - ("models/demos/wormhole/mistral7b/demo/input_data_questions_prefill_128.json", False), - ], - ids=[ - "general_weights", - ], -) -def test_mistral7B_demo_CI(device, use_program_cache, input_prompts, instruct_weights, is_ci_env): return run_mistral_demo( user_input=input_prompts, batch_size=32, device=device, instruct_mode=instruct_weights, is_ci_env=is_ci_env ) diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_attention.py b/models/demos/wormhole/mistral7b/tests/test_mistral_attention.py index 23417db8ace6..45821b547b1d 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_attention.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_attention.py @@ -5,13 +5,6 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn from models.demos.wormhole.mistral7b.tt.mistral_attention import TtMistralAttention from models.demos.wormhole.mistral7b.tt.mistral_common import ( @@ -19,7 +12,6 @@ prepare_inputs_ttnn, freqs_to_rotation_matrix, ) -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import Attention from models.utility_functions import ( comp_pcc, @@ -29,11 +21,16 @@ @skip_for_grayskull("Requires wormhole_b0 to run") -@pytest.mark.parametrize( - "iterations", - ((1),), -) -def test_mistral_attention_inference(iterations, device, use_program_cache, reset_seeds): +def test_mistral_attention_inference(device, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat8_b pcc = 0.99 @@ -63,7 +60,7 @@ def test_mistral_attention_inference(iterations, device, use_program_cache, rese ) # ttnn.bfloat16 generation_start_pos = 0 - generation_length = iterations + generation_length = 3 all_tests_pass = True tt_model = TtMistralAttention( diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_attention_prefill.py b/models/demos/wormhole/mistral7b/tests/test_mistral_attention_prefill.py index 4c33c0c9879e..daf13bc1c7c1 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_attention_prefill.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_attention_prefill.py @@ -5,13 +5,6 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn from models.demos.wormhole.mistral7b.tt.mistral_attention import TtMistralAttention from models.demos.wormhole.mistral7b.tt.mistral_common import ( @@ -19,7 +12,6 @@ prepare_inputs_ttnn_prefill, get_rot_transformation_mat, ) -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import Attention, precompute_freqs_cis from models.utility_functions import ( comp_pcc, @@ -39,11 +31,16 @@ 4096, ), ) -@pytest.mark.parametrize( - "iterations", - ((1),), -) -def test_mistral_attention_inference(iterations, seq_len, device, use_program_cache, reset_seeds): +def test_mistral_attention_inference(seq_len, device, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat8_b pcc = 0.99 @@ -68,7 +65,7 @@ def test_mistral_attention_inference(iterations, seq_len, device, use_program_ca memory_config=ttnn.DRAM_MEMORY_CONFIG, ) generation_start_pos = 0 - generation_length = iterations + generation_length = 3 all_tests_pass = True tt_model = TtMistralAttention( diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_decoder.py b/models/demos/wormhole/mistral7b/tests/test_mistral_decoder.py index 4419c3c11229..df3fc8c9fbf2 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_decoder.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_decoder.py @@ -5,13 +5,6 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn from models.demos.wormhole.mistral7b.tt.mistral_common import ( precompute_freqs, @@ -19,7 +12,6 @@ freqs_to_rotation_matrix, ) from models.demos.wormhole.mistral7b.tt.mistral_decoder import TtTransformerBlock -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import TransformerBlock from models.utility_functions import ( comp_pcc, @@ -29,11 +21,16 @@ @skip_for_grayskull("Requires wormhole_b0 to run") -@pytest.mark.parametrize( - "iterations", - ((1),), -) -def test_mistral_decoder_inference(device, iterations, use_program_cache, reset_seeds): +def test_mistral_decoder_inference(device, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat8_b model_args = TtModelArgs(device) @@ -48,7 +45,7 @@ def test_mistral_decoder_inference(device, iterations, use_program_cache, reset_ reference_model.load_state_dict(partial_state_dict) generation_start_pos = 0 - generation_length = iterations + generation_length = 2 all_tests_pass = True # pre-compute the rotational embedding matrix and send to device diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_decoder_prefill.py b/models/demos/wormhole/mistral7b/tests/test_mistral_decoder_prefill.py index 7d1c3f4e0216..7644ecd9488e 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_decoder_prefill.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_decoder_prefill.py @@ -5,13 +5,6 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn from models.demos.wormhole.mistral7b.tt.mistral_common import ( get_prefill_rot_mat, @@ -19,7 +12,6 @@ get_rot_transformation_mat, ) from models.demos.wormhole.mistral7b.tt.mistral_decoder import TtTransformerBlock -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import TransformerBlock, precompute_freqs_cis from models.utility_functions import ( comp_pcc, @@ -39,11 +31,16 @@ 4096, ), ) -@pytest.mark.parametrize( - "iterations", - ((1),), -) -def test_mistral_decoder_inference(device, iterations, seq_len, use_program_cache, reset_seeds): +def test_mistral_decoder_inference(device, seq_len, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat8_b model_args = TtModelArgs(device) @@ -58,7 +55,7 @@ def test_mistral_decoder_inference(device, iterations, seq_len, use_program_cach reference_model.load_state_dict(partial_state_dict) generation_start_pos = 0 - generation_length = iterations + generation_length = 2 all_tests_pass = True # pre-compute the rotational embedding matrix and send to device diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_embedding.py b/models/demos/wormhole/mistral7b/tests/test_mistral_embedding.py index 3dab1fb07465..ca9b81b3b4ed 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_embedding.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_embedding.py @@ -5,15 +5,7 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.tt.mistral_embedding import TtMistralEmbedding from models.demos.wormhole.mistral7b.reference.tokenizer import Tokenizer from models.utility_functions import ( @@ -23,7 +15,6 @@ from models.utility_functions import skip_for_grayskull -@skip_for_grayskull("Requires wormhole_b0 to run") class Emb(torch.nn.Module): def __init__(self): super().__init__() @@ -33,7 +24,17 @@ def forward(self, x): return self.emb(x) -def test_mistral_embedding(device, use_program_cache, reset_seeds): +@skip_for_grayskull("Requires wormhole_b0 to run") +def test_mistral_embedding(device, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat16 model_args = TtModelArgs(device) diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_mlp.py b/models/demos/wormhole/mistral7b/tests/test_mistral_mlp.py index 19cce9493ba7..1ef717b65c25 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_mlp.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_mlp.py @@ -6,15 +6,7 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.tt.mistral_mlp import TtMistralMLP from models.demos.wormhole.mistral7b.reference.model import FeedForward from models.utility_functions import ( @@ -36,7 +28,16 @@ 4096, ), ) -def test_mistral_mlp_inference(device, seq_len, use_program_cache, reset_seeds): +def test_mistral_mlp_inference(device, seq_len, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat8_b model_args = TtModelArgs(device=device) state_dict = torch.load(model_args.consolidated_weights_path) diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_model.py b/models/demos/wormhole/mistral7b/tests/test_mistral_model.py index fb9fce748b83..0cf9ec3b54b6 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_model.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_model.py @@ -5,13 +5,6 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn from models.demos.wormhole.mistral7b.tt.mistral_common import ( precompute_freqs, @@ -20,7 +13,6 @@ sample, ) from models.demos.wormhole.mistral7b.tt.mistral_model import TtTransformer -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import Transformer from models.demos.wormhole.mistral7b.reference.tokenizer import Tokenizer from models.utility_functions import ( @@ -41,24 +33,19 @@ def forward(self, x): @skip_for_grayskull("Requires wormhole_b0 to run") @pytest.mark.models_performance_bare_metal -@pytest.mark.parametrize( - "version", - ( - "generative", - # "instruct", # Disabled from testing due to PCC mismatch - ), -) @pytest.mark.parametrize( "iterations", (17,), ) -def test_mistral_model_inference(device, iterations, version, use_program_cache, reset_seeds): - if version == "generative": - instruct = False - elif version == "instruct": - instruct = True - else: - assert "Invalid version. Please use 'generative' or 'instruct'" +def test_mistral_model_inference(device, iterations, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs run_ref_pt = True # Flag to run reference PyTorch model and compare PCC cache_pcc = False # Flag to measure KV cache PCC for all layers @@ -88,11 +75,7 @@ def test_mistral_model_inference(device, iterations, version, use_program_cache, } logger.info("Finished loading weights...") - if instruct: - # The instruct prompts follow the format: [INST] prompt [/INST]. [INST] are strings. is the correspoding bos_id token - prompts = ["[INST] what is the capital of Canada? [/INST]"] * 32 - else: - prompts = ["This is a test"] * 32 + prompts = ["This is a test"] * 32 encoded_prompts = [tokenizer.encode(prompt) for prompt in prompts] diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_model_prefill.py b/models/demos/wormhole/mistral7b/tests/test_mistral_model_prefill.py index 2e1b2fe88b56..b481ab0c5f6b 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_model_prefill.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_model_prefill.py @@ -5,15 +5,6 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - # Prefill prompt files too large to keep in repo - os.environ["MISTRAL_REF_OUTPUT_PATH"] = "/mnt/MLPerf/tt_dnn-models/Mistral/Mixtral-8x7B-v0.1/prefill/" - import ttnn from models.demos.wormhole.mistral7b.tt.mistral_common import ( get_prefill_rot_mat, @@ -22,7 +13,6 @@ sample, ) from models.demos.wormhole.mistral7b.tt.mistral_model import TtTransformer -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import Transformer, precompute_freqs_cis from models.demos.wormhole.mistral7b.reference.tokenizer import Tokenizer from models.utility_functions import ( @@ -53,20 +43,16 @@ def forward(self, x): 4096, ), ) -@pytest.mark.parametrize( - "version", - ( - "generative", - # "instruct", # Disabled from testing due to PCC mismatch - ), -) -def test_mistral_model_inference(device, version, seq_len, use_program_cache, reset_seeds): - if version == "generative": - instruct = False - elif version == "instruct": - instruct = True - else: - assert "Invalid version. Please use 'generative' or 'instruct'" +def test_mistral_model_inference(device, seq_len, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_REF_OUTPUT_PATH"] = "/mnt/MLPerf/tt_dnn-models/Mistral/Mixtral-8x7B-v0.1/prefill/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs run_ref_pt = True # Flag to run reference PyTorch model and compare PCC cache_pcc = False # Flag to measure KV cache PCC for all layers @@ -91,17 +77,14 @@ def test_mistral_model_inference(device, version, seq_len, use_program_cache, re } logger.info("Finished loading weights...") - if instruct: - # The instruct prompts follow the format: [INST] prompt [/INST]. [INST] are strings. is the correspoding bos_id token - prompts = ["[INST] what is the capital of Canada? [/INST]"] * 32 - else: - prompt_file = os.environ["MISTRAL_REF_OUTPUT_PATH"] + "/tale-of-two-cities.txt" - assert os.path.exists( - prompt_file - ), f"Expected prompt file not found: {prompt_file}. Please set the flag 'MISTRAL_REF_OUTPUT_PATH' correctly." - - with open(prompt_file, "r") as f: - prompts = f.read() + prompt_file = os.environ["MISTRAL_REF_OUTPUT_PATH"] + "/tale-of-two-cities.txt" + assert os.path.exists( + prompt_file + ), f"Expected prompt file not found: {prompt_file}. Please set the flag 'MISTRAL_REF_OUTPUT_PATH' correctly." + + with open(prompt_file, "r") as f: + prompts = f.read() + encoded_prompts = tokenizer.encode(prompts)[:seq_len] if run_ref_pt: diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_perf.py b/models/demos/wormhole/mistral7b/tests/test_mistral_perf.py index c5f08e63863f..d5bbe9eb8228 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_perf.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_perf.py @@ -6,13 +6,6 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn from models.demos.wormhole.mistral7b.tt.mistral_common import ( precompute_freqs, @@ -22,7 +15,6 @@ ) from models.demos.wormhole.mistral7b.tt.mistral_model import TtTransformer from models.demos.wormhole.mistral7b.tt.mistral_embedding import TtMistralEmbedding -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import Transformer from models.demos.wormhole.mistral7b.reference.tokenizer import Tokenizer @@ -55,13 +47,23 @@ def forward(self, x): ), ) def test_mistral_model_perf( - device, kv_cache_len, expected_compile_time, expected_inference_time, use_program_cache, reset_seeds + device, kv_cache_len, expected_compile_time, expected_inference_time, use_program_cache, reset_seeds, is_ci_env ): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat8_b model_args = TtModelArgs(device) tokenizer = Tokenizer(model_args.tokenizer_path) + model_args.n_layers = 1 # Clear global profiler state before starting measurements profiler.clear() @@ -188,28 +190,3 @@ def run_inference(tt_model, tt_embd, embd, encoded_prompts, generation_start_pos # Greedy decode the generated token and pass it back in, this is just a perf test tt_out_tok = sample(tt_output_torch, temperature=0, top_p=1) - - -@skip_for_grayskull("Requires eth connected devices to run") -@pytest.mark.models_device_performance_bare_metal -@pytest.mark.parametrize( - "batch, iterations, expected_perf", - ((32, 17, 0.16),), -) -def test_mistral_perf_device(batch, iterations, expected_perf, reset_seeds): - subdir = "ttnn_mistral7b" - margin = 0.03 - command = f"pytest models/demos/wormhole/mistral7b/tests/test_mistral_model.py::test_mistral_model_inference[{iterations}-generative]" - cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"] - - inference_time_key = "AVG DEVICE KERNEL SAMPLES/S" - expected_perf_cols = {inference_time_key: expected_perf} - - post_processed_results = run_device_perf(command, subdir, iterations, cols, batch) - expected_results = check_device_perf(post_processed_results, margin, expected_perf_cols) - prep_device_perf_report( - model_name=f"mistral-7B_{batch}batch", - batch_size=batch, - post_processed_results=post_processed_results, - expected_results=expected_results, - ) diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_rms_norm.py b/models/demos/wormhole/mistral7b/tests/test_mistral_rms_norm.py index 821d4fdd903e..09b0108aea6f 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_rms_norm.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_rms_norm.py @@ -5,15 +5,7 @@ import pytest from loguru import logger import os - -# Set Mistral flags for CI, if CI environment is setup -if os.getenv("CI") == "true": - os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" - import ttnn -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.common.rmsnorm import RMSNorm as TtRMSNorm from models.demos.wormhole.mistral7b.reference.model import RMSNorm as RefRMSNorm from models.utility_functions import ( @@ -24,7 +16,16 @@ @skip_for_grayskull("Requires wormhole_b0 to run") -def test_mistral_rms_norm_inference(device, use_program_cache, reset_seeds): +def test_mistral_rms_norm_inference(device, use_program_cache, reset_seeds, is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + dtype = ttnn.bfloat8_b model_args = TtModelArgs(device) diff --git a/models/demos/wormhole/mistral7b/tests/test_mistral_torch.py b/models/demos/wormhole/mistral7b/tests/test_mistral_torch.py index 0ec0001e88e7..5154c3090661 100644 --- a/models/demos/wormhole/mistral7b/tests/test_mistral_torch.py +++ b/models/demos/wormhole/mistral7b/tests/test_mistral_torch.py @@ -5,12 +5,12 @@ import pytest import json from pathlib import Path +import os # import ttnn from models.demos.wormhole.mistral7b.tt.mistral_common import ( precompute_freqs, ) -from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs from models.demos.wormhole.mistral7b.reference.model import Transformer from models.demos.wormhole.mistral7b.reference.tokenizer import Tokenizer @@ -26,7 +26,16 @@ def forward(self, x): return self.emb(x) -def test_mistral_torch_inference(): +def test_mistral_torch_inference(is_ci_env): + # Set Mistral flags for CI + if is_ci_env: + os.environ["MISTRAL_CKPT_DIR"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_TOKENIZER_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + os.environ["MISTRAL_CACHE_PATH"] = "/mnt/MLPerf/ttnn/models/demos/mistral7b/" + + # This module requires the env paths above for CI runs + from models.demos.wormhole.mistral7b.tt.model_config import TtModelArgs + iterations = 20 model_args = TtModelArgs(device=None)