Falcon7B demos: add an `is_ci_env` fixture and let the demo tests skip the
parametrizations that are redundant in CI, so the CI scripts can invoke the
whole demo test files instead of listing individual test IDs.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm context-line whitespace
against the target tree before applying. The `index` lines are carried over
from the original patch and will not match the blobs this revision produces.

diff --git a/conftest.py b/conftest.py
index 0c9509261f3..9bf09e252f0 100644
--- a/conftest.py
+++ b/conftest.py
@@ -37,6 +37,12 @@ def function_level_defaults(reset_seeds):
     yield
 
 
+@pytest.fixture(scope="function")
+def is_ci_env():
+    """Return True when the CI environment variable is exactly "true"."""
+    return os.getenv("CI") == "true"
+
+
 @pytest.fixture(scope="session")
 def model_location_generator():
     def model_location_generator_(model_version, model_subdir=""):
diff --git a/models/demos/t3000/falcon7b/demo_t3000.py b/models/demos/t3000/falcon7b/demo_t3000.py
index cccf2784139..0f7ee60f6e1 100644
--- a/models/demos/t3000/falcon7b/demo_t3000.py
+++ b/models/demos/t3000/falcon7b/demo_t3000.py
@@ -47,7 +47,14 @@ def test_demo_multichip(
     all_devices,
     use_program_cache,
     async_mode,
+    is_ci_env,
 ):
+    if is_ci_env:
+        if num_devices != 8 or (not expected_greedy_output_path and not expected_perf_metrics):
+            pytest.skip("Skipping test in CI since it provides redundant testing")
+    elif expected_greedy_output_path or expected_perf_metrics:
+        assert num_devices == 8, "8 devices are expected for perf and greedy output verification"
+
     assert is_wormhole_b0(), "Multi-chip is only supported for Wormhole B0"
 
     devices = get_devices_for_t3000(all_devices, num_devices)
diff --git a/models/demos/wormhole/falcon7b/demo_wormhole.py b/models/demos/wormhole/falcon7b/demo_wormhole.py
index a011ddd2342..c15a47faa72 100644
--- a/models/demos/wormhole/falcon7b/demo_wormhole.py
+++ b/models/demos/wormhole/falcon7b/demo_wormhole.py
@@ -4,6 +4,7 @@
 
 import pytest
 from models.demos.falcon7b.demo.demo import run_falcon_demo_kv
+from models.utility_functions import is_wormhole_b0
 
 
 @pytest.mark.parametrize(
@@ -42,7 +43,14 @@ def test_demo(
     get_tt_cache_path,
     device,
     use_program_cache,
+    is_ci_env,
 ):
+    if is_ci_env:
+        if not expected_greedy_output_path and not expected_perf_metrics and len(user_input) != 1:
+            pytest.skip("Skipping test in CI since it provides redundant testing")
+
+    assert is_wormhole_b0(), "This demo is only supported for Wormhole B0"
+
     return run_falcon_demo_kv(
         user_input=user_input,
         batch_size=32,
diff --git a/tests/scripts/single_card/run_demos_single_card_n300_tests.sh b/tests/scripts/single_card/run_demos_single_card_n300_tests.sh
index e44a37d1ab9..7294e7cc675 100755
--- a/tests/scripts/single_card/run_demos_single_card_n300_tests.sh
+++ b/tests/scripts/single_card/run_demos_single_card_n300_tests.sh
@@ -8,11 +8,7 @@ run_n300_falcon7b_tests() {
   echo "LOG_METAL: Running run_t3000_falcon7b_tests"
 
-  # Perf verification for 128/1024/2048 seq lens
-  pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-perf_mode_128_stochastic_verify] ; fail+=$?
-  pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-perf_mode_1024_stochastic_verify] ; fail+=$?
-  pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-perf_mode_2048_stochastic_verify] ; fail+=$?
-  # Output token verification for 32 user prompts
-  pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-default_mode_1024_greedy_verify] ; fail+=$?
+  # Falcon7B demo (perf verification for 128/1024/2048 seq lens and output token verification)
+  pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
diff --git a/tests/scripts/t3000/run_t3000_demo_tests.sh b/tests/scripts/t3000/run_t3000_demo_tests.sh
index 4ec6fb05bc4..626210e64c7 100755
--- a/tests/scripts/t3000/run_t3000_demo_tests.sh
+++ b/tests/scripts/t3000/run_t3000_demo_tests.sh
@@ -48,10 +48,7 @@ run_t3000_falcon7b_tests(){
   echo "LOG_METAL: Running run_t3000_falcon7b_tests"
 
   # Falcon7B demo (perf verification for 128/1024/2048 seq lens and output token verification)
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_128_stochastic_verify] ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_1024_stochastic_verify] ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_2048_stochastic_verify] ; fail+=$?
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_1024_greedy_verify] ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py ; fail+=$?
 
   # Falcon7B perplexity test (prefill and decode)
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-prefill_seq1024_dram] --timeout=1800 ; fail+=$?