diff --git a/CODEOWNERS b/CODEOWNERS
index 55bca38f156a..d91a3c772670 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -140,7 +140,7 @@ models/demos @eyonland @arakhmati @cfjchu @xanderchin
 models/demos/metal_BERT_large_11 @tt-aho @TT-BrianLiu
 models/demos/wormhole @uaydonat @eyonland @AleksKnezevic @nsmithtt
 models/demos/t3000 @uaydonat @AleksKnezevic @nsmithtt
-models/demos/falcon7b @skhorasganiTT @djordje-tt @uaydonat @pavlejosipovic @pavlepopovic @s-jovic
+models/demos/falcon7b_common @skhorasganiTT @djordje-tt @uaydonat @pavlejosipovic @pavlepopovic @s-jovic
 models/demos/mamba @esmalTT @uaydonat @kpaigwar
 models/demos/wormhole/falcon7b @skhorasganiTT @djordje-tt @uaydonat @pavlejosipovic @pavlepopovic @s-jovic
 models/demos/wormhole/mistral7b @yieldthought @uaydonat @mtairum
diff --git a/docs/source/ttnn/ttnn/get_started.rst b/docs/source/ttnn/ttnn/get_started.rst
index 82ebc525220b..c9a7adc7322c 100644
--- a/docs/source/ttnn/ttnn/get_started.rst
+++ b/docs/source/ttnn/ttnn/get_started.rst
@@ -22,7 +22,7 @@ Install and build the project by following the instructions in the
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Get started with the Falcon 7B demo. Navigate to the `Falcon 7B demo folder
-`_
+`_
 for details.
 
 You can also check our demos for
diff --git a/models/demos/falcon7b/demo/demo.py b/models/demos/falcon7b_common/demo/demo.py
similarity index 98%
rename from models/demos/falcon7b/demo/demo.py
rename to models/demos/falcon7b_common/demo/demo.py
index 215238383cb5..130ce854a1cb 100644
--- a/models/demos/falcon7b/demo/demo.py
+++ b/models/demos/falcon7b_common/demo/demo.py
@@ -12,10 +12,10 @@
 import ttnn
 import tt_lib
 from loguru import logger
-from models.demos.falcon7b.reference.hf_modeling_falcon import FalconConfig
-from models.demos.falcon7b.tt.falcon_causallm import TtFalconCausalLM
-from models.demos.falcon7b.tt.model_config import get_model_config, model_config_entries
-from models.demos.falcon7b.tests.test_utils import initialize_kv_cache, load_hf_model
+from models.demos.falcon7b_common.reference.hf_modeling_falcon import FalconConfig
+from models.demos.falcon7b_common.tt.falcon_causallm import TtFalconCausalLM
+from models.demos.falcon7b_common.tt.model_config import get_model_config, model_config_entries
+from models.demos.falcon7b_common.tests.test_utils import initialize_kv_cache, load_hf_model
 from models.demos.utils.llm_demo_utils import create_benchmark_data, verify_perf, check_tokens_match
 from models.utility_functions import (
     disable_compilation_reports,
diff --git a/models/demos/falcon7b/demo/input_data.json b/models/demos/falcon7b_common/demo/input_data.json
similarity index 100%
rename from models/demos/falcon7b/demo/input_data.json
rename to models/demos/falcon7b_common/demo/input_data.json
diff --git a/models/demos/falcon7b/reference/cpu_demo.py b/models/demos/falcon7b_common/reference/cpu_demo.py
similarity index 98%
rename from models/demos/falcon7b/reference/cpu_demo.py
rename to models/demos/falcon7b_common/reference/cpu_demo.py
index ffe4cd95060f..31aa36a87e85 100644
--- a/models/demos/falcon7b/reference/cpu_demo.py
+++ b/models/demos/falcon7b_common/reference/cpu_demo.py
@@ -9,7 +9,7 @@
 from transformers import AutoTokenizer
 
-from models.falcon7b.reference.hf_modeling_falcon import FalconForCausalLM
+from models.demos.falcon7b_common.reference.hf_modeling_falcon import FalconForCausalLM
 
 import time
 
 falcon1b = "tiiuae/falcon-rw-1b"
diff --git a/models/demos/falcon7b/reference/hf_configuration_falcon.py b/models/demos/falcon7b_common/reference/hf_configuration_falcon.py
similarity index 100%
rename from models/demos/falcon7b/reference/hf_configuration_falcon.py
rename to models/demos/falcon7b_common/reference/hf_configuration_falcon.py
diff --git a/models/demos/falcon7b/reference/hf_modeling_falcon.py b/models/demos/falcon7b_common/reference/hf_modeling_falcon.py
similarity index 100%
rename from models/demos/falcon7b/reference/hf_modeling_falcon.py
rename to models/demos/falcon7b_common/reference/hf_modeling_falcon.py
diff --git a/models/demos/falcon7b/tests/perplexity/run_perplexity_falcon.py b/models/demos/falcon7b_common/tests/perplexity/run_perplexity_falcon.py
similarity index 97%
rename from models/demos/falcon7b/tests/perplexity/run_perplexity_falcon.py
rename to models/demos/falcon7b_common/tests/perplexity/run_perplexity_falcon.py
index cd6464d19d4a..f281f932be1f 100644
--- a/models/demos/falcon7b/tests/perplexity/run_perplexity_falcon.py
+++ b/models/demos/falcon7b_common/tests/perplexity/run_perplexity_falcon.py
@@ -9,9 +9,9 @@
 import time
 import numpy as np
 import ttnn
-from models.demos.falcon7b.tt.falcon_causallm import TtFalconCausalLM
-from models.demos.falcon7b.tt.model_config import get_model_config
-from models.demos.falcon7b.tests.test_utils import initialize_kv_cache, load_hf_model
+from models.demos.falcon7b_common.tt.falcon_causallm import TtFalconCausalLM
+from models.demos.falcon7b_common.tt.model_config import get_model_config
+from models.demos.falcon7b_common.tests.test_utils import initialize_kv_cache, load_hf_model
 from models.datasets.llm_dataset_utils import (
     prepare_textgen_dataset,
     prepare_textgen_dataloader,
diff --git a/models/demos/falcon7b/tests/perplexity/test_perplexity_falcon.py b/models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon.py
similarity index 94%
rename from models/demos/falcon7b/tests/perplexity/test_perplexity_falcon.py
rename to models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon.py
index 1d74e0f10316..63b28cfa03be 100644
--- a/models/demos/falcon7b/tests/perplexity/test_perplexity_falcon.py
+++ b/models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
-from models.demos.falcon7b.tests.perplexity.run_perplexity_falcon import run_test_perplexity
+from models.demos.falcon7b_common.tests.perplexity.run_perplexity_falcon import run_test_perplexity
 from models.utility_functions import is_wormhole_b0
diff --git a/models/demos/falcon7b/tests/perplexity/test_perplexity_falcon_ref.py b/models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon_ref.py
similarity index 92%
rename from models/demos/falcon7b/tests/perplexity/test_perplexity_falcon_ref.py
rename to models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon_ref.py
index 53d21e2d3c75..134235c7d457 100644
--- a/models/demos/falcon7b/tests/perplexity/test_perplexity_falcon_ref.py
+++ b/models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon_ref.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
-from models.demos.falcon7b.tests.perplexity.run_perplexity_falcon import run_test_perplexity
+from models.demos.falcon7b_common.tests.perplexity.run_perplexity_falcon import run_test_perplexity
 
 
 @pytest.mark.parametrize(
diff --git a/models/demos/falcon7b/tests/run_falcon_end_to_end.py b/models/demos/falcon7b_common/tests/run_falcon_end_to_end.py
similarity index 98%
rename from models/demos/falcon7b/tests/run_falcon_end_to_end.py
rename to models/demos/falcon7b_common/tests/run_falcon_end_to_end.py
index 4a483ef77ca9..856c262c344f 100644
--- a/models/demos/falcon7b/tests/run_falcon_end_to_end.py
+++ b/models/demos/falcon7b_common/tests/run_falcon_end_to_end.py
@@ -9,13 +9,13 @@
 from sklearn.metrics import top_k_accuracy_score
 
 import tt_lib
-from models.demos.falcon7b.tt.falcon_causallm import TtFalconCausalLM
+from models.demos.falcon7b_common.tt.falcon_causallm import TtFalconCausalLM
 
-from models.demos.falcon7b.tt.falcon_common import (
+from models.demos.falcon7b_common.tt.falcon_common import (
     PytorchFalconCausalLM,
 )
-from models.demos.falcon7b.tests.test_utils import (
+from models.demos.falcon7b_common.tests.test_utils import (
     get_rand_falcon_inputs,
     concat_device_out_layer_present,
     load_hf_model,
diff --git a/models/demos/falcon7b/tests/test_falcon_attention.py b/models/demos/falcon7b_common/tests/test_falcon_attention.py
similarity index 95%
rename from models/demos/falcon7b/tests/test_falcon_attention.py
rename to models/demos/falcon7b_common/tests/test_falcon_attention.py
index 24bf8ce3caf0..a0acfba0b064 100644
--- a/models/demos/falcon7b/tests/test_falcon_attention.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_attention.py
@@ -6,9 +6,9 @@
 import pytest
 from loguru import logger
 
-from models.demos.falcon7b.tt.falcon_attention import TtFalconAttentionDecode, TtFalconAttentionPrefill
-from models.demos.falcon7b.tt.model_config import get_model_config
-from models.demos.falcon7b.tests.test_utils import get_rand_falcon_inputs, concat_device_outputs, load_hf_model
+from models.demos.falcon7b_common.tt.falcon_attention import TtFalconAttentionDecode, TtFalconAttentionPrefill
+from models.demos.falcon7b_common.tt.model_config import get_model_config
+from models.demos.falcon7b_common.tests.test_utils import get_rand_falcon_inputs, concat_device_outputs, load_hf_model
 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import (
     comp_pcc,
 )
diff --git a/models/demos/falcon7b/tests/test_falcon_causallm.py b/models/demos/falcon7b_common/tests/test_falcon_causallm.py
similarity index 97%
rename from models/demos/falcon7b/tests/test_falcon_causallm.py
rename to models/demos/falcon7b_common/tests/test_falcon_causallm.py
index f6b990efcc62..d6fa93670c78 100644
--- a/models/demos/falcon7b/tests/test_falcon_causallm.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_causallm.py
@@ -6,12 +6,12 @@
 import pytest
 from loguru import logger
 
-from models.demos.falcon7b.tt.falcon_causallm import TtFalconCausalLM
+from models.demos.falcon7b_common.tt.falcon_causallm import TtFalconCausalLM
 
-from models.demos.falcon7b.tt.model_config import (
+from models.demos.falcon7b_common.tt.model_config import (
     get_model_config,
 )
-from models.demos.falcon7b.tests.test_utils import (
+from models.demos.falcon7b_common.tests.test_utils import (
     get_rand_falcon_inputs,
     concat_device_out_layer_present,
     load_hf_model,
diff --git a/models/demos/falcon7b/tests/test_falcon_decoder.py b/models/demos/falcon7b_common/tests/test_falcon_decoder.py
similarity index 95%
rename from models/demos/falcon7b/tests/test_falcon_decoder.py
rename to models/demos/falcon7b_common/tests/test_falcon_decoder.py
index 6f0d2a6cdfac..f6bfcc46ecfb 100644
--- a/models/demos/falcon7b/tests/test_falcon_decoder.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_decoder.py
@@ -6,9 +6,9 @@
 import pytest
 from loguru import logger
 
-from models.demos.falcon7b.tt.falcon_decoder import TtFalconDecoderLayer
-from models.demos.falcon7b.tt.model_config import get_model_config
-from models.demos.falcon7b.tests.test_utils import get_rand_falcon_inputs, concat_device_outputs, load_hf_model
+from models.demos.falcon7b_common.tt.falcon_decoder import TtFalconDecoderLayer
+from models.demos.falcon7b_common.tt.model_config import get_model_config
+from models.demos.falcon7b_common.tests.test_utils import get_rand_falcon_inputs, concat_device_outputs, load_hf_model
 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import (
     comp_pcc,
 )
diff --git a/models/demos/falcon7b/tests/test_falcon_device_perf.py b/models/demos/falcon7b_common/tests/test_falcon_device_perf.py
similarity index 91%
rename from models/demos/falcon7b/tests/test_falcon_device_perf.py
rename to models/demos/falcon7b_common/tests/test_falcon_device_perf.py
index 3f10f6c073df..3cb167dd0ae0 100644
--- a/models/demos/falcon7b/tests/test_falcon_device_perf.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_device_perf.py
@@ -4,13 +4,13 @@
 
 import pytest
 
-from models.demos.falcon7b.tests.run_falcon_end_to_end import (
+from models.demos.falcon7b_common.tests.run_falcon_end_to_end import (
     DECODE_CONFIG_TO_PCC,
     PREFILL_CONFIG_TO_PCC,
     DeviceSetup,
     run_test_FalconCausalLM_end_to_end,
 )
-from models.demos.falcon7b.tt.model_config import get_model_config
+from models.demos.falcon7b_common.tt.model_config import get_model_config
 from models.perf.device_perf_utils import check_device_perf, prep_device_perf_report, run_device_perf
 from models.utility_functions import disable_compilation_reports, disable_persistent_kernel_cache
@@ -85,7 +85,7 @@ def test_device_perf(seq_len, samples):
     margin = 0.03
     num_iterations = 1
     model_config = "BFLOAT16-DRAM".lower()
-    command = f"pytest models/demos/falcon7b/tests/test_falcon_device_perf.py::test_device_perf_wh_bare_metal -k prefill_seq{seq_len}_{model_config}"
+    command = f"pytest models/demos/falcon7b_common/tests/test_falcon_device_perf.py::test_device_perf_wh_bare_metal -k prefill_seq{seq_len}_{model_config}"
     cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]
     subdir = "falcon7b"
diff --git a/models/demos/falcon7b/tests/test_falcon_end_to_end.py b/models/demos/falcon7b_common/tests/test_falcon_end_to_end.py
similarity index 97%
rename from models/demos/falcon7b/tests/test_falcon_end_to_end.py
rename to models/demos/falcon7b_common/tests/test_falcon_end_to_end.py
index cc39d8d8b132..bafe9f191d82 100644
--- a/models/demos/falcon7b/tests/test_falcon_end_to_end.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_end_to_end.py
@@ -7,16 +7,16 @@
 import torch
 import tt_lib
 from loguru import logger
-from models.demos.falcon7b.tests.test_utils import (
+from models.demos.falcon7b_common.tests.test_utils import (
     concat_device_out_layer_present,
     get_rand_falcon_inputs,
     load_hf_model,
 )
-from models.demos.falcon7b.tt.falcon_causallm import TtFalconCausalLM
+from models.demos.falcon7b_common.tt.falcon_causallm import TtFalconCausalLM
 
 # TODO: Remove this?
-from models.demos.falcon7b.tt.falcon_common import PytorchFalconCausalLM
-from models.demos.falcon7b.tt.model_config import get_model_config
+from models.demos.falcon7b_common.tt.falcon_common import PytorchFalconCausalLM
+from models.demos.falcon7b_common.tt.model_config import get_model_config
 from models.utility_functions import (
     disable_compilation_reports,
     disable_persistent_kernel_cache,
diff --git a/models/demos/falcon7b/tests/test_falcon_mlp.py b/models/demos/falcon7b_common/tests/test_falcon_mlp.py
similarity index 94%
rename from models/demos/falcon7b/tests/test_falcon_mlp.py
rename to models/demos/falcon7b_common/tests/test_falcon_mlp.py
index a82386ad5ac1..4a23ed3c6e0a 100644
--- a/models/demos/falcon7b/tests/test_falcon_mlp.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_mlp.py
@@ -5,9 +5,9 @@
 import pytest
 import torch
 from loguru import logger
-from models.demos.falcon7b.tt.falcon_mlp import TtFalconMLPDecode, TtFalconMLPPrefill
-from models.demos.falcon7b.tt.model_config import get_model_config
-from models.demos.falcon7b.tests.test_utils import load_hf_model
+from models.demos.falcon7b_common.tt.falcon_mlp import TtFalconMLPDecode, TtFalconMLPPrefill
+from models.demos.falcon7b_common.tt.model_config import get_model_config
+from models.demos.falcon7b_common.tests.test_utils import load_hf_model
 from models.utility_functions import get_devices_for_t3000, torch2tt_tensor, tt2torch_tensor
 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import comp_allclose, comp_pcc
diff --git a/models/demos/falcon7b/tests/test_falcon_model.py b/models/demos/falcon7b_common/tests/test_falcon_model.py
similarity index 97%
rename from models/demos/falcon7b/tests/test_falcon_model.py
rename to models/demos/falcon7b_common/tests/test_falcon_model.py
index 2a560c48397b..1336b2371be4 100644
--- a/models/demos/falcon7b/tests/test_falcon_model.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_model.py
@@ -5,11 +5,11 @@
 import torch
 import pytest
 from loguru import logger
-from models.demos.falcon7b.tt.falcon_model import TtFalconModel
-from models.demos.falcon7b.tt.model_config import (
+from models.demos.falcon7b_common.tt.falcon_model import TtFalconModel
+from models.demos.falcon7b_common.tt.model_config import (
     get_model_config,
 )
-from models.demos.falcon7b.tests.test_utils import (
+from models.demos.falcon7b_common.tests.test_utils import (
     get_rand_falcon_inputs,
     concat_device_out_layer_present,
     load_hf_model,
diff --git a/models/demos/falcon7b/tests/test_falcon_prefill_decode.py b/models/demos/falcon7b_common/tests/test_falcon_prefill_decode.py
similarity index 97%
rename from models/demos/falcon7b/tests/test_falcon_prefill_decode.py
rename to models/demos/falcon7b_common/tests/test_falcon_prefill_decode.py
index 78afa23a2211..d879fdbc3d3a 100644
--- a/models/demos/falcon7b/tests/test_falcon_prefill_decode.py
+++ b/models/demos/falcon7b_common/tests/test_falcon_prefill_decode.py
@@ -6,12 +6,12 @@
 import pytest
 from loguru import logger
 
-from models.demos.falcon7b.tt.falcon_causallm import TtFalconCausalLM
+from models.demos.falcon7b_common.tt.falcon_causallm import TtFalconCausalLM
 
-from models.demos.falcon7b.tt.model_config import (
+from models.demos.falcon7b_common.tt.model_config import (
     get_model_config,
 )
-from models.demos.falcon7b.tests.test_utils import load_hf_model
+from models.demos.falcon7b_common.tests.test_utils import load_hf_model
 
 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import (
     comp_allclose,
diff --git a/models/demos/falcon7b/tests/test_perf_falcon.py b/models/demos/falcon7b_common/tests/test_perf_falcon.py
similarity index 98%
rename from models/demos/falcon7b/tests/test_perf_falcon.py
rename to models/demos/falcon7b_common/tests/test_perf_falcon.py
index 491808aa7382..4f8d2da64fb6 100644
--- a/models/demos/falcon7b/tests/test_perf_falcon.py
+++ b/models/demos/falcon7b_common/tests/test_perf_falcon.py
@@ -4,13 +4,13 @@
 
 import pytest
 
-from models.demos.falcon7b.tests.run_falcon_end_to_end import (
+from models.demos.falcon7b_common.tests.run_falcon_end_to_end import (
     DECODE_CONFIG_TO_PCC,
     PREFILL_CONFIG_TO_PCC,
     DeviceSetup,
     run_test_FalconCausalLM_end_to_end,
 )
-from models.demos.falcon7b.tt.model_config import (
+from models.demos.falcon7b_common.tt.model_config import (
     get_model_config,
 )
diff --git a/models/demos/falcon7b/tests/test_utils.py b/models/demos/falcon7b_common/tests/test_utils.py
similarity index 99%
rename from models/demos/falcon7b/tests/test_utils.py
rename to models/demos/falcon7b_common/tests/test_utils.py
index 6fc4dd3f9f63..c920107cb827 100644
--- a/models/demos/falcon7b/tests/test_utils.py
+++ b/models/demos/falcon7b_common/tests/test_utils.py
@@ -4,7 +4,7 @@
 
 import torch
 import ttnn
-from models.demos.falcon7b.reference.hf_modeling_falcon import FalconForCausalLM
+from models.demos.falcon7b_common.reference.hf_modeling_falcon import FalconForCausalLM
 from models.utility_functions import torch2tt_tensor, tt2torch_tensor
diff --git a/models/demos/falcon7b/tests/unit_tests/test_falcon_attn_matmul.py b/models/demos/falcon7b_common/tests/unit_tests/test_falcon_attn_matmul.py
similarity index 100%
rename from models/demos/falcon7b/tests/unit_tests/test_falcon_attn_matmul.py
rename to models/demos/falcon7b_common/tests/unit_tests/test_falcon_attn_matmul.py
diff --git a/models/demos/falcon7b/tests/unit_tests/test_falcon_lm_head_matmul_2d.py b/models/demos/falcon7b_common/tests/unit_tests/test_falcon_lm_head_matmul_2d.py
similarity index 97%
rename from models/demos/falcon7b/tests/unit_tests/test_falcon_lm_head_matmul_2d.py
rename to models/demos/falcon7b_common/tests/unit_tests/test_falcon_lm_head_matmul_2d.py
index fa8a543cf77c..96278c058a95 100644
--- a/models/demos/falcon7b/tests/unit_tests/test_falcon_lm_head_matmul_2d.py
+++ b/models/demos/falcon7b_common/tests/unit_tests/test_falcon_lm_head_matmul_2d.py
@@ -8,7 +8,7 @@
 
 import ttnn
 
-from models.demos.falcon7b.tt.falcon_lm_head import falcon_lm_head_matmul_2d
+from models.demos.falcon7b_common.tt.falcon_lm_head import falcon_lm_head_matmul_2d
 from models.utility_functions import comp_pcc, tt2torch_tensor, torch2tt_tensor
diff --git a/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py b/models/demos/falcon7b_common/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
similarity index 99%
rename from models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
rename to models/demos/falcon7b_common/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
index f10e564f70e0..7c8493231119 100644
--- a/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
+++ b/models/demos/falcon7b_common/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py
@@ -7,9 +7,9 @@
 from loguru import logger
 
 import ttnn
-from models.demos.falcon7b.tt.falcon_causallm import falcon_lm_head_matmul
-from models.demos.falcon7b.tt.falcon_mlp import falcon_dense_4h_to_h_matmul, falcon_dense_h_to_4h_matmul
-from models.demos.falcon7b.tt.model_utils import get_falcon_default_core_grid
+from models.demos.falcon7b_common.tt.falcon_causallm import falcon_lm_head_matmul
+from models.demos.falcon7b_common.tt.falcon_mlp import falcon_dense_4h_to_h_matmul, falcon_dense_h_to_4h_matmul
+from models.demos.falcon7b_common.tt.model_utils import get_falcon_default_core_grid
 from models.utility_functions import comp_pcc, tt2torch_tensor, torch2tt_tensor, skip_for_wormhole_b0
 import torch
 import math
diff --git a/models/demos/falcon7b/tt/falcon_attention.py b/models/demos/falcon7b_common/tt/falcon_attention.py
similarity index 99%
rename from models/demos/falcon7b/tt/falcon_attention.py
rename to models/demos/falcon7b_common/tt/falcon_attention.py
index 53f892644b2b..f66a02dbfbe3 100644
--- a/models/demos/falcon7b/tt/falcon_attention.py
+++ b/models/demos/falcon7b_common/tt/falcon_attention.py
@@ -7,7 +7,7 @@
 from torch import nn
 from typing import List, Optional, Tuple
 
-from models.demos.falcon7b.tt.model_utils import get_falcon_default_core_grid
+from models.demos.falcon7b_common.tt.model_utils import get_falcon_default_core_grid
 
 import ttnn
 from models.utility_functions import (
@@ -17,7 +17,7 @@
     is_wormhole_b0,
 )
 
-from models.demos.falcon7b.tt.model_utils import get_weights_cached
+from models.demos.falcon7b_common.tt.model_utils import get_weights_cached
 from models.utility_functions import torch_tensors_to_tt_tensors
diff --git a/models/demos/falcon7b/tt/falcon_causallm.py b/models/demos/falcon7b_common/tt/falcon_causallm.py
similarity index 96%
rename from models/demos/falcon7b/tt/falcon_causallm.py
rename to models/demos/falcon7b_common/tt/falcon_causallm.py
index 91339c4a5786..ae228312cc15 100644
--- a/models/demos/falcon7b/tt/falcon_causallm.py
+++ b/models/demos/falcon7b_common/tt/falcon_causallm.py
@@ -6,9 +6,9 @@
 import torch
 import ttnn
 
-from models.demos.falcon7b.tt.falcon_lm_head import falcon_lm_head_matmul_2d
-from models.demos.falcon7b.tt.falcon_model import TtFalconModelShared
-from models.demos.falcon7b.tt.model_utils import get_falcon_default_core_grid, get_weights_cached
+from models.demos.falcon7b_common.tt.falcon_lm_head import falcon_lm_head_matmul_2d
+from models.demos.falcon7b_common.tt.falcon_model import TtFalconModelShared
+from models.demos.falcon7b_common.tt.model_utils import get_falcon_default_core_grid, get_weights_cached
 from models.utility_functions import (
     is_grayskull,
     is_wormhole_b0,
diff --git a/models/demos/falcon7b/tt/falcon_common.py b/models/demos/falcon7b_common/tt/falcon_common.py
similarity index 100%
rename from models/demos/falcon7b/tt/falcon_common.py
rename to models/demos/falcon7b_common/tt/falcon_common.py
diff --git a/models/demos/falcon7b/tt/falcon_decoder.py b/models/demos/falcon7b_common/tt/falcon_decoder.py
similarity index 95%
rename from models/demos/falcon7b/tt/falcon_decoder.py
rename to models/demos/falcon7b_common/tt/falcon_decoder.py
index fac3f9b45541..00d2b2cecace 100644
--- a/models/demos/falcon7b/tt/falcon_decoder.py
+++ b/models/demos/falcon7b_common/tt/falcon_decoder.py
@@ -6,9 +6,9 @@
 import torch
 import ttnn
 
-from models.demos.falcon7b.tt.falcon_attention import TtFalconAttentionDecode, TtFalconAttentionPrefill
-from models.demos.falcon7b.tt.falcon_mlp import TtFalconMLPDecode, TtFalconMLPPrefill
-from models.demos.falcon7b.tt.model_utils import get_weights_cached, layernorm
+from models.demos.falcon7b_common.tt.falcon_attention import TtFalconAttentionDecode, TtFalconAttentionPrefill
+from models.demos.falcon7b_common.tt.falcon_mlp import TtFalconMLPDecode, TtFalconMLPPrefill
+from models.demos.falcon7b_common.tt.model_utils import get_weights_cached, layernorm
 from torch import nn
diff --git a/models/demos/falcon7b/tt/falcon_lm_head.py b/models/demos/falcon7b_common/tt/falcon_lm_head.py
similarity index 100%
rename from models/demos/falcon7b/tt/falcon_lm_head.py
rename to models/demos/falcon7b_common/tt/falcon_lm_head.py
diff --git a/models/demos/falcon7b/tt/falcon_mlp.py b/models/demos/falcon7b_common/tt/falcon_mlp.py
similarity index 99%
rename from models/demos/falcon7b/tt/falcon_mlp.py
rename to models/demos/falcon7b_common/tt/falcon_mlp.py
index ec77115b4035..6ce6c6991e83 100644
--- a/models/demos/falcon7b/tt/falcon_mlp.py
+++ b/models/demos/falcon7b_common/tt/falcon_mlp.py
@@ -4,7 +4,7 @@
 
 import torch
 import ttnn
-from models.demos.falcon7b.tt.model_utils import get_falcon_default_core_grid, get_weights_cached
+from models.demos.falcon7b_common.tt.model_utils import get_falcon_default_core_grid, get_weights_cached
 from torch import nn
 from models.utility_functions import (
     is_grayskull,
diff --git a/models/demos/falcon7b/tt/falcon_model.py b/models/demos/falcon7b_common/tt/falcon_model.py
similarity index 98%
rename from models/demos/falcon7b/tt/falcon_model.py
rename to models/demos/falcon7b_common/tt/falcon_model.py
index 6c4a42c06924..23a2d62dcb02 100644
--- a/models/demos/falcon7b/tt/falcon_model.py
+++ b/models/demos/falcon7b_common/tt/falcon_model.py
@@ -10,10 +10,10 @@
 
 import tt_lib
 
-from models.demos.falcon7b.tt.falcon_decoder import TtFalconDecoderLayer
-from models.demos.falcon7b.tt.model_utils import get_weights_cached, layernorm
+from models.demos.falcon7b_common.tt.falcon_decoder import TtFalconDecoderLayer
+from models.demos.falcon7b_common.tt.model_utils import get_weights_cached, layernorm
 from models.utility_functions import nearest_32, torch_tensors_to_tt_tensors
-from models.demos.falcon7b.tests.test_utils import create_prefill_attn_mask_for_sharded_softmax
+from models.demos.falcon7b_common.tests.test_utils import create_prefill_attn_mask_for_sharded_softmax
 from tqdm import tqdm
diff --git a/models/demos/falcon7b/tt/model_config.py b/models/demos/falcon7b_common/tt/model_config.py
similarity index 99%
rename from models/demos/falcon7b/tt/model_config.py
rename to models/demos/falcon7b_common/tt/model_config.py
index 718932c2220c..93c1c6844bfa 100644
--- a/models/demos/falcon7b/tt/model_config.py
+++ b/models/demos/falcon7b_common/tt/model_config.py
@@ -195,7 +195,7 @@ def get_model_config(model_config_str, prefill_seq_len=0, decode_batch_size=32):
         "DEFAULT_DTYPE": dtype,
         "DEFAULT_MEMCFG": mem_config,
         "MOVE_DECODER_OUTPUT_BOOL": False,
-        "DEFAULT_CACHE_PATH": Path(f"models/demos/falcon7b/datasets/"),
+        "DEFAULT_CACHE_PATH": Path(f"models/demos/falcon7b_common/datasets/"),
     }  # DEFAULT_MEMCFG also used to determine banking for ttl.device.InitializeDevice
     model_config.update({f"{key}_MEMCFG": mem_config for key in OP_KEYS if key not in NO_MEMCFG})
     model_config.update({f"{key}_DTYPE": dtype for key in OP_KEYS if key not in NO_DTYPE})
diff --git a/models/demos/falcon7b/tt/model_utils.py b/models/demos/falcon7b_common/tt/model_utils.py
similarity index 100%
rename from models/demos/falcon7b/tt/model_utils.py
rename to models/demos/falcon7b_common/tt/model_utils.py
diff --git a/models/demos/grayskull/falcon7b/README.md b/models/demos/grayskull/falcon7b/README.md
index 663c669895ff..5a3abba2d5ec 100644
--- a/models/demos/grayskull/falcon7b/README.md
+++ b/models/demos/grayskull/falcon7b/README.md
@@ -8,7 +8,7 @@ To run the model for a single user you can use the command line input:
 
 To run the demo using prewritten prompts for a batch of 32 users run (currently only supports same token-length inputs):
 
-`pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/grayskull/falcon7b/demo_grayskull.py::test_demo[user_input0-default_mode_stochastic]`
+`pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b_common/demo/input_data.json' models/demos/grayskull/falcon7b/demo_grayskull.py::test_demo[user_input0-default_mode_stochastic]`
 
 ## Inputs
diff --git a/models/demos/grayskull/falcon7b/demo_grayskull.py b/models/demos/grayskull/falcon7b/demo_grayskull.py
index e260989c8d3f..a68d06481a04 100644
--- a/models/demos/grayskull/falcon7b/demo_grayskull.py
+++ b/models/demos/grayskull/falcon7b/demo_grayskull.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
-from models.demos.falcon7b.demo.demo import run_falcon_demo_kv
+from models.demos.falcon7b_common.demo.demo import run_falcon_demo_kv
 
 
 @pytest.mark.parametrize(
diff --git a/models/demos/t3000/falcon7b/demo_t3000.py b/models/demos/t3000/falcon7b/demo_t3000.py
index 0f7ee60f6e1d..3f2b33b29fcb 100644
--- a/models/demos/t3000/falcon7b/demo_t3000.py
+++ b/models/demos/t3000/falcon7b/demo_t3000.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
-from models.demos.falcon7b.demo.demo import run_falcon_demo_kv
+from models.demos.falcon7b_common.demo.demo import run_falcon_demo_kv
 from models.utility_functions import is_wormhole_b0, get_devices_for_t3000
diff --git a/models/demos/wormhole/falcon7b/README.md b/models/demos/wormhole/falcon7b/README.md
index 63fa0b466119..d135215be5f0 100644
--- a/models/demos/wormhole/falcon7b/README.md
+++ b/models/demos/wormhole/falcon7b/README.md
@@ -14,7 +14,7 @@ To run the model for a single user you can use the command line input:
 
 To run the demo using prewritten prompts for a batch of 32 users run (currently only supports same token-length inputs):
 
-`pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-default_mode_1024_stochastic]`
+`pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b_common/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-default_mode_1024_stochastic]`
 
 - **Decoding method**: The default decoding method is top-k/top-p (stochastic) sampling, however greedy decoding can also be used by replacing `stochastic` with `greedy` in the command above.
@@ -22,7 +22,7 @@ To run the demo using prewritten prompts for a batch of 32 users run (currently
 
 To measure the performance of generating the `i`'th token while the KV cache is filled with `i-1` rows (where `i` is 128 in the command below):
 
-`pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-perf_mode_128_stochastic]`
+`pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b_common/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py::test_demo[user_input0-perf_mode_128_stochastic]`
 
 - **Supported sequence lengths**: Currently `i` can only be set to 128, 1024, or 2048 for performance measurement mode.
diff --git a/models/demos/wormhole/falcon7b/demo_wormhole.py b/models/demos/wormhole/falcon7b/demo_wormhole.py
index c15a47faa726..78cea74678a1 100644
--- a/models/demos/wormhole/falcon7b/demo_wormhole.py
+++ b/models/demos/wormhole/falcon7b/demo_wormhole.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
-from models.demos.falcon7b.demo.demo import run_falcon_demo_kv
+from models.demos.falcon7b_common.demo.demo import run_falcon_demo_kv
 from models.utility_functions import is_wormhole_b0
diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh
index d220d10c102f..a4fa77ea44f7 100755
--- a/tests/scripts/run_performance.sh
+++ b/tests/scripts/run_performance.sh
@@ -31,7 +31,7 @@ run_perf_models_llm_javelin() {
     local tt_arch=$1
    local test_marker=$2
 
-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m $test_marker
+    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b_common/tests -m $test_marker
 
     if [ "$tt_arch" == "wormhole_b0" ]; then
         env pytest -n auto models/demos/mamba/tests -m $test_marker --timeout=360
@@ -85,7 +85,7 @@ run_device_perf_models() {
         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/metal_BERT_large_11/tests -m $test_marker
 
         #env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_unet/tests -m $test_marker
 
-        env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests -m $test_marker
+        env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b_common/tests -m $test_marker
     fi
 
     ## Merge all the generated reports
diff --git a/tests/scripts/run_python_model_tests.sh b/tests/scripts/run_python_model_tests.sh
index 25cbf8a5d688..37a04770d3e8 100755
--- a/tests/scripts/run_python_model_tests.sh
+++ b/tests/scripts/run_python_model_tests.sh
@@ -42,17 +42,17 @@ if [ "$ARCH_NAME" != "wormhole_b0" ]; then
     pytest $TT_METAL_HOME/models/demos/resnet/tests/test_resnet18.py
     pytest $TT_METAL_HOME/tests/ttnn/integration_tests/resnet/test_ttnn_functional_resnet50_new.py -k "pretrained_weight_false"
 
     # Falcon tests
-    pytest $TT_METAL_HOME/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py -k "seq_len_128 and in0_BFLOAT16-in1_BFLOAT8_B-out_BFLOAT16-weights_DRAM"
-    pytest $TT_METAL_HOME/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py -k "seq_len_512 and in0_BFLOAT16-in1_BFLOAT8_B-out_BFLOAT16-weights_DRAM"
-    pytest $TT_METAL_HOME/models/demos/falcon7b/tests/unit_tests/test_falcon_attn_matmul.py
+    pytest $TT_METAL_HOME/models/demos/falcon7b_common/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py -k "seq_len_128 and in0_BFLOAT16-in1_BFLOAT8_B-out_BFLOAT16-weights_DRAM"
+    pytest $TT_METAL_HOME/models/demos/falcon7b_common/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py -k "seq_len_512 and in0_BFLOAT16-in1_BFLOAT8_B-out_BFLOAT16-weights_DRAM"
+    pytest $TT_METAL_HOME/models/demos/falcon7b_common/tests/unit_tests/test_falcon_attn_matmul.py
 else # wormhole_b0
     # Falcon tests
     # attn_matmul_from_cache is currently not used in falcon7b
-    pytest $TT_METAL_HOME/models/demos/falcon7b/tests/unit_tests/test_falcon_attn_matmul.py -k "not attn_matmul_from_cache"
+    pytest $TT_METAL_HOME/models/demos/falcon7b_common/tests/unit_tests/test_falcon_attn_matmul.py -k "not attn_matmul_from_cache"
     # higher sequence lengths and different formats trigger memory issues
-    pytest $TT_METAL_HOME/models/demos/falcon7b/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py -k "seq_len_128 and in0_BFLOAT16-in1_BFLOAT8_B-out_BFLOAT16-weights_DRAM"
+    pytest $TT_METAL_HOME/models/demos/falcon7b_common/tests/unit_tests/test_falcon_matmuls_and_bmms_with_mixed_precision.py -k "seq_len_128 and in0_BFLOAT16-in1_BFLOAT8_B-out_BFLOAT16-weights_DRAM"
 
     pytest $TT_METAL_HOME/tests/ttnn/integration_tests/resnet/test_ttnn_functional_resnet50_new.py -k "pretrained_weight_false"
 
     SLOW_MATMULS=1 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest $TT_METAL_HOME/tests/ttnn/integration_tests/stable_diffusion/test_unet_2d_condition_model.py -k 512 --timeout=420
diff --git a/tests/scripts/single_card/run_demos_single_card_n300_tests.sh b/tests/scripts/single_card/run_demos_single_card_n300_tests.sh
index 3756b89d1e89..c277f0aed60a 100755
--- a/tests/scripts/single_card/run_demos_single_card_n300_tests.sh
+++ b/tests/scripts/single_card/run_demos_single_card_n300_tests.sh
@@ -8,7 +8,7 @@ run_n300_falcon7b_tests() {
     echo "LOG_METAL: Running run_t3000_falcon7b_tests"
 
     # Perf verification for 128/1024/2048 seq lens
-    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py ; fail+=$?
+    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/falcon7b_common/demo/input_data.json' models/demos/wormhole/falcon7b/demo_wormhole.py ; fail+=$?
 
     # Record the end time
     end_time=$(date +%s)
diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
index 6f97b8e76368..e5a8247c0666 100755
--- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh
+++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
@@ -9,7 +9,7 @@ run_t3000_falcon7b_tests() {
 
     echo "LOG_METAL: Running run_t3000_falcon7b_tests"
 
-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m "model_perf_t3000" ; fail+=$?
+    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b_common/tests -m "model_perf_t3000" ; fail+=$?
 
     # Record the end time
     end_time=$(date +%s)
diff --git a/tests/scripts/t3000/run_t3000_perplexity_tests.sh b/tests/scripts/t3000/run_t3000_perplexity_tests.sh
index d60f4f8a8d58..2ecd20cd7653 100644
--- a/tests/scripts/t3000/run_t3000_perplexity_tests.sh
+++ b/tests/scripts/t3000/run_t3000_perplexity_tests.sh
@@ -9,7 +9,7 @@ run_t3000_perplexity_tests() {
 
     echo "LOG_METAL: Running run_t3000_perplexity_tests"
 
     # Falcon7B perplexity tests
-    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/perplexity/test_perplexity_falcon.py --timeout=1500 ; fail+=$?
+    WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b_common/tests/perplexity/test_perplexity_falcon.py --timeout=1500 ; fail+=$?
 
     # Record the end time
     end_time=$(date +%s)
diff --git a/tests/ttnn/unit_tests/test_tracer.py b/tests/ttnn/unit_tests/test_tracer.py
index 3c109fe3ee4d..ff7e401a42fe 100644
--- a/tests/ttnn/unit_tests/test_tracer.py
+++ b/tests/ttnn/unit_tests/test_tracer.py
@@ -131,7 +131,7 @@ def test_ttnn_bert(device, use_program_cache, model_name, batch_size, sequence_s
 def test_falcon7b_instruct():
     from functools import partial
     from loguru import logger
 
-    from models.demos.falcon7b.reference.hf_modeling_falcon import FalconConfig, FalconForCausalLM
+    from models.demos.falcon7b_common.reference.hf_modeling_falcon import FalconConfig, FalconForCausalLM
 
     model_version = "tiiuae/falcon-7b-instruct"