diff --git a/models/experimental/bert_large_performant/unit_tests/fused_ops/test_bert_large_fused_ln.py b/models/experimental/bert_large_performant/unit_tests/fused_ops/test_bert_large_fused_ln.py index 9d25a66ef7c..f159033e22c 100644 --- a/models/experimental/bert_large_performant/unit_tests/fused_ops/test_bert_large_fused_ln.py +++ b/models/experimental/bert_large_performant/unit_tests/fused_ops/test_bert_large_fused_ln.py @@ -6,7 +6,6 @@ import torch -import tt_lib as ttl from tt_lib.utils import ( pad_weight, @@ -21,8 +20,6 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_config): torch.manual_seed(1234) - tensor = ttl.tensor - epsf = 1e-2 test_dims = ((batch, 1, 384, 1024),) @@ -40,22 +37,22 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c if test_id >= 1: gamma = torch.rand(1, 1, 1, W) * 2 - 1 gammah32 = gamma.reshape([1, 1, -1, 32]) - ttgamma = tensor.Tensor( + ttgamma = ttnn.Tensor( gammah32.reshape(-1).tolist(), gammah32.shape, dtype, - tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, device, in0_mem_config, ) if test_id >= 2: beta = torch.rand(1, 1, 1, W) * 2.0 - 1.1 betah32 = beta.reshape([1, 1, -1, 32]) - ttbeta = tensor.Tensor( + ttbeta = ttnn.Tensor( betah32.reshape(-1).tolist(), betah32.shape, dtype, - tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, device, in0_mem_config, ) @@ -66,19 +63,19 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c if test_id < 3: y *= 0.0 # zero out the y to exclude x+y from reference calculation - ttx = tensor.Tensor( + ttx = ttnn.Tensor( tilize_to_list(x), [N, C, H, W], dtype, - tensor.Layout.TILE, + ttnn.TILE_LAYOUT, device, in0_mem_config, ) - tty = tensor.Tensor( + tty = ttnn.Tensor( tilize_to_list(y), [N, C, H, W], dtype, - tensor.Layout.TILE, + ttnn.TILE_LAYOUT, device, in0_mem_config, ) @@ -129,22 +126,22 @@ def run_layernorm_tests(device, test_id, batch, dtype, in0_mem_config, out_mem_c @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT16,), + (ttnn.bfloat16,), ids=["BFLOAT16"], ) @pytest.mark.parametrize( diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_concatenate_heads.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_concatenate_heads.py index 2f74e3e193e..ae5e6f5af01 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_concatenate_heads.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_concatenate_heads.py @@ -6,7 +6,6 @@ import ttnn -import tt_lib as ttl from models.utility_functions import ( comp_pcc, ) @@ -25,17 +24,17 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config, A = torch.randn(a_shape) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - 
.to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) - out = ttnn.experimental.concatenate_heads(a_t, ttl.tensor.CoreCoord(12, 9), memory_config=out_mem_config) + out = ttnn.experimental.concatenate_heads(a_t, ttnn.CoreCoord(12, 9), memory_config=out_mem_config) # Check memory of inputs and outputs assert a_t.memory_config().buffer_type == in0_mem_config.buffer_type @@ -45,7 +44,7 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config, logger.debug(f"out: {out.memory_config().buffer_type} and {out.get_dtype()}") assert out.get_legacy_shape() == [batch, 1, 384, 1024] - tt_host_rm_out = out.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm_out = out.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm_out = tt_host_rm_out.to_torch() ref_out = torch.transpose(A, -3, -2).reshape([batch, 1, 384, 1024]) @@ -61,22 +60,22 @@ def run_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config, @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) @pytest.mark.parametrize( @@ -93,19 +92,19 @@ def test_bert_large_concatenate_heads_test(device, batch, dtype, in0_mem_config, def test_bert_large_concatenate_heads_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_bert_large_concatenate_heads_test(device, 9, dtype, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_bert_large_concatenate_heads_test(device, 9, dtype, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff1_matmul.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff1_matmul.py index 3aacd802cb4..f86924bc3a6 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff1_matmul.py +++ 
b/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff1_matmul.py @@ -8,7 +8,6 @@ import numpy as np -import tt_lib as ttl import ttnn from models.utility_functions import ( comp_pcc, @@ -32,12 +31,9 @@ def run_bert_large_ff1_matmul_test( pytest.skip(f"Grid size {compute_grid_size} is not supported") if ( - dtype == ttl.tensor.DataType.BFLOAT16 - and out_mem_config.buffer_type == ttl.tensor.BufferType.L1 - and ( - in0_mem_config.buffer_type == ttl.tensor.BufferType.L1 - or in1_mem_config.buffer_type == ttl.tensor.BufferType.L1 - ) + dtype == ttnn.bfloat16 + and out_mem_config.buffer_type == ttnn.BufferType.L1 + and (in0_mem_config.buffer_type == ttnn.BufferType.L1 or in1_mem_config.buffer_type == ttnn.BufferType.L1) ): pytest.skip("Skipping test since these tensors won't fit on device!") @@ -52,36 +48,36 @@ def run_bert_large_ff1_matmul_test( BIAS = torch.randint(-20, 20, bias_shape, dtype=torch.float) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) if bias_mem_config is not None: bias_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( BIAS.flatten().tolist(), bias_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) .pad(bias_pad_shape, [0, 0, 0, 0], 0) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, bias_mem_config) ) else: @@ -107,7 +103,7 @@ def run_bert_large_ff1_matmul_test( logger.debug(f"out is on: {t2.memory_config().buffer_type}") assert t2.get_legacy_shape() == [9, 1, 384, 4096] - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() ref_bmm = torch.matmul(A, B) @@ -133,16 +129,16 @@ def run_bert_large_ff1_matmul_test( @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "bias_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, None, ), ids=["bias_DRAM", "bias_L1", "bias_None"], @@ -150,22 +146,22 @@ def run_bert_large_ff1_matmul_test( @pytest.mark.parametrize( "in1_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in1_DRAM", "in1_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, 
ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) def test_bert_large_ff1_matmul_test( @@ -190,8 +186,8 @@ def test_bert_large_ff1_matmul_test( def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_bert_large_ff1_matmul_test( device, @@ -204,9 +200,9 @@ def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache): ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_bert_large_ff1_matmul_test( device, @@ -219,6 +215,6 @@ def test_bert_large_ff1_matmul_with_program_cache(device, use_program_cache): ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff2_matmul.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff2_matmul.py index 4b2e02b31ce..0de1c97171d 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff2_matmul.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_ff2_matmul.py @@ -8,7 +8,7 @@ import numpy as np -import tt_lib as ttl +import ttnn from models.utility_functions import ( comp_pcc, ) @@ -34,36 +34,36 @@ def run_bert_large_ff2_matmul_test(device, dtype, in0_mem_config, in1_mem_config BIAS = torch.randint(-20, 20, bias_shape, dtype=torch.float) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) if bias_mem_config is not None: bias_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( BIAS.flatten().tolist(), bias_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) .pad(bias_pad_shape, [0, 0, 0, 0], 0) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, bias_mem_config) ) else: @@ -84,7 +84,7 @@ def run_bert_large_ff2_matmul_test(device, dtype, in0_mem_config, in1_mem_config logger.debug(f"out is on: {t2.memory_config().buffer_type}") assert t2.get_legacy_shape() == [9, 1, 384, 1024] - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() ref_bmm = torch.matmul(A, B) @@ -100,16 +100,16 @@ def run_bert_large_ff2_matmul_test(device, dtype, in0_mem_config, in1_mem_config @pytest.mark.parametrize( "out_mem_config", ( - 
ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "bias_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, None, ), ids=["bias_DRAM", "bias_L1", "bias_None"], @@ -117,22 +117,22 @@ def run_bert_large_ff2_matmul_test(device, dtype, in0_mem_config, in1_mem_config @pytest.mark.parametrize( "in1_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in1_DRAM", "in1_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) def test_bert_large_ff2_matmul_test( @@ -148,8 +148,8 @@ def test_bert_large_ff2_matmul_test( def test_bert_large_ff2_matmul_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_bert_large_ff2_matmul_test( device, @@ -161,9 +161,9 @@ def test_bert_large_ff2_matmul_with_program_cache(device, use_program_cache): ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_bert_large_ff2_matmul_test( device, @@ -175,6 +175,6 @@ def test_bert_large_ff2_matmul_with_program_cache(device, use_program_cache): ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_fused_qkv_matmul.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_fused_qkv_matmul.py index 346f8bf6fa2..83e06593c93 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_fused_qkv_matmul.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_fused_qkv_matmul.py @@ -7,7 +7,7 @@ import numpy as np -import tt_lib as ttl +import ttnn from models.utility_functions import ( comp_pcc, ) @@ 
-35,35 +35,35 @@ def run_bert_large_fused_qkv_matmul_test( BIAS = torch.randint(-20, 20, bias_shape, dtype=torch.float) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) if bias_mem_config is not None: bias_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( BIAS.flatten().tolist(), bias_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) .pad(bias_pad_shape, [0, 0, 0, 0], 0) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, bias_mem_config) ) else: @@ -84,7 +84,7 @@ def run_bert_large_fused_qkv_matmul_test( logger.debug(f"out is on: {t2.memory_config().buffer_type}") assert t2.get_legacy_shape() == [9, 1, 384, 3072] - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() ref_bmm = torch.matmul(A, B) @@ -100,16 +100,16 @@ def run_bert_large_fused_qkv_matmul_test( @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "bias_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, None, ), ids=["bias_DRAM", "bias_L1", "bias_None"], @@ -117,22 +117,22 @@ def run_bert_large_fused_qkv_matmul_test( @pytest.mark.parametrize( "in1_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in1_DRAM", "in1_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) def test_bert_large_fused_qkv_matmul_test( @@ -148,8 +148,8 @@ def test_bert_large_fused_qkv_matmul_test( def test_bert_large_fused_qkv_matmul_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_bert_large_fused_qkv_matmul_test( device, @@ -161,9 +161,9 @@ def test_bert_large_fused_qkv_matmul_with_program_cache(device, use_program_cach ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, 
dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_bert_large_fused_qkv_matmul_test( device, @@ -175,6 +175,6 @@ def test_bert_large_fused_qkv_matmul_with_program_cache(device, use_program_cach ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_matmuls_and_bmms_with_mixed_precision.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_matmuls_and_bmms_with_mixed_precision.py index d7cea0e5de8..12769ea6b23 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_matmuls_and_bmms_with_mixed_precision.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_matmuls_and_bmms_with_mixed_precision.py @@ -8,7 +8,6 @@ import numpy as np -import tt_lib as ttl import ttnn from models.utility_functions import ( comp_pcc, @@ -42,14 +41,11 @@ def run_bert_large_matmul_test( elif bert_large_op == custom_matmuls.bert_large_ff1_matmul: if ( - in0_dtype == ttl.tensor.DataType.BFLOAT16 - and in1_dtype == ttl.tensor.DataType.BFLOAT16 - and out_dtype == ttl.tensor.DataType.BFLOAT16 - and out_mem_config.buffer_type == ttl.tensor.BufferType.L1 - and ( - in0_mem_config.buffer_type == ttl.tensor.BufferType.L1 - or in1_mem_config.buffer_type == ttl.tensor.BufferType.L1 - ) + in0_dtype == ttnn.bfloat16 + and in1_dtype == ttnn.bfloat16 + and out_dtype == ttnn.bfloat16 + and out_mem_config.buffer_type == ttnn.BufferType.L1 + and (in0_mem_config.buffer_type == ttnn.BufferType.L1 or in1_mem_config.buffer_type == ttnn.BufferType.L1) ): pytest.skip("Skipping test since these tensors won't fit on device!") @@ -84,31 +80,31 @@ def run_bert_large_matmul_test( BIAS = torch.randint(-20, 20, bias_shape, dtype=torch.float) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, in0_dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, in1_dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) if bias_mem_config is not None: bias_t = ( - ttl.tensor.Tensor(BIAS, bias_dtype) + ttnn.Tensor(BIAS, bias_dtype) .pad(bias_pad_shape, [0, 0, 0, 0], 0) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, bias_mem_config) ) else: @@ -136,7 +132,7 @@ def run_bert_large_matmul_test( logger.debug(f"out ({expected_output_shape}): {t2.memory_config().buffer_type} and {t2.get_dtype()}") assert t2.get_legacy_shape() == expected_output_shape - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() ref_bmm = torch.matmul(A, B) @@ -192,23 +188,23 @@ def run_bert_large_bmm_test( B = torch.randn(b_shape) - 0.95 a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, 
in0_dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, in1_dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) @@ -226,7 +222,7 @@ def run_bert_large_bmm_test( logger.debug(f"out ({expected_output_shape}): {t2.memory_config().buffer_type} and {t2.get_dtype()}") assert t2.get_legacy_shape() == expected_output_shape - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() if bert_large_op == custom_matmuls.bert_large_pre_softmax_bmm: @@ -247,38 +243,38 @@ def run_bert_large_bmm_test( "in0_mem_config, in1_mem_config, bias_mem_config, out_mem_config", ( ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.DRAM_MEMORY_CONFIG, + ttnn.DRAM_MEMORY_CONFIG, + ttnn.DRAM_MEMORY_CONFIG, ), ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.L1_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ), ids=["DRAM", "L1"], ) @pytest.mark.parametrize( "out_dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["out_BFLOAT8_B", "out_BFLOAT16"], ) @pytest.mark.parametrize( "bias_dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["bias_BFLOAT8_B", "bias_BFLOAT16"], ) @pytest.mark.parametrize( "in1_dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["in1_BFLOAT8_B", "in1_BFLOAT16"], ) @pytest.mark.parametrize( "in0_dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["in0_BFLOAT8_B", "in0_BFLOAT16"], ) @pytest.mark.parametrize( @@ -338,31 +334,31 @@ def test_bert_large_matmul( "in0_mem_config, in1_mem_config, out_mem_config", ( ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.DRAM_MEMORY_CONFIG, + ttnn.DRAM_MEMORY_CONFIG, ), ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.L1_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ), ids=["DRAM", 
"L1"], ) @pytest.mark.parametrize( "out_dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["out_BFLOAT8_B", "out_BFLOAT16"], ) @pytest.mark.parametrize( "in1_dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["in1_BFLOAT8_B", "in1_BFLOAT16"], ) @pytest.mark.parametrize( "in0_dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["in0_BFLOAT8_B", "in0_BFLOAT16"], ) @pytest.mark.parametrize( diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_post_softmax_bmm.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_post_softmax_bmm.py index 80069d5e749..9ccdee0c0e9 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_post_softmax_bmm.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_post_softmax_bmm.py @@ -8,7 +8,7 @@ import numpy as np -import tt_lib as ttl +import ttnn from models.utility_functions import ( comp_pcc, ) @@ -36,23 +36,23 @@ def run_bert_large_post_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_ B = torch.randn(b_shape) - 0.95 a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) @@ -67,7 +67,7 @@ def run_bert_large_post_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_ logger.debug(f"out is on: {t2.memory_config().buffer_type}") assert t2.get_legacy_shape() == out_shape - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() ref_bmm = torch.matmul(A.reshape([9, 16, 384, 384]), B) @@ -84,30 +84,30 @@ def run_bert_large_post_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_ @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "in1_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in1_DRAM", "in1_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) def test_bert_large_post_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_config, out_mem_config, request): @@ -115,19 +115,19 @@ def test_bert_large_post_softmax_bmm_test(device, dtype, 
in0_mem_config, in1_mem def test_bert_large_post_softmax_bmm_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_bert_large_post_softmax_bmm_test(device, dtype, mem_config, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_bert_large_post_softmax_bmm_test(device, dtype, mem_config, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_pre_softmax_bmm.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_pre_softmax_bmm.py index 7fe177da9ed..8c655711354 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_pre_softmax_bmm.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_pre_softmax_bmm.py @@ -8,7 +8,7 @@ import numpy as np -import tt_lib as ttl +import ttnn from models.utility_functions import ( comp_pcc, ) @@ -30,23 +30,23 @@ def run_bert_large_pre_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_c B = torch.randn(b_shape) - 0.95 a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) @@ -60,7 +60,7 @@ def run_bert_large_pre_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_c logger.debug(f"in1 is on: {b_t.memory_config().buffer_type}") logger.debug(f"out is on: {t2.memory_config().buffer_type}") - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() ref_bmm = torch.matmul(A, B) @@ -77,30 +77,30 @@ def run_bert_large_pre_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_c @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "in1_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in1_DRAM", 
"in1_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) def test_bert_large_pre_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_config, out_mem_config, request): @@ -108,19 +108,19 @@ def test_bert_large_pre_softmax_bmm_test(device, dtype, in0_mem_config, in1_mem_ def test_bert_large_pre_softmax_bmm_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_bert_large_pre_softmax_bmm_test(device, dtype, mem_config, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_bert_large_pre_softmax_bmm_test(device, dtype, mem_config, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_selfout_matmul.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_selfout_matmul.py index 52c62367298..910b7daa16b 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_selfout_matmul.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_selfout_matmul.py @@ -8,7 +8,7 @@ import numpy as np -import tt_lib as ttl +import ttnn from models.utility_functions import ( comp_pcc, ) @@ -34,35 +34,35 @@ def run_bert_large_selfout_matmul_test(device, dtype, in0_mem_config, in1_mem_co BIAS = torch.randint(-20, 20, bias_shape, dtype=torch.float) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) b_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( B.flatten().tolist(), b_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in1_mem_config) ) if bias_mem_config is not None: bias_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( BIAS.flatten().tolist(), bias_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) .pad(bias_pad_shape, [0, 0, 0, 0], 0) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, bias_mem_config) ) else: @@ -83,7 +83,7 @@ def run_bert_large_selfout_matmul_test(device, dtype, in0_mem_config, in1_mem_co 
logger.debug(f"out is on: {t2.memory_config().buffer_type}") assert t2.get_legacy_shape() == [9, 1, 384, 1024] - tt_host_rm = t2.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm = t2.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm = tt_host_rm.to_torch() ref_bmm = torch.matmul(A, B) @@ -99,16 +99,16 @@ def run_bert_large_selfout_matmul_test(device, dtype, in0_mem_config, in1_mem_co @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "bias_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, None, ), ids=["bias_DRAM", "bias_L1", "bias_None"], @@ -116,22 +116,22 @@ def run_bert_large_selfout_matmul_test(device, dtype, in0_mem_config, in1_mem_co @pytest.mark.parametrize( "in1_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in1_DRAM", "in1_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) def test_bert_large_selfout_matmul_test( @@ -147,8 +147,8 @@ def test_bert_large_selfout_matmul_test( def test_bert_large_selfout_matmul_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_bert_large_selfout_matmul_test( device, @@ -160,9 +160,9 @@ def test_bert_large_selfout_matmul_with_program_cache(device, use_program_cache) ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_bert_large_selfout_matmul_test( device, @@ -174,6 +174,6 @@ def test_bert_large_selfout_matmul_with_program_cache(device, use_program_cache) ) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git 
a/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_and_transform_qkv_heads.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_and_transform_qkv_heads.py index 20768a59e58..2ce689fe4cd 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_and_transform_qkv_heads.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_and_transform_qkv_heads.py @@ -8,7 +8,6 @@ import numpy as np -import tt_lib as ttl from models.utility_functions import comp_pcc, skip_for_grayskull import torch import ttnn @@ -26,18 +25,18 @@ def run_split_query_key_value_and_split_heads_test(device, batch, dtype, in0_mem A = torch.randn(a_shape) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) q, k, v = ttnn.experimental.split_query_key_value_and_split_heads( - a_t, ttl.tensor.CoreCoord(12, 9), memory_config=out_mem_config + a_t, ttnn.CoreCoord(12, 9), memory_config=out_mem_config ) # Check memory of inputs and outputs @@ -54,11 +53,11 @@ def run_split_query_key_value_and_split_heads_test(device, batch, dtype, in0_mem assert k.get_legacy_shape() == [batch, 16, 64, 384] assert v.get_legacy_shape() == [batch, 16, 384, 64] - tt_host_rm_q = q.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm_q = q.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm_q = tt_host_rm_q.to_torch() - tt_host_rm_k = k.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm_k = k.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm_k = tt_host_rm_k.to_torch() - tt_host_rm_v = v.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm_v = v.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm_v = tt_host_rm_v.to_torch() (ref_q, ref_k, ref_v) = torch.split(A, 1024, dim=-1) @@ -87,22 +86,22 @@ def run_split_query_key_value_and_split_heads_test(device, batch, dtype, in0_mem @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + (ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) @pytest.mark.parametrize( @@ -120,19 +119,19 @@ def test_split_query_key_value_and_split_heads_test(device, batch, dtype, in0_me @skip_for_grayskull("watcher error, see issue #6487") def test_split_query_key_value_and_split_heads_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_split_query_key_value_and_split_heads_test(device, 9, dtype, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, 
dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_split_query_key_value_and_split_heads_test(device, 9, dtype, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads.py index 95d52eafc77..9992885aa36 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads.py @@ -7,7 +7,6 @@ import numpy as np -import tt_lib as ttl import ttnn from models.utility_functions import comp_pcc, skip_for_grayskull import torch @@ -26,18 +25,18 @@ def run_split_query_key_value_and_split_heads_test(device, batch, dtype, in0_mem A = torch.randn(a_shape) a_t = ( - ttl.tensor.Tensor( + ttnn.Tensor( A.flatten().tolist(), a_shape, dtype, - ttl.tensor.Layout.ROW_MAJOR, + ttnn.ROW_MAJOR_LAYOUT, ) - .to(ttl.tensor.Layout.TILE) + .to(ttnn.TILE_LAYOUT) .to(device, in0_mem_config) ) q, k, v = ttnn.experimental.split_query_key_value_and_split_heads( - a_t, ttl.tensor.CoreCoord(12, 9), memory_config=out_mem_config + a_t, ttnn.CoreCoord(12, 9), memory_config=out_mem_config ) # Check memory of inputs and outputs @@ -54,11 +53,11 @@ def run_split_query_key_value_and_split_heads_test(device, batch, dtype, in0_mem assert k.get_legacy_shape() == [batch, 16, 64, 384] assert v.get_legacy_shape() == [batch, 16, 384, 64] - tt_host_rm_q = q.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm_q = q.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm_q = tt_host_rm_q.to_torch() - tt_host_rm_k = k.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm_k = k.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm_k = tt_host_rm_k.to_torch() - tt_host_rm_v = v.cpu().to(ttl.tensor.Layout.ROW_MAJOR) + tt_host_rm_v = v.cpu().to(ttnn.ROW_MAJOR_LAYOUT) pyt_got_back_rm_v = tt_host_rm_v.to_torch() (ref_q, ref_k, ref_v) = torch.split(A, 1024, dim=-1) @@ -88,22 +87,22 @@ def run_split_query_key_value_and_split_heads_test(device, batch, dtype, in0_mem @pytest.mark.parametrize( "out_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["out_DRAM", "out_L1"], ) @pytest.mark.parametrize( "in0_mem_config", ( - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM), - ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1), + ttnn.DRAM_MEMORY_CONFIG, + ttnn.L1_MEMORY_CONFIG, ), ids=["in0_DRAM", "in0_L1"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B, ttl.tensor.DataType.BFLOAT16), + 
(ttnn.bfloat8_b, ttnn.bfloat16), ids=["BFLOAT8_B", "BFLOAT16"], ) @pytest.mark.parametrize( @@ -121,19 +120,19 @@ def test_split_query_key_value_and_split_heads(device, batch, dtype, in0_mem_con @skip_for_grayskull("watcher error, see issue #6487") def test_split_query_key_value_and_split_heads_with_program_cache(device, use_program_cache): - dtype = ttl.tensor.DataType.BFLOAT8_B - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM) + dtype = ttnn.bfloat8_b + mem_config = ttnn.DRAM_MEMORY_CONFIG for _ in range(2): run_split_query_key_value_and_split_heads_test(device, 9, dtype, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) - mem_config = ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.L1) + mem_config = ttnn.L1_MEMORY_CONFIG for _ in range(2): run_split_query_key_value_and_split_heads_test(device, 9, dtype, mem_config, mem_config) dummy_shape = [1, 1, 32, 32] py_dummy_tensor = torch.randn(dummy_shape) - tt_dummy_tensor = ttl.tensor.Tensor(py_dummy_tensor, dtype).to(ttl.tensor.Layout.TILE).to(device, mem_config) + tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config) assert device.num_program_cache_entries() == 2 diff --git a/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads_sharded.py b/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads_sharded.py index 90983b9ffb0..84896abff1e 100644 --- a/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads_sharded.py +++ b/models/experimental/bert_large_performant/unit_tests/test_bert_large_split_query_key_value_and_split_heads_sharded.py @@ -6,7 +6,6 @@ import numpy as np import ttnn -import tt_lib as ttl from tt_lib.utils import ( pad_weight, tilize_to_list, @@ -24,25 +23,25 @@ @pytest.mark.parametrize( "out_mem_config", - (ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),), + (ttnn.DRAM_MEMORY_CONFIG,), ids=["out_DRAM"], ) @pytest.mark.parametrize( "in0_mem_config", - (ttl.tensor.MemoryConfig(ttl.tensor.TensorMemoryLayout.INTERLEAVED, ttl.tensor.BufferType.DRAM),), + (ttnn.DRAM_MEMORY_CONFIG,), ids=["in0_DRAM"], ) @pytest.mark.parametrize( "dtype", - (ttl.tensor.DataType.BFLOAT8_B,), + (ttnn.bfloat8_b,), ids=["BFLOAT8_B"], ) def test_split_query_key_value_and_split_heads_with_program_cache(device, dtype, in0_mem_config, out_mem_config): torch.manual_seed(1234) - sharded_mem_config = ttl.tensor.MemoryConfig( - memory_layout=ttl.tensor.TensorMemoryLayout.HEIGHT_SHARDED, - buffer_type=ttl.tensor.BufferType.L1, + sharded_mem_config = ttnn.MemoryConfig( + memory_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED, + buffer_type=ttnn.BufferType.L1, ) num_heads = 16 @@ -58,13 +57,13 @@ def test_split_query_key_value_and_split_heads_with_program_cache(device, dtype, in0_t, grid_size, [M // grid_size[0], K // grid_size[1]], - ttl.tensor.TensorMemoryLayout.BLOCK_SHARDED, - ttl.tensor.ShardOrientation.COL_MAJOR, + ttnn.TensorMemoryLayout.BLOCK_SHARDED, + ttnn.ShardOrientation.COL_MAJOR, ) q, k, v = ttnn.experimental.split_query_key_value_and_split_heads( 
in0_t_shard, - ttl.tensor.CoreCoord(grid_size[0], grid_size[1]), + ttnn.CoreCoord(grid_size[0], grid_size[1]), memory_config=sharded_mem_config, num_heads=num_heads, ) diff --git a/models/experimental/functional_unet/tt/unet_shallow_ttnn.py b/models/experimental/functional_unet/tt/unet_shallow_ttnn.py index 4a64f91a9e3..44292a1428d 100644 --- a/models/experimental/functional_unet/tt/unet_shallow_ttnn.py +++ b/models/experimental/functional_unet/tt/unet_shallow_ttnn.py @@ -66,7 +66,7 @@ def unet_concat(ttnn_tensors, dim=-1, use_reshard=True, perf_mode=False): t_mem_config.shard_spec.shape = reshard_shape t_mem_config.shard_spec.grid = output_mem_config.shard_spec.grid t_mem_config.shard_spec.orientation = output_mem_config.shard_spec.orientation - ttlib_tensors[i] = ttnn.experimental.tensor.reshard(t, t_mem_config) + ttlib_tensors[i] = ttnn.reshard(t, t_mem_config) else: output_mem_config = ttnn.DRAM_MEMORY_CONFIG for i in range(0, len(ttlib_tensors)): diff --git a/models/experimental/functional_vit/tt/ttnn_optimized_interleaved_vit.py b/models/experimental/functional_vit/tt/ttnn_optimized_interleaved_vit.py index ddc7e241893..19c81742573 100644 --- a/models/experimental/functional_vit/tt/ttnn_optimized_interleaved_vit.py +++ b/models/experimental/functional_vit/tt/ttnn_optimized_interleaved_vit.py @@ -63,10 +63,7 @@ def vit_embeddings( # cls_token = parameters.cls_token # position_embeddings = parameters.position_embeddings - l1_memory_config = ttnn.experimental.tensor.MemoryConfig( - memory_layout=ttnn.experimental.tensor.TensorMemoryLayout.INTERLEAVED, - buffer_type=ttnn.experimental.tensor.BufferType.L1, - ) + l1_memory_config = ttnn.L1_MEMORY_CONFIG patch_embeddings = vit_patch_embeddings(config, pixel_values, parameters=parameters.patch_embeddings) diff --git a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py index 5eacfff2ab0..ff295f81309 100644 --- a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py +++ b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit.py @@ -18,15 +18,15 @@ def update_model_config(config, batch_size): core_grid = ttnn.CoreGrid(y=8, x=12) program_configs = { - "fold_output_program_config": ttnn.experimental.tensor.MemoryConfig( - ttnn.experimental.tensor.TensorMemoryLayout.BLOCK_SHARDED, - ttnn.experimental.tensor.BufferType.L1, - ttnn.experimental.tensor.ShardSpec( - ttnn.experimental.tensor.CoreRangeSet( + "fold_output_program_config": ttnn.MemoryConfig( + ttnn.TensorMemoryLayout.BLOCK_SHARDED, + ttnn.BufferType.L1, + ttnn.ShardSpec( + ttnn.CoreRangeSet( { - ttnn.experimental.tensor.CoreRange( - ttnn.experimental.tensor.CoreCoord(0, 0), - ttnn.experimental.tensor.CoreCoord(12, 7), + ttnn.CoreRange( + ttnn.CoreCoord(0, 0), + ttnn.CoreCoord(12, 7), ), } ), @@ -34,7 +34,7 @@ def update_model_config(config, batch_size): 224, 192, ], - ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, + ttnn.ShardOrientation.ROW_MAJOR, False, ), ), @@ -119,8 +119,8 @@ def update_model_config(config, batch_size): subblock_w=2, block_h=7, block_w=2, - # math_fidelity=ttnn.experimental.tensor.MathFidelity.HiFi4, - # im_data_format=ttnn.experimental.tensor.DataType.BFLOAT16, + # math_fidelity=ttnn.MathFidelity.HiFi4, + # im_data_format=ttnn.bfloat16, # out_data_format=ttnn.bfloat8_b, inplace=False, ), @@ -129,8 +129,8 @@ def update_model_config(config, batch_size): subblock_w=2, block_h=7, block_w=2, - # math_fidelity=ttnn.experimental.tensor.MathFidelity.HiFi4, - 
# im_data_format=ttnn.experimental.tensor.DataType.BFLOAT16, + # math_fidelity=ttnn.MathFidelity.HiFi4, + # im_data_format=ttnn.bfloat16, # out_data_format=ttnn.bfloat8_b, inplace=False, ), @@ -139,8 +139,8 @@ def update_model_config(config, batch_size): subblock_w=7, block_h=7, block_w=7, - # math_fidelity=ttnn.experimental.tensor.MathFidelity.HiFi4, - # im_data_format=ttnn.experimental.tensor.DataType.BFLOAT16, + # math_fidelity=ttnn.MathFidelity.HiFi4, + # im_data_format=ttnn.bfloat16, ), } @@ -176,18 +176,18 @@ def vit_patch_embeddings(config, pixel_values, *, parameters, unittest_check=Fal #### Exp 2 of resharding after Fold and before Matmul # pixel_values = ttnn.pad(pixel_values, ((0, 0), (0, 0), (0, 224), (0, 128)), 0) - # post_fold_config = ttnn.experimental.tensor.MemoryConfig( - # ttnn.experimental.tensor.TensorMemoryLayout.BLOCK_SHARDED, - # ttnn.experimental.tensor.BufferType.L1, - # ttnn.experimental.tensor.ShardSpec( - # ttnn.experimental.tensor.CoreRangeSet( - # {ttnn.experimental.tensor.CoreRange( - # ttnn.experimental.tensor.CoreCoord(0, 0), - # ttnn.experimental.tensor.CoreCoord(11, 7), + # post_fold_config = ttnn.MemoryConfig( + # ttnn.TensorMemoryLayout.BLOCK_SHARDED, + # ttnn.BufferType.L1, + # ttnn.ShardSpec( + # ttnn.CoreRangeSet( + # {ttnn.CoreRange( + # ttnn.CoreCoord(0, 0), + # ttnn.CoreCoord(11, 7), # ),}, # ), # [224,192], - # ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, + # ttnn.ShardOrientation.ROW_MAJOR, # False, # ), # ) @@ -225,10 +225,7 @@ def vit_embeddings( ): parameters = parameters.vit.embeddings - l1_memory_config = ttnn.experimental.tensor.MemoryConfig( - memory_layout=ttnn.experimental.tensor.TensorMemoryLayout.INTERLEAVED, - buffer_type=ttnn.experimental.tensor.BufferType.L1, - ) + l1_memory_config = ttnn.L1_MEMORY_CONFIG patch_embeddings = vit_patch_embeddings(config, pixel_values, parameters=parameters.patch_embeddings) embedding_output = ttnn.concat([cls_token, patch_embeddings], -2, memory_config=l1_memory_config) diff --git a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py index 8aa671fd83f..65e62497d42 100644 --- a/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py +++ b/models/experimental/functional_vit/tt/ttnn_optimized_sharded_vit_backup.py @@ -18,15 +18,15 @@ def update_model_config(config, batch_size): core_grid = ttnn.CoreGrid(y=8, x=12) program_configs = { - "fold_output_program_config": ttnn.experimental.tensor.MemoryConfig( - ttnn.experimental.tensor.TensorMemoryLayout.BLOCK_SHARDED, - ttnn.experimental.tensor.BufferType.L1, - ttnn.experimental.tensor.ShardSpec( - ttnn.experimental.tensor.CoreRangeSet( + "fold_output_program_config": ttnn.MemoryConfig( + ttnn.TensorMemoryLayout.BLOCK_SHARDED, + ttnn.BufferType.L1, + ttnn.ShardSpec( + ttnn.CoreRangeSet( { - ttnn.experimental.tensor.CoreRange( - ttnn.experimental.tensor.CoreCoord(0, 0), - ttnn.experimental.tensor.CoreCoord(12, 7), + ttnn.CoreRange( + ttnn.CoreCoord(0, 0), + ttnn.CoreCoord(12, 7), ), } ), @@ -34,7 +34,7 @@ def update_model_config(config, batch_size): 224, 192, ], - ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, + ttnn.ShardOrientation.ROW_MAJOR, False, ), ), @@ -119,8 +119,8 @@ def update_model_config(config, batch_size): subblock_w=2, block_h=7, block_w=2, - # math_fidelity=ttnn.experimental.tensor.MathFidelity.HiFi4, - # im_data_format=ttnn.experimental.tensor.DataType.BFLOAT16, + # math_fidelity=ttnn.MathFidelity.HiFi4, + # 
+            # im_data_format=ttnn.bfloat16,
             # out_data_format=ttnn.bfloat8_b,
             inplace=True,
         ),
@@ -129,8 +129,8 @@ def update_model_config(config, batch_size):
             subblock_w=2,
             block_h=7,
             block_w=2,
-            # math_fidelity=ttnn.experimental.tensor.MathFidelity.HiFi4,
-            # im_data_format=ttnn.experimental.tensor.DataType.BFLOAT16,
+            # math_fidelity=ttnn.MathFidelity.HiFi4,
+            # im_data_format=ttnn.bfloat16,
             # out_data_format=ttnn.bfloat8_b,
             inplace=False,
         ),
@@ -139,8 +139,8 @@ def update_model_config(config, batch_size):
             subblock_w=7,
             block_h=7,
             block_w=7,
-            # math_fidelity=ttnn.experimental.tensor.MathFidelity.HiFi4,
-            # im_data_format=ttnn.experimental.tensor.DataType.BFLOAT16,
+            # math_fidelity=ttnn.MathFidelity.HiFi4,
+            # im_data_format=ttnn.bfloat16,
         ),
     }
@@ -183,18 +183,18 @@ def vit_patch_embeddings(
     #### Exp 2 of resharding after Fold and before Matmul
     # pixel_values = ttnn.pad(pixel_values, ((0, 0), (0, 0), (0, 224), (0, 128)), 0)
-    # post_fold_config = ttnn.experimental.tensor.MemoryConfig(
-    #     ttnn.experimental.tensor.TensorMemoryLayout.BLOCK_SHARDED,
-    #     ttnn.experimental.tensor.BufferType.L1,
-    #     ttnn.experimental.tensor.ShardSpec(
-    #         ttnn.experimental.tensor.CoreRangeSet(
-    #             {ttnn.experimental.tensor.CoreRange(
-    #                 ttnn.experimental.tensor.CoreCoord(0, 0),
-    #                 ttnn.experimental.tensor.CoreCoord(11, 7),
+    # post_fold_config = ttnn.MemoryConfig(
+    #     ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+    #     ttnn.BufferType.L1,
+    #     ttnn.ShardSpec(
+    #         ttnn.CoreRangeSet(
+    #             {ttnn.CoreRange(
+    #                 ttnn.CoreCoord(0, 0),
+    #                 ttnn.CoreCoord(11, 7),
     #             ),},
     #         ),
     #         [224,192],
-    #         ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR,
+    #         ttnn.ShardOrientation.ROW_MAJOR,
     #         False,
     #     ),
     # )
@@ -235,10 +235,7 @@ def vit_embeddings(
     # cls_token = parameters.cls_token
     # position_embeddings = parameters.position_embeddings

-    l1_memory_config = ttnn.experimental.tensor.MemoryConfig(
-        memory_layout=ttnn.experimental.tensor.TensorMemoryLayout.INTERLEAVED,
-        buffer_type=ttnn.experimental.tensor.BufferType.L1,
-    )
+    l1_memory_config = ttnn.L1_MEMORY_CONFIG

     patch_embeddings = vit_patch_embeddings(config, pixel_values, parameters=parameters.patch_embeddings)
     # print("clcs", cls_token.shape)
diff --git a/models/experimental/grok/tt/model_config.py b/models/experimental/grok/tt/model_config.py
index 06e17751d8c..bc6e5737ccb 100644
--- a/models/experimental/grok/tt/model_config.py
+++ b/models/experimental/grok/tt/model_config.py
@@ -125,14 +125,14 @@ def __init__(self, device=None, instruct=False, dummy_weights=False):
         self.model_config.update({f"{key}_TILE": ttnn.TILE_LAYOUT for key in self.OP_KEYS if "LAYOUT" in key})

         # Set configurations for sharded type
-        self.model_config["WIDTH_SHARDED_MEMCFG"] = ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.WIDTH_SHARDED, ttnn.experimental.tensor.BufferType.L1
+        self.model_config["WIDTH_SHARDED_MEMCFG"] = ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.WIDTH_SHARDED, ttnn.BufferType.L1
         )
-        self.model_config["HEIGHT_SHARDED_MEMCFG"] = ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.experimental.tensor.BufferType.L1
+        self.model_config["HEIGHT_SHARDED_MEMCFG"] = ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1
         )
-        self.model_config["BLOCK_SHARDED_MEMCFG"] = ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.BLOCK_SHARDED, ttnn.experimental.tensor.BufferType.L1
+        self.model_config["BLOCK_SHARDED_MEMCFG"] = ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.BLOCK_SHARDED, ttnn.BufferType.L1
         )

         # Create sharded memory configs for different ops
diff --git a/models/experimental/vgg/vgg_utils.py b/models/experimental/vgg/vgg_utils.py
index 46b8d990ab6..f48f92fbc42 100644
--- a/models/experimental/vgg/vgg_utils.py
+++ b/models/experimental/vgg/vgg_utils.py
@@ -80,7 +80,7 @@ def cache_weights_in_weka(device, model_location_generator):
             ttnn.bfloat16,
             ttnn.ROW_MAJOR_LAYOUT,
         )
-        ttnn.experimental.tensor.dump_tensor(file_name + str(key) + ".bin", value)
+        ttnn.dump_tensor(file_name + str(key) + ".bin", value)


 def store_weights(model_version, file_name, dtype, base_addresses):
@@ -114,7 +114,7 @@ def store_weights(model_version, file_name, dtype, base_addresses):
             ttnn.ROW_MAJOR_LAYOUT,
         )

-        ttnn.experimental.tensor.dump_tensor(file_name + str(key) + str(dtype) + ".bin", value)
+        ttnn.dump_tensor(file_name + str(key) + str(dtype) + ".bin", value)


 def get_tt_cache_path(model_version):
diff --git a/models/experimental/whisper/tests/test_whisper_attention.py b/models/experimental/whisper/tests/test_whisper_attention.py
index 97e1c0141a1..540e641dd52 100644
--- a/models/experimental/whisper/tests/test_whisper_attention.py
+++ b/models/experimental/whisper/tests/test_whisper_attention.py
@@ -9,14 +9,11 @@ from loguru import logger
 from transformers import WhisperModel, WhisperForAudioClassification

-import tt_lib
+import ttnn

 from models.experimental.whisper.tt.whisper_attention import TtWhisperAttention
-from models.utility_functions import (
-    torch2tt_tensor,
-    tt2torch_tensor,
-    comp_pcc
-)
+from models.utility_functions import torch2tt_tensor, tt2torch_tensor, comp_pcc
+

 class PytorchWhisperAttention(nn.Module):
     def __init__(self, hf_reference_module):
@@ -43,13 +40,9 @@ def forward(
         return result


-def run_whisper_attention(
-    decoder, layer, device, for_audio_classification, is_self_attn=True
-):
+def run_whisper_attention(decoder, layer, device, for_audio_classification, is_self_attn=True):
     if for_audio_classification:
-        model = WhisperForAudioClassification.from_pretrained(
-            "sanchit-gandhi/whisper-medium-fleurs-lang-id"
-        )
+        model = WhisperForAudioClassification.from_pretrained("sanchit-gandhi/whisper-medium-fleurs-lang-id")
         logger.info("Using WhisperForAudioClassification model")
     else:
         model = WhisperModel.from_pretrained("openai/whisper-tiny.en")
@@ -92,31 +85,23 @@ def run_whisper_attention(
         # Encoder inputs
         logger.info("Making inputs ready for encoder")
         hidden_state_input_tensor = torch.rand(1, BATCH, embd_dim)
-        ttm_tensor_hidden_state = torch2tt_tensor(
-            hidden_state_input_tensor, device, tt_layout=tt_lib.tensor.Layout.ROW_MAJOR
-        )
+        ttm_tensor_hidden_state = torch2tt_tensor(hidden_state_input_tensor, device, tt_layout=ttnn.ROW_MAJOR_LAYOUT)
     else:
         # Decoder inputs
         hidden_state_input_tensor = torch.rand(1, 32, embd_dim)
-        ttm_tensor_hidden_state = torch2tt_tensor(
-            hidden_state_input_tensor, device, tt_layout=tt_lib.tensor.Layout.ROW_MAJOR
-        )
+        ttm_tensor_hidden_state = torch2tt_tensor(hidden_state_input_tensor, device, tt_layout=ttnn.ROW_MAJOR_LAYOUT)

     if not is_self_attn:
         key_value_states = torch.rand(1, BATCH, embd_dim)
-        ttm_tensor_key_value_states = torch2tt_tensor(
-            key_value_states, device, tt_layout=tt_lib.tensor.Layout.ROW_MAJOR
-        )
+        ttm_tensor_key_value_states = torch2tt_tensor(key_value_states, device, tt_layout=ttnn.ROW_MAJOR_LAYOUT)

     if decoder and is_self_attn:
         # Decoder self attention
-        attention_mask_input_tensor = (
-            torch.rand(size=(1, 1, 32, 32)) < 0.25
-        ).int().float() * -3.4028e38
+        attention_mask_input_tensor = (torch.rand(size=(1, 1, 32, 32)) < 0.25).int().float() * -3.4028e38
         ttm_tensor_attention_mask = torch2tt_tensor(
             attention_mask_input_tensor,
             device,
-            tt_layout=tt_lib.tensor.Layout.ROW_MAJOR,
+            tt_layout=ttnn.ROW_MAJOR_LAYOUT,
         )
     else:
         # Decoder encoder attention
@@ -186,9 +171,7 @@ def run_whisper_attention(
     logger.debug(attn_weights_reshaped.size())
     logger.debug(tt_attn_weights_to_torch.size())

-    does_pass, pcc_message = comp_pcc(
-        attn_weights_reshaped, tt_attn_weights_to_torch, 0.98
-    )
+    does_pass, pcc_message = comp_pcc(attn_weights_reshaped, tt_attn_weights_to_torch, 0.98)

     logger.info(pcc_message)
     assert does_pass
@@ -201,9 +184,7 @@ def run_whisper_attention(
     if DECODER:
         tt_past_key_value_to_torch = tt2torch_tensor(tt_past_key_value[0])

-        does_pass, pcc_message = comp_pcc(
-            past_key_value[0], tt_past_key_value_to_torch, 0.98
-        )
+        does_pass, pcc_message = comp_pcc(past_key_value[0], tt_past_key_value_to_torch, 0.98)

         logger.info(pcc_message)
         assert does_pass
@@ -215,9 +196,7 @@ def run_whisper_attention(

         tt_past_key_value_to_torch = tt2torch_tensor(tt_past_key_value[1])

-        does_pass, pcc_message = comp_pcc(
-            past_key_value[1], tt_past_key_value_to_torch, 0.98
-        )
+        does_pass, pcc_message = comp_pcc(past_key_value[1], tt_past_key_value_to_torch, 0.98)

         logger.info(pcc_message)
         if does_pass:
@@ -231,9 +210,7 @@ def run_whisper_attention(

 def test_WhisperEncoderAttention_inference(device):
     torch.manual_seed(1234)
-    run_whisper_attention(
-        decoder=False, layer=0, device=device, for_audio_classification=False
-    )
+    run_whisper_attention(decoder=False, layer=0, device=device, for_audio_classification=False)


 def test_WhisperDecoderEncoderAttention_inference(device):
@@ -263,6 +240,4 @@ def test_WhisperDecoderSelfAttention_inference(device):

 def test_WhisperEncoderForAudioClassificationAttention_inference(device):
     torch.manual_seed(1234)
-    run_whisper_attention(
-        decoder=False, layer=0, device=device, for_audio_classification=True
-    )
+    run_whisper_attention(decoder=False, layer=0, device=device, for_audio_classification=True)
diff --git a/models/experimental/whisper/tests/test_whisper_decoder.py b/models/experimental/whisper/tests/test_whisper_decoder.py
index 713bbfb71fc..80bae10817d 100644
--- a/models/experimental/whisper/tests/test_whisper_decoder.py
+++ b/models/experimental/whisper/tests/test_whisper_decoder.py
@@ -7,7 +7,7 @@ from loguru import logger
 from transformers import WhisperModel, WhisperConfig

-import tt_lib
+import ttnn

 from models.experimental.whisper.tt.whisper_decoder import TtWhisperDecoder
 from models.utility_functions import (
@@ -67,7 +67,7 @@ def run_whisper_decoder(device):
     )
     tt_whisper_decoder.eval()

-    ttm_encoder_hidden_states = torch2tt_tensor(encoder_hidden_states, device, tt_lib.tensor.Layout.ROW_MAJOR)
+    ttm_encoder_hidden_states = torch2tt_tensor(encoder_hidden_states, device, ttnn.ROW_MAJOR_LAYOUT)
     with torch.no_grad():
         ttm_output = tt_whisper_decoder(
             input_ids=decoder_input_ids,
diff --git a/models/experimental/whisper/tests/test_whisper_decoder_layer.py b/models/experimental/whisper/tests/test_whisper_decoder_layer.py
index 95ea4e29477..0e0d7020e37 100644
--- a/models/experimental/whisper/tests/test_whisper_decoder_layer.py
+++ b/models/experimental/whisper/tests/test_whisper_decoder_layer.py
@@ -7,7 +7,7 @@ from loguru import logger
 from transformers import WhisperModel, WhisperConfig

-import tt_lib
+import ttnn

 from models.experimental.whisper.tt.whisper_decoder_layer import (
     TtWhisperDecoderLayer,
 )
@@ -18,6 +18,7 @@
     comp_pcc,
 )

+
 def run_whisper_decoder_layer(layer, device):
     model = WhisperModel.from_pretrained("openai/whisper-tiny.en")
     state_dict = model.state_dict()
@@ -41,9 +42,7 @@ def run_whisper_decoder_layer(layer, device):
     seq_len = 32

     # Similary to what Decoder's method self._prepare_decoder_attention_mask returns
-    attention_mask_input_tensor = (
-        torch.rand(size=(1, 1, tgt_len, seq_len)) < 0.25
-    ).int().float() * -3.4028e38
+    attention_mask_input_tensor = (torch.rand(size=(1, 1, tgt_len, seq_len)) < 0.25).int().float() * -3.4028e38

     hidden_state_input_tensor = torch.rand(batch, seq_len, embed_dim)
     encoder_hidden_states = torch.rand(batch, enc_seq_len, embed_dim)
@@ -82,23 +81,15 @@ def run_whisper_decoder_layer(layer, device):
     )

     """ TTM Whisper Decoder Layer """
-    ttm_encoder_hidden_states = torch2tt_tensor(
-        encoder_hidden_states, device, tt_lib.tensor.Layout.ROW_MAJOR
-    )
+    ttm_encoder_hidden_states = torch2tt_tensor(encoder_hidden_states, device, ttnn.ROW_MAJOR_LAYOUT)

     if encoder_attention_mask:
-        ttm_encoder_attention_mask = torch2tt_tensor(
-            encoder_attention_mask, device, tt_lib.tensor.Layout.ROW_MAJOR
-        )
+        ttm_encoder_attention_mask = torch2tt_tensor(encoder_attention_mask, device, ttnn.ROW_MAJOR_LAYOUT)
     else:
         ttm_encoder_attention_mask = None

-    ttm_tensor_hidden_state = torch2tt_tensor(
-        hidden_state_input_tensor, device, tt_lib.tensor.Layout.ROW_MAJOR
-    )
-    ttm_tensor_attention_mask = torch2tt_tensor(
-        attention_mask_input_tensor, device, tt_lib.tensor.Layout.ROW_MAJOR
-    )
+    ttm_tensor_hidden_state = torch2tt_tensor(hidden_state_input_tensor, device, ttnn.ROW_MAJOR_LAYOUT)
+    ttm_tensor_attention_mask = torch2tt_tensor(attention_mask_input_tensor, device, ttnn.ROW_MAJOR_LAYOUT)

     # TODO: Support this parameter as tt tensor with padding
     # layer_head_mask_input_tensor has size [6] and is equal to number of encoder_attention_heads
@@ -107,13 +98,9 @@ def run_whisper_decoder_layer(layer, device):
     # same for cross_attn_layer_head_mask

     layer_head_mask_input_tensor = layer_head_mask_input_tensor.view(1, 1, 1, num_heads)
-    layer_head_mask_input_tensor = torch2tt_tensor(
-        layer_head_mask_input_tensor, device, tt_lib.tensor.Layout.ROW_MAJOR
-    )
+    layer_head_mask_input_tensor = torch2tt_tensor(layer_head_mask_input_tensor, device, ttnn.ROW_MAJOR_LAYOUT)
     cross_attn_layer_head_mask = cross_attn_layer_head_mask.view(1, 1, 1, num_heads)
-    cross_attn_layer_head_mask = torch2tt_tensor(
-        cross_attn_layer_head_mask, device, tt_lib.tensor.Layout.ROW_MAJOR
-    )
+    cross_attn_layer_head_mask = torch2tt_tensor(cross_attn_layer_head_mask, device, ttnn.ROW_MAJOR_LAYOUT)

     tt_whisper_decoder_layer = TtWhisperDecoderLayer(
         base_address=base_address,
diff --git a/models/experimental/whisper/tests/test_whisper_encoder.py b/models/experimental/whisper/tests/test_whisper_encoder.py
index 02012ab78b4..9df873cea6f 100644
--- a/models/experimental/whisper/tests/test_whisper_encoder.py
+++ b/models/experimental/whisper/tests/test_whisper_encoder.py
@@ -12,7 +12,7 @@ from transformers import (
     AutoFeatureExtractor,
 )

-import tt_lib
+import ttnn
 import pytest

 from models.utility_functions import skip_for_wormhole_b0
@@ -84,7 +84,7 @@ def run_whisper_encoder(device, for_audio_classification=False, encoder_layers=1
     )
     tt_whisper_encoder.eval()

-    input_features = torch2tt_tensor(input_features, device, tt_lib.tensor.Layout.ROW_MAJOR)
+    input_features = torch2tt_tensor(input_features, device, ttnn.ROW_MAJOR_LAYOUT)
     ttm_output = tt_whisper_encoder(
         input_features=input_features,
         head_mask=head_mask,
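Every whisper test hunk in this stretch makes the same substitution inside the `torch2tt_tensor` helper calls: `tt_lib.tensor.Layout.ROW_MAJOR` becomes `ttnn.ROW_MAJOR_LAYOUT`, with the helper's signature untouched. A minimal round-trip sketch under the new spelling (the tensor shape here is illustrative, and `device` is assumed to come from the usual pytest fixture, as in the tests above):

    import torch
    import ttnn
    from models.utility_functions import torch2tt_tensor, tt2torch_tensor

    def roundtrip(device):
        x = torch.rand(1, 32, 384)  # illustrative host tensor
        # Host torch tensor -> device tensor, keeping row-major layout.
        tt_x = torch2tt_tensor(x, device, tt_layout=ttnn.ROW_MAJOR_LAYOUT)
        # Device tensor -> host torch tensor, e.g. for comp_pcc against a reference.
        return tt2torch_tensor(tt_x)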
diff --git a/models/experimental/whisper/tests/test_whisper_encoder_layer.py b/models/experimental/whisper/tests/test_whisper_encoder_layer.py
index fd8c04530a2..23a3e52e844 100644
--- a/models/experimental/whisper/tests/test_whisper_encoder_layer.py
+++ b/models/experimental/whisper/tests/test_whisper_encoder_layer.py
@@ -9,7 +9,7 @@

 from transformers import WhisperModel, WhisperForAudioClassification

-import tt_lib
+import ttnn
 import pytest

 from models.experimental.whisper.tt.whisper_encoder_layer import (
@@ -46,7 +46,7 @@ def run_whisper_encoder_layer(layer, device, for_audio_classification=False):
     hidden_state_input_tensor = torch.rand(1, 1500, embed_dim)
     attention_mask_input_tensor = None

-    ttm_tensor_hidden_state = torch2tt_tensor(hidden_state_input_tensor, device, tt_lib.tensor.Layout.ROW_MAJOR)
+    ttm_tensor_hidden_state = torch2tt_tensor(hidden_state_input_tensor, device, ttnn.ROW_MAJOR_LAYOUT)
     ttm_tensor_attention_mask = None
     layer_head_mask_input_tensor = None
diff --git a/models/experimental/whisper/tests/test_whisper_for_audio_classification.py b/models/experimental/whisper/tests/test_whisper_for_audio_classification.py
index 37433fe8e7a..bd6079fd914 100644
--- a/models/experimental/whisper/tests/test_whisper_for_audio_classification.py
+++ b/models/experimental/whisper/tests/test_whisper_for_audio_classification.py
@@ -9,7 +9,7 @@ from datasets import load_dataset
 from transformers import WhisperForAudioClassification, AutoFeatureExtractor

-import tt_lib
+import ttnn

 from models.experimental.whisper.tt.whisper_for_audio_classification import (
     TtWhisperForAudioClassification,
@@ -59,7 +59,7 @@ def run_whisper_for_audio_classification(device):
     tt_whisper_model.eval()

     with torch.no_grad():
-        input_features = torch2tt_tensor(input_features, device, tt_lib.tensor.Layout.ROW_MAJOR)
+        input_features = torch2tt_tensor(input_features, device, ttnn.ROW_MAJOR_LAYOUT)
         ttm_logits = tt_whisper_model(
             input_features=input_features,
         ).logits
diff --git a/models/experimental/whisper/tests/test_whisper_for_conditional_generation.py b/models/experimental/whisper/tests/test_whisper_for_conditional_generation.py
index 6f851cd4dc1..ac1ed9c5048 100644
--- a/models/experimental/whisper/tests/test_whisper_for_conditional_generation.py
+++ b/models/experimental/whisper/tests/test_whisper_for_conditional_generation.py
@@ -37,7 +37,7 @@ from transformers import (
     TypicalLogitsWarper,
 )

-import tt_lib
+import ttnn

 from models.experimental.whisper.tt.whisper_for_conditional_generation import (
     TtWhisperForConditionalGeneration,
@@ -333,7 +333,7 @@ def run_generate(sample, device):
     tt_model_kwargs["output_hidden_states"] = generation_config.output_hidden_states
     tt_model_kwargs["use_cache"] = generation_config.use_cache

-    tt_input_features = torch2tt_tensor(input_features, device, tt_lib.tensor.Layout.ROW_MAJOR)
+    tt_input_features = torch2tt_tensor(input_features, device, ttnn.ROW_MAJOR_LAYOUT)
     # Prepare model args for tt model
     tt_model_kwargs = _prepare_encoder_decoder_kwargs_for_generation(
         tt_model, tt_input_features, tt_model_kwargs, "input_features"
diff --git a/models/experimental/whisper/tests/test_whisper_model.py b/models/experimental/whisper/tests/test_whisper_model.py
index 3acedad7527..0ec76f12b8e 100644
--- a/models/experimental/whisper/tests/test_whisper_model.py
+++ b/models/experimental/whisper/tests/test_whisper_model.py
@@ -9,7 +9,7 @@ from loguru import logger
 from transformers import WhisperModel, AutoFeatureExtractor

-import tt_lib
+import ttnn

 from models.experimental.whisper.tt.whisper_model import TtWhisperModel
 from models.utility_functions import (
@@ -74,7 +74,7 @@ def run_whisper_model(device):
     tt_whisper.eval()

     with torch.no_grad():
-        input_features = torch2tt_tensor(input_features, device, tt_lib.tensor.Layout.ROW_MAJOR)
+        input_features = torch2tt_tensor(input_features, device, ttnn.ROW_MAJOR_LAYOUT)

         logger.info("Running tt whisper model")
         ttm_output = tt_whisper(input_features=input_features, decoder_input_ids=decoder_input_ids)
diff --git a/models/experimental/whisper/tt/whisper_attention.py b/models/experimental/whisper/tt/whisper_attention.py
index 9a38b989173..12af7185961 100644
--- a/models/experimental/whisper/tt/whisper_attention.py
+++ b/models/experimental/whisper/tt/whisper_attention.py
@@ -5,8 +5,6 @@ import torch
 import torch.nn as nn
 import ttnn
-import tt_lib
-import ttnn
 from typing import Optional, Tuple, Union

 from models.utility_functions import torch2tt_tensor, tt2torch_tensor
@@ -58,47 +56,47 @@ def __init__(
         self.v_proj_bias = torch2tt_tensor(
             state_dict[f"{base_address}.v_proj.bias"],
             self.device,
-            tt_layout=tt_lib.tensor.Layout.ROW_MAJOR,
+            tt_layout=ttnn.ROW_MAJOR_LAYOUT,
         )
         self.q_proj_weight = torch2tt_tensor(
             state_dict[f"{base_address}.q_proj.weight"],
             self.device,
-            tt_layout=tt_lib.tensor.Layout.ROW_MAJOR,
+            tt_layout=ttnn.ROW_MAJOR_LAYOUT,
         )
         self.q_proj_bias = torch2tt_tensor(
             state_dict[f"{base_address}.q_proj.bias"],
             self.device,
-            tt_layout=tt_lib.tensor.Layout.ROW_MAJOR,
+            tt_layout=ttnn.ROW_MAJOR_LAYOUT,
         )
         self.out_proj_weight = torch2tt_tensor(
             state_dict[f"{base_address}.out_proj.weight"],
             self.device,
-            tt_layout=tt_lib.tensor.Layout.ROW_MAJOR,
+            tt_layout=ttnn.ROW_MAJOR_LAYOUT,
         )
         self.out_proj_bias = torch2tt_tensor(
             state_dict[f"{base_address}.out_proj.bias"],
             self.device,
-            tt_layout=tt_lib.tensor.Layout.ROW_MAJOR,
+            tt_layout=ttnn.ROW_MAJOR_LAYOUT,
         )

         self.cached_q_proj_shape = None
         self.q_proj_mul_const = None

     # Copied from transformers.models.bart.modeling_bart.BartAttention._shape with BART->whisper
-    def _shape(self, tt_tensor: tt_lib.tensor.Tensor, seq_len: int, bsz: int):
+    def _shape(self, tt_tensor: ttnn.Tensor, seq_len: int, bsz: int):
         tt_tensor = fallback_ops.reshape(tt_tensor, bsz, seq_len, self.num_heads, self.head_dim)
         tt_tensor = ttnn.transpose(tt_tensor, 1, -2)
         return tt_tensor

     def forward(
         self,
-        hidden_states: tt_lib.tensor.Tensor,
-        key_value_states: Optional[tt_lib.tensor.Tensor] = None,
-        past_key_value: Optional[Tuple[tt_lib.tensor.Tensor]] = None,
-        attention_mask: Optional[tt_lib.tensor.Tensor] = None,
+        hidden_states: ttnn.Tensor,
+        key_value_states: Optional[ttnn.Tensor] = None,
+        past_key_value: Optional[Tuple[ttnn.Tensor]] = None,
+        attention_mask: Optional[ttnn.Tensor] = None,
         layer_head_mask: Optional[torch.Tensor] = None,
         output_attentions: bool = False,
-    ) -> Tuple[tt_lib.tensor.Tensor, Optional[tt_lib.tensor.Tensor], Optional[Tuple[tt_lib.tensor.Tensor]],]:
+    ) -> Tuple[ttnn.Tensor, Optional[ttnn.Tensor], Optional[Tuple[ttnn.Tensor]],]:
         # if key_value_states are provided this layer is used as a cross-attention layer for the decoder
         is_cross_attention = key_value_states is not None
diff --git a/models/experimental/whisper/tt/whisper_common.py b/models/experimental/whisper/tt/whisper_common.py
index 178e4c0cbb8..95dbd979093 100644
--- a/models/experimental/whisper/tt/whisper_common.py
+++ b/models/experimental/whisper/tt/whisper_common.py
@@ -3,14 +3,11 @@
 # SPDX-License-Identifier: Apache-2.0

 import torch
-import tt_lib
 import ttnn


 def linear(x, weight, bias=None):
-    out_mem_config_l1 = tt_lib.tensor.MemoryConfig(
-        tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1
-    )
+    out_mem_config_l1 = ttnn.L1_MEMORY_CONFIG

     weight = ttnn.transpose(weight, -2, -1)
     x = ttnn.matmul(x, weight)
diff --git a/models/experimental/whisper/tt/whisper_decoder.py b/models/experimental/whisper/tt/whisper_decoder.py
index f1b1def0564..02f9c1363b6 100644
--- a/models/experimental/whisper/tt/whisper_decoder.py
+++ b/models/experimental/whisper/tt/whisper_decoder.py
@@ -4,7 +4,6 @@
 import math
 from functools import partial

-import tt_lib
 import torch
 import torch.nn as nn
@@ -41,11 +40,11 @@ class TtWhisperDecoderOutput:
     that may also contain a past key/values (to speed up sequential decoding).
     """

-    last_hidden_state: tt_lib.tensor.Tensor = None
-    past_key_values: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None
-    hidden_states: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    cross_attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
+    last_hidden_state: ttnn.Tensor = None
+    past_key_values: Optional[Tuple[Tuple[ttnn.Tensor]]] = None
+    hidden_states: Optional[Tuple[ttnn.Tensor]] = None
+    attentions: Optional[Tuple[ttnn.Tensor]] = None
+    cross_attentions: Optional[Tuple[ttnn.Tensor]] = None


 class TtWhisperDecoder(nn.Module):
@@ -55,7 +54,7 @@ class TtWhisperDecoder(nn.Module):

     Args:
         reference_model: WhisperModel
-        device: device: tt_lib.device.Device
+        device: device: ttnn.Device
         config: WhisperConfig
     """

@@ -106,12 +105,12 @@ def __init__(
         gamma = torch2tt_tensor(
             self.state_dict[f"{base_address}.layer_norm.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         beta = torch2tt_tensor(
             self.state_dict[f"{base_address}.layer_norm.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.layer_norm = partial(ttnn.layer_norm, weight=gamma, bias=beta, epsilon=1e-05)
@@ -227,16 +226,16 @@ def forward(
         self,
         input_ids: torch.Tensor = None,
         attention_mask: torch.Tensor = None,
-        encoder_hidden_states: tt_lib.tensor.Tensor = None,
+        encoder_hidden_states: ttnn.Tensor = None,
         head_mask: torch.Tensor = None,
         cross_attn_head_mask: torch.Tensor = None,
-        past_key_values: Optional[Tuple[tt_lib.tensor.Tensor]] = None,
+        past_key_values: Optional[Tuple[ttnn.Tensor]] = None,
         inputs_embeds: torch.Tensor = None,
         use_cache: bool = None,
         output_attentions: bool = None,
         output_hidden_states: bool = None,
         return_dict: bool = None,
-    ) -> Union[Tuple[tt_lib.tensor.Tensor], TtWhisperDecoderOutput]:
+    ) -> Union[Tuple[ttnn.Tensor], TtWhisperDecoderOutput]:
         """
         Args:
             input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
@@ -335,8 +334,8 @@ def forward(

         """TT implementation"""

-        hidden_states = torch2tt_tensor(hidden_states, self.device, tt_lib.tensor.Layout.ROW_MAJOR)
-        attention_mask = torch2tt_tensor(attention_mask, self.device, tt_lib.tensor.Layout.ROW_MAJOR)
+        hidden_states = torch2tt_tensor(hidden_states, self.device, ttnn.ROW_MAJOR_LAYOUT)
+        attention_mask = torch2tt_tensor(attention_mask, self.device, ttnn.ROW_MAJOR_LAYOUT)

         # TODO: Dropout not supported for not
         # hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
diff --git a/models/experimental/whisper/tt/whisper_decoder_layer.py b/models/experimental/whisper/tt/whisper_decoder_layer.py
index 37f785b50de..e4d94dfc159 100644
--- a/models/experimental/whisper/tt/whisper_decoder_layer.py
+++ b/models/experimental/whisper/tt/whisper_decoder_layer.py
@@ -7,7 +7,6 @@
 import torch.nn as nn
 import ttnn
-import tt_lib
 from typing import Optional, Tuple, Union

 from transformers import WhisperConfig
@@ -38,9 +37,7 @@ def __init__(
         self.embed_dim = embed_dim
         self.decoder_ffn_dim = decoder_ffn_dim
-        self.out_mem_config_l1 = tt_lib.tensor.MemoryConfig(
-            tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1
-        )
+        self.out_mem_config_l1 = ttnn.L1_MEMORY_CONFIG

         # Do not use dropout for now
         # self.dropout = config.dropout
@@ -58,12 +55,12 @@ def __init__(
         gamma = torch2tt_tensor(
             self.state_dict[f"{base_address}.self_attn_layer_norm.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         beta = torch2tt_tensor(
             self.state_dict[f"{base_address}.self_attn_layer_norm.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )

         self.self_attn_layer_norm = partial(
@@ -86,12 +83,12 @@ def __init__(
         gamma1 = torch2tt_tensor(
             self.state_dict[f"{base_address}.encoder_attn_layer_norm.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         beta1 = torch2tt_tensor(
             self.state_dict[f"{base_address}.encoder_attn_layer_norm.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.encoder_attn_layer_norm = partial(ttnn.layer_norm, weight=gamma1, bias=beta1, epsilon=1e-05)
@@ -99,48 +96,48 @@ def __init__(
         self.fc1_weight = torch2tt_tensor(
             self.state_dict[f"{base_address}.fc1.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.fc1_bias = torch2tt_tensor(
             state_dict[f"{base_address}.fc1.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.fc2_weight = torch2tt_tensor(
             self.state_dict[f"{base_address}.fc2.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.fc2_bias = torch2tt_tensor(
             state_dict[f"{base_address}.fc2.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )

         gamma2 = torch2tt_tensor(
             self.state_dict[f"{base_address}.final_layer_norm.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         beta2 = torch2tt_tensor(
             self.state_dict[f"{base_address}.final_layer_norm.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.final_layer_norm = partial(ttnn.layer_norm, weight=gamma2, bias=beta2, epsilon=1e-05)

     def forward(
         self,
-        hidden_states: tt_lib.tensor.Tensor,
-        attention_mask: Optional[tt_lib.tensor.Tensor] = None,
-        encoder_hidden_states: Optional[tt_lib.tensor.Tensor] = None,
-        encoder_attention_mask: Optional[tt_lib.tensor.Tensor] = None,
+        hidden_states: ttnn.Tensor,
+        attention_mask: Optional[ttnn.Tensor] = None,
+        encoder_hidden_states: Optional[ttnn.Tensor] = None,
+        encoder_attention_mask: Optional[ttnn.Tensor] = None,
         layer_head_mask: Optional[torch.Tensor] = None,
         cross_attn_layer_head_mask: Optional[torch.Tensor] = None,
-        past_key_value: Optional[Tuple[tt_lib.tensor.Tensor]] = None,
+        past_key_value: Optional[Tuple[ttnn.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = True,
-    ) -> Tuple[tt_lib.tensor.Tensor]:
+    ) -> Tuple[ttnn.Tensor]:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@@ -221,7 +218,7 @@ def forward(
         if self.use_torch_gelu:
             torch_hidden_states = tt2torch_tensor(hidden_states)
             torch_hidden_states = torch.nn.functional.gelu(torch_hidden_states)
-            hidden_states = torch2tt_tensor(torch_hidden_states, self.device, tt_lib.tensor.Layout.ROW_MAJOR)
+            hidden_states = torch2tt_tensor(torch_hidden_states, self.device, ttnn.ROW_MAJOR_LAYOUT)
         else:
             hidden_states = ttnn.gelu(hidden_states)
diff --git a/models/experimental/whisper/tt/whisper_encoder.py b/models/experimental/whisper/tt/whisper_encoder.py
index 939a4905a7b..cc1dcb45a3a 100644
--- a/models/experimental/whisper/tt/whisper_encoder.py
+++ b/models/experimental/whisper/tt/whisper_encoder.py
@@ -4,7 +4,6 @@
 from functools import partial
 import math

-import tt_lib
 import torch
 import torch.nn as nn
 import random
@@ -29,9 +28,9 @@

 @dataclass
 class TtWhisperEncoderOutput:
-    last_hidden_state: tt_lib.tensor.Tensor = None
-    hidden_states: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
+    last_hidden_state: ttnn.Tensor = None
+    hidden_states: Optional[Tuple[ttnn.Tensor]] = None
+    attentions: Optional[Tuple[ttnn.Tensor]] = None


 class TtWhisperEncoder(nn.Module):
@@ -41,7 +40,7 @@ class TtWhisperEncoder(nn.Module):

     Args:
         reference_model: WhisperModel
-        device: device: tt_lib.device.Device
+        device: device: ttnn.Device
         config: WhisperConfig
     """

@@ -90,12 +89,12 @@ def __init__(self, state_dict, base_address, device, config: WhisperConfig):
         gamma = torch2tt_tensor(
             self.state_dict[f"{base_address}.layer_norm.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         beta = torch2tt_tensor(
             self.state_dict[f"{base_address}.layer_norm.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.layer_norm = partial(ttnn.layer_norm, weight=gamma, bias=beta, epsilon=1e-05)
@@ -114,13 +113,13 @@ def set_input_embeddings(self, value: nn.Module):

     def forward(
         self,
-        input_features: tt_lib.tensor.Tensor,  # bc of shape
-        attention_mask: Optional[tt_lib.tensor.Tensor] = None,  # NOT used in whisper
+        input_features: ttnn.Tensor,  # bc of shape
+        attention_mask: Optional[ttnn.Tensor] = None,  # NOT used in whisper
         head_mask: Optional[torch.Tensor] = None,  # bc of shape []
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-    ) -> Union[Tuple[tt_lib.tensor.Tensor], TtWhisperEncoderOutput]:
+    ) -> Union[Tuple[ttnn.Tensor], TtWhisperEncoderOutput]:
         """
         Args:
             input_features (`torch.LongTensor` of shape `(batch_size, feature_size, sequence_length)`):
@@ -165,7 +164,7 @@ def forward(
         """PyTorch implementation end"""

         """TT implementation"""
-        hidden_states = torch2tt_tensor(hidden_states, self.device, tt_lib.tensor.Layout.ROW_MAJOR)
+        hidden_states = torch2tt_tensor(hidden_states, self.device, ttnn.ROW_MAJOR_LAYOUT)

         # TODO: Not suppporting dropout at moment
         # hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
@@ -213,7 +212,7 @@ def custom_forward(*inputs):
                     torch2tt_tensor(
                         head_mask[idx],
                         self.device,
-                        tt_lib.tensor.Layout.ROW_MAJOR,
+                        ttnn.ROW_MAJOR_LAYOUT,
                     )
                     if head_mask is not None
                     else None
diff --git a/models/experimental/whisper/tt/whisper_encoder_layer.py b/models/experimental/whisper/tt/whisper_encoder_layer.py
index e47bc3b4415..ffbf36905fd 100644
--- a/models/experimental/whisper/tt/whisper_encoder_layer.py
+++ b/models/experimental/whisper/tt/whisper_encoder_layer.py
@@ -3,14 +3,11 @@
 # SPDX-License-Identifier: Apache-2.0

 from functools import partial
-import tt_lib
 import torch
 import torch.nn as nn
 import ttnn
 from typing import Tuple

-import ttnn
-
 from transformers import WhisperConfig

 from models.utility_functions import torch2tt_tensor, tt2torch_tensor
@@ -44,9 +41,7 @@ def __init__(
         self.embed_dim = embed_dim
         self.encoder_ffn_dim = encoder_ffn_dim
         self.use_torch_gelu = use_torch_gelu
-        self.out_mem_config_l1 = tt_lib.tensor.MemoryConfig(
-            tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1
-        )
+        self.out_mem_config_l1 = ttnn.L1_MEMORY_CONFIG

         self.self_attn = TtWhisperAttention(
             config=config,
@@ -59,12 +54,12 @@ def __init__(
         gamma = torch2tt_tensor(
             self.state_dict[f"{base_address}.self_attn_layer_norm.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         beta = torch2tt_tensor(
             self.state_dict[f"{base_address}.self_attn_layer_norm.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.self_attn_layer_norm = partial(ttnn.layer_norm, weight=gamma, bias=beta, epsilon=1e-05)
@@ -80,34 +75,34 @@ def __init__(
         self.fc1_weight = torch2tt_tensor(
             self.state_dict[f"{base_address}.fc1.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.fc1_bias = torch2tt_tensor(
             state_dict[f"{base_address}.fc1.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.fc2_weight = torch2tt_tensor(
             self.state_dict[f"{base_address}.fc2.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.fc2_bias = torch2tt_tensor(
             state_dict[f"{base_address}.fc2.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )

         gamma_1 = torch2tt_tensor(
             self.state_dict[f"{base_address}.final_layer_norm.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         beta_1 = torch2tt_tensor(
             self.state_dict[f"{base_address}.final_layer_norm.bias"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
         self.final_layer_norm = partial(ttnn.layer_norm, gamma=gamma_1, beta=beta_1, eps=1e-05)
@@ -119,11 +114,11 @@ def __init__(

     def forward(
         self,
-        hidden_states: tt_lib.tensor.Tensor,
-        attention_mask: tt_lib.tensor.Tensor,
-        layer_head_mask: tt_lib.tensor.Tensor,
+        hidden_states: ttnn.Tensor,
+        attention_mask: ttnn.Tensor,
+        layer_head_mask: ttnn.Tensor,
         output_attentions: bool = False,
-    ) -> Tuple[tt_lib.tensor.Tensor]:
+    ) -> Tuple[ttnn.Tensor]:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
@@ -158,7 +153,7 @@ def forward(
         if self.use_torch_gelu:
             torch_hidden_states = tt2torch_tensor(hidden_states)
             torch_hidden_states = torch.nn.functional.gelu(torch_hidden_states)
-            hidden_states = torch2tt_tensor(torch_hidden_states, self.device, tt_lib.tensor.Layout.ROW_MAJOR)
+            hidden_states = torch2tt_tensor(torch_hidden_states, self.device, ttnn.ROW_MAJOR_LAYOUT)
         else:
             hidden_states = ttnn.gelu(hidden_states)
@@ -178,7 +173,7 @@ def forward(

         hidden_states_torch = torch.clamp(hidden_states_torch, min=-clamp_value, max=clamp_value)

-        hidden_states = torch2tt_tensor(hidden_states_torch, self.device, tt_lib.tensor.Layout.ROW_MAJOR)
+        hidden_states = torch2tt_tensor(hidden_states_torch, self.device, ttnn.ROW_MAJOR_LAYOUT)

         outputs = (hidden_states,)
diff --git a/models/experimental/whisper/tt/whisper_for_audio_classification.py b/models/experimental/whisper/tt/whisper_for_audio_classification.py
index 65d7dcebaf7..20d26447380 100644
--- a/models/experimental/whisper/tt/whisper_for_audio_classification.py
+++ b/models/experimental/whisper/tt/whisper_for_audio_classification.py
@@ -2,7 +2,6 @@

 # SPDX-License-Identifier: Apache-2.0

-import tt_lib
 import ttnn
 import torch
 import torch.nn as nn
@@ -18,10 +17,10 @@

 @dataclass
 class TtWhisperForAudioClassificationOutput:
-    loss: Optional[tt_lib.tensor.Tensor] = None
-    logits: tt_lib.tensor.Tensor = None
-    hidden_states: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
+    loss: Optional[ttnn.Tensor] = None
+    logits: ttnn.Tensor = None
+    hidden_states: Optional[Tuple[ttnn.Tensor]] = None
+    attentions: Optional[Tuple[ttnn.Tensor]] = None


 class TtWhisperForAudioClassification(nn.Module):
@@ -46,21 +45,15 @@ def __init__(self, state_dict, device, config):
         weight_init_const = 1.0 / num_layers
         self.layer_weights = ttnn.full((1, 1, 1, num_layers), weight_init_const)
-        self.projector_weight = torch2tt_tensor(
-            state_dict[f"projector.weight"], self.device, tt_lib.tensor.Layout.ROW_MAJOR
-        )
-        self.projector_bias = torch2tt_tensor(
-            state_dict[f"projector.bias"], self.device, tt_lib.tensor.Layout.ROW_MAJOR
-        )
+        self.projector_weight = torch2tt_tensor(state_dict[f"projector.weight"], self.device, ttnn.ROW_MAJOR_LAYOUT)
+        self.projector_bias = torch2tt_tensor(state_dict[f"projector.bias"], self.device, ttnn.ROW_MAJOR_LAYOUT)
         self.classifier_weight = torch2tt_tensor(
             state_dict[f"classifier.weight"],
             self.device,
-            tt_lib.tensor.Layout.ROW_MAJOR,
-        )
-        self.classifier_bias = torch2tt_tensor(
-            state_dict[f"classifier.bias"], self.device, tt_lib.tensor.Layout.ROW_MAJOR
+            ttnn.ROW_MAJOR_LAYOUT,
         )
+        self.classifier_bias = torch2tt_tensor(state_dict[f"classifier.bias"], self.device, ttnn.ROW_MAJOR_LAYOUT)

     def freeze_encoder(self):
         """
@@ -77,14 +70,14 @@ def set_input_embeddings(self, value: nn.Module):

     def forward(
         self,
-        input_features: Optional[tt_lib.tensor.Tensor] = None,
+        input_features: Optional[ttnn.Tensor] = None,
         head_mask: Optional[torch.Tensor] = None,
-        encoder_outputs: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None,
+        encoder_outputs: Optional[Tuple[Tuple[ttnn.Tensor]]] = None,
         labels: Optional[torch.LongTensor] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-    ) -> Union[Tuple[tt_lib.tensor.Tensor], TtWhisperForAudioClassificationOutput]:
+    ) -> Union[Tuple[ttnn.Tensor], TtWhisperForAudioClassificationOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
@@ -156,7 +149,7 @@ def forward(
         torch_hidden_states = tt2torch_tensor(hidden_states)
         torch_pooled_output = torch_hidden_states.mean(dim=-2)
         # If something changes these dimension -2 should always work
-        pooled_output = torch2tt_tensor(torch_pooled_output, self.device, tt_lib.tensor.Layout.ROW_MAJOR)
+        pooled_output = torch2tt_tensor(torch_pooled_output, self.device, ttnn.ROW_MAJOR_LAYOUT)

         # Apply classifier layer
         logits = linear(pooled_output, self.classifier_weight, self.classifier_bias)
diff --git a/models/experimental/whisper/tt/whisper_for_conditional_generation.py b/models/experimental/whisper/tt/whisper_for_conditional_generation.py
index 0b3f24e33cd..fc6a2a6afc0 100644
--- a/models/experimental/whisper/tt/whisper_for_conditional_generation.py
+++ b/models/experimental/whisper/tt/whisper_for_conditional_generation.py
@@ -2,7 +2,7 @@

 # SPDX-License-Identifier: Apache-2.0

-import tt_lib
+import ttnn
 import torch
 import torch.nn as nn
 from dataclasses import dataclass
@@ -25,9 +25,7 @@
 from models.experimental.whisper.tt.whisper_model import TtWhisperModel


-def shift_tokens_right(
-    input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int
-):
+def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int):
     """
     Shift input ids one token to the right.
     """
@@ -46,14 +44,14 @@ def shift_tokens_right(
 @dataclass
 class TtWhisperLMOutput:
     loss: Optional[torch.FloatTensor] = None
-    logits: tt_lib.tensor.Tensor = None
-    past_key_values: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None
-    decoder_hidden_states: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    decoder_attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    cross_attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    encoder_last_hidden_state: Optional[tt_lib.tensor.Tensor] = None
-    encoder_hidden_states: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    encoder_attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
+    logits: ttnn.Tensor = None
+    past_key_values: Optional[Tuple[Tuple[ttnn.Tensor]]] = None
+    decoder_hidden_states: Optional[Tuple[ttnn.Tensor]] = None
+    decoder_attentions: Optional[Tuple[ttnn.Tensor]] = None
+    cross_attentions: Optional[Tuple[ttnn.Tensor]] = None
+    encoder_last_hidden_state: Optional[ttnn.Tensor] = None
+    encoder_hidden_states: Optional[Tuple[ttnn.Tensor]] = None
+    encoder_attentions: Optional[Tuple[ttnn.Tensor]] = None


 class TtWhisperForConditionalGeneration(nn.Module):
@@ -71,9 +69,7 @@ def __init__(self, state_dict, device, config: WhisperConfig):
             config=self.config,
         )

-        self.proj_out_weight = torch2tt_tensor(
-            state_dict[f"proj_out.weight"], self.device, tt_lib.tensor.Layout.ROW_MAJOR
-        )
+        self.proj_out_weight = torch2tt_tensor(state_dict[f"proj_out.weight"], self.device, ttnn.ROW_MAJOR_LAYOUT)

     def get_encoder(self):
         return self.model.get_encoder()
@@ -106,22 +102,22 @@ def freeze_encoder(self):

     def forward(
         self,
-        input_features: Optional[tt_lib.tensor.Tensor] = None,
+        input_features: Optional[ttnn.Tensor] = None,
         attention_mask: Optional[torch.LongTensor] = None,
         decoder_input_ids: Optional[torch.LongTensor] = None,
         decoder_attention_mask: Optional[torch.LongTensor] = None,
         head_mask: Optional[torch.Tensor] = None,
         decoder_head_mask: Optional[torch.Tensor] = None,
         cross_attn_head_mask: Optional[torch.Tensor] = None,
-        encoder_outputs: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None,
-        past_key_values: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None,
+        encoder_outputs: Optional[Tuple[Tuple[ttnn.Tensor]]] = None,
+        past_key_values: Optional[Tuple[Tuple[ttnn.Tensor]]] = None,
         decoder_inputs_embeds: Optional[Tuple[torch.FloatTensor]] = None,
         labels: Optional[torch.LongTensor] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-    ) -> Union[Tuple[tt_lib.tensor.Tensor], TtWhisperLMOutput]:
+    ) -> Union[Tuple[ttnn.Tensor], TtWhisperLMOutput]:
         """
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the language modeling loss. Indices should either be in `[0, ..., config.vocab_size]`
@@ -151,9 +147,7 @@ def forward(
         >>> transcription
         ' Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.'
         ```"""
-        return_dict = (
-            return_dict if return_dict is not None else self.config.use_return_dict
-        )
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

         """TODO: Used in training mode"""
         if labels is not None:
@@ -185,9 +179,7 @@ def forward(
         if labels is not None:
             # TODO: Not supporting Training in TTM for the moment
             loss_fct = nn.CrossEntropyLoss()
-            loss = loss_fct(
-                logits_to_torch.view(-1, self.config.vocab_size), labels.reshape(-1)
-            )
+            loss = loss_fct(logits_to_torch.view(-1, self.config.vocab_size), labels.reshape(-1))

         if not return_dict:
             output = (logits_to_torch,) + outputs[1:]
@@ -230,9 +222,5 @@ def prepare_inputs_for_generation(
     def _reorder_cache(past_key_values, beam_idx):
         reordered_past = ()
         for layer_past in past_key_values:
-            reordered_past += (
-                tuple(
-                    past_state.index_select(0, beam_idx) for past_state in layer_past
-                ),
-            )
+            reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
         return reordered_past
diff --git a/models/experimental/whisper/tt/whisper_model.py b/models/experimental/whisper/tt/whisper_model.py
index ef9853a3e7c..6546148367f 100644
--- a/models/experimental/whisper/tt/whisper_model.py
+++ b/models/experimental/whisper/tt/whisper_model.py
@@ -10,7 +10,7 @@

 from transformers import WhisperConfig

-import tt_lib
+import ttnn

 from models.experimental.whisper.tt.whisper_encoder import (
     TtWhisperEncoder,
     TtWhisperEncoderOutput,
 )
@@ -20,14 +20,14 @@

 @dataclass
 class TtWhisperModelOutput:
-    last_hidden_state: tt_lib.tensor.Tensor = None
-    past_key_values: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None
-    decoder_hidden_states: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    decoder_attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    cross_attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    encoder_last_hidden_state: Optional[tt_lib.tensor.Tensor] = None
-    encoder_hidden_states: Optional[Tuple[tt_lib.tensor.Tensor]] = None
-    encoder_attentions: Optional[Tuple[tt_lib.tensor.Tensor]] = None
+    last_hidden_state: ttnn.Tensor = None
+    past_key_values: Optional[Tuple[Tuple[ttnn.Tensor]]] = None
+    decoder_hidden_states: Optional[Tuple[ttnn.Tensor]] = None
+    decoder_attentions: Optional[Tuple[ttnn.Tensor]] = None
+    cross_attentions: Optional[Tuple[ttnn.Tensor]] = None
+    encoder_last_hidden_state: Optional[ttnn.Tensor] = None
+    encoder_hidden_states: Optional[Tuple[ttnn.Tensor]] = None
+    encoder_attentions: Optional[Tuple[ttnn.Tensor]] = None


 class TtWhisperModel(nn.Module):
@@ -35,9 +35,7 @@ class TtWhisperModel(nn.Module):
     The bare Whisper Model outputting raw hidden-states without any specific head on top."
""" - def __init__( - self, state_dict, device, base_address: str = "", config: WhisperConfig = None - ): + def __init__(self, state_dict, device, base_address: str = "", config: WhisperConfig = None): super().__init__() self.state_dict = state_dict @@ -114,9 +112,7 @@ def _mask_input_features( attention_mask=attention_mask, min_masks=self.config.mask_time_min_masks, ) - mask_time_indices = torch.tensor( - mask_time_indices, device=input_features.device, dtype=torch.bool - ) + mask_time_indices = torch.tensor(mask_time_indices, device=input_features.device, dtype=torch.bool) mask_time_indices = mask_time_indices[:, None].expand(-1, hidden_size, -1) input_features[mask_time_indices] = 0 @@ -128,30 +124,28 @@ def _mask_input_features( mask_length=self.config.mask_feature_length, min_masks=self.config.mask_feature_min_masks, ) - mask_feature_indices = torch.tensor( - mask_feature_indices, device=input_features.device, dtype=torch.bool - ) + mask_feature_indices = torch.tensor(mask_feature_indices, device=input_features.device, dtype=torch.bool) input_features[mask_feature_indices] = 0 return input_features def forward( self, - input_features: Optional[tt_lib.tensor.Tensor] = None, + input_features: Optional[ttnn.Tensor] = None, attention_mask: Optional[torch.LongTensor] = None, decoder_input_ids: Optional[torch.LongTensor] = None, decoder_attention_mask: Optional[torch.LongTensor] = None, head_mask: Optional[torch.Tensor] = None, decoder_head_mask: Optional[torch.Tensor] = None, cross_attn_head_mask: Optional[torch.Tensor] = None, - encoder_outputs: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None, - past_key_values: Optional[Tuple[Tuple[tt_lib.tensor.Tensor]]] = None, + encoder_outputs: Optional[Tuple[Tuple[ttnn.Tensor]]] = None, + past_key_values: Optional[Tuple[Tuple[ttnn.Tensor]]] = None, decoder_inputs_embeds: Optional[Tuple[torch.FloatTensor]] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> Union[Tuple[tt_lib.tensor.Tensor], TtWhisperModelOutput]: + ) -> Union[Tuple[ttnn.Tensor], TtWhisperModelOutput]: """ Returns: @@ -172,25 +166,15 @@ def forward( [1, 2, 512] ```""" - output_attentions = ( - output_attentions - if output_attentions is not None - else self.config.output_attentions - ) + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( - output_hidden_states - if output_hidden_states is not None - else self.config.output_hidden_states + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states ) use_cache = use_cache if use_cache is not None else self.config.use_cache - return_dict = ( - return_dict if return_dict is not None else self.config.use_return_dict - ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict if encoder_outputs is None: - input_features = self._mask_input_features( - input_features, attention_mask=attention_mask - ) + input_features = self._mask_input_features(input_features, attention_mask=attention_mask) encoder_outputs = self.encoder( input_features, diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv_with_address_map.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv_with_address_map.py index df07bbedb04..e77ebf43141 100644 --- a/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv_with_address_map.py +++ 
diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv_with_address_map.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv_with_address_map.py
index df07bbedb04..e77ebf43141 100644
--- a/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv_with_address_map.py
+++ b/tests/tt_eager/python_api_testing/sweep_tests/pytests/test_sweep_conv_with_address_map.py
@@ -6,7 +6,6 @@ from loguru import logger
 import ttnn
 import numpy as np
-import tt_lib as ttl
 from tt_lib.utils import _nearest_32, _nearest_y
 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import comp_pcc
 from tests.tt_eager.python_api_testing.conv.pytorch_conv_tb import (
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_attn_matmul.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_attn_matmul.py
index 0fe73b9ff1f..68bdc6501ad 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_attn_matmul.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_attn_matmul.py
@@ -55,7 +55,7 @@ def test_attn_matmul(num_loops, enable_async, in0_dtype, in1_dtype, out_dtype, d
         tt_input_tensor_a,
         tt_input_tensor_b,
         compute_with_storage_grid_size=ttnn.CoreCoord(compute_grid_size.x, compute_grid_size.y),
-        memory_config=ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        memory_config=ttnn.L1_MEMORY_CONFIG,
         dtype=out_dtype,
     )
     tt_input_tensor_a.deallocate()
@@ -101,7 +101,7 @@ def test_attn_matmul_fp32(num_loops, enable_async, in_dtype, device):
         tt_input_tensor_a,
         tt_input_tensor_b,
         compute_with_storage_grid_size=ttnn.CoreCoord(compute_grid_size.x, compute_grid_size.y),
-        memory_config=ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        memory_config=ttnn.L1_MEMORY_CONFIG,
         dtype=in_dtype,
         compute_kernel_config=compute_kernel_config,
     )
@@ -141,7 +141,7 @@ def test_attn_matmul_with_program_cache(
         tt_input_tensor_a,
         tt_input_tensor_b,
         compute_with_storage_grid_size=ttnn.CoreCoord(compute_grid_size.x, compute_grid_size.y),
-        memory_config=ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        memory_config=ttnn.L1_MEMORY_CONFIG,
         dtype=out_dtype,
     )
     tt_output_tensor = tt_output_tensor_on_device.cpu().to(ttnn.ROW_MAJOR_LAYOUT).to_torch()
@@ -201,7 +201,7 @@ def test_group_attn_matmul(

     compute_grid_size = device.compute_with_storage_grid_size()

-    interleaved_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM)
+    interleaved_mem_config = ttnn.DRAM_MEMORY_CONFIG

     # NOTE: Mixed precision is supported as well; but might not have enough space for larger seq_len with BFLOAT16
     in0_dtype = ttnn.bfloat8_b
@@ -291,7 +291,7 @@ def test_group_attn_matmul_with_program_cache(

     compute_grid_size = device.compute_with_storage_grid_size()

-    interleaved_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM)
+    interleaved_mem_config = ttnn.DRAM_MEMORY_CONFIG

     shard_orientation = ttnn.ShardOrientation.COL_MAJOR  # Only used if sharded
@@ -419,7 +419,7 @@ def test_group_attn_matmul_fp32(

     compute_grid_size = device.compute_with_storage_grid_size()

-    interleaved_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM)
+    interleaved_mem_config = ttnn.DRAM_MEMORY_CONFIG

     # NOTE: Mixed precision is supported as well; but might not have enough space for larger seq_len with BFLOAT16
     in0_dtype = in_dtype
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_bcast.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_bcast.py
index 4b78d2f55b7..2b5992b346e 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_bcast.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_bcast.py
@@ -45,7 +45,7 @@
 @pytest.mark.parametrize("in0_batch_size", [1, 2])
 @pytest.mark.parametrize(
     "orientation",
-    [ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, ttnn.experimental.tensor.ShardOrientation.COL_MAJOR],
+    [ttnn.ShardOrientation.ROW_MAJOR, ttnn.ShardOrientation.COL_MAJOR],
 )
 def test_bcast(
     device,
@@ -67,12 +67,12 @@ def test_bcast(
     if shard_strategy == ttnn.ShardStrategy.BLOCK:
         shard_grid = (
             (shard_grid[0], 4)
-            if shard_grid[1] == 8 and orientation == ttnn.experimental.tensor.ShardOrientation.COL_MAJOR
+            if shard_grid[1] == 8 and orientation == ttnn.ShardOrientation.COL_MAJOR
             else shard_grid
         )
         shard_grid = (
             (4, shard_grid[1])
-            if shard_grid[0] == 8 and orientation == ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
+            if shard_grid[0] == 8 and orientation == ttnn.ShardOrientation.ROW_MAJOR
             else shard_grid
         )
         input_shape = [in0_batch_size, 1, input_height, input_width]
@@ -92,7 +92,7 @@ def test_bcast(
         shard_orientation = orientation
         core_grid = (
             ttnn.CoreGrid(y=shard_grid[0], x=shard_grid[1])
-            if shard_orientation == ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
+            if shard_orientation == ttnn.ShardOrientation.ROW_MAJOR
             else ttnn.CoreGrid(y=shard_grid[1], x=shard_grid[0])
         )
     else:
@@ -108,7 +108,7 @@ def test_bcast(
     in_sharded_mem_config = ttnn.create_sharded_memory_config(
         shape=(
             (shard_height, shard_width)
-            if shard_orientation == ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
+            if shard_orientation == ttnn.ShardOrientation.ROW_MAJOR
             else (shard_width, shard_height)
         ),
         core_grid=core_grid,
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_distributed_layernorm_pre_allgather.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_distributed_layernorm_pre_allgather.py
index 84bc4994862..83648cdf223 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_distributed_layernorm_pre_allgather.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_distributed_layernorm_pre_allgather.py
@@ -103,7 +103,7 @@ def run_layernorm_part_1(inp_shape, n_devices, is_rmsnorm, input_dtype, output_d
     # out_torchfp32 = referencefp32(inp_chunked, n_devices, is_rmsnorm)
     # out_torchfp32 = torch.concat(out_torchfp32, -1)

-    dram_memcfg = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM)
+    dram_memcfg = ttnn.DRAM_MEMORY_CONFIG

     tt_inp = []
     for d in range(n_devices):
@@ -255,7 +255,7 @@ def test_layernorm_part_1_with_program_cache2(
 ):
     dummy_tensors = []

-    dram_memcfg = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM)
+    dram_memcfg = ttnn.DRAM_MEMORY_CONFIG

     for i in range(2):
         if i > 0:
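test_bcast above feeds the renamed `ttnn.ShardOrientation` enum into `ttnn.create_sharded_memory_config`. A condensed sketch of that call shape (the shard shape and grid are illustrative values, not taken from the test, and the exact keyword set may vary across ttnn versions):

    import ttnn

    core_grid = ttnn.CoreGrid(y=8, x=8)  # illustrative grid
    in_sharded_mem_config = ttnn.create_sharded_memory_config(
        shape=(64, 128),  # illustrative (height, width), swapped per orientation as in the test
        core_grid=core_grid,
        strategy=ttnn.ShardStrategy.BLOCK,
        orientation=ttnn.ShardOrientation.ROW_MAJOR,
    )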
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_downsample.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_downsample.py
index b9803f26d0e..2a17bd117ec 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_downsample.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_downsample.py
@@ -94,10 +94,7 @@ def test_run_downsample(
     A_cl_host = A_cl_host.pad(input_shape, (0, 0, 0, 0), 0.0)
     A_interleaved = A_cl_host.to(ttnn.TILE_LAYOUT).to(
         device,
-        ttnn.MemoryConfig(
-            memory_layout=ttnn.TensorMemoryLayout.INTERLEAVED,
-            buffer_type=ttnn.BufferType.L1,
-        ),
+        ttnn.L1_MEMORY_CONFIG,
     )
     assert A_interleaved.get_legacy_shape()[0] == 1 and A_interleaved.get_legacy_shape()[1] == 1
@@ -142,7 +139,7 @@ def test_run_downsample(
     A_downampled_sharded = ttnn.downsample(A_sharded, downsample_params, dtype=dtype)
     A_downsampled = ttnn.sharded_to_interleaved(
         A_downampled_sharded,
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        ttnn.L1_MEMORY_CONFIG,
     )
     out = A_downsampled
     out_shape = [1, 1, _nearest_y(batch_size * output_height * output_width, 32), input_channels]
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py
index afed7160517..27045816934 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_eps.py
@@ -31,7 +31,7 @@ def test_run_sfpu_eps(device):
 def test_run_sfpu_tensor(device):
     value = device.sfpu_eps()
     shape = [1, 1, 32, 32]
-    eps = ttnn.full(ttnn.experimental.tensor.Shape(shape), value)
+    eps = ttnn.full(ttnn.Shape(shape), value)
     eps = eps.cpu().to(ttnn.ROW_MAJOR_LAYOUT).to_torch()
     passing = np.isclose(np.ones((1, 1, 32, 32)) * value, eps.float()).all()
     assert passing
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_concat_heads_decode.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_concat_heads_decode.py
index 400a936e2b9..108b649f662 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_concat_heads_decode.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_concat_heads_decode.py
@@ -27,9 +27,9 @@ def num_to_corerange(x):
     num_x = min(x, 8)
     num_y = x // num_x
     assert num_x * num_y == x
-    return ttnn.experimental.tensor.CoreRange(
-        ttnn.experimental.tensor.CoreCoord(0, 0),
-        ttnn.experimental.tensor.CoreCoord(num_x - 1, num_y - 1),
+    return ttnn.CoreRange(
+        ttnn.CoreCoord(0, 0),
+        ttnn.CoreCoord(num_x - 1, num_y - 1),
     )
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_create_qkv_heads.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_create_qkv_heads.py
index 66013779163..7532fcae575 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_create_qkv_heads.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_nlp_create_qkv_heads.py
@@ -71,16 +71,16 @@ def run_nlp_create_qkv_heads_falcon7b_test(batch, seq_len, dtype, in0_mem_config
 @pytest.mark.parametrize(
     "out_mem_config",
     (
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM),
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        ttnn.DRAM_MEMORY_CONFIG,
+        ttnn.L1_MEMORY_CONFIG,
     ),
     ids=["out_DRAM", "out_L1"],
 )
 @pytest.mark.parametrize(
     "in0_mem_config",
     (
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM),
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        ttnn.DRAM_MEMORY_CONFIG,
+        ttnn.L1_MEMORY_CONFIG,
     ),
     ids=["in0_DRAM", "in0_L1"],
 )
@@ -106,14 +106,14 @@ def test_nlp_create_qkv_heads_falcon7b_test(batch, seq_len, dtype, in0_mem_confi

 def test_nlp_create_qkv_heads_falcon7b_with_program_cache(device, use_program_cache):
     dtype = ttnn.bfloat8_b
-    mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM)
+    mem_config = ttnn.DRAM_MEMORY_CONFIG
     for _ in range(2):
         run_nlp_create_qkv_heads_falcon7b_test(1, 32, dtype, mem_config, mem_config, device)
         dummy_shape = [1, 1, 32, 32]
         py_dummy_tensor = torch.randn(dummy_shape)
         tt_dummy_tensor = ttnn.Tensor(py_dummy_tensor, dtype).to(ttnn.TILE_LAYOUT).to(device, mem_config)

-    mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1)
+    mem_config = ttnn.L1_MEMORY_CONFIG
     for _ in range(2):
@@ -226,16 +226,16 @@ def run_nlp_create_qkv_heads_test(
 @pytest.mark.parametrize(
     "out_mem_config",
     (
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM),
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        ttnn.DRAM_MEMORY_CONFIG,
+        ttnn.L1_MEMORY_CONFIG,
     ),
     ids=["out_DRAM", "out_L1"],
 )
 @pytest.mark.parametrize(
     "in_mem_config",
     (
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM),
-        ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1),
+        ttnn.DRAM_MEMORY_CONFIG,
+        ttnn.L1_MEMORY_CONFIG,
    ),
     ids=["in_DRAM", "in_L1"],
 )
@@ -268,11 +268,7 @@ def test_nlp_create_qkv_heads_test(
 ):
     if is_grayskull() and dtype == ttnn.float32:
         pytest.skip("Skipping float32 tests on Grayskull")
-    if (
-        dtype == ttnn.float32
-        and (batch == 111 or batch == 5)
-        and in_mem_config == ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1)
-    ):
+    if dtype == ttnn.float32 and (batch == 111 or batch == 5) and in_mem_config == ttnn.L1_MEMORY_CONFIG:
         logger.warning("fp32 tensor too large to fit L1")
     else:
         run_nlp_create_qkv_heads_test(
@@ -292,7 +288,7 @@ def test_nlp_create_qkv_heads_test(
 def test_nlp_create_qkv_heads_with_program_cache(device, use_program_cache):
     dtype = ttnn.bfloat8_b
-    mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1)
+    mem_config = ttnn.L1_MEMORY_CONFIG
     for _ in range(2):
         run_nlp_create_qkv_heads_test(5, 1024, 64, 4, 2, True, False, dtype, mem_config, mem_config, device)
         # Same in0_shape to make sure cache misses if we have additional optional tensor works
@@ -464,7 +460,7 @@ def test_sharded_nlp_create_qkv_heads_test(
 def test_sharded_nlp_create_qkv_heads_with_program_cache(device, use_program_cache):
     dtype = ttnn.bfloat8_b
-    mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1)
+    mem_config = ttnn.L1_MEMORY_CONFIG
     for _ in range(2):
         run_sharded_nlp_create_qkv_heads_test(32, 1, 64, 16, 8, False, dtype, device)
         # Same in0_shape to make sure cache misses if we have additional optional tensor works
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_resnet50_first_conv.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_resnet50_first_conv.py
index fbb28acb9d7..517ee61d2da 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_resnet50_first_conv.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_resnet50_first_conv.py
@@ -142,7 +142,7 @@ def test_resnet50_first_conv(
         extra_pad_w_right=1 + extra_padding_for_32B_alignment,
     )
     print("A_cl_host shape", A_cl_host.get_legacy_shape())
-    memory_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.L1)
+    memory_config = ttnn.L1_MEMORY_CONFIG
     # save original shape (N, H, W, C)
     original_A_cl_host_shape = A_cl_host.get_legacy_shape()
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py
index 4c767fb2024..e88b7bd3c68 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_rotary_embedding_llama.py
@@ -241,9 +241,9 @@ def test_rotary_embedding_llama(
             inp.reshape(-1).tolist(),
             inp.shape,
             ttnn.bfloat16,
-            ttnn.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
-        .to(ttnn.Layout.TILE)
+        .to(ttnn.TILE_LAYOUT)
         .to(devices[0])
     )
@@ -299,9 +299,9 @@ def test_rotary_embedding_llama_with_program_cache(
             inp.reshape(-1).tolist(),
             inp.shape,
             ttnn.bfloat16,
-            ttnn.Layout.ROW_MAJOR,
+            ttnn.ROW_MAJOR_LAYOUT,
         )
-        .to(ttnn.Layout.TILE)
+        .to(ttnn.TILE_LAYOUT)
         .to(devices[0])
     )
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py
index 4f58795a9f2..6ba3d2779e8 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode.py
@@ -42,9 +42,9 @@ def num_to_corerange(x):
     num_x = min(x, 8)
     num_y = x // num_x
     assert num_x * num_y == x
-    return ttnn.experimental.tensor.CoreRange(
-        ttnn.experimental.tensor.CoreCoord(0, 0),
-        ttnn.experimental.tensor.CoreCoord(num_x - 1, num_y - 1),
+    return ttnn.CoreRange(
+        ttnn.CoreCoord(0, 0),
+        ttnn.CoreCoord(num_x - 1, num_y - 1),
     )
@@ -198,16 +198,12 @@ def run_test_sdpa_decode_multi_pos(
         fp32_dest_acc_en=False,
         packer_l1_acc=False,
     )
-    dram_memcfg = ttnn.types.MemoryConfig(ttnn.types.TensorMemoryLayout.INTERLEAVED, ttnn.types.BufferType.DRAM)
+    dram_memcfg = ttnn.DRAM_MEMORY_CONFIG
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet({num_to_corerange(b)})
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, (padded_num_heads, d), ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_grid = ttnn.CoreRangeSet({num_to_corerange(b)})
+    shard_spec = ttnn.ShardSpec(shard_grid, (padded_num_heads, d), ttnn.ShardOrientation.ROW_MAJOR, False)
-    height_sharded_memcfg = ttnn.types.MemoryConfig(
-        ttnn.types.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.types.BufferType.L1, shard_spec
-    )
+    height_sharded_memcfg = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, shard_spec)
     K = fa_rand(nkv, b, s, d)
     V = fa_rand(nkv, b, s, d)
@@ -335,16 +331,12 @@ def run_test_sdpa_decode_single_iter(
         fp32_dest_acc_en=False,
         packer_l1_acc=False,
     )
-    dram_memcfg = ttnn.types.MemoryConfig(ttnn.types.TensorMemoryLayout.INTERLEAVED, ttnn.types.BufferType.DRAM)
+    dram_memcfg = ttnn.DRAM_MEMORY_CONFIG
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet({num_to_corerange(b)})
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, (padded_num_heads, d), ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_grid = ttnn.CoreRangeSet({num_to_corerange(b)})
+    shard_spec = ttnn.ShardSpec(shard_grid, (padded_num_heads, d), ttnn.ShardOrientation.ROW_MAJOR, False)
-    height_sharded_memcfg = ttnn.types.MemoryConfig(
-        ttnn.types.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.types.BufferType.L1, shard_spec
-    )
+    height_sharded_memcfg = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, shard_spec)
     K = fa_rand(nkv, b, s, d)
     V = fa_rand(nkv, b, s, d)
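The one-line `ttnn.ShardSpec` / `ttnn.MemoryConfig` forms introduced in these hunks compose as follows. A minimal standalone sketch with placeholder dimensions (`b`, `padded_num_heads`, and `d` are derived from test parameters in the real file):

```python
import ttnn

def num_to_corerange(x):
    # Same helper as in the test: one CoreRange covering x cores, packed 8 wide.
    num_x = min(x, 8)
    num_y = x // num_x
    assert num_x * num_y == x
    return ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(num_x - 1, num_y - 1))

b, padded_num_heads, d = 8, 32, 128  # placeholder dimensions
shard_grid = ttnn.CoreRangeSet({num_to_corerange(b)})
shard_spec = ttnn.ShardSpec(shard_grid, (padded_num_heads, d), ttnn.ShardOrientation.ROW_MAJOR, False)
# Height-sharded L1 config: each core holds a (padded_num_heads, d) slice.
height_sharded_memcfg = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, shard_spec)
```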
@@ -578,9 +570,7 @@ def test_sdpa_decode_program_cache(device, b, nh, nkv, s, d, dtype, use_program_
             device=device,
             dtype=dtype,
             layout=ttnn.TILE_LAYOUT,
-            memory_config=ttnn.types.MemoryConfig(
-                ttnn.types.TensorMemoryLayout.INTERLEAVED, ttnn.types.BufferType.DRAM
-            ),
+            memory_config=ttnn.DRAM_MEMORY_CONFIG,
         )
     )
     dummy_tensors.append(
@@ -589,13 +579,13 @@ def test_sdpa_decode_program_cache(device, b, nh, nkv, s, d, dtype, use_program_
             device=device,
             dtype=dtype,
             layout=ttnn.TILE_LAYOUT,
-            memory_config=ttnn.types.MemoryConfig(
-                ttnn.types.TensorMemoryLayout.HEIGHT_SHARDED,
-                ttnn.types.BufferType.L1,
-                ttnn.experimental.tensor.ShardSpec(
-                    ttnn.experimental.tensor.CoreRangeSet({num_to_corerange(32)}),
+            memory_config=ttnn.MemoryConfig(
+                ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+                ttnn.BufferType.L1,
+                ttnn.ShardSpec(
+                    ttnn.CoreRangeSet({num_to_corerange(32)}),
                     (32, 32),
-                    ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR,
+                    ttnn.ShardOrientation.ROW_MAJOR,
                     False,
                 ),
             ),
@@ -675,7 +665,7 @@ def run_test_sdpa_decode_ndpcc(device, b, nh, nkv, s, d, dtype, grid_size, q_dty
         fp32_dest_acc_en=False,
         packer_l1_acc=False,
     )
-    dram_memcfg = ttnn.types.MemoryConfig(ttnn.types.TensorMemoryLayout.INTERLEAVED, ttnn.types.BufferType.DRAM)
+    dram_memcfg = ttnn.DRAM_MEMORY_CONFIG
     K = fa_rand(nkv, b, s, d)
     V = fa_rand(nkv, b, s, d)
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode_gqa.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode_gqa.py
index 87c2e9e8c3b..5ea854dc310 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode_gqa.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_scaled_dot_product_attention_decode_gqa.py
@@ -42,9 +42,9 @@ def num_to_corerange(x):
     num_x = min(x, 8)
     num_y = x // num_x
     assert num_x * num_y == x
-    return ttnn.experimental.tensor.CoreRange(
-        ttnn.experimental.tensor.CoreCoord(0, 0),
-        ttnn.experimental.tensor.CoreCoord(num_x - 1, num_y - 1),
+    return ttnn.CoreRange(
+        ttnn.CoreCoord(0, 0),
+        ttnn.CoreCoord(num_x - 1, num_y - 1),
     )
@@ -103,7 +103,7 @@ def run_test_sdpa_decode_single_iter(
         fp32_dest_acc_en=False,
         packer_l1_acc=False,
     )
-    dram_memcfg = ttnn.types.MemoryConfig(ttnn.types.TensorMemoryLayout.INTERLEAVED, ttnn.types.BufferType.DRAM)
+    dram_memcfg = ttnn.DRAM_MEMORY_CONFIG
     K = fa_rand(b, nkv, s, d)
     V = fa_rand(b, nkv, s, d)
diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_tensor.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_tensor.py
index cc4e050438e..f7b7ab08fa9 100644
--- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_tensor.py
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_tensor.py
@@ -51,9 +51,9 @@ def test_tensor_conversion_between_torch_and_tt(shape, tt_dtype, device):
         ttnn.uint32,
         ttnn.uint16,
     }:
-        assert tt_tensor.storage_type() == ttnn.experimental.tensor.StorageType.BORROWED
+        assert tt_tensor.storage_type() == ttnn.StorageType.BORROWED
     else:
-        assert tt_tensor.storage_type() == ttnn.experimental.tensor.StorageType.OWNED
+        assert tt_tensor.storage_type() == ttnn.StorageType.OWNED
     if tt_dtype in {ttnn.bfloat8_b, ttnn.bfloat4_b}:
         tt_tensor = tt_tensor.to(ttnn.TILE_LAYOUT)
@@ -119,7 +119,7 @@ def test_tensor_conversion_between_torch_and_np(shape, tt_dtype, device):
     tt_tensor = ttnn.Tensor(np_tensor, tt_dtype)
     if tt_dtype in {ttnn.float32, ttnn.uint32, ttnn.uint16}:
-        assert tt_tensor.storage_type() == ttnn.experimental.tensor.StorageType.BORROWED
+        assert tt_tensor.storage_type() == ttnn.StorageType.BORROWED
     if tt_dtype in {
         ttnn.float32,
diff --git a/tests/ttnn/integration_tests/resnet/test_ttnn_functional_resnet.py b/tests/ttnn/integration_tests/resnet/test_ttnn_functional_resnet.py
index 4c5f4516e81..d47b6d3e8f7 100644
--- a/tests/ttnn/integration_tests/resnet/test_ttnn_functional_resnet.py
+++ b/tests/ttnn/integration_tests/resnet/test_ttnn_functional_resnet.py
@@ -33,24 +33,24 @@
 def create_core_range_set_from_ncores(ncores: int, bb_ncores_w: int, bb_ncores_h: int):
     bb_ncores = bb_ncores_w * bb_ncores_h  ## total cores in the bounding box grid
     if ncores == bb_ncores:  ## no last partial core row
-        return ttnn.experimental.tensor.CoreRangeSet(
+        return ttnn.CoreRangeSet(
             {
-                ttnn.experimental.tensor.CoreRange(
-                    ttnn.experimental.tensor.CoreCoord(0, 0),
-                    ttnn.experimental.tensor.CoreCoord(bb_ncores_w - 1, bb_ncores_h - 1),
+                ttnn.CoreRange(
+                    ttnn.CoreCoord(0, 0),
+                    ttnn.CoreCoord(bb_ncores_w - 1, bb_ncores_h - 1),
                 )
             }
         )
     elif ncores < bb_ncores:  ## with last partial core row
-        return ttnn.experimental.tensor.CoreRangeSet(
+        return ttnn.CoreRangeSet(
             {
-                ttnn.experimental.tensor.CoreRange(
-                    ttnn.experimental.tensor.CoreCoord(0, 0),
-                    ttnn.experimental.tensor.CoreCoord(bb_ncores_w - 1, bb_ncores_h - 2),
+                ttnn.CoreRange(
+                    ttnn.CoreCoord(0, 0),
+                    ttnn.CoreCoord(bb_ncores_w - 1, bb_ncores_h - 2),
                 ),
-                ttnn.experimental.tensor.CoreRange(
-                    ttnn.experimental.tensor.CoreCoord(0, bb_ncores_h - 1),
-                    ttnn.experimental.tensor.CoreCoord(ncores % bb_ncores_w - 1, bb_ncores_h - 1),
+                ttnn.CoreRange(
+                    ttnn.CoreCoord(0, bb_ncores_h - 1),
+                    ttnn.CoreCoord(ncores % bb_ncores_w - 1, bb_ncores_h - 1),
                 ),
             }
         )
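Since this helper is where the partial-last-row case is expressed, a short worked example may help. A hypothetical call, reusing the function's own branch logic (values chosen for illustration only):

```python
import ttnn

# 7 cores packed into a 3-wide, 3-tall bounding box hits the elif branch:
core_range_set = create_core_range_set_from_ncores(ncores=7, bb_ncores_w=3, bb_ncores_h=3)
# Expected contents:
#   ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(2, 1))  # two full rows of 3 cores
#   ttnn.CoreRange(ttnn.CoreCoord(0, 2), ttnn.CoreCoord(0, 2))  # 7 % 3 = 1 leftover core in row 2
```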
diff --git a/tests/ttnn/integration_tests/stable_diffusion/test_sharded_attention.py b/tests/ttnn/integration_tests/stable_diffusion/test_sharded_attention.py
index 0d6f11b8e2b..a8f01c37002 100644
--- a/tests/ttnn/integration_tests/stable_diffusion/test_sharded_attention.py
+++ b/tests/ttnn/integration_tests/stable_diffusion/test_sharded_attention.py
@@ -928,7 +928,7 @@ def test_q_and_kv(
         in_1,
         program_config=program_config,
         memory_config=block_sharded_memory_config,
-        dtype=ttnn.experimental.tensor.DataType.BFLOAT8_B,
+        dtype=ttnn.bfloat8_b,
         compute_kernel_config=compute_kernel_config,
     )
     in_0_sharded.deallocate()
diff --git a/tests/ttnn/integration_tests/stable_diffusion/test_sharded_matmuls.py b/tests/ttnn/integration_tests/stable_diffusion/test_sharded_matmuls.py
index 72f2e7dddd6..304b49c9d12 100644
--- a/tests/ttnn/integration_tests/stable_diffusion/test_sharded_matmuls.py
+++ b/tests/ttnn/integration_tests/stable_diffusion/test_sharded_matmuls.py
@@ -1632,24 +1632,20 @@ def test_matmul(
         in_0_torch,
         device,
         tt_memory_config=l1_interleaved_memory_config,
-        tt_dtype=(
-            ttnn.experimental.tensor.DataType.BFLOAT8_B
-            if input_dtype == "BFLOAT8_B"
-            else ttnn.experimental.tensor.DataType.BFLOAT16
-        ),
+        tt_dtype=(ttnn.bfloat8_b if input_dtype == "BFLOAT8_B" else ttnn.bfloat16),
     )
     in_1 = torch2tt_tensor(
         in_1_torch,
         device,
         tt_memory_config=l1_interleaved_memory_config,
-        tt_dtype=ttnn.experimental.tensor.DataType.BFLOAT8_B,
+        tt_dtype=ttnn.bfloat8_b,
     )
     if bias:
         in_2 = torch2tt_tensor(
             in_2_torch,
             device,
             tt_memory_config=l1_interleaved_memory_config,
-            tt_dtype=ttnn.experimental.tensor.DataType.BFLOAT8_B,
+            tt_dtype=ttnn.bfloat8_b,
         )
     compute_kernel_config = ttnn.WormholeComputeKernelConfig(
@@ -1665,24 +1661,16 @@ def test_matmul(
             in_0,
             grid_size,
             [M * Z0 * W0 // logical_grid[0], K // logical_grid[1]],
-            ttnn.experimental.tensor.TensorMemoryLayout.BLOCK_SHARDED,
-            (
-                ttnn.experimental.tensor.ShardOrientation.COL_MAJOR
-                if transpose_mcast
-                else ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
-            ),
+            ttnn.TensorMemoryLayout.BLOCK_SHARDED,
+            (ttnn.ShardOrientation.COL_MAJOR if transpose_mcast else ttnn.ShardOrientation.ROW_MAJOR),
         )
     elif input_mem_config == "DEV_0_L1_HEIGHT_SHARDED":
         in_0 = ttnn.interleaved_to_sharded(
             in_0,
             grid_size,
             [round_up_to_tile_dim(M * Z0 * W0 // (grid_size[0] * grid_size[1])), round_up_to_tile_dim(K)],
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            (
-                ttnn.experimental.tensor.ShardOrientation.COL_MAJOR
-                if transpose_mcast
-                else ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
-            ),
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            (ttnn.ShardOrientation.COL_MAJOR if transpose_mcast else ttnn.ShardOrientation.ROW_MAJOR),
         )
     elif input_mem_config == "DEV_0_L1_INTERLEAVED":
         in_0 = ttnn.to_memory_config(in_0, l1_interleaved_memory_config)
@@ -1745,11 +1733,7 @@ def test_matmul(
         bias=in_2 if bias else None,
         program_config=program_config,
         memory_config=output_mem_config,
-        dtype=(
-            ttnn.experimental.tensor.DataType.BFLOAT8_B
-            if output_dtype == "BFLOAT8_B"
-            else ttnn.experimental.tensor.DataType.BFLOAT16
-        ),
+        dtype=(ttnn.bfloat8_b if output_dtype == "BFLOAT8_B" else ttnn.bfloat16),
         compute_kernel_config=compute_kernel_config,
     )
diff --git a/tests/ttnn/integration_tests/vit/test_accuracy_ttnn_optim_interleaved_vit.py b/tests/ttnn/integration_tests/vit/test_accuracy_ttnn_optim_interleaved_vit.py
index b8ddd66f54f..beeeefe8120 100644
--- a/tests/ttnn/integration_tests/vit/test_accuracy_ttnn_optim_interleaved_vit.py
+++ b/tests/ttnn/integration_tests/vit/test_accuracy_ttnn_optim_interleaved_vit.py
@@ -118,29 +118,27 @@ def test_accuracy(
     patch_size = 16
     torch_pixel_values = torch_pixel_values.reshape(batch_size, img_h, img_w // patch_size, 4 * patch_size)
     N, H, W, C = torch_pixel_values.shape
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 0),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 0),
             ),
         }
     )
     n_cores = 8
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, [N * H * W // n_cores, C], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, [N * H * W // n_cores, C], ttnn.ShardOrientation.ROW_MAJOR, False)
     tt_inputs = torch2tt_tensor(
         torch_pixel_values,
         device,
-        ttnn.experimental.tensor.Layout.ROW_MAJOR,
-        tt_memory_config=ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        ttnn.ROW_MAJOR_LAYOUT,
+        tt_memory_config=ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         ),
-        tt_dtype=ttnn.experimental.tensor.DataType.BFLOAT16,
+        tt_dtype=ttnn.bfloat16,
     )
     if torch_attention_mask is not None:
diff --git a/tests/ttnn/integration_tests/vit/test_ttnn_optimized_interleaved_vit.py b/tests/ttnn/integration_tests/vit/test_ttnn_optimized_interleaved_vit.py
index b10991c2ee4..d4707b0978a 100644
--- a/tests/ttnn/integration_tests/vit/test_ttnn_optimized_interleaved_vit.py
+++ b/tests/ttnn/integration_tests/vit/test_ttnn_optimized_interleaved_vit.py
@@ -48,29 +48,27 @@ def test_vit_patch_embeddings(device, model_name, batch_size, image_size, image_
     patch_size = 16
     torch_pixel_values = torch_pixel_values.reshape(batch_size, img_h, img_w // patch_size, 4 * patch_size)
     N, H, W, C = torch_pixel_values.shape
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 0),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 0),
             ),
         }
     )
     n_cores = 8
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, [N * H * W // n_cores, C], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, [N * H * W // n_cores, C], ttnn.ShardOrientation.ROW_MAJOR, False)
     pixel_values = torch2tt_tensor(
         torch_pixel_values,
         device,
-        ttnn.experimental.tensor.Layout.ROW_MAJOR,
-        tt_memory_config=ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        ttnn.ROW_MAJOR_LAYOUT,
+        tt_memory_config=ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         ),
-        tt_dtype=ttnn.experimental.tensor.DataType.BFLOAT16,
+        tt_dtype=ttnn.bfloat16,
     )
     output = ttnn_optimized_interleaved_vit.vit_patch_embeddings(
@@ -129,29 +127,27 @@ def test_vit_embeddings(device, model_name, batch_size, image_size, image_channe
     patch_size = 16
     torch_pixel_values = torch_pixel_values.reshape(batch_size, img_h, img_w // patch_size, 4 * patch_size)
     N, H, W, C = torch_pixel_values.shape
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 0),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 0),
             ),
         }
     )
     n_cores = 8
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, [N * H * W // n_cores, C], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, [N * H * W // n_cores, C], ttnn.ShardOrientation.ROW_MAJOR, False)
     pixel_values = torch2tt_tensor(
         torch_pixel_values,
         device,
-        ttnn.experimental.tensor.Layout.ROW_MAJOR,
-        tt_memory_config=ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        ttnn.ROW_MAJOR_LAYOUT,
+        tt_memory_config=ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         ),
-        tt_dtype=ttnn.experimental.tensor.DataType.BFLOAT16,
+        tt_dtype=ttnn.bfloat16,
     )
     output = ttnn_optimized_interleaved_vit.vit_embeddings(
@@ -443,29 +439,27 @@ def test_vit(device, model_name, batch_size, image_size, image_channels, sequenc
     patch_size = 16
     torch_pixel_values = torch_pixel_values.reshape(batch_size, img_h, img_w // patch_size, 4 * patch_size)
     N, H, W, C = torch_pixel_values.shape
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 0),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 0),
            ),
         }
     )
     n_cores = 8
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, [N * H * W // n_cores, C], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, [N * H * W // n_cores, C], ttnn.ShardOrientation.ROW_MAJOR, False)
     pixel_values = torch2tt_tensor(
         torch_pixel_values,
         device,
-        ttnn.experimental.tensor.Layout.ROW_MAJOR,
-        tt_memory_config=ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        ttnn.ROW_MAJOR_LAYOUT,
+        tt_memory_config=ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         ),
-        tt_dtype=ttnn.experimental.tensor.DataType.BFLOAT16,
+        tt_dtype=ttnn.bfloat16,
     )
     # pixel_values = ttnn.from_torch(pixel_values, dtype=ttnn.bfloat16, layout=ttnn.ROW_MAJOR_LAYOUT, device=device)
diff --git a/tests/ttnn/integration_tests/vit/test_ttnn_optimized_sharded_vit.py b/tests/ttnn/integration_tests/vit/test_ttnn_optimized_sharded_vit.py
index 40a326866d9..37a6a1010a9 100644
--- a/tests/ttnn/integration_tests/vit/test_ttnn_optimized_sharded_vit.py
+++ b/tests/ttnn/integration_tests/vit/test_ttnn_optimized_sharded_vit.py
@@ -48,27 +48,25 @@ def test_vit_patch_embeddings(device, model_name, batch_size, image_size, image_
     patch_size = 16
     torch_pixel_values = torch_pixel_values.reshape(batch_size, img_h, img_w // patch_size, 4 * patch_size)
     N, H, W, C = torch_pixel_values.shape
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 0),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 0),
             ),
         }
     )
     n_cores = 8
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, [N * H * W // n_cores, C], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, [N * H * W // n_cores, C], ttnn.ShardOrientation.ROW_MAJOR, False)
     pixel_values = ttnn.from_torch(
         torch_pixel_values,
         dtype=ttnn.bfloat16,
         layout=ttnn.ROW_MAJOR_LAYOUT,
         device=device,
-        memory_config=ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        memory_config=ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         ),
     )
@@ -137,27 +135,25 @@ def test_vit_embeddings(device, model_name, batch_size, image_size, image_channe
     patch_size = 16
     torch_pixel_values = torch_pixel_values.reshape(batch_size, img_h, img_w // patch_size, 4 * patch_size)
     N, H, W, C = torch_pixel_values.shape
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 0),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 0),
             ),
         }
     )
     n_cores = 8
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, [N * H * W // n_cores, C], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, [N * H * W // n_cores, C], ttnn.ShardOrientation.ROW_MAJOR, False)
     pixel_values = ttnn.from_torch(
         torch_pixel_values,
         dtype=ttnn.bfloat16,
         layout=ttnn.ROW_MAJOR_LAYOUT,
         device=device,
-        memory_config=ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        memory_config=ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         ),
     )
@@ -490,27 +486,25 @@ def test_vit(device, model_name, batch_size, image_size, image_channels, sequenc
     patch_size = 16
     torch_pixel_values = torch_pixel_values.reshape(batch_size, img_h, img_w // patch_size, 4 * patch_size)
     N, H, W, C = torch_pixel_values.shape
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 0),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 0),
             ),
         }
     )
     n_cores = 8
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, [N * H * W // n_cores, C], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, [N * H * W // n_cores, C], ttnn.ShardOrientation.ROW_MAJOR, False)
     pixel_values = ttnn.from_torch(
         torch_pixel_values,
         dtype=ttnn.bfloat16,
         layout=ttnn.ROW_MAJOR_LAYOUT,
         device=device,
-        memory_config=ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        memory_config=ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         ),
     )
diff --git a/tests/ttnn/multichip_unit_tests/test_multidevice_TG.py b/tests/ttnn/multichip_unit_tests/test_multidevice_TG.py
index f81adc0d56b..5371a7e43d8 100644
--- a/tests/ttnn/multichip_unit_tests/test_multidevice_TG.py
+++ b/tests/ttnn/multichip_unit_tests/test_multidevice_TG.py
@@ -891,7 +891,7 @@ def run_test_sdpa_decode_single_iter(
         fp32_dest_acc_en=False,
         packer_l1_acc=False,
     )
-    dram_memcfg = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM)
+    dram_memcfg = ttnn.DRAM_MEMORY_CONFIG
     mesh_shape = ttnn.CoreRangeSet({num_to_corerange(b)})
     shard_spec = ttnn.ShardSpec(mesh_shape, (padded_num_heads, d), ttnn.ShardOrientation.ROW_MAJOR, False)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/bias_gelu_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/bias_gelu_output.py
index edf4846195d..8aa6883ed40 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/bias_gelu_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/bias_gelu_output.py
@@ -74,7 +74,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.bias_gelu(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.bias_gelu(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/eq_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/eq_output.py
index 26ec2319588..6441fcbf72c 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/eq_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/eq_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.eq(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.eq(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/ge_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/ge_output.py
index c305c8cef4b..a8698602ec6 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/ge_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/ge_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.gte(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.ge(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/gt_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/gt_output.py
index bea70fec796..acef8f58c2e 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/gt_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/gt_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.gt(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.gt(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
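The sweep updates in this run of hunks all share one pattern: a preallocated output tensor is handed to the flat top-level eltwise op via `output_tensor=`. A minimal standalone sketch of that call shape; `device` is assumed to be the usual pytest device fixture, and the shapes are placeholders:

```python
import torch
import ttnn

a = ttnn.from_torch(torch.rand(1, 1, 32, 32), dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)
b = ttnn.from_torch(torch.rand(1, 1, 32, 32), dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)
out = ttnn.from_torch(torch.zeros(1, 1, 32, 32), dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)

# eq writes element-wise equality into the preallocated tensor rather than allocating a new one.
ttnn.eq(a, b, output_tensor=out)
result = ttnn.to_torch(out)
```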
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/ldexp_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/ldexp_output.py
index c58e1f2ce71..91df8823bd0 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/ldexp_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/ldexp_output.py
@@ -72,7 +72,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.ldexp(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.ldexp(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.999)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/le_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/le_output.py
index f69dbdfe931..a88f83479c3 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/le_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/le_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.lte(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.le(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp2_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp2_output.py
index 05970879334..e1fcde45396 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp2_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp2_output.py
@@ -72,7 +72,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.logaddexp2(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.logaddexp2(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp_output.py
index f889af6a7a6..58e4dc78abb 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/logaddexp_output.py
@@ -72,7 +72,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.logaddexp(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.logaddexp(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.999)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/logical_and_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/logical_and_output.py
index d3f880ebc0d..df15051111a 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/logical_and_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/logical_and_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.logical_and(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.logical_and(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/logical_or_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/logical_or_output.py
index dbc39780871..18fca398111 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/logical_or_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/logical_or_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.logical_or(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.logical_or(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/lt_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/lt_output.py
index e2f7b0c694e..40a2f0bb895 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/lt_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/lt_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.lt(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.lt(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/mul_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/mul_output.py
index 14f523df67a..a98e67ef4af 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/mul_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/mul_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.mul(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.mul(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/ne_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/ne_output.py
index ae5eeff3eea..f157c94c7bd 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/ne_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/ne_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.ne(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.ne(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/squared_difference_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/squared_difference_output.py
index d520c0718ec..c6cb41cbfcb 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/squared_difference_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/squared_difference_output.py
@@ -73,7 +73,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.squared_difference(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.squared_difference(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/sweep_tests/sweeps/sweeps/sub_output.py b/tests/ttnn/sweep_tests/sweeps/sweeps/sub_output.py
index 2123fda08e1..1c0db58195f 100644
--- a/tests/ttnn/sweep_tests/sweeps/sweeps/sub_output.py
+++ b/tests/ttnn/sweep_tests/sweeps/sweeps/sub_output.py
@@ -69,7 +69,7 @@ def run(
         memory_config=out_tensor_memory_config,
     )
-    ttnn.experimental.tensor.sub(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
+    ttnn.sub(input_tensor_a, input_tensor_b, output_tensor=output_tensor)
     output_tensor = ttnn.to_torch(output_tensor)
     return check_with_pcc(torch_output_tensor, output_tensor, 0.99)
diff --git a/tests/ttnn/ttnn_utility_fuction.py b/tests/ttnn/ttnn_utility_fuction.py
index c3c19b8eec8..ec7ba445631 100644
--- a/tests/ttnn/ttnn_utility_fuction.py
+++ b/tests/ttnn/ttnn_utility_fuction.py
@@ -6,44 +6,38 @@
 from typing import Union, Tuple
-def get_shard_grid_from_num_cores(ncores: Union[int, Tuple[int, int]], device) -> ttnn.experimental.tensor.CoreRangeSet:
+def get_shard_grid_from_num_cores(ncores: Union[int, Tuple[int, int]], device) -> ttnn.CoreRangeSet:
     max_grid_size = (device.compute_with_storage_grid_size().y, device.compute_with_storage_grid_size().x)
     if isinstance(ncores, int):
         if ncores % max_grid_size[1] == 0:
             core_grid = ttnn.CoreGrid(y=ncores // max_grid_size[1], x=max_grid_size[1])
-            grid_coord = ttnn.experimental.tensor.CoreCoord(core_grid.x - 1, core_grid.y - 1)
-            return ttnn.experimental.tensor.CoreRangeSet(
-                {ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord)}
-            )
+            grid_coord = ttnn.CoreCoord(core_grid.x - 1, core_grid.y - 1)
+            return ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
         else:
             if ncores < max_grid_size[1]:
                 core_grid = ttnn.CoreGrid(y=1, x=ncores)
-                grid_coord = ttnn.experimental.tensor.CoreCoord(core_grid.x - 1, 0)
-                return ttnn.experimental.tensor.CoreRangeSet(
-                    {ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord)}
-                )
+                grid_coord = ttnn.CoreCoord(core_grid.x - 1, 0)
+                return ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
             else:
                 core_grid_1 = ttnn.CoreGrid(y=ncores // max_grid_size[1], x=max_grid_size[1])
                 core_grid_2 = ttnn.CoreGrid(y=ncores // max_grid_size[1] + 1, x=ncores % max_grid_size[1])
-                grid_coord_1 = ttnn.experimental.tensor.CoreCoord(core_grid_1.x - 1, core_grid_1.y - 1)
-                grid_coord_2 = ttnn.experimental.tensor.CoreCoord(core_grid_2.x - 1, core_grid_2.y - 1)
-                return ttnn.experimental.tensor.CoreRangeSet(
+                grid_coord_1 = ttnn.CoreCoord(core_grid_1.x - 1, core_grid_1.y - 1)
+                grid_coord_2 = ttnn.CoreCoord(core_grid_2.x - 1, core_grid_2.y - 1)
+                return ttnn.CoreRangeSet(
                     {
-                        ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord_1),
-                        ttnn.experimental.tensor.CoreRange(
-                            ttnn.experimental.tensor.CoreCoord(0, grid_coord_2.y), grid_coord_2
-                        ),
+                        ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord_1),
+                        ttnn.CoreRange(ttnn.CoreCoord(0, grid_coord_2.y), grid_coord_2),
                     }
                 )
     elif isinstance(ncores, tuple):
         ncores_h, ncores_w = ncores
         assert ncores_h <= max_grid_size[0]
         assert ncores_w <= max_grid_size[1]
-        return ttnn.experimental.tensor.CoreRangeSet(
+        return ttnn.CoreRangeSet(
             {
-                ttnn.experimental.tensor.CoreRange(
-                    ttnn.experimental.tensor.CoreCoord(0, 0),
-                    ttnn.experimental.tensor.CoreCoord(ncores_w - 1, ncores_h - 1),
+                ttnn.CoreRange(
+                    ttnn.CoreCoord(0, 0),
+                    ttnn.CoreCoord(ncores_w - 1, ncores_h - 1),
                 )
             }
         )
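As a quick illustration of the rewritten helper: an integer core count is packed row-major onto the device's worker grid (one or two `ttnn.CoreRange`s), while a tuple is treated as an explicit (height, width) grid. A hedged usage sketch, assuming the repo layout makes the module importable as below and that `device` is the usual pytest fixture with an 8-wide grid:

```python
from tests.ttnn.ttnn_utility_fuction import get_shard_grid_from_num_cores

# 16 cores on an 8-wide grid -> one full 8x2 range: CoreRange((0, 0) .. (7, 1))
grid_16 = get_shard_grid_from_num_cores(16, device)

# (2, 4) -> explicit 4-wide, 2-tall grid: CoreRange((0, 0) .. (3, 1))
grid_2x4 = get_shard_grid_from_num_cores((2, 4), device)
```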
diff --git a/tests/ttnn/unit_tests/operations/test_concat.py b/tests/ttnn/unit_tests/operations/test_concat.py
index 038561b4396..a943e7aa763 100644
--- a/tests/ttnn/unit_tests/operations/test_concat.py
+++ b/tests/ttnn/unit_tests/operations/test_concat.py
@@ -42,13 +42,7 @@ def test_concat(device, height, width, dim, async_mode):
             (1, 1, 16, 16),
             (8, 16),
             (8, 32),
-            ttnn.experimental.tensor.CoreRangeSet(
-                {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 1)
-                    )
-                }
-            ),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}),
         ),
         (
             (1, 1, 160, 32),
             (1, 1, 160, 32),
             (1, 1, 160, 32),
             (80, 32),
             (80, 64),
-            ttnn.experimental.tensor.CoreRangeSet(
-                {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 1)
-                    )
-                }
-            ),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}),
         ),
         (
             (1, 1, 160, 32),
             (1, 1, 160, 32),
             (1, 1, 160, 16),
             (80, 16),
             (80, 48),
-            ttnn.experimental.tensor.CoreRangeSet(
-                {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 1)
-                    )
-                }
-            ),
+            ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1))}),
         ),
     ),
 )
diff --git a/tests/ttnn/unit_tests/operations/test_core.py b/tests/ttnn/unit_tests/operations/test_core.py
index c689def9343..23b9d1f8459 100644
--- a/tests/ttnn/unit_tests/operations/test_core.py
+++ b/tests/ttnn/unit_tests/operations/test_core.py
@@ -203,21 +203,17 @@
         128,
         ttnn.TILE_LAYOUT,
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 1)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1)),
                 }
             ),
             strategy=ttnn.ShardStrategy.HEIGHT,
         ),
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(1, 1)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(1, 1)),
                 }
             ),
             strategy=ttnn.ShardStrategy.BLOCK,
@@ -230,21 +226,17 @@
         128,
         ttnn.TILE_LAYOUT,
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 1)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1)),
                 }
             ),
             strategy=ttnn.ShardStrategy.HEIGHT,
         ),
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(1, 1)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(1, 1)),
                 }
             ),
             strategy=ttnn.ShardStrategy.BLOCK,
@@ -257,21 +249,17 @@
         128,
         ttnn.TILE_LAYOUT,
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 2)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 2)),
                 }
             ),
             strategy=ttnn.ShardStrategy.HEIGHT,
         ),
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(1, 1)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(1, 1)),
                 }
             ),
             strategy=ttnn.ShardStrategy.BLOCK,
@@ -353,11 +341,9 @@ class DirectReadWriteType(Enum):
         [1, 1, 32, 1024],
         [32, 256],
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 3)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 3)),
                 }
             ),
             strategy=ttnn.ShardStrategy.WIDTH,
@@ -367,17 +353,11 @@ class DirectReadWriteType(Enum):
         [1, 1, 32, 1024],
         [32, 128],
         dict(
-            core_grid=ttnn.experimental.tensor.CoreRangeSet(
+            core_grid=ttnn.CoreRangeSet(
                 {
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(0, 1)
-                    ),
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 2), ttnn.experimental.tensor.CoreCoord(0, 3)
-                    ),
-                    ttnn.experimental.tensor.CoreRange(
-                        ttnn.experimental.tensor.CoreCoord(0, 4), ttnn.experimental.tensor.CoreCoord(0, 7)
-                    ),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(0, 1)),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 2), ttnn.CoreCoord(0, 3)),
+                    ttnn.CoreRange(ttnn.CoreCoord(0, 4), ttnn.CoreCoord(0, 7)),
                 }
             ),
             strategy=ttnn.ShardStrategy.WIDTH,
diff --git a/tests/ttnn/unit_tests/operations/test_distributed_layernorm.py b/tests/ttnn/unit_tests/operations/test_distributed_layernorm.py
index c689d2cd1df..fc27488c11d 100644
--- a/tests/ttnn/unit_tests/operations/test_distributed_layernorm.py
+++ b/tests/ttnn/unit_tests/operations/test_distributed_layernorm.py
@@ -69,7 +69,7 @@ def run_distributed_layernorm(
     inp_shape, n_devices, is_rmsnorm, dtype, stats_dtype, devices, fp32_enabled=False, iterations=1
 ):
     compute_kernel_config = ttnn.WormholeComputeKernelConfig(
-        math_fidelity=ttnn.experimental.tensor.MathFidelity.HiFi4,  # Highest fidelity
+        math_fidelity=ttnn.MathFidelity.HiFi4,  # Highest fidelity
         math_approx_mode=False,
         fp32_dest_acc_en=fp32_enabled,
         packer_l1_acc=False,
diff --git a/tests/ttnn/unit_tests/operations/test_experimental.py b/tests/ttnn/unit_tests/operations/test_experimental.py
index 4ad8812ed14..84e457edf57 100644
--- a/tests/ttnn/unit_tests/operations/test_experimental.py
+++ b/tests/ttnn/unit_tests/operations/test_experimental.py
@@ -92,13 +92,10 @@ def test_ttnn_linear(
     input_shape_b = [1, 1, k_size, n_size]
     bias_shape = [1, 1, 1, n_size]
-    interleaved_memory_config = ttnn.experimental.tensor.MemoryConfig(
-        memory_layout=ttnn.experimental.tensor.TensorMemoryLayout.INTERLEAVED,
-        buffer_type=ttnn.experimental.tensor.BufferType.DRAM,
-    )
-    sharded_memory_config = ttnn.experimental.tensor.MemoryConfig(
-        memory_layout=ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-        buffer_type=ttnn.experimental.tensor.BufferType.L1,
+    interleaved_memory_config = ttnn.DRAM_MEMORY_CONFIG
+    sharded_memory_config = ttnn.MemoryConfig(
+        memory_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+        buffer_type=ttnn.BufferType.L1,
     )
     output_memory_config = sharded_memory_config if output_is_sharded else interleaved_memory_config
@@ -148,8 +145,8 @@ def test_ttnn_linear(
         input_tensor_a,
         grid_size,
         [m_size // num_cores, k_size],
-        ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-        ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR,
+        ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+        ttnn.ShardOrientation.ROW_MAJOR,
     )
     output_tensor = ttnn.linear(
@@ -187,17 +184,11 @@ def test_ttnn_matmul_dram_sharded(device, m_size, k_size, n_size):
         torch_input_tensor_in0, layout=ttnn.TILE_LAYOUT, device=device, dtype=ttnn.bfloat16
     )
     # in0 shard config
-    grid_coord = ttnn.experimental.tensor.CoreCoord(grid_size.x - 1, grid_size.y - 1)
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
-        {ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord)}
-    )
+    grid_coord = ttnn.CoreCoord(grid_size.x - 1, grid_size.y - 1)
+    shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
     shard_shape = (32, 1024)
-    shard_spec = ttnn.experimental.tensor.ShardSpec(
-        shard_grid, shard_shape, ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
-    )
-    sharded_mem_config = ttnn.MemoryConfig(
-        ttnn.types.TensorMemoryLayout.WIDTH_SHARDED, ttnn.types.BufferType.L1, shard_spec
-    )
+    shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, ttnn.ShardOrientation.ROW_MAJOR, False)
+    sharded_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.WIDTH_SHARDED, ttnn.BufferType.L1, shard_spec)
     input_tensor_in0 = ttnn.to_memory_config(input_tensor_in0, sharded_mem_config)
     # in1 shard config
@@ -205,9 +196,7 @@ def test_ttnn_matmul_dram_sharded(device, m_size, k_size, n_size):
     in1_shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), in1_shard_grid)})
     in1_shard_shape = (8192, 96)
     in1_shard_spec = ttnn.ShardSpec(in1_shard_grid, in1_shard_shape, ttnn.ShardOrientation.ROW_MAJOR, False)
-    in1_mem_config = ttnn.MemoryConfig(
-        ttnn.types.TensorMemoryLayout.WIDTH_SHARDED, ttnn.types.BufferType.DRAM, in1_shard_spec
-    )
+    in1_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.WIDTH_SHARDED, ttnn.BufferType.DRAM, in1_shard_spec)
     input_tensor_in1 = ttnn.from_torch(
         torch_input_tensor_in1,
         layout=ttnn.TILE_LAYOUT,
@@ -257,13 +246,10 @@ def test_sharded_partial_op(device, H, num_cores, num_slices, enable_async):
     in0_shape = [1, 1, H, 64]
     W = in0_shape[-1]
-    interleaved_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        memory_layout=ttnn.experimental.tensor.TensorMemoryLayout.INTERLEAVED,
-        buffer_type=ttnn.experimental.tensor.BufferType.L1,
-    )
-    sharded_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        memory_layout=ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-        buffer_type=ttnn.experimental.tensor.BufferType.L1,
+    interleaved_mem_config = ttnn.L1_MEMORY_CONFIG
+    sharded_mem_config = ttnn.MemoryConfig(
+        memory_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+        buffer_type=ttnn.BufferType.L1,
     )
     in0 = torch.ones(in0_shape).bfloat16().float()
@@ -291,8 +277,8 @@ def test_sharded_partial_op(device, H, num_cores, num_slices, enable_async):
         height_shard_spec,
         num_slices,
         slice_index,
-        ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-        ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR,
+        ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+        ttnn.ShardOrientation.ROW_MAJOR,
     )
     assert in0_t_slice.is_sharded()
diff --git a/tests/ttnn/unit_tests/operations/test_paged_update_cache.py b/tests/ttnn/unit_tests/operations/test_paged_update_cache.py
index 74ba76e8019..e7593d398b5 100644
--- a/tests/ttnn/unit_tests/operations/test_paged_update_cache.py
+++ b/tests/ttnn/unit_tests/operations/test_paged_update_cache.py
@@ -26,10 +26,10 @@ def run_test_update_cache_decode(
     # Input is sharded
     compute_grid_size = device.compute_with_storage_grid_size()
     num_cores = num_users
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         ttnn.experimental.tensor.num_cores_to_corerange_set(num_cores, compute_grid_size, True)
     )
-    input_shard_spec = ttnn.experimental.tensor.ShardSpec(
+    input_shard_spec = ttnn.ShardSpec(
         shard_grid,
         [
             xt.volume() // xt.get_legacy_shape()[-1] // num_cores,
@@ -38,9 +38,7 @@ def run_test_update_cache_decode(
         ttnn.ShardOrientation.ROW_MAJOR,
         False,
     )
-    input_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.experimental.tensor.BufferType.L1, input_shard_spec
-    )
+    input_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, input_shard_spec)
     xt = xt.to(device, input_mem_config)
     # Create arbitrary update indices
@@ -111,10 +109,10 @@ def test_update_cache_decode(
     # Input is sharded
     compute_grid_size = device.compute_with_storage_grid_size()
     num_cores = num_users
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         ttnn.experimental.tensor.num_cores_to_corerange_set(num_cores, compute_grid_size, True)
     )
-    input_shard_spec = ttnn.experimental.tensor.ShardSpec(
+    input_shard_spec = ttnn.ShardSpec(
         shard_grid,
         [
             xt.volume() // xt.get_legacy_shape()[-1] // num_cores,
@@ -123,8 +121,8 @@ def test_update_cache_decode(
         ttnn.ShardOrientation.ROW_MAJOR,
         False,
     )
-    input_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.experimental.tensor.BufferType.L1, input_shard_spec
+    input_mem_config = ttnn.MemoryConfig(
+        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, input_shard_spec
     )
     sharded_low = xt.to(device, input_mem_config)
     sharded_reserved = ttnn.Tensor(x_pad, input_dtype).to(ttnn.TILE_LAYOUT).to(device, input_mem_config)
@@ -186,10 +184,10 @@ def test_update_cache_decode_program_cache(
     # Input is sharded
     compute_grid_size = device.compute_with_storage_grid_size()
     num_cores = num_users
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         ttnn.experimental.tensor.num_cores_to_corerange_set(num_cores, compute_grid_size, True)
     )
-    input_shard_spec = ttnn.experimental.tensor.ShardSpec(
+    input_shard_spec = ttnn.ShardSpec(
         shard_grid,
         [
             xt.volume() // xt.get_legacy_shape()[-1] // num_cores,
@@ -198,8 +196,8 @@ def test_update_cache_decode_program_cache(
         ttnn.ShardOrientation.ROW_MAJOR,
         False,
     )
-    input_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.experimental.tensor.BufferType.L1, input_shard_spec
+    input_mem_config = ttnn.MemoryConfig(
+        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, input_shard_spec
     )
     sharded_low = xt.to(device, input_mem_config)
     dummy_tensors.append(sharded_low)
@@ -230,10 +228,10 @@ def run_test_tensor_index_update_cache_decode(
     # Input is sharded
     compute_grid_size = device.compute_with_storage_grid_size()
     num_cores = num_users
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         ttnn.experimental.tensor.num_cores_to_corerange_set(num_cores, compute_grid_size, True)
     )
-    input_shard_spec = ttnn.experimental.tensor.ShardSpec(
+    input_shard_spec = ttnn.ShardSpec(
         shard_grid,
         [
             xt.volume() // xt.get_legacy_shape()[-1] // num_cores,
@@ -242,15 +240,13 @@ def run_test_tensor_index_update_cache_decode(
         ttnn.ShardOrientation.ROW_MAJOR,
         False,
     )
-    input_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.experimental.tensor.BufferType.L1, input_shard_spec
-    )
+    input_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, input_shard_spec)
     xt = xt.to(device, input_mem_config)
     # Create arbitrary update indices
     cache_idxs = [cache_idx + i * 17 for i in range(num_users)]
     logger.info(f"cache_idxs: {cache_idxs}")
-    cache_idxs_tt = ttnn.Tensor(torch.tensor(cache_idxs), ttnn.experimental.tensor.DataType.INT32).to(device)
+    cache_idxs_tt = ttnn.Tensor(torch.tensor(cache_idxs), ttnn.int32).to(device)
     cachett = ttnn.experimental.paged_update_cache(cachett, xt, update_idxs_tensor=cache_idxs_tt)
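One detail worth calling out in the hunk above: the update indices move from the spelled-out `ttnn.experimental.tensor.DataType.INT32` to the flat `ttnn.int32` alias. A minimal standalone sketch of building such an index tensor on device; `device` is assumed to be the usual pytest fixture and the values are placeholders:

```python
import torch
import ttnn

cache_idx, num_users = 0, 8  # placeholder values
# Per-user cache positions, mirroring the test: arbitrary but distinct indices.
cache_idxs = [cache_idx + i * 17 for i in range(num_users)]
# int32 host tensor pushed to device, ready to be passed as update_idxs_tensor=.
cache_idxs_tt = ttnn.Tensor(torch.tensor(cache_idxs), ttnn.int32).to(device)
```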
@@ -372,10 +368,10 @@ def run_test_paged_update_cache_decode(
     # Input is sharded
     compute_grid_size = device.compute_with_storage_grid_size()
     num_cores = num_users
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         ttnn.experimental.tensor.num_cores_to_corerange_set(num_cores, compute_grid_size, True)
     )
-    input_shard_spec = ttnn.experimental.tensor.ShardSpec(
+    input_shard_spec = ttnn.ShardSpec(
         shard_grid,
         [
             xt.volume() // xt.get_legacy_shape()[-1] // num_cores,
@@ -384,9 +380,7 @@ def run_test_paged_update_cache_decode(
         ttnn.ShardOrientation.ROW_MAJOR,
         False,
     )
-    input_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.experimental.tensor.BufferType.L1, input_shard_spec
-    )
+    input_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, input_shard_spec)
     xt = xt.to(device, input_mem_config)
     # Create arbitrary update indices
@@ -394,8 +388,8 @@ def run_test_paged_update_cache_decode(
     # Arbitrary user is "dropped", to test skipping in kernel
     cache_idxs[num_users // 2] = -1
     # logger.info(f"cache_idxs: {cache_idxs}")
-    cache_idxs_tt = ttnn.Tensor(torch.tensor(cache_idxs), ttnn.experimental.tensor.DataType.INT32).to(device)
-    page_table_tt = ttnn.Tensor(page_table, ttnn.experimental.tensor.DataType.INT32).to(device)
+    cache_idxs_tt = ttnn.Tensor(torch.tensor(cache_idxs), ttnn.int32).to(device)
+    page_table_tt = ttnn.Tensor(page_table, ttnn.int32).to(device)
     cachett = ttnn.experimental.paged_update_cache(
         cachett, xt, update_idxs_tensor=cache_idxs_tt, page_table=page_table_tt
@@ -504,10 +498,10 @@ def test_paged_update_cache_decode_program_caching(
     # Input is sharded
     compute_grid_size = device.compute_with_storage_grid_size()
     num_cores = num_users
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         ttnn.experimental.tensor.num_cores_to_corerange_set(num_cores, compute_grid_size, True)
     )
-    input_shard_spec = ttnn.experimental.tensor.ShardSpec(
+    input_shard_spec = ttnn.ShardSpec(
         shard_grid,
         [
             xt.volume() // xt.get_legacy_shape()[-1] // num_cores,
@@ -516,8 +510,8 @@ def test_paged_update_cache_decode_program_caching(
         ttnn.ShardOrientation.ROW_MAJOR,
         False,
     )
-    input_mem_config = ttnn.experimental.tensor.MemoryConfig(
-        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.experimental.tensor.BufferType.L1, input_shard_spec
+    input_mem_config = ttnn.MemoryConfig(
+        ttnn.TensorMemoryLayout.HEIGHT_SHARDED, ttnn.BufferType.L1, input_shard_spec
     )
     sharded_low = xt.to(device, input_mem_config)
     dummy_tensors.append(sharded_low)
@@ -572,7 +566,7 @@ def run_test_paged_fill_cache(
     assert torch.allclose(paged_cache_back, cache)
     cachett = ttnn.Tensor(shuffled_page_cache, cache_dtype).to(ttnn.TILE_LAYOUT).to(device)
-    page_table_tt = ttnn.Tensor(page_table, ttnn.experimental.tensor.DataType.INT32).to(device)
+    page_table_tt = ttnn.Tensor(page_table, ttnn.int32).to(device)
     # Update cache for every user
     for i in range(num_users):
diff --git a/tests/ttnn/unit_tests/operations/test_reallocate.py b/tests/ttnn/unit_tests/operations/test_reallocate.py
index 2d946962e41..23c76397b3d 100644
--- a/tests/ttnn/unit_tests/operations/test_reallocate.py
+++ b/tests/ttnn/unit_tests/operations/test_reallocate.py
@@ -26,23 +26,23 @@ def test_ttnn_reallocate(device, mem_config, num_allocs):
     if num_allocs == 2 and mem_config == ttnn.DRAM_MEMORY_CONFIG:
         pytest.xfail("#7732: dram tensor corruption after move")
-    shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+    shard_grid = ttnn.CoreRangeSet(
         {
-            ttnn.experimental.tensor.CoreRange(
-                ttnn.experimental.tensor.CoreCoord(0, 0),
-                ttnn.experimental.tensor.CoreCoord(7, 7),
+            ttnn.CoreRange(
+                ttnn.CoreCoord(0, 0),
+                ttnn.CoreCoord(7, 7),
             ),
         }
     )
     # If sharded, creat actual memory config
     if mem_config == ttnn.L1_BLOCK_SHARDED_MEMORY_CONFIG:
-        shard_spec = ttnn.experimental.tensor.ShardSpec(
-            shard_grid, [batch * height * depth // 8, width], ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR, False
+        shard_spec = ttnn.ShardSpec(
+            shard_grid, [batch * height * depth // 8, width], ttnn.ShardOrientation.ROW_MAJOR, False
         )
-        mem_config = ttnn.experimental.tensor.MemoryConfig(
-            ttnn.experimental.tensor.TensorMemoryLayout.HEIGHT_SHARDED,
-            ttnn.experimental.tensor.BufferType.L1,
+        mem_config = ttnn.MemoryConfig(
+            ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
+            ttnn.BufferType.L1,
             shard_spec,
         )
diff --git a/tests/ttnn/unit_tests/operations/test_silu.py b/tests/ttnn/unit_tests/operations/test_silu.py
index 7c1deded7cb..b246c615b41 100644
--- a/tests/ttnn/unit_tests/operations/test_silu.py
+++ b/tests/ttnn/unit_tests/operations/test_silu.py
@@ -58,13 +58,13 @@ def run_elt_silu_relu(
         input_2d_height_padded = _nearest_y(input_2d_height, grid_size[0] * 32)
         shard_height = math.ceil(input_2d_height_padded / grid_size[0])
         shard_width = math.ceil(input_2d_width / grid_size[1])
-        shard_orientation = ttnn.experimental.tensor.ShardOrientation.COL_MAJOR
-        tensor_memory_layout = ttnn.types.TensorMemoryLayout.BLOCK_SHARDED
-        shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+        shard_orientation = ttnn.ShardOrientation.COL_MAJOR
+        tensor_memory_layout = ttnn.TensorMemoryLayout.BLOCK_SHARDED
+        shard_grid = ttnn.CoreRangeSet(
             {
-                ttnn.experimental.tensor.CoreRange(
-                    ttnn.experimental.tensor.CoreCoord(0, 0),
-                    ttnn.experimental.tensor.CoreCoord(grid_size[0] - 1, grid_size[1] - 1),
+                ttnn.CoreRange(
+                    ttnn.CoreCoord(0, 0),
+                    ttnn.CoreCoord(grid_size[0] - 1, grid_size[1] - 1),
                 )
             }
         )
@@ -73,14 +73,14 @@ def run_elt_silu_relu(
         shard_height = math.ceil(input_2d_height_padded / ncores)
         shard_grid = get_shard_grid_from_num_cores(ncores, device)
         shard_width = input_2d_width
-        shard_orientation = ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
-        tensor_memory_layout = ttnn.types.TensorMemoryLayout.HEIGHT_SHARDED
+        shard_orientation = ttnn.ShardOrientation.ROW_MAJOR
+        tensor_memory_layout = ttnn.TensorMemoryLayout.HEIGHT_SHARDED
     elif shard_strategy == ttnn.ShardStrategy.WIDTH:
         shard_height = input_2d_height
         input_2d_width_padded = _nearest_y(input_2d_width, ncores * 32)
         shard_width = math.ceil(input_2d_width_padded / ncores)
-        shard_orientation = ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
-        tensor_memory_layout = ttnn.types.TensorMemoryLayout.WIDTH_SHARDED
+        shard_orientation = ttnn.ShardOrientation.ROW_MAJOR
+        tensor_memory_layout = ttnn.TensorMemoryLayout.WIDTH_SHARDED
         shard_grid = get_shard_grid_from_num_cores(ncores, device)
     assert shard_height % TILE_WIDTH == 0
@@ -89,8 +89,8 @@ def run_elt_silu_relu(
     logger.debug(f"shard_grid={shard_grid}")
     logger.debug(f"input_shard_height={shard_height}, input_shard_width={shard_width}")
-    shard_spec = ttnn.experimental.tensor.ShardSpec(shard_grid, (shard_height, shard_width), shard_orientation, False)
-    in_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.types.BufferType.L1, shard_spec)
+    shard_spec = ttnn.ShardSpec(shard_grid, (shard_height, shard_width), shard_orientation, False)
+    in_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.BufferType.L1, shard_spec)
     logger.debug(f"shard_memory_layout={in_sharded_mem_config}")
     input_tensor = ttnn.to_memory_config(input_tensor, memory_config=in_sharded_mem_config)
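For reference, the block-sharded variant built in the silu hunk above reduces to a few lines in the flat namespace. A minimal sketch with placeholder shard dimensions (`grid_size`, `shard_height`, and `shard_width` stand in for the test's computed values):

```python
import ttnn

grid_size = (8, 4)  # placeholder compute grid
shard_height, shard_width = 64, 96  # placeholder, tile-aligned shard dims
shard_grid = ttnn.CoreRangeSet(
    {ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(grid_size[0] - 1, grid_size[1] - 1))}
)
shard_spec = ttnn.ShardSpec(shard_grid, (shard_height, shard_width), ttnn.ShardOrientation.COL_MAJOR, False)
# Block sharding splits the tensor along both dimensions across the 2D grid.
block_sharded = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.BLOCK_SHARDED, ttnn.BufferType.L1, shard_spec)
```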
diff --git a/tests/ttnn/unit_tests/operations/test_silu_row_major.py b/tests/ttnn/unit_tests/operations/test_silu_row_major.py
index 81a6be74f9b..f205e23d758 100644
--- a/tests/ttnn/unit_tests/operations/test_silu_row_major.py
+++ b/tests/ttnn/unit_tests/operations/test_silu_row_major.py
@@ -78,12 +78,12 @@ def test_silu_multi_core(device, input_shape, shard_strategy):
         ncores = (nshards_h, nshards_w)

     shard_grid = get_shard_grid_from_num_cores(ncores, device)
-    shard_orientation = ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
+    shard_orientation = ttnn.ShardOrientation.ROW_MAJOR

     if shard_strategy == ttnn.ShardStrategy.BLOCK:
-        tensor_memory_layout = ttnn.types.TensorMemoryLayout.BLOCK_SHARDED
+        tensor_memory_layout = ttnn.TensorMemoryLayout.BLOCK_SHARDED
     elif shard_strategy == ttnn.ShardStrategy.HEIGHT:
-        tensor_memory_layout = ttnn.types.TensorMemoryLayout.HEIGHT_SHARDED
+        tensor_memory_layout = ttnn.TensorMemoryLayout.HEIGHT_SHARDED

     ## input shard
     if shard_strategy == ttnn.ShardStrategy.BLOCK:
@@ -97,8 +97,8 @@ def test_silu_multi_core(device, input_shape, shard_strategy):
         pytest.skip("Shard sizes that are not multiples of 1024 are not supported.")
     shard_shape = (shard_height, shard_width)
     logger.debug(f"shard_shape={shard_shape}")
-    shard_spec = ttnn.experimental.tensor.ShardSpec(shard_grid, shard_shape, shard_orientation, False)
-    in_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.types.BufferType.L1, shard_spec)
+    shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, shard_orientation, False)
+    in_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.BufferType.L1, shard_spec)

     logger.debug(f"in_shard_mem_config: {in_sharded_mem_config}")
     logger.debug(f"ncore --> {ncores}")
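Both silu tests select the low-level layout from the high-level strategy enum via an if/elif chain; the same choice can be written as a lookup table. A sketch, assuming the top-level enum exports this diff introduces:

    import ttnn

    # Table-driven equivalent of the if/elif chains above.
    LAYOUT_FOR_STRATEGY = {
        ttnn.ShardStrategy.BLOCK: ttnn.TensorMemoryLayout.BLOCK_SHARDED,
        ttnn.ShardStrategy.HEIGHT: ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
        ttnn.ShardStrategy.WIDTH: ttnn.TensorMemoryLayout.WIDTH_SHARDED,
    }

    tensor_memory_layout = LAYOUT_FOR_STRATEGY[ttnn.ShardStrategy.HEIGHT]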
diff --git a/tests/ttnn/unit_tests/operations/test_upsample.py b/tests/ttnn/unit_tests/operations/test_upsample.py
index e5ecf9c2a9e..ba18bf54dfc 100644
--- a/tests/ttnn/unit_tests/operations/test_upsample.py
+++ b/tests/ttnn/unit_tests/operations/test_upsample.py
@@ -16,45 +16,39 @@
 TILE_WIDTH = 32


-def get_shard_grid_from_num_cores(device, ncores: Union[int, Tuple[int, int]]) -> ttnn.experimental.tensor.CoreRangeSet:
+def get_shard_grid_from_num_cores(device, ncores: Union[int, Tuple[int, int]]) -> ttnn.CoreRangeSet:
     device_grid = device.compute_with_storage_grid_size()
     max_grid_size = (device_grid.y, device_grid.x)
     if isinstance(ncores, int):
         if ncores % max_grid_size[1] == 0:
             core_grid = ttnn.CoreGrid(y=ncores // max_grid_size[1], x=max_grid_size[1])
-            grid_coord = ttnn.experimental.tensor.CoreCoord(core_grid.x - 1, core_grid.y - 1)
-            return ttnn.experimental.tensor.CoreRangeSet(
-                {ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord)}
-            )
+            grid_coord = ttnn.CoreCoord(core_grid.x - 1, core_grid.y - 1)
+            return ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
         else:
             if ncores < max_grid_size[1]:
                 core_grid = ttnn.CoreGrid(y=1, x=ncores)
-                grid_coord = ttnn.experimental.tensor.CoreCoord(core_grid.x - 1, 0)
-                return ttnn.experimental.tensor.CoreRangeSet(
-                    {ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord)}
-                )
+                grid_coord = ttnn.CoreCoord(core_grid.x - 1, 0)
+                return ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
             else:
                 core_grid_1 = ttnn.CoreGrid(y=ncores // max_grid_size[1], x=max_grid_size[1])
                 core_grid_2 = ttnn.CoreGrid(y=ncores // max_grid_size[1] + 1, x=ncores % max_grid_size[1])
-                grid_coord_1 = ttnn.experimental.tensor.CoreCoord(core_grid_1.x - 1, core_grid_1.y - 1)
-                grid_coord_2 = ttnn.experimental.tensor.CoreCoord(core_grid_2.x - 1, core_grid_2.y - 1)
-                return ttnn.experimental.tensor.CoreRangeSet(
+                grid_coord_1 = ttnn.CoreCoord(core_grid_1.x - 1, core_grid_1.y - 1)
+                grid_coord_2 = ttnn.CoreCoord(core_grid_2.x - 1, core_grid_2.y - 1)
+                return ttnn.CoreRangeSet(
                     {
-                        ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord_1),
-                        ttnn.experimental.tensor.CoreRange(
-                            ttnn.experimental.tensor.CoreCoord(0, grid_coord_2.y), grid_coord_2
-                        ),
+                        ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord_1),
+                        ttnn.CoreRange(ttnn.CoreCoord(0, grid_coord_2.y), grid_coord_2),
                     }
                 )
     elif isinstance(ncores, tuple):
         ncores_h, ncores_w = ncores
         assert ncores_h <= max_grid_size[0]
         assert ncores_w <= max_grid_size[1]
-        return ttnn.experimental.tensor.CoreRangeSet(
+        return ttnn.CoreRangeSet(
             {
-                ttnn.experimental.tensor.CoreRange(
-                    ttnn.experimental.tensor.CoreCoord(0, 0),
-                    ttnn.experimental.tensor.CoreCoord(ncores_w - 1, ncores_h - 1),
+                ttnn.CoreRange(
+                    ttnn.CoreCoord(0, 0),
+                    ttnn.CoreCoord(ncores_w - 1, ncores_h - 1),
                 )
             }
         )
@@ -182,7 +176,7 @@ def test_upsample_multi_core(device, input_shape, scale_h, scale_w, shard_strate
     #     )

     shard_grid = get_shard_grid_from_num_cores(device, ncores)
-    shard_orientation = ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
+    shard_orientation = ttnn.ShardOrientation.ROW_MAJOR

     if shard_strategy == ttnn.ShardStrategy.BLOCK:
         tensor_memory_layout = ttnn.types.TensorMemoryLayout.BLOCK_SHARDED
@@ -197,13 +191,13 @@ def test_upsample_multi_core(device, input_shape, scale_h, scale_w, shard_strate
         shard_height = math.ceil(batch_size * height * width / ncores)
         shard_width = num_channels
     shard_shape = (shard_height, shard_width)
-    shard_spec = ttnn.experimental.tensor.ShardSpec(shard_grid, shard_shape, shard_orientation, False)
+    shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, shard_orientation, False)
     in_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.types.BufferType.L1, shard_spec)

     ## output shard
     shard_height = shard_height * scale_h * scale_w
     shard_shape = (shard_height, shard_width)
-    shard_spec = ttnn.experimental.tensor.ShardSpec(shard_grid, shard_shape, shard_orientation, False)
+    shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, shard_orientation, False)
     out_sharded_mem_config = ttnn.MemoryConfig(tensor_memory_layout, ttnn.types.BufferType.L1, shard_spec)

     print(f"in_shard_mem_config: {in_sharded_mem_config}")
diff --git a/tests/ttnn/unit_tests/test_multi_device_async.py b/tests/ttnn/unit_tests/test_multi_device_async.py
index 35a3bf71a5b..2ec669c661c 100644
--- a/tests/ttnn/unit_tests/test_multi_device_async.py
+++ b/tests/ttnn/unit_tests/test_multi_device_async.py
@@ -263,7 +263,7 @@ def test_multi_device_argmax(pcie_device_mesh, layout, mem_config):
         mesh_mapper=ttnn.ReplicateTensorToMesh(pcie_device_mesh),
     )

-    tt_out_11BH = ttnn.experimental.tensor.argmax(tt_out_11BH, dim=-1)
+    tt_out_11BH = ttnn.argmax(tt_out_11BH, dim=-1)
     tt_out_1B = ttnn.reshape(tt_out_11BH[:1, :, :, :], ttnn.Shape([1, 32]))
     tt_out_1B = ttnn.to_torch(tt_out_1B, mesh_composer=ttnn.ConcatMeshToTensor(pcie_device_mesh, dim=0))[0]
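The refactored get_shard_grid_from_num_cores still covers a core count that does not fill the grid with two CoreRanges: one for the full rows and one for the partial last row. A hedged usage sketch of the test_upsample variant, assuming a device whose worker grid is 8 cores wide (device is the usual test fixture):

    # 13 cores on an 8-wide grid: one full row plus a 5-core second row.
    grid = get_shard_grid_from_num_cores(device, 13)
    # Equivalent by hand, with the top-level ttnn types:
    expected = ttnn.CoreRangeSet(
        {
            ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(7, 0)),
            ttnn.CoreRange(ttnn.CoreCoord(0, 1), ttnn.CoreCoord(4, 1)),
        }
    )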
diff --git a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor.hpp b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor.hpp
index 87922a3011e..44bb14c5633 100644
--- a/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor.hpp
+++ b/ttnn/cpp/ttnn/deprecated/tt_lib/csrc/tt_lib_bindings_tensor.hpp
@@ -9,7 +9,6 @@ namespace tt::tt_metal {

 namespace detail{
-    void TensorModulePyTensor( py::module & m_tensor);
     void TensorModulePyTensorTypes(py::module &m_tensor);
     void TensorModulePyTensor(py::module &m_tensor);
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded_pybind.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded_pybind.cpp
index 7b20b2cc627..1ceccb41701 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded_pybind.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/interleaved_to_sharded/interleaved_to_sharded_pybind.cpp
@@ -90,10 +90,10 @@ void py_bind_interleaved_to_sharded(pybind11::module& module) {
             Example 2 (using sharded memory config):
                 >>> sharded_memory_config_dict = dict(
-                        core_grid=ttnn.experimental.tensor.CoreRangeSet(
+                        core_grid=ttnn.CoreRangeSet(
                             {
-                                ttnn.experimental.tensor.CoreRange(
-                                    ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(1, 1)
+                                ttnn.CoreRange(
+                                    ttnn.CoreCoord(0, 0), ttnn.CoreCoord(1, 1)
                                 ),
                             }
                         ),
diff --git a/ttnn/cpp/ttnn/operations/data_movement/sharded/reshard/reshard_pybind.cpp b/ttnn/cpp/ttnn/operations/data_movement/sharded/reshard/reshard_pybind.cpp
index 9603192bedd..20cd064dd0a 100644
--- a/ttnn/cpp/ttnn/operations/data_movement/sharded/reshard/reshard_pybind.cpp
+++ b/ttnn/cpp/ttnn/operations/data_movement/sharded/reshard/reshard_pybind.cpp
@@ -59,10 +59,10 @@ void py_bind_reshard(pybind11::module& module) {
             Example:
                 >>> sharded_memory_config_dict = dict(
-                        core_grid=ttnn.experimental.tensor.CoreRangeSet(
+                        core_grid=ttnn.CoreRangeSet(
                             {
-                                ttnn.experimental.tensor.CoreRange(
-                                    ttnn.experimental.tensor.CoreCoord(0, 0), ttnn.experimental.tensor.CoreCoord(1, 1)
+                                ttnn.CoreRange(
+                                    ttnn.CoreCoord(0, 0), ttnn.CoreCoord(1, 1)
                                 ),
                             }
                         ),
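For orientation only, a hypothetical completion of the docstring fragment above, showing how such a dict would typically be consumed; the shape, strategy, and use_height_and_width_as_shard_shape values are assumptions for illustration, not part of the original docstrings:

    sharded_memory_config_dict = dict(
        core_grid=ttnn.CoreRangeSet(
            {
                ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(1, 1)),
            }
        ),
        shape=(4 * 32, 32),  # explicit shard shape, since core_grid is a CoreRangeSet
        strategy=ttnn.ShardStrategy.HEIGHT,
        use_height_and_width_as_shard_shape=True,
    )
    memory_config = ttnn.create_sharded_memory_config(**sharded_memory_config_dict)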
RuntimeError("Invalid core_grid") @@ -129,15 +127,15 @@ def create_sharded_memory_config( if not isinstance(core_grid[1], ttnn.CoreGrid): raise RuntimeError("Invalid core_grid type") - grid_coord_1 = ttnn.experimental.tensor.CoreCoord(core_grid[0].x - 1, core_grid[0].y - 1) - grid_coord_2 = ttnn.experimental.tensor.CoreCoord(core_grid[1].x - 1, core_grid[0].y) - shard_grid = ttnn.experimental.tensor.CoreRangeSet( + grid_coord_1 = ttnn.CoreCoord(core_grid[0].x - 1, core_grid[0].y - 1) + grid_coord_2 = ttnn.CoreCoord(core_grid[1].x - 1, core_grid[0].y) + shard_grid = ttnn.CoreRangeSet( { - ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord_1), - ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, core_grid[0].y), grid_coord_2), + ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord_1), + ttnn.CoreRange(ttnn.CoreCoord(0, core_grid[0].y), grid_coord_2), } ) - elif isinstance(core_grid, ttnn.experimental.tensor.CoreRangeSet): + elif isinstance(core_grid, ttnn.CoreRangeSet): shard_grid = core_grid if not use_height_and_width_as_shard_shape: raise RuntimeError("height and width must be shard shape with CoreRangeSet") @@ -148,9 +146,9 @@ def create_sharded_memory_config( batch_size = math.prod(batch_sizes) if use_height_and_width_as_shard_shape: - if shard_orientation == ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR: + if shard_orientation == ttnn.ShardOrientation.ROW_MAJOR: shard_shape = height, width - elif shard_orientation == ttnn.experimental.tensor.ShardOrientation.COL_MAJOR: + elif shard_orientation == ttnn.ShardOrientation.COL_MAJOR: shard_shape = width, height else: raise RuntimeError("Invalid shard orientation") @@ -158,14 +156,14 @@ def create_sharded_memory_config( shard_height = batch_size * height shard_width = width if strategy == ShardStrategy.BLOCK: - if shard_orientation == ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR: + if shard_orientation == ttnn.ShardOrientation.ROW_MAJOR: if shard_height % core_grid.y != 0: raise RuntimeError("Invalid sharding core_grid") if shard_width % core_grid.x != 0: raise RuntimeError("Invalid sharding core_grid") shard_shape = shard_height // core_grid.y, shard_width // core_grid.x - elif shard_orientation == ttnn.experimental.tensor.ShardOrientation.COL_MAJOR: + elif shard_orientation == ttnn.ShardOrientation.COL_MAJOR: if shard_height % core_grid.x != 0: raise RuntimeError("Invalid sharding core_grid") if shard_width % core_grid.y != 0: @@ -186,7 +184,7 @@ def create_sharded_memory_config( else: raise RuntimeError("Invalid sharding scheme") - shard_spec = ttnn.experimental.tensor.ShardSpec(shard_grid, shard_shape, shard_orientation, halo) + shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, shard_orientation, halo) memory_config = MemoryConfig(tensor_memory_layout, BufferType.L1, shard_spec) return memory_config @@ -225,7 +223,7 @@ def create_sharded_memory_config_( if not isinstance(shape, (list, tuple, ttnn.Shape)): raise RuntimeError("Invalid input shape") - if not isinstance(core_grid, (ttnn.CoreGrid, tuple, list, ttnn.experimental.tensor.CoreRangeSet)): + if not isinstance(core_grid, (ttnn.CoreGrid, tuple, list, ttnn.CoreRangeSet)): raise RuntimeError("Invalid core_grid type") if isinstance(strategy, ShardStrategy): @@ -244,24 +242,22 @@ def create_sharded_memory_config_( if isinstance(orientation, ShardOrientation): if orientation is None: - shard_orientation = ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR + shard_orientation = ttnn.ShardOrientation.ROW_MAJOR elif 
        elif orientation == ShardOrientation.ROW_MAJOR:
-            shard_orientation = ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR
+            shard_orientation = ttnn.ShardOrientation.ROW_MAJOR
         elif orientation == ShardOrientation.COL_MAJOR:
-            shard_orientation = ttnn.experimental.tensor.ShardOrientation.COL_MAJOR
+            shard_orientation = ttnn.ShardOrientation.COL_MAJOR
         else:
             raise RuntimeError("Invalid shard orientation")
-    elif isinstance(orientation, ttnn.experimental.tensor.ShardOrientation):
+    elif isinstance(orientation, ttnn.ShardOrientation):
         shard_orientation = orientation
     else:
         raise RuntimeError("Invalid type of shard orientation")

     shard_grid = None
     if isinstance(core_grid, ttnn.CoreGrid):
-        grid_coord = ttnn.experimental.tensor.CoreCoord(core_grid.x - 1, core_grid.y - 1)
-        shard_grid = ttnn.experimental.tensor.CoreRangeSet(
-            {ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord)}
-        )
+        grid_coord = ttnn.CoreCoord(core_grid.x - 1, core_grid.y - 1)
+        shard_grid = ttnn.CoreRangeSet({ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord)})
     elif isinstance(core_grid, (list, tuple)):
         if len(core_grid) != 2:
             raise RuntimeError("Invalid core_grid")
@@ -270,15 +266,15 @@ def create_sharded_memory_config_(
         if not isinstance(core_grid[1], ttnn.CoreGrid):
             raise RuntimeError("Invalid core_grid type")

-        grid_coord_1 = ttnn.experimental.tensor.CoreCoord(core_grid[0].x - 1, core_grid[0].y - 1)
-        grid_coord_2 = ttnn.experimental.tensor.CoreCoord(core_grid[1].x - 1, core_grid[0].y)
-        shard_grid = ttnn.experimental.tensor.CoreRangeSet(
+        grid_coord_1 = ttnn.CoreCoord(core_grid[0].x - 1, core_grid[0].y - 1)
+        grid_coord_2 = ttnn.CoreCoord(core_grid[1].x - 1, core_grid[0].y)
+        shard_grid = ttnn.CoreRangeSet(
             {
-                ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, 0), grid_coord_1),
-                ttnn.experimental.tensor.CoreRange(ttnn.experimental.tensor.CoreCoord(0, core_grid[0].y), grid_coord_2),
+                ttnn.CoreRange(ttnn.CoreCoord(0, 0), grid_coord_1),
+                ttnn.CoreRange(ttnn.CoreCoord(0, core_grid[0].y), grid_coord_2),
             }
         )
-    elif isinstance(core_grid, ttnn.experimental.tensor.CoreRangeSet):
+    elif isinstance(core_grid, ttnn.CoreRangeSet):
         shard_grid = core_grid
     else:
         raise RuntimeError("Invalid core_grid type")
@@ -287,9 +283,9 @@ def create_sharded_memory_config_(
     batch_size = math.prod(batch_sizes)

     if use_height_and_width_as_shard_shape:
-        if shard_orientation == ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR:
+        if shard_orientation == ttnn.ShardOrientation.ROW_MAJOR:
             shard_shape = height, width
-        elif shard_orientation == ttnn.experimental.tensor.ShardOrientation.COL_MAJOR:
+        elif shard_orientation == ttnn.ShardOrientation.COL_MAJOR:
             shard_shape = width, height
         else:
             raise RuntimeError("Invalid shard orientation")
@@ -301,11 +297,11 @@ def create_sharded_memory_config_(
         if tensor_memory_layout == TensorMemoryLayout.BLOCK_SHARDED:
             if grid_size.y * grid_size.x != total_num_cores:
                 raise RuntimeError("Invalid CoreRangeSet for block sharding strategy")
-            if shard_orientation == ttnn.experimental.tensor.ShardOrientation.ROW_MAJOR:
+            if shard_orientation == ttnn.ShardOrientation.ROW_MAJOR:
                 tensor_height_padded = roundup(tensor_height, grid_size.y * 32) if tile_layout else tensor_height
                 shard_shape = divup(tensor_height_padded, grid_size.y), divup(tensor_width, grid_size.x)
-            elif shard_orientation == ttnn.experimental.tensor.ShardOrientation.COL_MAJOR:
+            elif shard_orientation == ttnn.ShardOrientation.COL_MAJOR:
                tensor_height_padded = roundup(tensor_height, grid_size.x * 32) if tile_layout else tensor_height
                 shard_shape = divup(tensor_height_padded, grid_size.x), divup(tensor_width, grid_size.y)
             else:
@@ -320,7 +316,7 @@ def create_sharded_memory_config_(
     if tile_layout and shard_shape[0] % 32 != 0 and shard_shape[1] % 32 != 0:
         raise RuntimeError("Incorrent tensor shape")

-    shard_spec = ttnn.experimental.tensor.ShardSpec(shard_grid, shard_shape, shard_orientation, halo)
+    shard_spec = ttnn.ShardSpec(shard_grid, shard_shape, shard_orientation, halo)
     memory_config = MemoryConfig(tensor_memory_layout, BufferType.L1, shard_spec)
     return memory_config
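To close the loop, a hedged usage sketch of the refactored helper, with illustrative values: an L1 block-sharded config over an 8x8 CoreGrid, letting orientation default to ROW_MAJOR as the code above does:

    import ttnn

    # 1024x1024 tensor split 8 ways in each dimension -> 128x128 shards in L1.
    mem_config = ttnn.create_sharded_memory_config(
        shape=(1, 1, 1024, 1024),
        core_grid=ttnn.CoreGrid(y=8, x=8),
        strategy=ttnn.ShardStrategy.BLOCK,
    )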