From e35dfe145935207caf4055e4882c836dd13e2dd1 Mon Sep 17 00:00:00 2001
From: yugi957
Date: Tue, 15 Oct 2024 22:41:44 +0000
Subject: [PATCH] #13593: embedding fused tilized reconditioned, one test made
 reproducible, fixed index within embedding op

---
 tests/sweep_framework/one_test.py           | 24 ++++++++++---------
 .../kernels/dataflow/embeddings_tilize.cpp  | 24 +++++++++----------
 .../ttnn/operations/embedding/embedding.hpp |  9 +++++--
 3 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/tests/sweep_framework/one_test.py b/tests/sweep_framework/one_test.py
index 868f1af7feb..62a778c9a5d 100644
--- a/tests/sweep_framework/one_test.py
+++ b/tests/sweep_framework/one_test.py
@@ -1,6 +1,8 @@
 from typing import Optional, Tuple
 
 import torch
+
+torch.manual_seed(42)
 import random
 import ttnn
 
@@ -66,14 +68,14 @@ def run(
     print(torch_output_tensor.shape)
 
     # Loop through rows of tensor and print first false element, print PASS if row is correct
-    for i in range(check_tensor.shape[1]):
-        if False in check_tensor[0, i]:
-            print(f"Row {i}: FAIL")
-            print(torch_output_tensor[0, i])
-            print(ttnn_output_tensor[0, i])
-            break
-        else:
-            print(f"Row {i}: PASS")
+    # for i in range(check_tensor.shape[1]):
+    #     if False in check_tensor[0, i]:
+    #         print(f"Row {i}: FAIL")
+    #         print(torch_output_tensor[0, i])
+    #         print(ttnn_output_tensor[0, i])
+    #         break
+    #     else:
+    #         print(f"Row {i}: PASS")
 
     # Compare the results and return performance and accuracy check
     result = check_with_pcc(torch_output_tensor, ttnn_output_tensor, 0.999)
@@ -93,11 +95,11 @@ def get_devices(test_module):
 
 # pcc errors
 # embedding_specs = {'weight_shape': [2, 768], 'indices_shape': [1, 8]}
-# embedding_specs = {'weight_shape': [30528, 768], 'indices_shape': [1, 8], 'padding_idx': 0}
-# embedding_specs = {'weight_shape': [400, 10], 'indices_shape': [1, 24]}
+embedding_specs = {"weight_shape": [30528, 768], "indices_shape": [1, 32], "padding_idx": 0}
+# embedding_specs = {'weight_shape': [400, 10], 'indices_shape': [1, 24]}  # Should output 1, 24, 10
 
 # page/buffer errors
-embedding_specs = {"weight_shape": [77, 512], "indices_shape": [1, 7]}
+# embedding_specs = {"weight_shape": [77, 512], "indices_shape": [1, 8]}  # Should output 1, 7, 512
 
 dtype = ttnn.bfloat16
 layout = ttnn.TILE_LAYOUT
diff --git a/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp b/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp
index 3361b261937..a3cd6e05a01 100644
--- a/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp
+++ b/ttnn/cpp/ttnn/operations/embedding/device/kernels/dataflow/embeddings_tilize.cpp
@@ -95,21 +95,21 @@ void kernel_main() {
             uint64_t src_noc_addr;
             uint32_t token = input_l1_ptr[k];
 #if defined PADDED
-                if (token == pad_token) {
-                    src_noc_addr = pad_noc_addr;
-                } else {
-                    src_noc_addr = get_noc_addr(token, weights);
-                }
-    #elif defined BINARY
-                if (token == 0) {
-                    src_noc_addr = zero_noc_addr;
-                } else {
-                    src_noc_addr = one_noc_addr;
-                }
+            if (token == pad_token) {
+                src_noc_addr = pad_noc_addr;
+            } else {
+                src_noc_addr = get_noc_addr(token, weights);
+            }
+#elif defined BINARY
+            if (token == 0) {
+                src_noc_addr = zero_noc_addr;
+            } else {
+                src_noc_addr = one_noc_addr;
+            }
 #else
 #if defined BFP16
             union { float f; uint32_t u; } u;
-            u.u = (uint32_t)input_l1_ptr[token_idx] << 16;
+            u.u = (uint32_t)input_l1_ptr[k] << 16;
             uint32_t token_casted = static_cast<uint32_t>(u.f);
             src_noc_addr = get_noc_addr(token_casted, weights);
 #else
diff --git a/ttnn/cpp/ttnn/operations/embedding/embedding.hpp b/ttnn/cpp/ttnn/operations/embedding/embedding.hpp
index b028e64cca9..326781aaf82 100644
--- a/ttnn/cpp/ttnn/operations/embedding/embedding.hpp
+++ b/ttnn/cpp/ttnn/operations/embedding/embedding.hpp
@@ -47,11 +47,16 @@ struct EmbeddingOperation {
         auto input_tensor =
             ttnn::reshape(mutable_input_tensor, ttnn::Shape{std::array<uint32_t, 4>{batch_size, 1, 1, sentence_size}});
-        bool tilized = false;
+        bool fuzed_tilized = layout == ttnn::TILE_LAYOUT;
+
+        // If layout is row major, or the input/weight last dims are not a multiple of TILE_HEIGHT/TILE_WIDTH, we cannot use the fused tilized path
+        if (!fuzed_tilized || input_tensor.get_legacy_shape()[-1] % TILE_HEIGHT) fuzed_tilized = false;
+        if (!fuzed_tilized || weight.get_legacy_shape()[-1] % TILE_WIDTH) fuzed_tilized = false;
+
         auto embeddings = operation::run(
             Embeddings{
                 .output_mem_config = memory_config.value_or(input_tensor.memory_config()),
-                .tilized = tilized,
+                .tilized = fuzed_tilized,
                 .embeddings_type = embeddings_type,
                 .pad_token = pad_token,
                 .output_dtype = dtype.value_or(weight.get_dtype())},
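
Note on the BFP16 branch in embeddings_tilize.cpp above: the kernel stores token indices as bfloat16 values, widens the 16 stored bits into the high half of a float, and truncates that float back to an integer row id before computing the weights NoC address; the index bug fixed by this patch was reading input_l1_ptr[token_idx] instead of the loop variable k. The snippet below is a minimal host-side sketch of that decode step, not the device kernel itself; the helper name decode_bf16_token and the test value are illustrative only.

    // Hypothetical host-side illustration of the bfloat16 token-index decode
    // used by the kernel's BFP16 branch (the kernel itself type-puns via a union).
    #include <cstdint>
    #include <cstring>
    #include <iostream>

    static uint32_t decode_bf16_token(uint16_t bf16_bits) {
        uint32_t u = static_cast<uint32_t>(bf16_bits) << 16;  // bf16 bits -> fp32 bit pattern
        float f;
        std::memcpy(&f, &u, sizeof f);                        // reinterpret the bits as a float
        return static_cast<uint32_t>(f);                      // truncate back to an integer token id
    }

    int main() {
        // 0x4228 is the bfloat16 encoding of 42.0, so this prints 42.
        std::cout << decode_bf16_token(0x4228) << "\n";
        return 0;
    }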