Skip to content

Commit

Permalink
#15061: Extended {to,from}_vector to support tilized layout, bf4/8 fo…
Browse files Browse the repository at this point in the history
…rmats (#16105)

### Ticket
#15061

### Problem description
`to_vector` / `from_vector` don't support some special cases, which
prevents more widespread adoption (in particular, distributing tensors
across a mesh of devices).

### What's changed
* Support tilized layouts.
* Support bf4 / bf8 data types with auto-padding.
* Extended `chunk` / `concat` support for the added types.

### Next steps
* Optimize certain operations on-device, such as tilization, whenever
possible.
* Perform auto-padding in tilized layouts / when using sharding.
* Switch the pytensor logic to use the `from_vector` API.

### Checklist
- [X] [Post commit CI
passes](https://github.com/tenstorrent/tt-metal/actions/runs/12422597810)
- [X] New/Existing tests provide coverage for changes

---------

Co-authored-by: Oleg Milyutin <[email protected]>
  • Loading branch information
omilyutin-tt and Oleg Milyutin authored Dec 24, 2024
1 parent a547d4c commit c7e6b43
Show file tree
Hide file tree
Showing 7 changed files with 214 additions and 79 deletions.
95 changes: 83 additions & 12 deletions tests/ttnn/unit_tests/gtests/tensor/test_vector_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,26 @@
#include <algorithm>
#include <cstdint>

#include "tests/ttnn/unit_tests/gtests/ttnn_test_fixtures.hpp"
#include "common/bfloat16.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/tensor/tensor_utils.hpp"
#include "ttnn/tensor/types.hpp"
#include "ttnn/tensor/xtensor/conversion_utils.hpp"
#include "ttnn/tensor/xtensor/xtensor_all_includes.hpp"

namespace ttnn {
namespace {

using ::testing::Eq;
using ::testing::FloatNear;
using ::testing::Pointwise;

// Convenience matcher: succeeds when a SimpleShape compares equal to the
// shape built from the given dimension values.
template <typename... Dims>
testing::Matcher<ttnn::SimpleShape> ShapeIs(Dims... dims) {
    const ttnn::SimpleShape expected({dims...});
    return testing::Eq(expected);
}

const std::vector<ttnn::SimpleShape>& get_shapes_for_test() {
static auto* shapes = new std::vector<ttnn::SimpleShape>{
ttnn::SimpleShape{1},
Expand All @@ -35,13 +45,14 @@ TensorSpec get_tensor_spec(const ttnn::SimpleShape& shape, DataType dtype, Layou
}

template <typename T>
std::vector<T> arange(int64_t start, int64_t end, int64_t step) {
std::vector<T> arange(int64_t start, int64_t end, int64_t step, std::optional<int64_t> cap = std::nullopt) {
std::vector<T> result;
for (int el : xt::arange<int64_t>(start, end, step)) {
int capped_el = cap ? el % *cap : el;
if constexpr (std::is_same_v<T, ::bfloat16>) {
result.push_back(T(static_cast<float>(el)));
result.push_back(T(static_cast<float>(capped_el)));
} else {
result.push_back(static_cast<T>(el));
result.push_back(static_cast<T>(capped_el));
}
}
return result;
Expand All @@ -50,7 +61,7 @@ std::vector<T> arange(int64_t start, int64_t end, int64_t step) {
// Typed test fixture: each test body is instantiated once per element type
// listed in TestTypes (see TYPED_TEST_SUITE below).
template <typename T>
class VectorConversionTest : public ::testing::Test {};

using TestTypes = ::testing::Types<float, bfloat16, uint32_t, int32_t>;
using TestTypes = ::testing::Types<float, bfloat16, uint8_t, uint16_t, uint32_t, int32_t>;
TYPED_TEST_SUITE(VectorConversionTest, TestTypes);

TYPED_TEST(VectorConversionTest, Roundtrip) {
Expand All @@ -74,21 +85,17 @@ TYPED_TEST(VectorConversionTest, RoundtripTilezedLayout) {
ttnn::SimpleShape shape{128, 128};

auto input = arange<TypeParam>(0, shape.volume(), 1);
// TODO: Support this.
EXPECT_ANY_THROW(
Tensor::from_vector(input, get_tensor_spec(shape, convert_to_data_type<TypeParam>(), Layout::TILE)));

auto output = Tensor::from_vector(input, get_tensor_spec(shape, convert_to_data_type<TypeParam>()))
.to(Layout::TILE)
auto output = Tensor::from_vector(input, get_tensor_spec(shape, convert_to_data_type<TypeParam>(), Layout::TILE))
.template to_vector<TypeParam>();

EXPECT_THAT(output, Pointwise(Eq(), input));
}

TYPED_TEST(VectorConversionTest, InvalidDtype) {
ttnn::SimpleShape shape{32, 32};
auto input = arange<TypeParam>(0, 42, 1);
auto input = arange<TypeParam>(0, shape.volume(), 1);

ASSERT_NE(input.size(), shape.volume());
EXPECT_ANY_THROW(Tensor::from_vector(
input,
get_tensor_spec(
Expand All @@ -97,7 +104,7 @@ TYPED_TEST(VectorConversionTest, InvalidDtype) {
(std::is_same_v<TypeParam, int32_t> ? DataType::FLOAT32 : DataType::INT32))));
}

TEST(FloatVectorConversionTest, RoundtripBfloat16Representation) {
TEST(FloatVectorConversionTest, RoundtripBfloat16) {
for (const auto& shape : get_shapes_for_test()) {
auto input_bf16 = arange<bfloat16>(0, static_cast<int64_t>(shape.volume()), 1);
std::vector<float> input_ft;
Expand All @@ -115,5 +122,69 @@ TEST(FloatVectorConversionTest, RoundtripBfloat16Representation) {
}
}

class BlockFloatVectorConversionTest : public ::testing::TestWithParam<DataType> {};

// Constructing a block-float tensor in ROW_MAJOR layout must be rejected:
// block-float formats are only supported in TILE layout.
TEST_P(BlockFloatVectorConversionTest, InvalidLayout) {
    const ttnn::SimpleShape shape{32, 32};
    const std::vector<float> zeros(shape.volume());
    const auto row_major_spec = get_tensor_spec(shape, GetParam(), Layout::ROW_MAJOR);
    EXPECT_ANY_THROW(Tensor::from_vector(zeros, row_major_spec));
}

// Round-trips a tile-aligned 32x32 tensor through a block-float format.
// Input values are capped to [0, 32) to bound the quantization error.
TEST_P(BlockFloatVectorConversionTest, Roundtrip) {
    const ttnn::SimpleShape shape{32, 32};
    const auto input = arange<float>(0, shape.volume(), 1, /*cap=*/32);
    const auto tile_spec = get_tensor_spec(shape, GetParam(), Layout::TILE);
    const auto roundtripped = Tensor::from_vector(input, tile_spec).to_vector<float>();
    // Block-float formats are lossy; compare with an absolute tolerance.
    EXPECT_THAT(roundtripped, Pointwise(FloatNear(4.0f), input));
}

// Round-trips a tensor whose logical shape is not tile-aligned: conversion
// must auto-pad the padded shape up to whole 32x32 tiles while preserving
// the logical shape and values.
TEST_P(BlockFloatVectorConversionTest, RoundtripWithPadding) {
    const ttnn::SimpleShape shape{14, 47};
    const auto input = arange<float>(0, shape.volume(), 1, /*cap=*/32);

    const auto tensor = Tensor::from_vector(input, get_tensor_spec(shape, GetParam(), Layout::TILE));

    EXPECT_THAT(tensor.get_logical_shape(), ShapeIs(14, 47));
    // 14x47 rounds up to one tile vertically and two tiles horizontally.
    EXPECT_THAT(tensor.get_padded_shape(), ShapeIs(32, 64));
    EXPECT_THAT(tensor.to_vector<float>(), Pointwise(FloatNear(4.0f), input));
}

// Like RoundtripWithPadding, but with a custom 16x16 tile: padding must round
// up to the custom tile size rather than the default 32x32.
TEST_P(BlockFloatVectorConversionTest, RoundtripWithPaddingAndCustomTile) {
    const ttnn::SimpleShape shape{14, 47};
    const auto input = arange<float>(0, shape.volume(), 1, /*cap=*/32);

    const TensorSpec custom_tile_spec(
        shape, TensorLayout(GetParam(), PageConfig(Layout::TILE, Tile({16, 16})), MemoryConfig{}));
    const auto tensor = Tensor::from_vector(input, custom_tile_spec);

    EXPECT_THAT(tensor.get_logical_shape(), ShapeIs(14, 47));
    // 14x47 rounds up to 16x48 with 16x16 tiles.
    EXPECT_THAT(tensor.get_padded_shape(), ShapeIs(16, 48));
    EXPECT_THAT(tensor.to_vector<float>(), Pointwise(FloatNear(4.0f), input));
}

// Run every BlockFloatVectorConversionTest case for both block-float dtypes.
INSTANTIATE_TEST_SUITE_P(
BlockFloatVectorConversionTest,
BlockFloatVectorConversionTest,
::testing::Values(DataType::BFLOAT4_B, DataType::BFLOAT8_B));

using DeviceVectorConversionTest = TTNNFixtureWithDevice;

// Verifies that from_vector honors a memory config placing the tensor in L1
// on device, and that to_vector reads the data back intact.
TEST_F(DeviceVectorConversionTest, RoundtripWithMemoryConfig) {
    const ttnn::SimpleShape shape{128, 128};
    const auto input = arange<float>(0, shape.volume(), 1);

    const TensorSpec l1_spec(
        shape, TensorLayout(DataType::FLOAT32, Layout::ROW_MAJOR, MemoryConfig{.buffer_type = BufferType::L1}));
    const auto tensor = Tensor::from_vector(input, l1_spec, device_);

    EXPECT_TRUE(is_device_tensor(tensor));
    EXPECT_TRUE(tensor.memory_config().is_l1());
    EXPECT_THAT(tensor.to_vector<float>(), Pointwise(Eq(), input));
}

} // namespace
} // namespace ttnn
5 changes: 3 additions & 2 deletions tt_metal/common/bfloat4.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@
#include <immintrin.h>

#include "tt_metal/common/assert.hpp"
#include "tt_metal/common/blockfloat_common.hpp"
#include "tt_metal/common/tt_backend_api_types.hpp"
#include "tt_metal/tt_stl/span.hpp"
#include "tracy/Tracy.hpp"
#include "blockfloat_common.hpp"

// TODO: empty struct to facilitate Tensor template logic. Reconsider how/why templating is supported in Tensor
struct bfloat4_b {};

inline std::vector<uint32_t> pack_fp32_vec_as_bfp4_tiles(
const std::vector<float>& fp32_vec,
tt::stl::Span<const float> fp32_vec,
bool row_major_input,
bool is_exp_a,
const std::optional<tt::tt_metal::Tile>& tile = std::nullopt) {
Expand Down
5 changes: 3 additions & 2 deletions tt_metal/common/bfloat8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
#include <immintrin.h>

#include "tt_metal/common/assert.hpp"
#include "tt_metal/common/blockfloat_common.hpp"
#include "tt_metal/common/tt_backend_api_types.hpp"
#include "tt_metal/tt_stl/span.hpp"
#include "tracy/Tracy.hpp"
#include "blockfloat_common.hpp"

// TODO: empty struct to facilitate Tensor template logic. Reconsider how/why templating is supported in Tensor
struct bfloat8_b {};
Expand Down Expand Up @@ -99,7 +100,7 @@ inline uint32_t create_packed_bfp8_packed_as_u32(
}

inline std::vector<uint32_t> pack_fp32_vec_as_bfp8_tiles(
const std::vector<float>& fp32_vec,
tt::stl::Span<const float> fp32_vec,
bool row_major_input,
bool is_exp_a,
const std::optional<tt::tt_metal::Tile>& tile = std::nullopt) {
Expand Down
5 changes: 3 additions & 2 deletions tt_metal/common/blockfloat_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "tt_metal/common/tt_backend_api_types.hpp"
#include "tracy/Tracy.hpp"
#include "tt_metal/impl/tile/tile.hpp"
#include "tt_metal/tt_stl/span.hpp"

inline uint8_t get_max_exp(const std::vector<uint32_t>& vec, bool is_exp_a) {
TT_ASSERT(vec.size() == 16);
Expand Down Expand Up @@ -288,7 +289,7 @@ inline uint32_t create_packed_bfp_packed_as_u32(

template <tt::DataFormat BfpFormat>
inline std::vector<uint32_t> pack_fp32_vec_as_bfp_tiles(
const std::vector<float>& fp32_vec,
tt::stl::Span<const float> fp32_vec,
bool row_major_input,
bool is_exp_a,
const std::optional<tt::tt_metal::Tile>& tile = std::nullopt) {
Expand Down Expand Up @@ -344,7 +345,7 @@ inline std::vector<uint32_t> pack_fp32_vec_as_bfp_tiles(
} else {
data_index = fp32_element_index++;
}
float float_num = fp32_vec.at(data_index);
float float_num = fp32_vec[data_index];
uint32_t uint32_num = *reinterpret_cast<uint32_t*>(&float_num);
single_row.push_back(uint32_num);
}
Expand Down
Loading

0 comments on commit c7e6b43

Please sign in to comment.