From 23b8d7f9443d92a789ae6cbb925dcae074382f5f Mon Sep 17 00:00:00 2001 From: Oleg Milyutin Date: Tue, 3 Dec 2024 17:25:43 +0000 Subject: [PATCH] Removed the use of numpy creation functions --- tests/tt_eager/ops/test_bcast_op.cpp | 18 ++-- tests/tt_eager/ops/test_bmm_op.cpp | 18 ++-- .../tensors/test_async_tensor_apis.cpp | 86 ++++++++++--------- tests/tt_eager/tensors/test_copy_and_move.cpp | 11 +-- .../tt_metal/common/dispatch_fixture.hpp | 2 +- .../unit_tests/gtests/test_async_runtime.cpp | 56 ++++++------ ttnn/cpp/ttnn/operations/numpy/functions.hpp | 43 ---------- 7 files changed, 92 insertions(+), 142 deletions(-) diff --git a/tests/tt_eager/ops/test_bcast_op.cpp b/tests/tt_eager/ops/test_bcast_op.cpp index a761c1eba51..05be3303c06 100644 --- a/tests/tt_eager/ops/test_bcast_op.cpp +++ b/tests/tt_eager/ops/test_bcast_op.cpp @@ -3,16 +3,13 @@ // SPDX-License-Identifier: Apache-2.0 #include "tt_metal/host_api.hpp" +#include "ttnn/cpp/ttnn/operations/creation.hpp" #include "ttnn/tensor/tensor.hpp" #include "ttnn/operations/data_movement/bcast/bcast.hpp" #include "common/constants.hpp" #include #include -#include -#include -#include - using namespace tt; using namespace tt_metal; using namespace constants; @@ -53,9 +50,8 @@ int main(int argc, char** argv) { } Tensor a = ttnn::numpy::random::random(input_shape_a).to(Layout::TILE).to(device); - Tensor b = ttnn::numpy::zeros({1, 1, TILE_HEIGHT, TILE_WIDTH}, DataType::BFLOAT16) - .to(Layout::TILE) - .to(device); + Tensor b = ttnn::zeros( + ttnn::Shape({1, 1, TILE_HEIGHT, TILE_WIDTH}), DataType::BFLOAT16, Layout::TILE, *device); for (auto bcast_math : magic_enum::enum_values()) { Tensor c = ttnn::bcast(0, a, b, bcast_math, bcast_dim); @@ -72,28 +68,28 @@ int main(int argc, char** argv) { { Tensor a = ttnn::numpy::random::random({1, 1, 32, 4544}).to(Layout::TILE).to(device); - Tensor b = ttnn::numpy::zeros({1, 1, 32, 4544}, DataType::BFLOAT16).to(Layout::TILE).to(device); + Tensor b = 
ttnn::zeros(ttnn::Shape({1, 1, 32, 4544}), DataType::BFLOAT16, Layout::TILE, *device); Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::MUL, ttnn::BcastOpDim::H); Tensor d = c.cpu(); } { Tensor a = ttnn::numpy::random::random({1, 1, 32, 4544}).to(Layout::TILE).to(device); - Tensor b = ttnn::numpy::zeros({1, 1, 32, 4544}, DataType::BFLOAT16).to(Layout::TILE).to(device); + Tensor b = ttnn::zeros(ttnn::Shape({1, 1, 32, 4544}), DataType::BFLOAT16, Layout::TILE, *device); Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::ADD, ttnn::BcastOpDim::H); Tensor d = c.cpu(); } { Tensor a = ttnn::numpy::random::random({1, 71, 32, 32}).to(Layout::TILE).to(device); - Tensor b = ttnn::numpy::zeros({1, 1, 32, 32}, DataType::BFLOAT16).to(Layout::TILE).to(device); + Tensor b = ttnn::zeros(ttnn::Shape({1, 1, 32, 32}), DataType::BFLOAT16, Layout::TILE, *device); Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::MUL, ttnn::BcastOpDim::HW); Tensor d = c.cpu(); } { Tensor a = ttnn::numpy::random::random({1, 71, 32, 64}).to(Layout::TILE).to(device); - Tensor b = ttnn::numpy::zeros({1, 1, 32, 32}, DataType::BFLOAT16).to(Layout::TILE).to(device); + Tensor b = ttnn::zeros(ttnn::Shape({1, 1, 32, 32}), DataType::BFLOAT16, Layout::TILE, *device); Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::MUL, ttnn::BcastOpDim::HW); Tensor d = c.cpu(); } diff --git a/tests/tt_eager/ops/test_bmm_op.cpp b/tests/tt_eager/ops/test_bmm_op.cpp index c7760e67354..f769870b595 100644 --- a/tests/tt_eager/ops/test_bmm_op.cpp +++ b/tests/tt_eager/ops/test_bmm_op.cpp @@ -3,15 +3,13 @@ // SPDX-License-Identifier: Apache-2.0 #include "tt_metal/host_api.hpp" +#include "ttnn/cpp/ttnn/operations/creation.hpp" #include "ttnn/tensor/tensor.hpp" +#include "ttnn/tensor/types.hpp" #include "ttnn/operations/matmul/device/matmul_op.hpp" #include "common/constants.hpp" #include "ttnn/operations/numpy/functions.hpp" -#include -#include -#include - using namespace tt; using namespace tt_metal; using namespace constants; @@ 
-37,14 +35,14 @@ int main(int argc, char** argv) { uint32_t Kt = 2; uint32_t Nt = 4; uint32_t B = 5; - tt::tt_metal::LegacyShape shapea = {B, 1, Mt * TILE_HEIGHT, Kt * TILE_WIDTH}; - tt::tt_metal::LegacyShape shapeb = {B, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH}; - tt::tt_metal::LegacyShape shapeb1 = {1, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH}; + ttnn::Shape shapea({B, 1, Mt * TILE_HEIGHT, Kt * TILE_WIDTH}); + ttnn::Shape shapeb({B, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH}); + ttnn::Shape shapeb1({1, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH}); // Allocates a DRAM buffer on device populated with values specified by initialize - Tensor a = ttnn::numpy::random::random(shapea).to(Layout::TILE).to(device); - Tensor b = ttnn::numpy::zeros(shapeb, DataType::BFLOAT16).to(Layout::TILE).to(device); - Tensor b1 = ttnn::numpy::zeros(shapeb1, DataType::BFLOAT16).to(Layout::TILE).to(device); + Tensor a = ttnn::numpy::random::random(shapea.value).to(Layout::TILE).to(device); + Tensor b = ttnn::zeros(shapeb, DataType::BFLOAT16, Layout::TILE, *device); + Tensor b1 = ttnn::zeros(shapeb1, DataType::BFLOAT16, Layout::TILE, *device); Tensor mm = ttnn::operations::matmul::matmul( a, diff --git a/tests/tt_eager/tensors/test_async_tensor_apis.cpp b/tests/tt_eager/tensors/test_async_tensor_apis.cpp index b762f10acf7..7641dab1450 100644 --- a/tests/tt_eager/tensors/test_async_tensor_apis.cpp +++ b/tests/tt_eager/tensors/test_async_tensor_apis.cpp @@ -2,13 +2,11 @@ // // SPDX-License-Identifier: Apache-2.0 -#include #include -#include -#include #include "common/bfloat16.hpp" #include "common/constants.hpp" +#include "ttnn/cpp/ttnn/operations/creation.hpp" #include "ttnn/tensor/host_buffer/functions.hpp" #include "ttnn/tensor/host_buffer/types.hpp" #include "ttnn/tensor/tensor.hpp" @@ -21,11 +19,12 @@ #include "ttnn/operations/eltwise/binary/binary.hpp" #include "ttnn/operations/eltwise/unary/unary.hpp" -using namespace tt; -using namespace tt_metal; -using namespace constants; - +namespace tt::tt_metal 
{ namespace { + +using ::tt::constants::TILE_HEIGHT; +using ::tt::constants::TILE_WIDTH; + uint32_t get_device_buffer_address(const Tensor& tensor) { TT_FATAL(std::holds_alternative(tensor.get_storage()), "Tensor storage is not DeviceStorage"); auto buffer = std::get(tensor.get_storage()).buffer; @@ -33,7 +32,6 @@ uint32_t get_device_buffer_address(const Tensor& tensor) { buffer->device()->push_work([&]() { result = buffer->address(); }, true); return result; } -} // namespace TEST_F(DispatchFixture, TestTensorOwnershipSanity) { // Sanity test tensor read, write and update paths with synchronous @@ -122,18 +120,12 @@ TEST_F(DispatchFixture, TestAsyncEltwiseBinary) { for (int i = 0; i < 5; i++) { // Initialize tensors and move them to DRAM - Tensor input_tensor_a = - ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16, Layout::TILE) - .to(device); - Tensor input_tensor_b = - ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16, Layout::TILE) - .to(device); - Tensor input_tensor_c = - ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16, Layout::TILE) - .to(device); + Tensor input_tensor_a = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16, Layout::TILE, *device); + Tensor input_tensor_b = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16, Layout::TILE, *device); + Tensor input_tensor_c = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16, Layout::TILE, *device); Tensor output_tensor_device = ttnn::multiply(ttnn::add(input_tensor_a, input_tensor_b), input_tensor_c); Tensor output_tensor_device_2 = ttnn::neg(ttnn::subtract(output_tensor_device, input_tensor_c)); @@ -181,12 +173,18 @@ TEST_F(DispatchFixture, TestAsyncRefCountManager) { for (int i = 0; i < 5; i++) { // Run for multiple loops to ensure deterministic behaviour 
with device addresses // Initialize 2 tensors on device - Tensor tensor1 = ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16) - .to(device); - Tensor tensor2 = ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16) - .to(device); + Tensor tensor1 = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), + static_cast(i), + DataType::BFLOAT16, + /*layout=*/std::nullopt, + *device); + Tensor tensor2 = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), + static_cast(i), + DataType::BFLOAT16, + /*layout=*/std::nullopt, + *device); uint32_t tensor2_device_buf_addr = get_device_buffer_address(tensor2); // Assign tensor1 to tensor2 and ensure that ref counts are appropriately updated with the buffer for tensor2 // deallocated @@ -195,18 +193,23 @@ TEST_F(DispatchFixture, TestAsyncRefCountManager) { EXPECT_EQ(tensor1.tensor_attributes->main_thread_ref_count, 2); // To check if tensor2 is deallocated, create a third tensor on device and ensure that its address matches the // prev addr for tensor2 - Tensor tensor3 = ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16) - .to(device); + Tensor tensor3 = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), + static_cast(i), + DataType::BFLOAT16, + /*layout=*/std::nullopt, + *device); EXPECT_EQ(get_device_buffer_address(tensor3), tensor2_device_buf_addr); EXPECT_EQ(get_device_buffer_address(tensor1), get_device_buffer_address(tensor2)); } log_info(LogTest, "Testing Device tensor self-assignment through function"); for (int i = 0; i < 5; i++) { - Tensor device_tensor = - ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16) - .to(device); + Tensor device_tensor = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), + static_cast(i), + DataType::BFLOAT16, + /*layout=*/std::nullopt, + *device); uint32_t device_tensor_address = 
get_device_buffer_address(device_tensor); // This step will copy the tensor to a temp rval and std::move it back to the caller's instance of device_tensor // Ensure ref count and address remain unchanged @@ -217,18 +220,19 @@ TEST_F(DispatchFixture, TestAsyncRefCountManager) { log_info(LogTest, "Testing Device tensor move assignment"); for (int i = 0; i < 5; i++) { - Tensor tensor1 = ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(i), DataType::BFLOAT16) - .to(device); + Tensor tensor1 = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), + static_cast(i), + DataType::BFLOAT16, + /*layout=*/std::nullopt, + *device); Tensor tensor2 = std::move(tensor1); EXPECT_EQ(tensor2.tensor_attributes->main_thread_ref_count, 1); } log_info(LogTest, "Testing Device tensor self-assignment"); - Tensor tensor_to_self_assign = - ttnn::numpy::full( - tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast(0), DataType::BFLOAT16) - .to(device); + Tensor tensor_to_self_assign = ttnn::full( + ttnn::Shape({1, 1, 1024, 1024}), static_cast(0), DataType::BFLOAT16, /*layout=*/std::nullopt, *device); uint32_t tensor_to_self_assign_address = get_device_buffer_address(tensor_to_self_assign); tensor_to_self_assign = tensor_to_self_assign; EXPECT_EQ(tensor_to_self_assign.tensor_attributes->main_thread_ref_count, 1); @@ -338,3 +342,5 @@ TEST_F(DispatchFixture, TestTensorAsyncDataMovement) { EXPECT_EQ(readback_tensor.get_layout(), Layout::ROW_MAJOR); EXPECT_EQ(readback_tensor.get_shape(), ttnn::Shape(tt::tt_metal::LegacyShape({1, 1, 32, tensor_stop / 32}))); } +} // namespace +} // namespace tt::tt_metal diff --git a/tests/tt_eager/tensors/test_copy_and_move.cpp b/tests/tt_eager/tensors/test_copy_and_move.cpp index f735791ad4c..0129a039829 100644 --- a/tests/tt_eager/tensors/test_copy_and_move.cpp +++ b/tests/tt_eager/tensors/test_copy_and_move.cpp @@ -2,12 +2,9 @@ // // SPDX-License-Identifier: Apache-2.0 -#include -#include -#include - #include "common/bfloat16.hpp" 
#include "common/constants.hpp" +#include "ttnn/cpp/ttnn/operations/creation.hpp" #include "ttnn/tensor/host_buffer/functions.hpp" #include "ttnn/tensor/host_buffer/types.hpp" #include "ttnn/tensor/tensor.hpp" @@ -58,7 +55,7 @@ bool test_tensor_copy_semantics(Device* device) { pass &= dev_a_data == host_d_copy_data; // dev tensor updated with host tensor copy assignment - Tensor host_e = ttnn::numpy::ones(single_tile_shape).to(Layout::TILE); + Tensor host_e = ttnn::ones(single_tile_shape, DataType::BFLOAT16, Layout::TILE); Tensor dev_e_copy = ttnn::numpy::random::random(single_tile_shape).to(Layout::TILE).to(device); dev_e_copy = host_e; pass &= (dev_e_copy.storage_type() == StorageType::OWNED); @@ -67,8 +64,8 @@ bool test_tensor_copy_semantics(Device* device) { pass &= host_e_data == dev_e_copy_data; // dev tensor updated with dev tensor copy assignment - Tensor dev_b = ttnn::numpy::ones(single_tile_shape).to(Layout::TILE).to(device); - Tensor dev_b_copy = ttnn::numpy::zeros(single_tile_shape).to(Layout::TILE).to(device); + Tensor dev_b = ttnn::ones(single_tile_shape, DataType::BFLOAT16, Layout::TILE, *device); + Tensor dev_b_copy = ttnn::zeros(single_tile_shape, DataType::BFLOAT16, Layout::TILE, *device); dev_b_copy = dev_b; pass &= (dev_b_copy.storage_type() == StorageType::DEVICE); auto dev_b_on_host = dev_b.cpu(); diff --git a/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp b/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp index 7656ac8c147..57bfbcdb934 100644 --- a/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp +++ b/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp @@ -46,7 +46,7 @@ class DispatchFixture : public ::testing::Test { } void ReadBuffer( tt::tt_metal::Device* device, - std::shared_ptr out_buffer, + const std::shared_ptr& out_buffer, std::vector& dst_vec) { if (this->slow_dispatch_) { tt::tt_metal::detail::ReadFromBuffer(out_buffer, dst_vec); diff --git a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp 
b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp index 7e1ab23115e..b5495a324db 100644 --- a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp +++ b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 +#include "ttnn/cpp/ttnn/operations/creation.hpp" #include "ttnn/tensor/tensor.hpp" #include "ttnn/tensor/layout/tensor_layout.hpp" #include "ttnn_multi_command_queue_fixture.hpp" @@ -10,14 +11,13 @@ #include "ttnn/operations/moreh/moreh_sum/moreh_sum.hpp" #include "common/bfloat16.hpp" #include "ttnn/async_runtime.hpp" -#include "ttnn/operations/numpy/functions.hpp" #include "tt_metal/impl/event/event.hpp" #include -using namespace tt; -using namespace tt_metal; -using MultiCommandQueueSingleDeviceFixture = ttnn::MultiCommandQueueSingleDeviceFixture; -using namespace constants; +namespace tt::tt_metal { +namespace { + +using MultiCommandQueueSingleDeviceFixture = ::ttnn::MultiCommandQueueSingleDeviceFixture; TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { Device* device = this->device_; @@ -40,16 +40,14 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { host_data[i] = bfloat16(static_cast(1)); } // Create golden data using tt_eager APIs - Tensor np_tensor = ttnn::numpy::full(input_shape.value, static_cast(1), DataType::BFLOAT16) - .to(Layout::TILE) - .to(device); + Tensor np_tensor = ttnn::full(input_shape, static_cast(1), DataType::BFLOAT16, Layout::TILE, *device_); ttnn::SmallVector reduce_dims = {3}; Tensor np_out = ttnn::moreh_sum(np_tensor, reduce_dims, false, std::nullopt, std::nullopt, std::nullopt); Tensor np_out_host = np_out.cpu(); const bfloat16* golden_output = std::get>(std::get(np_out_host.get_storage()).buffer).begin(); // Enable Asynchronous Execution and test ttnn runtime APIs - device->enable_async(true); + device_->enable_async(true); // Events for host - device synchronization auto write_event = std::make_shared(); auto 
workload_event = std::make_shared(); @@ -63,9 +61,9 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { output_buf_size_datums * datum_size_bytes, tensor_layout.compute_packed_buffer_size_bytes(np_out.get_padded_shape())); auto input_buffer = tt::tt_metal::tensor_impl::allocate_buffer_on_device( - device, TensorSpec(input_shape.padded_shape(), tensor_layout)); + device_, TensorSpec(input_shape.padded_shape(), tensor_layout)); auto output_buffer = tt::tt_metal::tensor_impl::allocate_buffer_on_device( - device, TensorSpec(np_out.get_padded_shape(), tensor_layout)); + device_, TensorSpec(np_out.get_padded_shape(), tensor_layout)); auto input_storage = tt::tt_metal::DeviceStorage{input_buffer}; auto output_storage = tt::tt_metal::DeviceStorage{output_buffer}; Tensor input_tensor = Tensor(input_storage, input_shape, DataType::BFLOAT16, Layout::TILE); @@ -73,13 +71,13 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { // Populate input_tensor with data ttnn::write_buffer(io_cq, input_tensor, {host_data}); // Record the completion of the write event - ttnn::record_event(device->command_queue(io_cq), write_event); + ttnn::record_event(device_->command_queue(io_cq), write_event); // Host stalls until write is completed, before sending workload ttnn::event_synchronize(write_event); // Dispatch workload. Preallocated output_tensor is populated by op/ ttnn::moreh_sum(input_tensor, /*dim*/ 3, false, output_tensor, std::nullopt, std::nullopt); // Record completion of workload - ttnn::record_event(device->command_queue(workload_dispatch_cq), workload_event); + ttnn::record_event(device_->command_queue(workload_dispatch_cq), workload_event); ttnn::event_synchronize(workload_event); // Read output back, once workload is complete ttnn::read_buffer(io_cq, output_tensor, {readback_data}); @@ -93,7 +91,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { // Deallocate tensors (tensor gives up buffer). 
Done asynchronously, so sync on queue after. input_tensor.deallocate(); output_tensor.deallocate(); - ttnn::queue_synchronize(device->command_queue(io_cq)); + ttnn::queue_synchronize(device_->command_queue(io_cq)); // Buffer only has 2 owners in main thread. EXPECT_EQ(input_buffer.use_count(), 2); EXPECT_EQ(output_buffer.use_count(), 2); @@ -103,8 +101,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) { } TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeAllocatedBuffers) { - Device* device = this->device_; - device->enable_async(true); + device_->enable_async(true); MemoryConfig mem_cfg = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED, .buffer_type = BufferType::DRAM, @@ -131,26 +128,26 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeAllocatedBuffers) { TensorLayout tensor_layout(DataType::BFLOAT16, PageConfig(Layout::TILE), mem_cfg); ASSERT_EQ(buf_size_datums * datum_size_bytes, tensor_layout.compute_packed_buffer_size_bytes(shape)); auto input_buffer = - tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, TensorSpec(shape, tensor_layout)); + tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, TensorSpec(shape, tensor_layout)); auto input_storage = tt::tt_metal::DeviceStorage{input_buffer}; Tensor input_tensor = Tensor(input_storage, shape, DataType::BFLOAT16, Layout::TILE); - ttnn::write_buffer(io_cq, input_tensor, {host_data}); // Write using cq 1 - ttnn::record_event(device->command_queue(io_cq), write_event); // Record write on cq 1 + ttnn::write_buffer(io_cq, input_tensor, {host_data}); // Write using cq 1 + ttnn::record_event(device_->command_queue(io_cq), write_event); // Record write on cq 1 // Wait until cq 1 write is complete - ttnn::wait_for_event(device->command_queue(workload_dispatch_cq), write_event); + ttnn::wait_for_event(device_->command_queue(workload_dispatch_cq), write_event); // Run operation on cq 0 Tensor output_tensor = 
ttnn::sqrt(workload_dispatch_cq, input_tensor); auto dummy_buffer_0 = - tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, TensorSpec(shape, tensor_layout)); + tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, TensorSpec(shape, tensor_layout)); output_tensor = ttnn::neg(workload_dispatch_cq, output_tensor); // Allocate this buffer to stress test async allocation across op execution and explicit allocation auto dummy_buffer_1 = - tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, TensorSpec(shape, tensor_layout)); + tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, TensorSpec(shape, tensor_layout)); // Record cq 0 prog execution - ttnn::record_event(device->command_queue(workload_dispatch_cq), workload_event); + ttnn::record_event(device_->command_queue(workload_dispatch_cq), workload_event); // Wait until cq 0 prog execution is done - ttnn::wait_for_event(device->command_queue(io_cq), workload_event); + ttnn::wait_for_event(device_->command_queue(io_cq), workload_event); // Read using cq 1 ttnn::read_buffer(io_cq, output_tensor, {readback_data}); for (int i = 0; i < buf_size_datums; i++) { @@ -166,8 +163,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeBufferDestructor) { // Test functionality for the buffer destructor, which will call deallocate asynchronously // We must ensure that the deallocate step, which can run after the buffer has been destroyed // does not rely on stale buffer state, after the buffer has been destroyed on host - Device* device = this->device_; - device->enable_async(true); + device_->enable_async(true); MemoryConfig mem_cfg = MemoryConfig{ .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED, .buffer_type = BufferType::DRAM, @@ -182,9 +178,9 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeBufferDestructor) { TensorLayout tensor_layout(DataType::BFLOAT16, PageConfig(Layout::TILE), mem_cfg); TensorSpec tensor_spec(shape, tensor_layout); for (int loop = 0; 
loop < 100000; loop++) { - { - auto input_buffer_dummy = tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, tensor_spec); - device->synchronize(); - } + auto input_buffer_dummy = tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, tensor_spec); + device_->synchronize(); } } +} // namespace +} // namespace tt::tt_metal diff --git a/ttnn/cpp/ttnn/operations/numpy/functions.hpp b/ttnn/cpp/ttnn/operations/numpy/functions.hpp index 31f1ec32efe..8fa40f0c4e6 100644 --- a/ttnn/cpp/ttnn/operations/numpy/functions.hpp +++ b/ttnn/cpp/ttnn/operations/numpy/functions.hpp @@ -134,49 +134,6 @@ static Tensor full_impl( } } -// TODO: #14974 - Can this be deleted, as it is only used in tests? -template -static Tensor full( - const tt::tt_metal::LegacyShape& shape, - const T value, - const DataType data_type, - const Layout layout = Layout::ROW_MAJOR, - Device* device = nullptr, - const MemoryConfig& output_mem_config = MemoryConfig{ - .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - return full_impl( - ttnn::DefaultQueueId, - shape, - value, - data_type, - layout, - device ? std::vector{device} : std::vector{}, - output_mem_config, - std::nullopt); -} - -// TODO: #14974 - Can this be deleted, as it is only used in tests? -static Tensor zeros( - const tt::tt_metal::LegacyShape& shape, - const DataType data_type = DataType::BFLOAT16, - const Layout layout = Layout::ROW_MAJOR, - Device* device = nullptr, - const MemoryConfig& output_mem_config = MemoryConfig{ - .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - return full(shape, 0.0f, data_type, layout, device, output_mem_config); -} - -// TODO: #14974 - Can this be deleted, as it is only used in tests? 
-static Tensor ones( - const tt::tt_metal::LegacyShape& shape, - const DataType data_type = DataType::BFLOAT16, - const Layout layout = Layout::ROW_MAJOR, - Device* device = nullptr, - const MemoryConfig& output_mem_config = MemoryConfig{ - .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) { - return full(shape, 1.0f, data_type, layout, device, output_mem_config); -} - template static Tensor arange( const int64_t start,