From 23b8d7f9443d92a789ae6cbb925dcae074382f5f Mon Sep 17 00:00:00 2001
From: Oleg Milyutin <omilyutin@tenstorrent.com>
Date: Tue, 3 Dec 2024 17:25:43 +0000
Subject: [PATCH] Removed the use of numpy creation functions

---
 tests/tt_eager/ops/test_bcast_op.cpp          | 18 ++--
 tests/tt_eager/ops/test_bmm_op.cpp            | 18 ++--
 .../tensors/test_async_tensor_apis.cpp        | 86 ++++++++++---------
 tests/tt_eager/tensors/test_copy_and_move.cpp | 11 +--
 .../tt_metal/common/dispatch_fixture.hpp      |  2 +-
 .../unit_tests/gtests/test_async_runtime.cpp  | 56 ++++++------
 ttnn/cpp/ttnn/operations/numpy/functions.hpp  | 43 ----------
 7 files changed, 92 insertions(+), 142 deletions(-)
diff --git a/tests/tt_eager/ops/test_bcast_op.cpp b/tests/tt_eager/ops/test_bcast_op.cpp
index a761c1eba51..05be3303c06 100644
--- a/tests/tt_eager/ops/test_bcast_op.cpp
+++ b/tests/tt_eager/ops/test_bcast_op.cpp
@@ -3,16 +3,13 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include "tt_metal/host_api.hpp"
+#include "ttnn/cpp/ttnn/operations/creation.hpp"
 #include "ttnn/tensor/tensor.hpp"
 #include "ttnn/operations/data_movement/bcast/bcast.hpp"
 #include "common/constants.hpp"
 #include <magic_enum.hpp>
 #include <ttnn/operations/numpy/functions.hpp>
 
-#include <algorithm>
-#include <functional>
-#include <random>
-
 using namespace tt;
 using namespace tt_metal;
 using namespace constants;
@@ -53,9 +50,8 @@ int main(int argc, char** argv) {
                     }
 
                     Tensor a = ttnn::numpy::random::random(input_shape_a).to(Layout::TILE).to(device);
-                    Tensor b = ttnn::numpy::zeros({1, 1, TILE_HEIGHT, TILE_WIDTH}, DataType::BFLOAT16)
-                                   .to(Layout::TILE)
-                                   .to(device);
+                    Tensor b = ttnn::zeros(
+                        ttnn::Shape({1, 1, TILE_HEIGHT, TILE_WIDTH}), DataType::BFLOAT16, Layout::TILE, *device);
 
                     for (auto bcast_math : magic_enum::enum_values<ttnn::BcastOpMath>()) {
                         Tensor c = ttnn::bcast(0, a, b, bcast_math, bcast_dim);
@@ -72,28 +68,28 @@ int main(int argc, char** argv) {
 
             {
                 Tensor a = ttnn::numpy::random::random({1, 1, 32, 4544}).to(Layout::TILE).to(device);
-                Tensor b = ttnn::numpy::zeros({1, 1, 32, 4544}, DataType::BFLOAT16).to(Layout::TILE).to(device);
+                Tensor b = ttnn::zeros(ttnn::Shape({1, 1, 32, 4544}), DataType::BFLOAT16, Layout::TILE, *device);
                 Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::MUL, ttnn::BcastOpDim::H);
                 Tensor d = c.cpu();
             }
 
             {
                 Tensor a = ttnn::numpy::random::random({1, 1, 32, 4544}).to(Layout::TILE).to(device);
-                Tensor b = ttnn::numpy::zeros({1, 1, 32, 4544}, DataType::BFLOAT16).to(Layout::TILE).to(device);
+                Tensor b = ttnn::zeros(ttnn::Shape({1, 1, 32, 4544}), DataType::BFLOAT16, Layout::TILE, *device);
                 Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::ADD, ttnn::BcastOpDim::H);
                 Tensor d = c.cpu();
             }
 
             {
                 Tensor a = ttnn::numpy::random::random({1, 71, 32, 32}).to(Layout::TILE).to(device);
-                Tensor b = ttnn::numpy::zeros({1, 1, 32, 32}, DataType::BFLOAT16).to(Layout::TILE).to(device);
+                Tensor b = ttnn::zeros(ttnn::Shape({1, 1, 32, 32}), DataType::BFLOAT16, Layout::TILE, *device);
                 Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::MUL, ttnn::BcastOpDim::HW);
                 Tensor d = c.cpu();
             }
 
             {
                 Tensor a = ttnn::numpy::random::random({1, 71, 32, 64}).to(Layout::TILE).to(device);
-                Tensor b = ttnn::numpy::zeros({1, 1, 32, 32}, DataType::BFLOAT16).to(Layout::TILE).to(device);
+                Tensor b = ttnn::zeros(ttnn::Shape({1, 1, 32, 32}), DataType::BFLOAT16, Layout::TILE, *device);
                 Tensor c = ttnn::bcast(0, a, b, ttnn::BcastOpMath::MUL, ttnn::BcastOpDim::HW);
                 Tensor d = c.cpu();
             }
diff --git a/tests/tt_eager/ops/test_bmm_op.cpp b/tests/tt_eager/ops/test_bmm_op.cpp
index c7760e67354..f769870b595 100644
--- a/tests/tt_eager/ops/test_bmm_op.cpp
+++ b/tests/tt_eager/ops/test_bmm_op.cpp
@@ -3,15 +3,13 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include "tt_metal/host_api.hpp"
+#include "ttnn/cpp/ttnn/operations/creation.hpp"
 #include "ttnn/tensor/tensor.hpp"
+#include "ttnn/tensor/types.hpp"
 #include "ttnn/operations/matmul/device/matmul_op.hpp"
 #include "common/constants.hpp"
 #include "ttnn/operations/numpy/functions.hpp"
 
-#include <algorithm>
-#include <functional>
-#include <random>
-
 using namespace tt;
 using namespace tt_metal;
 using namespace constants;
@@ -37,14 +35,14 @@ int main(int argc, char** argv) {
         uint32_t Kt = 2;
         uint32_t Nt = 4;
         uint32_t B = 5;
-        tt::tt_metal::LegacyShape shapea = {B, 1, Mt * TILE_HEIGHT, Kt * TILE_WIDTH};
-        tt::tt_metal::LegacyShape shapeb = {B, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH};
-        tt::tt_metal::LegacyShape shapeb1 = {1, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH};
+        ttnn::Shape shapea({B, 1, Mt * TILE_HEIGHT, Kt * TILE_WIDTH});
+        ttnn::Shape shapeb({B, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH});
+        ttnn::Shape shapeb1({1, 1, Kt * TILE_HEIGHT, Nt * TILE_WIDTH});
 
         // Allocates a DRAM buffer on device populated with values specified by initialize
-        Tensor a = ttnn::numpy::random::random(shapea).to(Layout::TILE).to(device);
-        Tensor b = ttnn::numpy::zeros(shapeb, DataType::BFLOAT16).to(Layout::TILE).to(device);
-        Tensor b1 = ttnn::numpy::zeros(shapeb1, DataType::BFLOAT16).to(Layout::TILE).to(device);
+        Tensor a = ttnn::numpy::random::random(shapea.value).to(Layout::TILE).to(device);
+        Tensor b = ttnn::zeros(shapeb, DataType::BFLOAT16, Layout::TILE, *device);
+        Tensor b1 = ttnn::zeros(shapeb1, DataType::BFLOAT16, Layout::TILE, *device);
 
         Tensor mm = ttnn::operations::matmul::matmul(
                         a,
diff --git a/tests/tt_eager/tensors/test_async_tensor_apis.cpp b/tests/tt_eager/tensors/test_async_tensor_apis.cpp
index b762f10acf7..7641dab1450 100644
--- a/tests/tt_eager/tensors/test_async_tensor_apis.cpp
+++ b/tests/tt_eager/tensors/test_async_tensor_apis.cpp
@@ -2,13 +2,11 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#include <algorithm>
 #include <chrono>
-#include <functional>
-#include <random>
 
 #include "common/bfloat16.hpp"
 #include "common/constants.hpp"
+#include "ttnn/cpp/ttnn/operations/creation.hpp"
 #include "ttnn/tensor/host_buffer/functions.hpp"
 #include "ttnn/tensor/host_buffer/types.hpp"
 #include "ttnn/tensor/tensor.hpp"
@@ -21,11 +19,12 @@
 #include "ttnn/operations/eltwise/binary/binary.hpp"
 #include "ttnn/operations/eltwise/unary/unary.hpp"
 
-using namespace tt;
-using namespace tt_metal;
-using namespace constants;
-
+namespace tt::tt_metal {
 namespace {
+
+using ::tt::constants::TILE_HEIGHT;
+using ::tt::constants::TILE_WIDTH;
+
 uint32_t get_device_buffer_address(const Tensor& tensor) {
     TT_FATAL(std::holds_alternative<DeviceStorage>(tensor.get_storage()), "Tensor storage is not DeviceStorage");
     auto buffer = std::get<DeviceStorage>(tensor.get_storage()).buffer;
@@ -33,7 +32,6 @@ uint32_t get_device_buffer_address(const Tensor& tensor) {
     buffer->device()->push_work([&]() { result = buffer->address(); }, true);
     return result;
 }
-}  // namespace
 
 TEST_F(DispatchFixture, TestTensorOwnershipSanity) {
     // Sanity test tensor read, write and update paths with synchronous
@@ -122,18 +120,12 @@ TEST_F(DispatchFixture, TestAsyncEltwiseBinary) {
 
     for (int i = 0; i < 5; i++) {
         // Initialize tensors and move them to DRAM
-        Tensor input_tensor_a =
-            ttnn::numpy::full<float>(
-                tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16, Layout::TILE)
-                .to(device);
-        Tensor input_tensor_b =
-            ttnn::numpy::full<float>(
-                tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16, Layout::TILE)
-                .to(device);
-        Tensor input_tensor_c =
-            ttnn::numpy::full<float>(
-                tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16, Layout::TILE)
-                .to(device);
+        Tensor input_tensor_a = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16, Layout::TILE, *device);
+        Tensor input_tensor_b = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16, Layout::TILE, *device);
+        Tensor input_tensor_c = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16, Layout::TILE, *device);
         Tensor output_tensor_device = ttnn::multiply(ttnn::add(input_tensor_a, input_tensor_b), input_tensor_c);
         Tensor output_tensor_device_2 = ttnn::neg(ttnn::subtract(output_tensor_device, input_tensor_c));
 
@@ -181,12 +173,18 @@ TEST_F(DispatchFixture, TestAsyncRefCountManager) {
     for (int i = 0; i < 5; i++) {
         // Run for multiple loops to ensure deterministic behaviour with device addresses
         // Initialize 2 tensors on device
-        Tensor tensor1 = ttnn::numpy::full<float>(
-                             tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16)
-                             .to(device);
-        Tensor tensor2 = ttnn::numpy::full<float>(
-                             tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16)
-                             .to(device);
+        Tensor tensor1 = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}),
+            static_cast<float>(i),
+            DataType::BFLOAT16,
+            /*layout=*/std::nullopt,
+            *device);
+        Tensor tensor2 = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}),
+            static_cast<float>(i),
+            DataType::BFLOAT16,
+            /*layout=*/std::nullopt,
+            *device);
         uint32_t tensor2_device_buf_addr = get_device_buffer_address(tensor2);
         // Assign tensor1 to tensor2 and ensure that ref counts are appropriately updated with the buffer for tensor2
         // deallocated
@@ -195,18 +193,23 @@ TEST_F(DispatchFixture, TestAsyncRefCountManager) {
         EXPECT_EQ(tensor1.tensor_attributes->main_thread_ref_count, 2);
         // To check if tensor2 is deallocated, create a third tensor on device and ensure that its address matches the
         // prev addr for tensor2
-        Tensor tensor3 = ttnn::numpy::full<float>(
-                             tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16)
-                             .to(device);
+        Tensor tensor3 = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}),
+            static_cast<float>(i),
+            DataType::BFLOAT16,
+            /*layout=*/std::nullopt,
+            *device);
         EXPECT_EQ(get_device_buffer_address(tensor3), tensor2_device_buf_addr);
         EXPECT_EQ(get_device_buffer_address(tensor1), get_device_buffer_address(tensor2));
     }
     log_info(LogTest, "Testing Device tensor self-assignment through function");
     for (int i = 0; i < 5; i++) {
-        Tensor device_tensor =
-            ttnn::numpy::full<float>(
-                tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16)
-                .to(device);
+        Tensor device_tensor = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}),
+            static_cast<float>(i),
+            DataType::BFLOAT16,
+            /*layout=*/std::nullopt,
+            *device);
         uint32_t device_tensor_address = get_device_buffer_address(device_tensor);
         // This step will copy the tensor to a temp rval and std::move it back to the caller's instance of device_tensor
         // Ensure ref count and address remain unchanged
@@ -217,18 +220,19 @@ TEST_F(DispatchFixture, TestAsyncRefCountManager) {
 
     log_info(LogTest, "Testing Device tensor move assignment");
     for (int i = 0; i < 5; i++) {
-        Tensor tensor1 = ttnn::numpy::full<float>(
-                             tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(i), DataType::BFLOAT16)
-                             .to(device);
+        Tensor tensor1 = ttnn::full(
+            ttnn::Shape({1, 1, 1024, 1024}),
+            static_cast<float>(i),
+            DataType::BFLOAT16,
+            /*layout=*/std::nullopt,
+            *device);
         Tensor tensor2 = std::move(tensor1);
         EXPECT_EQ(tensor2.tensor_attributes->main_thread_ref_count, 1);
     }
 
     log_info(LogTest, "Testing Device tensor self-assignment");
-    Tensor tensor_to_self_assign =
-        ttnn::numpy::full<float>(
-            tt::tt_metal::LegacyShape({1, 1, 1024, 1024}), static_cast<float>(0), DataType::BFLOAT16)
-            .to(device);
+    Tensor tensor_to_self_assign = ttnn::full(
+        ttnn::Shape({1, 1, 1024, 1024}), static_cast<float>(0), DataType::BFLOAT16, /*layout=*/std::nullopt, *device);
     uint32_t tensor_to_self_assign_address = get_device_buffer_address(tensor_to_self_assign);
     tensor_to_self_assign = tensor_to_self_assign;
     EXPECT_EQ(tensor_to_self_assign.tensor_attributes->main_thread_ref_count, 1);
@@ -338,3 +342,5 @@ TEST_F(DispatchFixture, TestTensorAsyncDataMovement) {
     EXPECT_EQ(readback_tensor.get_layout(), Layout::ROW_MAJOR);
     EXPECT_EQ(readback_tensor.get_shape(), ttnn::Shape(tt::tt_metal::LegacyShape({1, 1, 32, tensor_stop / 32})));
 }
+}  // namespace
+}  // namespace tt::tt_metal
diff --git a/tests/tt_eager/tensors/test_copy_and_move.cpp b/tests/tt_eager/tensors/test_copy_and_move.cpp
index f735791ad4c..0129a039829 100644
--- a/tests/tt_eager/tensors/test_copy_and_move.cpp
+++ b/tests/tt_eager/tensors/test_copy_and_move.cpp
@@ -2,12 +2,9 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#include <algorithm>
-#include <functional>
-#include <random>
-
 #include "common/bfloat16.hpp"
 #include "common/constants.hpp"
+#include "ttnn/cpp/ttnn/operations/creation.hpp"
 #include "ttnn/tensor/host_buffer/functions.hpp"
 #include "ttnn/tensor/host_buffer/types.hpp"
 #include "ttnn/tensor/tensor.hpp"
@@ -58,7 +55,7 @@ bool test_tensor_copy_semantics(Device* device) {
     pass &= dev_a_data == host_d_copy_data;
 
     // dev tensor updated with host tensor copy assignment
-    Tensor host_e = ttnn::numpy::ones(single_tile_shape).to(Layout::TILE);
+    Tensor host_e = ttnn::ones(single_tile_shape, DataType::BFLOAT16, Layout::TILE);
     Tensor dev_e_copy = ttnn::numpy::random::random(single_tile_shape).to(Layout::TILE).to(device);
     dev_e_copy = host_e;
     pass &= (dev_e_copy.storage_type() == StorageType::OWNED);
@@ -67,8 +64,8 @@ bool test_tensor_copy_semantics(Device* device) {
     pass &= host_e_data == dev_e_copy_data;
 
     // dev tensor updated with dev tensor copy assignment
-    Tensor dev_b = ttnn::numpy::ones(single_tile_shape).to(Layout::TILE).to(device);
-    Tensor dev_b_copy = ttnn::numpy::zeros(single_tile_shape).to(Layout::TILE).to(device);
+    Tensor dev_b = ttnn::ones(single_tile_shape, DataType::BFLOAT16, Layout::TILE, *device);
+    Tensor dev_b_copy = ttnn::zeros(single_tile_shape, DataType::BFLOAT16, Layout::TILE, *device);
     dev_b_copy = dev_b;
     pass &= (dev_b_copy.storage_type() == StorageType::DEVICE);
     auto dev_b_on_host = dev_b.cpu();
diff --git a/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp b/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp
index 7656ac8c147..57bfbcdb934 100644
--- a/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp
+++ b/tests/tt_metal/tt_metal/common/dispatch_fixture.hpp
@@ -46,7 +46,7 @@ class DispatchFixture : public ::testing::Test {
     }
     void ReadBuffer(
         tt::tt_metal::Device* device,
-        std::shared_ptr<tt::tt_metal::Buffer> out_buffer,
+        const std::shared_ptr<tt::tt_metal::Buffer>& out_buffer,
         std::vector<uint32_t>& dst_vec) {
         if (this->slow_dispatch_) {
             tt::tt_metal::detail::ReadFromBuffer(out_buffer, dst_vec);
diff --git a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp
index 7e1ab23115e..b5495a324db 100644
--- a/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp
+++ b/tests/ttnn/unit_tests/gtests/test_async_runtime.cpp
@@ -2,6 +2,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
+#include "ttnn/cpp/ttnn/operations/creation.hpp"
 #include "ttnn/tensor/tensor.hpp"
 #include "ttnn/tensor/layout/tensor_layout.hpp"
 #include "ttnn_multi_command_queue_fixture.hpp"
@@ -10,14 +11,13 @@
 #include "ttnn/operations/moreh/moreh_sum/moreh_sum.hpp"
 #include "common/bfloat16.hpp"
 #include "ttnn/async_runtime.hpp"
-#include "ttnn/operations/numpy/functions.hpp"
 #include "tt_metal/impl/event/event.hpp"
 #include <cmath>
 
-using namespace tt;
-using namespace tt_metal;
-using MultiCommandQueueSingleDeviceFixture = ttnn::MultiCommandQueueSingleDeviceFixture;
-using namespace constants;
+namespace tt::tt_metal {
+namespace {
+
+using MultiCommandQueueSingleDeviceFixture = ::ttnn::MultiCommandQueueSingleDeviceFixture;
 
 TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) {
     Device* device = this->device_;
@@ -40,16 +40,14 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) {
         host_data[i] = bfloat16(static_cast<float>(1));
     }
     // Create golden data using tt_eager APIs
-    Tensor np_tensor = ttnn::numpy::full<float>(input_shape.value, static_cast<float>(1), DataType::BFLOAT16)
-                           .to(Layout::TILE)
-                           .to(device);
+    Tensor np_tensor = ttnn::full(input_shape, static_cast<float>(1), DataType::BFLOAT16, Layout::TILE, *device_);
     ttnn::SmallVector<int64_t> reduce_dims = {3};
     Tensor np_out = ttnn::moreh_sum(np_tensor, reduce_dims, false, std::nullopt, std::nullopt, std::nullopt);
     Tensor np_out_host = np_out.cpu();
     const bfloat16* golden_output =
         std::get<owned_buffer::Buffer<bfloat16>>(std::get<OwnedStorage>(np_out_host.get_storage()).buffer).begin();
     // Enable Asynchronous Execution and test ttnn runtime APIs
-    device->enable_async(true);
+    device_->enable_async(true);
     // Events for host - device synchronization
     auto write_event = std::make_shared<Event>();
     auto workload_event = std::make_shared<Event>();
@@ -63,9 +61,9 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) {
         output_buf_size_datums * datum_size_bytes,
         tensor_layout.compute_packed_buffer_size_bytes(np_out.get_padded_shape()));
     auto input_buffer = tt::tt_metal::tensor_impl::allocate_buffer_on_device(
-        device, TensorSpec(input_shape.padded_shape(), tensor_layout));
+        device_, TensorSpec(input_shape.padded_shape(), tensor_layout));
     auto output_buffer = tt::tt_metal::tensor_impl::allocate_buffer_on_device(
-        device, TensorSpec(np_out.get_padded_shape(), tensor_layout));
+        device_, TensorSpec(np_out.get_padded_shape(), tensor_layout));
     auto input_storage = tt::tt_metal::DeviceStorage{input_buffer};
     auto output_storage = tt::tt_metal::DeviceStorage{output_buffer};
     Tensor input_tensor = Tensor(input_storage, input_shape, DataType::BFLOAT16, Layout::TILE);
@@ -73,13 +71,13 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) {
     // Populate input_tensor with data
     ttnn::write_buffer(io_cq, input_tensor, {host_data});
     // Record the completion of the write event
-    ttnn::record_event(device->command_queue(io_cq), write_event);
+    ttnn::record_event(device_->command_queue(io_cq), write_event);
     // Host stalls until write is completed, before sending workload
     ttnn::event_synchronize(write_event);
     // Dispatch workload. Preallocated output_tensor is populated by op/
     ttnn::moreh_sum(input_tensor, /*dim*/ 3, false, output_tensor, std::nullopt, std::nullopt);
     // Record completion of workload
-    ttnn::record_event(device->command_queue(workload_dispatch_cq), workload_event);
+    ttnn::record_event(device_->command_queue(workload_dispatch_cq), workload_event);
     ttnn::event_synchronize(workload_event);
     // Read output back, once workload is complete
     ttnn::read_buffer(io_cq, output_tensor, {readback_data});
@@ -93,7 +91,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) {
     // Deallocate tensors (tensor gives up buffer). Done asynchronously, so sync on queue after.
     input_tensor.deallocate();
     output_tensor.deallocate();
-    ttnn::queue_synchronize(device->command_queue(io_cq));
+    ttnn::queue_synchronize(device_->command_queue(io_cq));
     // Buffer only has 2 owners in main thread.
     EXPECT_EQ(input_buffer.use_count(), 2);
     EXPECT_EQ(output_buffer.use_count(), 2);
@@ -103,8 +101,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncPreallocatedOutputs) {
 }
 
 TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeAllocatedBuffers) {
-    Device* device = this->device_;
-    device->enable_async(true);
+    device_->enable_async(true);
     MemoryConfig mem_cfg = MemoryConfig{
         .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED,
         .buffer_type = BufferType::DRAM,
@@ -131,26 +128,26 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeAllocatedBuffers) {
             TensorLayout tensor_layout(DataType::BFLOAT16, PageConfig(Layout::TILE), mem_cfg);
             ASSERT_EQ(buf_size_datums * datum_size_bytes, tensor_layout.compute_packed_buffer_size_bytes(shape));
             auto input_buffer =
-                tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, TensorSpec(shape, tensor_layout));
+                tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, TensorSpec(shape, tensor_layout));
             auto input_storage = tt::tt_metal::DeviceStorage{input_buffer};
             Tensor input_tensor = Tensor(input_storage, shape, DataType::BFLOAT16, Layout::TILE);
-            ttnn::write_buffer(io_cq, input_tensor, {host_data});           // Write using cq 1
-            ttnn::record_event(device->command_queue(io_cq), write_event);  // Record write on cq 1
+            ttnn::write_buffer(io_cq, input_tensor, {host_data});            // Write using cq 1
+            ttnn::record_event(device_->command_queue(io_cq), write_event);  // Record write on cq 1
             // Wait until cq 1 write is complete
-            ttnn::wait_for_event(device->command_queue(workload_dispatch_cq), write_event);
+            ttnn::wait_for_event(device_->command_queue(workload_dispatch_cq), write_event);
 
             // Run operation on cq 0
             Tensor output_tensor = ttnn::sqrt(workload_dispatch_cq, input_tensor);
             auto dummy_buffer_0 =
-                tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, TensorSpec(shape, tensor_layout));
+                tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, TensorSpec(shape, tensor_layout));
             output_tensor = ttnn::neg(workload_dispatch_cq, output_tensor);
             // Allocate this buffer to stress test async allocation across op execution and explicit allocation
             auto dummy_buffer_1 =
-                tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, TensorSpec(shape, tensor_layout));
+                tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, TensorSpec(shape, tensor_layout));
             // Record cq 0 prog execution
-            ttnn::record_event(device->command_queue(workload_dispatch_cq), workload_event);
+            ttnn::record_event(device_->command_queue(workload_dispatch_cq), workload_event);
             // Wait until cq 0 prog execution is done
-            ttnn::wait_for_event(device->command_queue(io_cq), workload_event);
+            ttnn::wait_for_event(device_->command_queue(io_cq), workload_event);
             // Read using cq 1
             ttnn::read_buffer(io_cq, output_tensor, {readback_data});
             for (int i = 0; i < buf_size_datums; i++) {
@@ -166,8 +163,7 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeBufferDestructor) {
     // Test functionality for the buffer destructor, which will call deallocate asynchronously
     // We must ensure that the deallocate step, which can run after the buffer has been destroyed
     // does not rely on stale buffer state, after the buffer has been destroyed on host
-    Device* device = this->device_;
-    device->enable_async(true);
+    device_->enable_async(true);
     MemoryConfig mem_cfg = MemoryConfig{
         .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED,
         .buffer_type = BufferType::DRAM,
@@ -182,9 +178,9 @@ TEST_F(MultiCommandQueueSingleDeviceFixture, TestAsyncRuntimeBufferDestructor) {
     TensorLayout tensor_layout(DataType::BFLOAT16, PageConfig(Layout::TILE), mem_cfg);
     TensorSpec tensor_spec(shape, tensor_layout);
     for (int loop = 0; loop < 100000; loop++) {
-        {
-            auto input_buffer_dummy = tt::tt_metal::tensor_impl::allocate_buffer_on_device(device, tensor_spec);
-            device->synchronize();
-        }
+        auto input_buffer_dummy = tt::tt_metal::tensor_impl::allocate_buffer_on_device(device_, tensor_spec);
+        device_->synchronize();
     }
 }
+}  // namespace
+}  // namespace tt::tt_metal
diff --git a/ttnn/cpp/ttnn/operations/numpy/functions.hpp b/ttnn/cpp/ttnn/operations/numpy/functions.hpp
index 31f1ec32efe..8fa40f0c4e6 100644
--- a/ttnn/cpp/ttnn/operations/numpy/functions.hpp
+++ b/ttnn/cpp/ttnn/operations/numpy/functions.hpp
@@ -134,49 +134,6 @@ static Tensor full_impl(
     }
 }
 
-// TODO: #14974 - Can this be deleted, as it is only used in tests?
-template <typename T>
-static Tensor full(
-    const tt::tt_metal::LegacyShape& shape,
-    const T value,
-    const DataType data_type,
-    const Layout layout = Layout::ROW_MAJOR,
-    Device* device = nullptr,
-    const MemoryConfig& output_mem_config = MemoryConfig{
-        .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) {
-    return full_impl(
-        ttnn::DefaultQueueId,
-        shape,
-        value,
-        data_type,
-        layout,
-        device ? std::vector<Device*>{device} : std::vector<Device*>{},
-        output_mem_config,
-        std::nullopt);
-}
-
-// TODO: #14974 - Can this be deleted, as it is only used in tests?
-static Tensor zeros(
-    const tt::tt_metal::LegacyShape& shape,
-    const DataType data_type = DataType::BFLOAT16,
-    const Layout layout = Layout::ROW_MAJOR,
-    Device* device = nullptr,
-    const MemoryConfig& output_mem_config = MemoryConfig{
-        .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) {
-    return full(shape, 0.0f, data_type, layout, device, output_mem_config);
-}
-
-// TODO: #14974 - Can this be deleted, as it is only used in tests?
-static Tensor ones(
-    const tt::tt_metal::LegacyShape& shape,
-    const DataType data_type = DataType::BFLOAT16,
-    const Layout layout = Layout::ROW_MAJOR,
-    Device* device = nullptr,
-    const MemoryConfig& output_mem_config = MemoryConfig{
-        .memory_layout = tt::tt_metal::TensorMemoryLayout::INTERLEAVED}) {
-    return full(shape, 1.0f, data_type, layout, device, output_mem_config);
-}
-
 template <typename T>
 static Tensor arange(
     const int64_t start,