Skip to content

Commit

Permalink
#0: Remove some dead code (#16084)
Browse files Browse the repository at this point in the history
1. `Tensor::deepcopy`, which does not actually perform a deep copy and is not
used anywhere.
2. `cpu_sharded` / `host_sharded`, which no longer work due to the deprecation
of slow dispatch, and which were previously used only for debugging.

### Checklist
- [x] [Post commit CI
passes](https://github.com/tenstorrent/tt-metal/actions/runs/12365560530)
(failure unrelated)
  • Loading branch information
omilyutin-tt authored Dec 17, 2024
1 parent 52f05f3 commit 55bb502
Show file tree
Hide file tree
Showing 8 changed files with 1 addition and 78 deletions.
7 changes: 0 additions & 7 deletions ttnn/cpp/pybind11/pytensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1161,13 +1161,6 @@ void pytensor_module(py::module& m_tensor) {
tt_tensor = tt_tensor.cpu()
)doc")
.def("cpu_sharded", &Tensor::cpu_sharded, R"doc(
Move TT Tensor from TT accelerator device to host device in sharded orientation.
.. code-block:: python
tt_tensor = tt_tensor.cpu_sharded()
)doc")
.def(
"to",
py::overload_cast<Layout, Device*>(&Tensor::to, py::const_),
Expand Down
16 changes: 1 addition & 15 deletions ttnn/cpp/ttnn/tensor/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,21 +482,9 @@ void Tensor::perform_cleanup_for_async_mode() {
}
}

// NOTE(review): despite the name, this does not clone the underlying
// buffers — it copies the storage handle and the tensor spec from `other`,
// so both tensors may alias the same data. Confirm Storage copy semantics
// before relying on this as a true deep copy.
void Tensor::deepcopy(const Tensor& other) {
    ZoneScoped;
    // Wait until the tensor being copied is populated
    other.wait_for_tensor_data_populated();
    // Populate tensor metadata
    this->set_storage(other.get_storage());
    this->set_tensor_spec(other.get_tensor_spec());
    // Set metadata populated flag for getters
    // (bumping the counter signals readers that this tensor is now usable)
    this->tensor_attributes->num_workers_completed++;
}

void Tensor::populate_buffers_and_metadata(const Tensor& other) {
ZoneScoped;
// Similar to deepcopy, but to be applied on a tensor that has an empty storage
// container initialized. Require tensor storage to be correctly initialized.
// Applied on a tensor that has an empty storage container initialized.
this->set_tensor_spec(other.get_tensor_spec());
// Populate storage container with buffers + shapes
std::visit(
Expand Down Expand Up @@ -698,8 +686,6 @@ Tensor Tensor::cpu(bool blocking, uint8_t cq_id, const std::vector<SubDeviceId>&
return tensor_ops::tensor_cpu(*this, blocking, cq_id, sub_device_ids);
}

Tensor Tensor::cpu_sharded() const {
    // Thin wrapper: the actual device-to-host sharded transfer lives in
    // tensor_ops so it can be shared with other entry points.
    return tensor_ops::tensor_cpu_sharded(*this);
}

Tensor Tensor::extract_shard(const CoreCoord& core) const {
ZoneScoped;
const auto& buffer_page_mapping = *this->buffer()->get_buffer_page_mapping();
Expand Down
4 changes: 0 additions & 4 deletions ttnn/cpp/ttnn/tensor/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,6 @@ struct Tensor {

void perform_cleanup_for_async_mode();

void deepcopy(const Tensor& other);

void populate_buffers_and_metadata(const Tensor& other);

void deallocate(bool force = false);
Expand Down Expand Up @@ -209,8 +207,6 @@ struct Tensor {
uint8_t cq_id = ttnn::DefaultQueueId,
const std::vector<SubDeviceId>& sub_device_ids = {}) const;

Tensor cpu_sharded() const;

Tensor unpad(const ttnn::SimpleShape& output_tensor_start, const ttnn::SimpleShape& output_tensor_end) const;

Tensor pad_to_tile(float pad_value) const;
Expand Down
37 changes: 0 additions & 37 deletions ttnn/cpp/ttnn/tensor/tensor_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,43 +633,6 @@ Tensor to_host<bfloat8_b>(
return to_host<uint32_t>(tensor, blocking, cq_id, sub_device_ids);
}

// ======================================================================================
// .to_host_sharded()
// ======================================================================================

// Reads a sharded device tensor back to host in its sharded orientation.
// Requires slow dispatch (TT_METAL_SLOW_DISPATCH_MODE set); throws otherwise.
// @param tensor  device-resident tensor whose buffer must be allocated.
// @return        host tensor owning a copy of the device data.
template <typename T>
Tensor to_host_sharded(const Tensor& tensor) {
    TT_ASSERT(tensor.is_allocated(), "Buffer must be allocated on device!");
    auto device_buffer = tensor.buffer();
    auto device = tensor.device();
    // FIX: pass the message as TT_ASSERT's message argument (matching the
    // assert above) instead of `&&`-ing a string literal into the condition,
    // where it is a no-op truthy operand and never gets reported.
    TT_ASSERT(device != nullptr, "Need device to be set copy data from device to host!");
    std::vector<T> data_vec;
    const char* TT_METAL_SLOW_DISPATCH_MODE = std::getenv("TT_METAL_SLOW_DISPATCH_MODE");
    if (TT_METAL_SLOW_DISPATCH_MODE == nullptr) {
        TT_THROW("FAST_DISPATCH is not supported for to_host_sharded!");
    }
    // Slow-dispatch read of the full (sharded) buffer contents.
    ::detail::ReadFromBuffer(*device_buffer, data_vec, true);
    auto output_buffer = owned_buffer::create<T>(std::move(data_vec));
    return Tensor(OwnedStorage{output_buffer}, tensor.get_tensor_spec());
}

// Explicit instantiations for the directly-representable element types;
// block-float formats (bfloat4_b/bfloat8_b) are handled by the
// specializations below.
template Tensor to_host_sharded<bfloat16>(const Tensor& tensor);
template Tensor to_host_sharded<float>(const Tensor& tensor);
template Tensor to_host_sharded<int32_t>(const Tensor& tensor);
template Tensor to_host_sharded<uint32_t>(const Tensor& tensor);
template Tensor to_host_sharded<uint16_t>(const Tensor& tensor);
template Tensor to_host_sharded<uint8_t>(const Tensor& tensor);

// Block-float formats forward to the uint32_t implementation — the data is
// read back as raw 32-bit words (presumably left packed in the block-float
// layout; confirm against the unpack utilities if unpacked floats are needed).
template <>
Tensor to_host_sharded<bfloat4_b>(const Tensor& tensor) {
    return to_host_sharded<uint32_t>(tensor);
}

template <>
Tensor to_host_sharded<bfloat8_b>(const Tensor& tensor) {
    return to_host_sharded<uint32_t>(tensor);
}

// ======================================================================================
// .to_device() details
// ======================================================================================
Expand Down
3 changes: 0 additions & 3 deletions ttnn/cpp/ttnn/tensor/tensor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,6 @@ Tensor to_host(
uint8_t cq_id = ttnn::DefaultQueueId,
tt::stl::Span<const SubDeviceId> sub_device_ids = {});

template <typename T>
Tensor to_host_sharded(const Tensor& tensor);

template <typename T>
Tensor to_device(
const Tensor& tensor,
Expand Down
1 change: 0 additions & 1 deletion ttnn/cpp/ttnn/tensor/tensor_impl_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ inline size_t packed_buffer_size_bytes_wrapper(DataType dtype, size_t volume_unp

WRAP_FUNCTION(to_host)
WRAP_FUNCTION(extract_shard)
WRAP_FUNCTION(to_host_sharded)
WRAP_FUNCTION(to_device)
WRAP_FUNCTION(to_layout)
WRAP_FUNCTION(pad)
Expand Down
9 changes: 0 additions & 9 deletions ttnn/cpp/ttnn/tensor/tensor_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,6 @@ Tensor tensor_cpu(
return host_tensor;
}

// Moves a sharded device tensor to host, keeping the sharded orientation.
// Wraps the impl call with graph tracking and assigns the result a tensor id.
Tensor tensor_cpu_sharded(const Tensor& input_tensor) {
    ZoneScoped;
    GraphTracker::instance().track_function_start("Tensor::cpu_sharded", input_tensor);
    Tensor host_tensor = tensor_impl::to_host_sharded_wrapper(input_tensor);
    host_tensor = tt::tt_metal::set_tensor_id(host_tensor);
    GraphTracker::instance().track_function_end(host_tensor);
    return host_tensor;
}

Tensor tensor_to(const Tensor& input_tensor, Layout target_layout, Device* worker) {
ZoneScoped;
GraphTracker::instance().track_function_start("Tensor::to", input_tensor, target_layout, worker);
Expand Down
2 changes: 0 additions & 2 deletions ttnn/cpp/ttnn/tensor/tensor_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ Tensor tensor_to(const Tensor& input_tensor, Layout target_layout, distributed::
Tensor tensor_cpu(
const Tensor& input_tensor, bool blocking, uint8_t cq_id, const std::vector<SubDeviceId>& sub_device_ids);

Tensor tensor_cpu_sharded(const Tensor& input_tensor);

void tensor_print(const Tensor& input_tensor);

Tensor tensor_pad(
Expand Down

0 comments on commit 55bb502

Please sign in to comment.