From 55bb502f905c63ec5e7b7883838a2fe03f004eaa Mon Sep 17 00:00:00 2001 From: Oleg Milyutin Date: Tue, 17 Dec 2024 10:43:36 -0500 Subject: [PATCH] #0: Remove some dead code (#16084) 1. `Tensor::deepcopy` that isn't actually doing a deep copy and isn't used anywhere. 2. `cpu_sharded` / `host_sharded` that doesn't work due to deprecation of slow dispatch, and that was previously used for debugging. ### Checklist - [x] [Post commit CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12365560530) (failure unrelated) --- ttnn/cpp/pybind11/pytensor.cpp | 7 ---- ttnn/cpp/ttnn/tensor/tensor.cpp | 16 +-------- ttnn/cpp/ttnn/tensor/tensor.hpp | 4 --- ttnn/cpp/ttnn/tensor/tensor_impl.cpp | 37 -------------------- ttnn/cpp/ttnn/tensor/tensor_impl.hpp | 3 -- ttnn/cpp/ttnn/tensor/tensor_impl_wrapper.hpp | 1 - ttnn/cpp/ttnn/tensor/tensor_ops.cpp | 9 ----- ttnn/cpp/ttnn/tensor/tensor_ops.hpp | 2 -- 8 files changed, 1 insertion(+), 78 deletions(-) diff --git a/ttnn/cpp/pybind11/pytensor.cpp b/ttnn/cpp/pybind11/pytensor.cpp index 68a507a2c4a..cd381eabab5 100644 --- a/ttnn/cpp/pybind11/pytensor.cpp +++ b/ttnn/cpp/pybind11/pytensor.cpp @@ -1161,13 +1161,6 @@ void pytensor_module(py::module& m_tensor) { tt_tensor = tt_tensor.cpu() )doc") - .def("cpu_sharded", &Tensor::cpu_sharded, R"doc( - Move TT Tensor from TT accelerator device to host device in sharded orientation. - - .. code-block:: python - - tt_tensor = tt_tensor.cpu_sharded() - )doc") .def( "to", py::overload_cast(&Tensor::to, py::const_), diff --git a/ttnn/cpp/ttnn/tensor/tensor.cpp b/ttnn/cpp/ttnn/tensor/tensor.cpp index 689cc127d34..1057b183496 100644 --- a/ttnn/cpp/ttnn/tensor/tensor.cpp +++ b/ttnn/cpp/ttnn/tensor/tensor.cpp @@ -482,21 +482,9 @@ void Tensor::perform_cleanup_for_async_mode() { } } -void Tensor::deepcopy(const Tensor& other) { - ZoneScoped; - // Wait until the tensor being copied is populated - other.wait_for_tensor_data_populated(); - // Populate tensor metadata - this->set_storage(other.get_storage()); - this->set_tensor_spec(other.get_tensor_spec()); - // Set metadata populated flag for getters - this->tensor_attributes->num_workers_completed++; -} - void Tensor::populate_buffers_and_metadata(const Tensor& other) { ZoneScoped; - // Similar to deepcopy, but to be applied on a tensor that has an empty storage - // container initialized. Require tensor storage to be correctly initialized. + // Applied on a tensor that has an empty storage container initialized. this->set_tensor_spec(other.get_tensor_spec()); // Populate storage container with buffers + shapes std::visit( @@ -698,8 +686,6 @@ Tensor Tensor::cpu(bool blocking, uint8_t cq_id, const std::vector& return tensor_ops::tensor_cpu(*this, blocking, cq_id, sub_device_ids); } -Tensor Tensor::cpu_sharded() const { return tensor_ops::tensor_cpu_sharded(*this); } - Tensor Tensor::extract_shard(const CoreCoord& core) const { ZoneScoped; const auto& buffer_page_mapping = *this->buffer()->get_buffer_page_mapping(); diff --git a/ttnn/cpp/ttnn/tensor/tensor.hpp b/ttnn/cpp/ttnn/tensor/tensor.hpp index 30e18978b8e..6827c421320 100644 --- a/ttnn/cpp/ttnn/tensor/tensor.hpp +++ b/ttnn/cpp/ttnn/tensor/tensor.hpp @@ -131,8 +131,6 @@ struct Tensor { void perform_cleanup_for_async_mode(); - void deepcopy(const Tensor& other); - void populate_buffers_and_metadata(const Tensor& other); void deallocate(bool force = false); @@ -209,8 +207,6 @@ struct Tensor { uint8_t cq_id = ttnn::DefaultQueueId, const std::vector& sub_device_ids = {}) const; - Tensor cpu_sharded() const; - Tensor unpad(const ttnn::SimpleShape& output_tensor_start, const ttnn::SimpleShape& output_tensor_end) const; Tensor pad_to_tile(float pad_value) const; diff --git a/ttnn/cpp/ttnn/tensor/tensor_impl.cpp b/ttnn/cpp/ttnn/tensor/tensor_impl.cpp index dc7545ac0e5..3f731c97c65 100644 --- a/ttnn/cpp/ttnn/tensor/tensor_impl.cpp +++ b/ttnn/cpp/ttnn/tensor/tensor_impl.cpp @@ -633,43 +633,6 @@ Tensor to_host( return to_host(tensor, blocking, cq_id, sub_device_ids); } -// ====================================================================================== -// .to_host_sharded() -// ====================================================================================== - -template -Tensor to_host_sharded(const Tensor& tensor) { - TT_ASSERT(tensor.is_allocated(), "Buffer must be allocated on device!"); - auto device_buffer = tensor.buffer(); - auto device = tensor.device(); - TT_ASSERT(device != nullptr && "Need device to be set copy data from device to host!"); - std::vector data_vec; - const char* TT_METAL_SLOW_DISPATCH_MODE = std::getenv("TT_METAL_SLOW_DISPATCH_MODE"); - if (TT_METAL_SLOW_DISPATCH_MODE == nullptr) { - TT_THROW("FAST_DISPATCH is not supported for to_host_sharded!"); - } - ::detail::ReadFromBuffer(*device_buffer, data_vec, true); - auto output_buffer = owned_buffer::create(std::move(data_vec)); - return Tensor(OwnedStorage{output_buffer}, tensor.get_tensor_spec()); -} - -template Tensor to_host_sharded(const Tensor& tensor); -template Tensor to_host_sharded(const Tensor& tensor); -template Tensor to_host_sharded(const Tensor& tensor); -template Tensor to_host_sharded(const Tensor& tensor); -template Tensor to_host_sharded(const Tensor& tensor); -template Tensor to_host_sharded(const Tensor& tensor); - -template <> -Tensor to_host_sharded(const Tensor& tensor) { - return to_host_sharded(tensor); -} - -template <> -Tensor to_host_sharded(const Tensor& tensor) { - return to_host_sharded(tensor); -} - // ====================================================================================== // .to_device() details // ====================================================================================== diff --git a/ttnn/cpp/ttnn/tensor/tensor_impl.hpp b/ttnn/cpp/ttnn/tensor/tensor_impl.hpp index 87c34bdb199..0ceb2b9c1d1 100644 --- a/ttnn/cpp/ttnn/tensor/tensor_impl.hpp +++ b/ttnn/cpp/ttnn/tensor/tensor_impl.hpp @@ -191,9 +191,6 @@ Tensor to_host( uint8_t cq_id = ttnn::DefaultQueueId, tt::stl::Span sub_device_ids = {}); -template -Tensor to_host_sharded(const Tensor& tensor); - template Tensor to_device( const Tensor& tensor, diff --git a/ttnn/cpp/ttnn/tensor/tensor_impl_wrapper.hpp b/ttnn/cpp/ttnn/tensor/tensor_impl_wrapper.hpp index 6ab8a8dae75..9cf4c810591 100644 --- a/ttnn/cpp/ttnn/tensor/tensor_impl_wrapper.hpp +++ b/ttnn/cpp/ttnn/tensor/tensor_impl_wrapper.hpp @@ -39,7 +39,6 @@ inline size_t packed_buffer_size_bytes_wrapper(DataType dtype, size_t volume_unp WRAP_FUNCTION(to_host) WRAP_FUNCTION(extract_shard) -WRAP_FUNCTION(to_host_sharded) WRAP_FUNCTION(to_device) WRAP_FUNCTION(to_layout) WRAP_FUNCTION(pad) diff --git a/ttnn/cpp/ttnn/tensor/tensor_ops.cpp b/ttnn/cpp/ttnn/tensor/tensor_ops.cpp index 96b53b87901..c2df9f3e430 100644 --- a/ttnn/cpp/ttnn/tensor/tensor_ops.cpp +++ b/ttnn/cpp/ttnn/tensor/tensor_ops.cpp @@ -160,15 +160,6 @@ Tensor tensor_cpu( return host_tensor; } -Tensor tensor_cpu_sharded(const Tensor& input_tensor) { - ZoneScoped; - GraphTracker::instance().track_function_start("Tensor::cpu_sharded", input_tensor); - auto output = tensor_impl::to_host_sharded_wrapper(input_tensor); - output = tt::tt_metal::set_tensor_id(output); - GraphTracker::instance().track_function_end(output); - return output; -} - Tensor tensor_to(const Tensor& input_tensor, Layout target_layout, Device* worker) { ZoneScoped; GraphTracker::instance().track_function_start("Tensor::to", input_tensor, target_layout, worker); diff --git a/ttnn/cpp/ttnn/tensor/tensor_ops.hpp b/ttnn/cpp/ttnn/tensor/tensor_ops.hpp index b8edff425f8..b65af33cb42 100644 --- a/ttnn/cpp/ttnn/tensor/tensor_ops.hpp +++ b/ttnn/cpp/ttnn/tensor/tensor_ops.hpp @@ -41,8 +41,6 @@ Tensor tensor_to(const Tensor& input_tensor, Layout target_layout, distributed:: Tensor tensor_cpu( const Tensor& input_tensor, bool blocking, uint8_t cq_id, const std::vector& sub_device_ids); -Tensor tensor_cpu_sharded(const Tensor& input_tensor); - void tensor_print(const Tensor& input_tensor); Tensor tensor_pad(