Skip to content

Commit

Permalink
#0: Remove some dead code (#16084)
Browse files Browse the repository at this point in the history
1. `Tensor::deepcopy`, which does not actually perform a deep copy and is not
used anywhere.
2. `cpu_sharded` / `host_sharded`, which no longer work due to the deprecation
of slow dispatch, and which were previously used only for debugging.

### Checklist
- [x] [Post commit CI
passes](https://github.com/tenstorrent/tt-metal/actions/runs/12365560530)
(failure unrelated)
  • Loading branch information
omilyutin-tt authored Dec 17, 2024
1 parent 52f05f3 commit 55bb502
Show file tree
Hide file tree
Showing 8 changed files with 1 addition and 78 deletions.
7 changes: 0 additions & 7 deletions ttnn/cpp/pybind11/pytensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1161,13 +1161,6 @@ void pytensor_module(py::module& m_tensor) {
tt_tensor = tt_tensor.cpu()
)doc")
.def("cpu_sharded", &Tensor::cpu_sharded, R"doc(
Move TT Tensor from TT accelerator device to host device in sharded orientation.
.. code-block:: python
tt_tensor = tt_tensor.cpu_sharded()
)doc")
.def(
"to",
py::overload_cast<Layout, Device*>(&Tensor::to, py::const_),
Expand Down
16 changes: 1 addition & 15 deletions ttnn/cpp/ttnn/tensor/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,21 +482,9 @@ void Tensor::perform_cleanup_for_async_mode() {
}
}

// NOTE(review): despite the name, this does not clone the underlying
// buffers — it copies the storage handle and the tensor spec from `other`,
// so both tensors may alias the same data. Confirm Storage copy semantics
// before relying on this as a true deep copy.
void Tensor::deepcopy(const Tensor& other) {
    ZoneScoped;
    // Wait until the tensor being copied is populated
    other.wait_for_tensor_data_populated();
    // Populate tensor metadata
    this->set_storage(other.get_storage());
    this->set_tensor_spec(other.get_tensor_spec());
    // Set metadata populated flag for getters
    // (bumping the counter signals readers that this tensor is now usable)
    this->tensor_attributes->num_workers_completed++;
}

void Tensor::populate_buffers_and_metadata(const Tensor& other) {
ZoneScoped;
// Similar to deepcopy, but to be applied on a tensor that has an empty storage
// container initialized. Require tensor storage to be correctly initialized.
// Applied on a tensor that has an empty storage container initialized.
this->set_tensor_spec(other.get_tensor_spec());
// Populate storage container with buffers + shapes
std::visit(
Expand Down Expand Up @@ -698,8 +686,6 @@ Tensor Tensor::cpu(bool blocking, uint8_t cq_id, const std::vector<SubDeviceId>&
return tensor_ops::tensor_cpu(*this, blocking, cq_id, sub_device_ids);
}

Tensor Tensor::cpu_sharded() const {
    // Thin wrapper: the actual device-to-host sharded transfer lives in
    // tensor_ops so it can be shared with other entry points.
    return tensor_ops::tensor_cpu_sharded(*this);
}

Tensor Tensor::extract_shard(const CoreCoord& core) const {
ZoneScoped;
const auto& buffer_page_mapping = *this->buffer()->get_buffer_page_mapping();
Expand Down
4 changes: 0 additions & 4 deletions ttnn/cpp/ttnn/tensor/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,6 @@ struct Tensor {

void perform_cleanup_for_async_mode();

void deepcopy(const Tensor& other);

void populate_buffers_and_metadata(const Tensor& other);

void deallocate(bool force = false);
Expand Down Expand Up @@ -209,8 +207,6 @@ struct Tensor {
uint8_t cq_id = ttnn::DefaultQueueId,
const std::vector<SubDeviceId>& sub_device_ids = {}) const;

Tensor cpu_sharded() const;

Tensor unpad(const ttnn::SimpleShape& output_tensor_start, const ttnn::SimpleShape& output_tensor_end) const;

Tensor pad_to_tile(float pad_value) const;
Expand Down
37 changes: 0 additions & 37 deletions ttnn/cpp/ttnn/tensor/tensor_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,43 +633,6 @@ Tensor to_host<bfloat8_b>(
return to_host<uint32_t>(tensor, blocking, cq_id, sub_device_ids);
}

// ======================================================================================
// .to_host_sharded()
// ======================================================================================

// Reads a sharded device tensor back to host in its sharded orientation.
// Requires slow dispatch (TT_METAL_SLOW_DISPATCH_MODE set); throws otherwise.
// @param tensor  device-resident tensor whose buffer must be allocated.
// @return        host tensor owning a copy of the device data.
template <typename T>
Tensor to_host_sharded(const Tensor& tensor) {
    TT_ASSERT(tensor.is_allocated(), "Buffer must be allocated on device!");
    auto device_buffer = tensor.buffer();
    auto device = tensor.device();
    // FIX: pass the message as TT_ASSERT's message argument (matching the
    // assert above) instead of `&&`-ing a string literal into the condition,
    // where it is a no-op truthy operand and never gets reported.
    TT_ASSERT(device != nullptr, "Need device to be set copy data from device to host!");
    std::vector<T> data_vec;
    const char* TT_METAL_SLOW_DISPATCH_MODE = std::getenv("TT_METAL_SLOW_DISPATCH_MODE");
    if (TT_METAL_SLOW_DISPATCH_MODE == nullptr) {
        TT_THROW("FAST_DISPATCH is not supported for to_host_sharded!");
    }
    // Slow-dispatch read of the full (sharded) buffer contents.
    ::detail::ReadFromBuffer(*device_buffer, data_vec, true);
    auto output_buffer = owned_buffer::create<T>(std::move(data_vec));
    return Tensor(OwnedStorage{output_buffer}, tensor.get_tensor_spec());
}

// Explicit instantiations for the directly-representable element types;
// block-float formats (bfloat4_b/bfloat8_b) are handled by the
// specializations below.
template Tensor to_host_sharded<bfloat16>(const Tensor& tensor);
template Tensor to_host_sharded<float>(const Tensor& tensor);
template Tensor to_host_sharded<int32_t>(const Tensor& tensor);
template Tensor to_host_sharded<uint32_t>(const Tensor& tensor);
template Tensor to_host_sharded<uint16_t>(const Tensor& tensor);
template Tensor to_host_sharded<uint8_t>(const Tensor& tensor);

// Block-float formats forward to the uint32_t implementation — the data is
// read back as raw 32-bit words (presumably left packed in the block-float
// layout; confirm against the unpack utilities if unpacked floats are needed).
template <>
Tensor to_host_sharded<bfloat4_b>(const Tensor& tensor) {
    return to_host_sharded<uint32_t>(tensor);
}

template <>
Tensor to_host_sharded<bfloat8_b>(const Tensor& tensor) {
    return to_host_sharded<uint32_t>(tensor);
}

// ======================================================================================
// .to_device() details
// ======================================================================================
Expand Down
3 changes: 0 additions & 3 deletions ttnn/cpp/ttnn/tensor/tensor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,6 @@ Tensor to_host(
uint8_t cq_id = ttnn::DefaultQueueId,
tt::stl::Span<const SubDeviceId> sub_device_ids = {});

template <typename T>
Tensor to_host_sharded(const Tensor& tensor);

template <typename T>
Tensor to_device(
const Tensor& tensor,
Expand Down
1 change: 0 additions & 1 deletion ttnn/cpp/ttnn/tensor/tensor_impl_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ inline size_t packed_buffer_size_bytes_wrapper(DataType dtype, size_t volume_unp

WRAP_FUNCTION(to_host)
WRAP_FUNCTION(extract_shard)
WRAP_FUNCTION(to_host_sharded)
WRAP_FUNCTION(to_device)
WRAP_FUNCTION(to_layout)
WRAP_FUNCTION(pad)
Expand Down
9 changes: 0 additions & 9 deletions ttnn/cpp/ttnn/tensor/tensor_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,6 @@ Tensor tensor_cpu(
return host_tensor;
}

// Moves a sharded device tensor to host, keeping the sharded orientation.
// Wraps the impl call with graph tracking and assigns the result a tensor id.
Tensor tensor_cpu_sharded(const Tensor& input_tensor) {
    ZoneScoped;
    GraphTracker::instance().track_function_start("Tensor::cpu_sharded", input_tensor);
    Tensor host_tensor = tensor_impl::to_host_sharded_wrapper(input_tensor);
    host_tensor = tt::tt_metal::set_tensor_id(host_tensor);
    GraphTracker::instance().track_function_end(host_tensor);
    return host_tensor;
}

Tensor tensor_to(const Tensor& input_tensor, Layout target_layout, Device* worker) {
ZoneScoped;
GraphTracker::instance().track_function_start("Tensor::to", input_tensor, target_layout, worker);
Expand Down
2 changes: 0 additions & 2 deletions ttnn/cpp/ttnn/tensor/tensor_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ Tensor tensor_to(const Tensor& input_tensor, Layout target_layout, distributed::
Tensor tensor_cpu(
const Tensor& input_tensor, bool blocking, uint8_t cq_id, const std::vector<SubDeviceId>& sub_device_ids);

Tensor tensor_cpu_sharded(const Tensor& input_tensor);

void tensor_print(const Tensor& input_tensor);

Tensor tensor_pad(
Expand Down

0 comments on commit 55bb502

Please sign in to comment.