diff --git a/ttnn/cpp/pybind11/__init__.cpp b/ttnn/cpp/pybind11/__init__.cpp index ac209e3caeb..4f263b3760f 100644 --- a/ttnn/cpp/pybind11/__init__.cpp +++ b/ttnn/cpp/pybind11/__init__.cpp @@ -48,7 +48,7 @@ PYBIND11_MODULE(_ttnn, module) { auto m_operations = module.def_submodule("operations", "ttnn Operations"); // TYPES - ttnn::tensor::py_module_types(m_tensor); + ttnn::tensor::tensor_mem_config_module_types(m_tensor); ttnn::tensor::pytensor_module_types(m_tensor); ttnn::graph::py_graph_module_types(m_graph); @@ -63,7 +63,7 @@ PYBIND11_MODULE(_ttnn, module) { ttnn::reports::py_module_types(m_reports); // FUNCTIONS / OPERATIONS - ttnn::tensor::py_module(m_tensor); + ttnn::tensor::tensor_mem_config_module(m_tensor); ttnn::tensor::pytensor_module(m_tensor); ttnn::core::py_module(m_core); ttnn::graph::py_graph_module(m_graph); diff --git a/ttnn/cpp/pybind11/device.hpp b/ttnn/cpp/pybind11/device.hpp index 165224d0d67..9a57c4d1fae 100644 --- a/ttnn/cpp/pybind11/device.hpp +++ b/ttnn/cpp/pybind11/device.hpp @@ -12,7 +12,7 @@ namespace py = pybind11; namespace ttnn::device { - void py_device_module_types(py::module& module); - void py_device_module(py::module& module); +void py_device_module_types(py::module& module); +void py_device_module(py::module& module); } // namespace ttnn::device diff --git a/ttnn/cpp/pybind11/events.hpp b/ttnn/cpp/pybind11/events.hpp index ff4bb17b28e..6d308a88512 100644 --- a/ttnn/cpp/pybind11/events.hpp +++ b/ttnn/cpp/pybind11/events.hpp @@ -11,7 +11,7 @@ namespace py = pybind11; namespace ttnn::events { - void py_module_types(py::module& module); - void py_module(py::module& module); +void py_module_types(py::module& module); +void py_module(py::module& module); } // namespace ttnn::events diff --git a/ttnn/cpp/pybind11/profiler.hpp b/ttnn/cpp/pybind11/profiler.hpp index d2ab92753af..d6c9cf3414f 100644 --- a/ttnn/cpp/pybind11/profiler.hpp +++ b/ttnn/cpp/pybind11/profiler.hpp @@ -10,6 +10,6 @@ namespace py = pybind11; namespace ttnn::profiler { - void py_module(py::module& module); +void py_module(py::module& module); } // namespace ttnn::profiler diff --git a/ttnn/cpp/pybind11/pytensor.cpp b/ttnn/cpp/pybind11/pytensor.cpp index 2e316f30250..feddb572634 100644 --- a/ttnn/cpp/pybind11/pytensor.cpp +++ b/ttnn/cpp/pybind11/pytensor.cpp @@ -645,961 +645,961 @@ Tensor convert_python_tensors_to_tt_tensors(py::list tensor_shards, std::optiona } // namespace detail - void pytensor_module_types(py::module &m_tensor) { - using tt::tt_metal::Shape; - // Tensor constructors that accept device and .to(device) function use keep alive call policy to communicate that Device needs to outlive Tensor. - // This is because when tensors on device are destroyed they need to deallocate their buffers via device. - // keep_alive increases the ref count of the Device object being passed into the constructor and .to() function. - // For additional info see: https://pybind11.readthedocs.io/en/stable/advanced/functions.html#keep-alive - auto pyTensor = py::class_(m_tensor, "Tensor", R"doc( - - Class constructor supports tensors of rank 4. 
- The constructor takes following arguments: - - +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +============+========================================================+===========================+====================================+==========+ - | data | Data to store in TT tensor | List[float/int] | | Yes | - +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ - | shape | Shape of TT tensor | List[int[4]] | | Yes | - +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ - | data_type | Data type of numbers in TT tensor | ttnn.DataType | ttnn.DataType.BFLOAT16 | Yes | - | | | | | | - | | | | ttnn.DataType.FLOAT32 | | - | | | | | | - | | | | ttnn.DataType.UINT32 | | - | | | | | | - | | | | ttnn.DataType.BFLOAT8_B | | - | | | | | | - | | | | ttnn.DataType.BFLOAT4_B | | - +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ - | layout | Layout of tensor data in memory | ttnn.Layout | ttnn.Layout.ROW_MAJOR | Yes | - | | | | | | - | | | | ttnn.Layout.TILE | | - +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ - | device | Device on which tensor will be created | ttnn.Device | Host or TT accelerator device | No | - +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ - | mem_config | Layout of tensor in TT Accelerator device memory banks | ttnn.MemoryConfig | | No | - +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ - - )doc"); - } +void pytensor_module_types(py::module &m_tensor) { + using tt::tt_metal::Shape; + // Tensor constructors that accept device and .to(device) function use keep alive call policy to communicate that Device needs to outlive Tensor. + // This is because when tensors on device are destroyed they need to deallocate their buffers via device. + // keep_alive increases the ref count of the Device object being passed into the constructor and .to() function. + // For additional info see: https://pybind11.readthedocs.io/en/stable/advanced/functions.html#keep-alive + auto pyTensor = py::class_(m_tensor, "Tensor", R"doc( + + Class constructor supports tensors of rank 4. 
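+ Tensors constructed with a device argument keep that device alive for the lifetime of the tensor
+ (the bindings use pybind11's ``keep_alive`` call policy), because device-resident buffers must be
+ deallocated through their device.
+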
+ The constructor takes following arguments: + + +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +============+========================================================+===========================+====================================+==========+ + | data | Data to store in TT tensor | List[float/int] | | Yes | + +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ + | shape | Shape of TT tensor | List[int[4]] | | Yes | + +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ + | data_type | Data type of numbers in TT tensor | ttnn.DataType | ttnn.DataType.BFLOAT16 | Yes | + | | | | | | + | | | | ttnn.DataType.FLOAT32 | | + | | | | | | + | | | | ttnn.DataType.UINT32 | | + | | | | | | + | | | | ttnn.DataType.BFLOAT8_B | | + | | | | | | + | | | | ttnn.DataType.BFLOAT4_B | | + +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ + | layout | Layout of tensor data in memory | ttnn.Layout | ttnn.Layout.ROW_MAJOR | Yes | + | | | | | | + | | | | ttnn.Layout.TILE | | + +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ + | device | Device on which tensor will be created | ttnn.Device | Host or TT accelerator device | No | + +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ + | mem_config | Layout of tensor in TT Accelerator device memory banks | ttnn.MemoryConfig | | No | + +------------+--------------------------------------------------------+---------------------------+------------------------------------+----------+ + + )doc"); +} - void pytensor_module(py::module &m_tensor) { - m_tensor.def( - "decorate_external_operation", - [](const py::function &function, std::optional function_name) -> py::function { - return py::cpp_function(std::function([function, function_name]( - const py::args &args, const py::kwargs &kwargs) { - ZoneScopedN("TT_DNN_FALLBACK_OP"); - uint32_t device_operation_id = ttnn::CoreIDs::instance().fetch_and_increment_device_operation_id(); - auto [operation, input_tensors] = detail::parse_external_operation(function, args, kwargs, function_name); - GraphTracker::instance().track_function_start(operation.get_type_name(), args, kwargs); - detail::log_external_operation(ttnn::CoreIDs::instance().get_python_operation_id(), device_operation_id, operation, input_tensors); - - auto output = function(*args, **kwargs); - - TracyOpTTNNExternal(device_operation_id, operation, input_tensors); - GraphTracker::instance().track_function_end(output); - return output; - })); - }, - py::arg("function").noconvert(), - py::arg("function_name").noconvert() = std::nullopt, +void pytensor_module(py::module &m_tensor) { + m_tensor.def( + "decorate_external_operation", + [](const py::function &function, std::optional function_name) -> py::function { + return py::cpp_function(std::function([function, function_name]( + const py::args &args, const py::kwargs &kwargs) { + ZoneScopedN("TT_DNN_FALLBACK_OP"); + uint32_t device_operation_id = 
ttnn::CoreIDs::instance().fetch_and_increment_device_operation_id(); + auto [operation, input_tensors] = detail::parse_external_operation(function, args, kwargs, function_name); + GraphTracker::instance().track_function_start(operation.get_type_name(), args, kwargs); + detail::log_external_operation(ttnn::CoreIDs::instance().get_python_operation_id(), device_operation_id, operation, input_tensors); + + auto output = function(*args, **kwargs); + + TracyOpTTNNExternal(device_operation_id, operation, input_tensors); + GraphTracker::instance().track_function_end(output); + return output; + })); + }, + py::arg("function").noconvert(), + py::arg("function_name").noconvert() = std::nullopt, + R"doc( + Decorate external operation for purposes of reporting and profiling. + + +----------+----------------------+-----------+-------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +==========+======================+===========+=============+==========+ + | function | Fallback Operation | Function | | Yes | + +----------+----------------------+-----------+-------------+----------+ + | args | Packed args | tuple | | No | + +----------+----------------------+-----------+-------------+----------+ + | kwargs | Packed kwargs | dict | | No | + +----------+----------------------+-----------+-------------+----------+ + )doc"); + + auto pyTensor = static_cast>(m_tensor.attr("Tensor")); + pyTensor.def(py::init()) + .def( + py::init<>([](std::vector &&data, + const std::array &shape, + DataType data_type, + Layout layout) { + auto owned_buffer = detail::create_owned_buffer_from_vector_of_floats(std::move(data), data_type); + return Tensor(OwnedStorage{owned_buffer}, shape, data_type, layout); + }), + py::return_value_policy::move, R"doc( - Decorate external operation for purposes of reporting and profiling. - - +----------+----------------------+-----------+-------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +==========+======================+===========+=============+==========+ - | function | Fallback Operation | Function | | Yes | - +----------+----------------------+-----------+-------------+----------+ - | args | Packed args | tuple | | No | - +----------+----------------------+-----------+-------------+----------+ - | kwargs | Packed kwargs | dict | | No | - +----------+----------------------+-----------+-------------+----------+ - )doc"); - - auto pyTensor = static_cast>(m_tensor.attr("Tensor")); - pyTensor.def(py::init()) - .def( - py::init<>([](std::vector &&data, - const std::array &shape, - DataType data_type, - Layout layout) { - auto owned_buffer = detail::create_owned_buffer_from_vector_of_floats(std::move(data), data_type); - return Tensor(OwnedStorage{owned_buffer}, shape, data_type, layout); - }), - py::return_value_policy::move, - R"doc( - +---------------+---------------+ - | Argument | Name | - +===============+===============+ - | arg0 | data | - +---------------+---------------+ - | arg1 | shape | - +---------------+---------------+ - | arg2 | data_type | - +---------------+---------------+ - | arg3 | layout | - +---------------+---------------+ - - Example of creating a TT Tensor on host: - - .. 
code-block:: python - - py_tensor = torch.randn((1, 1, 32, 32)) - ttnn.Tensor( - py_tensor.reshape(-1).tolist(), - py_tensor.size(), - ttnn.DataType.BFLOAT16, - ttnn.Layout.ROW_MAJOR, - ) - )doc") - .def( - py::init<>([](std::vector &&data, - const std::array &shape, - DataType data_type, - Layout layout, - Device *device) { - auto owned_buffer = detail::create_owned_buffer_from_vector_of_floats(std::move(data), data_type); - auto tensor = Tensor(OwnedStorage{owned_buffer}, shape, data_type, layout); - return tensor.to(device, MemoryConfig{}); - }), - py::keep_alive<1, 6>(), - py::return_value_policy::move, - R"doc( - +---------------+---------------+ - | Argument | Name | - +===============+===============+ - | arg0 | data | - +---------------+---------------+ - | arg1 | shape | - +---------------+---------------+ - | arg2 | data_type | - +---------------+---------------+ - | arg3 | layout | - +---------------+---------------+ - | arg3 | device | - +---------------+---------------+ - - Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - - Note that TT Tensor in ROW_MAJOR layout on TT Accelerator device must have size of last dimension divisble by 2. - - Example of creating a TT Tensor on TT accelerator device: - - .. code-block:: python - - py_tensor = torch.randn((1, 1, 32, 32)) - tt_device = ttnn.CreateDevice(0) - // ... - ttnn.Tensor( - py_tensor.reshape(-1).tolist(), - py_tensor.size(), - ttnn.DataType.BFLOAT16, - ttnn.Layout.ROW_MAJOR, - tt_device - ) - )doc") - .def( - py::init<>([](std::vector &&data, - const std::array &shape, - DataType data_type, - Layout layout, - Device *device, - const MemoryConfig &memory_config) { - auto owned_buffer = detail::create_owned_buffer_from_vector_of_floats(std::move(data), data_type); - auto tensor = Tensor(OwnedStorage{owned_buffer}, shape, data_type, layout); - return tensor.to(device, memory_config); - }), - py::keep_alive<1, 6>(), - py::return_value_policy::move, - R"doc( - +---------------+---------------+ - | Argument | Name | - +===============+===============+ - | arg0 | data | - +---------------+---------------+ - | arg1 | shape | - +---------------+---------------+ - | arg2 | data_type | - +---------------+---------------+ - | arg3 | layout | - +---------------+---------------+ - | arg4 | device | - +---------------+---------------+ - | arg5 | mem_config | - +---------------+---------------+ - - Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - - Note that TT Tensor in ROW_MAJOR layout on TT Accelerator device must have size of last dimension divisble by 2. - - Example of creating a TT Tensor on TT accelerator device with specified mem_config: - - .. code-block:: python - - py_tensor = torch.randn((1, 1, 32, 32)) - tt_device = ttnn.CreateDevice(0) - mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.SINGLE_BANK) - // ... 
- ttnn.Tensor( - py_tensor.reshape(-1).tolist(), - py_tensor.size(), - ttnn.DataType.BFLOAT16, - ttnn.Layout.ROW_MAJOR, - tt_device, - mem_config - ) - )doc") - .def( - py::init<>([](const py::object &tensor, - std::optional data_type, - const std::unordered_map &strategy) { - if (py::isinstance(tensor)) { - return detail::convert_python_tensors_to_tt_tensors(tensor, data_type, strategy); - } - return detail::convert_python_tensor_to_tt_tensor(tensor, data_type); - }), - py::arg("tensor"), - py::arg("data_type") = std::nullopt, - py::arg("strategy") = std::unordered_map(), - py::return_value_policy::move, - R"doc( - +--------------+------------------------+ - | Argument | Description | - +==============+========================+ - | tensor | Pytorch or Numpy Tensor| - +--------------+------------------------+ - | data_type | TT Tensor data type | - +--------------+------------------------+ - - Example of creating a TT Tensor that uses torch.Tensor's storage as its own storage: - - .. code-block:: python - - py_tensor = torch.randn((1, 1, 32, 32)) - ttnn.Tensor(py_tensor) - )doc") - .def( - py::init<>([](const py::object &python_tensor, - std::optional data_type, - Device *device, - Layout layout, - const MemoryConfig &mem_config) { - auto tensor = detail::convert_python_tensor_to_tt_tensor(python_tensor, data_type); - auto layout_tensor = tensor.to(layout); - return layout_tensor.to(device, mem_config); - }), - py::arg("tensor"), - py::arg("data_type") = std::nullopt, - py::arg("device").noconvert(), - py::arg("layout").noconvert(), - py::arg("mem_config").noconvert(), - py::return_value_policy::move, - R"doc( - +--------------+------------------------+ - | Argument | Description | - +==============+========================+ - | tensor | Pytorch or Numpy Tensor| - +--------------+------------------------+ - | data_type | TT Tensor data type | - +--------------+------------------------+ - | device | TT device ptr | - +--------------+------------------------+ - | layout | TT layout | - +--------------+------------------------+ - | mem_config | TT memory_config | - +--------------+------------------------+ - - - Example of creating a TT Tensor that uses torch.Tensor's storage as its own storage: - - .. code-block:: python - - py_tensor = np.zeros((1, 1, 32, 32)) - ttnn.Tensor(py_tensor) - )doc") - .def_property_readonly("shape", [](const Tensor &self) { return self.get_shape(); }) - .def_property_readonly("dtype", [](const Tensor &self) { return self.get_dtype(); }) - .def_property_readonly("layout", [](const Tensor &self) { return self.get_layout(); }) - .def( - "deallocate", - [](Tensor &self, bool force) { return self.deallocate(force); }, - py::arg("force") = false, - R"doc( - Dellocates all data of a tensor. This either deletes all host data or deallocates tensor data from device memory. - )doc") - .def( - "to", - py::overload_cast(&Tensor::to, py::const_), - py::arg("device").noconvert(), - py::arg("mem_config").noconvert() = MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED}, - py::keep_alive<0, 2>(), - R"doc( - Move TT Tensor from host device to TT accelerator device. - - Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - - If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. 
- - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===========+=================================================+============================+=======================+==========+ - | arg0 | Device to which tensor will be moved | ttnn.Device | TT accelerator device | Yes | - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - | arg1 | MemoryConfig of tensor of TT accelerator device | ttnn.MemoryConfig | | No | - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + +---------------+---------------+ + | Argument | Name | + +===============+===============+ + | arg0 | data | + +---------------+---------------+ + | arg1 | shape | + +---------------+---------------+ + | arg2 | data_type | + +---------------+---------------+ + | arg3 | layout | + +---------------+---------------+ + + Example of creating a TT Tensor on host: .. code-block:: python - tt_tensor = tt_tensor.to(tt_device) + py_tensor = torch.randn((1, 1, 32, 32)) + ttnn.Tensor( + py_tensor.reshape(-1).tolist(), + py_tensor.size(), + ttnn.DataType.BFLOAT16, + ttnn.Layout.ROW_MAJOR, + ) )doc") - .def( - "track_ref_count", - [](Tensor &self) { return self.track_ref_count(); }, - R"doc( - Log the reference count (as seen by the main and worker threads) of a tensor as it evolves during runtime. - )doc") - .def( - "to", - py::overload_cast(&Tensor::to, py::const_), - py::arg("mesh_device").noconvert(), - py::arg("mem_config").noconvert() = MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED}, - py::keep_alive<0, 2>(), - R"doc( - Move TT Tensor from host device to TT accelerator device. + .def( + py::init<>([](std::vector &&data, + const std::array &shape, + DataType data_type, + Layout layout, + Device *device) { + auto owned_buffer = detail::create_owned_buffer_from_vector_of_floats(std::move(data), data_type); + auto tensor = Tensor(OwnedStorage{owned_buffer}, shape, data_type, layout); + return tensor.to(device, MemoryConfig{}); + }), + py::keep_alive<1, 6>(), + py::return_value_policy::move, + R"doc( + +---------------+---------------+ + | Argument | Name | + +===============+===============+ + | arg0 | data | + +---------------+---------------+ + | arg1 | shape | + +---------------+---------------+ + | arg2 | data_type | + +---------------+---------------+ + | arg3 | layout | + +---------------+---------------+ + | arg3 | device | + +---------------+---------------+ Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. + Note that TT Tensor in ROW_MAJOR layout on TT Accelerator device must have size of last dimension divisble by 2. 
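+
+            If the last dimension is odd, one illustrative workaround (a sketch, not part of this API) is to
+            pad the source torch tensor to an even width before constructing the TT Tensor:
+
+            .. code-block:: python
+
+                # hypothetical input whose last dimension (33) is odd
+                py_tensor = torch.randn((1, 1, 32, 33))
+                # pad the last dimension on the right by one element (33 -> 34) so it is even
+                py_tensor = torch.nn.functional.pad(py_tensor, [0, 1])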
- +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===========+=================================================+============================+=======================+==========+ - | arg0 | MeshDevice to which tensor will be moved | ttnn.MeshDevice | TT accelerator device | Yes | - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - | arg1 | MemoryConfig of tensor of TT accelerator device | ttnn.MemoryConfig | | No | - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + Example of creating a TT Tensor on TT accelerator device: .. code-block:: python - tt_tensor = tt_tensor.to(tt_device) + py_tensor = torch.randn((1, 1, 32, 32)) + tt_device = ttnn.CreateDevice(0) + // ... + ttnn.Tensor( + py_tensor.reshape(-1).tolist(), + py_tensor.size(), + ttnn.DataType.BFLOAT16, + ttnn.Layout.ROW_MAJOR, + tt_device + ) )doc") - .def("sync", [](Tensor &self) { return self.wait_for_tensor_data_populated(); }) - .def( - "extract_shard", - [](const Tensor &self, CoreCoord core) { return self.extract_shard(core); }, - py::arg("core").noconvert(), - py::keep_alive<0, 2>(), - R"doc( - Move TT Tensor from host device to TT accelerator device. + .def( + py::init<>([](std::vector &&data, + const std::array &shape, + DataType data_type, + Layout layout, + Device *device, + const MemoryConfig &memory_config) { + auto owned_buffer = detail::create_owned_buffer_from_vector_of_floats(std::move(data), data_type); + auto tensor = Tensor(OwnedStorage{owned_buffer}, shape, data_type, layout); + return tensor.to(device, memory_config); + }), + py::keep_alive<1, 6>(), + py::return_value_policy::move, + R"doc( + +---------------+---------------+ + | Argument | Name | + +===============+===============+ + | arg0 | data | + +---------------+---------------+ + | arg1 | shape | + +---------------+---------------+ + | arg2 | data_type | + +---------------+---------------+ + | arg3 | layout | + +---------------+---------------+ + | arg4 | device | + +---------------+---------------+ + | arg5 | mem_config | + +---------------+---------------+ Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. - - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===========+=================================================+============================+=======================+==========+ - | arg0 | Core who's shard we want | ttnn.CoreCoord | TT accelerator device | Yes | - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + Note that TT Tensor in ROW_MAJOR layout on TT Accelerator device must have size of last dimension divisble by 2. + Example of creating a TT Tensor on TT accelerator device with specified mem_config: .. code-block:: python - tt_tensor = tt_tensor.to(tt_device) + py_tensor = torch.randn((1, 1, 32, 32)) + tt_device = ttnn.CreateDevice(0) + mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.SINGLE_BANK) + // ... 
+ ttnn.Tensor( + py_tensor.reshape(-1).tolist(), + py_tensor.size(), + ttnn.DataType.BFLOAT16, + ttnn.Layout.ROW_MAJOR, + tt_device, + mem_config + ) )doc") - .def( - "extract_shard", - [](const Tensor &self, const uint32_t &core_id) { return self.extract_shard(core_id); }, - py::arg("core_id").noconvert(), - py::keep_alive<0, 2>(), - R"doc( - Move TT Tensor from host device to TT accelerator device. - - Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - - If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. - - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===========+=================================================+============================+=======================+==========+ - | arg0 | Core who's shard we want | uint32_t | TT accelerator device | Yes | - +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + .def( + py::init<>([](const py::object &tensor, + std::optional data_type, + const std::unordered_map &strategy) { + if (py::isinstance(tensor)) { + return detail::convert_python_tensors_to_tt_tensors(tensor, data_type, strategy); + } + return detail::convert_python_tensor_to_tt_tensor(tensor, data_type); + }), + py::arg("tensor"), + py::arg("data_type") = std::nullopt, + py::arg("strategy") = std::unordered_map(), + py::return_value_policy::move, + R"doc( + +--------------+------------------------+ + | Argument | Description | + +==============+========================+ + | tensor | Pytorch or Numpy Tensor| + +--------------+------------------------+ + | data_type | TT Tensor data type | + +--------------+------------------------+ + Example of creating a TT Tensor that uses torch.Tensor's storage as its own storage: .. code-block:: python - tt_tensor = tt_tensor.to(tt_device) + py_tensor = torch.randn((1, 1, 32, 32)) + ttnn.Tensor(py_tensor) )doc") - .def( - "cpu", - [](const Tensor &self, bool blocking) { return self.cpu(blocking); }, - py::arg("blocking") = true, - R"doc( - Move TT Tensor from TT accelerator device to host device. + .def( + py::init<>([](const py::object &python_tensor, + std::optional data_type, + Device *device, + Layout layout, + const MemoryConfig &mem_config) { + auto tensor = detail::convert_python_tensor_to_tt_tensor(python_tensor, data_type); + auto layout_tensor = tensor.to(layout); + return layout_tensor.to(device, mem_config); + }), + py::arg("tensor"), + py::arg("data_type") = std::nullopt, + py::arg("device").noconvert(), + py::arg("layout").noconvert(), + py::arg("mem_config").noconvert(), + py::return_value_policy::move, + R"doc( + +--------------+------------------------+ + | Argument | Description | + +==============+========================+ + | tensor | Pytorch or Numpy Tensor| + +--------------+------------------------+ + | data_type | TT Tensor data type | + +--------------+------------------------+ + | device | TT device ptr | + +--------------+------------------------+ + | layout | TT layout | + +--------------+------------------------+ + | mem_config | TT memory_config | + +--------------+------------------------+ + + + Example of creating a TT Tensor that uses torch.Tensor's storage as its own storage: .. 
code-block:: python - tt_tensor = tt_tensor.cpu() + py_tensor = np.zeros((1, 1, 32, 32)) + ttnn.Tensor(py_tensor) )doc") - .def("cpu_sharded", &Tensor::cpu_sharded, R"doc( - Move TT Tensor from TT accelerator device to host device in sharded orientation. - - .. code-block:: python - - tt_tensor = tt_tensor.cpu_sharded() + .def_property_readonly("shape", [](const Tensor &self) { return self.get_shape(); }) + .def_property_readonly("dtype", [](const Tensor &self) { return self.get_dtype(); }) + .def_property_readonly("layout", [](const Tensor &self) { return self.get_layout(); }) + .def( + "deallocate", + [](Tensor &self, bool force) { return self.deallocate(force); }, + py::arg("force") = false, + R"doc( + Dellocates all data of a tensor. This either deletes all host data or deallocates tensor data from device memory. )doc") - .def( - "to", - py::overload_cast(&Tensor::to, py::const_), - py::arg("target_layout").noconvert(), - py::arg("worker") = nullptr, - R"doc( - Convert TT Tensor to provided memory layout. Available layouts conversions are: - - * ROW_MAJOR to TILE - * TILE to ROW_MAJOR - - +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===========+=================================================+============================+================================+==========+ - | arg0 | Target memory layout | ttnn.Layout | ROW_MAJOR, TILE | Yes | - +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ - | arg1 | Worker thread performing layout conversion | ttnn.Device | Thread tied to TT accelerator | No | - | | (optional) | | device | | - +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ + .def( + "to", + py::overload_cast(&Tensor::to, py::const_), + py::arg("device").noconvert(), + py::arg("mem_config").noconvert() = MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED}, + py::keep_alive<0, 2>(), + R"doc( + Move TT Tensor from host device to TT accelerator device. - .. code-block:: python + Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - tt_tensor = tt_tensor.to(ttnn.Layout.TILE, worker) - )doc") - .def( - "to", - py::overload_cast(&Tensor::to, py::const_), - py::arg("target_layout").noconvert(), - py::arg("mesh_device") = nullptr, - R"doc( - Convert TT Tensor to provided memory layout. 
Available layouts conversions are: - - * ROW_MAJOR to TILE - * TILE to ROW_MAJOR - - +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===========+=================================================+============================+================================+==========+ - | arg0 | Target memory layout | ttnn.Layout | ROW_MAJOR, TILE | Yes | - +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ - | arg1 | Worker thread performing layout conversion | ttnn.Device | Thread tied to TT accelerator | No | - | | (optional) | | device | | - +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ + If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. - .. code-block:: python + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===========+=================================================+============================+=======================+==========+ + | arg0 | Device to which tensor will be moved | ttnn.Device | TT accelerator device | Yes | + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + | arg1 | MemoryConfig of tensor of TT accelerator device | ttnn.MemoryConfig | | No | + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - tt_tensor = tt_tensor.to(ttnn.Layout.TILE, mesh_device) - )doc") - .def( - "pad", - [](const Tensor &self, - const std::array &output_tensor_shape, - const std::array &input_tensor_start, - float pad_value) { return self.pad(output_tensor_shape, input_tensor_start, pad_value); }, - R"doc( - Pad TT Tensor with given pad value ``arg2``. - - The input tensor must be on host and in ROW_MAJOR layout. - - Returns an output tensor that contains the input tensor at the given input tensor start indices ``arg1`` and the padded value everywhere else. - - +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +=====================+======================================================+==============+=====================================================+==========+ - | arg0 | Shape of output tensor | List[int[4]] | | Yes | - +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ - | arg1 | Start indices to place input tensor in output tensor | List[int[4]] | Values along each dim must be | Yes | - | | | | | | - | | | | <= (output_tensor_shape[i] - input_tensor_shape[i]) | | - +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ - | arg2 | Value to pad input tensor | float | | Yes | - +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + .. 
code-block:: python - .. code-block:: python + tt_tensor = tt_tensor.to(tt_device) + )doc") + .def( + "track_ref_count", + [](Tensor &self) { return self.track_ref_count(); }, + R"doc( + Log the reference count (as seen by the main and worker threads) of a tensor as it evolves during runtime. + )doc") + .def( + "to", + py::overload_cast(&Tensor::to, py::const_), + py::arg("mesh_device").noconvert(), + py::arg("mem_config").noconvert() = MemoryConfig{.memory_layout = TensorMemoryLayout::INTERLEAVED}, + py::keep_alive<0, 2>(), + R"doc( + Move TT Tensor from host device to TT accelerator device. - input_tensor_shape = [1, 1, 3, 3] - output_tensor_shape = [1, 2, 5, 5] - input_tensor_start = [0, 1, 1, 1] - pad_value = 0 + Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - inp = torch.Tensor( - [ 1, 2, 3, - 4, 5, 6, - 7, 8, 9 ] - ) - tt_tensor = ttnn.Tensor( - inp.tolist(), - input_tensor_shape, - ttnn.DataType.BFLOAT16, - ttnn.Layout.ROW_MAJOR, - ) - tt_tensor_padded = tt_tensor.pad(output_tensor_shape, input_tensor_start, pad_value) - - print("Input tensor:") - print(tt_tensor) - print("\nPadded tensor:") - print(tt_tensor_padded) - - Example output: - - .. code-block:: - - Input tensor: - [ [[[1, 2, 3], - [4, 5, 6], - [7, 8, 9]]] dtype=bfloat16 ] - - Padded tensor: - [ [[[0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], - - [[0, 0, 0, 0, 0], - [0, 1, 2, 3, 0], - [0, 4, 5, 6, 0], - [0, 7, 8, 9, 0], - [0, 0, 0, 0, 0]]] dtype=bfloat16 ] - )doc") - .def( - "unpad", - [](const Tensor &self, - const std::array &output_tensor_start, - const std::array &output_tensor_end) { - return self.unpad(output_tensor_start, output_tensor_end); - }, - R"doc( - Unpad this TT Tensor. - - This tensor must be on host and in ROW_MAJOR layout. - - Returns an output tensor from output tensor start indices ``arg0`` to output tensor end indices ``arg1`` (inclusive) of the input tensor. - - +---------------------+----------------------------------------------+--------------+-----------------------------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +=====================+==============================================+==============+=====================================================+==========+ - | arg0 | Start indices of input tensor | List[int[4]] | Values along each dim must be | Yes | - | | | | | | - | | | | < input_tensor_shape[i] and <= output_tensor_end[i] | | - +---------------------+----------------------------------------------+--------------+-----------------------------------------------------+----------+ - | arg1 | End indices of input tensor in output tensor | List[int[4]] | Values along each dim must be | Yes | - | | | | | | - | | | | < input_tensor_shape[i] | | - +---------------------+----------------------------------------------+--------------+-----------------------------------------------------+----------+ + If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. - .. 
code-block:: python + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===========+=================================================+============================+=======================+==========+ + | arg0 | MeshDevice to which tensor will be moved | ttnn.MeshDevice | TT accelerator device | Yes | + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + | arg1 | MemoryConfig of tensor of TT accelerator device | ttnn.MemoryConfig | | No | + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - input_tensor_shape = [1, 1, 5, 5] - output_tensor_start = [0, 0, 1, 1] - output_tensor_end = [0, 0, 3, 3] + .. code-block:: python - inp = torch.Tensor( - [ 0, 0, 0, 0, 0, - 0, 1, 2, 3, 0, - 0, 4, 5, 6, 0, - 0, 7, 8, 9, 0, - 0, 0, 0, 0, 0 ] - ) - tt_tensor = ttnn.Tensor( - inp.tolist(), - input_tensor_shape, - ttnn.DataType.BFLOAT16, - ttnn.Layout.ROW_MAJOR, - ) - tt_tensor_unpadded = tt_tensor.unpad(output_tensor_start, output_tensor_end) + tt_tensor = tt_tensor.to(tt_device) + )doc") + .def("sync", [](Tensor &self) { return self.wait_for_tensor_data_populated(); }) + .def( + "extract_shard", + [](const Tensor &self, CoreCoord core) { return self.extract_shard(core); }, + py::arg("core").noconvert(), + py::keep_alive<0, 2>(), + R"doc( + Move TT Tensor from host device to TT accelerator device. - print("Input tensor:") - print(tt_tensor) - print("\nUnpadded tensor:") - print(tt_tensor_unpadded) + Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. - Example output: + If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. - .. code-block:: + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===========+=================================================+============================+=======================+==========+ + | arg0 | Core who's shard we want | ttnn.CoreCoord | TT accelerator device | Yes | + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - Input tensor: - [ [[[0, 0, 0, 0, 0], - [0, 1, 2, 3, 0], - [0, 4, 5, 6, 0], - [0, 7, 8, 9, 0], - [0, 0, 0, 0, 0]]] dtype=bfloat16 ] - Unpadded tensor: - [ [[[1, 2, 3], - [4, 5, 6], - [7, 8, 9]]] dtype=bfloat16 ] - )doc") - .def( - "pad_to_tile", [](const Tensor &self, float pad_value) { return self.pad_to_tile(pad_value); }, R"doc( - Pads TT Tensor with given pad value ``arg0``. + .. code-block:: python - The input tensor must be on host and in ROW_MAJOR layout. + tt_tensor = tt_tensor.to(tt_device) + )doc") + .def( + "extract_shard", + [](const Tensor &self, const uint32_t &core_id) { return self.extract_shard(core_id); }, + py::arg("core_id").noconvert(), + py::keep_alive<0, 2>(), + R"doc( + Move TT Tensor from host device to TT accelerator device. - Returns an output tensor that contains the input tensor padded with the padded value in the last two dims to multiples of 32. + Only BFLOAT16 (in ROW_MAJOR or TILE layout) and BFLOAT8_B, BFLOAT4_B (in TILE layout) are supported on device. 
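+
+            A minimal usage sketch (the core id ``0`` is illustrative; the tensor is assumed to be sharded
+            on device):
+
+            .. code-block:: python
+
+                shard = tt_tensor.extract_shard(0)  # Tensor holding the shard assigned to core id 0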
- Padding will be added to the right and bottom of the tensor. + If ``arg1`` is not supplied, default ``MemoryConfig`` with ``interleaved`` set to ``True``. - +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +=====================+======================================================+==============+=====================================================+==========+ - | arg0 | Value to pad input tensor | float | | Yes | - +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===========+=================================================+============================+=======================+==========+ + | arg0 | Core who's shard we want | uint32_t | TT accelerator device | Yes | + +-----------+-------------------------------------------------+----------------------------+-----------------------+----------+ - .. code-block:: python - input_tensor_shape = [1, 1, 3, 3] - pad_value = 0 + .. code-block:: python - inp = torch.Tensor( - [ 1, 2, 3, - 4, 5, 6, - 7, 8, 9 ] - ) - tt_tensor = ttnn.Tensor( - inp.tolist(), - input_tensor_shape, - ttnn.DataType.BFLOAT16, - ttnn.Layout.ROW_MAJOR, - ) - tt_tensor_padded = tt_tensor.pad_to_tile(pad_value) + tt_tensor = tt_tensor.to(tt_device) + )doc") + .def( + "cpu", + [](const Tensor &self, bool blocking) { return self.cpu(blocking); }, + py::arg("blocking") = true, + R"doc( + Move TT Tensor from TT accelerator device to host device. - print("Input tensor:") - print(tt_tensor) - print("\nPadded tensor:") - print(tt_tensor_padded) + .. code-block:: python - Example output: + tt_tensor = tt_tensor.cpu() + )doc") + .def("cpu_sharded", &Tensor::cpu_sharded, R"doc( + Move TT Tensor from TT accelerator device to host device in sharded orientation. - .. code-block:: + .. code-block:: python - Input tensor: - [ [[[1, 2, 3], - [4, 5, 6], - [7, 8, 9]]] dtype=bfloat16 ] + tt_tensor = tt_tensor.cpu_sharded() + )doc") + .def( + "to", + py::overload_cast(&Tensor::to, py::const_), + py::arg("target_layout").noconvert(), + py::arg("worker") = nullptr, + R"doc( + Convert TT Tensor to provided memory layout. Available layouts conversions are: - Padded tensor: - [ [[[1, 2, 3, 0, ..., 0], - [4, 5, 6, 0, ..., 0], - [7, 8, 9, 0, ..., 0], - [0, 0, 0, 0, ..., 0], - ..., - [0, 0, 0, 0, ..., 0]]] dtype=bfloat16 ] - )doc") - .def( - "unpad_from_tile", - [](const Tensor &self, const std::vector &output_tensor_shape) { - return self.unpad_from_tile(output_tensor_shape); - }, - R"doc( - Unpads TT Tensor from given input tensor ``arg0``. + * ROW_MAJOR to TILE + * TILE to ROW_MAJOR - The input tensor must be on host and in ROW_MAJOR layout. 
+ +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===========+=================================================+============================+================================+==========+ + | arg0 | Target memory layout | ttnn.Layout | ROW_MAJOR, TILE | Yes | + +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ + | arg1 | Worker thread performing layout conversion | ttnn.Device | Thread tied to TT accelerator | No | + | | (optional) | | device | | + +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ - This function expects the real data to aligned on the top left of the tensor. + .. code-block:: python - Returns an output tensor with padding removed from the right and bottom of the input tensor. + tt_tensor = tt_tensor.to(ttnn.Layout.TILE, worker) + )doc") + .def( + "to", + py::overload_cast(&Tensor::to, py::const_), + py::arg("target_layout").noconvert(), + py::arg("mesh_device") = nullptr, + R"doc( + Convert TT Tensor to provided memory layout. Available layouts conversions are: - +---------------------+----------------------------------------------+--------------+------------------------------------------------------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +=====================+==============================================+==============+==============================================================================+==========+ - | arg0 | Shape of output tensor | List[int[4]] | All dims must match the input tensor dims apart from the last two dims. | Yes | - | | | | | | - | | | | Last two dims have the following restrictions: | | - | | | | | | - | | | | input_tensor_shape[i] must be a multiple of 32 | | - | | | | | | - | | | | input_tensor_shape[i] - 32 < output_tensor_shape[i] <= input_tensor_shape[i] | | - +---------------------+----------------------------------------------+--------------+------------------------------------------------------------------------------+----------+ + * ROW_MAJOR to TILE + * TILE to ROW_MAJOR + +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===========+=================================================+============================+================================+==========+ + | arg0 | Target memory layout | ttnn.Layout | ROW_MAJOR, TILE | Yes | + +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ + | arg1 | Worker thread performing layout conversion | ttnn.Device | Thread tied to TT accelerator | No | + | | (optional) | | device | | + +-----------+-------------------------------------------------+----------------------------+--------------------------------+----------+ - .. code-block:: python + .. 
code-block:: python - input_tensor_shape = [1, 1, 32, 32] - output_tensor_shape = [1, 1, 3, 3] + tt_tensor = tt_tensor.to(ttnn.Layout.TILE, mesh_device) + )doc") + .def( + "pad", + [](const Tensor &self, + const std::array &output_tensor_shape, + const std::array &input_tensor_start, + float pad_value) { return self.pad(output_tensor_shape, input_tensor_start, pad_value); }, + R"doc( + Pad TT Tensor with given pad value ``arg2``. + + The input tensor must be on host and in ROW_MAJOR layout. + + Returns an output tensor that contains the input tensor at the given input tensor start indices ``arg1`` and the padded value everywhere else. + + +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +=====================+======================================================+==============+=====================================================+==========+ + | arg0 | Shape of output tensor | List[int[4]] | | Yes | + +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + | arg1 | Start indices to place input tensor in output tensor | List[int[4]] | Values along each dim must be | Yes | + | | | | | | + | | | | <= (output_tensor_shape[i] - input_tensor_shape[i]) | | + +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + | arg2 | Value to pad input tensor | float | | Yes | + +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + + .. code-block:: python + + input_tensor_shape = [1, 1, 3, 3] + output_tensor_shape = [1, 2, 5, 5] + input_tensor_start = [0, 1, 1, 1] + pad_value = 0 + + inp = torch.Tensor( + [ 1, 2, 3, + 4, 5, 6, + 7, 8, 9 ] + ) + tt_tensor = ttnn.Tensor( + inp.tolist(), + input_tensor_shape, + ttnn.DataType.BFLOAT16, + ttnn.Layout.ROW_MAJOR, + ) + tt_tensor_padded = tt_tensor.pad(output_tensor_shape, input_tensor_start, pad_value) + + print("Input tensor:") + print(tt_tensor) + print("\nPadded tensor:") + print(tt_tensor_padded) + + Example output: + + .. code-block:: + + Input tensor: + [ [[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]] dtype=bfloat16 ] + + Padded tensor: + [ [[[0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + + [[0, 0, 0, 0, 0], + [0, 1, 2, 3, 0], + [0, 4, 5, 6, 0], + [0, 7, 8, 9, 0], + [0, 0, 0, 0, 0]]] dtype=bfloat16 ] + )doc") + .def( + "unpad", + [](const Tensor &self, + const std::array &output_tensor_start, + const std::array &output_tensor_end) { + return self.unpad(output_tensor_start, output_tensor_end); + }, + R"doc( + Unpad this TT Tensor. + + This tensor must be on host and in ROW_MAJOR layout. + + Returns an output tensor from output tensor start indices ``arg0`` to output tensor end indices ``arg1`` (inclusive) of the input tensor. 
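+
+            Since both index lists are inclusive, the returned tensor has size
+            ``output_tensor_end[i] - output_tensor_start[i] + 1`` along each dimension ``i``.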
+ + +---------------------+----------------------------------------------+--------------+-----------------------------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +=====================+==============================================+==============+=====================================================+==========+ + | arg0 | Start indices of input tensor | List[int[4]] | Values along each dim must be | Yes | + | | | | | | + | | | | < input_tensor_shape[i] and <= output_tensor_end[i] | | + +---------------------+----------------------------------------------+--------------+-----------------------------------------------------+----------+ + | arg1 | End indices of input tensor in output tensor | List[int[4]] | Values along each dim must be | Yes | + | | | | | | + | | | | < input_tensor_shape[i] | | + +---------------------+----------------------------------------------+--------------+-----------------------------------------------------+----------+ + + .. code-block:: python + + input_tensor_shape = [1, 1, 5, 5] + output_tensor_start = [0, 0, 1, 1] + output_tensor_end = [0, 0, 3, 3] + + inp = torch.Tensor( + [ 0, 0, 0, 0, 0, + 0, 1, 2, 3, 0, + 0, 4, 5, 6, 0, + 0, 7, 8, 9, 0, + 0, 0, 0, 0, 0 ] + ) + tt_tensor = ttnn.Tensor( + inp.tolist(), + input_tensor_shape, + ttnn.DataType.BFLOAT16, + ttnn.Layout.ROW_MAJOR, + ) + tt_tensor_unpadded = tt_tensor.unpad(output_tensor_start, output_tensor_end) + + print("Input tensor:") + print(tt_tensor) + print("\nUnpadded tensor:") + print(tt_tensor_unpadded) + + Example output: + + .. code-block:: + + Input tensor: + [ [[[0, 0, 0, 0, 0], + [0, 1, 2, 3, 0], + [0, 4, 5, 6, 0], + [0, 7, 8, 9, 0], + [0, 0, 0, 0, 0]]] dtype=bfloat16 ] + + Unpadded tensor: + [ [[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]] dtype=bfloat16 ] + )doc") + .def( + "pad_to_tile", [](const Tensor &self, float pad_value) { return self.pad_to_tile(pad_value); }, R"doc( + Pads TT Tensor with given pad value ``arg0``. + + The input tensor must be on host and in ROW_MAJOR layout. + + Returns an output tensor that contains the input tensor padded with the padded value in the last two dims to multiples of 32. + + Padding will be added to the right and bottom of the tensor. + + +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +=====================+======================================================+==============+=====================================================+==========+ + | arg0 | Value to pad input tensor | float | | Yes | + +---------------------+------------------------------------------------------+--------------+-----------------------------------------------------+----------+ + + .. code-block:: python + + input_tensor_shape = [1, 1, 3, 3] + pad_value = 0 + + inp = torch.Tensor( + [ 1, 2, 3, + 4, 5, 6, + 7, 8, 9 ] + ) + tt_tensor = ttnn.Tensor( + inp.tolist(), + input_tensor_shape, + ttnn.DataType.BFLOAT16, + ttnn.Layout.ROW_MAJOR, + ) + tt_tensor_padded = tt_tensor.pad_to_tile(pad_value) + + print("Input tensor:") + print(tt_tensor) + print("\nPadded tensor:") + print(tt_tensor_padded) + + Example output: + + .. 
code-block:: + + Input tensor: + [ [[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]] dtype=bfloat16 ] + + Padded tensor: + [ [[[1, 2, 3, 0, ..., 0], + [4, 5, 6, 0, ..., 0], + [7, 8, 9, 0, ..., 0], + [0, 0, 0, 0, ..., 0], + ..., + [0, 0, 0, 0, ..., 0]]] dtype=bfloat16 ] + )doc") + .def( + "unpad_from_tile", + [](const Tensor &self, const std::vector &output_tensor_shape) { + return self.unpad_from_tile(output_tensor_shape); + }, + R"doc( + Unpads TT Tensor from given input tensor ``arg0``. + + The input tensor must be on host and in ROW_MAJOR layout. + + This function expects the real data to aligned on the top left of the tensor. + + Returns an output tensor with padding removed from the right and bottom of the input tensor. + + +---------------------+----------------------------------------------+--------------+------------------------------------------------------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +=====================+==============================================+==============+==============================================================================+==========+ + | arg0 | Shape of output tensor | List[int[4]] | All dims must match the input tensor dims apart from the last two dims. | Yes | + | | | | | | + | | | | Last two dims have the following restrictions: | | + | | | | | | + | | | | input_tensor_shape[i] must be a multiple of 32 | | + | | | | | | + | | | | input_tensor_shape[i] - 32 < output_tensor_shape[i] <= input_tensor_shape[i] | | + +---------------------+----------------------------------------------+--------------+------------------------------------------------------------------------------+----------+ + + + .. code-block:: python + + input_tensor_shape = [1, 1, 32, 32] + output_tensor_shape = [1, 1, 3, 3] + + inp = torch.arange(start=1.0, end=10.0).reshape(1, 1, 3, 3) + inp = torch.nn.functional.pad(inp, [0, input_tensor_shape[3] - inp.shape[3], 0, input_tensor_shape[2] - inp.shape[2]]).reshape(-1) + tt_tensor = ttnn.Tensor( + inp.tolist(), + input_tensor_shape, + ttnn.DataType.BFLOAT16, + ttnn.Layout.ROW_MAJOR, + ) + tt_tensor_unpadded = tt_tensor.unpad_from_tile(output_tensor_shape) + + print("Input tensor:") + print(tt_tensor) + print("\nUnpadded tensor:") + print(tt_tensor_unpadded) + + Example output: + + .. code-block:: + + Input tensor: + [ [[[1, 2, 3, 0, ..., 0], + [4, 5, 6, 0, ..., 0], + [7, 8, 9, 0, ..., 0], + [0, 0, 0, 0, ..., 0], + ..., + [0, 0, 0, 0, ..., 0]]] dtype=bfloat16 ] + + Unpadded tensor: + [ [[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]] dtype=bfloat16 ] + )doc") + .def( + "__repr__", [](const Tensor &self) { return self.write_to_string(); }, R"doc( + Prints the tensor as list of nested lists. Number of levels of nesting is equal to tensor rank. - inp = torch.arange(start=1.0, end=10.0).reshape(1, 1, 3, 3) - inp = torch.nn.functional.pad(inp, [0, input_tensor_shape[3] - inp.shape[3], 0, input_tensor_shape[2] - inp.shape[2]]).reshape(-1) - tt_tensor = ttnn.Tensor( - inp.tolist(), - input_tensor_shape, - ttnn.DataType.BFLOAT16, - ttnn.Layout.ROW_MAJOR, - ) - tt_tensor_unpadded = tt_tensor.unpad_from_tile(output_tensor_shape) + .. code-block:: python - print("Input tensor:") - print(tt_tensor) - print("\nUnpadded tensor:") - print(tt_tensor_unpadded) + print(tt_tensor) - Example output: + Example output for a rank 4 TT Tensor with shape (1, 1, 32, 32): - .. code-block:: + .. 
-                inp = torch.arange(start=1.0, end=10.0).reshape(1, 1, 3, 3)
-                inp = torch.nn.functional.pad(inp, [0, input_tensor_shape[3] - inp.shape[3], 0, input_tensor_shape[2] - inp.shape[2]]).reshape(-1)
-                tt_tensor = ttnn.Tensor(
-                    inp.tolist(),
-                    input_tensor_shape,
-                    ttnn.DataType.BFLOAT16,
-                    ttnn.Layout.ROW_MAJOR,
-                )
-                tt_tensor_unpadded = tt_tensor.unpad_from_tile(output_tensor_shape)
-
-                print("Input tensor:")
-                print(tt_tensor)
-                print("\nUnpadded tensor:")
-                print(tt_tensor_unpadded)
-
-            Example output:
-
-            .. code-block::
-
-                Input tensor:
-                [ [[[1, 2, 3, 0, ..., 0],
-                    [4, 5, 6, 0, ..., 0],
-                    [7, 8, 9, 0, ..., 0],
-                    [0, 0, 0, 0, ..., 0],
-                    ...,
-                    [0, 0, 0, 0, ..., 0]]] dtype=bfloat16 ]
-
-                Unpadded tensor:
-                [ [[[1, 2, 3],
-                    [4, 5, 6],
-                    [7, 8, 9]]] dtype=bfloat16 ]
-            )doc")
-            .def(
-                "__repr__", [](const Tensor &self) { return self.write_to_string(); }, R"doc(
-                Prints the tensor as list of nested lists. Number of levels of nesting is equal to tensor rank.
-
-                .. code-block:: python
-
-                    print(tt_tensor)
-
-                Example output for a rank 4 TT Tensor with shape (1, 1, 32, 32):
-
-                .. code-block::
-
-                    [ [[[0.220703, 0.839844, 0.960938, ..., 0.378906, 0.507812],
-                        [0.03125, 0.511719, 0.0407715, ..., 0.945312, 0.671875],
-                        ...
-                        [0.433594, 0.165039, 0.980469, ..., , 0.349609]]] dtype=bfloat16 ]
-
-            )doc")
+        .def(
+            "__repr__", [](const Tensor &self) { return self.write_to_string(); }, R"doc(
+            Prints the tensor as a list of nested lists. The number of nesting levels equals the tensor rank.
+
+            .. code-block:: python
+
+                print(tt_tensor)
+
+            Example output for a rank 4 TT Tensor with shape (1, 1, 32, 32):
+
+            .. code-block::
+
+                [ [[[0.220703, 0.839844, 0.960938, ..., 0.378906, 0.507812],
+                    [0.03125, 0.511719, 0.0407715, ..., 0.945312, 0.671875],
+                    ...
+                    [0.433594, 0.165039, 0.980469, ..., 0.349609]]] dtype=bfloat16 ]
+
+        )doc")
-            .def(
-                "get_legacy_shape",
-                [](const Tensor &self) { return self.get_legacy_shape(); },
-                R"doc(
-                Get the shape of the tensor as Shape class.
-
-                .. code-block:: python
-
-                    shape = tt_tensor.get_legacy_shape()
-
-            )doc")
+        .def(
+            "get_legacy_shape",
+            [](const Tensor &self) { return self.get_legacy_shape(); },
+            R"doc(
+            Get the shape of the tensor as Shape class.
+
+            .. code-block:: python
+
+                shape = tt_tensor.get_legacy_shape()
+
+        )doc")
-            .def(
-                "volume", [](const Tensor &self) { return self.volume(); }, R"doc(
-                Get the volume of the tensor.
-
-                .. code-block:: python
-
-                    volume = tt_tensor.volume()
-
-            )doc")
+        .def(
+            "volume", [](const Tensor &self) { return self.volume(); }, R"doc(
+            Get the volume of the tensor.
+
+            .. code-block:: python
+
+                volume = tt_tensor.volume()
+
+        )doc")
-            .def(
-                "storage_type", [](const Tensor &self) { return self.storage_type(); }, R"doc(
-                Check if the tensor is on host
-
-                .. code-block:: python
-
-                    storage_type = tt_tensor.storage_type()
-
-            )doc")
+        .def(
+            "storage_type", [](const Tensor &self) { return self.storage_type(); }, R"doc(
+            Get the storage type of the tensor.
+
+            .. code-block:: python
+
+                storage_type = tt_tensor.storage_type()
+
+        )doc")
-            .def(
-                "device",
-                [](const Tensor &self) { return self.device(); },
-                R"doc(
-                Get the device of the tensor.
-
-                .. code-block:: python
-
-                    device = tt_tensor.device()
-
-            )doc",
-                py::return_value_policy::reference)
+        .def(
+            "device",
+            [](const Tensor &self) { return self.device(); },
+            R"doc(
+            Get the device of the tensor.
+
+            .. code-block:: python
+
+                device = tt_tensor.device()
+
+        )doc",
+            py::return_value_policy::reference)
-            .def(
-                "devices",
-                [](const Tensor &self) { return self.get_workers(); },
-                R"doc(
-                Get devices tensor is mapped on to.
-
-                .. code-block:: python
-
-                    devices = tt_tensor.devices()
-
-            )doc",
-                py::return_value_policy::reference)
+        .def(
+            "devices",
+            [](const Tensor &self) { return self.get_workers(); },
+            R"doc(
+            Get the devices the tensor is mapped onto.
+
+            .. code-block:: python
+
+                devices = tt_tensor.devices()
+
+        )doc",
+            py::return_value_policy::reference)
-            .def(
-                "to_torch",
-                [](const Tensor &self) -> py::object { return detail::convert_tt_tensor_to_torch_tensor(self); },
-                R"doc(
-                Convert tensor to torch tensor.
-
-                The tensor must be on host when calling this function.
-
-                .. code-block:: python
-
-                    data = tt_tensor.cpu().to_torch() # move TT Tensor to host and convert it to torch tensor
-
-            )doc")
+        .def(
+            "to_torch",
+            [](const Tensor &self) -> py::object { return detail::convert_tt_tensor_to_torch_tensor(self); },
+            R"doc(
+            Convert tensor to torch tensor.
+
+            The tensor must be on host when calling this function.
+
+            .. code-block:: python
+
+                data = tt_tensor.cpu().to_torch() # move TT Tensor to host and convert it to torch tensor
+
+        )doc")
-            .def(
-                "to_numpy",
-                [](const Tensor &self) -> py::object { return detail::convert_tt_tensor_to_numpy_tensor(self); },
-                R"doc(
-                Convert tensor to numpy tensor.
-
-                The tensor must be on host when calling this function.
-
-                .. code-block:: python
-
-                    data = tt_tensor.cpu().to_numpy() # move TT Tensor to host and convert it to numpy tensor
-
-            )doc")
+        .def(
+            "to_numpy",
+            [](const Tensor &self) -> py::object { return detail::convert_tt_tensor_to_numpy_tensor(self); },
+            R"doc(
+            Convert tensor to numpy tensor.
+
+            The tensor must be on host when calling this function.
+
+            .. code-block:: python
+
+                data = tt_tensor.cpu().to_numpy() # move TT Tensor to host and convert it to numpy tensor
+
+        )doc")
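
``to_torch`` and ``to_numpy`` only read host storage, which is why the docstring examples route through ``cpu()`` first. A minimal host-only round trip, assuming a built ``ttnn`` module is importable (no device is needed because the tensor is constructed on host):

.. code-block:: python

    import torch
    import ttnn

    x = torch.rand(1, 1, 32, 32).bfloat16()
    tt = ttnn.Tensor(
        x.reshape(-1).tolist(),
        [1, 1, 32, 32],
        ttnn.DataType.BFLOAT16,
        ttnn.Layout.ROW_MAJOR,
    )
    y = tt.to_torch()  # already on host, so no cpu() hop is required
    assert tuple(y.shape) == (1, 1, 32, 32)
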
-            .def(
-                "buffer",
-                [](const Tensor &self) -> std::variant<OwnedBuffer, BorrowedBuffer> {
-                    return std::visit(
-                        [](auto &&storage) -> std::variant<OwnedBuffer, BorrowedBuffer> {
-                            using T = std::decay_t<decltype(storage)>;
-                            if constexpr (std::is_same_v<T, OwnedStorage>) {
-                                return storage.buffer;
-                            } else if constexpr (std::is_same_v<T, DeviceStorage>) {
-                                TT_THROW("Device storage doesn't support buffer method");
-                            } else if constexpr (std::is_same_v<T, BorrowedStorage>) {
-                                return storage.buffer;
-                            } else if constexpr (std::is_same_v<T, MultiDeviceStorage>) {
-                                TT_THROW("MultiDeviceStorage doesn't support buffer method");
-                            } else if constexpr (std::is_same_v<T, MultiDeviceHostStorage>) {
-                                TT_THROW("MultiDeviceHostStorage doesn't support buffer method");
-                            } else {
-                                raise_unsupported_storage<T>();
-                            }
-                        },
-                        self.get_storage());
-                },
-                R"doc(
-                Get the underlying buffer.
-
-                The tensor must be on the cpu when calling this function.
-
-                .. code-block:: python
-
-                    buffer = tt_tensor.cpu().buffer() # move TT Tensor to host and get the buffer
-
-            )doc")
+        .def(
+            "buffer",
+            [](const Tensor &self) -> std::variant<OwnedBuffer, BorrowedBuffer> {
+                return std::visit(
+                    [](auto &&storage) -> std::variant<OwnedBuffer, BorrowedBuffer> {
+                        using T = std::decay_t<decltype(storage)>;
+                        if constexpr (std::is_same_v<T, OwnedStorage>) {
+                            return storage.buffer;
+                        } else if constexpr (std::is_same_v<T, DeviceStorage>) {
+                            TT_THROW("Device storage doesn't support buffer method");
+                        } else if constexpr (std::is_same_v<T, BorrowedStorage>) {
+                            return storage.buffer;
+                        } else if constexpr (std::is_same_v<T, MultiDeviceStorage>) {
+                            TT_THROW("MultiDeviceStorage doesn't support buffer method");
+                        } else if constexpr (std::is_same_v<T, MultiDeviceHostStorage>) {
+                            TT_THROW("MultiDeviceHostStorage doesn't support buffer method");
+                        } else {
+                            raise_unsupported_storage<T>();
+                        }
+                    },
+                    self.get_storage());
+            },
+            R"doc(
+            Get the underlying buffer.
+
+            The tensor must be on the CPU when calling this function.
+
+            .. code-block:: python
+
+                buffer = tt_tensor.cpu().buffer() # move TT Tensor to host and get the buffer
+
+        )doc")
-            .def(
-                "buffer_address",
-                [](const Tensor &self) -> uint32_t {
-                    return std::visit(
-                        [](auto &&storage) -> uint32_t {
-                            using T = std::decay_t<decltype(storage)>;
-                            if constexpr (std::is_same_v<T, OwnedStorage>) {
-                                TT_THROW("OwnedStorage doesn't support buffer_address method");
-                            } else if constexpr (std::is_same_v<T, DeviceStorage>) {
-                                return storage.buffer->address();
-                            } else if constexpr (std::is_same_v<T, BorrowedStorage>) {
-                                TT_THROW("BorrowedStorage doesn't support buffer_address method");
-                            } else if constexpr (std::is_same_v<T, MultiDeviceStorage>) {
-                                TT_THROW("MultiDeviceStorage doesn't support buffer_address method");
-                            } else if constexpr (std::is_same_v<T, MultiDeviceHostStorage>) {
-                                TT_THROW("MultiDeviceHostStorage doesn't support buffer_address method");
-                            } else {
-                                raise_unsupported_storage<T>();
-                            }
-                        },
-                        self.get_storage());
-                },
-                R"doc(
-                Get the address of the underlying buffer.
-
-                The tensor must be on the single device when calling this function.
-
-                .. code-block:: python
-
-                    address = tt_tensor.buffer_address()
-
-            )doc")
+        .def(
+            "buffer_address",
+            [](const Tensor &self) -> uint32_t {
+                return std::visit(
+                    [](auto &&storage) -> uint32_t {
+                        using T = std::decay_t<decltype(storage)>;
+                        if constexpr (std::is_same_v<T, OwnedStorage>) {
+                            TT_THROW("OwnedStorage doesn't support buffer_address method");
+                        } else if constexpr (std::is_same_v<T, DeviceStorage>) {
+                            return storage.buffer->address();
+                        } else if constexpr (std::is_same_v<T, BorrowedStorage>) {
+                            TT_THROW("BorrowedStorage doesn't support buffer_address method");
+                        } else if constexpr (std::is_same_v<T, MultiDeviceStorage>) {
+                            TT_THROW("MultiDeviceStorage doesn't support buffer_address method");
+                        } else if constexpr (std::is_same_v<T, MultiDeviceHostStorage>) {
+                            TT_THROW("MultiDeviceHostStorage doesn't support buffer_address method");
+                        } else {
+                            raise_unsupported_storage<T>();
+                        }
+                    },
+                    self.get_storage());
+            },
+            R"doc(
+            Get the address of the underlying buffer.
+
+            The tensor must be on a single device when calling this function.
+
+            .. code-block:: python
+
+                address = tt_tensor.buffer_address()
+
+        )doc")
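
The two accessors above are deliberately storage-specific: ``buffer()`` is valid only for host (owned/borrowed) storage, while ``buffer_address()`` is valid only for single-device storage; the other branches of each visitor throw. A sketch of the intended call sites (``dev_tensor`` is an illustrative name for a tensor already placed on a device):

.. code-block:: python

    host_buffer = tt_tensor.cpu().buffer()     # host path: move to host, then inspect the data
    device_addr = dev_tensor.buffer_address()  # device path: query the device buffer's address
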
-            .def(
-                "get_layout", [](const Tensor &self) { return self.get_layout(); }, R"doc(
-                Get memory layout of TT Tensor.
-
-                .. code-block:: python
-
-                    layout = tt_tensor.get_layout()
-
-            )doc")
+        .def(
+            "get_layout", [](const Tensor &self) { return self.get_layout(); }, R"doc(
+            Get memory layout of TT Tensor.
+
+            .. code-block:: python
+
+                layout = tt_tensor.get_layout()
+
+        )doc")
-            .def(
-                "memory_config", [](const Tensor &self) { return self.memory_config(); }, R"doc(
-                Get buffer type of TT Tensor.
-
-                .. code-block:: python
-
-                    memory_config = tt_tensor.memory_config()
-
-            )doc")
+        .def(
+            "memory_config", [](const Tensor &self) { return self.memory_config(); }, R"doc(
+            Get memory configuration of TT Tensor.
+
+            .. code-block:: python
+
+                memory_config = tt_tensor.memory_config()
+
+        )doc")
-            .def(
-                "is_allocated", [](const Tensor &self) { return self.is_allocated(); }, R"doc(
-                Check if TT Tensor is allocated.
-
-                .. code-block:: python
-
-                    is_sharded = tt_tensor.is_sharded()
-
-            )doc")
+        .def(
+            "is_allocated", [](const Tensor &self) { return self.is_allocated(); }, R"doc(
+            Check if TT Tensor is allocated.
+
+            .. code-block:: python
+
+                is_allocated = tt_tensor.is_allocated()
+
+        )doc")
-            .def("is_contiguous", [](const Tensor &self) -> bool { return self.is_contiguous(); })
+        .def("is_contiguous", [](const Tensor &self) -> bool { return self.is_contiguous(); })
-            .def(
-                "is_sharded", [](const Tensor &self) { return self.is_sharded(); }, R"doc(
-                Check if TT Tensor is sharded.
-
-                .. code-block:: python
-
-                    is_sharded = tt_tensor.is_sharded()
-
-            )doc")
+        .def(
+            "is_sharded", [](const Tensor &self) { return self.is_sharded(); }, R"doc(
+            Check if TT Tensor is sharded.
+
+            .. code-block:: python
+
+                is_sharded = tt_tensor.is_sharded()
+
+        )doc")
-            .def(
-                "get_dtype", [](const Tensor &self) { return self.get_dtype(); }, R"doc(
-                Get dtype of TT Tensor.
-
-                .. code-block:: python
-
-                    dtype = tt_tensor.get_dtype()
-            )doc")
+        .def(
+            "get_dtype", [](const Tensor &self) { return self.get_dtype(); }, R"doc(
+            Get dtype of TT Tensor.
+
+            .. code-block:: python
+
+                dtype = tt_tensor.get_dtype()
+        )doc")
-            .def(
-                "shape_without_padding",
-                [](const Tensor &self) { return Shape{self.get_legacy_shape().without_padding()}; },
-                R"doc(
-                Get shape without padding of TT Tensor.
-
-                .. code-block:: python
-
-                    dtype = tt_tensor.shape_without_padding()
-            )doc")
+        .def(
+            "shape_without_padding",
+            [](const Tensor &self) { return Shape{self.get_legacy_shape().without_padding()}; },
+            R"doc(
+            Get shape without padding of TT Tensor.
+
+            .. code-block:: python
+
+                shape = tt_tensor.shape_without_padding()
+        )doc")
-            .def(
-                "reshape",
-                [](Tensor &self, int N, int C, int H, int W) { return self.reshape(N, C, H, W); },
-                R"doc(
-                Reshapes TT tensor
-
-                .. code-block:: python
-
-                    reshaped_tensor = tt_tensor.reshape(N, C, H, W)
-            )doc")
+        .def(
+            "reshape",
+            [](Tensor &self, int N, int C, int H, int W) { return self.reshape(N, C, H, W); },
+            R"doc(
+            Reshapes TT tensor.
+
+            .. code-block:: python
+
+                reshaped_tensor = tt_tensor.reshape(N, C, H, W)
+        )doc")
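
``reshape`` preserves the element count; only the view of the dims changes. A small sketch of the invariant, using the ``volume()`` binding documented above (the shapes are illustrative):

.. code-block:: python

    # 1*1*32*32 == 1*1*4*256, so this reshape is legal
    reshaped = tt_tensor.reshape(1, 1, 4, 256)
    assert reshaped.volume() == tt_tensor.volume()
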
-            .def(
-                "reshape",
-                [](Tensor &self, const tt::tt_metal::Shape &shape) -> Tensor { return self.reshape(shape); },
-                R"doc(
-                Reshapes TT tensor
-
-                .. code-block:: python
-
-                    reshaped_tensor = tt_tensor.reshape((4, 3, 32))
-            )doc")
+        .def(
+            "reshape",
+            [](Tensor &self, const tt::tt_metal::Shape &shape) -> Tensor { return self.reshape(shape); },
+            R"doc(
+            Reshapes TT tensor.
+
+            .. code-block:: python
+
+                reshaped_tensor = tt_tensor.reshape((4, 3, 32))
+        )doc")
-            .def_property(
-                "tensor_id",
-                [](const Tensor &self) { return self.tensor_id; },
-                [](Tensor &self, std::size_t tensor_id) { self.tensor_id = tensor_id; });
-    }
+        .def_property(
+            "tensor_id",
+            [](const Tensor &self) { return self.tensor_id; },
+            [](Tensor &self, std::size_t tensor_id) { self.tensor_id = tensor_id; });
+}
 
 }  // namespace ttnn::tensor
diff --git a/ttnn/cpp/pybind11/tensor.cpp b/ttnn/cpp/pybind11/tensor.cpp
index c9281f27172..9a0bb65322a 100644
--- a/ttnn/cpp/pybind11/tensor.cpp
+++ b/ttnn/cpp/pybind11/tensor.cpp
@@ -64,7 +64,7 @@ void implement_buffer_protocol(py::module& m_tensor, std::string_view name) {
 }
 
 }  // namespace detail
 
-void py_module_types(py::module& m_tensor) {
+void tensor_mem_config_module_types(py::module& m_tensor) {
     export_enum<Layout>(m_tensor);
     export_enum<DataType>(m_tensor);
     export_enum<StorageType>(m_tensor);
@@ -115,7 +115,7 @@ void py_module_types(py::module& m_tensor) {
 
 }
 
-void tensor_module(py::module& m_tensor) {
+void tensor_mem_config_module(py::module& m_tensor) {
     using tt::tt_metal::Shape;
 
     auto py_core_coord = static_cast<py::class_<CoreCoord>>(m_tensor.attr("CoreCoord"));
@@ -291,8 +291,4 @@ void tensor_module(py::module& m_tensor) {
 
 }
 
-void py_module(py::module& module) {
-    tensor_module(module);
-}
-
 }  // namespace ttnn::tensor
diff --git a/ttnn/cpp/pybind11/tensor.hpp b/ttnn/cpp/pybind11/tensor.hpp
index 3bc367c0c1b..2d26484ac60 100644
--- a/ttnn/cpp/pybind11/tensor.hpp
+++ b/ttnn/cpp/pybind11/tensor.hpp
@@ -10,9 +10,9 @@ namespace py = pybind11;
 
 namespace ttnn::tensor {
 
-    void pytensor_module_types(pybind11::module & m_tensor);
-    void pytensor_module(pybind11::module & m_tensor);
-    void py_module_types(py::module& module);
-    void py_module(py::module& module);
+void pytensor_module_types(py::module & m_tensor);
+void pytensor_module(py::module & m_tensor);
+void tensor_mem_config_module_types(py::module& module);
+void tensor_mem_config_module(py::module& module);
 
 }  // namespace ttnn::tensor
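
The renames above are internal to the C++ registration helpers; the Python-facing surface is unchanged, since the same types and enums are registered on the same submodules. A quick smoke test, assuming a built ``ttnn`` package is importable (the exact re-exports may vary by version):

.. code-block:: python

    import ttnn

    # Symbols registered by the renamed helpers should still resolve as before.
    assert hasattr(ttnn, "Tensor")
    assert hasattr(ttnn, "Layout")
    assert hasattr(ttnn, "DataType")
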