From 254023c9c91ae270928fd4c26ff4ec954b17b305 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 20 Jul 2024 14:15:58 +0000 Subject: [PATCH 01/14] #9758: Moved cpp files to ttnn folder --- tests/tt_eager/ops/test_average_pool.cpp | 2 +- .../trace_testing/misc/test_average_pool.py | 3 +- .../unit_testing/misc/test_average_pool.py | 4 +- .../unit_testing/misc/test_max_pool.py | 9 +- .../test_untilize_with_halo_and_max_pool.py | 9 +- ...test_untilize_with_halo_and_max_pool_v2.py | 4 +- ttnn/CMakeLists.txt | 4 + ttnn/cpp/pybind11/operations/__init__.hpp | 4 +- ttnn/cpp/pybind11/operations/pool.hpp | 66 ------- .../tt_dnn/op_library/CMakeLists.txt | 4 - .../tt_lib/csrc/tt_lib_bindings_tensor.cpp | 94 +--------- ttnn/cpp/ttnn/operations/maxpool2d.hpp | 2 +- .../pool/average_pool.cpp | 5 + .../pool/average_pool.hpp | 0 .../pool/device}/kernels/compute/max_pool.cpp | 0 .../kernels/compute/max_pool_multi_core.cpp | 0 .../reader_max_pool_2d_multi_core.cpp | 0 .../reader_max_pool_2d_multi_core_sharded.cpp | 0 ...x_pool_2d_multi_core_sharded_with_halo.cpp | 0 ...ool_2d_multi_core_sharded_with_halo_v2.cpp | 0 .../reader_max_pool_2d_single_core.cpp | 0 .../writer_max_pool_2d_multi_core.cpp | 0 .../writer_max_pool_2d_multi_core_v2.cpp | 0 .../writer_max_pool_2d_single_core.cpp | 0 .../pool/max_pool.cpp | 4 + .../pool/max_pool.hpp | 0 .../pool/max_pool_multi_core.cpp | 36 ++++ .../pool/max_pool_single_core.cpp | 18 ++ ttnn/cpp/ttnn/operations/{ => pool}/pool.hpp | 2 +- ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp | 166 ++++++++++++++++++ .../ttnn/operations/upsample/upsample_op.cpp | 2 +- ttnn/tt_lib/fused_ops/average_pool.py | 3 +- ttnn/tt_lib/fused_ops/max_pool.py | 8 +- ttnn/ttnn/__init__.py | 2 +- ttnn/ttnn/operations/conv/tt_py_max_pool.py | 5 +- ttnn/ttnn/operations/pool.py | 10 ++ 36 files changed, 285 insertions(+), 181 deletions(-) delete mode 100644 ttnn/cpp/pybind11/operations/pool.hpp rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library => operations}/pool/average_pool.cpp (85%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library => operations}/pool/average_pool.hpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/compute/max_pool.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/compute/max_pool_multi_core.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/reader_max_pool_2d_multi_core.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/reader_max_pool_2d_single_core.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/writer_max_pool_2d_multi_core.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library/pool => operations/pool/device}/kernels/dataflow/writer_max_pool_2d_single_core.cpp 
(100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library => operations}/pool/max_pool.cpp (98%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library => operations}/pool/max_pool.hpp (100%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library => operations}/pool/max_pool_multi_core.cpp (95%) rename ttnn/cpp/ttnn/{experimental/tt_dnn/op_library => operations}/pool/max_pool_single_core.cpp (91%) rename ttnn/cpp/ttnn/operations/{ => pool}/pool.hpp (92%) create mode 100644 ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp diff --git a/tests/tt_eager/ops/test_average_pool.cpp b/tests/tt_eager/ops/test_average_pool.cpp index c0772c6032e..5dab6ec8f2e 100644 --- a/tests/tt_eager/ops/test_average_pool.cpp +++ b/tests/tt_eager/ops/test_average_pool.cpp @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "ttnn/experimental/tt_dnn/op_library/pool/average_pool.hpp" +#include "ttnn/cpp/ttnn/operations/pool/average_pool.hpp" #include "ttnn/experimental/tt_dnn/op_library/auto_format.hpp" #include "tt_numpy/functions.hpp" diff --git a/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py b/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py index abaea606780..108280c9ac9 100644 --- a/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py +++ b/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py @@ -11,6 +11,7 @@ from tt_lib.utils import _nearest_32 from models.utility_functions import comp_pcc +import ttnn TILE_HEIGHT = TILE_WIDTH = 32 @@ -63,7 +64,7 @@ def test_run_average_pool(act_shape, dtype, device, use_program_cache, enable_as ttact_res = ttact.to(device) def run_ops(ttact_res): - return ttl.tensor.average_pool_2d(ttact_res) + return ttnn.average_pool_2d(ttact_res) # Compile run_ops(ttact_res) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py index 6b0133a1637..f6e5fcf5759 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py @@ -12,6 +12,8 @@ from tt_lib.utils import _nearest_32 from models.utility_functions import comp_pcc +import ttnn + TILE_HEIGHT = TILE_WIDTH = 32 @@ -43,7 +45,7 @@ def test_run_average_pool(act_shape, dtype, device): ttact = ttact.pad_to_tile(0.0) ttact = ttact.to(device) - out = ttl.tensor.average_pool_2d(ttact) + out = ttnn.average_pool_2d(ttact) out = out.cpu().to(ttl.tensor.Layout.ROW_MAJOR) out_shape = [batch_size, 1, 1, channels] diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py index 40090d8aa6e..ef3c11fe5d0 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py @@ -17,6 +17,7 @@ from functools import reduce import operator +import ttnn def volume(shape): @@ -236,7 +237,7 @@ def test_run_max_pool( else: ttact = ttact.to(device, in_mem_config) - out_padded = ttl.tensor.max_pool2d( + out_padded = ttnn.max_pool2d( ttact, in_n, in_h, @@ -249,9 +250,9 @@ def test_run_max_pool( pad_w, dilation_h, dilation_w, - out_mem_config, - nblocks, - use_multicore, + memory_config=out_mem_config, + nblocks=nblocks, + use_multicore=use_multicore, ) if out_mem_config.is_sharded(): out_padded = ttl.tensor.sharded_to_interleaved(out_padded, interleaved_mem_config) diff --git 
a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool.py index 419597c2b1b..e7c09997703 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool.py @@ -14,6 +14,7 @@ from tt_lib.utils import _nearest_32 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import comp_pcc from models.utility_functions import is_wormhole_b0 +import ttnn def volume(shape): @@ -186,7 +187,7 @@ def test_run_max_pool( # ttl.device.DumpDeviceMemoryState(device) ttact_sharded.deallocate() - out_padded = ttl.tensor.max_pool2d( + out_padded = ttnn.max_pool2d( out_untilize, in_n, in_h, @@ -199,9 +200,9 @@ def test_run_max_pool( pad_w, dilation_h, dilation_w, - out_mem_config, - nblocks, - True, + memory_config=out_mem_config, + nblocks=nblocks, + use_multicore=True, ) out_padded = ttl.tensor.sharded_to_interleaved(out_padded, interleaved_mem_config) out_padded = out_padded.cpu().to(ttl.tensor.Layout.ROW_MAJOR) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py index b4198064967..4de749400d4 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py @@ -9,11 +9,13 @@ import torch -from ttnn.operations.conv.tt_py_max_pool import ( + +from ttnn.operations.pool import ( TTPyMaxPool, SlidingWindowOpParamsWithParallelConfig, ) + import tt_lib as ttl from tt_lib.utils import _nearest_32 from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import comp_pcc diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt index ab048c351f9..511e9286eab 100644 --- a/ttnn/CMakeLists.txt +++ b/ttnn/CMakeLists.txt @@ -58,6 +58,10 @@ set(TTNN_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/normalization/groupnorm/device/multi_core/groupnorm_op_multi_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/device/transformer_device_operation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/average_pool.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool_multi_core.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool_single_core.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool.cpp ) ### Setup TTNN as a shared library with optional Python bindings diff --git a/ttnn/cpp/pybind11/operations/__init__.hpp b/ttnn/cpp/pybind11/operations/__init__.hpp index 7ddba605eb6..8a4765599cf 100644 --- a/ttnn/cpp/pybind11/operations/__init__.hpp +++ b/ttnn/cpp/pybind11/operations/__init__.hpp @@ -14,8 +14,10 @@ #include "pybind11/operations/creation.hpp" #include "pybind11/operations/kv_cache.hpp" #include "pybind11/operations/maxpool2d.hpp" -#include "pybind11/operations/pool.hpp" +#include "pybind11/operations/normalization.hpp" +#include "pybind11/operations/copy.hpp" #include "pybind11/operations/ternary.hpp" +#include "ttnn/operations/pool/pool_pybind.hpp" #include "ttnn/operations/eltwise/binary/binary_pybind.hpp" #include 
"ttnn/operations/eltwise/binary_backward/binary_backward_pybind.hpp" #include "ttnn/operations/conv2d/conv2d_pybind.hpp" diff --git a/ttnn/cpp/pybind11/operations/pool.hpp b/ttnn/cpp/pybind11/operations/pool.hpp deleted file mode 100644 index b4056ca4163..00000000000 --- a/ttnn/cpp/pybind11/operations/pool.hpp +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include -#include - -#include "ttnn/cpp/pybind11/decorators.hpp" -#include "ttnn/operations/pool.hpp" -#include "ttnn/types.hpp" - -namespace py = pybind11; - -namespace ttnn { -namespace operations { -namespace pool { - -namespace detail { - -void bind_global_avg_pool2d(py::module& module) { - auto doc = fmt::format( - R"doc({0}(input_tensor: ttnn.Tensor, *, memory_config: Optional[ttnn.MemoryConfig] = None, dtype: Optional[ttnn.DataType] = None) -> ttnn.Tensor - - Applies {0} to :attr:`input_tensor` by performing a 2D adaptive average pooling over an input signal composed of several input planes. This operation computes the average of all elements in each channel across the entire spatial dimensions. - - .. math:: - {0}(\\mathrm{{input\\_tensor}}_i) - - Args: - * :attr:`input_tensor` (ttnn.Tensor): The input tensor to be pooled. Typically of shape (batch_size, channels, height, width). - - Keyword Args: - * :attr:`memory_config` (Optional[ttnn.MemoryConfig]): Memory configuration for the operation. - * :attr:`dtype` (Optional[ttnn.DataType]): data type for the output tensor - - Returns: - ttnn.Tensor: The tensor with the averaged values. The output tensor shape is (batch_size, channels, 1, 1). - - Example: - - >>> tensor = ttnn.from_torch(torch.randn((10, 3, 32, 32), dtype=ttnn.bfloat16), device=device) - >>> output = {1}(tensor) - )doc", - ttnn::global_avg_pool2d.base_name(), - ttnn::global_avg_pool2d.python_fully_qualified_name()); - - bind_registered_operation( - module, - ttnn::global_avg_pool2d, - doc, - ttnn::pybind_arguments_t{ - py::arg("input_tensor"), - py::kw_only(), - py::arg("memory_config") = std::nullopt, - py::arg("dtype") = std::nullopt}); -} - -} // namespace detail - -void py_module(py::module& module) { detail::bind_global_avg_pool2d(module); } - -} // namespace pool -} // namespace operations -} // namespace ttnn diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/CMakeLists.txt b/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/CMakeLists.txt index 412dda8643f..54bcd4d6c98 100644 --- a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/CMakeLists.txt +++ b/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/CMakeLists.txt @@ -25,10 +25,6 @@ set(TT_DNN_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/non_zero_indices/non_zero_indices_op.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fill_rm/fill_rm_op.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fully_connected/fully_connected_op.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pool/average_pool.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pool/max_pool.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pool/max_pool_single_core.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pool/max_pool_multi_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/transpose/transpose_op.cpp ${CMAKE_CURRENT_SOURCE_DIR}/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp b/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp index 69f93ba78ae..cbbc605375c 100644 --- a/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp +++ 
b/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp @@ -14,8 +14,6 @@ #include "ttnn/experimental/tt_dnn/op_library/fully_connected/fully_connected_op.hpp" #include "ttnn/experimental/tt_dnn/op_library/layernorm_distributed/layernorm_pre_allgather_op.hpp" #include "ttnn/experimental/tt_dnn/op_library/layernorm_distributed/layernorm_post_allgather_op.hpp" -#include "ttnn/experimental/tt_dnn/op_library/pool/average_pool.hpp" -#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp" #include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp" #include "ttnn/experimental/tt_dnn/op_library/fast_reduce_nc/fast_reduce_nc_op.hpp" #include "ttnn/experimental/tt_dnn/op_library/rotary_embedding/rotary_embedding_op.hpp" @@ -524,15 +522,16 @@ void TensorModule(py::module& m_tensor) { "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" )doc"); - // Pools + // Upsample m_tensor.def( - "average_pool_2d", - &average_pool_2d, - py::arg().noconvert(), + "upsample", + &upsample, + py::arg("input").noconvert(), + py::arg("scale_factor_h").noconvert(), + py::arg("scale_factor_w").noconvert(), py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("output_dtype").noconvert() = std::nullopt, R"doc( - Average Pool 2D + UpSample 2D It operates on tensors whose that have channels as the last dimension +----------+----------------------------+------------+-------------------------------+----------+ @@ -542,85 +541,6 @@ void TensorModule(py::module& m_tensor) { +----------+----------------------------+------------+-------------------------------+----------+ )doc"); - m_tensor.def( - "max_pool2d", - &max_pool2d, - py::arg("input").noconvert(), - py::arg("in_n").noconvert(), - py::arg("in_h").noconvert(), - py::arg("in_w").noconvert(), - py::arg("kernel_h").noconvert(), - py::arg("kernel_w").noconvert(), - py::arg("stride_h") = 1, - py::arg("stride_w") = 1, - py::arg("pad_h") = 0, - py::arg("pad_w") = 0, - py::arg("dilation_h") = 1, - py::arg("dilation_w") = 1, - py::arg("output_mem_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("nblocks") = 1, - py::arg("use_multicore") = true, - R"doc( - Max Pool 2D - +-------------------+-------------------------------+---------------+-------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===================+===============================+===============+=============+==========+ - | input | Input activations tensor | Tensor | | Yes | - | in_n | Input nbatch | Tensor | | Yes | - | in_h | Input height | Tensor | | Yes | - | in_w | Input width | Tensor | | Yes | - | kernel_h | kernel window height | uint32_t | | Yes | - | kernel_w | kernel window width | uint32_t | | Yes | - | stride_h | stride in height dim | uint32_t | | No | - | stride_w | stride in width dim | uint32_t | | No | - | pad_h | padding in height dim | uint32_t | | No | - | pad_w | padding in width dim | uint32_t | | No | - | dilation_h | kernel dilation in height dim | uint32_t | | No | - | dilation_w | kernel dilation in width dim | uint32_t | | No | - | output_mem_config | output tensor memory config | MemoryConfig | | No | - +-------------------+-------------------------------+---------------+-------------+----------+ - )doc"); - - m_tensor.def( - "max_pool2d_v2", - &max_pool2d_v2, - py::arg("input").noconvert(), - py::arg("reader_indices").noconvert(), - py::arg("in_n").noconvert(), - 
py::arg("in_h").noconvert(), - py::arg("in_w").noconvert(), - py::arg("kernel_h").noconvert(), - py::arg("kernel_w").noconvert(), - py::arg("stride_h") = 1, - py::arg("stride_w") = 1, - py::arg("pad_h") = 0, - py::arg("pad_w") = 0, - py::arg("dilation_h") = 1, - py::arg("dilation_w") = 1, - py::arg("output_mem_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("nblocks") = 1, - py::arg("use_multicore") = true, - R"doc( - Max Pool 2D - +-------------------+-------------------------------+---------------+-------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===================+===============================+===============+=============+==========+ - | input | Input activations tensor | Tensor | | Yes | - | in_n | Input nbatch | Tensor | | Yes | - | in_h | Input height | Tensor | | Yes | - | in_w | Input width | Tensor | | Yes | - | kernel_h | kernel window height | uint32_t | | Yes | - | kernel_w | kernel window width | uint32_t | | Yes | - | stride_h | stride in height dim | uint32_t | | No | - | stride_w | stride in width dim | uint32_t | | No | - | pad_h | padding in height dim | uint32_t | | No | - | pad_w | padding in width dim | uint32_t | | No | - | dilation_h | kernel dilation in height dim | uint32_t | | No | - | dilation_w | kernel dilation in width dim | uint32_t | | No | - | output_mem_config | output tensor memory config | MemoryConfig | | No | - +-------------------+-------------------------------+---------------+-------------+----------+ - )doc"); - // TMs m_tensor.def( "split_last_dim_two_chunks_tiled", diff --git a/ttnn/cpp/ttnn/operations/maxpool2d.hpp b/ttnn/cpp/ttnn/operations/maxpool2d.hpp index ee15eacbdc4..db7472942e2 100644 --- a/ttnn/cpp/ttnn/operations/maxpool2d.hpp +++ b/ttnn/cpp/ttnn/operations/maxpool2d.hpp @@ -9,7 +9,7 @@ #include "ttnn/operations/core.hpp" #include "tt_metal/common/math.hpp" #include "ttnn/operations/conv2d/conv2d.hpp" -#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp" +#include "ttnn/cpp/ttnn/operations/pool/max_pool.hpp" #include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/halo_op.hpp" #include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/sliding_window.hpp" diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/average_pool.cpp b/ttnn/cpp/ttnn/operations/pool/average_pool.cpp similarity index 85% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/average_pool.cpp rename to ttnn/cpp/ttnn/operations/pool/average_pool.cpp index f2853708507..1e9b430144e 100644 --- a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/average_pool.cpp +++ b/ttnn/cpp/ttnn/operations/pool/average_pool.cpp @@ -2,8 +2,13 @@ // // SPDX-License-Identifier: Apache-2.0 +<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/average_pool.cpp #include "ttnn/experimental/tt_dnn/op_library/pool/average_pool.hpp" #include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp" +======== +#include "ttnn/operations/pool/average_pool.hpp" +#include "tt_dnn/op_library/reduce/reduce_op.hpp" +>>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/average_pool.cpp namespace tt { diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/average_pool.hpp b/ttnn/cpp/ttnn/operations/pool/average_pool.hpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/average_pool.hpp rename to ttnn/cpp/ttnn/operations/pool/average_pool.hpp diff --git 
a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool_multi_core.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_single_core.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core.cpp rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp diff 
--git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp
rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp
diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp
similarity index 100%
rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp
rename to ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp
diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool.cpp b/ttnn/cpp/ttnn/operations/pool/max_pool.cpp
similarity index 98%
rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool.cpp
rename to ttnn/cpp/ttnn/operations/pool/max_pool.cpp
index 6d2d5814a57..399e3dfaf31 100644
--- a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool.cpp
+++ b/ttnn/cpp/ttnn/operations/pool/max_pool.cpp
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp"
+#include "ttnn/operations/pool/max_pool.hpp"
 
 #include
 #include
diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp b/ttnn/cpp/ttnn/operations/pool/max_pool.hpp
similarity index 100%
rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp
rename to ttnn/cpp/ttnn/operations/pool/max_pool.hpp
diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp
similarity index 95%
rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp
rename to ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp
index 7fd6ca2b3d6..67a773be92c 100644
--- a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp
+++ b/ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp
@@ -8,12 +8,12 @@
 #include "detail/util.hpp"
 #include "tensor/host_buffer/functions.hpp"
 #include "tensor/tensor_utils.hpp"
-#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp"
+#include "ttnn/operations/pool/max_pool.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp"  // for reduce_op_utils
 #include "ttnn/experimental/tt_dnn/op_library/sharding_utilities.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/sliding_window.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/utils.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/work_split.hpp"
 #include "tt_metal/host_api.hpp"
 
 namespace tt {
@@ -492,10 +492,10 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(
     if (input.memory_config().is_sharded()) {
         // sharded, without halo
         reader_kernel_fname =
-            std::string("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp");
+            std::string("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp");
     } else {
         reader_kernel_fname =
-            std::string("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp");
+            std::string("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp");
     }
     auto reader_kernel = CreateKernel(program, reader_kernel_fname, all_cores, reader_config);
@@ -509,7 +509,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(
     std::vector writer_ct_args = reader_ct_args;
     auto writer_config = WriterDataMovementConfig(writer_ct_args, writer_defines);
     std::string writer_kernel_fname(
-        "ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core.cpp");
+        "ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp");
     auto writer_kernel = CreateKernel(program, writer_kernel_fname, all_cores, writer_config);
 
     /**
@@ -542,7 +542,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic(
         .math_approx_mode = false,
         .compile_args = compute_ct_args,
         .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)};
-    std::string compute_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool_multi_core.cpp");
+    std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp");
     auto compute_kernel = CreateKernel(program, compute_kernel_fname, core_range, compute_config);
 
     if (out_nhw_per_core_cliff > 0) {
@@ -942,7 +942,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl
         bf16_one_u32};
 
     std::string reader_kernel_fname(
-        "ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp");
+        "ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp");
     auto reader0_config = DataMovementConfig{
         .processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = reader0_ct_args};
@@ -973,7 +973,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl
         .compile_args = writer_ct_args,
         .defines = writer_defines};
     std::string
-    writer_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp");
+    writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp");
     auto
     writer_kernel = CreateKernel(program, writer_kernel_fname, all_cores, writer_config);
     */
@@ -1007,7 +1007,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl
         .math_approx_mode = false,
         .compile_args = compute_ct_args,
         .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)};
-    std::string compute_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool_multi_core.cpp");
+    std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp");
     auto compute_kernel = CreateKernel(program, compute_kernel_fname, core_range, compute_config);
 
     /*
diff --git a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp
similarity index 91%
rename from ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_single_core.cpp
rename to ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp
index c5de5ed12d6..18e98a214ae 100644
--- a/ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_single_core.cpp
+++ b/ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp
@@ -8,9 +8,9 @@
 #include "detail/util.hpp"
 #include "tensor/host_buffer/functions.hpp"
 #include "tensor/tensor_utils.hpp"
-#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp"
+#include "ttnn/operations/pool/max_pool.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp"  // for reduce_op_utils
 #include "ttnn/experimental/tt_dnn/op_library/work_split.hpp"
 #include "tt_metal/host_api.hpp"
 
 namespace tt {
@@ -151,7 +151,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten
         (in_cb_page_nelems_padded * out_nelems * 2) >> 5   // TODO: generalize num rows to fill in in_cb
     };
     auto reader_config = ReaderDataMovementConfig(reader_ct_args);
-    std::string reader_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_single_core.cpp");
+    std::string reader_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp");
     auto reader_kernel = CreateKernel(program,
                                       reader_kernel_fname,
                                       cores,
@@ -200,7 +200,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten
     std::vector writer_ct_args = reader_ct_args;
     std::vector writer_rt_args = reader_rt_args;
     auto writer_config = WriterDataMovementConfig(writer_ct_args);
-    std::string writer_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp");
+    std::string writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp");
     auto writer_kernel = CreateKernel(program,
                                       writer_kernel_fname,
                                       cores,
@@ -228,7 +228,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten
             nbatch,
             out_h},    // out_h_per_core
         .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)};
-    std::string compute_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool.cpp");
+    std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool.cpp");
     auto compute_kernel = CreateKernel(program,
                                       compute_kernel_fname,
                                       cores,
diff --git a/ttnn/cpp/ttnn/operations/pool.hpp b/ttnn/cpp/ttnn/operations/pool/pool.hpp
similarity index 92%
rename from ttnn/cpp/ttnn/operations/pool.hpp
rename to ttnn/cpp/ttnn/operations/pool/pool.hpp
index 4631c645ed5..f416785be24 100644
--- a/ttnn/cpp/ttnn/operations/pool.hpp
+++ b/ttnn/cpp/ttnn/operations/pool/pool.hpp
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include "ttnn/experimental/tt_dnn/op_library/pool/average_pool.hpp"
+#include "ttnn/cpp/ttnn/operations/pool/average_pool.hpp"
 #include "ttnn/decorators.hpp"
 #include "ttnn/operations/core.hpp"
diff --git a/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp
new file mode 100644
index 00000000000..cda8b5f4db9
--- /dev/null
+++ b/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp
@@ -0,0 +1,167 @@
+// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include
+#include
+
+#include "ttnn/cpp/pybind11/decorators.hpp"
+#include "ttnn/operations/pool/pool.hpp"
+#include "ttnn/types.hpp"
+
+namespace py = pybind11;
+
+namespace ttnn {
+namespace operations {
+namespace pool {
+
+namespace detail {
+
+void bind_global_avg_pool2d(py::module& module) {
+    auto doc = fmt::format(
+        R"doc({0}(input_tensor: ttnn.Tensor, *, memory_config: Optional[ttnn.MemoryConfig] = None, dtype: Optional[ttnn.DataType] = None) -> ttnn.Tensor
+
+        Applies {0} to :attr:`input_tensor` by performing a 2D adaptive average pooling over an input signal composed of several input planes. This operation computes the average of all elements in each channel across the entire spatial dimensions.
+
+        .. math::
+            {0}(\\mathrm{{input\\_tensor}}_i)
+
+        Args:
+            * :attr:`input_tensor` (ttnn.Tensor): The input tensor to be pooled. Typically of shape (batch_size, channels, height, width).
+
+        Keyword Args:
+            * :attr:`memory_config` (Optional[ttnn.MemoryConfig]): Memory configuration for the operation.
+            * :attr:`dtype` (Optional[ttnn.DataType]): data type for the output tensor
+
+        Returns:
+            ttnn.Tensor: The tensor with the averaged values.
The output tensor shape is (batch_size, channels, 1, 1).
+
+        Example:
+
+            >>> tensor = ttnn.from_torch(torch.randn((10, 3, 32, 32), dtype=ttnn.bfloat16), device=device)
+            >>> output = {1}(tensor)
+        )doc",
+        ttnn::global_avg_pool2d.base_name(),
+        ttnn::global_avg_pool2d.python_fully_qualified_name());
+
+    bind_registered_operation(
+        module,
+        ttnn::global_avg_pool2d,
+        doc,
+        ttnn::pybind_arguments_t{
+            py::arg("input_tensor"),
+            py::kw_only(),
+            py::arg("memory_config") = std::nullopt,
+            py::arg("dtype") = std::nullopt});
+}
+
+}  // namespace detail
+
+void py_module(py::module& module) {
+    detail::bind_global_avg_pool2d(module);
+    module.def(
+        "max_pool2d",
+        &max_pool2d,
+        py::arg("input").noconvert(),
+        py::arg("in_n").noconvert(),
+        py::arg("in_h").noconvert(),
+        py::arg("in_w").noconvert(),
+        py::arg("kernel_h").noconvert(),
+        py::arg("kernel_w").noconvert(),
+        py::arg("stride_h") = 1,
+        py::arg("stride_w") = 1,
+        py::arg("pad_h") = 0,
+        py::arg("pad_w") = 0,
+        py::arg("dilation_h") = 1,
+        py::arg("dilation_w") = 1,
+        py::kw_only(),
+        py::arg("memory_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
+        py::arg("nblocks") = 1,
+        py::arg("use_multicore") = true,
+        R"doc(
+        Max Pool 2D
+        +-------------------+-------------------------------+---------------+-------------+----------+
+        | Argument          | Description                   | Data type     | Valid range | Required |
+        +===================+===============================+===============+=============+==========+
+        | input             | Input activations tensor      | Tensor        |             | Yes      |
+        | in_n              | Input batch size              | uint32_t      |             | Yes      |
+        | in_h              | Input height                  | uint32_t      |             | Yes      |
+        | in_w              | Input width                   | uint32_t      |             | Yes      |
+        | kernel_h          | kernel window height          | uint32_t      |             | Yes      |
+        | kernel_w          | kernel window width           | uint32_t      |             | Yes      |
+        | stride_h          | stride in height dim          | uint32_t      |             | No       |
+        | stride_w          | stride in width dim           | uint32_t      |             | No       |
+        | pad_h             | padding in height dim         | uint32_t      |             | No       |
+        | pad_w             | padding in width dim          | uint32_t      |             | No       |
+        | dilation_h        | kernel dilation in height dim | uint32_t      |             | No       |
+        | dilation_w        | kernel dilation in width dim  | uint32_t      |             | No       |
+        | memory_config     | Output memory config          | MemoryConfig  |             | No       |
+        +-------------------+-------------------------------+---------------+-------------+----------+
+    )doc");
+
+    module.def(
+        "max_pool2d_v2",
+        &max_pool2d_v2,
+        py::arg("input").noconvert(),
+        py::arg("reader_indices").noconvert(),
+        py::arg("in_n").noconvert(),
+        py::arg("in_h").noconvert(),
+        py::arg("in_w").noconvert(),
+        py::arg("kernel_h").noconvert(),
+        py::arg("kernel_w").noconvert(),
+        py::arg("stride_h") = 1,
+        py::arg("stride_w") = 1,
+        py::arg("pad_h") = 0,
+        py::arg("pad_w") = 0,
+        py::arg("dilation_h") = 1,
+        py::arg("dilation_w") = 1,
+        py::kw_only(),
+        py::arg("memory_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
+        py::arg("nblocks") = 1,
+        py::arg("use_multicore") = true,
+        R"doc(
+        Max Pool 2D
+        +-------------------+-------------------------------+---------------+-------------+----------+
+        | Argument          | Description                   | Data type     | Valid range | Required |
+        +===================+===============================+===============+=============+==========+
+        | input             | Input activations tensor      | Tensor        |             | Yes      |
+        | reader_indices    | Pre-computed reader indices   | Tensor        |             | Yes      |
+        | in_n              | Input batch size              | uint32_t      |             | Yes      |
+        | in_h              | Input height                  | uint32_t      |             | Yes      |
+        | in_w              | Input width                   | uint32_t      |             | Yes      |
+        | kernel_h          | kernel window height          | uint32_t      |             | Yes      |
+        | kernel_w          | kernel window width           | uint32_t      |             | Yes      |
+        | stride_h          | stride in height dim          | uint32_t      |             | No       |
+        | stride_w          | stride in width dim           | uint32_t      |             | No       |
+        | pad_h             | padding in height dim         | uint32_t      |             | No       |
+        | pad_w             | padding in width dim          | uint32_t      |             | No       |
+        | dilation_h        | kernel dilation in height dim | uint32_t      |             | No       |
+        | dilation_w        | kernel dilation in width dim  | uint32_t      |             | No       |
+        | memory_config     | output tensor memory config   | MemoryConfig  |             | No       |
+        +-------------------+-------------------------------+---------------+-------------+----------+
+    )doc");
+
+    module.def(
+        "average_pool_2d",
+        &average_pool_2d,
+        py::arg().noconvert(),
+        py::kw_only(),
+        py::arg("memory_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
+        py::arg("dtype").noconvert() = std::nullopt,
+        R"doc(
+        Average Pool 2D
+        It operates on tensors that have channels as the last dimension.
+
+        +----------+----------------------------+------------+-------------------------------+----------+
+        | Argument | Description                | Data type  | Valid range                   | Required |
+        +==========+============================+============+===============================+==========+
+        | act      | Input activations tensor   | Tensor     |                               | Yes      |
+        +----------+----------------------------+------------+-------------------------------+----------+
+    )doc");
+}
+
+}  // namespace pool
+}  // namespace operations
+}  // namespace ttnn
diff --git a/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp b/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp
index 5b5d2d823d0..d30a5d97029 100644
--- a/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp
+++ b/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp
@@ -10,7 +10,7 @@
 #include "detail/util.hpp"
 #include "tensor/host_buffer/functions.hpp"
 #include "tensor/tensor_utils.hpp"
-#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp"
+#include "ttnn/cpp/ttnn/operations/pool/max_pool.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp"  // for reduce_op_utils
 #include "ttnn/experimental/tt_dnn/op_library/work_split.hpp"
 #include "tt_metal/host_api.hpp"
diff --git a/ttnn/tt_lib/fused_ops/average_pool.py b/ttnn/tt_lib/fused_ops/average_pool.py
index 22ac1781c22..18bcbc5018a 100644
--- a/ttnn/tt_lib/fused_ops/average_pool.py
+++ b/ttnn/tt_lib/fused_ops/average_pool.py
@@ -5,11 +5,12 @@
 
 import tt_lib as ttl
 from typing import Union, List
+import ttnn
 
 
 def run_avg_pool_on_device_wrapper(device):
     def average_pool_2d(x, output_mem_config, output_dtype=None):
-        out = ttl.tensor.average_pool_2d(x, output_mem_config, output_dtype)
+        out = ttnn.average_pool_2d(x, memory_config=output_mem_config, dtype=output_dtype)
         return out
 
     return average_pool_2d
diff --git a/ttnn/tt_lib/fused_ops/max_pool.py b/ttnn/tt_lib/fused_ops/max_pool.py
index b8b0b8240d6..466db7f0641 100644
--- a/ttnn/tt_lib/fused_ops/max_pool.py
+++ b/ttnn/tt_lib/fused_ops/max_pool.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import tt_lib as ttl
-
+import ttnn
 from typing import Union, List
 
 
@@ -33,8 +33,8 @@ def max_pool_2d(x):
         # out_shape_nopad = compute_max_pool_shape(kernel_size, stride, padding, x_shape_nopad)
         # if reshape_2d and channels_last:
         #     x = x.reshape(x_shape_nopad[0], 1, x_shape_nopad[1] * x_shape_nopad[2], x_shape_nopad[3])
-        # out = ttl.tensor.max_pool2d(x, x_shape_nopad[1], x_shape_nopad[2], kernel_size, kernel_size, stride, stride, padding, padding, output_mem_config=output_mem_config, nblocks=nblocks, use_multicore=True)
-    out = ttl.tensor.max_pool2d(
+        # out = ttnn.max_pool2d(x, x_shape_nopad[1], x_shape_nopad[2], kernel_size, kernel_size, stride, stride, padding, padding, output_mem_config=output_mem_config,
nblocks=nblocks, use_multicore=True) + out = ttnn.max_pool2d( x, in_n, in_h, @@ -45,7 +45,7 @@ def max_pool_2d(x): stride, padding, padding, - output_mem_config=output_mem_config, + memory_config=output_mem_config, nblocks=nblocks, use_multicore=True, ) diff --git a/ttnn/ttnn/__init__.py b/ttnn/ttnn/__init__.py index ae8983df9e4..8b309a53752 100644 --- a/ttnn/ttnn/__init__.py +++ b/ttnn/ttnn/__init__.py @@ -322,4 +322,4 @@ def prelu(*args, **kwargs): # Alias for leaky_relu. TODO(#8544): implement PReL determine_expected_group_norm_sharded_config_and_grid_size, ) from ttnn.operations.conv2d import Conv2d, Conv2dConfig, get_conv_output_dim, get_conv_padded_input_shape_and_mem_config -from ttnn.operations.pool import MaxPool2d +from ttnn.operations.pool import MaxPool2d, global_avg_pool2d, max_pool2d, average_pool_2d diff --git a/ttnn/ttnn/operations/conv/tt_py_max_pool.py b/ttnn/ttnn/operations/conv/tt_py_max_pool.py index 03f8f878240..78b9eb17d0b 100644 --- a/ttnn/ttnn/operations/conv/tt_py_max_pool.py +++ b/ttnn/ttnn/operations/conv/tt_py_max_pool.py @@ -30,6 +30,7 @@ import math import torch +from ttnn._ttnn.operations import pool class TTPyMaxPool(TTPyOp): @@ -221,7 +222,7 @@ def max_pool_(activation): if self.deallocate_activation: activation.deallocate() - output = ttl.tensor.max_pool2d_v2( + output = pool.max_pool2d_v2( haloed_act, reader_indices, in_n, @@ -233,7 +234,7 @@ def max_pool_(activation): stride_w, pad_h, pad_w, - output_mem_config=self.output_sharded_memory_config, + memory_config=self.output_sharded_memory_config, ) haloed_act.deallocate() return output diff --git a/ttnn/ttnn/operations/pool.py b/ttnn/ttnn/operations/pool.py index 40d678634ce..55d8dc3b319 100644 --- a/ttnn/ttnn/operations/pool.py +++ b/ttnn/ttnn/operations/pool.py @@ -10,6 +10,7 @@ from ttnn.operations.conv.tt_py_max_pool import ( TTPyMaxPool, SlidingWindowOpParams, + SlidingWindowOpParamsWithParallelConfig, ) import tt_lib as ttl @@ -128,4 +129,13 @@ def _golden_function(input_tensor: ttnn.Tensor): ttnn.attach_golden_function(ttnn.global_avg_pool2d, golden_function=_golden_function) +max_pool2d = ttnn.register_operation( + name="ttnn.max_pool2d", is_method=True, validate_input_tensors=lambda *args, **kwargs: None +)(ttnn._ttnn.operations.pool.max_pool2d) + + +average_pool_2d = ttnn.register_operation( + name="ttnn.average_pool_2d", is_method=True, validate_input_tensors=lambda *args, **kwargs: None +)(ttnn._ttnn.operations.pool.average_pool_2d) + __all__ = [] From 96e64ee5cc87c8542e85878115f2af7768b0dbbe Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 20 Jul 2024 14:17:25 +0000 Subject: [PATCH 02/14] #9758: Moved maxpool.hpp --- tests/tt_eager/ops/test_average_pool.cpp | 2 +- ttnn/CMakeLists.txt | 4 + ttnn/cpp/pybind11/operations/__init__.hpp | 15 +- ttnn/cpp/pybind11/operations/maxpool2d.hpp | 47 ------- .../cpp/ttnn/operations/pool/average_pool.hpp | 22 --- .../avg_pool.cpp} | 15 +- .../pool/{pool.hpp => avgpool/avg_pool.hpp} | 20 ++- .../pool/avgpool/avg_pool_pybind.hpp | 85 +++++++++++ .../device/kernels/compute/max_pool.cpp | 0 .../kernels/compute/max_pool_multi_core.cpp | 0 .../reader_max_pool_2d_multi_core.cpp | 0 .../reader_max_pool_2d_multi_core_sharded.cpp | 0 ...x_pool_2d_multi_core_sharded_with_halo.cpp | 0 ...ool_2d_multi_core_sharded_with_halo_v2.cpp | 0 .../reader_max_pool_2d_single_core.cpp | 0 .../writer_max_pool_2d_multi_core.cpp | 0 .../writer_max_pool_2d_multi_core_v2.cpp | 0 .../writer_max_pool_2d_single_core.cpp | 0 .../device}/max_pool_multi_core.cpp | 36 ----- 
.../device}/max_pool_single_core.cpp          |  18 ---
 .../pool/{ => maxpool}/max_pool.cpp           |   6 +-
 .../pool/{ => maxpool}/max_pool.hpp           |  54 +++++++
 .../pool/maxpool/maxpool_pybind.hpp           | 132 ++++++++++++++++++
 ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp |  30 +++-
 .../ttnn/operations/upsample/upsample_op.cpp  |   2 +-
 ttnn/ttnn/operations/conv/tt_py_max_pool.py   |   2 +-
 ttnn/ttnn/operations/pool.py                  |  13 +-
 27 files changed, 349 insertions(+), 154 deletions(-)
 delete mode 100644 ttnn/cpp/pybind11/operations/maxpool2d.hpp
 delete mode 100644 ttnn/cpp/ttnn/operations/pool/average_pool.hpp
 rename ttnn/cpp/ttnn/operations/pool/{average_pool.cpp => avgpool/avg_pool.cpp} (66%)
 rename ttnn/cpp/ttnn/operations/pool/{pool.hpp => avgpool/avg_pool.hpp} (65%)
 create mode 100644 ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/compute/max_pool.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/compute/max_pool_multi_core.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp (100%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool/device}/max_pool_multi_core.cpp (94%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool/device}/max_pool_single_core.cpp (91%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/max_pool.cpp (98%)
 rename ttnn/cpp/ttnn/operations/pool/{ => maxpool}/max_pool.hpp (77%)
 create mode 100644 ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp

diff --git a/tests/tt_eager/ops/test_average_pool.cpp b/tests/tt_eager/ops/test_average_pool.cpp
index 5dab6ec8f2e..bd0435a7d91 100644
--- a/tests/tt_eager/ops/test_average_pool.cpp
+++ b/tests/tt_eager/ops/test_average_pool.cpp
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#include "ttnn/cpp/ttnn/operations/pool/average_pool.hpp"
+#include "ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/auto_format.hpp"
 
 #include "tt_numpy/functions.hpp"
diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt
index 511e9286eab..978abd1f1fd 100644
--- a/ttnn/CMakeLists.txt
+++ b/ttnn/CMakeLists.txt
@@ -61,6 +61,6 @@ set(TTNN_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/average_pool.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool_multi_core.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool_single_core.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/max_pool.cpp
 )
 
 ### Setup TTNN as a shared library with optional Python bindings
diff --git a/ttnn/cpp/pybind11/operations/__init__.hpp b/ttnn/cpp/pybind11/operations/__init__.hpp
index 8a4765599cf..5801d509c70 100644
--- a/ttnn/cpp/pybind11/operations/__init__.hpp
+++ b/ttnn/cpp/pybind11/operations/__init__.hpp
@@ -14,10 +14,12 @@
 #include "pybind11/operations/creation.hpp"
 #include "pybind11/operations/kv_cache.hpp"
-#include "pybind11/operations/maxpool2d.hpp"
+#include "pybind11/operations/matmul.hpp"
 #include "pybind11/operations/normalization.hpp"
 #include "pybind11/operations/copy.hpp"
 #include "pybind11/operations/ternary.hpp"
-#include "ttnn/operations/pool/pool_pybind.hpp"
+
+#include "ttnn/operations/pool/avgpool/avg_pool_pybind.hpp"
+#include "ttnn/operations/pool/maxpool/maxpool_pybind.hpp"
 #include "ttnn/operations/eltwise/binary/binary_pybind.hpp"
 #include "ttnn/operations/eltwise/binary_backward/binary_backward_pybind.hpp"
 #include "ttnn/operations/conv2d/conv2d_pybind.hpp"
@@ -93,8 +96,11 @@ void py_module(py::module& module) {
     auto m_conv2d = module.def_submodule("conv2d", "conv2d operation");
     conv2d::py_module(m_conv2d);
 
-    auto m_maxpool2d = module.def_submodule("maxpool2d", "maxpool 2d operation");
-    maxpool2d::py_module(m_maxpool2d);
+    auto m_maxpool = module.def_submodule("maxpool", "maxpool operation");
+    maxpool::py_module(m_maxpool);
+
+    auto m_avgpool = module.def_submodule("avgpool", "avgpool operation");
+    avgpool::py_module(m_avgpool);
 
     auto m_normalization = module.def_submodule("normalization", "normalization operations");
     normalization::py_module(m_normalization);
@@ -108,9 +114,6 @@ void py_module(py::module& module) {
     auto m_kv_cache = module.def_submodule("kv_cache", "KV cache operations");
     kv_cache::py_module(m_kv_cache);
 
-    auto m_pool = module.def_submodule("pool", "pool operations");
-    pool::py_module(m_pool);
-
     auto m_copy = module.def_submodule("copy", "copy operations");
     copy::py_module(m_copy);
 
diff --git a/ttnn/cpp/pybind11/operations/maxpool2d.hpp b/ttnn/cpp/pybind11/operations/maxpool2d.hpp
deleted file mode 100644
index a215600d258..00000000000
--- a/ttnn/cpp/pybind11/operations/maxpool2d.hpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include
-#include
-
-#include "ttnn/operations/maxpool2d.hpp"
-#include "ttnn/types.hpp"
-
-namespace py = pybind11;
-
-namespace ttnn::operations::maxpool2d {
-
-using array2_t = std::array;
-
-void py_module(py::module& module) {
-    module.def(
-        "maxpool2d",
-        [](const ttnn::Tensor& input_tensor,
-           uint32_t batch_size,
-           uint32_t input_height,
-           uint32_t input_width,
-           uint32_t channels,
-           array2_t kernel_size,
-           array2_t stride,
-           array2_t padding,
-           array2_t dilation,
-           Device& device) -> Tensor {
-            return maxpool2d(input_tensor, batch_size, input_height, input_width, channels, kernel_size, stride, padding, dilation, device);
-        },
-        py::kw_only(),
-        py::arg("input_tensor"),
-        py::arg("batch_size"),
-        py::arg("input_height"),
-        py::arg("input_width"),
-        py::arg("channels"),
-        py::arg("kernel_size"),
-        py::arg("stride"),
-        py::arg("padding"),
-        py::arg("dilation"),
-        py::arg("device"));
-}
-
-} // namespace ttnn::operations::maxpool
diff --git a/ttnn/cpp/ttnn/operations/pool/average_pool.hpp b/ttnn/cpp/ttnn/operations/pool/average_pool.hpp
deleted file mode 100644
index e6f2b63da50..00000000000
--- a/ttnn/cpp/ttnn/operations/pool/average_pool.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "tt_metal/host_api.hpp"
-#include "tensor/tensor.hpp"
-
-#include "ttnn/experimental/tt_dnn/op_library/operation.hpp"
-
-namespace tt {
-namespace tt_metal {
-
-enum class PoolType {
-    AVG
-};
-
-Tensor average_pool_2d(const Tensor& input, const MemoryConfig& output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, const std::optional& output_dtype = std::nullopt);
-
-} // namespace tt_metal
-} // namespace tt
diff --git a/ttnn/cpp/ttnn/operations/pool/average_pool.cpp b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp
similarity index 66%
rename from ttnn/cpp/ttnn/operations/pool/average_pool.cpp
rename to ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp
index 1e9b430144e..4eed04e03e7 100644
--- a/ttnn/cpp/ttnn/operations/pool/average_pool.cpp
+++ b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp
@@ -2,28 +2,28 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#include "ttnn/operations/pool/average_pool.hpp"
+#include "ttnn/operations/pool/avgpool/avg_pool.hpp"
 #include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp"
 
 namespace tt {
 namespace tt_metal {
 
 template
-Tensor pool_2d(const Tensor& input, const MemoryConfig& output_mem_config, const std::optional& output_dtype) {
+Tensor pool_2d(const Tensor& input, const MemoryConfig& memory_config, const std::optional& output_dtype) {
     TT_ASSERT(input.storage_type() == StorageType::DEVICE, "Input tensor needs to be on device");
     auto input_shape = input.get_legacy_shape();
     switch (pool) {
         case PoolType::AVG: {
             auto height_without_padding = input.get_legacy_shape().without_padding()[-2];
-            return reduce(input, ReduceOpMath::SUM, ReduceOpDim::H, 1 / float(height_without_padding), output_mem_config, output_dtype);
+            return reduce(input, ReduceOpMath::SUM, ReduceOpDim::H, 1 / float(height_without_padding), memory_config, output_dtype);
         }
         default:
TT_ASSERT(false && "Undefined pool type"); } } -Tensor average_pool_2d(const Tensor& input, const MemoryConfig& output_mem_config, const std::optional& output_dtype) { +Tensor average_pool_2d(const Tensor& input, const MemoryConfig& memory_config, const std::optional& output_dtype) { TT_ASSERT(input.storage_type() == StorageType::DEVICE, "Input tensor needs to be on device"); auto output = input; @@ -39,7 +34,7 @@ Tensor average_pool_2d(const Tensor& input, const MemoryConfig& output_mem_confi auto output_shape = Shape({in_shape[0], 1, in_shape[1] * in_shape[2], in_shape[3]}, output_padding); output = output.reshape(output_shape); - output = pool_2d(output, output_mem_config, output_dtype); + output = pool_2d(output, memory_config, output_dtype); return output; } diff --git a/ttnn/cpp/ttnn/operations/pool/pool.hpp b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp similarity index 65% rename from ttnn/cpp/ttnn/operations/pool/pool.hpp rename to ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp index f416785be24..6486a3efe49 100644 --- a/ttnn/cpp/ttnn/operations/pool/pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp @@ -4,7 +4,25 @@ #pragma once -#include "ttnn/cpp/ttnn/operations/pool/average_pool.hpp" +#include "tt_metal/host_api.hpp" +#include "tensor/tensor.hpp" + +#include "tt_dnn/op_library/operation.hpp" + +namespace tt { +namespace tt_metal { + +enum class PoolType { + AVG +}; + +Tensor average_pool_2d(const Tensor& input, const MemoryConfig& memory_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, const std::optional& output_dtype = std::nullopt); + +} // namespace tt_metal +} // namespace tt + + +#include "ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp" #include "ttnn/decorators.hpp" #include "ttnn/operations/core.hpp" diff --git a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp new file mode 100644 index 00000000000..1aebffdb599 --- /dev/null +++ b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp @@ -0,0 +1,85 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include "ttnn/cpp/pybind11/decorators.hpp" +#include "ttnn/operations/pool/avgpool/avg_pool.hpp" +#include "ttnn/types.hpp" + +namespace py = pybind11; +using array2_t = std::array; +namespace ttnn { +namespace operations { +namespace avgpool { + +namespace detail { + +void bind_global_avg_pool2d(py::module& module) { + auto doc = fmt::format( + R"doc({0}(input_tensor: ttnn.Tensor, *, memory_config: Optional[ttnn.MemoryConfig] = None, dtype: Optional[ttnn.DataType] = None) -> ttnn.Tensor + + Applies {0} to :attr:`input_tensor` by performing a 2D adaptive average pooling over an input signal composed of several input planes. This operation computes the average of all elements in each channel across the entire spatial dimensions. + + .. math:: + {0}(\\mathrm{{input\\_tensor}}_i) + + Args: + * :attr:`input_tensor` (ttnn.Tensor): The input tensor to be pooled. Typically of shape (batch_size, channels, height, width). + + Keyword Args: + * :attr:`memory_config` (Optional[ttnn.MemoryConfig]): Memory configuration for the operation. + * :attr:`dtype` (Optional[ttnn.DataType]): data type for the output tensor + + Returns: + ttnn.Tensor: The tensor with the averaged values. The output tensor shape is (batch_size, channels, 1, 1). 
+ + Example: + + >>> tensor = ttnn.from_torch(torch.randn((10, 3, 32, 32), dtype=ttnn.bfloat16), device=device) + >>> output = {1}(tensor) + )doc", + ttnn::global_avg_pool2d.base_name(), + ttnn::global_avg_pool2d.python_fully_qualified_name()); + + bind_registered_operation( + module, + ttnn::global_avg_pool2d, + doc, + ttnn::pybind_arguments_t{ + py::arg("input_tensor"), + py::kw_only(), + py::arg("memory_config") = std::nullopt, + py::arg("dtype") = std::nullopt}); +} + +} // namespace detail + +void py_module(py::module& module) { + detail::bind_global_avg_pool2d(module); + module.def( + "average_pool_2d", + &average_pool_2d, + py::arg().noconvert(), + py::kw_only(), + py::arg("memory_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, + py::arg("dtype").noconvert() = std::nullopt, + R"doc( + Average Pool 2D + It operates on tensors whose that have channels as the last dimension + + +----------+----------------------------+------------+-------------------------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +==========+============================+============+===============================+==========+ + | act | Input activations tensor | Tensor | | Yes | + +----------+----------------------------+------------+-------------------------------+----------+ + )doc"); +} + +} // namespace avgpool +} // namespace operations +} // namespace ttnn diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/compute/max_pool.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/compute/max_pool.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/compute/max_pool_multi_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/compute/max_pool_multi_core.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo.cpp diff --git 
a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp similarity index 94% rename from ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp index 67a773be92c..9e0448f42d2 100644 --- a/ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp @@ -8,21 +8,12 @@ #include "detail/util.hpp" #include "tensor/host_buffer/functions.hpp" #include "tensor/tensor_utils.hpp" -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp -#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp" -#include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp" // for reduce_op_utils -#include "ttnn/experimental/tt_dnn/op_library/sharding_utilities.hpp" -#include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/sliding_window.hpp" -#include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/utils.hpp" -#include "ttnn/experimental/tt_dnn/op_library/work_split.hpp" -======== #include "ttnn/operations/pool/max_pool.hpp" #include "tt_dnn/op_library/reduce/reduce_op.hpp" // for reduce_op_utils #include 
"tt_dnn/op_library/sharding_utilities.hpp" #include "tt_dnn/op_library/sliding_window_op_infra/sliding_window.hpp" #include "tt_dnn/op_library/sliding_window_op_infra/utils.hpp" #include "tt_dnn/op_library/work_split.hpp" ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp #include "tt_metal/host_api.hpp" namespace tt { @@ -501,17 +492,10 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( if (input.memory_config().is_sharded()) { // sharded, without halo reader_kernel_fname = -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp - std::string("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp"); - } else { - reader_kernel_fname = - std::string("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core.cpp"); -======== std::string("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp"); } else { reader_kernel_fname = std::string("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp"); ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp } auto reader_kernel = CreateKernel(program, reader_kernel_fname, all_cores, reader_config); @@ -525,11 +509,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( std::vector writer_ct_args = reader_ct_args; auto writer_config = WriterDataMovementConfig(writer_ct_args, writer_defines); std::string writer_kernel_fname( -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp - "ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core.cpp"); -======== "ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp"); ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp auto writer_kernel = CreateKernel(program, writer_kernel_fname, all_cores, writer_config); /** @@ -562,11 +542,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( .math_approx_mode = false, .compile_args = compute_ct_args, .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)}; -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp - std::string compute_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool_multi_core.cpp"); -======== std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp"); ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp auto compute_kernel = CreateKernel(program, compute_kernel_fname, core_range, compute_config); if (out_nhw_per_core_cliff > 0) { @@ -966,11 +942,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl bf16_one_u32}; std::string reader_kernel_fname( -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp - "ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp"); -======== "ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp"); ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp auto 
reader0_config = DataMovementConfig{ .processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = reader0_ct_args}; @@ -1001,11 +973,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl .compile_args = writer_ct_args, .defines = writer_defines}; std::string -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp - writer_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp"); auto -======== writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp"); auto ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp writer_kernel = CreateKernel(program, writer_kernel_fname, all_cores, writer_config); */ @@ -1039,11 +1007,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl .math_approx_mode = false, .compile_args = compute_ct_args, .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)}; -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_multi_core.cpp - std::string compute_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool_multi_core.cpp"); -======== std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp"); ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_multi_core.cpp auto compute_kernel = CreateKernel(program, compute_kernel_fname, core_range, compute_config); /* diff --git a/ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp similarity index 91% rename from ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp index 18e98a214ae..337290ca2d3 100644 --- a/ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp @@ -8,15 +8,9 @@ #include "detail/util.hpp" #include "tensor/host_buffer/functions.hpp" #include "tensor/tensor_utils.hpp" -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_single_core.cpp -#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp" -#include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp" // for reduce_op_utils -#include "ttnn/experimental/tt_dnn/op_library/work_split.hpp" -======== #include "ttnn/operations/pool/max_pool.hpp" #include "tt_dnn/op_library/reduce/reduce_op.hpp" // for reduce_op_utils #include "tt_dnn/op_library/work_split.hpp" ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp #include "tt_metal/host_api.hpp" namespace tt { @@ -157,11 +151,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten (in_cb_page_nelems_padded * out_nelems * 2) >> 5 // TODO: generalize num rows to fill in in_cb }; auto reader_config = ReaderDataMovementConfig(reader_ct_args); -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_single_core.cpp - std::string reader_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/reader_max_pool_2d_single_core.cpp"); -======== std::string reader_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp"); ->>>>>>>> 
56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp auto reader_kernel = CreateKernel(program, reader_kernel_fname, cores, @@ -210,11 +200,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten std::vector writer_ct_args = reader_ct_args; std::vector writer_rt_args = reader_rt_args; auto writer_config = WriterDataMovementConfig(writer_ct_args); -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_single_core.cpp - std::string writer_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/dataflow/writer_max_pool_2d_single_core.cpp"); -======== std::string writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp"); ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp auto writer_kernel = CreateKernel(program, writer_kernel_fname, cores, @@ -242,11 +228,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten nbatch, out_h}, // out_h_per_core .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)}; -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool_single_core.cpp - std::string compute_kernel_fname("ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/kernels/compute/max_pool.cpp"); -======== std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool.cpp"); ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool_single_core.cpp auto compute_kernel = CreateKernel(program, compute_kernel_fname, cores, diff --git a/ttnn/cpp/ttnn/operations/pool/max_pool.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.cpp similarity index 98% rename from ttnn/cpp/ttnn/operations/pool/max_pool.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.cpp index 399e3dfaf31..8061b2007d6 100644 --- a/ttnn/cpp/ttnn/operations/pool/max_pool.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.cpp @@ -2,11 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 -<<<<<<<< HEAD:ttnn/cpp/ttnn/experimental/tt_dnn/op_library/pool/max_pool.cpp -#include "ttnn/experimental/tt_dnn/op_library/pool/max_pool.hpp" -======== -#include "ttnn/operations/pool/max_pool.hpp" ->>>>>>>> 56d42b955d (#9758: Moved cpp files to ttnn folder):ttnn/cpp/ttnn/operations/pool/max_pool.cpp +#include "ttnn/operations/pool/maxpool/max_pool.hpp" #include #include diff --git a/ttnn/cpp/ttnn/operations/pool/max_pool.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp similarity index 77% rename from ttnn/cpp/ttnn/operations/pool/max_pool.hpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp index e2a3610010c..2624b36fc5a 100644 --- a/ttnn/cpp/ttnn/operations/pool/max_pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp @@ -4,10 +4,14 @@ #pragma once +#include "ttnn/core.hpp" +#include "ttnn/types.hpp" #include "tensor/tensor.hpp" +#include "ttnn/cpp/ttnn/operations/conv2d.hpp" #include "ttnn/experimental/tt_dnn/op_library/run_operation.hpp" #include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/sliding_window.hpp" + inline uint32_t ceil_multiple_of(uint32_t n, uint32_t m) { return (uint32_t) std::ceil((float) n / m) * m; } @@ -173,3 +177,53 @@ Tensor maxpool2d_new(const Tensor &input, } // namespace tt_metal } // namespace tt + + +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + + + +namespace ttnn::operations { +namespace maxpool { + +using array2_t = std::array; + +// maxpool macro-op +inline Tensor maxpool2d(const Tensor& input_tensor, uint32_t batch_size, uint32_t input_h, uint32_t input_w, uint32_t channels, array2_t kernel_size, array2_t stride, array2_t padding, array2_t dilation, Device& device) { + MemoryConfig memory_config = input_tensor.memory_config(); + const auto shard_grid = memory_config.shard_spec.value().grid; + const auto shard_scheme = memory_config.memory_layout; + const auto shard_orientation = memory_config.shard_spec.value().orientation; + + TT_FATAL(shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED, "Only height sharded tensors are supported."); + TT_FATAL(shard_orientation == ShardOrientation::ROW_MAJOR, "Only row major orientation is supported."); + + ParallelConfig parallel_config = conv2d::determine_parallel_config( + shard_scheme == TensorMemoryLayout::HEIGHT_SHARDED, + batch_size, + 0, // in_channels -- not used + input_h, + input_w, + 0, // out_channels -- not used + device, + shard_orientation); + uint32_t num_cores_nhw = conv2d::get_num_cores_nhw_from_parallel_config(parallel_config); + + SlidingWindowConfig sliding_window_config = SlidingWindowConfig(batch_size, + input_h, input_w, + kernel_size.at(0), kernel_size.at(1), + stride.at(0), stride.at(1), + padding.at(0), padding.at(1), + dilation.at(0), dilation.at(1), + num_cores_nhw, + parallel_config.grid); + uint32_t neg_inf_pad_val = 0xf7ff; // TODO: double check + + auto haloed_tensor = ttnn::operations::halo::halo_op(input_tensor, sliding_window_config, neg_inf_pad_val, false, parallel_config.shard_orientation == ShardOrientation::COL_MAJOR, 0, memory_config); + return tt::tt_metal::maxpool2d_new(haloed_tensor, sliding_window_config, channels, memory_config); +} + +} // namespace maxpool +} // namespace ttnn::operations diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp new file mode 100644 index 00000000000..a65ab24f66c --- /dev/null +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp @@ -0,0 +1,132 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
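[Editor's sketch] A hedged numerical reference for what the maxpool2d macro-op above computes; this plain torch snippet checks only the pooling math and does not model the halo, sharding, or parallel-config steps:

    import torch

    x = torch.randn(1, 64, 16, 16)              # (N, C, H, W) for torch
    ref = torch.nn.functional.max_pool2d(
        x, kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), dilation=(1, 1)
    )
    assert ref.shape == (1, 64, 8, 8)
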
+// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include "ttnn/cpp/pybind11/decorators.hpp" +#include "ttnn/operations/pool/maxpool/max_pool.hpp" + +#include "ttnn/types.hpp" + +namespace py = pybind11; +using array2_t = std::array; +namespace ttnn { +namespace operations { +namespace maxpool { + + +void py_module(py::module& module) { + module.def( + "max_pool2d", + &max_pool2d, + py::arg("input").noconvert(), + py::arg("in_n").noconvert(), + py::arg("in_h").noconvert(), + py::arg("in_w").noconvert(), + py::arg("kernel_h").noconvert(), + py::arg("kernel_w").noconvert(), + py::arg("stride_h") = 1, + py::arg("stride_w") = 1, + py::arg("pad_h") = 0, + py::arg("pad_w") = 0, + py::arg("dilation_h") = 1, + py::arg("dilation_w") = 1, + py::kw_only(), + py::arg("memory_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, + py::arg("nblocks") = 1, + py::arg("use_multicore") = true, + R"doc( + Max Pool 2D + +-------------------+-------------------------------+---------------+-------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===================+===============================+===============+=============+==========+ + | input | Input activations tensor | Tensor | | Yes | + | in_n | Input nbatch | Tensor | | Yes | + | in_h | Input height | Tensor | | Yes | + | in_w | Input width | Tensor | | Yes | + | kernel_h | kernel window height | uint32_t | | Yes | + | kernel_w | kernel window width | uint32_t | | Yes | + | stride_h | stride in height dim | uint32_t | | No | + | stride_w | stride in width dim | uint32_t | | No | + | pad_h | padding in height dim | uint32_t | | No | + | pad_w | padding in width dim | uint32_t | | No | + | dilation_h | kernel dilation in height dim | uint32_t | | No | + | dilation_w | kernel dilation in width dim | uint32_t | | No | + | memory_config | Output memory config | MemoryConfig | | No | + +-------------------+-------------------------------+---------------+-------------+----------+ + )doc"); + + module.def( + "max_pool2d_v2", + &max_pool2d_v2, + py::arg("input").noconvert(), + py::arg("reader_indices").noconvert(), + py::arg("in_n").noconvert(), + py::arg("in_h").noconvert(), + py::arg("in_w").noconvert(), + py::arg("kernel_h").noconvert(), + py::arg("kernel_w").noconvert(), + py::arg("stride_h") = 1, + py::arg("stride_w") = 1, + py::arg("pad_h") = 0, + py::arg("pad_w") = 0, + py::arg("dilation_h") = 1, + py::arg("dilation_w") = 1, + py::kw_only(), + py::arg("memory_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, + py::arg("nblocks") = 1, + py::arg("use_multicore") = true, + R"doc( + Max Pool 2D + +-------------------+-------------------------------+---------------+-------------+----------+ + | Argument | Description | Data type | Valid range | Required | + +===================+===============================+===============+=============+==========+ + | input | Input activations tensor | Tensor | | Yes | + | in_n | Input nbatch | Tensor | | Yes | + | in_h | Input height | Tensor | | Yes | + | in_w | Input width | Tensor | | Yes | + | kernel_h | kernel window height | uint32_t | | Yes | + | kernel_w | kernel window width | uint32_t | | Yes | + | stride_h | stride in height dim | uint32_t | | No | + | stride_w | stride in width dim | uint32_t | | No | + | pad_h | padding in height dim | uint32_t | | No | + | pad_w | padding in width dim | uint32_t | | No | + | dilation_h | kernel dilation in height dim | uint32_t | | No | + | dilation_w | kernel dilation in width dim | uint32_t | | No | 
+ | memory_config | output tensor memory config | MemoryConfig | | No | + +-------------------+-------------------------------+---------------+-------------+----------+ + )doc"); + module.def( + "maxpool2d", + [](const ttnn::Tensor& input_tensor, + uint32_t batch_size, + uint32_t input_height, + uint32_t input_width, + uint32_t channels, + array2_t kernel_size, + array2_t stride, + array2_t padding, + array2_t dilation, + Device& device) -> Tensor { + return ttnn::operations::maxpool::maxpool2d(input_tensor, batch_size, input_height, input_width, channels, kernel_size, stride, padding, dilation, device); + }, + py::kw_only(), + py::arg("input_tensor"), + py::arg("batch_size"), + py::arg("input_height"), + py::arg("input_width"), + py::arg("channels"), + py::arg("kernel_size"), + py::arg("stride"), + py::arg("padding"), + py::arg("dilation"), + py::arg("device")); +} + +} // namespace maxpool +} // namespace operations +} // namespace ttnn diff --git a/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp index cda8b5f4db9..7ee4c756475 100644 --- a/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp +++ b/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp @@ -9,10 +9,12 @@ #include "ttnn/cpp/pybind11/decorators.hpp" #include "ttnn/operations/pool/pool.hpp" +#include "ttnn/operations/pool/maxpool/maxpool2d.hpp" + #include "ttnn/types.hpp" namespace py = pybind11; - +using array2_t = std::array; namespace ttnn { namespace operations { namespace pool { @@ -141,7 +143,31 @@ void py_module(py::module& module) { | memory_config | output tensor memory config | MemoryConfig | | No | +-------------------+-------------------------------+---------------+-------------+----------+ )doc"); - + module.def( + "maxpool2d", + [](const ttnn::Tensor& input_tensor, + uint32_t batch_size, + uint32_t input_height, + uint32_t input_width, + uint32_t channels, + array2_t kernel_size, + array2_t stride, + array2_t padding, + array2_t dilation, + Device& device) -> Tensor { + return ttnn::operations::maxpool2d::maxpool2d(input_tensor, batch_size, input_height, input_width, channels, kernel_size, stride, padding, dilation, device); + }, + py::kw_only(), + py::arg("input_tensor"), + py::arg("batch_size"), + py::arg("input_height"), + py::arg("input_width"), + py::arg("channels"), + py::arg("kernel_size"), + py::arg("stride"), + py::arg("padding"), + py::arg("dilation"), + py::arg("device")); module.def( "average_pool_2d", &average_pool_2d, diff --git a/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp b/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp index d30a5d97029..1fc453867e3 100644 --- a/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp +++ b/ttnn/cpp/ttnn/operations/upsample/upsample_op.cpp @@ -10,7 +10,7 @@ #include "detail/util.hpp" #include "tensor/host_buffer/functions.hpp" #include "tensor/tensor_utils.hpp" -#include "ttnn/cpp/ttnn/operations/pool/max_pool.hpp" +#include "ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp" #include "ttnn/experimental/tt_dnn/op_library/reduce/reduce_op.hpp" // for reduce_op_utils #include "ttnn/experimental/tt_dnn/op_library/work_split.hpp" #include "tt_metal/host_api.hpp" diff --git a/ttnn/ttnn/operations/conv/tt_py_max_pool.py b/ttnn/ttnn/operations/conv/tt_py_max_pool.py index 78b9eb17d0b..d9a4257ae55 100644 --- a/ttnn/ttnn/operations/conv/tt_py_max_pool.py +++ b/ttnn/ttnn/operations/conv/tt_py_max_pool.py @@ -30,7 +30,7 @@ import math import torch -from ttnn._ttnn.operations import pool +from ttnn.operations import pool class 
TTPyMaxPool(TTPyOp): diff --git a/ttnn/ttnn/operations/pool.py b/ttnn/ttnn/operations/pool.py index 55d8dc3b319..c716ce2b0a1 100644 --- a/ttnn/ttnn/operations/pool.py +++ b/ttnn/ttnn/operations/pool.py @@ -130,12 +130,17 @@ def _golden_function(input_tensor: ttnn.Tensor): ttnn.attach_golden_function(ttnn.global_avg_pool2d, golden_function=_golden_function) max_pool2d = ttnn.register_operation( - name="ttnn.max_pool2d", is_method=True, validate_input_tensors=lambda *args, **kwargs: None -)(ttnn._ttnn.operations.pool.max_pool2d) + name="ttnn.max_pool2d", +)(ttnn._ttnn.operations.maxpool.max_pool2d) + + +max_pool2d_v2 = ttnn.register_operation( + name="ttnn.max_pool2d_v2", +)(ttnn._ttnn.operations.maxpool.max_pool2d_v2) average_pool_2d = ttnn.register_operation( - name="ttnn.average_pool_2d", is_method=True, validate_input_tensors=lambda *args, **kwargs: None -)(ttnn._ttnn.operations.pool.average_pool_2d) + name="ttnn.average_pool_2d", +)(ttnn._ttnn.operations.avgpool.average_pool_2d) __all__ = [] From 3945327857c0c9d5fff7a9e3db4987e5d9795ab4 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Tue, 2 Jul 2024 19:03:54 +0000 Subject: [PATCH 03/14] #9758: Fixed Golden Func --- tests/tt_eager/ops/test_average_pool.cpp | 3 +- .../max_pool_program_factory.cpp} | 0 .../ttnn/operations/pool/maxpool/max_pool.hpp | 7 --- ttnn/ttnn/operations/conv/tt_py_max_pool.py | 6 ++- ttnn/ttnn/operations/pool.py | 53 +++++++++++++------ 5 files changed, 44 insertions(+), 25 deletions(-) rename ttnn/cpp/ttnn/operations/pool/maxpool/{max_pool.cpp => device/max_pool_program_factory.cpp} (100%) diff --git a/tests/tt_eager/ops/test_average_pool.cpp b/tests/tt_eager/ops/test_average_pool.cpp index bd0435a7d91..656f315356c 100644 --- a/tests/tt_eager/ops/test_average_pool.cpp +++ b/tests/tt_eager/ops/test_average_pool.cpp @@ -2,8 +2,9 @@ // // SPDX-License-Identifier: Apache-2.0 -#include "ttnn/cpp/ttnn/operations/pool/avgpool/average_pool.hpp" +#include "ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp" #include "ttnn/experimental/tt_dnn/op_library/auto_format.hpp" +#include "tt_dnn/op_library/auto_format.hpp" #include "tt_numpy/functions.hpp" #include "tensor/tensor.hpp" diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp similarity index 100% rename from ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.cpp rename to ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp index 2624b36fc5a..1e22eef409c 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp @@ -178,13 +178,6 @@ Tensor maxpool2d_new(const Tensor &input, } // namespace tt_metal } // namespace tt - -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
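[Editor's note] One caveat for the golden functions this patch touches: stock PyTorch has no torch.nn.functional.global_avg_pool2d; adaptive_avg_pool2d with a (1, 1) output size is the conventional stand-in. A working sketch of the golden computation:

    import torch

    x = torch.randn(10, 3, 32, 32)              # (N, C, H, W)
    golden = torch.nn.functional.adaptive_avg_pool2d(x, (1, 1))
    assert golden.shape == (10, 3, 1, 1)
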
-// -// SPDX-License-Identifier: Apache-2.0 - - - namespace ttnn::operations { namespace maxpool { diff --git a/ttnn/ttnn/operations/conv/tt_py_max_pool.py b/ttnn/ttnn/operations/conv/tt_py_max_pool.py index d9a4257ae55..6df9605e41b 100644 --- a/ttnn/ttnn/operations/conv/tt_py_max_pool.py +++ b/ttnn/ttnn/operations/conv/tt_py_max_pool.py @@ -30,7 +30,6 @@ import math import torch -from ttnn.operations import pool class TTPyMaxPool(TTPyOp): @@ -45,7 +44,9 @@ def __init__( deallocate_activation=True, act_dtype=None, channels=None, + pool_op=None, ): + self.pool_op = pool_op if parallel_config_override is None: parallel_config_override = {} if "max_pool" not in reader_patterns_cache: @@ -222,7 +223,8 @@ def max_pool_(activation): if self.deallocate_activation: activation.deallocate() - output = pool.max_pool2d_v2( + # output = pool.max_pool2d_v2( + output = self.pool_op( haloed_act, reader_indices, in_n, diff --git a/ttnn/ttnn/operations/pool.py b/ttnn/ttnn/operations/pool.py index c716ce2b0a1..eaac01920e3 100644 --- a/ttnn/ttnn/operations/pool.py +++ b/ttnn/ttnn/operations/pool.py @@ -95,6 +95,7 @@ def __init__( deallocate_activation=deallocate_activation, act_dtype=dtype, channels=channels, + pool_op=max_pool2d_v2, ) @ttnn.register_python_operation(name="ttnn.MaxPool2d.__call__", is_method=True) @@ -116,31 +117,53 @@ def copy_output_from_device(self, output: ttnn.Tensor): ## Average Pooling -def _golden_function(input_tensor: ttnn.Tensor): +def golden_global_avg_pool2d(input_tensor: ttnn.Tensor): import torch - input_tensor = ttnn.from_device(input_tensor) - input_tensor = ttnn.to_layout(input_tensor, ttnn.ROW_MAJOR_LAYOUT) - input_tensor = ttnn.to_torch(input_tensor) - output_size = (1, 1) return torch.nn.functional.global_avg_pool2d(input_tensor, output_size) -ttnn.attach_golden_function(ttnn.global_avg_pool2d, golden_function=_golden_function) +def golden_maxpool2d( + _input_tensor: ttnn.Tensor, + in_n: int, + in_h: int, + in_w: int, + kernel_h: int, + kernel_w: int, + stride_h: int, + stride_w: int, + pad_h: int, + pad_w: int, + dilation_h: int, + dilation_w: int, + *, + memory_config: ttnn.MemoryConfig, + nblocks: int, + use_multicore: bool, +): + import torch + + kernel_size = (kernel_h, kernel_w) + stride = (stride_h, stride_w) + padding = (pad_h, pad_w) + dilation = (dilation_h, dilation_w) + + return torch.nn.functional.max_pool2d( + _input_tensor, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation + ) -max_pool2d = ttnn.register_operation( - name="ttnn.max_pool2d", -)(ttnn._ttnn.operations.maxpool.max_pool2d) +global_avg_pool2d = ttnn._ttnn.operations.avgpool.global_avg_pool2d +average_pool_2d = ttnn._ttnn.operations.avgpool.average_pool_2d +max_pool2d = ttnn.ttnn._ttnn.operations.maxpool.max_pool2d +max_pool2d_v2 = ttnn._ttnn.operations.maxpool.max_pool2d_v2 -max_pool2d_v2 = ttnn.register_operation( - name="ttnn.max_pool2d_v2", -)(ttnn._ttnn.operations.maxpool.max_pool2d_v2) +ttnn.attach_golden_function(global_avg_pool2d, golden_function=golden_global_avg_pool2d) +# ttnn.attach_golden_function(average_pool_2d, golden_function=golden_global_avg_pool2d) +# ttnn.attach_golden_function(max_pool2d_v2, golden_function=golden_maxpool2d) +# ttnn.attach_golden_function(max_pool2d_v2, golden_function=golden_maxpool2d) -average_pool_2d = ttnn.register_operation( - name="ttnn.average_pool_2d", -)(ttnn._ttnn.operations.avgpool.average_pool_2d) __all__ = [] From d280e6fad39502b17574fc45d8c3c4a2f86c593b Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 6 Jul 2024 
23:15:20 +0000 Subject: [PATCH 04/14] #9758: Renamed average_pool_2d to avg_pool2d --- tests/tt_eager/ops/test_average_pool.cpp | 2 +- .../trace_testing/misc/test_average_pool.py | 2 +- .../unit_testing/misc/test_average_pool.py | 2 +- ...test_untilize_with_halo_and_max_pool_v2.py | 5 +- .../ttnn/operations/pool/avgpool/avg_pool.cpp | 2 +- .../ttnn/operations/pool/avgpool/avg_pool.hpp | 4 +- .../pool/avgpool/avg_pool_pybind.hpp | 6 +- .../device/max_pool_program_factory.cpp | 2 +- .../ttnn/operations/pool/maxpool/max_pool.hpp | 2 +- ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp | 192 ------------------ ttnn/tt_lib/fused_ops/average_pool.py | 6 +- ttnn/ttnn/operations/pool.py | 4 +- 12 files changed, 20 insertions(+), 209 deletions(-) delete mode 100644 ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp diff --git a/tests/tt_eager/ops/test_average_pool.cpp b/tests/tt_eager/ops/test_average_pool.cpp index 656f315356c..108ce824906 100644 --- a/tests/tt_eager/ops/test_average_pool.cpp +++ b/tests/tt_eager/ops/test_average_pool.cpp @@ -25,7 +25,7 @@ Tensor run_avg_pool_2d_resnet(Shape& tensor_shape, Device* device) { if (!AutoFormat::check_input_tensor_format(input_tensor, padded_input_shape)) { padded_input_tensor = AutoFormat::format_input_tensor(input_tensor, device, padded_input_shape, 0, Layout::TILE); // pad with 0s } - auto device_output = average_pool_2d(padded_input_tensor); + auto device_output = avg_pool2d(padded_input_tensor); return device_output.cpu(); }; diff --git a/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py b/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py index 108280c9ac9..77996476dc8 100644 --- a/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py +++ b/tests/tt_eager/python_api_testing/trace_testing/misc/test_average_pool.py @@ -64,7 +64,7 @@ def test_run_average_pool(act_shape, dtype, device, use_program_cache, enable_as ttact_res = ttact.to(device) def run_ops(ttact_res): - return ttnn.average_pool_2d(ttact_res) + return ttnn.avg_pool2d(ttact_res) # Compile run_ops(ttact_res) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py index f6e5fcf5759..beb3ee4ab00 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_average_pool.py @@ -45,7 +45,7 @@ def test_run_average_pool(act_shape, dtype, device): ttact = ttact.pad_to_tile(0.0) ttact = ttact.to(device) - out = ttnn.average_pool_2d(ttact) + out = ttnn.avg_pool2d(ttact) out = out.cpu().to(ttl.tensor.Layout.ROW_MAJOR) out_shape = [batch_size, 1, 1, channels] diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py index 4de749400d4..acbdfbd8e1f 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py @@ -14,6 +14,7 @@ TTPyMaxPool, SlidingWindowOpParamsWithParallelConfig, ) +from ttnn.operations.pool import max_pool2d_v2 as ttnn_max_pool2d_v2 import tt_lib as ttl @@ -172,7 +173,9 @@ def test_run_max_pool( assert kernel_w == kernel_h and stride_w == stride_h and pad_w == pad_h and dilation_w == dilation_h max_pool_reader_patterns_cache = {} - max_pool = 
TTPyMaxPool(sliding_window_op_params, device, max_pool_reader_patterns_cache, pad_val=pad_val) + max_pool = TTPyMaxPool( + sliding_window_op_params, device, max_pool_reader_patterns_cache, pad_val=pad_val, pool_op=ttnn_max_pool2d_v2 + ) ttact_sharded = max_pool.copy_input_to_device(ttact) out_padded = max_pool(ttact_sharded) diff --git a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp index 4eed04e03e7..b341f0965fe 100644 --- a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp +++ b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp @@ -23,7 +23,7 @@ Tensor pool_2d(const Tensor& input, const MemoryConfig& memory_config, const std } } -Tensor average_pool_2d(const Tensor& input, const MemoryConfig& memory_config, const std::optional& output_dtype) { +Tensor avg_pool2d(const Tensor& input, const MemoryConfig& memory_config, const std::optional& output_dtype) { TT_ASSERT(input.storage_type() == StorageType::DEVICE, "Input tensor needs to be on device"); auto output = input; diff --git a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp index 6486a3efe49..c7de056b039 100644 --- a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp @@ -16,7 +16,7 @@ enum class PoolType { AVG }; -Tensor average_pool_2d(const Tensor& input, const MemoryConfig& memory_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, const std::optional& output_dtype = std::nullopt); +Tensor avg_pool2d(const Tensor& input, const MemoryConfig& memory_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, const std::optional& output_dtype = std::nullopt); } // namespace tt_metal } // namespace tt @@ -36,7 +36,7 @@ struct GlobalAveragePool2D { const std::optional& memory_config_arg = std::nullopt, const std::optional& output_dtype = std::nullopt) { auto memory_config = memory_config_arg.value_or(input.memory_config()); - auto result = tt::tt_metal::average_pool_2d(input, memory_config, output_dtype); + auto result = tt::tt_metal::avg_pool2d(input, memory_config, output_dtype); return result; } }; diff --git a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp index 1aebffdb599..703bd9423b0 100644 --- a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp +++ b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp @@ -62,15 +62,15 @@ void bind_global_avg_pool2d(py::module& module) { void py_module(py::module& module) { detail::bind_global_avg_pool2d(module); module.def( - "average_pool_2d", - &average_pool_2d, + "avg_pool2d", + &avg_pool2d, py::arg().noconvert(), py::kw_only(), py::arg("memory_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, py::arg("dtype").noconvert() = std::nullopt, R"doc( Average Pool 2D - It operates on tensors whose that have channels as the last dimension + It operates on tensors that have channels as the last dimension. 
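[Editor's sketch] Call sites migrate mechanically with this rename; a hedged usage sketch (`device` is assumed to be an already-open ttnn device, and the input values are placeholders):

    import torch
    import ttnn

    tt_act = ttnn.from_torch(
        torch.randn(1, 1, 32 * 32, 64),          # channels-last, flattened NHW
        dtype=ttnn.bfloat16,
        layout=ttnn.TILE_LAYOUT,
        device=device,                           # assumed open device handle
    )
    # before this patch: out = ttnn.average_pool_2d(tt_act)
    out = ttnn.avg_pool2d(tt_act, memory_config=ttnn.DRAM_MEMORY_CONFIG)
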
+----------+----------------------------+------------+-------------------------------+----------+ | Argument | Description | Data type | Valid range | Required | diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp index 8061b2007d6..20dd934c570 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp @@ -353,7 +353,7 @@ operation::ProgramWithCallbacks MaxPoolNew::create_program(const std::vector& input_tensors, const std::vector>& optional_input_tensors, const std::vector &output_tensors) const { +operation::OpPerformanceModel MaxPoolNew::create_op_performance_model(const std::vector& input_tensors, const std::vector>& optional_input_tensors, const std::vector>& optional_output_tensors, const std::vector &output_tensors) const { const auto& input = input_tensors.at(0); const auto& input_shape = input.get_shape(); uint32_t batch_size = sliding_window_config_.batch_size_; diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp index 1e22eef409c..24aac37c3b6 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp @@ -152,7 +152,7 @@ struct MaxPoolNew { std::vector compute_output_shapes(const std::vector &input_tensors) const; std::vector create_output_tensors(const std::vector &input_tensors) const; operation::ProgramWithCallbacks create_program(const std::vector& input_tensors, std::vector &output_tensors) const; - operation::OpPerformanceModel create_op_performance_model(const std::vector& input_tensors, const std::vector>& optional_input_tensors, const std::vector &output_tensors) const; + operation::OpPerformanceModel create_op_performance_model(const std::vector& input_tensors, const std::vector>& optional_input_tensors, const std::vector>& optional_output_tensors, const std::vector &output_tensors) const; static constexpr auto attribute_names = std::make_tuple( "sliding_window_config", diff --git a/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp deleted file mode 100644 index 7ee4c756475..00000000000 --- a/ttnn/cpp/ttnn/operations/pool/pool_pybind.hpp +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include -#include - -#include "ttnn/cpp/pybind11/decorators.hpp" -#include "ttnn/operations/pool/pool.hpp" -#include "ttnn/operations/pool/maxpool/maxpool2d.hpp" - -#include "ttnn/types.hpp" - -namespace py = pybind11; -using array2_t = std::array; -namespace ttnn { -namespace operations { -namespace pool { - -namespace detail { - -void bind_global_avg_pool2d(py::module& module) { - auto doc = fmt::format( - R"doc({0}(input_tensor: ttnn.Tensor, *, memory_config: Optional[ttnn.MemoryConfig] = None, dtype: Optional[ttnn.DataType] = None) -> ttnn.Tensor - - Applies {0} to :attr:`input_tensor` by performing a 2D adaptive average pooling over an input signal composed of several input planes. This operation computes the average of all elements in each channel across the entire spatial dimensions. - - .. math:: - {0}(\\mathrm{{input\\_tensor}}_i) - - Args: - * :attr:`input_tensor` (ttnn.Tensor): The input tensor to be pooled. Typically of shape (batch_size, channels, height, width). 
- - Keyword Args: - * :attr:`memory_config` (Optional[ttnn.MemoryConfig]): Memory configuration for the operation. - * :attr:`dtype` (Optional[ttnn.DataType]): data type for the output tensor - - Returns: - ttnn.Tensor: The tensor with the averaged values. The output tensor shape is (batch_size, channels, 1, 1). - - Example: - - >>> tensor = ttnn.from_torch(torch.randn((10, 3, 32, 32), dtype=ttnn.bfloat16), device=device) - >>> output = {1}(tensor) - )doc", - ttnn::global_avg_pool2d.base_name(), - ttnn::global_avg_pool2d.python_fully_qualified_name()); - - bind_registered_operation( - module, - ttnn::global_avg_pool2d, - doc, - ttnn::pybind_arguments_t{ - py::arg("input_tensor"), - py::kw_only(), - py::arg("memory_config") = std::nullopt, - py::arg("dtype") = std::nullopt}); -} - -} // namespace detail - -void py_module(py::module& module) { - detail::bind_global_avg_pool2d(module); - module.def( - "max_pool2d", - &max_pool2d, - py::arg("input").noconvert(), - py::arg("in_n").noconvert(), - py::arg("in_h").noconvert(), - py::arg("in_w").noconvert(), - py::arg("kernel_h").noconvert(), - py::arg("kernel_w").noconvert(), - py::arg("stride_h") = 1, - py::arg("stride_w") = 1, - py::arg("pad_h") = 0, - py::arg("pad_w") = 0, - py::arg("dilation_h") = 1, - py::arg("dilation_w") = 1, - py::kw_only(), - py::arg("memory_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("nblocks") = 1, - py::arg("use_multicore") = true, - R"doc( - Max Pool 2D - +-------------------+-------------------------------+---------------+-------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===================+===============================+===============+=============+==========+ - | input | Input activations tensor | Tensor | | Yes | - | in_n | Input nbatch | Tensor | | Yes | - | in_h | Input height | Tensor | | Yes | - | in_w | Input width | Tensor | | Yes | - | kernel_h | kernel window height | uint32_t | | Yes | - | kernel_w | kernel window width | uint32_t | | Yes | - | stride_h | stride in height dim | uint32_t | | No | - | stride_w | stride in width dim | uint32_t | | No | - | pad_h | padding in height dim | uint32_t | | No | - | pad_w | padding in width dim | uint32_t | | No | - | dilation_h | kernel dilation in height dim | uint32_t | | No | - | dilation_w | kernel dilation in width dim | uint32_t | | No | - | memory_config | Output memory config | MemoryConfig | | No | - +-------------------+-------------------------------+---------------+-------------+----------+ - )doc"); - - module.def( - "max_pool2d_v2", - &max_pool2d_v2, - py::arg("input").noconvert(), - py::arg("reader_indices").noconvert(), - py::arg("in_n").noconvert(), - py::arg("in_h").noconvert(), - py::arg("in_w").noconvert(), - py::arg("kernel_h").noconvert(), - py::arg("kernel_w").noconvert(), - py::arg("stride_h") = 1, - py::arg("stride_w") = 1, - py::arg("pad_h") = 0, - py::arg("pad_w") = 0, - py::arg("dilation_h") = 1, - py::arg("dilation_w") = 1, - py::kw_only(), - py::arg("memory_config") = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("nblocks") = 1, - py::arg("use_multicore") = true, - R"doc( - Max Pool 2D - +-------------------+-------------------------------+---------------+-------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +===================+===============================+===============+=============+==========+ - | input | Input activations tensor | Tensor | | Yes | - | in_n | Input nbatch | Tensor | | Yes | - | in_h | Input height | 
Tensor | | Yes | - | in_w | Input width | Tensor | | Yes | - | kernel_h | kernel window height | uint32_t | | Yes | - | kernel_w | kernel window width | uint32_t | | Yes | - | stride_h | stride in height dim | uint32_t | | No | - | stride_w | stride in width dim | uint32_t | | No | - | pad_h | padding in height dim | uint32_t | | No | - | pad_w | padding in width dim | uint32_t | | No | - | dilation_h | kernel dilation in height dim | uint32_t | | No | - | dilation_w | kernel dilation in width dim | uint32_t | | No | - | memory_config | output tensor memory config | MemoryConfig | | No | - +-------------------+-------------------------------+---------------+-------------+----------+ - )doc"); - module.def( - "maxpool2d", - [](const ttnn::Tensor& input_tensor, - uint32_t batch_size, - uint32_t input_height, - uint32_t input_width, - uint32_t channels, - array2_t kernel_size, - array2_t stride, - array2_t padding, - array2_t dilation, - Device& device) -> Tensor { - return ttnn::operations::maxpool2d::maxpool2d(input_tensor, batch_size, input_height, input_width, channels, kernel_size, stride, padding, dilation, device); - }, - py::kw_only(), - py::arg("input_tensor"), - py::arg("batch_size"), - py::arg("input_height"), - py::arg("input_width"), - py::arg("channels"), - py::arg("kernel_size"), - py::arg("stride"), - py::arg("padding"), - py::arg("dilation"), - py::arg("device")); - module.def( - "average_pool_2d", - &average_pool_2d, - py::arg().noconvert(), - py::kw_only(), - py::arg("memory_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - py::arg("dtype").noconvert() = std::nullopt, - R"doc( - Average Pool 2D - It operates on tensors whose that have channels as the last dimension - - +----------+----------------------------+------------+-------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +==========+============================+============+===============================+==========+ - | act | Input activations tensor | Tensor | | Yes | - +----------+----------------------------+------------+-------------------------------+----------+ - )doc"); -} - -} // namespace pool -} // namespace operations -} // namespace ttnn diff --git a/ttnn/tt_lib/fused_ops/average_pool.py b/ttnn/tt_lib/fused_ops/average_pool.py index 18bcbc5018a..a296e7ab7a0 100644 --- a/ttnn/tt_lib/fused_ops/average_pool.py +++ b/ttnn/tt_lib/fused_ops/average_pool.py @@ -9,8 +9,8 @@ def run_avg_pool_on_device_wrapper(device): - def average_pool_2d(x, output_mem_config, output_dtype=None): - out = ttnn.average_pool_2d(x, memory_config=output_mem_config, dtype=output_dtype) + def avg_pool2d(x, output_mem_config, output_dtype=None): + out = ttnn.avg_pool2d(x, memory_config=output_mem_config, dtype=output_dtype) return out - return average_pool_2d + return avg_pool2d diff --git a/ttnn/ttnn/operations/pool.py b/ttnn/ttnn/operations/pool.py index eaac01920e3..12109a9a7b0 100644 --- a/ttnn/ttnn/operations/pool.py +++ b/ttnn/ttnn/operations/pool.py @@ -155,12 +155,12 @@ def golden_maxpool2d( global_avg_pool2d = ttnn._ttnn.operations.avgpool.global_avg_pool2d -average_pool_2d = ttnn._ttnn.operations.avgpool.average_pool_2d +avg_pool2d = ttnn._ttnn.operations.avgpool.avg_pool2d max_pool2d = ttnn.ttnn._ttnn.operations.maxpool.max_pool2d max_pool2d_v2 = ttnn._ttnn.operations.maxpool.max_pool2d_v2 ttnn.attach_golden_function(global_avg_pool2d, golden_function=golden_global_avg_pool2d) -# ttnn.attach_golden_function(average_pool_2d, 
golden_function=golden_global_avg_pool2d) +# ttnn.attach_golden_function(avg_pool2d, golden_function=golden_global_avg_pool2d) # ttnn.attach_golden_function(max_pool2d_v2, golden_function=golden_maxpool2d) # ttnn.attach_golden_function(max_pool2d_v2, golden_function=golden_maxpool2d) From f8a8e8c22bf3666e86ec5af40fbedc67bbb0792b Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Tue, 9 Jul 2024 09:57:11 +0000 Subject: [PATCH 05/14] #9758: Changed maxpool naming #9758: Fixed register operation --- ...test_untilize_with_halo_and_max_pool_v2.py | 8 +++-- ttnn/cpp/pybind11/operations/__init__.hpp | 5 ---- .../tt_lib/csrc/tt_lib_bindings_tensor.cpp | 19 ------------ .../device/max_pool_program_factory.cpp | 2 +- .../ttnn/operations/pool/maxpool/max_pool.hpp | 2 +- .../pool/maxpool/maxpool_pybind.hpp | 29 ++----------------- ttnn/ttnn/operations/conv/tt_py_max_pool.py | 2 +- ttnn/ttnn/operations/pool.py | 25 ++++++++-------- 8 files changed, 24 insertions(+), 68 deletions(-) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py index acbdfbd8e1f..ea8f6b217b7 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py @@ -14,7 +14,7 @@ TTPyMaxPool, SlidingWindowOpParamsWithParallelConfig, ) -from ttnn.operations.pool import max_pool2d_v2 as ttnn_max_pool2d_v2 +from ttnn.operations.pool import max_pool2d_legacy as ttnn_max_pool2d_legacy import tt_lib as ttl @@ -174,7 +174,11 @@ def test_run_max_pool( max_pool_reader_patterns_cache = {} max_pool = TTPyMaxPool( - sliding_window_op_params, device, max_pool_reader_patterns_cache, pad_val=pad_val, pool_op=ttnn_max_pool2d_v2 + sliding_window_op_params, + device, + max_pool_reader_patterns_cache, + pad_val=pad_val, + pool_op=ttnn_max_pool2d_legacy, ) ttact_sharded = max_pool.copy_input_to_device(ttact) diff --git a/ttnn/cpp/pybind11/operations/__init__.hpp b/ttnn/cpp/pybind11/operations/__init__.hpp index 5801d509c70..c23d5f0d847 100644 --- a/ttnn/cpp/pybind11/operations/__init__.hpp +++ b/ttnn/cpp/pybind11/operations/__init__.hpp @@ -13,10 +13,6 @@ #include "pybind11/operations/core.hpp" #include "pybind11/operations/creation.hpp" #include "pybind11/operations/kv_cache.hpp" -#include "pybind11/operations/maxpool2d.hpp" -#include "pybind11/operations/matmul.hpp" -#include "pybind11/operations/normalization.hpp" -#include "pybind11/operations/copy.hpp" #include "pybind11/operations/ternary.hpp" #include "ttnn/operations/pool/avgpool/avg_pool_pybind.hpp" @@ -39,7 +35,6 @@ #include "ttnn/operations/eltwise/complex_binary_backward/complex_binary_backward_pybind.hpp" #include "ttnn/operations/experimental/experimental_pybind.hpp" - namespace py = pybind11; namespace ttnn { diff --git a/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp b/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp index cbbc605375c..46de5a50e1c 100644 --- a/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp +++ b/ttnn/cpp/ttnn/experimental/tt_lib/csrc/tt_lib_bindings_tensor.cpp @@ -522,25 +522,6 @@ void TensorModule(py::module& m_tensor) { "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" )doc"); - // Upsample - m_tensor.def( - "upsample", - &upsample, - 
py::arg("input").noconvert(), - py::arg("scale_factor_h").noconvert(), - py::arg("scale_factor_w").noconvert(), - py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, - R"doc( - UpSample 2D - It operates on tensors whose that have channels as the last dimension - - +----------+----------------------------+------------+-------------------------------+----------+ - | Argument | Description | Data type | Valid range | Required | - +==========+============================+============+===============================+==========+ - | act | Input activations tensor | Tensor | | Yes | - +----------+----------------------------+------------+-------------------------------+----------+ - )doc"); - // TMs m_tensor.def( "split_last_dim_two_chunks_tiled", diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp index 20dd934c570..2fe6a5cde86 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp @@ -201,7 +201,7 @@ Tensor max_pool2d(const Tensor &input, {input}).at(0); } -Tensor max_pool2d_v2(const Tensor &input, +Tensor max_pool2d_legacy(const Tensor &input, const Tensor &reader_indices, uint32_t in_n, uint32_t in_h, uint32_t in_w, uint32_t kernel_size_h, uint32_t kernel_size_w, diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp index 24aac37c3b6..305af0c8106 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp @@ -130,7 +130,7 @@ Tensor max_pool2d(const Tensor &input, const MemoryConfig& out_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, uint32_t nblocks = 1, bool use_multicore = true); -Tensor max_pool2d_v2(const Tensor &input, const Tensor &reader_indices, +Tensor max_pool2d_legacy(const Tensor &input, const Tensor &reader_indices, uint32_t in_n, uint32_t in_h, uint32_t in_w, uint32_t kernel_size_h, uint32_t kernel_size_w, uint32_t stride_h = 1, uint32_t stride_w = 1, diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp index a65ab24f66c..68daadcb98a 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp @@ -61,8 +61,8 @@ void py_module(py::module& module) { )doc"); module.def( - "max_pool2d_v2", - &max_pool2d_v2, + "max_pool2d_legacy", + &max_pool2d_legacy, py::arg("input").noconvert(), py::arg("reader_indices").noconvert(), py::arg("in_n").noconvert(), @@ -100,31 +100,6 @@ void py_module(py::module& module) { | memory_config | output tensor memory config | MemoryConfig | | No | +-------------------+-------------------------------+---------------+-------------+----------+ )doc"); - module.def( - "maxpool2d", - [](const ttnn::Tensor& input_tensor, - uint32_t batch_size, - uint32_t input_height, - uint32_t input_width, - uint32_t channels, - array2_t kernel_size, - array2_t stride, - array2_t padding, - array2_t dilation, - Device& device) -> Tensor { - return ttnn::operations::maxpool::maxpool2d(input_tensor, batch_size, input_height, input_width, channels, kernel_size, stride, padding, dilation, device); - }, - py::kw_only(), - py::arg("input_tensor"), - py::arg("batch_size"), - py::arg("input_height"), - py::arg("input_width"), - py::arg("channels"), - 
py::arg("kernel_size"), - py::arg("stride"), - py::arg("padding"), - py::arg("dilation"), - py::arg("device")); } } // namespace maxpool diff --git a/ttnn/ttnn/operations/conv/tt_py_max_pool.py b/ttnn/ttnn/operations/conv/tt_py_max_pool.py index 6df9605e41b..0ffe3045bfc 100644 --- a/ttnn/ttnn/operations/conv/tt_py_max_pool.py +++ b/ttnn/ttnn/operations/conv/tt_py_max_pool.py @@ -223,7 +223,7 @@ def max_pool_(activation): if self.deallocate_activation: activation.deallocate() - # output = pool.max_pool2d_v2( + # output = pool.max_pool2d_legacy( output = self.pool_op( haloed_act, reader_indices, diff --git a/ttnn/ttnn/operations/pool.py b/ttnn/ttnn/operations/pool.py index 12109a9a7b0..2399cec1030 100644 --- a/ttnn/ttnn/operations/pool.py +++ b/ttnn/ttnn/operations/pool.py @@ -95,7 +95,7 @@ def __init__( deallocate_activation=deallocate_activation, act_dtype=dtype, channels=channels, - pool_op=max_pool2d_v2, + pool_op=max_pool2d_legacy, ) @ttnn.register_python_operation(name="ttnn.MaxPool2d.__call__", is_method=True) @@ -154,16 +154,17 @@ def golden_maxpool2d( ) -global_avg_pool2d = ttnn._ttnn.operations.avgpool.global_avg_pool2d -avg_pool2d = ttnn._ttnn.operations.avgpool.avg_pool2d -max_pool2d = ttnn.ttnn._ttnn.operations.maxpool.max_pool2d -max_pool2d_v2 = ttnn._ttnn.operations.maxpool.max_pool2d_v2 - -ttnn.attach_golden_function(global_avg_pool2d, golden_function=golden_global_avg_pool2d) -# ttnn.attach_golden_function(avg_pool2d, golden_function=golden_global_avg_pool2d) - -# ttnn.attach_golden_function(max_pool2d_v2, golden_function=golden_maxpool2d) -# ttnn.attach_golden_function(max_pool2d_v2, golden_function=golden_maxpool2d) - +global_avg_pool2d = ttnn.register_python_operation( + name="ttnn.global_avg_pool2d", golden_function=golden_global_avg_pool2d +)(ttnn._ttnn.operations.avgpool.global_avg_pool2d) +avg_pool2d = ttnn.register_python_operation(name="ttnn.avg_pool2d", golden_function=golden_global_avg_pool2d)( + ttnn._ttnn.operations.avgpool.avg_pool2d +) +max_pool2d = ttnn.register_python_operation(name="ttnn.max_pool2d", golden_function=golden_maxpool2d)( + ttnn._ttnn.operations.maxpool.max_pool2d +) +max_pool2d_legacy = ttnn.register_python_operation(name="ttnn.max_pool2d_legacy", golden_function=golden_maxpool2d)( + ttnn._ttnn.operations.maxpool.max_pool2d_legacy +) __all__ = [] From 530fcdc5174f39c05b1bd2bdcf4df4e84790462d Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Tue, 9 Jul 2024 15:11:51 +0000 Subject: [PATCH 06/14] #9758: Refactored py files --- ...test_untilize_with_halo_and_max_pool_v2.py | 5 +- ttnn/ttnn/__init__.py | 3 +- ttnn/ttnn/operations/avg_pool2d.py | 31 ++++ .../{conv/tt_py_max_pool.py => max_pool2d.py} | 140 ++++++++++++++- ttnn/ttnn/operations/pool.py | 170 ------------------ 5 files changed, 172 insertions(+), 177 deletions(-) create mode 100644 ttnn/ttnn/operations/avg_pool2d.py rename ttnn/ttnn/operations/{conv/tt_py_max_pool.py => max_pool2d.py} (71%) delete mode 100644 ttnn/ttnn/operations/pool.py diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py index ea8f6b217b7..dba1b71b5d4 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py @@ -10,11 +10,11 @@ import torch -from ttnn.operations.pool import ( +from ttnn.operations.max_pool2d import ( 
TTPyMaxPool, SlidingWindowOpParamsWithParallelConfig, ) -from ttnn.operations.pool import max_pool2d_legacy as ttnn_max_pool2d_legacy +from ttnn.operations.max_pool2d import max_pool2d_legacy as ttnn_max_pool2d_legacy import tt_lib as ttl @@ -178,7 +178,6 @@ def test_run_max_pool( device, max_pool_reader_patterns_cache, pad_val=pad_val, - pool_op=ttnn_max_pool2d_legacy, ) ttact_sharded = max_pool.copy_input_to_device(ttact) diff --git a/ttnn/ttnn/__init__.py b/ttnn/ttnn/__init__.py index 8b309a53752..5cae3826bac 100644 --- a/ttnn/ttnn/__init__.py +++ b/ttnn/ttnn/__init__.py @@ -322,4 +322,5 @@ def prelu(*args, **kwargs): # Alias for leaky_relu. TODO(#8544): implement PReL determine_expected_group_norm_sharded_config_and_grid_size, ) from ttnn.operations.conv2d import Conv2d, Conv2dConfig, get_conv_output_dim, get_conv_padded_input_shape_and_mem_config -from ttnn.operations.pool import MaxPool2d, global_avg_pool2d, max_pool2d, average_pool_2d +from ttnn.operations.max_pool2d import TTPyMaxPool, max_pool2d, max_pool2d_legacy, MaxPool2d +from ttnn.operations.avg_pool2d import global_avg_pool2d, avg_pool2d diff --git a/ttnn/ttnn/operations/avg_pool2d.py b/ttnn/ttnn/operations/avg_pool2d.py new file mode 100644 index 00000000000..36962bd659d --- /dev/null +++ b/ttnn/ttnn/operations/avg_pool2d.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. + +# SPDX-License-Identifier: Apache-2.0 + +from typing import Tuple, Union, Dict + + +import sys +import ttnn + + +__all__ = [] + + +def golden_global_avg_pool2d(input_tensor: ttnn.Tensor): + import torch + + output_size = (1, 1) + return torch.nn.functional.adaptive_avg_pool2d(input_tensor, output_size) + + +global_avg_pool2d = ttnn.register_python_operation( + name="ttnn.global_avg_pool2d", golden_function=golden_global_avg_pool2d +)(ttnn._ttnn.operations.avgpool.global_avg_pool2d) + +avg_pool2d = ttnn.register_python_operation(name="ttnn.avg_pool2d", golden_function=golden_global_avg_pool2d)( + ttnn._ttnn.operations.avgpool.avg_pool2d +) + + +__all__ = [] diff --git a/ttnn/ttnn/operations/conv/tt_py_max_pool.py b/ttnn/ttnn/operations/max_pool2d.py similarity index 71% rename from ttnn/ttnn/operations/conv/tt_py_max_pool.py rename to ttnn/ttnn/operations/max_pool2d.py index 0ffe3045bfc..7f94464813e 100644 --- a/ttnn/ttnn/operations/conv/tt_py_max_pool.py +++ b/ttnn/ttnn/operations/max_pool2d.py @@ -23,13 +23,148 @@ calculate_memory_config, ) -from typing import Union +from typing import Union, Tuple, Dict from tt_lib.utils import _nearest_32 import tt_lib as ttl import math import torch +import ttnn + + +def golden_maxpool2d( + _input_tensor: ttnn.Tensor, + in_n: int, + in_h: int, + in_w: int, + kernel_h: int, + kernel_w: int, + stride_h: int, + stride_w: int, + pad_h: int, + pad_w: int, + dilation_h: int, + dilation_w: int, + *, + memory_config: ttnn.MemoryConfig, + nblocks: int, + use_multicore: bool, +): + import torch + + kernel_size = (kernel_h, kernel_w) + stride = (stride_h, stride_w) + padding = (pad_h, pad_w) + dilation = (dilation_h, dilation_w) + + return torch.nn.functional.max_pool2d( + _input_tensor, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation + ) + + +max_pool2d = ttnn.register_python_operation(name="ttnn.max_pool2d", golden_function=golden_maxpool2d)( + ttnn._ttnn.operations.maxpool.max_pool2d +) + +max_pool2d_legacy = ttnn.register_python_operation(name="ttnn.max_pool2d_legacy", golden_function=golden_maxpool2d)( + ttnn._ttnn.operations.maxpool.max_pool2d_legacy +) + + +class MaxPool2d: +
r""" + Applies a 2D max pooling over an input signal composed of several input planes. + + If `padding` is non-zero, then the input is implicitly padded with negative infinity on both sides for padding number of points. + `dilation` controls the spacing between the kernel points. + + Arguments: + * :attr: kernel_size (Union[int, Tuple[int, int]]): the size of the window to take a max over + * :attr: stride (Union[int, Tuple[int, int]]): the stride of the window. Default value is 1 + * :attr: padding (Union[int, Tuple[int, int]]): Implicit negative infinity padding to be added on both sides + * :attr: dilation (Union[int, Tuple[int, int]]): a parameter that controls the stride of window elements + """ + + def __init__( + self, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + dtype: ttnn.DataType = None, + *, + device: ttnn.Device, + batch_size: int, + input_height: int, + input_width: int, + reader_patterns_cache: Dict, + parallel_config_override: Dict = None, + deallocate_activation: bool = False, + channels: int = None, + ): + if isinstance(kernel_size, int): + window_h = kernel_size + window_w = kernel_size + else: + window_h, window_w = kernel_size + + if isinstance(stride, int): + stride_h = stride + stride_w = stride + else: + stride_h, stride_w = stride + + if isinstance(padding, int): + pad_h = padding + pad_w = padding + else: + pad_h, pad_w = padding + + if isinstance(dilation, int): + dilation_h = dilation + dilation_w = dilation + else: + dilation_h, dilation_w = dilation + assert dilation_h == 1, f"Only dilation_h = 1 supported. Found dilation_h={dilation_h}" + assert dilation_w == 1, f"Only dilation_w = 1 supported. Found dilation_w={dilation_w}" + + sliding_window_op_params = SlidingWindowOpParams( + stride_h=stride_h, + stride_w=stride_w, + pad_h=pad_h, + pad_w=pad_w, + window_h=window_h, + window_w=window_w, + batch_size=batch_size, + input_h=input_height, + input_w=input_width, + ) + self.max_pool = TTPyMaxPool( + sliding_window_op_params, + device, + reader_patterns_cache, + pad_val=0xF7FF, + parallel_config_override=parallel_config_override, + deallocate_activation=deallocate_activation, + act_dtype=dtype, + channels=channels, + ) + + @ttnn.register_python_operation(name="ttnn.MaxPool2d.__call__", is_method=True) + def __call__(self, activation: ttnn.Tensor): + return self.max_pool(activation) + + @ttnn.register_python_operation(name="ttnn.MaxPool2d.copy_input_to_device", is_method=True) + def copy_input_to_device(self, input: ttnn.Tensor): + return self.max_pool.copy_input_to_device(input) + + @ttnn.register_python_operation( + name="ttnn.MaxPool2d.copy_output_from_device", + is_method=True, + ) + def copy_output_from_device(self, output: ttnn.Tensor): + return self.max_pool.copy_output_from_device(output) class TTPyMaxPool(TTPyOp): @@ -223,8 +358,7 @@ def max_pool_(activation): if self.deallocate_activation: activation.deallocate() - # output = pool.max_pool2d_legacy( - output = self.pool_op( + output = max_pool2d_legacy( haloed_act, reader_indices, in_n, diff --git a/ttnn/ttnn/operations/pool.py b/ttnn/ttnn/operations/pool.py deleted file mode 100644 index 2399cec1030..00000000000 --- a/ttnn/ttnn/operations/pool.py +++ /dev/null @@ -1,170 +0,0 @@ -# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
- -# SPDX-License-Identifier: Apache-2.0 - -from typing import Tuple, Union, Dict - -import sys -import ttnn - -from ttnn.operations.conv.tt_py_max_pool import ( - TTPyMaxPool, - SlidingWindowOpParams, - SlidingWindowOpParamsWithParallelConfig, -) - -import tt_lib as ttl - -__all__ = [] - - -class MaxPool2d: - r""" - Applies a 2D max pooling over an input signal composed of several input planes. - - If `padding` is non-zero, then the input is implicitly padded with negative infinity on both sides for padding number of points. - `dilation` controls the spacing between the kernel points. - - Arguments: - * :attr: kernel_size (Union[int, Tuple[int, int]]): the size of the window to take a max over - * :attr: stride (Union[int, Tuple[int, int]]): the stride of the window. Default value is 1 - * :attr: padding (Union[int, Tuple[int, int]]): Implicit negative infinity padding to be added on both sides - * :attr: dilation (Union[int, Tuple[int, int]]): a parameter that controls the stride of window elements - """ - - def __init__( - self, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - dilation: Union[int, Tuple[int, int]] = 1, - dtype: ttnn.DataType = None, - *, - device: ttnn.Device, - batch_size: int, - input_height: int, - input_width: int, - reader_patterns_cache: Dict, - parallel_config_override: Dict = None, - deallocate_activation: bool = False, - channels: int = None, - ): - if isinstance(kernel_size, int): - window_h = kernel_size - window_w = kernel_size - else: - window_h, window_w = kernel_size - - if isinstance(stride, int): - stride_h = stride - stride_w = stride - else: - stride_h, stride_w = stride - - if isinstance(padding, int): - pad_h = padding - pad_w = padding - else: - pad_h, pad_w = padding - - if isinstance(dilation, int): - dilation_h = dilation - dilation_w = dilation - else: - dilation_h, dilation_w = dilation - assert dilation_h == 1, f"Only dilation_h = 1 supported. Found dilation_h={dilation_h}" - assert dilation_w == 1, f"Only dilation_w = 1 supported. 
Found dilation_w={dilation_w}" - - sliding_window_op_params = SlidingWindowOpParams( - stride_h=stride_h, - stride_w=stride_w, - pad_h=pad_h, - pad_w=pad_w, - window_h=window_h, - window_w=window_w, - batch_size=batch_size, - input_h=input_height, - input_w=input_width, - ) - self.max_pool = TTPyMaxPool( - sliding_window_op_params, - device, - reader_patterns_cache, - pad_val=0xF7FF, - parallel_config_override=parallel_config_override, - deallocate_activation=deallocate_activation, - act_dtype=dtype, - channels=channels, - pool_op=max_pool2d_legacy, - ) - - @ttnn.register_python_operation(name="ttnn.MaxPool2d.__call__", is_method=True) - def __call__(self, activation: ttnn.Tensor): - return self.max_pool(activation) - - @ttnn.register_python_operation(name="ttnn.MaxPool2d.copy_input_to_device", is_method=True) - def copy_input_to_device(self, input: ttnn.Tensor): - return self.max_pool.copy_input_to_device(input) - - @ttnn.register_python_operation( - name="ttnn.MaxPool2d.copy_output_from_device", - is_method=True, - ) - def copy_output_from_device(self, output: ttnn.Tensor): - return self.max_pool.copy_output_from_device(output) - - -## Average Pooling - - -def golden_global_avg_pool2d(input_tensor: ttnn.Tensor): - import torch - - output_size = (1, 1) - return torch.nn.functional.global_avg_pool2d(input_tensor, output_size) - - -def golden_maxpool2d( - _input_tensor: ttnn.Tensor, - in_n: int, - in_h: int, - in_w: int, - kernel_h: int, - kernel_w: int, - stride_h: int, - stride_w: int, - pad_h: int, - pad_w: int, - dilation_h: int, - dilation_w: int, - *, - memory_config: ttnn.MemoryConfig, - nblocks: int, - use_multicore: bool, -): - import torch - - kernel_size = (kernel_h, kernel_w) - stride = (stride_h, stride_w) - padding = (pad_h, pad_w) - dilation = (dilation_h, dilation_w) - - return torch.nn.functional.max_pool2d( - _input_tensor, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation - ) - - -global_avg_pool2d = ttnn.register_python_operation( - name="ttnn.global_avg_pool2d", golden_function=golden_global_avg_pool2d -)(ttnn._ttnn.operations.avgpool.global_avg_pool2d) -avg_pool2d = ttnn.register_python_operation(name="ttnn.avg_pool2d", golden_function=golden_global_avg_pool2d)( - ttnn._ttnn.operations.avgpool.avg_pool2d -) -max_pool2d = ttnn.register_python_operation(name="ttnn.max_pool2d", golden_function=golden_maxpool2d)( - ttnn._ttnn.operations.maxpool.max_pool2d -) -max_pool2d_legacy = ttnn.register_python_operation(name="ttnn.max_pool2d_legacy", golden_function=golden_maxpool2d)( - ttnn._ttnn.operations.maxpool.max_pool2d_legacy -) - -__all__ = [] From c4b8a0c84c6a056c62b6499aa26f779cdfa18f0e Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 13 Jul 2024 10:07:35 +0000 Subject: [PATCH 07/14] #9758: Removed array2_t --- .../pool/avgpool/avg_pool_pybind.hpp | 1 - .../maxpool/device/max_pool_multi_core.cpp | 26 +++++++++---------- .../maxpool/device/max_pool_single_core.cpp | 14 +++++----- .../ttnn/operations/pool/maxpool/max_pool.hpp | 3 +-- .../pool/maxpool/maxpool_pybind.hpp | 1 - 5 files changed, 21 insertions(+), 24 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp index 703bd9423b0..a44d75cef0d 100644 --- a/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp +++ b/ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool_pybind.hpp @@ -12,7 +12,6 @@ #include "ttnn/types.hpp" namespace py = pybind11; -using array2_t = std::array; namespace ttnn { namespace 
operations { namespace avgpool { diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp index 9e0448f42d2..ef936d9e8cd 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp @@ -91,7 +91,7 @@ uint32_t get_num_cores(const Device* device, uint32_t out_nhw, uint32_t nbatch) break; default: // TT_ASSERT(false, "General case is not yet handled! Only RN50 shapes supported in multicore."); - uint32_t out_nhw_per_core = (uint32_t)ceil((float)out_nhw / avail_ncores); + uint32_t out_nhw_per_core = (uint32_t)std::ceil((float)out_nhw / avail_ncores); ncores = out_nhw / out_nhw_per_core; while (avail_ncores > 0) { if (out_nhw % avail_ncores == 0 && (out_nhw / avail_ncores) % TILE_HEIGHT == 0) { @@ -104,7 +104,7 @@ uint32_t get_num_cores(const Device* device, uint32_t out_nhw, uint32_t nbatch) break; } } else if (device->arch() == ARCH::WORMHOLE_B0) { - uint32_t out_nhw_per_core = (uint32_t)ceil((float)out_nhw / avail_ncores); + uint32_t out_nhw_per_core = (uint32_t)std::ceil((float)out_nhw / avail_ncores); ncores = out_nhw / out_nhw_per_core; while (avail_ncores > 0) { if (out_nhw % avail_ncores == 0 && (out_nhw / avail_ncores) % TILE_HEIGHT == 0) { @@ -215,14 +215,14 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( uint32_t kernel_size_hw = kernel_size_w * kernel_size_h; // number of valid rows, to read uint32_t kernel_size_hw_padded = ceil_multiple_of(kernel_size_hw, constants::TILE_HEIGHT); - uint32_t in_ntiles_hw = (uint32_t)ceil((float)kernel_size_hw_padded / constants::TILE_HEIGHT); - uint32_t in_ntiles_c = (uint32_t)ceil((float)input_shape[3] / constants::TILE_WIDTH); - uint32_t out_ntiles_hw = (uint32_t)ceil((float)output_shape[2] / constants::TILE_HEIGHT); - uint32_t out_ntiles_c = (uint32_t)ceil((float)output_shape[3] / constants::TILE_WIDTH); + uint32_t in_ntiles_hw = (uint32_t)std::ceil((float)kernel_size_hw_padded / constants::TILE_HEIGHT); + uint32_t in_ntiles_c = (uint32_t)std::ceil((float)input_shape[3] / constants::TILE_WIDTH); + uint32_t out_ntiles_hw = (uint32_t)std::ceil((float)output_shape[2] / constants::TILE_HEIGHT); + uint32_t out_ntiles_c = (uint32_t)std::ceil((float)output_shape[3] / constants::TILE_WIDTH); uint32_t out_nelems = nblocks; // TODO [AS]: Remove hard coding after identifying optimal param val // Also ensure the calculated ncores is good - uint32_t out_w_loop_count = ceil((float)out_w / out_nelems); + uint32_t out_w_loop_count = std::ceil((float)out_w / out_nelems); uint32_t in_hw = in_h * in_w; uint32_t in_nhw = in_hw * nbatch; @@ -523,8 +523,8 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( kernel_size_hw, out_h, out_w, - (uint32_t)ceil((float)output_shape[2] / constants::TILE_HEIGHT), - (uint32_t)ceil((float)output_shape[3] / constants::TILE_WIDTH), + (uint32_t)std::ceil((float)output_shape[2] / constants::TILE_HEIGHT), + (uint32_t)std::ceil((float)output_shape[3] / constants::TILE_WIDTH), out_nelems, out_w_loop_count, nbatch, @@ -724,9 +724,9 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl uint32_t kernel_size_hw = kernel_size_w * kernel_size_h; // number of valid rows, to read uint32_t kernel_size_hw_padded = ceil_multiple_of(kernel_size_hw, constants::TILE_HEIGHT); - uint32_t in_ntiles_hw = (uint32_t)ceil((float)kernel_size_hw_padded / constants::TILE_HEIGHT); - uint32_t in_ntiles_c = 
(uint32_t)ceil((float)input_shape[3] / constants::TILE_WIDTH); - uint32_t out_ntiles_c = (uint32_t)ceil((float)output_shape[3] / constants::TILE_WIDTH); + uint32_t in_ntiles_hw = (uint32_t)std::ceil((float)kernel_size_hw_padded / constants::TILE_HEIGHT); + uint32_t in_ntiles_c = (uint32_t)std::ceil((float)input_shape[3] / constants::TILE_WIDTH); + uint32_t out_ntiles_c = (uint32_t)std::ceil((float)output_shape[3] / constants::TILE_WIDTH); TT_ASSERT(nblocks == 1, "Multiple blocks not yet supported"); @@ -735,7 +735,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl TT_FATAL(input_shape[3] == 16); tile_w = constants::FACE_WIDTH; } - uint32_t out_w_loop_count = ceil((float)out_w / nblocks); + uint32_t out_w_loop_count = std::ceil((float)out_w / nblocks); // distributing out_hw across the grid auto grid_size = device->compute_with_storage_grid_size(); diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp index 337290ca2d3..fe3ec7757db 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp @@ -55,13 +55,13 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten uint32_t kernel_size_hw = kernel_size_w * kernel_size_h; // number of valid rows, to read uint32_t kernel_size_hw_padded = ceil_multiple_of(kernel_size_hw, constants::TILE_HEIGHT); - uint32_t in_ntiles_hw = (uint32_t) ceil((float) kernel_size_hw_padded / constants::TILE_HEIGHT); - uint32_t in_ntiles_c = (uint32_t) ceil((float) input_shape[3] / constants::TILE_WIDTH); - uint32_t out_ntiles_hw = (uint32_t) ceil((float) output_shape[2] / constants::TILE_HEIGHT); - uint32_t out_ntiles_c = (uint32_t) ceil((float) output_shape[3] / constants::TILE_WIDTH); + uint32_t in_ntiles_hw = (uint32_t) std::ceil((float) kernel_size_hw_padded / constants::TILE_HEIGHT); + uint32_t in_ntiles_c = (uint32_t) std::ceil((float) input_shape[3] / constants::TILE_WIDTH); + uint32_t out_ntiles_hw = (uint32_t) std::ceil((float) output_shape[2] / constants::TILE_HEIGHT); + uint32_t out_ntiles_c = (uint32_t) std::ceil((float) output_shape[3] / constants::TILE_WIDTH); uint32_t out_nelems = nblocks; // TODO [AS]: Remove hard coding after identifying optimal param val - uint32_t out_w_loop_count = ceil((float) out_w / out_nelems); + uint32_t out_w_loop_count = std::ceil((float) out_w / out_nelems); uint32_t in_hw = in_h * in_w; uint32_t out_hw = out_h * out_w; @@ -221,8 +221,8 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten kernel_size_hw_padded, out_h, out_w, - (uint32_t) ceil((float) output_shape[2] / constants::TILE_HEIGHT), - (uint32_t) ceil((float) output_shape[3] / constants::TILE_WIDTH), + (uint32_t) std::ceil((float) output_shape[2] / constants::TILE_HEIGHT), + (uint32_t) std::ceil((float) output_shape[3] / constants::TILE_WIDTH), out_nelems, out_w_loop_count, nbatch, diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp index 305af0c8106..3723acfe8d7 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp @@ -181,10 +181,9 @@ Tensor maxpool2d_new(const Tensor &input, namespace ttnn::operations { namespace maxpool { -using array2_t = std::array; // maxpool macro-op -inline Tensor maxpool2d(const Tensor& input_tensor, uint32_t batch_size, 
uint32_t input_h, uint32_t input_w, uint32_t channels, array2_t kernel_size, array2_t stride, array2_t padding, array2_t dilation, Device& device) { +inline Tensor maxpool2d(const Tensor& input_tensor, uint32_t batch_size, uint32_t input_h, uint32_t input_w, uint32_t channels, std::array kernel_size, std::array stride, std::array padding, std::array dilation, Device& device) { MemoryConfig memory_config = input_tensor.memory_config(); const auto shard_grid = memory_config.shard_spec.value().grid; const auto shard_scheme = memory_config.memory_layout; diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp index 68daadcb98a..4338fcbb1e6 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/maxpool_pybind.hpp @@ -13,7 +13,6 @@ #include "ttnn/types.hpp" namespace py = pybind11; -using array2_t = std::array; namespace ttnn { namespace operations { namespace maxpool { From 618b8a2fbf7bc298d88edabd324af2c20dbb34ff Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 13 Jul 2024 10:24:03 +0000 Subject: [PATCH 08/14] #9758: Single pool submodule --- models/demos/resnet/tt/metalResnetBlock50.py | 2 +- ttnn/cpp/pybind11/operations/__init__.hpp | 8 ++--- ttnn/ttnn/__init__.py | 3 +- ttnn/ttnn/operations/avg_pool2d.py | 31 ------------------- .../operations/{max_pool2d.py => pool.py} | 20 ++++++++++-- 5 files changed, 23 insertions(+), 41 deletions(-) delete mode 100644 ttnn/ttnn/operations/avg_pool2d.py rename ttnn/ttnn/operations/{max_pool2d.py => pool.py} (96%) diff --git a/models/demos/resnet/tt/metalResnetBlock50.py b/models/demos/resnet/tt/metalResnetBlock50.py index d775467e96c..c92543fc359 100644 --- a/models/demos/resnet/tt/metalResnetBlock50.py +++ b/models/demos/resnet/tt/metalResnetBlock50.py @@ -27,7 +27,7 @@ TTPyCompositeConv, SlidingWindowOpParamsWithParallelConfig, ) -from ttnn.operations.conv.tt_py_max_pool import TTPyMaxPool +from ttnn.operations.pool import TTPyMaxPool from models.utility_functions import ( _nearest_32, diff --git a/ttnn/cpp/pybind11/operations/__init__.hpp b/ttnn/cpp/pybind11/operations/__init__.hpp index c23d5f0d847..dc40707d640 100644 --- a/ttnn/cpp/pybind11/operations/__init__.hpp +++ b/ttnn/cpp/pybind11/operations/__init__.hpp @@ -91,11 +91,9 @@ void py_module(py::module& module) { auto m_conv2d = module.def_submodule("conv2d", "conv2d operation"); conv2d::py_module(m_conv2d); - auto m_maxpool = module.def_submodule("maxpool", "maxpool operation"); - maxpool::py_module(m_maxpool); - - auto m_avgpool = module.def_submodule("avgpool", "avgpool operation"); - avgpool::py_module(m_avgpool); + auto m_pool = module.def_submodule("pool", "pooling operations"); + maxpool::py_module(m_pool); + avgpool::py_module(m_pool); auto m_normalization = module.def_submodule("normalization", "normalization operations"); normalization::py_module(m_normalization); diff --git a/ttnn/ttnn/__init__.py b/ttnn/ttnn/__init__.py index 5cae3826bac..ec97f0792a0 100644 --- a/ttnn/ttnn/__init__.py +++ b/ttnn/ttnn/__init__.py @@ -322,5 +322,4 @@ def prelu(*args, **kwargs): # Alias for leaky_relu. 
TODO(#8544): implement PReL determine_expected_group_norm_sharded_config_and_grid_size, ) from ttnn.operations.conv2d import Conv2d, Conv2dConfig, get_conv_output_dim, get_conv_padded_input_shape_and_mem_config -from ttnn.operations.max_pool2d import TTPyMaxPool, max_pool2d, max_pool2d_legacy, MaxPool2d -from ttnn.operations.avg_pool2d import global_avg_pool2d, avg_pool2d +from ttnn.operations.pool import TTPyMaxPool, max_pool2d, max_pool2d_legacy, MaxPool2d, global_avg_pool2d, avg_pool2d diff --git a/ttnn/ttnn/operations/avg_pool2d.py b/ttnn/ttnn/operations/avg_pool2d.py deleted file mode 100644 index 36962bd659d..00000000000 --- a/ttnn/ttnn/operations/avg_pool2d.py +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. - -# SPDX-License-Identifier: Apache-2.0 - -from typing import Tuple, Union, Dict - - -import sys -import ttnn - - -__all__ = [] - - -def golden_global_avg_pool2d(input_tensor: ttnn.Tensor): - import torch - - output_size = (1, 1) - return torch.nn.functional.adaptive_avg_pool2d(input_tensor, output_size) - - -global_avg_pool2d = ttnn.register_python_operation( - name="ttnn.global_avg_pool2d", golden_function=golden_global_avg_pool2d -)(ttnn._ttnn.operations.avgpool.global_avg_pool2d) - -avg_pool2d = ttnn.register_python_operation(name="ttnn.avg_pool2d", golden_function=golden_global_avg_pool2d)( - ttnn._ttnn.operations.avgpool.avg_pool2d -) - - -__all__ = [] diff --git a/ttnn/ttnn/operations/max_pool2d.py b/ttnn/ttnn/operations/pool.py similarity index 96% rename from ttnn/ttnn/operations/max_pool2d.py rename to ttnn/ttnn/operations/pool.py index 7f94464813e..0b7a588a591 100644 --- a/ttnn/ttnn/operations/max_pool2d.py +++ b/ttnn/ttnn/operations/pool.py @@ -64,11 +64,11 @@ max_pool2d = ttnn.register_python_operation(name="ttnn.max_pool2d", golden_function=golden_maxpool2d)( - ttnn._ttnn.operations.maxpool.max_pool2d + ttnn._ttnn.operations.pool.max_pool2d ) max_pool2d_legacy = ttnn.register_python_operation(name="ttnn.max_pool2d_legacy", golden_function=golden_maxpool2d)( - ttnn._ttnn.operations.maxpool.max_pool2d_legacy + ttnn._ttnn.operations.pool.max_pool2d_legacy ) @@ -416,3 +416,19 @@ def copy_output_from_device(self, output_d: ttnn.Tensor): interleaved_mem_config = ttnn.MemoryConfig(ttnn.TensorMemoryLayout.INTERLEAVED, ttnn.BufferType.DRAM) output_d = ttl.tensor.sharded_to_interleaved(output_d, interleaved_mem_config) return output_d.cpu() + + +def golden_global_avg_pool2d(input_tensor: ttnn.Tensor): + import torch + + output_size = (1, 1) + return torch.nn.functional.adaptive_avg_pool2d(input_tensor, output_size) + + +global_avg_pool2d = ttnn.register_python_operation( + name="ttnn.global_avg_pool2d", golden_function=golden_global_avg_pool2d +)(ttnn._ttnn.operations.pool.global_avg_pool2d) + +avg_pool2d = ttnn.register_python_operation(name="ttnn.avg_pool2d", golden_function=golden_global_avg_pool2d)( + ttnn._ttnn.operations.pool.avg_pool2d +) From 36fee51d2ba6c0b777a4db6b0cd51660f850384a Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 13 Jul 2024 12:00:43 +0000 Subject: [PATCH 09/14] #9758: Fixed maxpool test --- .../misc/test_untilize_with_halo_and_max_pool_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py index dba1b71b5d4..fd2376f13d3 100644 ---
a/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_untilize_with_halo_and_max_pool_v2.py @@ -10,11 +10,11 @@ import torch -from ttnn.operations.max_pool2d import ( +from ttnn.operations.pool import ( TTPyMaxPool, SlidingWindowOpParamsWithParallelConfig, ) -from ttnn.operations.max_pool2d import max_pool2d_legacy as ttnn_max_pool2d_legacy +from ttnn.operations.pool import max_pool2d_legacy as ttnn_max_pool2d_legacy import tt_lib as ttl From 3752aaa152614420b2dbac77814d4eff8f83e8b7 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 20 Jul 2024 06:33:25 +0000 Subject: [PATCH 10/14] #9758: Fixed build file --- ttnn/CMakeLists.txt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ttnn/CMakeLists.txt b/ttnn/CMakeLists.txt index 978abd1f1fd..ae8cacab89a 100644 --- a/ttnn/CMakeLists.txt +++ b/ttnn/CMakeLists.txt @@ -58,14 +58,10 @@ set(TTNN_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/normalization/groupnorm/device/multi_core/groupnorm_op_multi_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/device/transformer_device_operation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/average_pool.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool_multi_core.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool_single_core.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/max_pool.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/avgpool/average_pool.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/max_pool.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp ) ### Setup TTNN as a shared library with optional Python bindings From 5b4165f803d5d1b2aacb273725b9eac73abed5a3 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Fri, 19 Jul 2024 08:52:32 +0000 Subject: [PATCH 11/14] #9758: Rebase fix --- .../ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp | 2 +- .../operations/pool/maxpool/device/max_pool_single_core.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp index ef936d9e8cd..1ebf3dd6d2b 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp @@ -8,7 +8,7 @@ #include "detail/util.hpp" #include "tensor/host_buffer/functions.hpp" #include "tensor/tensor_utils.hpp" -#include "ttnn/operations/pool/max_pool.hpp" +#include "ttnn/operations/pool/maxpool/max_pool.hpp" #include "tt_dnn/op_library/reduce/reduce_op.hpp" // for reduce_op_utils #include "tt_dnn/op_library/sharding_utilities.hpp" #include "tt_dnn/op_library/sliding_window_op_infra/sliding_window.hpp" diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp index fe3ec7757db..4054102ed1f 100644 --- 
a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp @@ -8,7 +8,7 @@ #include "detail/util.hpp" #include "tensor/host_buffer/functions.hpp" #include "tensor/tensor_utils.hpp" -#include "ttnn/operations/pool/max_pool.hpp" +#include "ttnn/operations/pool/maxpool/max_pool.hpp" #include "tt_dnn/op_library/reduce/reduce_op.hpp" // for reduce_op_utils #include "tt_dnn/op_library/work_split.hpp" #include "tt_metal/host_api.hpp" From 5cfcb900a0c72f1f54255af9d89096cd58b3c685 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Fri, 19 Jul 2024 11:05:49 +0000 Subject: [PATCH 12/14] #9758: Fixed kernel paths --- .../pool/maxpool/device/max_pool_multi_core.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp index 1ebf3dd6d2b..702ea6a4a1d 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp @@ -492,10 +492,10 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( if (input.memory_config().is_sharded()) { // sharded, without halo reader_kernel_fname = - std::string("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp"); + std::string("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded.cpp"); } else { reader_kernel_fname = - std::string("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp"); + std::string("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core.cpp"); } auto reader_kernel = CreateKernel(program, reader_kernel_fname, all_cores, reader_config); @@ -509,7 +509,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( std::vector writer_ct_args = reader_ct_args; auto writer_config = WriterDataMovementConfig(writer_ct_args, writer_defines); std::string writer_kernel_fname( - "ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp"); + "ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_multi_core.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, all_cores, writer_config); /** @@ -542,7 +542,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_generic( .math_approx_mode = false, .compile_args = compute_ct_args, .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)}; - std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp"); + std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/compute/max_pool_multi_core.cpp"); auto compute_kernel = CreateKernel(program, compute_kernel_fname, core_range, compute_config); if (out_nhw_per_core_cliff > 0) { @@ -942,7 +942,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl bf16_one_u32}; std::string reader_kernel_fname( - "ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp"); + "ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_multi_core_sharded_with_halo_v2.cpp"); auto reader0_config = DataMovementConfig{ .processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default, .compile_args = 
reader0_ct_args}; @@ -973,7 +973,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl .compile_args = writer_ct_args, .defines = writer_defines}; std::string - writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp"); auto + writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_multi_core_v2.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, all_cores, writer_config); */ @@ -1007,7 +1007,7 @@ operation::ProgramWithCallbacks max_pool_2d_multi_core_sharded_with_halo_v2_impl .math_approx_mode = false, .compile_args = compute_ct_args, .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)}; - std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool_multi_core.cpp"); + std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/compute/max_pool_multi_core.cpp"); auto compute_kernel = CreateKernel(program, compute_kernel_fname, core_range, compute_config); /* From bba1a103eef702d2a91b85d722c641139296b984 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Fri, 19 Jul 2024 12:51:56 +0000 Subject: [PATCH 13/14] #9758: Fixed kernel paths --- .../python_api_testing/unit_testing/misc/test_max_pool.py | 4 ++-- .../operations/pool/maxpool/device/max_pool_single_core.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py index ef3c11fe5d0..661d0a4588a 100644 --- a/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py +++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_max_pool.py @@ -171,8 +171,8 @@ def test_run_max_pool( f"Skipping over Resnet specific config where parallelization does not fit on core grid {compute_grid_size}" ) - if (compute_grid_size.x * compute_grid_size.y) == ncores_on_n300: - pytest.skip(f"Skipping on N300 (8x7 core grid) due to bug https://github.com/tenstorrent/tt-metal/issues/5458") + # if (compute_grid_size.x * compute_grid_size.y) == ncores_on_n300: + # pytest.skip(f"Skipping on N300 (8x7 core grid) due to bug https://github.com/tenstorrent/tt-metal/issues/5458") torch.set_printoptions(precision=3, sci_mode=False, linewidth=500, threshold=10000, edgeitems=32) diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp index 4054102ed1f..d8fce28c364 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp @@ -151,7 +151,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten (in_cb_page_nelems_padded * out_nelems * 2) >> 5 // TODO: generalize num rows to fill in in_cb }; auto reader_config = ReaderDataMovementConfig(reader_ct_args); - std::string reader_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp"); + std::string reader_kernel_fname("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/reader_max_pool_2d_single_core.cpp"); auto reader_kernel = CreateKernel(program, reader_kernel_fname, cores, @@ -200,7 +200,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten std::vector writer_ct_args = reader_ct_args; std::vector writer_rt_args = 
reader_rt_args; auto writer_config = WriterDataMovementConfig(writer_ct_args); - std::string writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp"); + std::string writer_kernel_fname("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/dataflow/writer_max_pool_2d_single_core.cpp"); auto writer_kernel = CreateKernel(program, writer_kernel_fname, cores, @@ -228,7 +228,7 @@ operation::ProgramWithCallbacks max_pool_2d_single_core(const Tensor &input, Ten nbatch, out_h}, // out_h_per_core .defines = reduce_op_utils::get_defines(reduce_op, reduce_dim)}; - std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/device/kernels/compute/max_pool.cpp"); + std::string compute_kernel_fname("ttnn/cpp/ttnn/operations/pool/maxpool/device/kernels/compute/max_pool.cpp"); auto compute_kernel = CreateKernel(program, compute_kernel_fname, cores, From 96beec7cd48bceaeb91c9166f08493974e95be44 Mon Sep 17 00:00:00 2001 From: Sankar Manoj Date: Sat, 20 Jul 2024 14:56:26 +0000 Subject: [PATCH 14/14] #9758: Fixed include --- ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp index 3723acfe8d7..4fa8c3752c4 100644 --- a/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp +++ b/ttnn/cpp/ttnn/operations/pool/maxpool/max_pool.hpp @@ -7,7 +7,7 @@ #include "ttnn/core.hpp" #include "ttnn/types.hpp" #include "tensor/tensor.hpp" -#include "ttnn/cpp/ttnn/operations/conv2d.hpp" +#include "ttnn/operations/conv2d/conv2d.hpp" #include "ttnn/experimental/tt_dnn/op_library/run_operation.hpp" #include "ttnn/experimental/tt_dnn/op_library/sliding_window_op_infra/sliding_window.hpp"