
Move pool to TTNN #9855

Merged (14 commits) on Jul 20, 2024
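In summary, the diffs below move the pooling ops out of tt_lib / tt_dnn and into ttnn: the Python entry points become ttnn.avg_pool2d and ttnn.max_pool2d, the TTPyMaxPool helper moves from ttnn.operations.conv.tt_py_max_pool to ttnn.operations.pool, the C++ sources move under ttnn/cpp/ttnn/operations/pool, and the standalone maxpool2d pybind module is folded into a single pool submodule. A rough before/after map of the call sites touched below (a sketch drawn from the diffs, not an exhaustive list):

# Sketch (not part of the diff): old -> new pooling entry points per the diffs below.
# ttl.tensor.average_pool_2d(x)                       -> ttnn.avg_pool2d(x)
# ttl.tensor.max_pool2d(x, ..., mem_cfg, nblocks, mc) -> ttnn.max_pool2d(x, ..., memory_config=mem_cfg, nblocks=nblocks, use_multicore=mc)
# from ttnn.operations.conv.tt_py_max_pool import TTPyMaxPool
#                                                     -> from ttnn.operations.pool import TTPyMaxPool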
2 changes: 1 addition & 1 deletion models/demos/resnet/tt/metalResnetBlock50.py
@@ -27,7 +27,7 @@
TTPyCompositeConv,
SlidingWindowOpParamsWithParallelConfig,
)
from ttnn.operations.conv.tt_py_max_pool import TTPyMaxPool
from ttnn.operations.pool import TTPyMaxPool

from models.utility_functions import (
_nearest_32,
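For callers of the composite max-pool helper (as in the ResNet block above), only the import path changes; the class name is unchanged. A minimal sketch of the updated import, with the old path kept as a comment for contrast:

# Sketch (not part of the diff): the updated import used by the ResNet block.
# Pre-#9855: from ttnn.operations.conv.tt_py_max_pool import TTPyMaxPool
from ttnn.operations.pool import TTPyMaxPool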
5 changes: 3 additions & 2 deletions tests/tt_eager/ops/test_average_pool.cpp
@@ -2,8 +2,9 @@
//
// SPDX-License-Identifier: Apache-2.0

#include "ttnn/experimental/tt_dnn/op_library/pool/average_pool.hpp"
#include "ttnn/cpp/ttnn/operations/pool/avgpool/avg_pool.hpp"
#include "ttnn/experimental/tt_dnn/op_library/auto_format.hpp"
#include "tt_dnn/op_library/auto_format.hpp"
#include "tt_numpy/functions.hpp"

#include "tensor/tensor.hpp"
@@ -24,7 +25,7 @@ Tensor run_avg_pool_2d_resnet(Shape& tensor_shape, Device* device) {
if (!AutoFormat::check_input_tensor_format(input_tensor, padded_input_shape)) {
padded_input_tensor = AutoFormat::format_input_tensor(input_tensor, device, padded_input_shape, 0, Layout::TILE); // pad with 0s
}
auto device_output = average_pool_2d(padded_input_tensor);
auto device_output = avg_pool2d(padded_input_tensor);
return device_output.cpu();
};

@@ -11,6 +11,7 @@

from tt_lib.utils import _nearest_32
from models.utility_functions import comp_pcc
import ttnn

TILE_HEIGHT = TILE_WIDTH = 32

@@ -63,7 +64,7 @@ def test_run_average_pool(act_shape, dtype, device, use_program_cache, enable_as
ttact_res = ttact.to(device)

def run_ops(ttact_res):
return ttl.tensor.average_pool_2d(ttact_res)
return ttnn.avg_pool2d(ttact_res)

# Compile
run_ops(ttact_res)
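The average-pool call in these tests changes only in namespace and name; input preparation (tilizing, padding, moving to device) stays the same. A minimal sketch, assuming ttact_res is already a tiled device tensor as prepared in the test above:

import ttnn

# Sketch (not part of the diff). `ttact_res` is assumed to be a tiled device
# tensor holding the activation, as set up in the test above.
def run_ops(ttact_res):
    # Previously ttl.tensor.average_pool_2d(ttact_res); only the entry point changed.
    return ttnn.avg_pool2d(ttact_res)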
@@ -12,6 +12,8 @@
from tt_lib.utils import _nearest_32
from models.utility_functions import comp_pcc

import ttnn

TILE_HEIGHT = TILE_WIDTH = 32


@@ -43,7 +45,7 @@ def test_run_average_pool(act_shape, dtype, device):
ttact = ttact.pad_to_tile(0.0)
ttact = ttact.to(device)

out = ttl.tensor.average_pool_2d(ttact)
out = ttnn.avg_pool2d(ttact)

out = out.cpu().to(ttl.tensor.Layout.ROW_MAJOR)
out_shape = [batch_size, 1, 1, channels]
@@ -17,6 +17,7 @@

from functools import reduce
import operator
import ttnn


def volume(shape):
@@ -170,8 +171,8 @@ def test_run_max_pool(
f"Skipping over Resnet specific config where parallelization does not fit on core grid {compute_grid_size}"
)

if (compute_grid_size.x * compute_grid_size.y) == ncores_on_n300:
pytest.skip(f"Skipping on N300 (8x7 core grid) due to bug https://github.com/tenstorrent/tt-metal/issues/5458")
# if (compute_grid_size.x * compute_grid_size.y) == ncores_on_n300:
# pytest.skip(f"Skipping on N300 (8x7 core grid) due to bug https://github.com/tenstorrent/tt-metal/issues/5458")

torch.set_printoptions(precision=3, sci_mode=False, linewidth=500, threshold=10000, edgeitems=32)

@@ -236,7 +237,7 @@ def test_run_max_pool(
else:
ttact = ttact.to(device, in_mem_config)

out_padded = ttl.tensor.max_pool2d(
out_padded = ttnn.max_pool2d(
ttact,
in_n,
in_h,
@@ -249,9 +250,9 @@
pad_w,
dilation_h,
dilation_w,
out_mem_config,
nblocks,
use_multicore,
memory_config=out_mem_config,
nblocks=nblocks,
use_multicore=use_multicore,
)
if out_mem_config.is_sharded():
out_padded = ttl.tensor.sharded_to_interleaved(out_padded, interleaved_mem_config)
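Besides moving to the ttnn namespace, max_pool2d's trailing arguments (memory config, block count, multicore flag) are now passed by keyword. The sketch below mirrors the call in the test above; the middle positional arguments (kernel, stride, padding, dilation sizes) are elided in the diff, so their exact order here is an assumption:

# Sketch (not part of the diff). All variables mirror the test above; the
# positional order of the size arguments is an assumption where the diff
# elides them.
out_padded = ttnn.max_pool2d(
    ttact,
    in_n, in_h, in_w,
    kernel_h, kernel_w,
    stride_h, stride_w,
    pad_h, pad_w,
    dilation_h, dilation_w,
    memory_config=out_mem_config,  # was positional
    nblocks=nblocks,               # was positional
    use_multicore=use_multicore,   # was positional
)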
@@ -14,6 +14,7 @@
from tt_lib.utils import _nearest_32
from tests.tt_eager.python_api_testing.sweep_tests.comparison_funcs import comp_pcc
from models.utility_functions import is_wormhole_b0
import ttnn


def volume(shape):
@@ -186,7 +187,7 @@ def test_run_max_pool(
# ttl.device.DumpDeviceMemoryState(device)
ttact_sharded.deallocate()

out_padded = ttl.tensor.max_pool2d(
out_padded = ttnn.max_pool2d(
out_untilize,
in_n,
in_h,
@@ -199,9 +200,9 @@
pad_w,
dilation_h,
dilation_w,
out_mem_config,
nblocks,
True,
memory_config=out_mem_config,
nblocks=nblocks,
use_multicore=True,
)
out_padded = ttl.tensor.sharded_to_interleaved(out_padded, interleaved_mem_config)
out_padded = out_padded.cpu().to(ttl.tensor.Layout.ROW_MAJOR)
@@ -9,10 +9,13 @@

import torch

from ttnn.operations.conv.tt_py_max_pool import (

from ttnn.operations.pool import (
TTPyMaxPool,
SlidingWindowOpParamsWithParallelConfig,
)
from ttnn.operations.pool import max_pool2d_legacy as ttnn_max_pool2d_legacy


import tt_lib as ttl
from tt_lib.utils import _nearest_32
@@ -170,7 +173,12 @@ def test_run_max_pool(
assert kernel_w == kernel_h and stride_w == stride_h and pad_w == pad_h and dilation_w == dilation_h

max_pool_reader_patterns_cache = {}
max_pool = TTPyMaxPool(sliding_window_op_params, device, max_pool_reader_patterns_cache, pad_val=pad_val)
max_pool = TTPyMaxPool(
sliding_window_op_params,
device,
max_pool_reader_patterns_cache,
pad_val=pad_val,
)
ttact_sharded = max_pool.copy_input_to_device(ttact)

out_padded = max_pool(ttact_sharded)
4 changes: 4 additions & 0 deletions ttnn/CMakeLists.txt
@@ -58,6 +58,10 @@ set(TTNN_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/normalization/groupnorm/device/multi_core/groupnorm_op_multi_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/transformer/device/transformer_device_operation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/eltwise/binary/device/binary_composite_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/avgpool/avg_pool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_multi_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_single_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/pool/maxpool/device/max_pool_program_factory.cpp
)

### Setup TTNN as a shared library with optional Python bindings
14 changes: 6 additions & 8 deletions ttnn/cpp/pybind11/operations/__init__.hpp
@@ -13,9 +13,10 @@
#include "pybind11/operations/core.hpp"
#include "pybind11/operations/creation.hpp"
#include "pybind11/operations/kv_cache.hpp"
#include "pybind11/operations/maxpool2d.hpp"
#include "pybind11/operations/pool.hpp"
#include "pybind11/operations/ternary.hpp"

#include "ttnn/operations/pool/avgpool/avg_pool_pybind.hpp"
#include "ttnn/operations/pool/maxpool/maxpool_pybind.hpp"
#include "ttnn/operations/eltwise/binary/binary_pybind.hpp"
#include "ttnn/operations/eltwise/binary_backward/binary_backward_pybind.hpp"
#include "ttnn/operations/conv2d/conv2d_pybind.hpp"
@@ -34,7 +35,6 @@
#include "ttnn/operations/eltwise/complex_binary_backward/complex_binary_backward_pybind.hpp"
#include "ttnn/operations/experimental/experimental_pybind.hpp"


namespace py = pybind11;

namespace ttnn {
@@ -91,8 +91,9 @@ void py_module(py::module& module) {
auto m_conv2d = module.def_submodule("conv2d", "conv2d operation");
conv2d::py_module(m_conv2d);

auto m_maxpool2d = module.def_submodule("maxpool2d", "maxpool 2d operation");
maxpool2d::py_module(m_maxpool2d);
auto m_pool = module.def_submodule("pool", "pooling operations");
maxpool::py_module(m_pool);
avgpool::py_module(m_pool);

auto m_normalization = module.def_submodule("normalization", "normalization operations");
normalization::py_module(m_normalization);
@@ -106,9 +107,6 @@
auto m_kv_cache = module.def_submodule("kv_cache", "KV cache operations");
kv_cache::py_module(m_kv_cache);

auto m_pool = module.def_submodule("pool", "pool operations");
pool::py_module(m_pool);

auto m_copy = module.def_submodule("copy", "copy operations");
copy::py_module(m_copy);

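With the pybind changes above, the separate maxpool2d submodule is gone and both pooling ops register into a single pool submodule; the Python-facing names used by the updated tests are unchanged. A small sanity-check sketch (the top-level re-exports are inferred from how the tests above call the ops, so treat them as an assumption):

import ttnn

# Sketch (not part of the diff): the entry points exercised by the updated tests.
assert callable(ttnn.avg_pool2d)  # assumed still re-exported at the top level
assert callable(ttnn.max_pool2d)  # assumed still re-exported at the top level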
47 changes: 0 additions & 47 deletions ttnn/cpp/pybind11/operations/maxpool2d.hpp

This file was deleted.

4 changes: 0 additions & 4 deletions ttnn/cpp/ttnn/experimental/tt_dnn/op_library/CMakeLists.txt
@@ -25,10 +25,6 @@ set(TT_DNN_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/non_zero_indices/non_zero_indices_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fill_rm/fill_rm_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/fully_connected/fully_connected_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pool/average_pool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pool/max_pool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pool/max_pool_single_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pool/max_pool_multi_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transpose/transpose_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transpose/wh_multi_core/transpose_wh_op_multi_core.cpp
${CMAKE_CURRENT_SOURCE_DIR}/transpose/hc_multi_core/transpose_hc_op_multi_core.cpp
