#9999: remove old maxpools (#12243)
* #9999: remove old maxpools

* #9999: remove old unit tests and move nondivis set over to new

* #9999: minor

* #9999: removing more old maxpool usages

* #9999: removing more old maxpool usages

* #9999: Another left-over test set

* #9999: max_pool2d_new -> max_pool2d

* #9999: Update remaining large resnets

* #9999: remove suffix new
mywoodstock authored Sep 9, 2024
1 parent fba4a2d commit 2b5d9e4
Showing 40 changed files with 415 additions and 5,369 deletions.
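
The commit messages above describe replacing the class-based ttnn.MaxPool2d op with the functional ttnn.max_pool2d call (and dropping the temporary max_pool2d_new name). Below is a minimal before/after sketch of that migration, assembled from the hunks in this commit; it assumes a device handle, a batch_size, and an activation tensor x already in scope, and the concrete sizes and channel count are illustrative placeholders rather than values from any single call site.

# Before (removed in this commit): an op object built per model, with per-model caches
reader_patterns_cache = {}
max_pool = ttnn.MaxPool2d(
    kernel_size=(3, 3),
    stride=(2, 2),
    padding=(1, 1),
    dilation=(1, 1),
    dtype=ttnn.bfloat16,
    device=device,
    batch_size=batch_size,
    input_height=112,
    input_width=112,
    reader_patterns_cache=reader_patterns_cache,
    deallocate_activation=True,
    parallel_config_override={},
    channels=64,
)
x = max_pool(x)

# After: one stateless call, so there is no reader-patterns cache to construct or clear
x = ttnn.max_pool2d(
    input_tensor=x,
    batch_size=batch_size,
    input_h=112,
    input_w=112,
    channels=64,
    kernel_size=[3, 3],
    stride=[2, 2],
    padding=[1, 1],
    dilation=[1, 1],
    device=device,
)
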
2 changes: 1 addition & 1 deletion docs/source/ttnn/ttnn/api.rst
@@ -443,7 +443,7 @@ Pooling
:maxdepth: 1

ttnn/global_avg_pool2d
ttnn/MaxPool2d
ttnn/max_pool2d

Vision
========
6 changes: 0 additions & 6 deletions docs/source/ttnn/ttnn/ttnn/MaxPool2d.rst

This file was deleted.

6 changes: 6 additions & 0 deletions docs/source/ttnn/ttnn/ttnn/max_pool2d.rst
@@ -0,0 +1,6 @@
.. _ttnn.max_pool2d:

ttnn.max_pool2d
###############

.. autofunction:: ttnn.max_pool2d
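
For reference, a minimal standalone sketch of calling the newly documented ttnn.max_pool2d, using the keyword arguments seen in the model code below. It assumes the activation is an NHWC tensor flattened to [1, 1, N*H*W, C] in row-major layout and that the op accepts it in this interleaved form (in the models below the input arrives sharded from a preceding conv); the shapes are illustrative.

import torch
import ttnn

device = ttnn.open_device(device_id=0)

# Illustrative NHWC activation flattened to [1, 1, N*H*W, C], in bfloat16
torch_input = torch.randn(1, 1, 56 * 56, 64, dtype=torch.bfloat16)
x = ttnn.from_torch(torch_input, layout=ttnn.ROW_MAJOR_LAYOUT, device=device)

# Same 3x3 / stride-2 / pad-1 configuration the ResNet variants below use after conv1
x = ttnn.max_pool2d(
    input_tensor=x,
    batch_size=1,
    input_h=56,
    input_w=56,
    channels=64,
    kernel_size=[3, 3],
    stride=[2, 2],
    padding=[1, 1],
    dilation=[1, 1],
    device=device,
)

ttnn.close_device(device)
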
@@ -395,25 +395,6 @@ def __init__(
self.conv1_output_channels = self.conv1_weight_tensor.shape[0]
assert self.conv1_weight_tensor.shape[2] == 4

self.max_pool_reader_patterns_cache = {}
max_pool_parallel_config_override = {}

self.max_pool = ttnn.MaxPool2d(
kernel_size=(3, 3),
stride=(2, 2),
padding=(1, 1),
dilation=(1, 1),
dtype=ttnn.bfloat16,
device=self.device,
batch_size=self.batch_size,
input_height=256,
input_width=256,
reader_patterns_cache=self.max_pool_reader_patterns_cache,
deallocate_activation=True,
parallel_config_override=max_pool_parallel_config_override,
channels=self.conv1_output_channels,
)

self.layer1 = self._make_layer(
parameters=parameters.layer1,
planes=64,
@@ -481,7 +462,6 @@ def __init__(
def __del__(self):
# Need to clear global configs for each Resnet run
self.conv_op_cache.clear()
self.max_pool_reader_patterns_cache.clear()

def _make_layer(
self,
@@ -570,15 +550,18 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
)
# Relu is fused with conv1

if self.batch_size == 20:
x = ttnn.reallocate(x)

if is_wormhole_b0() and self.batch_size == 20:
# TODO: fix the need to do the reshard here
x = ttnn.to_memory_config(x, ttnn.L1_MEMORY_CONFIG)
x = ttnn.to_layout(x, ttnn.ROW_MAJOR_LAYOUT)
x = ttnn.to_memory_config(x, self.max_pool.max_pool.input_sharded_memory_config)
x = self.max_pool(x)
x = ttnn.max_pool2d(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)

x_height = 128
x_width = 128
@@ -868,15 +851,18 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
)
# Relu is fused with conv1

if self.batch_size == 20:
x = ttnn.reallocate(x)

if is_wormhole_b0() and self.batch_size == 20:
# TODO: fix the need to do the reshard here
x = ttnn.to_memory_config(x, ttnn.L1_MEMORY_CONFIG)
x = ttnn.to_layout(x, ttnn.ROW_MAJOR_LAYOUT)
x = ttnn.to_memory_config(x, self.max_pool.max_pool.input_sharded_memory_config)
x = self.max_pool(x)
x = ttnn.max_pool2d(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)

x_height = 128
x_width = 128
@@ -14,9 +14,6 @@
from loguru import logger
from tests.ttnn.utils_for_testing import assert_with_pcc

use_new_maxpool2d = True


hardcoded_matmul_config_linear = {
8: ttnn.MatmulMultiCoreReuseMultiCast1DProgramConfig(
compute_with_storage_grid_size=(8, 4),
@@ -222,26 +219,6 @@ def __call__(
logger.debug(
f"==== Running {batch_size}, {input_height}, {input_width}, {self.conv1_input_channels}, {self.conv1_output_channels}"
)
# if (
# is_wormhole_b0()
# and (batch_size == 20) ## or batch_size == 16)
# and input_height == 56
# and self.conv1_input_channels == 256
# and self.conv1_output_channels == 128
# ):
# # TODO: fix the need to do the reshard here
# ## reshard to 49 cores
# ## TensorMemoryLayout::HEIGHT_SHARDED;(grid={[(x=0;y=0) - (x=7;y=5)]; [(x=0;y=6) - (x=0;y=6)]}; shape={1280; 256}; orientation=ShardOrientation::ROW_MAJOR; halo=false
# mem_config = ttnn.create_sharded_memory_config_(
# ttnn.Shape([batch_size * input_height * input_width, 256]),
# (ttnn.CoreGrid(x=8, y=6), ttnn.CoreGrid(x=1, y=7)),
# ttnn.TensorMemoryLayout.HEIGHT_SHARDED,
# ttnn.ShardOrientation.ROW_MAJOR,
# tile_layout=True,
# )
# x_resharded = ttnn.to_memory_config(x, mem_config)
# ttnn.deallocate(x)
# x = ttnn.reallocate(x_resharded)

# conv1 is 1x1 conv
logger.debug(f"Running conv1")
@@ -504,24 +481,6 @@ def __init__(
self.max_pool_reader_patterns_cache = {}
max_pool_parallel_config_override = {}

if not use_new_maxpool2d:
self.max_pool = ttnn.MaxPool2d(
kernel_size=(3, 3),
stride=(2, 2),
padding=(1, 1),
dilation=(1, 1),
dtype=ttnn.bfloat16,
device=self.device,
batch_size=self.batch_size,
input_height=112,
input_width=112,
reader_patterns_cache=self.max_pool_reader_patterns_cache,
deallocate_activation=True,
parallel_config_override=max_pool_parallel_config_override,
channels=self.conv1_output_channels,
mesh_mapper=self.mesh_mapper,
)

self.layer1 = self._make_layer(
parameters=parameters.layer1,
planes=64,
@@ -771,21 +730,18 @@ def run(self, input_tensor, device, ops_parallel_config, conv_op_cache={}) -> tt
if self.batch_size == 20:
x = ttnn.reallocate(x)

if use_new_maxpool2d:
x = ttnn.max_pool2d_new(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)
else:
x = self.max_pool(x)
x = ttnn.max_pool2d(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)

x_height = 56
x_width = 56
@@ -390,25 +390,6 @@ def __init__(
self.conv1_output_channels = self.conv1_weight_tensor.shape[0]
assert self.conv1_weight_tensor.shape[2] == 4

self.max_pool_reader_patterns_cache = {}
max_pool_parallel_config_override = {}

self.max_pool = ttnn.MaxPool2d(
kernel_size=(3, 3),
stride=(2, 2),
padding=(1, 1),
dilation=(1, 1),
dtype=ttnn.bfloat16,
device=self.device,
batch_size=self.batch_size,
input_height=448,
input_width=448,
reader_patterns_cache=self.max_pool_reader_patterns_cache,
deallocate_activation=True,
parallel_config_override=max_pool_parallel_config_override,
channels=self.conv1_output_channels,
)

self.layer1 = self._make_layer(
parameters=parameters.layer1,
planes=64,
@@ -476,7 +457,6 @@ def __init__(
def __del__(self):
# Need to clear global configs for each Resnet run
self.conv_op_cache.clear()
self.max_pool_reader_patterns_cache.clear()

def _make_layer(
self,
@@ -567,12 +547,18 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
if self.batch_size == 20 or self.batch_size == 1:
x = ttnn.reallocate(x)

if is_wormhole_b0() and self.batch_size == 20:
# TODO: fix the need to do the reshard here
x = ttnn.to_memory_config(x, ttnn.L1_MEMORY_CONFIG)
x = ttnn.to_layout(x, ttnn.ROW_MAJOR_LAYOUT)
x = ttnn.to_memory_config(x, self.max_pool.max_pool.input_sharded_memory_config)
x = self.max_pool(x)
x = ttnn.max_pool2d(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)

x_height = 224
x_width = 224
@@ -859,12 +845,18 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
if self.batch_size == 20 or self.batch_size == 1:
x = ttnn.reallocate(x)

if is_wormhole_b0() and self.batch_size == 20:
# TODO: fix the need to do the reshard here
x = ttnn.to_memory_config(x, ttnn.L1_MEMORY_CONFIG)
x = ttnn.to_layout(x, ttnn.ROW_MAJOR_LAYOUT)
x = ttnn.to_memory_config(x, self.max_pool.max_pool.input_sharded_memory_config)
x = self.max_pool(x)
x = ttnn.max_pool2d(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)

x_height = 224
x_width = 224
@@ -392,25 +392,6 @@ def __init__(
self.conv1_output_channels = self.conv1_weight_tensor.shape[0]
assert self.conv1_weight_tensor.shape[2] == 4

self.max_pool_reader_patterns_cache = {}
max_pool_parallel_config_override = {}

self.max_pool = ttnn.MaxPool2d(
kernel_size=(3, 3),
stride=(2, 2),
padding=(1, 1),
dilation=(1, 1),
dtype=ttnn.bfloat16,
device=self.device,
batch_size=self.batch_size,
input_height=448,
input_width=448,
reader_patterns_cache=self.max_pool_reader_patterns_cache,
deallocate_activation=True,
parallel_config_override=max_pool_parallel_config_override,
channels=self.conv1_output_channels,
)

self.layer1 = self._make_layer(
parameters=parameters.layer1,
planes=64,
@@ -478,7 +459,6 @@ def __init__(
def __del__(self):
# Need to clear global configs for each Resnet run
self.conv_op_cache.clear()
self.max_pool_reader_patterns_cache.clear()

def _make_layer(
self,
@@ -569,12 +549,18 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
if self.batch_size == 20 or self.batch_size == 1:
x = ttnn.reallocate(x)

if is_wormhole_b0() and self.batch_size == 20:
# TODO: fix the need to do the reshard here
x = ttnn.to_memory_config(x, ttnn.L1_MEMORY_CONFIG)
x = ttnn.to_layout(x, ttnn.ROW_MAJOR_LAYOUT)
x = ttnn.to_memory_config(x, self.max_pool.max_pool.input_sharded_memory_config)
x = self.max_pool(x)
x = ttnn.max_pool2d(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)

x_height = 224
x_width = 224
@@ -888,12 +874,18 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
if self.batch_size == 20 or self.batch_size == 1:
x = ttnn.reallocate(x)

if is_wormhole_b0() and self.batch_size == 20:
# TODO: fix the need to do the reshard here
x = ttnn.to_memory_config(x, ttnn.L1_MEMORY_CONFIG)
x = ttnn.to_layout(x, ttnn.ROW_MAJOR_LAYOUT)
x = ttnn.to_memory_config(x, self.max_pool.max_pool.input_sharded_memory_config)
x = self.max_pool(x)
x = ttnn.max_pool2d(
input_tensor=x,
batch_size=self.batch_size,
input_h=x_height,
input_w=x_width,
channels=self.conv1_output_channels,
kernel_size=[3, 3],
stride=[2, 2],
padding=[1, 1],
dilation=[1, 1],
device=device,
)

x_height = 224
x_width = 224
