#0: Conv2dConfig refactor
sankarmanoj-tt committed Nov 19, 2024
1 parent f6281d2 commit 21135db
Showing 8 changed files with 51 additions and 51 deletions.
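
The files shown below all apply the same change: the compute-kernel settings (math fidelity, math approximation mode, fp32 destination accumulation, packer L1 accumulation) are removed from ttnn.Conv2dConfig and passed to ttnn.conv2d through a separate compute_config argument instead. A minimal migration sketch, using only names that appear in this diff (the dtype values are illustrative, and exact signatures may vary between ttnn versions):

import ttnn

# Before this commit: compute-kernel settings lived on Conv2dConfig.
old_conv_config = ttnn.Conv2dConfig(
    dtype=ttnn.bfloat16,
    weights_dtype=ttnn.bfloat16,
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode_enabled=True,
    fp32_dest_acc_enabled=False,
    packer_l1_accum_enabled=False,
)

# After this commit: Conv2dConfig keeps only tensor and layout options, and the
# compute-kernel settings move into a config of their own, passed to ttnn.conv2d
# via the new compute_config argument.
conv_config = ttnn.Conv2dConfig(
    dtype=ttnn.bfloat16,
    weights_dtype=ttnn.bfloat16,
)
compute_config = ttnn.GetComputeKernelConfig(
    math_fidelity=ttnn.MathFidelity.LoFi,
    math_approx_mode=True,
    fp32_dest_acc_en=False,
    packer_l1_acc=False,
)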
11 changes: 7 additions & 4 deletions models/demos/segformer/tt/common.py
@@ -40,12 +40,8 @@ def __call__(self, device, input_tensor):
conv_config = ttnn.Conv2dConfig(
dtype=self.dtype,
weights_dtype=ttnn.bfloat16,
math_fidelity=ttnn.MathFidelity.LoFi,
activation=self.activation,
shard_layout=self.shard_layout,
math_approx_mode_enabled=True,
fp32_dest_acc_enabled=False,
packer_l1_accum_enabled=False,
input_channels_alignment=16 if input_tensor.shape[3] < 16 else 32,
transpose_shards=False,
reshard_if_not_optimal=self.reshard,
@@ -54,6 +50,12 @@ def __call__(self, device, input_tensor):
enable_act_double_buffer=True,
enable_split_reader=False,
)
compute_config = ttnn.GetComputeKernelConfig(
math_fidelity=ttnn.MathFidelity.LoFi,
math_approx_mode=True,
fp32_dest_acc_en=False,
packer_l1_acc=False,
)
if self.act_block_h is not None:
conv_config.act_block_h_override = self.act_block_h

@@ -71,6 +73,7 @@ def __call__(self, device, input_tensor):
input_height=input_tensor.shape[1],
input_width=input_tensor.shape[2],
conv_config=conv_config,
compute_config=compute_config,
groups=self.groups,
)
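
The conv call sites in the remaining files apply the same split, but build the compute config inline from the model config, setting only math_fidelity and leaving the other fields at their defaults. A hypothetical helper (not part of this commit) illustrating that pattern:

import ttnn

# Hypothetical helper: mirrors the call sites below, which construct the
# per-conv compute config from model_config alone.
def model_compute_config(model_config):
    # Only math_fidelity is supplied; math_approx_mode, fp32_dest_acc_en and
    # packer_l1_acc fall back to the GetComputeKernelConfig defaults.
    return ttnn.GetComputeKernelConfig(math_fidelity=model_config["MATH_FIDELITY"])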

Next changed file (name not shown):
@@ -181,12 +181,12 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=shard_layout,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -228,13 +228,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -291,7 +291,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -301,6 +300,7 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -322,12 +322,12 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -538,12 +538,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -844,12 +844,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
Next changed file (name not shown):
@@ -178,14 +178,14 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -225,13 +225,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -286,7 +286,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -296,6 +295,7 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -317,12 +317,12 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -532,12 +532,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -835,12 +835,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
Next changed file (name not shown):
@@ -179,14 +179,14 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
deallocate_activation=True,
reallocate_halo_output=True,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -226,13 +226,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -288,7 +288,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -298,6 +297,7 @@ def __call__(
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -319,12 +319,12 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -534,12 +534,12 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -865,12 +865,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
Next changed file (name not shown):
@@ -179,7 +179,6 @@ def run_downsample_if_req(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
@@ -188,6 +187,7 @@ def run_downsample_if_req(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
ttnn.deallocate(x)
@@ -232,14 +232,14 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -337,7 +337,6 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=reallocate_halo_output,
@@ -348,6 +347,7 @@ def __call__(
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -369,13 +369,13 @@ def __call__(
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
shard_layout=ttnn.TensorMemoryLayout.HEIGHT_SHARDED
if height_sharding
else ttnn.TensorMemoryLayout.BLOCK_SHARDED,
reshard_if_not_optimal=reshard_if_not_optimal,
transpose_shards=height_sharding,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)

@@ -597,13 +597,13 @@ def first_run(self, input_tensor, device, batch_size, ops_parallel_config) -> tt
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
reallocate_halo_output=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
@@ -931,12 +931,12 @@ def optimized_run(self, input_tensor, device, batch_size, ops_parallel_config, c
conv_config=ttnn.Conv2dConfig(
dtype=self.model_config["ACTIVATIONS_DTYPE"],
weights_dtype=self.model_config["WEIGHTS_DTYPE"],
math_fidelity=self.model_config["MATH_FIDELITY"],
activation="relu",
deallocate_activation=True,
input_channels_alignment=16 if not is_wormhole_b0() else 32,
act_block_h_override=act_block_h_override,
),
compute_config=ttnn.GetComputeKernelConfig(math_fidelity=self.model_config["MATH_FIDELITY"]),
conv_op_cache=conv_op_cache,
)
# Relu is fused with conv1
(Remaining changed files not shown.)
