
Commit

#0: Cleanup
sankarmanoj-tt committed Dec 24, 2024
1 parent 63456b2 commit 9592cf1
Showing 11 changed files with 34 additions and 31 deletions.
1 change: 0 additions & 1 deletion tests/ttnn/unit_tests/operations/test_new_conv2d.py
@@ -591,7 +591,6 @@ def test_conv_ws(
debug = False
groups = 1

# torch.manual_seed()
conv_input_shape = [batch_size, input_channels, input_height, input_width]
conv_weight_shape = [output_channels, input_channels // groups, filter_height, filter_width]
conv_bias_shape = [1, 1, 1, output_channels]
3 changes: 2 additions & 1 deletion ttnn/cpp/ttnn/operations/conv/conv2d/conv2d.cpp
@@ -73,7 +73,7 @@ Result conv2d(
bool auto_shard = false;
if (!input_tensor.is_sharded() && !conv_config.shard_layout.has_value()) {
// In this case we deduce the shard layout.
conv_config = adjust_conv_op_config_for_auto_shard_if_necessary(
conv_config = determine_conv_config_for_auto_shard(
conv_config,
mm_conv,
batch_size,
@@ -88,6 +88,7 @@
kernel_size,
compute_grid_size,
compute_config,
input_tensor.layout(),
ttnn::is_tensor_on_device_or_multidevice(input_tensor) ?
std::make_optional(input_tensor.memory_config()):
std::nullopt
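The call-site change above, condensed: the helper is renamed from adjust_conv_op_config_for_auto_shard_if_necessary to determine_conv_config_for_auto_shard and now receives the input tensor's layout explicitly. A minimal sketch of the updated call (middle arguments elided; only the rename and the new layout argument are taken from this diff):

    if (!input_tensor.is_sharded() && !conv_config.shard_layout.has_value()) {
        // The shard layout is deduced automatically; the helper now sees the
        // tensor's real layout instead of assuming one.
        conv_config = determine_conv_config_for_auto_shard(
            conv_config,
            mm_conv,
            batch_size,
            /* ...shape, channel, and kernel parameters elided... */
            compute_grid_size,
            compute_config,
            input_tensor.layout(),  // new parameter
            ttnn::is_tensor_on_device_or_multidevice(input_tensor)
                ? std::make_optional(input_tensor.memory_config())
                : std::nullopt);
    }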
27 changes: 18 additions & 9 deletions ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp
@@ -9,6 +9,7 @@

#include "conv2d_utils.hpp"
#include "impl/buffers/buffer_constants.hpp"
#include "ttnn/operations/conv/conv2d/device/conv2d_op.hpp"
#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
#include "tt_metal/common/work_split.hpp"
#include "ttnn/operations/eltwise/unary/common/unary_op_utils.hpp"
@@ -873,7 +874,7 @@ std::tuple<uint32_t, uint32_t> get_padded_subblock_h_ntiles(
return {out_subblock_h_ntiles, act_block_h_ntiles};
}

Conv2dConfig adjust_conv_op_config_for_auto_shard_if_necessary(
Conv2dConfig determine_conv_config_for_auto_shard(
const Conv2dConfig& conv_config_,
bool is_mm_conv,
uint32_t batch_size,
@@ -888,6 +889,7 @@ Conv2dConfig adjust_conv_op_config_for_auto_shard_if_necessary(
std::array<uint32_t, 2> kernel_size,
const CoreCoord& compute_grid_size,
const DeviceComputeKernelConfig& compute_config,
Layout input_tensor_layout,
std::optional<const MemoryConfig> input_memory_config) {

Conv2dConfig conv_config = conv_config_;
@@ -914,7 +916,6 @@ Conv2dConfig adjust_conv_op_config_for_auto_shard_if_necessary(
kernel_size,
compute_grid_size);

auto input_tensor_layout = Layout::ROW_MAJOR;
if (conv_config.act_block_h_override == 0 && conv_config.shard_layout != TensorMemoryLayout::WIDTH_SHARDED) {
if (in_channels <= constants::TILE_WIDTH / 2 && conv_config.input_channels_alignment == constants::TILE_WIDTH &&
!is_mm_conv && conv_config.shard_layout == TensorMemoryLayout::HEIGHT_SHARDED && input_tensor_layout == Layout::ROW_MAJOR) {
@@ -1019,7 +1020,7 @@ std::tuple<OptimizedConvParallelizationConfig, OptimizedConvBlockConfig, MemoryC
}


std::pair<uint32_t,uint32_t> conv2d::estimate_L1_usage(
conv_op_l1_usage conv2d::estimate_L1_usage(
tt::ARCH arch, TensorMemoryLayout shard_layout,
const DataType input_dtype, const DataType weights_dtype, const DataType output_dtype,
const DeviceComputeKernelConfig& compute_kernel_config,
@@ -1132,7 +1133,10 @@ std::pair<uint32_t,uint32_t> conv2d::estimate_L1_usage(

tt::log_debug(tt::LogOp, "Total CB Size: {}", total_CB_size);

return {output_size_per_core_in_bytes, total_CB_size};
return conv2d::conv_op_l1_usage{
.tensor_allocation_size=output_size_per_core_in_bytes,
.CB_allocation_size=total_CB_size
};
} else if (shard_layout == TensorMemoryLayout::HEIGHT_SHARDED) {
uint32_t output_size = 0;
if(use_non_tile_height){
@@ -1237,8 +1241,11 @@ std::pair<uint32_t,uint32_t> conv2d::estimate_L1_usage(
temp_sum_cb_27_size = output_tile_size;
tt::log_debug(tt::LogOp, "CB27 Size: {}", temp_sum_cb_27_size);
}

return {output_size, act_cb_0_size + weights_cb_1_size + bias_cb_2_size + l1_scratchpad_cb_5_size + split_second_act_reader_cb_7_size + matmul_partials_cb_24_size + tilized_act_cb_25_size + temp_sum_cb_27_size};
uint32_t total_CB_size = act_cb_0_size + weights_cb_1_size + bias_cb_2_size + l1_scratchpad_cb_5_size + split_second_act_reader_cb_7_size + matmul_partials_cb_24_size + tilized_act_cb_25_size + temp_sum_cb_27_size;
return conv2d::conv_op_l1_usage{
.tensor_allocation_size=output_size,
.CB_allocation_size=total_CB_size
};
} else if(shard_layout == TensorMemoryLayout::BLOCK_SHARDED) {

auto output_shard_shape = output_memory_config.shard_spec.value().shape;
@@ -1329,11 +1336,13 @@ std::pair<uint32_t,uint32_t> conv2d::estimate_L1_usage(
cb28_size = output_block_ntiles * output_tile_size;
tt::log_debug(tt::LogOp, "CB28 Size: {}", cb28_size);
}

return{ output_size, act_cb_0_size + weights_cb_1_size + bias_cb_2_size + l1_scratchpad_cb_5_size + cb6_size + matmul_partials_cb_24_size + tilized_act_cb_25_size + cb28_size};
uint32_t total_CB_size = act_cb_0_size + weights_cb_1_size + bias_cb_2_size + l1_scratchpad_cb_5_size + cb6_size + matmul_partials_cb_24_size + tilized_act_cb_25_size + cb28_size;
return conv2d::conv_op_l1_usage{
.tensor_allocation_size=output_size,
.CB_allocation_size=total_CB_size
};
}
TT_THROW("Invalid shard layout {}", shard_layout);
return {0, 0};

}

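estimate_L1_usage now returns the named conv_op_l1_usage struct (declared in conv2d_op.hpp further down) instead of std::pair<uint32_t, uint32_t>, so the two L1 figures can no longer be swapped silently. A minimal caller-side sketch under that assumption (variable names here are illustrative, not from this commit):

    // Before: .first/.second gave no hint which figure was which.
    conv2d::conv_op_l1_usage l1_usage = conv2d::estimate_L1_usage(
        arch, shard_layout, input_dtype, weights_dtype, output_dtype,
        compute_kernel_config /* ...remaining parameters elided... */);
    uint32_t tensor_bytes = l1_usage.tensor_allocation_size;  // output tensor, per core
    uint32_t cb_bytes = l1_usage.CB_allocation_size;          // sum of circular buffer sizes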
5 changes: 2 additions & 3 deletions ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp
@@ -6,8 +6,6 @@
#include <optional>

#include "ttnn/operations/matmul/device/matmul_op.hpp"
#include "ttnn/tensor/types.hpp"
#include "ttnn/types.hpp"
#include "ttnn/operations/conv/conv2d/device/conv2d_op.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/operations/sliding_window/sliding_window.hpp"
@@ -110,7 +108,7 @@ static std::tuple<ttnn::Shape, ttnn::MemoryConfig, bool, bool> get_conv_padded_i
bool is_non_tile_mul_width=false);


Conv2dConfig adjust_conv_op_config_for_auto_shard_if_necessary(
Conv2dConfig determine_conv_config_for_auto_shard(
const Conv2dConfig& conv_config_,
bool is_mm_conv,
uint32_t batch_size,
@@ -125,6 +123,7 @@ Conv2dConfig adjust_conv_op_config_for_auto_shard_if_necessary(
std::array<uint32_t, 2> kernel_size,
const CoreCoord& compute_grid_size,
const DeviceComputeKernelConfig& compute_config,
Layout input_tensor_layout,
std::optional<const MemoryConfig> input_memory_config);

template <typename T>
4 changes: 0 additions & 4 deletions ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.cpp
@@ -8,8 +8,6 @@
#include "conv2d_op.hpp"
#include "common/math.hpp"

#include "buffers/buffer_constants.hpp"
#include "common/math.hpp"
#include "tt_metal/host_api.hpp"
#include "tt_metal/detail/tt_metal.hpp"
#include "tt_metal/common/constants.hpp"
@@ -20,9 +18,7 @@
#include "ttnn/operations/experimental/auto_format/auto_format.hpp"

#include "ttnn/operations/sliding_window/sliding_window.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/tensor/tensor_utils.hpp"
#include "ttnn/tensor/types.hpp"
using namespace tt::constants;
namespace optimized_conv_op_utils {
using namespace tt;
8 changes: 6 additions & 2 deletions ttnn/cpp/ttnn/operations/conv/conv2d/device/conv2d_op.hpp
@@ -4,7 +4,6 @@

#pragma once

#include <cstdint>
#include "ttnn/operations/sliding_window/sliding_window.hpp"
#include "ttnn/tensor/tensor.hpp"
#include "ttnn/run_operation.hpp"
@@ -236,7 +235,12 @@ Tensor optimized_conv_new(const Tensor& a, const Tensor &b, std::optional<const
bool use_non_tile_height = false
);

std::pair<uint32_t,uint32_t> estimate_L1_usage(
struct conv_op_l1_usage{
uint32_t tensor_allocation_size;
uint32_t CB_allocation_size;
};

conv_op_l1_usage estimate_L1_usage(
tt::ARCH arch, TensorMemoryLayout shard_layout,
const DataType input_dtype, const DataType weights_dtype, const DataType output_dtype,
const DeviceComputeKernelConfig& compute_kernel_config,
@@ -2,10 +2,6 @@
//
// SPDX-License-Identifier: Apache-2.0

#include "common/assert.hpp"
#include "common/logger.hpp"
#include "common/math.hpp"
#include "conv2d_op.hpp"
#include "ttnn/operations/conv/conv2d/device/conv2d_op.hpp"
#include "ttnn/operations/sliding_window/sliding_window.hpp"
#include "tt_metal/common/work_split.hpp"
@@ -3,8 +3,6 @@
// SPDX-License-Identifier: Apache-2.0

#include <cstdint>
#include "common/logger.hpp"
#include "conv2d_op.hpp"
#include "ttnn/operations/conv/conv2d/device/conv2d_op.hpp"
#include "ttnn/operations/sliding_window/sliding_window.hpp"
#include "tt_metal/common/work_split.hpp"
@@ -46,7 +46,6 @@ void kernel_main() {
const InterleavedAddrGenFast<true> s_weight = {
.bank_base_address = weight_addr_dram_base, .page_size = weight_tile_nbytes, .data_format = weight_df};
#ifdef FUSE_BIAS
cb_reserve_back(bias_cb_id, weight_block_width_ntiles);
const uint32_t bias_pagesize = get_tile_size(bias_cb_id);
const DataFormat bias_df = get_dataformat(bias_cb_id);
const InterleavedAddrGenFast<bias_in_dram> s_bias = {
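The deleted cb_reserve_back call reserved bias tiles far from where they are written. For reference, the usual tt-metal dataflow producer pattern keeps the reserve next to the write and the matching cb_push_back; a minimal sketch of that pattern (illustrative only — this commit shows only the removal, not where the reserve moved):

    // Standard circular-buffer producer sequence in a dataflow kernel:
    cb_reserve_back(bias_cb_id, weight_block_width_ntiles);  // block until space is free
    uint32_t bias_l1_addr = get_write_ptr(bias_cb_id);       // L1 write pointer for the tiles
    // ... noc_async_read(...) the bias tiles into bias_l1_addr, then noc_async_read_barrier() ...
    cb_push_back(bias_cb_id, weight_block_width_ntiles);     // publish tiles to the consumer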
@@ -2,8 +2,8 @@
//
// SPDX-License-Identifier: Apache-2.0

#include "prepare_conv2d_weights.hpp"
#include "conv2d_utils.hpp"
#include "ttnn/operations/conv/conv2d/prepare_conv2d_weights.hpp"
#include "ttnn/operations/conv/conv2d/conv2d_utils.hpp"
#include "ttnn/operations/core/compute_kernel/compute_kernel_config.hpp"
#include "ttnn/operations/sliding_window/sliding_window.hpp"
#include "tt_metal/common/work_split.hpp"
@@ -104,7 +104,7 @@ static OptimizedConvBlockConfig get_opt_block_config(
const MemoryConfig& input_memory_config) {
auto compute_grid_size = device->compute_with_storage_grid_size();

conv_config = adjust_conv_op_config_for_auto_shard_if_necessary(
conv_config = determine_conv_config_for_auto_shard(
conv_config,
mm_conv,
batch_size,
Expand All @@ -119,6 +119,7 @@ static OptimizedConvBlockConfig get_opt_block_config(
kernel_size,
device->compute_with_storage_grid_size(),
compute_config,
input_tensor_layout,
input_memory_config);

ShardOrientation shard_orientation =
@@ -173,7 +173,7 @@ Result conv_transpose2d(
bool auto_shard = false;
if (!input_tensor.is_sharded() && !conv_config.shard_layout.has_value()) {
// In this case we deduce the shard layout.
conv_config = adjust_conv_op_config_for_auto_shard_if_necessary(
conv_config = determine_conv_config_for_auto_shard(
conv_config,
mm_conv,
batch_size,
@@ -188,6 +188,7 @@
kernel_size,
compute_grid_size,
compute_config,
input_tensor.layout(),
ttnn::is_tensor_on_device_or_multidevice(input_tensor)
? std::make_optional(input_tensor.memory_config())
: std::nullopt);
