#5752: changes after rebase.
shwetankTT committed Sep 9, 2024
1 parent 7394dd2 commit fde00d0
Showing 3 changed files with 28 additions and 51 deletions.
@@ -37,11 +37,11 @@ Tensor HaloTensorCreation(const Tensor &input){
ttnn::Tensor input_tensor = input; // tensor to return
SlidingWindowConfig sliding_window_config = SlidingWindowConfig(
batch_size,
- input_height, input_width,
- 2, 2, //kernel size
- 1, 1, // stride
- 0, 0, //padding
- 1, 1,
+ {input_height, input_width},
+ {2, 2}, //kernel size
+ {1, 1}, // stride
+ {0, 0}, //padding
+ {1, 1}, //dilation
num_cores_nhw,
input_tensor.memory_config().shard_spec.value().grid,
true, true);
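
Note: the call-site change above switches every spatial argument from two flat scalars to a single {h, w} pair. A minimal standalone sketch of the benefit; MiniConfig and its values are illustrative stand-ins, not the real ttnn type:

    #include <cstdint>
    #include <utility>

    using uint32_pair_t = std::pair<uint32_t, uint32_t>;

    // Simplified stand-in for SlidingWindowConfig's spatial members.
    struct MiniConfig {
        uint32_t batch_size;
        uint32_pair_t input_hw;
        uint32_pair_t window_hw;
        uint32_pair_t stride_hw;
        uint32_pair_t pad_hw;
        uint32_pair_t dilation_hw;
    };

    int main() {
        // Each quantity travels as one {h, w} unit, so a missing or extra
        // argument is a compile error instead of a silently shifted list
        // of ten scalars.
        MiniConfig cfg{1, {32, 32}, {2, 2}, {1, 1}, {0, 0}, {1, 1}};
        return static_cast<int>(cfg.batch_size) - 1;
    }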
ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp (52 changes: 22 additions & 30 deletions)
@@ -24,18 +24,16 @@ bool SlidingWindowConfig::has_parallel_config() const {
* Calculate the window op output shape, excludes the channel dimension since this config is independent of the depth.
*/
Shape SlidingWindowConfig::get_output_shape() const {
- uint32_t output_h = (input_hw_.first + 2 * pad_hw_.first - dilation_hw_.first * window_hw_.first) / stride_hw_.first + 1;
- uint32_t output_w = (input_hw_.second + 2 * pad_hw_.second - dilation_hw_.second * window_hw_.second) / stride_hw_.second + 1;
- if(is_bilinear_){
- //std::cout << "is bilinear sliding windows cpp" << std::endl;
- output_h = input_hw_.first;
- output_w = input_hw_.second; //for bilinear input and output should be same.. and kernel size is 2x2
+ uint32_t output_h = (input_hw.first + 2 * pad_hw.first - dilation_hw.first * window_hw.first) / stride_hw.first + 1;
+ uint32_t output_w = (input_hw.second + 2 * pad_hw.second - dilation_hw.second * window_hw.second) / stride_hw.second + 1;
+ if(is_bilinear){
+ output_h = input_hw.first;
+ output_w = input_hw.second; //for bilinear input and output should be same.. and kernel size is 2x2
// we need neighboring width in the output tensor
}
- // uint32_t output_h = (std::get<0>(input_hw_) + 2 * std::get<0>(pad_hw_) - std::get<0>(dilation_hw_) * std::get<0>(window_hw_)) / std::get<0>(stride_hw_) + 1;
- // uint32_t output_w = (std::get<1>(input_hw_) + 2 * std::get<1>(pad_hw_) - std::get<1>(dilation_hw_) * std::get<1>(window_hw_)) / std::get<1>(stride_hw_) + 1;
- log_debug(tt::LogOp, "output_size: {} {} {}", batch_size_, output_h, output_w);
- return Shape( std::vector<uint32_t>{batch_size_, output_h, output_w, 0});
+ log_debug(tt::LogOp, "output_size: {} {} {}", batch_size, output_h, output_w);
+ return Shape( std::vector<uint32_t>{batch_size, output_h, output_w, 0});
}
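
Note: the retained formula is the usual convolution output-size relation; for dilation = 1, (in + 2*pad - dil*win) / stride + 1 equals the textbook (in + 2*pad - dil*(win - 1) - 1) / stride + 1. A standalone sketch with one worked case (the input extent 32 below is illustrative):

    #include <cstdint>
    #include <cstdio>

    // Output extent along one axis, as computed in get_output_shape().
    static uint32_t out_dim(uint32_t in, uint32_t pad, uint32_t dil,
                            uint32_t win, uint32_t stride) {
        return (in + 2 * pad - dil * win) / stride + 1;
    }

    int main() {
        // HaloTensorCreation's settings: 2x2 window, stride 1, pad 0, dilation 1.
        std::printf("%u\n", out_dim(32, 0, 1, 2, 1)); // prints 31
        // The bilinear branch discards this value and keeps output == input (32),
        // since upsampling needs the full input extent plus neighbor rows.
        return 0;
    }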

/**
@@ -52,12 +50,12 @@ uint32_t SlidingWindowConfig::get_output_shard_y(bool snap_to_tile) const {


std::vector<bool> generate_pad_metadata(const SlidingWindowConfig& config) {
- uint32_t padded_input_h = config.input_hw_.first + 2 * config.pad_hw_.first;
- uint32_t padded_input_w = config.input_hw_.second + 2 * config.pad_hw_.second;
- uint32_t extra_padding_layer = config.is_bilinear_ ? config.input_hw_.second*2 : 0; //need to test this
- std::vector<bool> pad_metadata(config.batch_size_ * padded_input_h * padded_input_w + extra_padding_layer, false);
- //std::cout << "padded_input_h " << padded_input_h << " padded_input_w " << padded_input_w << std::endl;
- for (uint32_t b = 0; b < config.batch_size_; ++b) {
+ uint32_t padded_input_h = config.input_hw.first + 2 * config.pad_hw.first;
+ uint32_t padded_input_w = config.input_hw.second + 2 * config.pad_hw.second;
+ uint32_t extra_padding_layer = config.is_bilinear ? config.input_hw.second*2 : 0; //extra padding in case of bilinear
+ std::vector<bool> pad_metadata(config.batch_size * padded_input_h * padded_input_w + extra_padding_layer, false);
+
+ for (uint32_t b = 0; b < config.batch_size; ++b) {
for (uint32_t h = 0; h < padded_input_h; ++h) {
for (uint32_t w = 0; w < padded_input_w; ++w) {
if (h < config.pad_hw.first || h >= config.pad_hw.first + config.input_hw.first ||
@@ -69,7 +67,7 @@ std::vector<bool> generate_pad_metadata(const SlidingWindowConfig& config) {
}

for (int i=0; i<extra_padding_layer; i++) {
- pad_metadata[config.batch_size_ * padded_input_h * padded_input_w + i] = true;
+ pad_metadata[config.batch_size * padded_input_h * padded_input_w + i] = true;
}
return pad_metadata;
}
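
Note: for readers outside the diff context, a condensed restatement of what generate_pad_metadata builds: a flat bool mask over the padded (b, h, w) volume, true where a stick is padding, plus 2*W trailing pad sticks in the bilinear case. Sketch under those assumptions (the function name pad_mask is ours, not ttnn's):

    #include <cstdint>
    #include <vector>

    std::vector<bool> pad_mask(uint32_t batch, uint32_t in_h, uint32_t in_w,
                               uint32_t pad_h, uint32_t pad_w, bool bilinear) {
        uint32_t ph = in_h + 2 * pad_h;
        uint32_t pw = in_w + 2 * pad_w;
        uint32_t extra = bilinear ? 2 * in_w : 0;  // extra trailing pad layer
        std::vector<bool> mask(batch * ph * pw + extra, false);
        for (uint32_t b = 0; b < batch; ++b)
            for (uint32_t h = 0; h < ph; ++h)
                for (uint32_t w = 0; w < pw; ++w)
                    // A stick is padding when it falls outside the original image.
                    if (h < pad_h || h >= pad_h + in_h || w < pad_w || w >= pad_w + in_w)
                        mask[b * ph * pw + h * pw + w] = true;
        for (uint32_t i = 0; i < extra; ++i)  // the bilinear tail is all padding
            mask[batch * ph * pw + i] = true;
        return mask;
    }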
@@ -84,9 +82,8 @@ std::vector<uint32_t> generate_op_trace_metadata(const SlidingWindowConfig& conf
for (uint32_t b = 0; b < output_shape[0]; ++b) {
for (uint32_t h = 0; h < output_shape[1]; ++h) {
for (uint32_t w = 0; w < output_shape[2]; ++w) {
- uint32_t input_index = b * padded_input_h * padded_input_w + h * config.stride_hw_.first * padded_input_w + w * config.stride_hw_.second;
- op_trace_metadata[i++] = input_index < config.input_hw_.second ? 0 : input_index - config.input_hw_.second ;
- //std::cout << "op_trace_metadata " << i << " " << input_index << std::endl;
+ uint32_t input_index = b * padded_input_h * padded_input_w + h * config.stride_hw.first * padded_input_w + w * config.stride_hw.second;
+ op_trace_metadata[i++] = config.is_bilinear ? (input_index < config.input_hw.second ? 0 : input_index - config.input_hw.second) : input_index ;
}
}
}
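
Note: the rewritten line above makes the row shift conditional. The old code shifted every op's trace index back by one input row; now non-bilinear ops record the raw strided window origin, while bilinear shifts it back by one input row (W sticks), clamped at 0, so each shard also covers its neighboring row. A standalone restatement of that rule:

    #include <cstdint>

    // Input stick index recorded for output position (b, h, w), per the new code.
    uint32_t trace_index(uint32_t b, uint32_t h, uint32_t w,
                         uint32_t padded_h, uint32_t padded_w,
                         uint32_t stride_h, uint32_t stride_w,
                         uint32_t input_w, bool bilinear) {
        uint32_t idx = b * padded_h * padded_w + h * stride_h * padded_w + w * stride_w;
        if (!bilinear)
            return idx;                            // plain window origin
        return idx < input_w ? 0 : idx - input_w;  // back one row, clamped at 0
    }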
@@ -99,13 +96,16 @@ std::vector<std::pair<uint32_pair_t, uint32_pair_t>> generate_shard_boundaries(c
uint32_t output_shard_h = config.get_output_shard_y(config.snap_to_tile);
uint32_t padded_input_w = config.input_hw.second + 2 * config.pad_hw.second;
uint32_t max_index = op_trace_metadata.size();
- uint32_t halo_with_pad_len = (config.window_hw_.first - 1) * padded_input_w + config.window_hw_.second - 1;
- if(config.is_bilinear_){
+ uint32_t halo_with_pad_len = (config.window_hw.first - 1) * padded_input_w + config.window_hw.second - 1;
+ if(config.is_bilinear){
halo_with_pad_len += config.input_hw.second;
}
uint32_t output_index_start = 0;
for (uint32_t core = 0; core < num_cores; ++ core) {
uint32_t output_index_end = std::min(output_index_start + output_shard_h, max_index) - 1;
uint32_t input_index_start = op_trace_metadata[output_index_start];
uint32_t input_index_end = op_trace_metadata[output_index_end] + halo_with_pad_len;
- if (config.is_bilinear_ == false and input_index_start == 0 and output_index_start != 0) {
+ if (config.is_bilinear == false and input_index_start == 0 and output_index_start != 0) {
input_index_start = op_trace_metadata[output_index_end] + 1;
input_index_end = input_index_start - 1;
log_debug(tt::LogOp, "core: {}, output_index_start: {}, output_index_end: {}, input_index_start: {}, input_index_end: {}", core, output_index_start, output_index_end, input_index_start, input_index_end);
@@ -190,14 +190,6 @@ std::tuple<std::vector<std::vector<uint16_t>>, std::vector<std::vector<uint16_t>
uint32_t local_idx = global_idx - input_start;
auto [is_pad_stick, src_idx] = tensor_metadata[global_idx];
auto [src_core_id, src_local_idx] = src_idx;

- if(local_idx >= pad_local || src_local_idx >= pad_local) {
- //std::cout << "shard_boundaries " << output_boundary.first << " " << output_boundary.second << " " << input_boundary.first << " " << input_boundary.second << std::endl;
- //std::cout << local_idx << " " << pad_local << " " << src_local_idx << " " << pad_local << std::endl;
- //std::cout << is_pad_stick << " " << src_idx <<" " << src_core_id<< " " << src_local_idx << std::endl;
- log_error(tt::LogOp, "Index overflow");
- }
TT_ASSERT(local_idx < pad_local && src_local_idx < pad_local, "Index overflow");
if (is_pad_stick) {
TT_ASSERT(src_local_idx == 0);
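
Note: the deleted block duplicated the condition of the TT_ASSERT that follows it, but only logged "Index overflow" and continued. A sketch of the pattern kept here, written with the standard <cassert> equivalent rather than ttnn's TT_ASSERT macro:

    #include <cassert>
    #include <cstdint>

    void check_stick(uint32_t local_idx, uint32_t src_local_idx, uint32_t pad_local) {
        // Out-of-range indices mean boundary generation is buggy, so halt
        // immediately instead of logging and continuing with bad data.
        assert(local_idx < pad_local && src_local_idx < pad_local && "Index overflow");
    }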
ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp (17 changes: 1 addition & 16 deletions)
@@ -46,24 +46,9 @@ struct SlidingWindowConfig {
// parallel configuration
uint32_t num_cores_nhw = 1; // num cores along collapsed height nhw
CoreRangeSet core_range_set = std::set{CoreRange({0, 0}, {0, 0})}; // active cores
- bool has_parallel_config_;
- // uint32_t num_cores_nhw_; // num cores along collapsed height nhw
- // CoreRangeSet core_range_set_; // active cores
- bool snap_to_tile_;
- bool is_bilinear_;
- SlidingWindowConfig(uint32_t batch_size, uint32_t input_h, uint32_t input_w, uint32_t window_h, uint32_t window_w, uint32_t stride_h, uint32_t stride_w, uint32_t pad_h, uint32_t pad_w, uint32_t dilation_h = 1, uint32_t dilation_w = 1, uint32_t num_cores_nhw = 0, CoreRangeSet core_range = {{}}, bool snap_to_tile = false, bool is_bilinear = false)
- : batch_size_(batch_size), input_hw_(input_h, input_w), window_hw_(window_h, window_w), stride_hw_(stride_h, stride_w), pad_hw_(pad_h, pad_w), dilation_hw_(dilation_h, dilation_w), has_parallel_config_(false), num_cores_nhw_(num_cores_nhw), core_range_set_(core_range), snap_to_tile_(snap_to_tile), is_bilinear_(is_bilinear) {
- has_parallel_config_ = num_cores_nhw_ > 0 && !core_range_set_.ranges().empty();
- }
- SlidingWindowConfig(const SlidingWindowConfig& other): batch_size_(other.batch_size_), input_hw_(other.input_hw_), window_hw_(other.window_hw_), stride_hw_(other.stride_hw_), pad_hw_(other.pad_hw_), dilation_hw_(other.dilation_hw_), has_parallel_config_(other.has_parallel_config_), num_cores_nhw_(other.num_cores_nhw_), core_range_set_(other.core_range_set_), snap_to_tile_(other.snap_to_tile_), is_bilinear_(other.is_bilinear_) {}
- SlidingWindowConfig(): core_range_set_({{{0,0}, {0,0}}}) {}
+ bool snap_to_tile = false;
+ bool is_bilinear = false;

std::string to_string() const;
bool has_parallel_config() const;
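
Note: with the hand-written constructors and underscore-suffixed members gone, the struct becomes an aggregate with default member initializers, which is what lets the call site in the first file pass one {h, w} pair per member (parenthesized aggregate initialization is a C++20 feature). Standalone sketch of the pattern; MiniConfig is a stand-in, and its has_parallel_config() is simplified (the real one also checks that core_range_set is non-empty):

    #include <cstdint>
    #include <utility>

    struct MiniConfig {
        uint32_t batch_size = 0;
        std::pair<uint32_t, uint32_t> input_hw = {0, 0};
        uint32_t num_cores_nhw = 1;
        bool snap_to_tile = false;
        bool is_bilinear = false;

        // Computed on demand, replacing the cached has_parallel_config_ member
        // that the deleted constructor had to keep in sync.
        bool has_parallel_config() const { return num_cores_nhw > 0; }
    };

    int main() {
        // No user-declared constructors, so aggregate initialization just works.
        MiniConfig cfg{2, {32, 32}, 8, /*snap_to_tile=*/true, /*is_bilinear=*/true};
        return cfg.has_parallel_config() ? 0 : 1;
    }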
