diff --git a/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_bilinear_program_factory_multicore.cpp b/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_bilinear_program_factory_multicore.cpp index 7b7990660dea..8eead68c7c57 100644 --- a/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_bilinear_program_factory_multicore.cpp +++ b/ttnn/cpp/ttnn/operations/pool/upsample/device/upsample_bilinear_program_factory_multicore.cpp @@ -37,11 +37,11 @@ Tensor HaloTensorCreation(const Tensor &input){ ttnn::Tensor input_tensor = input; // tensor to return SlidingWindowConfig sliding_window_config = SlidingWindowConfig( batch_size, - input_height, input_width, - 2, 2, //kernel size - 1, 1, // stride - 0, 0, //padding - 1, 1, + {input_height, input_width}, + {2, 2}, //kernel size + {1, 1}, // stride + {0, 0}, //padding + {1, 1}, //dilation num_cores_nhw, input_tensor.memory_config().shard_spec.value().grid, true, true); diff --git a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp index 0f60567d6178..daf2fafcd899 100644 --- a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp +++ b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp @@ -24,18 +24,16 @@ bool SlidingWindowConfig::has_parallel_config() const { * Calculate the window op output shape, excludes the channel dimension since this config is independent of the depth. */ Shape SlidingWindowConfig::get_output_shape() const { - uint32_t output_h = (input_hw_.first + 2 * pad_hw_.first - dilation_hw_.first * window_hw_.first) / stride_hw_.first + 1; - uint32_t output_w = (input_hw_.second + 2 * pad_hw_.second - dilation_hw_.second * window_hw_.second) / stride_hw_.second + 1; - if(is_bilinear_){ + uint32_t output_h = (input_hw.first + 2 * pad_hw.first - dilation_hw.first * window_hw.first) / stride_hw.first + 1; + uint32_t output_w = (input_hw.second + 2 * pad_hw.second - dilation_hw.second * window_hw.second) / stride_hw.second + 1; + if(is_bilinear){ //std::cout << "is bilinear sliding windows cpp" << std::endl; - output_h = input_hw_.first; - output_w = input_hw_.second; //for bilinear input and output should be same.. and kernel size is 2x2 + output_h = input_hw.first; + output_w = input_hw.second; //for bilinear input and output should be same.. and kernel size is 2x2 // we need neighboring width in the output tensor } - // uint32_t output_h = (std::get<0>(input_hw_) + 2 * std::get<0>(pad_hw_) - std::get<0>(dilation_hw_) * std::get<0>(window_hw_)) / std::get<0>(stride_hw_) + 1; - // uint32_t output_w = (std::get<1>(input_hw_) + 2 * std::get<1>(pad_hw_) - std::get<1>(dilation_hw_) * std::get<1>(window_hw_)) / std::get<1>(stride_hw_) + 1; - log_debug(tt::LogOp, "output_size: {} {} {}", batch_size_, output_h, output_w); - return Shape( std::vector{batch_size_, output_h, output_w, 0}); + log_debug(tt::LogOp, "output_size: {} {} {}", batch_size, output_h, output_w); + return Shape( std::vector{batch_size, output_h, output_w, 0}); } /** @@ -52,12 +50,12 @@ uint32_t SlidingWindowConfig::get_output_shard_y(bool snap_to_tile) const { std::vector generate_pad_metadata(const SlidingWindowConfig& config) { - uint32_t padded_input_h = config.input_hw_.first + 2 * config.pad_hw_.first; - uint32_t padded_input_w = config.input_hw_.second + 2 * config.pad_hw_.second; - uint32_t extra_padding_layer = config.is_bilinear_ ? config.input_hw_.second*2 : 0; //need to test this - std::vector pad_metadata(config.batch_size_ * padded_input_h * padded_input_w + extra_padding_layer, false); - //std::cout << "padded_input_h " << padded_input_h << " padded_input_w " << padded_input_w << std::endl; - for (uint32_t b = 0; b < config.batch_size_; ++b) { + uint32_t padded_input_h = config.input_hw.first + 2 * config.pad_hw.first; + uint32_t padded_input_w = config.input_hw.second + 2 * config.pad_hw.second; + uint32_t extra_padding_layer = config.is_bilinear ? config.input_hw.second*2 : 0; //extra padding in case of bilinear + std::vector pad_metadata(config.batch_size * padded_input_h * padded_input_w + extra_padding_layer, false); + + for (uint32_t b = 0; b < config.batch_size; ++b) { for (uint32_t h = 0; h < padded_input_h; ++h) { for (uint32_t w = 0; w < padded_input_w; ++w) { if (h < config.pad_hw.first || h >= config.pad_hw.first + config.input_hw.first || @@ -69,7 +67,7 @@ std::vector generate_pad_metadata(const SlidingWindowConfig& config) { } for (int i=0; i generate_op_trace_metadata(const SlidingWindowConfig& conf for (uint32_t b = 0; b < output_shape[0]; ++b) { for (uint32_t h = 0; h < output_shape[1]; ++h) { for (uint32_t w = 0; w < output_shape[2]; ++w) { - uint32_t input_index = b * padded_input_h * padded_input_w + h * config.stride_hw_.first * padded_input_w + w * config.stride_hw_.second; - op_trace_metadata[i++] = input_index < config.input_hw_.second ? 0 : input_index - config.input_hw_.second ; - //std::cout << "op_trace_metadata " << i << " " << input_index << std::endl; + uint32_t input_index = b * padded_input_h * padded_input_w + h * config.stride_hw.first * padded_input_w + w * config.stride_hw.second; + op_trace_metadata[i++] = config.is_bilinear ? (input_index < config.input_hw.second ? 0 : input_index - config.input_hw.second) : input_index ; } } } @@ -99,13 +96,16 @@ std::vector> generate_shard_boundaries(c uint32_t output_shard_h = config.get_output_shard_y(config.snap_to_tile); uint32_t padded_input_w = config.input_hw.second + 2 * config.pad_hw.second; uint32_t max_index = op_trace_metadata.size(); - uint32_t halo_with_pad_len = (config.window_hw_.first - 1) * padded_input_w + config.window_hw_.second - 1; - if(config.is_bilinear_){ + uint32_t halo_with_pad_len = (config.window_hw.first - 1) * padded_input_w + config.window_hw.second - 1; + if(config.is_bilinear){ + halo_with_pad_len += config.input_hw.second; + } uint32_t output_index_start = 0; for (uint32_t core = 0; core < num_cores; ++ core) { uint32_t output_index_end = std::min(output_index_start + output_shard_h, max_index) - 1; uint32_t input_index_start = op_trace_metadata[output_index_start]; - if (config.is_bilinear_ == false and input_index_start == 0 and output_index_start != 0) { + uint32_t input_index_end = op_trace_metadata[output_index_end] + halo_with_pad_len; + if (config.is_bilinear == false and input_index_start == 0 and output_index_start != 0) { input_index_start = op_trace_metadata[output_index_end] + 1; input_index_end = input_index_start - 1; log_debug(tt::LogOp, "core: {}, output_index_start: {}, output_index_end: {}, input_index_start: {}, input_index_end: {}", core, output_index_start, output_index_end, input_index_start, input_index_end); @@ -190,14 +190,6 @@ std::tuple>, std::vector uint32_t local_idx = global_idx - input_start; auto [is_pad_stick, src_idx] = tensor_metadata[global_idx]; auto [src_core_id, src_local_idx] = src_idx; - - if(local_idx >= pad_local || src_local_idx >= pad_local) { - //std::cout << "shard_boundaries " << output_boundary.first << " " << output_boundary.second << " " << input_boundary.first << " " << input_boundary.second << std::endl; - //std::cout << local_idx << " " << pad_local << " " << src_local_idx << " " << pad_local << std::endl; - //std::cout << is_pad_stick << " " << src_idx <<" " << src_core_id<< " " << src_local_idx << std::endl; - log_error(tt::LogOp, "Index overflow"); - } - TT_ASSERT(local_idx < pad_local && src_local_idx < pad_local, "Index overflow"); if (is_pad_stick) { TT_ASSERT(src_local_idx == 0); diff --git a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp index 4c3d4b01feb6..1a22e2cae663 100644 --- a/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp +++ b/ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp @@ -46,24 +46,9 @@ struct SlidingWindowConfig { // parallel configuration uint32_t num_cores_nhw = 1; // num cores along collapsed height nhw CoreRangeSet core_range_set = std::set{CoreRange({0, 0}, {0, 0})}; // active cores - bool has_parallel_config_; - // uint32_t num_cores_nhw_; // num cores along collapsed height nhw - // CoreRangeSet core_range_set_; // active cores - - bool snap_to_tile_; - bool is_bilinear_; - - SlidingWindowConfig(uint32_t batch_size, uint32_t input_h, uint32_t input_w, uint32_t window_h, uint32_t window_w, uint32_t stride_h, uint32_t stride_w, uint32_t pad_h, uint32_t pad_w, uint32_t dilation_h = 1, uint32_t dilation_w = 1, uint32_t num_cores_nhw = 0, CoreRangeSet core_range = {{}}, bool snap_to_tile = false, bool is_bilinear = false) - : batch_size_(batch_size), input_hw_(input_h, input_w), window_hw_(window_h, window_w), stride_hw_(stride_h, stride_w), pad_hw_(pad_h, pad_w), dilation_hw_(dilation_h, dilation_w), has_parallel_config_(false), num_cores_nhw_(num_cores_nhw), core_range_set_(core_range), snap_to_tile_(snap_to_tile), is_bilinear_(is_bilinear) { - has_parallel_config_ = num_cores_nhw_ > 0 && !core_range_set_.ranges().empty(); - } - - SlidingWindowConfig(const SlidingWindowConfig& other): batch_size_(other.batch_size_), input_hw_(other.input_hw_), window_hw_(other.window_hw_), stride_hw_(other.stride_hw_), pad_hw_(other.pad_hw_), dilation_hw_(other.dilation_hw_), has_parallel_config_(other.has_parallel_config_), num_cores_nhw_(other.num_cores_nhw_), core_range_set_(other.core_range_set_), snap_to_tile_(other.snap_to_tile_), is_bilinear_(other.is_bilinear_) {} - - SlidingWindowConfig(): core_range_set_({{{0,0}, {0,0}}}) {} - bool snap_to_tile = false; + bool is_bilinear = false; std::string to_string() const; bool has_parallel_config() const;