#5752: changes after rebase.
shwetankTT committed Sep 9, 2024
1 parent 7394dd2 commit fde00d0
Showing 3 changed files with 28 additions and 51 deletions.
@@ -37,11 +37,11 @@ Tensor HaloTensorCreation(const Tensor &input){
ttnn::Tensor input_tensor = input; // tensor to return
SlidingWindowConfig sliding_window_config = SlidingWindowConfig(
batch_size,
- input_height, input_width,
- 2, 2, //kernel size
- 1, 1, // stride
- 0, 0, //padding
- 1, 1,
+ {input_height, input_width},
+ {2, 2}, //kernel size
+ {1, 1}, // stride
+ {0, 0}, //padding
+ {1, 1}, //dilation
num_cores_nhw,
input_tensor.memory_config().shard_spec.value().grid,
true, true);
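
Note: the call-site change above switches every spatial argument from two flat scalars to a single {h, w} pair. A minimal standalone sketch of the benefit; MiniConfig and its values are illustrative stand-ins, not the real ttnn type:

    #include <cstdint>
    #include <utility>

    using uint32_pair_t = std::pair<uint32_t, uint32_t>;

    // Simplified stand-in for SlidingWindowConfig's spatial members.
    struct MiniConfig {
        uint32_t batch_size;
        uint32_pair_t input_hw;
        uint32_pair_t window_hw;
        uint32_pair_t stride_hw;
        uint32_pair_t pad_hw;
        uint32_pair_t dilation_hw;
    };

    int main() {
        // Each quantity travels as one {h, w} unit, so a missing or extra
        // argument is a compile error instead of a silently shifted list
        // of ten scalars.
        MiniConfig cfg{1, {32, 32}, {2, 2}, {1, 1}, {0, 0}, {1, 1}};
        return static_cast<int>(cfg.batch_size) - 1;
    }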
ttnn/cpp/ttnn/operations/sliding_window/sliding_window.cpp (52 changes: 22 additions & 30 deletions)
@@ -24,18 +24,16 @@ bool SlidingWindowConfig::has_parallel_config() const {
* Calculate the window op output shape, excludes the channel dimension since this config is independent of the depth.
*/
Shape SlidingWindowConfig::get_output_shape() const {
- uint32_t output_h = (input_hw_.first + 2 * pad_hw_.first - dilation_hw_.first * window_hw_.first) / stride_hw_.first + 1;
- uint32_t output_w = (input_hw_.second + 2 * pad_hw_.second - dilation_hw_.second * window_hw_.second) / stride_hw_.second + 1;
- if(is_bilinear_){
- //std::cout << "is bilinear sliding windows cpp" << std::endl;
- output_h = input_hw_.first;
- output_w = input_hw_.second; //for bilinear input and output should be same.. and kernel size is 2x2
+ uint32_t output_h = (input_hw.first + 2 * pad_hw.first - dilation_hw.first * window_hw.first) / stride_hw.first + 1;
+ uint32_t output_w = (input_hw.second + 2 * pad_hw.second - dilation_hw.second * window_hw.second) / stride_hw.second + 1;
+ if(is_bilinear){
+ output_h = input_hw.first;
+ output_w = input_hw.second; //for bilinear input and output should be same.. and kernel size is 2x2
// we need neighboring width in the output tensor
}
- // uint32_t output_h = (std::get<0>(input_hw_) + 2 * std::get<0>(pad_hw_) - std::get<0>(dilation_hw_) * std::get<0>(window_hw_)) / std::get<0>(stride_hw_) + 1;
- // uint32_t output_w = (std::get<1>(input_hw_) + 2 * std::get<1>(pad_hw_) - std::get<1>(dilation_hw_) * std::get<1>(window_hw_)) / std::get<1>(stride_hw_) + 1;
- log_debug(tt::LogOp, "output_size: {} {} {}", batch_size_, output_h, output_w);
- return Shape( std::vector<uint32_t>{batch_size_, output_h, output_w, 0});
+ log_debug(tt::LogOp, "output_size: {} {} {}", batch_size, output_h, output_w);
+ return Shape( std::vector<uint32_t>{batch_size, output_h, output_w, 0});
}
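
Note: the retained formula is the usual convolution output-size relation; for dilation = 1, (in + 2*pad - dil*win) / stride + 1 equals the textbook (in + 2*pad - dil*(win - 1) - 1) / stride + 1. A standalone sketch with one worked case (the input extent 32 below is illustrative):

    #include <cstdint>
    #include <cstdio>

    // Output extent along one axis, as computed in get_output_shape().
    static uint32_t out_dim(uint32_t in, uint32_t pad, uint32_t dil,
                            uint32_t win, uint32_t stride) {
        return (in + 2 * pad - dil * win) / stride + 1;
    }

    int main() {
        // HaloTensorCreation's settings: 2x2 window, stride 1, pad 0, dilation 1.
        std::printf("%u\n", out_dim(32, 0, 1, 2, 1)); // prints 31
        // The bilinear branch discards this value and keeps output == input (32),
        // since upsampling needs the full input extent plus neighbor rows.
        return 0;
    }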

/**
@@ -52,12 +50,12 @@ uint32_t SlidingWindowConfig::get_output_shard_y(bool snap_to_tile) const {


std::vector<bool> generate_pad_metadata(const SlidingWindowConfig& config) {
- uint32_t padded_input_h = config.input_hw_.first + 2 * config.pad_hw_.first;
- uint32_t padded_input_w = config.input_hw_.second + 2 * config.pad_hw_.second;
- uint32_t extra_padding_layer = config.is_bilinear_ ? config.input_hw_.second*2 : 0; //need to test this
- std::vector<bool> pad_metadata(config.batch_size_ * padded_input_h * padded_input_w + extra_padding_layer, false);
- //std::cout << "padded_input_h " << padded_input_h << " padded_input_w " << padded_input_w << std::endl;
- for (uint32_t b = 0; b < config.batch_size_; ++b) {
+ uint32_t padded_input_h = config.input_hw.first + 2 * config.pad_hw.first;
+ uint32_t padded_input_w = config.input_hw.second + 2 * config.pad_hw.second;
+ uint32_t extra_padding_layer = config.is_bilinear ? config.input_hw.second*2 : 0; //extra padding in case of bilinear
+ std::vector<bool> pad_metadata(config.batch_size * padded_input_h * padded_input_w + extra_padding_layer, false);
+
+ for (uint32_t b = 0; b < config.batch_size; ++b) {
for (uint32_t h = 0; h < padded_input_h; ++h) {
for (uint32_t w = 0; w < padded_input_w; ++w) {
if (h < config.pad_hw.first || h >= config.pad_hw.first + config.input_hw.first ||
@@ -69,7 +67,7 @@ std::vector<bool> generate_pad_metadata(const SlidingWindowConfig& config) {
}

for (int i=0; i<extra_padding_layer; i++) {
- pad_metadata[config.batch_size_ * padded_input_h * padded_input_w + i] = true;
+ pad_metadata[config.batch_size * padded_input_h * padded_input_w + i] = true;
}
return pad_metadata;
}
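
Note: for readers outside the diff context, a condensed restatement of what generate_pad_metadata builds: a flat bool mask over the padded (b, h, w) volume, true where a stick is padding, plus 2*W trailing pad sticks in the bilinear case. Sketch under those assumptions (the function name pad_mask is ours, not ttnn's):

    #include <cstdint>
    #include <vector>

    std::vector<bool> pad_mask(uint32_t batch, uint32_t in_h, uint32_t in_w,
                               uint32_t pad_h, uint32_t pad_w, bool bilinear) {
        uint32_t ph = in_h + 2 * pad_h;
        uint32_t pw = in_w + 2 * pad_w;
        uint32_t extra = bilinear ? 2 * in_w : 0;  // extra trailing pad layer
        std::vector<bool> mask(batch * ph * pw + extra, false);
        for (uint32_t b = 0; b < batch; ++b)
            for (uint32_t h = 0; h < ph; ++h)
                for (uint32_t w = 0; w < pw; ++w)
                    // A stick is padding when it falls outside the original image.
                    if (h < pad_h || h >= pad_h + in_h || w < pad_w || w >= pad_w + in_w)
                        mask[b * ph * pw + h * pw + w] = true;
        for (uint32_t i = 0; i < extra; ++i)  // the bilinear tail is all padding
            mask[batch * ph * pw + i] = true;
        return mask;
    }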
@@ -84,9 +82,8 @@ std::vector<uint32_t> generate_op_trace_metadata(const SlidingWindowConfig& conf
for (uint32_t b = 0; b < output_shape[0]; ++b) {
for (uint32_t h = 0; h < output_shape[1]; ++h) {
for (uint32_t w = 0; w < output_shape[2]; ++w) {
- uint32_t input_index = b * padded_input_h * padded_input_w + h * config.stride_hw_.first * padded_input_w + w * config.stride_hw_.second;
- op_trace_metadata[i++] = input_index < config.input_hw_.second ? 0 : input_index - config.input_hw_.second ;
- //std::cout << "op_trace_metadata " << i << " " << input_index << std::endl;
+ uint32_t input_index = b * padded_input_h * padded_input_w + h * config.stride_hw.first * padded_input_w + w * config.stride_hw.second;
+ op_trace_metadata[i++] = config.is_bilinear ? (input_index < config.input_hw.second ? 0 : input_index - config.input_hw.second) : input_index ;
}
}
}
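
Note: the rewritten line above makes the row shift conditional. The old code shifted every op's trace index back by one input row; now non-bilinear ops record the raw strided window origin, while bilinear shifts it back by one input row (W sticks), clamped at 0, so each shard also covers its neighboring row. A standalone restatement of that rule:

    #include <cstdint>

    // Input stick index recorded for output position (b, h, w), per the new code.
    uint32_t trace_index(uint32_t b, uint32_t h, uint32_t w,
                         uint32_t padded_h, uint32_t padded_w,
                         uint32_t stride_h, uint32_t stride_w,
                         uint32_t input_w, bool bilinear) {
        uint32_t idx = b * padded_h * padded_w + h * stride_h * padded_w + w * stride_w;
        if (!bilinear)
            return idx;                            // plain window origin
        return idx < input_w ? 0 : idx - input_w;  // back one row, clamped at 0
    }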
@@ -99,13 +96,16 @@ std::vector<std::pair<uint32_pair_t, uint32_pair_t>> generate_shard_boundaries(c
uint32_t output_shard_h = config.get_output_shard_y(config.snap_to_tile);
uint32_t padded_input_w = config.input_hw.second + 2 * config.pad_hw.second;
uint32_t max_index = op_trace_metadata.size();
- uint32_t halo_with_pad_len = (config.window_hw_.first - 1) * padded_input_w + config.window_hw_.second - 1;
- if(config.is_bilinear_){
+ uint32_t halo_with_pad_len = (config.window_hw.first - 1) * padded_input_w + config.window_hw.second - 1;
+ if(config.is_bilinear){
halo_with_pad_len += config.input_hw.second;
}
uint32_t output_index_start = 0;
for (uint32_t core = 0; core < num_cores; ++ core) {
uint32_t output_index_end = std::min(output_index_start + output_shard_h, max_index) - 1;
uint32_t input_index_start = op_trace_metadata[output_index_start];
uint32_t input_index_end = op_trace_metadata[output_index_end] + halo_with_pad_len;
- if (config.is_bilinear_ == false and input_index_start == 0 and output_index_start != 0) {
+ if (config.is_bilinear == false and input_index_start == 0 and output_index_start != 0) {
input_index_start = op_trace_metadata[output_index_end] + 1;
input_index_end = input_index_start - 1;
log_debug(tt::LogOp, "core: {}, output_index_start: {}, output_index_end: {}, input_index_start: {}, input_index_end: {}", core, output_index_start, output_index_end, input_index_start, input_index_end);
@@ -190,14 +190,6 @@ std::tuple<std::vector<std::vector<uint16_t>>, std::vector<std::vector<uint16_t>
uint32_t local_idx = global_idx - input_start;
auto [is_pad_stick, src_idx] = tensor_metadata[global_idx];
auto [src_core_id, src_local_idx] = src_idx;

- if(local_idx >= pad_local || src_local_idx >= pad_local) {
- //std::cout << "shard_boundaries " << output_boundary.first << " " << output_boundary.second << " " << input_boundary.first << " " << input_boundary.second << std::endl;
- //std::cout << local_idx << " " << pad_local << " " << src_local_idx << " " << pad_local << std::endl;
- //std::cout << is_pad_stick << " " << src_idx <<" " << src_core_id<< " " << src_local_idx << std::endl;
- log_error(tt::LogOp, "Index overflow");
- }
TT_ASSERT(local_idx < pad_local && src_local_idx < pad_local, "Index overflow");
if (is_pad_stick) {
TT_ASSERT(src_local_idx == 0);
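
Note: the deleted block duplicated the condition of the TT_ASSERT that follows it, but only logged "Index overflow" and continued. A sketch of the pattern kept here, written with the standard <cassert> equivalent rather than ttnn's TT_ASSERT macro:

    #include <cassert>
    #include <cstdint>

    void check_stick(uint32_t local_idx, uint32_t src_local_idx, uint32_t pad_local) {
        // Out-of-range indices mean boundary generation is buggy, so halt
        // immediately instead of logging and continuing with bad data.
        assert(local_idx < pad_local && src_local_idx < pad_local && "Index overflow");
    }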
ttnn/cpp/ttnn/operations/sliding_window/sliding_window.hpp (17 changes: 1 addition & 16 deletions)
@@ -46,24 +46,9 @@ struct SlidingWindowConfig {
// parallel configuration
uint32_t num_cores_nhw = 1; // num cores along collapsed height nhw
CoreRangeSet core_range_set = std::set{CoreRange({0, 0}, {0, 0})}; // active cores
- bool has_parallel_config_;
- // uint32_t num_cores_nhw_; // num cores along collapsed height nhw
- // CoreRangeSet core_range_set_; // active cores
- bool snap_to_tile_;
- bool is_bilinear_;
- SlidingWindowConfig(uint32_t batch_size, uint32_t input_h, uint32_t input_w, uint32_t window_h, uint32_t window_w, uint32_t stride_h, uint32_t stride_w, uint32_t pad_h, uint32_t pad_w, uint32_t dilation_h = 1, uint32_t dilation_w = 1, uint32_t num_cores_nhw = 0, CoreRangeSet core_range = {{}}, bool snap_to_tile = false, bool is_bilinear = false)
- : batch_size_(batch_size), input_hw_(input_h, input_w), window_hw_(window_h, window_w), stride_hw_(stride_h, stride_w), pad_hw_(pad_h, pad_w), dilation_hw_(dilation_h, dilation_w), has_parallel_config_(false), num_cores_nhw_(num_cores_nhw), core_range_set_(core_range), snap_to_tile_(snap_to_tile), is_bilinear_(is_bilinear) {
- has_parallel_config_ = num_cores_nhw_ > 0 && !core_range_set_.ranges().empty();
- }
- SlidingWindowConfig(const SlidingWindowConfig& other): batch_size_(other.batch_size_), input_hw_(other.input_hw_), window_hw_(other.window_hw_), stride_hw_(other.stride_hw_), pad_hw_(other.pad_hw_), dilation_hw_(other.dilation_hw_), has_parallel_config_(other.has_parallel_config_), num_cores_nhw_(other.num_cores_nhw_), core_range_set_(other.core_range_set_), snap_to_tile_(other.snap_to_tile_), is_bilinear_(other.is_bilinear_) {}
- SlidingWindowConfig(): core_range_set_({{{0,0}, {0,0}}}) {}
+ bool snap_to_tile = false;
+ bool is_bilinear = false;

std::string to_string() const;
bool has_parallel_config() const;
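
Note: with the hand-written constructors and underscore-suffixed members gone, the struct becomes an aggregate with default member initializers, which is what lets the call site in the first file pass one {h, w} pair per member (parenthesized aggregate initialization is a C++20 feature). Standalone sketch of the pattern; MiniConfig is a stand-in, and its has_parallel_config() is simplified (the real one also checks that core_range_set is non-empty):

    #include <cstdint>
    #include <utility>

    struct MiniConfig {
        uint32_t batch_size = 0;
        std::pair<uint32_t, uint32_t> input_hw = {0, 0};
        uint32_t num_cores_nhw = 1;
        bool snap_to_tile = false;
        bool is_bilinear = false;

        // Computed on demand, replacing the cached has_parallel_config_ member
        // that the deleted constructor had to keep in sync.
        bool has_parallel_config() const { return num_cores_nhw > 0; }
    };

    int main() {
        // No user-declared constructors, so aggregate initialization just works.
        MiniConfig cfg{2, {32, 32}, 8, /*snap_to_tile=*/true, /*is_bilinear=*/true};
        return cfg.has_parallel_config() ? 0 : 1;
    }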
