Skip to content

Commit

Permalink
#5725: adding a few validations for upsample.
Browse files Browse the repository at this point in the history
  • Loading branch information
shwetankTT committed Sep 19, 2024
1 parent 1673fd9 commit 261c81e
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,12 +9,18 @@

// Fill given four values into the memory starting at the given address.
// WARNING: Use with caution as there's no memory protection. Make sure size is within limits
// Pack four 16-bit values into two consecutive 32-bit words starting at the
// given L1 address (val/val1 into word 0, val2/val3 into word 1, low half first).
// WARNING: Use with caution as there's no memory protection. Make sure size is within limits.
// NOTE(review): the diff showed a stale `bool` signature and `return true;` from the
// pre-change side; the function performs no fallible work, so `void` is correct.
ALWI void fill_four_val(uint32_t begin_addr, uint16_t val, uint16_t val1, uint16_t val2, uint16_t val3) {
    // volatile: the destination is device L1 memory read by other engines,
    // so the stores must not be elided or reordered by the compiler.
    volatile tt_l1_ptr uint32_t* ptr = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(begin_addr);

    ptr[0] = (val | (val1 << 16));
    ptr[1] = (val2 | (val3 << 16));
}

// Reinterpret the raw bit pattern of a 32-bit unsigned integer as a float.
// Uses std::memcpy for the type pun, which is well-defined (unlike a
// reinterpret_cast through pointers, which would violate strict aliasing).
ALWI float uint32_to_float(uint32_t bits)
{
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}


Expand All @@ -34,24 +40,23 @@ void kernel_main() {

constexpr uint32_t in_cb_id = get_compile_time_arg_val(0);
constexpr uint32_t out_cb_id = tt::CB::c_in1;
constexpr uint32_t is_reader = get_compile_time_arg_val(2);

uint32_t in_image_row_nbytes = in_w * stick_nbytes;
uint32_t out_image_row_nbytes = out_w * stick_nbytes;
uint32_t reader_image_rows_per_core = (in_image_rows_per_core + is_reader) / 2;
uint32_t writer_image_rows_per_core = in_image_rows_per_core / 2;
uint32_t image_row_begin = is_reader ? 0 : reader_image_rows_per_core;
uint32_t image_row_end = is_reader ? reader_image_rows_per_core : in_image_rows_per_core;
uint32_t l1_read_addr = get_read_ptr(in_cb_id); //+ image_row_begin * in_image_row_nbytes;
//constexpr uint32_t is_reader = get_compile_time_arg_val(2);
constexpr uint32_t scale_h_inv_comp = get_compile_time_arg_val(3);
constexpr uint32_t scale_w_inv_comp = get_compile_time_arg_val(4);
constexpr uint32_t y_index_comp = get_compile_time_arg_val(5);
constexpr uint32_t x_index_compute_comp = get_compile_time_arg_val(6);

uint32_t l1_read_addr = get_read_ptr(in_cb_id);
constexpr uint32_t in_scalar_cb_id = tt::CB::c_in4;

// assuming shard begins with a new row. TODO: generalize?
float scale_h_inv = 1.0f / scale_h;
float scale_w_inv = 1.0f / scale_w;
float scale_h_inv = uint32_to_float(scale_h_inv_comp);
float scale_w_inv = uint32_to_float(scale_w_inv_comp);
float x, y, x_index, y_index, dx, dy;
y_index = (float)(0.5f) * (float)scale_h_inv + 0.5f;
y_index = uint32_to_float(y_index_comp);
float x_index_compute = uint32_to_float(x_index_compute_comp);
for (uint32_t image_row = 0 ; image_row < in_image_rows_per_core * scale_h; ++image_row){
x_index = (float)(0.5f) * (float)scale_w_inv -0.5f;
x_index = x_index_compute;
for(uint32_t j=0; j < in_w * scale_w; j++){
cb_reserve_back(out_cb_id, 4);
cb_reserve_back(in_scalar_cb_id, 1);
Expand All @@ -64,7 +69,7 @@ void kernel_main() {
uint32_t x1 = int(x);
uint32_t y1 = int(y);
uint32_t x2 = min(x1 + 1, in_w-1);
uint32_t y2 = y1 + 1; //, in_image_rows_per_core - 1);
uint32_t y2 = y1 + 1;
if(is_last_row){
y2 = min(y2, in_image_rows_per_core); //if last row, y2 should be in_image_rows_per_core
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -175,10 +175,25 @@ operation::ProgramWithCallbacks bilinear_multi_core(const Tensor &input, Tensor&
log_debug(LogOp, "input_nsticks_per_core: {}, output_nsticks_per_core: {}", input_nsticks_per_core, output_nsticks_per_core);

// Kernels
//computation needed for the bilinear kernel. Passing them as an argument.
float scale_h_inv = 1.0f / (float)scale_factor_h;
float scale_w_inv = 1.0f / (float)scale_factor_w;
float y_index = (float)(0.5f) * (float)scale_h_inv + 0.5f;
float x_index_compute = (float)(0.5f) * (float)scale_w_inv - 0.5f;

uint32_t scale_h_inv_u32 = *reinterpret_cast<uint32_t*>(&scale_h_inv);
uint32_t scale_w_inv_u32 = *reinterpret_cast<uint32_t*>(&scale_w_inv);
uint32_t y_index_u32 = *reinterpret_cast<uint32_t*>(&y_index);
uint32_t x_index_compute_u32 = *reinterpret_cast<uint32_t*>(&x_index_compute);

std::vector<uint32_t> reader_compile_time_args = {
in_cb_id,
out_cb_id,
false,
scale_h_inv_u32,
scale_w_inv_u32,
y_index_u32,
x_index_compute_u32,
};

string writer_kernel_fname, reader_kernel_fname, compute_kernel_fname;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ void UpSample::validate(const std::vector<Tensor> &input_tensors) const {
TT_FATAL(input_tensor_a.get_dtype() == DataType::BFLOAT16, "Input tensor data type should be BFLOAT16");
if (input_tensor_a.memory_config().is_sharded()) {
TT_FATAL(input_tensor_a.memory_config().memory_layout == output_mem_config_.memory_layout, "Input tensor memory layout should be same as output tensor memory layout");
TT_FATAL(input_tensor_a.memory_config().memory_layout == TensorMemoryLayout::HEIGHT_SHARDED || input_tensor_a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED, "Input tensor memory layout should be HEIGHT or BLOCK sharded");
if(mode_ == "nearest")
TT_FATAL(input_tensor_a.memory_config().memory_layout == TensorMemoryLayout::HEIGHT_SHARDED || input_tensor_a.memory_config().memory_layout == TensorMemoryLayout::BLOCK_SHARDED, "Input tensor memory layout should be HEIGHT or BLOCK sharded");
else if(mode_ == "bilinear")
TT_FATAL(input_tensor_a.memory_config().memory_layout == TensorMemoryLayout::HEIGHT_SHARDED, "Input tensor memory layout should be HEIGHT sharded");
TT_FATAL(mode_ == "bilinear" || mode_ == "nearest", "Upsample only supports bilinear or nearest mode");
TT_FATAL(input_tensor_a.buffer()->buffer_type() == tt::tt_metal::BufferType::L1, "Input buffer should be sharded in L1");
}
}
Expand Down
3 changes: 3 additions & 0 deletions ttnn/cpp/ttnn/operations/pool/upsample/upsample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ ttnn::Tensor ExecuteUpSample::invoke(const ttnn::Tensor& input_tensor,
MemoryConfig mem_config = output_mem_config.value_or(input_tensor.memory_config());
ttnn::DeviceComputeKernelConfig config = compute_kernel_config.value_or(
ttnn::init_device_compute_kernel_config(input_tensor.device()->arch(), std::nullopt, MathFidelity::HiFi4));
if(mode.empty()) {
mode = "nearest";
}
int scale_h = 1;
int scale_w = 1;
std::visit(
Expand Down

0 comments on commit 261c81e

Please sign in to comment.