Skip to content

Commit

Permalink
Replaced hardcoded QueueIds for DefaultQueueId (#11171)
Browse files Browse the repository at this point in the history
#11170: use default queueid
  • Loading branch information
ntarafdar authored Aug 8, 2024
1 parent e1f16e7 commit 171a55a
Show file tree
Hide file tree
Showing 14 changed files with 150 additions and 101 deletions.
1 change: 1 addition & 0 deletions ttnn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ set(TTNN_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/experimental/reduction/argmax/argmax.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/reduction/topk/device/topk_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/embedding/device/embedding_device_operation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/data_movement/slice/slice.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/data_movement/slice/device/slice_op.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/data_movement/slice/device/slice_program_factory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/ttnn/operations/data_movement/concat/device/concat_device_operation.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include "optional"
#include "tt_dnn/op_library/math.hpp"
#include "ttnn/operations/data_movement/slice/slice.hpp"
#include "ttnn/operations/data_movement/slice/device/slice_op.hpp"
#include "tt_dnn/op_library/work_split.hpp"
#include "tt_metal/common/constants.hpp"
#include "tt_metal/detail/util.hpp"
Expand Down Expand Up @@ -154,7 +154,7 @@ operation::ProgramWithCallbacks multi_core_nlp_kv_cache_load_slice(
uint32_t num_units_per_shard_width = shard_spec.shape[1] / TILE_WIDTH;
auto num_tiles_per_core = num_units_per_shard_height * num_units_per_shard_width;

const auto tensor_start = static_cast<const ttnn::operations::data_movement::Slice *>(operation)->slice_start;
const auto tensor_start = static_cast<const ttnn::operations::data_movement::SliceDeviceOperation *>(operation)->slice_start;
auto all_runtime_args = get_unpad_runtime_args_tile_sharded(
src_tensor, dst_tensor, tensor_start, num_cores_total, num_cores_x, num_tiles_per_core);

Expand Down
1 change: 1 addition & 0 deletions ttnn/cpp/ttnn/operations/data_movement/concat/concat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//
// SPDX-License-Identifier: Apache-2.0

#include "ttnn/common/constants.hpp"
#include "ttnn/tensor/types.hpp"
#include "ttnn/operations/core/core.hpp"

Expand Down
1 change: 0 additions & 1 deletion ttnn/cpp/ttnn/operations/data_movement/concat/concat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

#pragma once

#include "ttnn/common/constants.hpp"
#include "ttnn/tensor/types.hpp"
#include "ttnn/operations/core/core.hpp"

Expand Down
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/operations/data_movement/pad/pad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ ttnn::Tensor ExecutePad::operator()(\
const ShapeType& input_tensor_start,\
const float value) {\
\
return pad_impl<ShapeType>(0, input_tensor, output_padded_shape, input_tensor_start, value, false, std::nullopt);\
return pad_impl<ShapeType>(DefaultQueueId, input_tensor, output_padded_shape, input_tensor_start, value, false, std::nullopt);\
}

PAD_OVERLOAD_DIM_IMPL(tt::tt_metal::Array1D)
Expand Down
3 changes: 0 additions & 3 deletions ttnn/cpp/ttnn/operations/data_movement/pad/pad.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,8 @@
#pragma once

#include "ttnn/tensor/types.hpp"

#include "tt_metal/common/logger.hpp"

#include <ranges>

#include "ttnn/decorators.hpp"


Expand Down
3 changes: 2 additions & 1 deletion ttnn/cpp/ttnn/operations/data_movement/permute/permute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "permute.hpp"

#include "ttnn/common/constants.hpp"
#include "ttnn/operations/data_movement/transpose/transpose.hpp"
#include "ttnn/deprecated/tt_dnn/op_library/copy/copy_op.hpp"

Expand Down Expand Up @@ -212,7 +213,7 @@ ttnn::Tensor ExecutePermute::operator()(
const ttnn::Tensor& input_tensor,
const std::vector<int64_t>& dims,
const std::optional<MemoryConfig>& memory_config) {
return operator()(0, input_tensor, dims, memory_config);
return operator()(DefaultQueueId, input_tensor, dims, memory_config);
}

ttnn::Tensor ExecutePermute::operator()(const ttnn::Tensor& input_tensor, const std::vector<int64_t>& dims) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ uint32_t get_rm_start_offset(const Tensor &tensor, const Shape &slice_start) {



void Slice::validate_with_output_tensors(
void SliceDeviceOperation::validate_with_output_tensors(
const std::vector<Tensor> &input_tensors, const std::vector<std::optional<Tensor>> &output_tensors) const {
const auto &input_tensor_a = input_tensors.at(0);
TT_FATAL(input_tensor_a.storage_type() == StorageType::DEVICE, "Operands to unpad need to be on device!");
Expand Down Expand Up @@ -97,7 +97,7 @@ void Slice::validate_with_output_tensors(
}
}

std::vector<tt::tt_metal::Shape> Slice::compute_output_shapes(const std::vector<Tensor> &input_tensors) const {
std::vector<tt::tt_metal::Shape> SliceDeviceOperation::compute_output_shapes(const std::vector<Tensor> &input_tensors) const {
std::vector<uint32_t> out_shape;
auto rank = input_tensors[0].get_legacy_shape().rank();
out_shape.reserve(rank);
Expand All @@ -108,14 +108,14 @@ std::vector<tt::tt_metal::Shape> Slice::compute_output_shapes(const std::vector<
return {output_tensor_shape};
}

std::vector<Tensor> Slice::create_output_tensors(
std::vector<Tensor> SliceDeviceOperation::create_output_tensors(
const std::vector<Tensor> &input_tensors, const std::vector<std::optional<Tensor>> &output_tensors) const {
const auto &input_tensor_a = input_tensors.at(0);
return operation::generic_create_output_tensors(
*this, input_tensors, input_tensor_a.get_dtype(), input_tensor_a.get_layout(), this->output_mem_config);
}

operation::ProgramWithCallbacks Slice::create_program(
operation::ProgramWithCallbacks SliceDeviceOperation::create_program(
const std::vector<Tensor> &input_tensors, std::vector<Tensor> &output_tensors) const {
const auto &input_tensor_a = input_tensors.at(0);
auto &output_tensor = output_tensors.at(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace ttnn::operations::data_movement {
uint32_t get_rm_start_offset(const Tensor &tensor, const Shape &slice_start);
uint32_t get_tiled_start_offset(const Tensor &input_tensor, const Shape &slice_start);

struct Slice {
struct SliceDeviceOperation {
const tt::tt_metal::Shape slice_start;
const tt::tt_metal::Shape slice_end;
const MemoryConfig output_mem_config;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ operation::ProgramWithCallbacks slice_rm_multi_core(
num_sticks_per_core_group_2] =
split_work_to_cores(compute_with_storage_grid_size, num_unpadded_sticks);

const auto tensor_start = static_cast<const ttnn::operations::data_movement::Slice *>(operation)->slice_start;
const auto tensor_start = static_cast<const ttnn::operations::data_movement::SliceDeviceOperation *>(operation)->slice_start;
auto all_runtime_args = get_slice_runtime_args_rm(
src_tensor,
dst_tensor,
Expand Down Expand Up @@ -510,7 +510,7 @@ operation::ProgramWithCallbacks slice_tile_multi_core(
[num_cores, all_cores, core_group_1, core_group_2, num_tiles_per_core_group_1, num_tiles_per_core_group_2] =
split_work_to_cores(compute_with_storage_grid_size, num_unpadded_tiles);

const auto& tensor_start = static_cast<const ttnn::operations::data_movement::Slice*>(operation)->slice_start;
const auto& tensor_start = static_cast<const ttnn::operations::data_movement::SliceDeviceOperation *>(operation)->slice_start;
set_slice_runtime_args_tile<false>(
src_tensor,
dst_tensor,
Expand Down
125 changes: 125 additions & 0 deletions ttnn/cpp/ttnn/operations/data_movement/slice/slice.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0


#include "ttnn/common/constants.hpp"
#include "slice.hpp"
#include "device/slice_op.hpp"
#include "ttnn/run_operation.hpp"
#include "ttnn/operations/core/core.hpp"


namespace ttnn::operations::data_movement {

// Slice (unpad) `input_tensor` down to the inclusive coordinate range
// [output_tensor_start, output_tensor_end] per dimension, dispatching on
// the given command queue.
// - Host tensors are sliced via Tensor::unpad (or returned as-is when the
//   requested shape already matches).
// - Device tensors run the SliceDeviceOperation, with a zero-copy early
//   exit for a sharded unpad that only trims the second-to-last dim.
ttnn::Tensor SliceOperation::operator()(
    uint8_t queue_id,
    const ttnn::Tensor& input_tensor,
    tt::tt_metal::Shape output_tensor_start,
    tt::tt_metal::Shape output_tensor_end,
    const std::optional<MemoryConfig>& memory_config_arg) {
    if (input_tensor.storage_type() != StorageType::DEVICE) {
        // Host path. End coordinates are inclusive, hence the +1 per dim.
        // NOTE(review): indices 0..3 are accessed unconditionally here, so
        // this branch assumes a rank-4 tensor — confirm callers guarantee it.
        tt::tt_metal::Shape output_tensor_shape = {
            output_tensor_end[0] - output_tensor_start[0] + 1,
            output_tensor_end[1] - output_tensor_start[1] + 1,
            output_tensor_end[2] - output_tensor_start[2] + 1,
            output_tensor_end[3] - output_tensor_start[3] + 1,
        };
        if (input_tensor.get_legacy_shape() == output_tensor_shape) {
            // Nothing to trim — return the input unchanged.
            return input_tensor;
        } else {
            return input_tensor.unpad(output_tensor_start, output_tensor_end);
        }
    }
    else {
        auto memory_config = memory_config_arg.value_or(input_tensor.memory_config());
        // TODO: Generalize this early exit of slice for other cases
        auto& input_tensor_shape = input_tensor.get_legacy_shape();
        if (input_tensor.is_sharded() && input_tensor.memory_config() == memory_config &&
            input_tensor_shape.rank() > 1 && input_tensor_shape.rank() == output_tensor_start.rank() &&
            output_tensor_start.rank() == output_tensor_end.rank()) {
            // `i` is deliberately reused after the loop: it walks the leading
            // dims, then indexes the second-to-last and last dims in turn.
            uint32_t i;
            // Require all leading dims to be 1 (TODO: This can be relaxed to support outermost non-1 dim unpadding)
            bool in_place_unpad = true;
            for (i = 0; i < input_tensor.get_legacy_shape().rank() - 2; ++i) {
                in_place_unpad &=
                    output_tensor_start[i] == 0 && output_tensor_end[i] == 0 && input_tensor_shape[i] == 1;
            }
            // Second-to-last dim: the slice must not change the number of
            // shards (same shard-count before and after trimming).
            in_place_unpad &= output_tensor_start[i] == 0 &&
                              tt::div_up(output_tensor_end[i] + 1, input_tensor.shard_spec().value().shape[0]) ==
                                  tt::div_up(input_tensor_shape[i], input_tensor.shard_spec().value().shape[0]);
            i++;
            // Last dim must be kept whole (end is inclusive).
            in_place_unpad &= output_tensor_start[i] == 0 && output_tensor_end[i] == input_tensor_shape[i] - 1;
            if (in_place_unpad) {
                // Zero-copy path: reuse the existing storage and only shrink
                // the logical shape / padding metadata of dim -2.
                auto new_shape = input_tensor.get_legacy_shape();
                auto new_pad = new_shape.padding();

                std::size_t unpad_val = input_tensor_shape[-2] - output_tensor_end[-2] - 1;
                new_shape[-2] -= unpad_val;
                // Absorb the trimmed rows into back-padding, clamped so the
                // padding never goes negative.
                new_pad[-2].back -= std::min(unpad_val, new_pad[-2].back);
                auto padded_shape = ttnn::Shape(tt::tt_metal::Shape(new_shape, new_pad));
                return Tensor(input_tensor.storage(), padded_shape, input_tensor.dtype(), input_tensor.layout());
            }
        }

        // General device path: run the slice program on the requested queue.
        return operation::run(
            SliceDeviceOperation{output_tensor_start, output_tensor_end, memory_config}, {input_tensor}, {}, {}, queue_id)
            .at(0);

    }
}

// Shape overload without an explicit queue: forwards to the queue-aware
// overload on the default command queue.
// Fix: use DefaultQueueId (ttnn/common/constants.hpp) instead of a
// hardcoded 0, consistent with the other queue-less overloads in this file.
ttnn::Tensor SliceOperation::operator()(
    const ttnn::Tensor& input_tensor,
    tt::tt_metal::Shape output_tensor_start,
    tt::tt_metal::Shape output_tensor_end,
    const std::optional<MemoryConfig>& memory_config_arg) {
    return operator()(DefaultQueueId, input_tensor, output_tensor_start, output_tensor_end, memory_config_arg);
}

// Array1D overload: lifts the start/end arrays into tt_metal Shapes and
// forwards to the Shape-based overload on the supplied command queue.
ttnn::Tensor SliceOperation::operator()(
    uint8_t queue_id,
    const ttnn::Tensor& input_tensor,
    tt::tt_metal::Array1D output_tensor_start,
    tt::tt_metal::Array1D output_tensor_end,
    const std::optional<MemoryConfig>& memory_config_arg) {
    tt::tt_metal::Shape start_shape(output_tensor_start);
    tt::tt_metal::Shape end_shape(output_tensor_end);
    return operator()(queue_id, input_tensor, start_shape, end_shape, memory_config_arg);
}

// Array4D overload: lifts the start/end arrays into tt_metal Shapes and
// forwards to the Shape-based overload on the supplied command queue.
ttnn::Tensor SliceOperation::operator()(
    uint8_t queue_id,
    const ttnn::Tensor& input_tensor,
    tt::tt_metal::Array4D output_tensor_start,
    tt::tt_metal::Array4D output_tensor_end,
    const std::optional<MemoryConfig>& memory_config_arg) {
    tt::tt_metal::Shape start_shape(output_tensor_start);
    tt::tt_metal::Shape end_shape(output_tensor_end);
    return operator()(queue_id, input_tensor, start_shape, end_shape, memory_config_arg);
}

// Array4D overload without an explicit queue: dispatches the queue-aware
// Array4D overload on the default command queue.
ttnn::Tensor SliceOperation::operator()(
    const ttnn::Tensor& input_tensor,
    tt::tt_metal::Array4D output_tensor_start,
    tt::tt_metal::Array4D output_tensor_end,
    const std::optional<MemoryConfig>& memory_config_arg) {
    return operator()(
        DefaultQueueId,
        input_tensor,
        output_tensor_start,
        output_tensor_end,
        memory_config_arg);
}

// Minimal Array4D overload: default queue and no memory-config override
// (the callee falls back to the input tensor's memory config).
ttnn::Tensor SliceOperation::operator()(
    const ttnn::Tensor& input_tensor,
    tt::tt_metal::Array4D output_tensor_start,
    tt::tt_metal::Array4D output_tensor_end) {
    const std::optional<MemoryConfig> no_memory_config = std::nullopt;
    return operator()(DefaultQueueId, input_tensor, output_tensor_start, output_tensor_end, no_memory_config);
}

} // namespace ttnn::operations::data_movement


Loading

0 comments on commit 171a55a

Please sign in to comment.