Skip to content

Commit

Permalink
#8835: updated TracyOpTNNNDeviceV2 to write tensors to json
Browse files Browse the repository at this point in the history
  • Loading branch information
arakhmati committed Jun 18, 2024
1 parent 9f9fb34 commit c83299f
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 59 deletions.
42 changes: 34 additions & 8 deletions tt_metal/tools/profiler/op_profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include <filesystem>
#include <tuple>
#include <type_traits>

#include "tensor/tensor.hpp"
Expand Down Expand Up @@ -257,20 +258,45 @@ inline json get_base_json(
return j;
}

// Build the base profiler json record for a single device-operation invocation.
//
// Serializes the global call count, every reflected operation attribute, and a
// json entry for each Tensor found (recursively) in the operation's inputs and
// outputs.
//
// operation_id         - globally unique id assigned to this invocation
// operation_attributes - reflected attribute struct; must expose the
//                        attribute_names tuple and attribute_values()
// tensor_args          - operation inputs; every nested Tensor is serialized
// tensor_return_value  - operation outputs; every nested Tensor is serialized
template <typename operation_t>
inline json get_base_json(
    uint32_t operation_id,
    const typename operation_t::operation_attributes_t& operation_attributes,
    const typename operation_t::tensor_args_t& tensor_args,
    typename operation_t::tensor_return_value_t& tensor_return_value) {
    ZoneScoped;
    json j;
    j["global_call_count"] = operation_id;

    // TODO(review): op_code is currently a fixed placeholder; derive the real
    // operation name once it is available through reflection.
    std::string opName = "device operation";
    // NOTE(review): the ','->';' replacement looks like protection for
    // comma-separated downstream consumers; it is a no-op while opName is a
    // constant without commas — confirm before removing.
    std::replace(opName.begin(), opName.end(), ',', ';');
    j["op_code"] = opName;

    // Serialize each reflected attribute as name -> fmt-formatted value by
    // expanding an index sequence over the attribute_names tuple.
    json attributesObj;
    constexpr auto& attribute_names = std::decay_t<decltype(operation_attributes)>::attribute_names;
    const auto attribute_values = operation_attributes.attribute_values();
    [&attributesObj, &attribute_names, &attribute_values]<size_t... Ns>(std::index_sequence<Ns...>) {
        (
            [&attributesObj, &attribute_names, &attribute_values] {
                const auto& attribute_name = std::get<Ns>(attribute_names);
                const auto& attribute = std::get<Ns>(attribute_values);
                attributesObj[attribute_name] = fmt::format("{}", attribute);
            }(),
            ...);
    }(std::make_index_sequence<std::tuple_size_v<std::decay_t<decltype(attribute_names)>>>{});
    j["attributes"] = attributesObj;

    // Collect a json record for every Tensor nested anywhere inside the inputs.
    std::vector<json> input_tensors;
    tt::stl::reflection::visit_object_of_type<Tensor>(
        [&input_tensors](auto&& tensor) { input_tensors.push_back(get_tensor_json(tensor)); }, tensor_args);
    j["input_tensors"] = input_tensors;

    // Likewise for every Tensor nested inside the return value.
    std::vector<json> output_tensors;
    tt::stl::reflection::visit_object_of_type<Tensor>(
        [&output_tensors](auto&& tensor) { output_tensors.push_back(get_tensor_json(tensor)); }, tensor_return_value);
    j["output_tensors"] = output_tensors;

    return j;
}

Expand Down Expand Up @@ -354,16 +380,16 @@ inline std::string op_meta_data_serialized_json(
const auto& program_hash,
const auto& operation_attributes,
const auto& tensor_args,
auto& tensor_attributes) {
auto j = get_base_json(operation_id, operation_attributes);
auto& tensor_return_value) {
auto j = get_base_json<operation_t>(operation_id, operation_attributes, tensor_args, tensor_return_value);
j["op_type"] = magic_enum::enum_name(OpType::tt_dnn_device);
j["device_id"] = device_id;
j["op_hash"] = program_hash;
j["kernel_info"] = get_kernels_json(program);

j["optional_input_tensors"] = get_tensors_json(std::vector<std::optional<Tensor>>{});
j["optional_input_tensors"] = std::vector<json>{};

auto perfModel = operation_t::create_op_performance_model(operation_attributes, tensor_args, tensor_attributes);
auto perfModel = operation_t::create_op_performance_model(operation_attributes, tensor_args, tensor_return_value);
j["performance_model"]["compute_ns"] = perfModel.get_compute_ns();
j["performance_model"]["ideal_ns"] = perfModel.get_ideal_ns();
j["performance_model"]["bandwidth_ns"] = perfModel.get_bandwidth_ns();
Expand Down
42 changes: 42 additions & 0 deletions tt_metal/tt_stl/reflection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,48 @@ std::ostream& operator<<(std::ostream& os, const std::set<T>& set) {
return os;
}

// Terminal case of the visitation family: the value IS a to_visit_t (after
// decay), so hand it directly to the callback.
template <typename to_visit_t, typename T>
    requires std::same_as<std::decay_t<T>, to_visit_t>
constexpr auto visit_object_of_type(auto callback, T&& match) {
    callback(match);
}

// Recurse into an optional only when it holds a value; an empty optional is
// silently skipped.
template <typename to_visit_t, typename T>
constexpr auto visit_object_of_type(auto callback, const std::optional<T>& maybe_value) {
    if (maybe_value) {
        visit_object_of_type<to_visit_t>(callback, *maybe_value);
    }
}

// Recurse into every element of a vector, in order.
template <typename to_visit_t, typename T>
constexpr auto visit_object_of_type(auto callback, const std::vector<T>& values) {
    for (const auto& element : values) {
        visit_object_of_type<to_visit_t>(callback, element);
    }
}

// Recurse into every element of a fixed-size array, in order.
template <typename to_visit_t, typename T, auto N>
constexpr auto visit_object_of_type(auto callback, const std::array<T, N>& values) {
    for (const auto& element : values) {
        visit_object_of_type<to_visit_t>(callback, element);
    }
}

// Recurse into each tuple element, left to right, by expanding an index
// sequence over the pack.
template <typename to_visit_t, typename... Ts>
constexpr auto visit_object_of_type(auto callback, const std::tuple<Ts...>& value) {
    [&]<size_t... Indices>(std::index_sequence<Indices...>) {
        (visit_object_of_type<to_visit_t>(callback, std::get<Indices>(value)), ...);
    }(std::index_sequence_for<Ts...>{});
}

// Fallback for reflected structs (anything exposing attribute_names that is
// not itself a to_visit_t): recurse into the attribute_values() tuple so any
// nested to_visit_t objects are visited.
// Fix: the original computed a `num_attributes` local that was never used;
// the tuple overload already derives the arity itself.
template <typename to_visit_t, typename T>
    requires(not std::same_as<std::decay_t<T>, to_visit_t>) and requires { std::decay_t<T>::attribute_names; }
constexpr auto visit_object_of_type(auto callback, T&& object) {
    visit_object_of_type<to_visit_t>(callback, object.attribute_values());
}

} // namespace reflection
} // namespace stl
} // namespace tt
Expand Down
59 changes: 9 additions & 50 deletions ttnn/cpp/ttnn/device_operation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,49 +95,6 @@ template <typename... Ts>
return table[i];
}

// Terminal case: the value is (after decay) a Tensor, so invoke the callback
// on it directly.
template <typename T>
    requires std::same_as<std::decay_t<T>, Tensor>
constexpr auto visit_tensor(auto callback, T&& tensor_value) {
    callback(tensor_value);
}

// Recurse into an optional only when it holds a value; empty optionals are
// skipped.
template <typename T>
constexpr auto visit_tensor(auto callback, const std::optional<T>& maybe_value) {
    if (maybe_value) {
        visit_tensor(callback, *maybe_value);
    }
}

// Recurse into every element of a vector, in order.
template <typename T>
constexpr auto visit_tensor(auto callback, const std::vector<T>& values) {
    for (const auto& element : values) {
        visit_tensor(callback, element);
    }
}

// Recurse into every element of a fixed-size array, in order.
template <typename T, auto N>
constexpr auto visit_tensor(auto callback, const std::array<T, N>& values) {
    for (const auto& element : values) {
        visit_tensor(callback, element);
    }
}

// Recurse into each tuple element, left to right, by expanding an index
// sequence over the pack.
template <typename... Ts>
constexpr auto visit_tensor(auto callback, const std::tuple<Ts...>& value) {
    [&]<size_t... Indices>(std::index_sequence<Indices...>) {
        (visit_tensor(callback, std::get<Indices>(value)), ...);
    }(std::index_sequence_for<Ts...>{});
}

// Fallback for reflected structs (anything exposing attribute_names that is
// not itself a Tensor): recurse into the attribute_values() tuple so any
// nested Tensors are visited.
// Fix: the original computed a `num_attributes` local that was never used;
// the tuple overload already derives the arity itself.
template <typename T>
    requires(not std::same_as<std::decay_t<T>, Tensor>) and requires { std::decay_t<T>::attribute_names; }
constexpr auto visit_tensor(auto callback, T&& object) {
    visit_tensor(callback, object.attribute_values());
}

template <typename T>
requires std::same_as<std::decay_t<T>, Tensor>
constexpr auto get_first_tensor(T&& value) {
Expand Down Expand Up @@ -256,13 +213,18 @@ inline auto& create_or_get_program_from_cache(
}
}

// Compile-time guard used with visit_object_of_type<Tensor>: instantiation
// fails unless every visited object decays to exactly Tensor.
constexpr auto check_tensor_types = [](auto&& value) {
    static_assert(std::same_as<std::decay_t<decltype(value)>, Tensor>);
};

template <DeviceOperationConcept operation_t>
typename operation_t::tensor_return_value_t run(
uint8_t cq_id,
const typename operation_t::operation_attributes_t& operation_attributes,
const typename operation_t::tensor_args_t& tensor_args) {
ZoneScopedN("TT_DNN_DEVICE_OP");
uint32_t operation_id = assign_operation_id();

tt::stl::reflection::visit_object_of_type<Tensor>(check_tensor_types, tensor_args);

using tensor_return_value_t = typename operation_t::tensor_return_value_t;
static_assert(not std::same_as<tensor_return_value_t, void>, "Operation cannot return type cannot be void");
Expand All @@ -279,6 +241,7 @@ typename operation_t::tensor_return_value_t run(
operation_t::validate_on_program_cache_miss(operation_attributes, tensor_args);
}
auto tensor_return_value = operation_t::create_output_tensors(operation_attributes, tensor_args);
tt::stl::reflection::visit_object_of_type<Tensor>(check_tensor_types, tensor_return_value);

auto& program = create_or_get_program_from_cache<operation_t>(
program_cache, cache_hit, program_hash, operation_attributes, tensor_args, tensor_return_value);
Expand All @@ -292,21 +255,17 @@ typename operation_t::tensor_return_value_t run(
auto assign_global_buffer_to_program = [&program](auto&& tensor) {
AssignGlobalBufferToProgram(tensor.device_buffer(), program);
};
visit_tensor(assign_global_buffer_to_program, tensor_args);
tt::stl::reflection::visit_object_of_type<Tensor>(assign_global_buffer_to_program, tensor_args);
tt::tt_metal::EnqueueProgram(queue, program, false);
} else {
ZoneScopedN("LaunchProgram");
::detail::LaunchProgram(device, program);
}

// Visit output tensors with the sole purpose of checking the return type to make sure that it only has Tensors
// TODO: come up with a better way of checking the return type
visit_tensor([](auto&& tensor) {}, tensor_return_value);

// TODO: update this to work properly take program cache info, as well as tensors
TracyOpTNNNDeviceV2(
operation_t{},
operation_id,
assign_operation_id(),
device->id(),
program,
program_hash,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ struct Binary {
using tensor_args_t = ttnn::operations::binary::tensor_args_t;
using shape_return_value_t = ttnn::operations::binary::shape_return_value_t;
using tensor_return_value_t = ttnn::operations::binary::tensor_return_value_t;

using program_factory_t = std::variant<
ElementWiseMultiCore,
BroadcastWidthMultiCore,
Expand Down

0 comments on commit c83299f

Please sign in to comment.