From 1e042f685beb81979914d71e4c0c4efcfd2190ed Mon Sep 17 00:00:00 2001 From: Akhmed Rakhmati Date: Thu, 16 May 2024 19:45:53 +0000 Subject: [PATCH] #8569: Handle static and dynamic OP validation performantly --- tt_eager/tt_dnn/op_library/operation.hpp | 16 +++++ tt_eager/tt_dnn/op_library/run_operation.cpp | 19 ++++-- tt_eager/ttnn/config.hpp | 72 ++++++++++++++++++++ ttnn/cpp/ttnn/core.hpp | 56 +-------------- 4 files changed, 103 insertions(+), 60 deletions(-) create mode 100644 tt_eager/ttnn/config.hpp diff --git a/tt_eager/tt_dnn/op_library/operation.hpp b/tt_eager/tt_dnn/op_library/operation.hpp index a4091a2699b..0c1acf9183f 100644 --- a/tt_eager/tt_dnn/op_library/operation.hpp +++ b/tt_eager/tt_dnn/op_library/operation.hpp @@ -12,6 +12,7 @@ #include "tt_metal/impl/program/program.hpp" #include "tt_stl/concepts.hpp" #include "tt_stl/reflection.hpp" +#include "ttnn/config.hpp" namespace tt { @@ -498,6 +499,8 @@ struct DeviceOperation final { output_tensors); } + inline bool uses_custom_program_hash() const { return this->uses_custom_program_hash_impl_(); } + inline const Hash compute_program_hash( const Tensors& input_tensors, const OptionalConstTensors& optional_input_tensors) const { ZoneScoped; @@ -536,6 +539,9 @@ struct DeviceOperation final { const Tensors& input_tensors, const OptionalConstTensors& optional_input_tensors, const OptionalTensors& optional_output_tensors) -> void { + if (ttnn::CONFIG.enable_fast_runtime_mode) { + return; + } const auto& operation = *reinterpret_cast*>(&storage); if constexpr ( (detail::implements_validate() or @@ -663,6 +669,15 @@ struct DeviceOperation final { static_assert(tt::stl::concepts::always_false_v, "Operation doesn't implement create_program"); } }}, + uses_custom_program_hash_impl_{[]() -> bool { + if constexpr (detail::implements_compute_program_hash()) { + return true; + } else if constexpr (detail::implements_compute_program_hash_with_optional_input_tensors()) { + return true; + } else { + return false; + } + }}, create_profiler_info_impl_{[](const storage_t& storage, const Tensors& input_tensors) -> const ProfilerInfo { const auto& operation = *reinterpret_cast*>(&storage); std::optional preferred_name = tt::stl::get_type_name(); @@ -720,6 +735,7 @@ struct DeviceOperation final { const Tensors&, const std::vector>&, OutputTensors&); + bool (*uses_custom_program_hash_impl_)(); const Hash (*compute_program_hash_impl_)( const storage_t& value, const Tensors&, const std::vector>&); const ProfilerInfo (*create_profiler_info_impl_)(const storage_t& value, const Tensors& input_tensors); diff --git a/tt_eager/tt_dnn/op_library/run_operation.cpp b/tt_eager/tt_dnn/op_library/run_operation.cpp index 05f7747ad5d..93c12c55422 100644 --- a/tt_eager/tt_dnn/op_library/run_operation.cpp +++ b/tt_eager/tt_dnn/op_library/run_operation.cpp @@ -146,7 +146,8 @@ OutputTensors run_device_operation( const DeviceOperation&, const Tensors&, const OptionalConstTensors&, - OutputTensors&)> + OutputTensors&, + const OptionalTensors&)> get_or_create_program; auto& program_cache = input_tensors[0].device()->program_cache; @@ -157,12 +158,18 @@ OutputTensors run_device_operation( const DeviceOperation& operation, const Tensors& input_tensors, const OptionalConstTensors& optional_input_tensors, - OutputTensors& output_tensors) -> std::reference_wrapper { + OutputTensors& output_tensors, + const OptionalTensors& optional_output_tensors) -> std::reference_wrapper { program_hash = operation.compute_program_hash(input_tensors, optional_input_tensors); auto program_ptr = program_cache.find(program_hash); bool cache_hit = program_ptr.has_value(); log_debug(tt::LogOp, "Program Hash: {} ({})", program_hash, cache_hit ? "HIT" : "MISS"); + + if (not cache_hit or operation.uses_custom_program_hash()) { + operation.validate(input_tensors, optional_input_tensors, optional_output_tensors); + } + if (not cache_hit) { program_ptr = std::make_shared>(operation.create_program(input_tensors, optional_input_tensors, output_tensors)); program_cache.insert(program_hash, program_ptr.value()); @@ -196,16 +203,18 @@ OutputTensors run_device_operation( get_or_create_program = [](const DeviceOperation& operation, const Tensors& input_tensors, const OptionalConstTensors& optional_input_tensors, - OutputTensors& output_tensors) -> std::shared_ptr { + OutputTensors& output_tensors, + const OptionalTensors& optional_output_tensors) -> std::shared_ptr { + operation.validate(input_tensors, optional_input_tensors, optional_output_tensors); auto program_with_callbacks = operation.create_program(input_tensors, optional_input_tensors, output_tensors); return std::make_shared(std::move(program_with_callbacks.program)); }; } - operation.validate(input_tensors, optional_input_tensors, optional_output_tensors); auto output_tensors = operation.create_output_tensors(input_tensors, optional_output_tensors); - auto program = get_or_create_program(operation, input_tensors, optional_input_tensors, output_tensors); + auto program = get_or_create_program( + operation, input_tensors, optional_input_tensors, output_tensors, optional_output_tensors); uint32_t device_id = detail::get_device(input_tensors, optional_input_tensors)->id(); // Enqueue or Launch Program diff --git a/tt_eager/ttnn/config.hpp b/tt_eager/ttnn/config.hpp new file mode 100644 index 00000000000..e13635e127b --- /dev/null +++ b/tt_eager/ttnn/config.hpp @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include +#include + +namespace ttnn { + +namespace core { + +struct Config { + std::string cache_path = "/home/.cache/ttnn"; + std::string model_cache_path = "/home/.cache/ttnn/models"; + std::string tmp_dir = "/tmp/ttnn"; + bool enable_model_cache = false; + bool enable_fast_runtime_mode = false; + bool throw_exception_on_fallback = false; + bool enable_logging = false; + bool enable_graph_report = false; + bool enable_detailed_buffer_report = false; + bool enable_detailed_tensor_report = false; + bool enable_comparison_mode = false; + float comparison_mode_pcc = 0.9999; + std::string root_report_path = "generated/ttnn/reports"; + std::optional report_name = std::nullopt; + + static constexpr auto attribute_names = std::make_tuple( + "cache_path", + "model_cache_path", + "tmp_dir", + "enable_model_cache", + "enable_fast_runtime_mode", + "throw_exception_on_fallback", + "enable_logging", + "enable_graph_report", + "enable_detailed_buffer_report", + "enable_detailed_tensor_report", + "enable_comparison_mode", + "comparison_mode_pcc", + "root_report_path", + "report_name"); + + const auto attribute_values() const { + return std::make_tuple( + std::cref(this->cache_path), + std::cref(this->model_cache_path), + std::cref(this->tmp_dir), + std::cref(this->enable_model_cache), + std::cref(this->enable_fast_runtime_mode), + std::cref(this->throw_exception_on_fallback), + std::cref(this->enable_logging), + std::cref(this->enable_graph_report), + std::cref(this->enable_detailed_buffer_report), + std::cref(this->enable_detailed_tensor_report), + std::cref(this->enable_comparison_mode), + std::cref(this->comparison_mode_pcc), + std::cref(this->root_report_path), + std::cref(this->report_name)); + } +}; + +inline Config CONFIG{}; + +} // namespace core + +using core::CONFIG; +using core::Config; +} // namespace ttnn diff --git a/ttnn/cpp/ttnn/core.hpp b/ttnn/cpp/ttnn/core.hpp index 1d40f720af0..4ba605f8769 100644 --- a/ttnn/cpp/ttnn/core.hpp +++ b/ttnn/cpp/ttnn/core.hpp @@ -11,6 +11,7 @@ #include "tt_eager/tensor/tensor_impl.hpp" // TTNN_TENSOR_PRINT_PROFILE #include "tt_eager/tensor/types.hpp" #include "tt_eager/tt_dnn/op_library/operation.hpp" +#include "ttnn/config.hpp" #include "ttnn/types.hpp" namespace ttnn { @@ -29,59 +30,6 @@ namespace ttnn { namespace core { -struct Config { - std::string cache_path = "/home/.cache/ttnn"; - std::string model_cache_path = "/home/.cache/ttnn/models"; - std::string tmp_dir = "/tmp/ttnn"; - bool enable_model_cache = false; - bool enable_fast_runtime_mode = false; - bool throw_exception_on_fallback = false; - bool enable_logging = false; - bool enable_graph_report = false; - bool enable_detailed_buffer_report = false; - bool enable_detailed_tensor_report = false; - bool enable_comparison_mode = false; - float comparison_mode_pcc = 0.9999; - std::string root_report_path = "generated/ttnn/reports"; - std::optional report_name = std::nullopt; - - static constexpr auto attribute_names = std::make_tuple( - "cache_path", - "model_cache_path", - "tmp_dir", - "enable_model_cache", - "enable_fast_runtime_mode", - "throw_exception_on_fallback", - "enable_logging", - "enable_graph_report", - "enable_detailed_buffer_report", - "enable_detailed_tensor_report", - "enable_comparison_mode", - "comparison_mode_pcc", - "root_report_path", - "report_name"); - - const auto attribute_values() const { - return std::make_tuple( - std::cref(this->cache_path), - std::cref(this->model_cache_path), - std::cref(this->tmp_dir), - std::cref(this->enable_model_cache), - std::cref(this->enable_fast_runtime_mode), - std::cref(this->throw_exception_on_fallback), - std::cref(this->enable_logging), - std::cref(this->enable_graph_report), - std::cref(this->enable_detailed_buffer_report), - std::cref(this->enable_detailed_tensor_report), - std::cref(this->enable_comparison_mode), - std::cref(this->comparison_mode_pcc), - std::cref(this->root_report_path), - std::cref(this->report_name)); - } -}; - -inline Config CONFIG{}; - inline std::uint32_t pad_to_multiple_of_tile_size(std::uint32_t value) { return (value + (ttnn::TILE_SIZE - 1)) / ttnn::TILE_SIZE * ttnn::TILE_SIZE; } @@ -118,8 +66,6 @@ inline void dump_stack_trace_on_segfault() { } // namespace core -using core::CONFIG; -using core::Config; using core::get_memory_config; using core::has_storage_type_of; using core::pad_to_multiple_of_tile_size;