From cf39027fb1d4a7fbea6f560f6da13a041657758b Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Tue, 28 Jan 2025 15:44:55 +0100 Subject: [PATCH] Address review comments --- .../include/snippets/op/perf_count.hpp | 64 ++++++++--- .../pass/insert_perf_count_verbose.cpp | 25 +++-- src/common/snippets/src/op/perf_count.cpp | 103 +++++++++++------- 3 files changed, 127 insertions(+), 65 deletions(-) diff --git a/src/common/snippets/include/snippets/op/perf_count.hpp b/src/common/snippets/include/snippets/op/perf_count.hpp index d89125c0daecaf..8ce3280826225f 100644 --- a/src/common/snippets/include/snippets/op/perf_count.hpp +++ b/src/common/snippets/include/snippets/op/perf_count.hpp @@ -19,25 +19,54 @@ class PerfCountEnd; namespace utils { /** - * @interface PerfCountDumper + * @interface Dumper * @brief Dumper for node debug properties * @ingroup snippets */ class Dumper { public: - Dumper(); - ~Dumper(); + Dumper() = default; + Dumper(const Dumper&) = delete; + virtual ~Dumper() = default; - void update(const op::PerfCountEnd* node, - ov::threading::ThreadLocal accumulation, - ov::threading::ThreadLocal iteration); + void init(const std::string ¶ms); + virtual void update(const op::PerfCountEnd* node) = 0; +protected: + std::map m_debug_params_map; + std::string m_params; +}; + +/** + * @interface ConsoleDumper + * @brief Dumper for node debug properties (output: stdout) + * @ingroup snippets + */ +class ConsoleDumper : public Dumper { +public: + ConsoleDumper() = default; + ConsoleDumper(const ConsoleDumper&) = delete; + ~ConsoleDumper() override; + + void update(const op::PerfCountEnd* node) override; +}; + +/** + * @interface CSVDumper + * @brief Dumper for node debug properties (output: .csv file) + * @ingroup snippets + */ +class CSVDumper : public Dumper { +public: + CSVDumper(const std::string &csv_path); + CSVDumper(const CSVDumper&) = delete; + ~CSVDumper() override; + + void update(const op::PerfCountEnd* node) override; private: - void dump_brgemm_params_to_csv(); + const std::string csv_path; - static std::string brgemm_csv_path; - static std::map m_debug_params_map; - static size_t nodes_count; + void dump_brgemm_params_to_csv(); }; } // namespace utils @@ -104,21 +133,30 @@ class PerfCountBegin : public PerfCountBeginBase { class PerfCountEnd : public PerfCountEndBase { public: OPENVINO_OP("PerfCountEnd", "SnippetsOpset", PerfCountEndBase); - PerfCountEnd(const Output& pc_begin); + PerfCountEnd(const Output& pc_begin, + std::vector> dumpers = {}, + const std::string& params = ""); PerfCountEnd(); ~PerfCountEnd(); - void output_perf_count(); std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; void init_pc_begin(); void set_accumulated_time(); + const ov::threading::ThreadLocal &get_accumulation() const { + return accumulation; + } + + const ov::threading::ThreadLocal &get_iteration() const { + return iteration; + } + private: ov::threading::ThreadLocal accumulation; ov::threading::ThreadLocal iteration; - utils::Dumper csv_dumper; + std::vector> dumpers; std::shared_ptr m_pc_begin = nullptr; }; diff --git a/src/common/snippets/src/lowered/pass/insert_perf_count_verbose.cpp b/src/common/snippets/src/lowered/pass/insert_perf_count_verbose.cpp index f76bdc6d583d85..969dd6242f4cc1 100644 --- a/src/common/snippets/src/lowered/pass/insert_perf_count_verbose.cpp +++ b/src/common/snippets/src/lowered/pass/insert_perf_count_verbose.cpp @@ -23,12 +23,18 @@ bool InsertPerfCountVerbose::run(snippets::lowered::LinearIR& linear_ir, snippets::lowered::LinearIR::constExprIt begin, snippets::lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertPerfCountVerbose") - if (linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) { - return false; - } + static size_t seq_number = 0; bool modified = false; auto csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path; + + std::vector> dumpers; + dumpers.push_back(std::make_shared()); + // Add CSV dumper if path is provided + if (!linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) { + dumpers.push_back(std::make_shared(csv_path)); + } + for (auto expr_it = begin; expr_it != end; expr_it++) { const auto& brgemm_expr = *expr_it; const auto brgemm = ov::as_type_ptr(brgemm_expr->get_node()); @@ -36,18 +42,15 @@ bool InsertPerfCountVerbose::run(snippets::lowered::LinearIR& linear_ir, continue; // Collect brgemm parameters auto params = collect_params(brgemm_expr, linear_ir); + const auto& perf_count_begin = std::make_shared(); - perf_count_begin->set_friendly_name(std::string("PerfCount_Begin_") + std::to_string(seq_number) + - "_DebugParams"); + perf_count_begin->set_friendly_name(std::string("PerfCountVerbose_Begin_") + std::to_string(seq_number)); const auto empty_inputs = std::vector{}; linear_ir.insert_node(perf_count_begin, empty_inputs, expr_it->get()->get_loop_ids(), false, expr_it); - const auto& perf_count_end = std::make_shared(perf_count_begin->output(0)); - perf_count_end->set_friendly_name(std::string("PerfCount_End_") + std::to_string(seq_number) + - "_DebugParams"); - // Attach brgemm parameters to PerfCountEnd node - perf_count_end->get_rt_info()["brgemm_params"] = params; - perf_count_end->get_rt_info()["brgemm_params_csv_path"] = csv_path; + const auto& perf_count_end = std::make_shared(perf_count_begin->output(0), dumpers, params); + perf_count_end->set_friendly_name(std::string("PerfCountVerbose_End_") + std::to_string(seq_number)); + linear_ir.insert_node(perf_count_end, empty_inputs, expr_it->get()->get_loop_ids(), false, next(expr_it)); seq_number++; modified = true; diff --git a/src/common/snippets/src/op/perf_count.cpp b/src/common/snippets/src/op/perf_count.cpp index d70e215efa7e08..e78d7cadf279ec 100644 --- a/src/common/snippets/src/op/perf_count.cpp +++ b/src/common/snippets/src/op/perf_count.cpp @@ -10,22 +10,24 @@ namespace ov { namespace snippets { -//////////////////utils::Dumper/////////////// +//////////////////utils/////////////// namespace utils { -Dumper::Dumper() { - ++nodes_count; +//////////////////utils::Dumper/////////////// + +void Dumper::init(const std::string ¶ms) { + m_params = params; } -Dumper::~Dumper() { - --nodes_count; - if (nodes_count == 0) { - dump_brgemm_params_to_csv(); - } +//////////////////utils::ConsoleDumper/////////////// + +ConsoleDumper::~ConsoleDumper() { } -void Dumper::update(const op::PerfCountEnd* node, ov::threading::ThreadLocal accumulation, ov::threading::ThreadLocal iteration) { +void ConsoleDumper::update(const op::PerfCountEnd* node) { + auto accumulation = node->get_accumulation(); + auto iteration = node->get_iteration(); OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node."); auto iterator_iter = iteration.begin(); auto iterator_acc = accumulation.begin(); @@ -52,37 +54,53 @@ void Dumper::update(const op::PerfCountEnd* node, ov::threading::ThreadLocalget_friendly_name().find("_DebugParams") == std::string::npos) { - return; - } - const auto& rt_info = node->get_rt_info(); - auto brgemm_params_it = rt_info.find("brgemm_params"); - if (brgemm_params_it == rt_info.end()) { - return; - } - if (brgemm_csv_path.empty()) { - auto brgemm_csv_path_it = rt_info.find("brgemm_params_csv_path"); - if (brgemm_csv_path_it != rt_info.end()) { - brgemm_csv_path = brgemm_csv_path_it->second.as(); - } - } - m_debug_params_map[node->get_friendly_name()] = - brgemm_params_it->second.as() + std::to_string(acc_max) + ',' + std::to_string(avg_max); +//////////////////utils::CSVDumper/////////////// + +CSVDumper::CSVDumper(const std::string& csv_path) : csv_path(csv_path) {} + +CSVDumper::~CSVDumper() { + dump_brgemm_params_to_csv(); } -size_t Dumper::nodes_count = 0; -std::map Dumper::m_debug_params_map; -std::string Dumper::brgemm_csv_path; // NOLINT +void CSVDumper::update(const op::PerfCountEnd* node) { + auto accumulation = node->get_accumulation(); + auto iteration = node->get_iteration(); + OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node."); + auto iterator_iter = iteration.begin(); + auto iterator_acc = accumulation.begin(); + uint64_t avg_max = 0; + for (; iterator_iter != iteration.end(); ++iterator_iter, ++iterator_acc) { + const auto iter = *iterator_iter; + const auto acc = *iterator_acc; + uint64_t avg = iter == 0 ? 0 : acc / iter; + if (avg > avg_max) + avg_max = avg; + } + + // max time of all threads: combine for reduce max + auto BinaryFunc = [](const uint64_t& a, const uint64_t& b) { + return a >= b ? a : b; + }; + + // max accumulation + uint64_t acc_max = accumulation.combine(BinaryFunc); -void Dumper::dump_brgemm_params_to_csv() { - if (m_debug_params_map.empty() || brgemm_csv_path.empty()) { + m_debug_params_map[node->get_friendly_name()] = m_params + std::to_string(acc_max) + ',' + std::to_string(avg_max); +} + +void CSVDumper::dump_brgemm_params_to_csv() { + if (m_debug_params_map.empty() || csv_path.empty()) { return; } - std::ofstream csv_file(brgemm_csv_path); + std::ofstream csv_file(csv_path, std::ios_base::app); OPENVINO_ASSERT(csv_file.is_open(), "Failed to open csv file for brgemm debug parameters."); - csv_file << "name,subgraph_name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_block,k_block,acc_max_time," - "avg_max_time\n"; + if (csv_file.tellp() == 0) { + csv_file << "name,subgraph_name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_" + "block,k_block,acc_max_time," + "avg_max_time\n"; + } for (const auto& [_, params] : m_debug_params_map) { csv_file << params << '\n'; } @@ -149,17 +167,24 @@ void PerfCountBegin::set_start_time() { PerfCountEnd::PerfCountEnd() : PerfCountEndBase() {} - -PerfCountEnd::PerfCountEnd(const Output& pc_begin) +PerfCountEnd::PerfCountEnd(const Output& pc_begin, + std::vector> dumpers, + const std::string& params) : PerfCountEndBase({pc_begin}), accumulation(0ul), - iteration(0u) { + iteration(0u), + dumpers(dumpers) { constructor_validate_and_infer_types(); init_pc_begin(); + for (const auto& dumper : dumpers) { + dumper->init(params); + } } PerfCountEnd::~PerfCountEnd() { - output_perf_count(); + for (const auto& dumper : dumpers) { + dumper->update(this); + } } std::shared_ptr PerfCountEnd::clone_with_new_inputs(const OutputVector& inputs) const { @@ -178,10 +203,6 @@ void PerfCountEnd::init_pc_begin() { NODE_VALIDATION_CHECK(this, m_pc_begin != nullptr, "PerfCountEnd last input is not connected to PerfCountBegin"); } -void PerfCountEnd::output_perf_count() { - csv_dumper.update(this, accumulation, iteration); -} - } // namespace op } // namespace snippets } // namespace ov