Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
aobolensk committed Jan 28, 2025
1 parent 4369e6b commit cf39027
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 65 deletions.
64 changes: 51 additions & 13 deletions src/common/snippets/include/snippets/op/perf_count.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,54 @@ class PerfCountEnd;
namespace utils {

/**
* @interface PerfCountDumper
* @interface Dumper
* @brief Dumper for node debug properties
* @ingroup snippets
*/
// Base class for objects that report the measurements of a PerfCountEnd node.
// NOTE(review): this span comes from a commit diff view and appears to list the
// declarations from before and after the change together (two default
// constructors, two destructors, two update() overloads) — confirm which set is
// current before treating this as compilable code.
class Dumper {
public:
Dumper();
~Dumper();
Dumper() = default;
// Copying a dumper is disallowed.
Dumper(const Dumper&) = delete;
// Virtual so that concrete dumpers can be destroyed through a Dumper pointer.
virtual ~Dumper() = default;

void update(const op::PerfCountEnd* node,
ov::threading::ThreadLocal<uint64_t> accumulation,
ov::threading::ThreadLocal<uint32_t> iteration);
// Stores a copy of `params` in m_params.
void init(const std::string &params);
// Reports the measurements of `node`; every concrete dumper must implement this.
virtual void update(const op::PerfCountEnd* node) = 0;
protected:
// CSVDumper::update stores one entry per node friendly name in a map of this name.
std::map<std::string, std::string> m_debug_params_map;
// Parameter string most recently passed to init().
std::string m_params;
};

/**
* @interface ConsoleDumper
* @brief Dumper for node debug properties (output: stdout)
* @ingroup snippets
*/
class ConsoleDumper : public Dumper {
public:
    /// Copy construction is disabled.
    ConsoleDumper(const ConsoleDumper&) = delete;
    /// A console dumper needs no configuration of its own.
    ConsoleDumper() = default;
    /// Defined out of line in perf_count.cpp.
    ~ConsoleDumper() override;

    /// Prints the timing statistics gathered by @p node to stdout.
    void update(const op::PerfCountEnd* node) override;
};

/**
* @interface CSVDumper
* @brief Dumper for node debug properties (output: .csv file)
* @ingroup snippets
*/
// Dumper that collects one row per PerfCountEnd node and writes the rows to a
// .csv file when the dumper is destroyed.
// NOTE(review): this span comes from a commit diff view; the static members and
// the second dump_brgemm_params_to_csv() declaration at the end look like
// removed lines of the previous Dumper class — confirm before compiling.
class CSVDumper : public Dumper {
public:
// Remembers the path of the .csv file to write.
// NOTE(review): single-argument constructor is not marked `explicit`, so a
// std::string converts implicitly to a CSVDumper — confirm this is intended.
CSVDumper(const std::string &csv_path);
// Copying a dumper is disallowed.
CSVDumper(const CSVDumper&) = delete;
// Calls dump_brgemm_params_to_csv().
~CSVDumper() override;

// Records the maximum accumulated and maximum average time of `node`,
// appended to m_params, under the node's friendly name.
void update(const op::PerfCountEnd* node) override;

private:
// Writes the collected rows to the .csv file.
void dump_brgemm_params_to_csv();
// Destination file path, fixed at construction.
const std::string csv_path;

static std::string brgemm_csv_path;
static std::map<std::string, std::string> m_debug_params_map;
static size_t nodes_count;
void dump_brgemm_params_to_csv();
};

} // namespace utils
Expand Down Expand Up @@ -104,21 +133,30 @@ class PerfCountBegin : public PerfCountBeginBase {
// Closing node of a PerfCountBegin/PerfCountEnd pair. It owns per-thread
// accumulation and iteration counters and forwards them to its dumpers when
// it is destroyed.
// NOTE(review): this span comes from a commit diff view; the one-argument
// constructor, output_perf_count() and the csv_dumper member look like removed
// lines. If both constructors were really declared, a call with a single
// argument would be ambiguous because the second one defaults its other
// parameters — confirm against the actual header.
class PerfCountEnd : public PerfCountEndBase {
public:
OPENVINO_OP("PerfCountEnd", "SnippetsOpset", PerfCountEndBase);
PerfCountEnd(const Output<Node>& pc_begin);
// `dumpers` are stored in the node and each one is initialised with `params`.
PerfCountEnd(const Output<Node>& pc_begin,
std::vector<std::shared_ptr<utils::Dumper>> dumpers = {},
const std::string& params = "");
PerfCountEnd();
// Calls update(this) on every stored dumper.
~PerfCountEnd();

void output_perf_count();
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;

void init_pc_begin();
void set_accumulated_time();

// Read-only access to the per-thread accumulation counters (used by the dumpers).
const ov::threading::ThreadLocal<uint64_t> &get_accumulation() const {
return accumulation;
}

// Read-only access to the per-thread iteration counters (used by the dumpers).
const ov::threading::ThreadLocal<uint32_t> &get_iteration() const {
return iteration;
}

private:
// Per-thread accumulated value; the dumpers print it with an "ns" suffix.
ov::threading::ThreadLocal<uint64_t> accumulation;
// Per-thread iteration count; the dumpers divide accumulation by it.
ov::threading::ThreadLocal<uint32_t> iteration;

utils::Dumper csv_dumper;
// Dumpers notified from the destructor; shared_ptr, so instances may be shared between nodes.
std::vector<std::shared_ptr<utils::Dumper>> dumpers;
// Matching PerfCountBegin node; init_pc_begin() validates that it is non-null.
std::shared_ptr<PerfCountBegin> m_pc_begin = nullptr;
};

Expand Down
25 changes: 14 additions & 11 deletions src/common/snippets/src/lowered/pass/insert_perf_count_verbose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,34 @@ bool InsertPerfCountVerbose::run(snippets::lowered::LinearIR& linear_ir,
snippets::lowered::LinearIR::constExprIt begin,
snippets::lowered::LinearIR::constExprIt end) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertPerfCountVerbose")
if (linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) {
return false;
}

static size_t seq_number = 0;
bool modified = false;
auto csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path;

std::vector<std::shared_ptr<snippets::utils::Dumper>> dumpers;
dumpers.push_back(std::make_shared<snippets::utils::ConsoleDumper>());
// Add CSV dumper if path is provided
if (!linear_ir.get_config().debug_config.dumpParams.csv_path.empty()) {
dumpers.push_back(std::make_shared<snippets::utils::CSVDumper>(csv_path));
}

for (auto expr_it = begin; expr_it != end; expr_it++) {
const auto& brgemm_expr = *expr_it;
const auto brgemm = ov::as_type_ptr<ov::snippets::op::Brgemm>(brgemm_expr->get_node());
if (!brgemm)
continue;
// Collect brgemm parameters
auto params = collect_params(brgemm_expr, linear_ir);

const auto& perf_count_begin = std::make_shared<snippets::op::PerfCountBegin>();
perf_count_begin->set_friendly_name(std::string("PerfCount_Begin_") + std::to_string(seq_number) +
"_DebugParams");
perf_count_begin->set_friendly_name(std::string("PerfCountVerbose_Begin_") + std::to_string(seq_number));
const auto empty_inputs = std::vector<PortConnectorPtr>{};
linear_ir.insert_node(perf_count_begin, empty_inputs, expr_it->get()->get_loop_ids(), false, expr_it);

const auto& perf_count_end = std::make_shared<snippets::op::PerfCountEnd>(perf_count_begin->output(0));
perf_count_end->set_friendly_name(std::string("PerfCount_End_") + std::to_string(seq_number) +
"_DebugParams");
// Attach brgemm parameters to PerfCountEnd node
perf_count_end->get_rt_info()["brgemm_params"] = params;
perf_count_end->get_rt_info()["brgemm_params_csv_path"] = csv_path;
const auto& perf_count_end = std::make_shared<snippets::op::PerfCountEnd>(perf_count_begin->output(0), dumpers, params);
perf_count_end->set_friendly_name(std::string("PerfCountVerbose_End_") + std::to_string(seq_number));

linear_ir.insert_node(perf_count_end, empty_inputs, expr_it->get()->get_loop_ids(), false, next(expr_it));
seq_number++;
modified = true;
Expand Down
103 changes: 62 additions & 41 deletions src/common/snippets/src/op/perf_count.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,24 @@
namespace ov {
namespace snippets {

//////////////////utils::Dumper///////////////
//////////////////utils///////////////

namespace utils {

Dumper::Dumper() {
++nodes_count;
//////////////////utils::Dumper///////////////

void Dumper::init(const std::string &params) {
m_params = params;
}

Dumper::~Dumper() {
--nodes_count;
if (nodes_count == 0) {
dump_brgemm_params_to_csv();
}
//////////////////utils::ConsoleDumper///////////////

// The console dumper has nothing to clean up on destruction.
ConsoleDumper::~ConsoleDumper() = default;

void Dumper::update(const op::PerfCountEnd* node, ov::threading::ThreadLocal<uint64_t> accumulation, ov::threading::ThreadLocal<uint32_t> iteration) {
void ConsoleDumper::update(const op::PerfCountEnd* node) {
auto accumulation = node->get_accumulation();
auto iteration = node->get_iteration();
OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node.");
auto iterator_iter = iteration.begin();
auto iterator_acc = accumulation.begin();
Expand All @@ -52,37 +54,53 @@ void Dumper::update(const op::PerfCountEnd* node, ov::threading::ThreadLocal<uin
std::cout << "max accumulated time:" << acc_max << "ns" << std::endl;
// max avg
std::cout << "max avg time:" << avg_max << "ns" << std::endl;
}

if (acc_max == 0 || avg_max == 0 || node->get_friendly_name().find("_DebugParams") == std::string::npos) {
return;
}
const auto& rt_info = node->get_rt_info();
auto brgemm_params_it = rt_info.find("brgemm_params");
if (brgemm_params_it == rt_info.end()) {
return;
}
if (brgemm_csv_path.empty()) {
auto brgemm_csv_path_it = rt_info.find("brgemm_params_csv_path");
if (brgemm_csv_path_it != rt_info.end()) {
brgemm_csv_path = brgemm_csv_path_it->second.as<std::string>();
}
}
m_debug_params_map[node->get_friendly_name()] =
brgemm_params_it->second.as<std::string>() + std::to_string(acc_max) + ',' + std::to_string(avg_max);
//////////////////utils::CSVDumper///////////////

// Remembers the destination path; the file is not opened or written here.
CSVDumper::CSVDumper(const std::string& csv_path)
    : csv_path(csv_path) {
}

// Writes the collected rows to the .csv file when the dumper is destroyed.
//
// dump_brgemm_params_to_csv() checks the file open with OPENVINO_ASSERT, which
// reports failure by throwing. Destructors are implicitly noexcept, so an
// exception leaving this one would call std::terminate. The failure is reported
// on stderr instead, so an unwritable path does not abort the process.
CSVDumper::~CSVDumper() {
    try {
        dump_brgemm_params_to_csv();
    } catch (const std::exception& e) {
        std::cerr << "CSVDumper: failed to dump brgemm debug parameters: " << e.what() << '\n';
    } catch (...) {
        std::cerr << "CSVDumper: failed to dump brgemm debug parameters.\n";
    }
}

size_t Dumper::nodes_count = 0;
std::map<std::string, std::string> Dumper::m_debug_params_map;
std::string Dumper::brgemm_csv_path; // NOLINT
// Computes, over the per-thread counters of `node`, the largest accumulated
// time and the largest per-iteration average, and stores them (appended to
// m_params) in m_debug_params_map under the node's friendly name.
// Asserts that the two per-thread containers have the same size.
void CSVDumper::update(const op::PerfCountEnd* node) {
// `auto` deduces a value type, so both containers are copied from the node's
// const-reference getters.
// NOTE(review): `const auto&` would avoid the copies — confirm nothing relies on the copy.
auto accumulation = node->get_accumulation();
auto iteration = node->get_iteration();
OPENVINO_ASSERT(accumulation.size() == iteration.size(), "accumulation size should be the same as iteration size in perf_count_end node.");
// Walk both containers in lockstep; the assert above guarantees equal length.
auto iterator_iter = iteration.begin();
auto iterator_acc = accumulation.begin();
uint64_t avg_max = 0;
for (; iterator_iter != iteration.end(); ++iterator_iter, ++iterator_acc) {
const auto iter = *iterator_iter;
const auto acc = *iterator_acc;
// An entry with zero iterations contributes an average of 0 (avoids division by zero).
uint64_t avg = iter == 0 ? 0 : acc / iter;
if (avg > avg_max)
avg_max = avg;
}

// max time of all threads: combine for reduce max
auto BinaryFunc = [](const uint64_t& a, const uint64_t& b) {
return a >= b ? a : b;
};

// max accumulation
uint64_t acc_max = accumulation.combine(BinaryFunc);

// NOTE(review): the next two lines look like removed lines kept by the diff
// view (they open an unrelated definition inside this function) — confirm
// against the actual file.
void Dumper::dump_brgemm_params_to_csv() {
if (m_debug_params_map.empty() || brgemm_csv_path.empty()) {
// One entry per node: the m_params prefix followed by "<acc_max>,<avg_max>".
m_debug_params_map[node->get_friendly_name()] = m_params + std::to_string(acc_max) + ',' + std::to_string(avg_max);
}

void CSVDumper::dump_brgemm_params_to_csv() {
if (m_debug_params_map.empty() || csv_path.empty()) {
return;
}
std::ofstream csv_file(brgemm_csv_path);
std::ofstream csv_file(csv_path, std::ios_base::app);
OPENVINO_ASSERT(csv_file.is_open(), "Failed to open csv file for brgemm debug parameters.");
csv_file << "name,subgraph_name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_block,k_block,acc_max_time,"
"avg_max_time\n";
if (csv_file.tellp() == 0) {
csv_file << "name,subgraph_name,in_type,out_type,in_shapes,out_shapes,in_layouts,out_layouts,M,N,K,m_block,n_"
"block,k_block,acc_max_time,"
"avg_max_time\n";
}
for (const auto& [_, params] : m_debug_params_map) {
csv_file << params << '\n';
}
Expand Down Expand Up @@ -149,17 +167,24 @@ void PerfCountBegin::set_start_time() {

// Default constructor: only the base class is initialised explicitly.
PerfCountEnd::PerfCountEnd()
    : PerfCountEndBase() {
}


// Builds a PerfCountEnd connected to `pc_begin`, zero-initialises the counters,
// stores `dumpers` and hands `params` to each of them.
// NOTE(review): this span comes from a commit diff view; the first signature
// line and the first `iteration(0u) {` line look like removed lines — confirm
// against the actual file.
PerfCountEnd::PerfCountEnd(const Output<Node>& pc_begin)
PerfCountEnd::PerfCountEnd(const Output<Node>& pc_begin,
std::vector<std::shared_ptr<utils::Dumper>> dumpers,
const std::string& params)
: PerfCountEndBase({pc_begin}),
accumulation(0ul),
iteration(0u) {
iteration(0u),
// The by-value parameter is copied into the member of the same name.
dumpers(dumpers) {
constructor_validate_and_infer_types();
init_pc_begin();
// Inside the body `dumpers` names the constructor parameter, which shadows the member.
// NOTE(review): Dumper::init overwrites the single m_params string of each
// dumper. InsertPerfCountVerbose::run passes the same dumper instances to every
// PerfCountEnd it creates, so only the params of the last constructed node
// would survive — confirm whether per-node params are expected in the output.
for (const auto& dumper : dumpers) {
dumper->init(params);
}
}

// On destruction, passes this node to every stored dumper so that the final
// counter values are reported.
PerfCountEnd::~PerfCountEnd() {
// NOTE(review): output_perf_count() looks like the pre-change call kept by the
// diff view — confirm against the actual file.
output_perf_count();
// NOTE(review): the update() implementations use OPENVINO_ASSERT; an exception
// leaving a destructor terminates the program — confirm that assert cannot fire here.
for (const auto& dumper : dumpers) {
dumper->update(this);
}
}

std::shared_ptr<Node> PerfCountEnd::clone_with_new_inputs(const OutputVector& inputs) const {
Expand All @@ -178,10 +203,6 @@ void PerfCountEnd::init_pc_begin() {
NODE_VALIDATION_CHECK(this, m_pc_begin != nullptr, "PerfCountEnd last input is not connected to PerfCountBegin");
}

// Passes this node and its accumulation and iteration counters to csv_dumper.
void PerfCountEnd::output_perf_count() {
    this->csv_dumper.update(this, this->accumulation, this->iteration);
}

} // namespace op
} // namespace snippets
} // namespace ov
Expand Down

0 comments on commit cf39027

Please sign in to comment.