Skip to content

Commit

Permalink
fix PerfMetrics
Browse files Browse the repository at this point in the history
  • Loading branch information
pavel-esir committed Sep 4, 2024
1 parent 4cfba8a commit df5cd75
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 21 deletions.
7 changes: 2 additions & 5 deletions src/cpp/include/openvino/genai/perf_metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ struct OPENVINO_GENAI_EXPORTS RawPerfMetrics {
std::vector<TimePoint> m_new_token_times;
std::vector<size_t> m_batch_sizes;
std::vector<MicroSeconds> m_durations;

size_t num_generated_tokens;
size_t num_input_tokens;
};

/**
Expand Down Expand Up @@ -111,8 +108,8 @@ struct OPENVINO_GENAI_EXPORTS PerfMetrics {
size_t num_input_tokens;

float get_load_time(); // Load time in ms.
float get_num_generated_tokens();
float get_num_input_tokens();
size_t get_num_generated_tokens();
size_t get_num_input_tokens();
MeanStdPair get_ttft(); // Time to the first token (in ms) (TTFT).
MeanStdPair get_tpot(); // Time (in ms) per output token (TPOT).
MeanStdPair get_throughput(); // Tokens per second.
Expand Down
4 changes: 3 additions & 1 deletion src/cpp/src/llm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
raw_counters.generate_durations.emplace_back(PerfMetrics::get_microsec(stop_time - start_time));
raw_counters.tokenization_durations.emplace_back(PerfMetrics::get_microsec(encode_stop_time - start_time));
raw_counters.detokenization_durations.emplace_back(PerfMetrics::get_microsec(decode_stop_time - decode_start_time));


// Added tokenization/detokenization times, and updated generate duration, need to reevaluate statistics.
decoded_results.perf_metrics.m_evaluated = false;
decoded_results.perf_metrics.evaluate_statistics(start_time);
return decoded_results;
}
Expand Down
7 changes: 5 additions & 2 deletions src/cpp/src/perf_metrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
namespace {

ov::genai::MeanStdPair calc_mean_and_std(const std::vector<ov::genai::MicroSeconds>& durations) {
if (durations.size() == 0) {
return {-1, -1};
}
// Accepts time durations in microseconds and returns standard deviation and mean in milliseconds.
float mean = std::accumulate(durations.begin(), durations.end(), 0.0f,
[](const float& acc, const ov::genai::MicroSeconds& duration) -> float {
Expand All @@ -36,12 +39,12 @@ float PerfMetrics::get_load_time() {
return load_time;
}

float PerfMetrics::get_num_generated_tokens() {
size_t PerfMetrics::get_num_generated_tokens() {
evaluate_statistics();
return num_generated_tokens;
}

float PerfMetrics::get_num_input_tokens() {
size_t PerfMetrics::get_num_input_tokens() {
evaluate_statistics();
return num_generated_tokens;
}
Expand Down
26 changes: 13 additions & 13 deletions src/python/py_generate_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,22 +247,22 @@ auto perf_metrics_docstring = R"(
:param get_num_input_tokens: Returns the number of tokens in the input prompt.
:type get_num_input_tokens: int
:param get_ttft: Returns the mean and standard deviation of TTFT.
:param get_ttft: Returns the mean and standard deviation of TTFT in milliseconds.
:type get_ttft: MeanStdPair
:param get_tpot: Returns the mean and standard deviation of TPOT.
:param get_tpot: Returns the mean and standard deviation of TPOT in milliseconds.
:type get_tpot: MeanStdPair
:param get_throughput: Returns the mean and standard deviation of throughput.
:param get_throughput: Returns the mean and standard deviation of throughput in tokens per second.
:type get_throughput: MeanStdPair
:param get_generate_duration: Returns the mean and standard deviation of generate duration.
:param get_generate_duration: Returns the mean and standard deviation of generate durations in milliseconds.
:type get_generate_duration: MeanStdPair
:param get_tokenization_duration: Returns the mean and standard deviation of tokenization duration.
:param get_tokenization_duration: Returns the mean and standard deviation of tokenization durations in milliseconds.
:type get_tokenization_duration: MeanStdPair
:param get_detokenization_duration: Returns the mean and standard deviation of detokenization duration.
:param get_detokenization_duration: Returns the mean and standard deviation of detokenization durations in milliseconds.
:type get_detokenization_duration: MeanStdPair
:param raw_metrics: A structure of RawPerfMetrics type that holds raw metrics.
Expand Down Expand Up @@ -776,9 +776,7 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
.def_property_readonly("m_durations", [](const RawPerfMetrics &rw) {
return get_ms(rw, &RawPerfMetrics::m_durations);
})
.def_readonly("m_batch_sizes", &RawPerfMetrics::m_batch_sizes)
.def_readonly("num_generated_tokens", &RawPerfMetrics::num_generated_tokens)
.def_readonly("num_input_tokens", &RawPerfMetrics::num_input_tokens);
.def_readonly("m_batch_sizes", &RawPerfMetrics::m_batch_sizes);

py::class_<MeanStdPair>(m, "MeanStdPair")
.def(py::init<>())
Expand All @@ -787,13 +785,15 @@ PYBIND11_MODULE(py_generate_pipeline, m) {

py::class_<PerfMetrics>(m, "PerfMetrics", perf_metrics_docstring)
.def(py::init<>())
.def("get_load_time", &PerfMetrics::get_load_time)
.def("get_num_generated_tokens", &PerfMetrics::get_num_generated_tokens)
.def("get_num_input_tokens", &PerfMetrics::get_num_input_tokens)
.def("get_ttft", &PerfMetrics::get_ttft)
.def("get_tpot", &PerfMetrics::get_tpot)
.def("get_throughput", &PerfMetrics::get_throughput)
.def("get_generate_duration", &PerfMetrics::get_generate_duration)
.def("get_tokenization_duration", &PerfMetrics::get_tokenization_duration)
.def("get_detokenization_duration", &PerfMetrics::get_detokenization_duration)
.def("get_throughput", &PerfMetrics::get_throughput)
.def("get_tpot", &PerfMetrics::get_tpot)
.def("get_ttft", &PerfMetrics::get_ttft)
.def("get_load_time", &PerfMetrics::get_load_time)
.def("__add__", &PerfMetrics::operator+)
.def("__iadd__", &PerfMetrics::operator+=)
.def_readonly("raw_metrics", &PerfMetrics::raw_metrics);
Expand Down

0 comments on commit df5cd75

Please sign in to comment.