Skip to content

Commit

Permalink
Add Prometheus metrics for tx execution and 0L VDFs (#1102)
Browse files Browse the repository at this point in the history
* add prints for new transactions, mempool latency, executor steps time, and vdf fimer.

* more prints on a per tx level

* add sequence number to tx debug

* print payload

* payload too noisy

* adds more fine-grained executor metrics to prometheus

* adds more metrics for Prometheus monitoring for VDF verification and transaction execution

* adds two new Prometheus metrics to track how many VDF proofs are submitted and how many fail verification because of non-met criteria.

Co-authored-by: 0o-de-lally <[email protected]>
  • Loading branch information
intuitiveminds and 0o-de-lally authored May 10, 2022
1 parent cf9900a commit b7e8d06
Show file tree
Hide file tree
Showing 10 changed files with 138 additions and 6 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 31 additions & 2 deletions execution/executor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ use crate::{
DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_SECONDS, DIEM_EXECUTOR_EXECUTE_BLOCK_SECONDS,
DIEM_EXECUTOR_SAVE_TRANSACTIONS_SECONDS, DIEM_EXECUTOR_TRANSACTIONS_SAVED,
DIEM_EXECUTOR_VM_EXECUTE_BLOCK_SECONDS,
DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_VERIFY_LATENCY,
DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_EXECUTION_LATENCY,
DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_SAVE_TX_LATENCY
},
speculation_cache::SpeculationCache,
types::{ProcessedVMOutput, TransactionData},
Expand Down Expand Up @@ -59,7 +62,7 @@ use std::{
collections::{hash_map, HashMap, HashSet},
convert::TryFrom,
marker::PhantomData,
sync::Arc,
sync::Arc, time::Instant,
};
use storage_interface::{state_view::VerifiedStateView, DbReaderWriter, TreeState};

Expand Down Expand Up @@ -605,22 +608,44 @@ impl<V: VMExecutor> ChunkExecutor for Executor<V> {
.num_txns_in_request(txn_list_with_proof.transactions.len()),
"sync_request_received",
);


// temp time the transaction execution.
let start_time = Instant::now();
let metrics_timer_vl = DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_VERIFY_LATENCY.start_timer();
// 2. Verify input transaction list.
let (transactions, transaction_infos) =
self.verify_chunk(txn_list_with_proof, &verified_target_li)?;

let latency = start_time.elapsed();
metrics_timer_vl.observe_duration();
dbg!("verify_chunk latency", &latency);

// 3. Execute transactions.

// temp time the transaction execution.
let start_time = Instant::now();
let metrics_timer_el = DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_EXECUTION_LATENCY.start_timer();

let first_version = self.cache.synced_trees().txn_accumulator().num_leaves();
let (output, txns_to_commit, reconfig_events) =
self.execute_chunk(first_version, transactions, transaction_infos)?;

let latency = start_time.elapsed();
metrics_timer_el.observe_duration();
dbg!("execute_chunk latency", &latency);


// temp time the transaction execution.
let start_time = Instant::now();
let metrics_timer_stxl = DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_SAVE_TX_LATENCY.start_timer();

// 4. Commit to DB.
let ledger_info_to_commit =
Self::find_chunk_li(verified_target_li, epoch_change_li, &output)?;
if ledger_info_to_commit.is_none() && txns_to_commit.is_empty() {
return Ok(reconfig_events);
}

fail_point!("executor::commit_chunk", |_| {
Err(anyhow::anyhow!("Injected error in commit_chunk"))
});
Expand All @@ -630,6 +655,10 @@ impl<V: VMExecutor> ChunkExecutor for Executor<V> {
ledger_info_to_commit.as_ref(),
)?;

let latency = start_time.elapsed();
metrics_timer_stxl.observe_duration();
dbg!("save_transactions latency", &latency);

// 5. Cache maintenance.
let output_trees = output.executed_trees().clone();
if let Some(ledger_info_with_sigs) = &ledger_info_to_commit {
Expand Down
30 changes: 30 additions & 0 deletions execution/executor/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,33 @@ pub static DIEM_EXECUTOR_TRANSACTIONS_SAVED: Lazy<Histogram> = Lazy::new(|| {
)
.unwrap()
});

pub static DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_VERIFY_LATENCY: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
// metric name
"diem_executor_execute_and_commit_chunk_verify_latency",
// metric description
"The time spent to verify a chunk in Diem executor"
)
.unwrap()
});

pub static DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_EXECUTION_LATENCY: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
// metric name
"diem_executor_execute_and_commit_chunk_execution_latency",
// metric description
"The time spent to execute a chunk in Diem executor"
)
.unwrap()
});

pub static DIEM_EXECUTOR_EXECUTE_AND_COMMIT_CHUNK_SAVE_TX_LATENCY: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
// metric name
"diem_executor_execute_and_commit_chunk_save_tx_latency",
// metric description
"The time spent to save a transaction in Diem executor"
)
.unwrap()
});
8 changes: 8 additions & 0 deletions language/diem-vm/src/counters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,11 @@ pub static TXN_GAS_USAGE: Lazy<Histogram> =
pub static CRITICAL_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!("diem_vm_critical_errors", "Number of critical errors").unwrap()
});

pub static EXECUTOR_SINGLE_TX_LATENCY: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"diem_vm_executor_single_tx_latency",
"Latency to execute a single transaction in the VM"
)
.unwrap()
});
20 changes: 19 additions & 1 deletion language/diem-vm/src/diem_transaction_executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ use move_vm_types::gas_schedule::GasStatus;
use rayon::prelude::*;
use std::{
collections::HashSet,
convert::{AsMut, AsRef},
convert::{AsMut, AsRef}, time::Instant,
};

pub struct DiemVM(DiemVMImpl);
Expand Down Expand Up @@ -768,8 +768,26 @@ impl DiemVM {
debug!(log_context, "Retry after reconfiguration");
continue;
};


// temp time the transaction execution.
let start_time = Instant::now();
let metric_single_tx_lat = EXECUTOR_SINGLE_TX_LATENCY.start_timer();

let (vm_status, output, sender) =
self.execute_single_transaction(&txn, data_cache, &log_context)?;

match &txn {
PreprocessedTransaction::UserTransaction(t) => {
dbg!(&t.sequence_number());
},
_ => {},
};
dbg!("tx sender", &sender);
let latency = start_time.elapsed();
metric_single_tx_lat.observe_duration();
dbg!("single tx latency", &latency);

if !output.status().is_discarded() {
data_cache.push_write_set(output.write_set());
} else {
Expand Down
1 change: 1 addition & 0 deletions language/move-vm/natives/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ diem-workspace-hack = { path = "../../../common/workspace-hack" }
move-core-types = { path = "../../move-core/types" }
move-vm-types = { path = "../types" }
move-binary-format = { path = "../../move-binary-format" }
diem-metrics = { path = "../../../common/metrics" }

######### 0L ########
diem-global-constants = { path = "../../../config/global-constants"}
Expand Down
20 changes: 20 additions & 0 deletions language/move-vm/natives/src/counters.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
use diem_metrics::{
register_histogram, register_int_counter, Histogram, IntCounter
};
use once_cell::sync::Lazy;

pub static MOVE_VM_NATIVE_VERIFY_VDF_LATENCY: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"diem_move_vm_native_verify_vdf_latency",
"Latency to verify a VDF challenge"
)
.unwrap()
});

pub static MOVE_VM_NATIVE_VERIFY_VDF_PROOF_COUNT: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!("diem_move_vm_native_verify_vdf_proof_count", "Cumulative number of verified proofs").unwrap()
});

pub static MOVE_VM_NATIVE_VERIFY_VDF_PROOF_ERROR_COUNT: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!("diem_move_vm_native_verify_vdf_proof_error_count", "Cumulative number of errors while verifying proofs").unwrap()
});
1 change: 1 addition & 0 deletions language/move-vm/natives/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub mod signer;
pub mod vector;

//////// 0L ////////
pub mod counters;
pub mod vdf;
pub mod ol_decimal;
pub mod ol_hash;
Expand Down
20 changes: 18 additions & 2 deletions language/move-vm/natives/src/vdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,34 @@ use move_vm_types::{
natives::function::{native_gas, NativeContext, NativeResult},
values::{Reference, Value},
};
use std::collections::VecDeque;
use std::{collections::VecDeque, time::Instant};
use std::convert::TryFrom;
use move_binary_format::errors::{PartialVMError, PartialVMResult};
use smallvec::smallvec;
use crate::counters::{MOVE_VM_NATIVE_VERIFY_VDF_LATENCY, MOVE_VM_NATIVE_VERIFY_VDF_PROOF_COUNT,
MOVE_VM_NATIVE_VERIFY_VDF_PROOF_ERROR_COUNT};

/// Rust implementation of Move's `native public fun verify(challenge: vector<u8>, difficulty: u64, alleged_solution: vector<u8>): bool`
pub fn verify(
context: &impl NativeContext,
_ty_args: Vec<Type>,
mut arguments: VecDeque<Value>,
) -> PartialVMResult<NativeResult> {
// temporary logging.
let start_time = Instant::now();
let metric_timer = MOVE_VM_NATIVE_VERIFY_VDF_LATENCY.start_timer();

if arguments.len() != 4 {
let msg = format!(
"wrong number of arguments for vdf_verify expected 4 found {}",
arguments.len()
);
MOVE_VM_NATIVE_VERIFY_VDF_PROOF_ERROR_COUNT.inc();
return Err(PartialVMError::new(StatusCode::UNREACHABLE).with_message(msg));
}

MOVE_VM_NATIVE_VERIFY_VDF_PROOF_COUNT.inc();

// pop the arguments (reverse order).
let security = pop_arg!(arguments, Reference)
.read_ref()?
Expand All @@ -49,7 +58,8 @@ pub fn verify(

// refuse to try anthing with a security parameter above 2048 for DOS risk.
if security > 2048 {
return Err(PartialVMError::new(StatusCode::UNREACHABLE).with_message("VDF security parameter above threshold".to_string()));
MOVE_VM_NATIVE_VERIFY_VDF_PROOF_ERROR_COUNT.inc();
return Err(PartialVMError::new(StatusCode::UNREACHABLE).with_message("VDF security parameter above threshold".to_string()));
}

// TODO change the `cost_index` when we have our own cost table.
Expand All @@ -59,6 +69,12 @@ pub fn verify(
let result = v.verify(&challenge, difficulty, &solution);

let return_values = smallvec![Value::bool(result.is_ok())];

// temporary logging
let latency = start_time.elapsed();
metric_timer.observe_duration();
dbg!("vdf verification latency", &latency);

Ok(NativeResult::ok(cost, return_values))
}

Expand Down
10 changes: 9 additions & 1 deletion mempool/src/shared_mempool/tasks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ pub(crate) async fn process_client_transaction_submission<V>(
timer: HistogramTimer,
) where
V: TransactionValidation,
{
{
dbg!("new transaction", &transaction);
dbg!("new transaction", &transaction.sender());
dbg!("new transaction", &transaction.sequence_number());

timer.stop_and_record();
let _timer =
counters::process_txn_submit_latency_timer(counters::CLIENT_LABEL, counters::CLIENT_LABEL);
Expand Down Expand Up @@ -432,6 +436,10 @@ pub(crate) async fn process_consensus_request(mempool: &Mutex<CoreMempool>, req:
counters::REQUEST_SUCCESS_LABEL
};
let latency = start_time.elapsed();

dbg!("mempool_service latency", &latency);


counters::mempool_service_latency(counter_label, result, latency);
}

Expand Down

0 comments on commit b7e8d06

Please sign in to comment.