Skip to content

Commit

Permalink
add metrics and logs
Browse files Browse the repository at this point in the history
  • Loading branch information
bchocho committed Nov 14, 2024
1 parent ec78e1d commit e05f5dc
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 9 deletions.
13 changes: 10 additions & 3 deletions aptos-move/aptos-vm/src/block_executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pub(crate) mod vm_wrapper;

use crate::{
block_executor::vm_wrapper::AptosExecutorTask,
counters::{BLOCK_EXECUTOR_CONCURRENCY, BLOCK_EXECUTOR_EXECUTE_BLOCK_SECONDS},
counters::{BLOCK_EXECUTOR_CONCURRENCY, BLOCK_EXECUTOR_EXECUTE_BLOCK_SECONDS, OTHER_TIMERS},
};
use aptos_aggregator::{
delayed_change::DelayedChange, delta_change_set::DeltaOp, resolver::TAggregatorV1View,
Expand All @@ -17,6 +17,7 @@ use aptos_block_executor::{
txn_commit_hook::TransactionCommitHook, types::InputOutputKey,
};
use aptos_infallible::Mutex;
use aptos_metrics_core::TimerHelper;
use aptos_types::{
block_executor::config::BlockExecutorConfig,
contract_event::ContractEvent,
Expand Down Expand Up @@ -422,7 +423,10 @@ impl BlockAptosVM {
let environment =
Arc::new(Environment::new(state_view).try_enable_delayed_field_optimization());
let ret = executor.execute_block(environment, signature_verified_block, state_view);
match ret {
aptos_logger::info!("OTHER execute_block end.");

let make_ret_timer = OTHER_TIMERS.timer_with(&["make_ret"]);
let ret = match ret {
Ok(block_output) => {
let (transaction_outputs, block_end_info) = block_output.into_inner();
let output_vec: Vec<_> = transaction_outputs
Expand All @@ -449,7 +453,10 @@ impl BlockAptosVM {
message: Some(err_msg),
}),
Err(BlockExecutionError::FatalVMError(err)) => Err(err),
}
};
make_ret_timer.observe_duration();
aptos_logger::info!("OTHER make_ret end.");
ret
}

/// Uses shared thread pool to execute blocks.
Expand Down
12 changes: 12 additions & 0 deletions aptos-move/aptos-vm/src/counters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ const BLOCK_EXECUTION_TIME_BUCKETS: [f64; 16] = [
0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.0, 1.25, 1.5, 1.75, 2.0, 3.0, 4.0, 5.0,
];

pub static OTHER_TIMERS: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
// metric name
"aptos_vm_other_timers_seconds",
// metric description
"The time spent in seconds of others in Aptos VM",
&["name"],
exponential_buckets(/*start=*/ 1e-3, /*factor=*/ 2.0, /*count=*/ 20).unwrap(),
)
.unwrap()
});

pub static BLOCK_EXECUTOR_EXECUTE_BLOCK_SECONDS: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
// metric name
Expand Down
20 changes: 16 additions & 4 deletions aptos-move/block-executor/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -828,10 +828,11 @@ where
final_results,
)?;
}
info!("OTHER drain_commit_queue done.");
Ok(())
};

loop {
let ret = loop {
if let SchedulerTask::ValidationTask(txn_idx, incarnation, _) = &scheduler_task {
if *incarnation as usize > num_workers.pow(2) + num_txns + 10 {
// Something is wrong if we observe high incarnations (e.g. a bug
Expand Down Expand Up @@ -912,11 +913,14 @@ where
},
SchedulerTask::Retry => scheduler.next_task(),
SchedulerTask::Done => {
info!("OTHER Done done.");
drain_commit_queue()?;
break Ok(());
},
}
}
};
info!("OTHER loop done.");
ret
}

pub(crate) fn execute_transactions_parallel(
Expand Down Expand Up @@ -1012,6 +1016,7 @@ where

// Explicit async drops.
DEFAULT_DROPPER.schedule_drop((last_input_output, scheduler, versioned_cache));
info!("OTHER schedule_drop end.");

let block_end_info = if self
.config
Expand All @@ -1024,9 +1029,12 @@ where
None
};

(!shared_maybe_error.load(Ordering::SeqCst))
let timer = std::time::Instant::now();
let ret = (!shared_maybe_error.load(Ordering::SeqCst))
.then(|| BlockOutput::new(final_results.into_inner(), block_end_info))
.ok_or(())
.ok_or(());
info!("OTHER block_output end. {} ms", timer.elapsed().as_millis());
ret
}

fn apply_output_sequential(
Expand Down Expand Up @@ -1464,6 +1472,7 @@ where

info!("parallel execution requiring fallback");
}
info!("OTHER execute_transactions_parallel end.");

// If we didn't run parallel, or it didn't finish successfully - run sequential
let sequential_result = self.execute_transactions_sequential(
Expand All @@ -1472,6 +1481,7 @@ where
base_view,
false,
);
info!("OTHER execute_transactions_sequential end.");

// If sequential gave us result, return it
let sequential_error = match sequential_result {
Expand Down Expand Up @@ -1511,6 +1521,7 @@ where
},
Err(SequentialBlockExecutionError::ErrorToReturn(err)) => err,
};
info!("OTHER sequential_result end.");

if self.config.local.discard_failed_blocks {
// We cannot execute block, discard everything (including block metadata and validator transactions)
Expand All @@ -1529,6 +1540,7 @@ where
.collect();
return Ok(BlockOutput::new(ret, self.empty_block_end_info()));
}
info!("OTHER discard_failed_blocks end.");

Err(sequential_error)
}
Expand Down
1 change: 1 addition & 0 deletions execution/executor/src/block_executor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ where
Some(block_id),
)?
};
info!("OTHER execute_transaction_block end. {}", block_id);

let _timer = OTHER_TIMERS.timer_with(&["state_checkpoint"]);

Expand Down
11 changes: 9 additions & 2 deletions execution/executor/src/workflow/do_get_execution_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,12 @@ impl DoGetExecutionOutput {
) -> Result<ExecutionOutput> {
let block_output =
Self::execute_block::<V>(txn_provider.clone(), &state_view, onchain_config)?;
let block_output_into_inner_timer = OTHER_TIMERS.timer_with(&["block_output_into_inner"]);
let (transaction_outputs, block_end_info) = block_output.into_inner();
block_output_into_inner_timer.observe_duration();

Parser::parse(
let parse_result_timer = OTHER_TIMERS.timer_with(&["parse_result"]);
let result = Parser::parse(
state_view.next_version(),
txn_provider
.to_vec()
Expand All @@ -113,7 +116,11 @@ impl DoGetExecutionOutput {
state_view.into_state_cache(),
block_end_info,
append_state_checkpoint_to_block,
)
);
parse_result_timer.observe_duration();
info!("OTHER parse_result end.");

result
}

pub fn by_transaction_execution_sharded<V: VMExecutor>(
Expand Down

0 comments on commit e05f5dc

Please sign in to comment.