Skip to content

Commit

Permalink
refactor(metrics): clean & restructure some metrics (#13195)
Browse files: browse the repository at this point in the history
  • Loading branch information
fuyufjh authored Nov 2, 2023
1 parent 801582b commit 1211f9d
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 103 deletions.
2 changes: 1 addition & 1 deletion docker/dashboards/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

107 changes: 10 additions & 97 deletions grafana/risingwave-dev-dashboard.dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -971,36 +971,6 @@ def section_streaming_actors(outer_panels):
),
],
),
panels.timeseries_actor_latency(
"Actor Barrier Latency",
"",
[
panels.target(
f"rate({metric('stream_actor_barrier_time')}[$__rate_interval]) > 0",
"{{actor_id}}",
),
],
),
panels.timeseries_actor_latency(
"Actor Processing Time",
"",
[
panels.target(
f"rate({metric('stream_actor_processing_time')}[$__rate_interval]) > 0",
"{{actor_id}}",
),
],
),
panels.timeseries_actor_latency(
"Actor Execution Time",
"",
[
panels.target(
f"rate({metric('stream_actor_actor_execution_time')}[$__rate_interval]) > 0",
"{{actor_id}}",
),
],
),
panels.timeseries_row(
"Actor Input Row",
"",
Expand All @@ -1021,25 +991,6 @@ def section_streaming_actors(outer_panels):
),
],
),
panels.timeseries_actor_ops(
"Join Executor Cache",
"",
[
panels.target(
f"rate({metric('stream_join_lookup_miss_count')}[$__rate_interval])",
"cache miss - {{side}} side, join_table_id {{join_table_id}} degree_table_id {{degree_table_id}} actor {{actor_id}} ",
),
panels.target(
f"rate({metric('stream_join_lookup_total_count')}[$__rate_interval])",
"total lookups {{side}} side, join_table_id {{join_table_id}} degree_table_id {{degree_table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_join_insert_cache_miss_count')}[$__rate_interval])",
"cache miss when insert {{side}} side, join_table_id {{join_table_id}} degree_table_id {{degree_table_id}} actor {{actor_id}}",
),
],
),

panels.timeseries_actor_ops(
"Temporal Join Executor Cache",
"",
Expand Down Expand Up @@ -1183,54 +1134,6 @@ def section_streaming_actors(outer_panels):
),
],
),
panels.timeseries_actor_ops(
"Aggregation Executor Cache Statistics For Each Key/State",
"Lookup miss count counts the number of aggregation key's cache miss per second."
"Lookup total count counts the number of rows processed per second."
"By diving these two metrics, one can derive the cache miss rate per second.",
[
panels.target(
f"rate({metric('stream_agg_lookup_miss_count')}[$__rate_interval])",
"stream agg cache miss - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_agg_lookup_total_count')}[$__rate_interval])",
"stream agg total lookups - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_agg_distinct_cache_miss_count')}[$__rate_interval])",
"distinct agg cache miss - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_agg_distinct_total_cache_count')}[$__rate_interval])",
"distinct agg total lookups - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_group_top_n_cache_miss_count')}[$__rate_interval])",
"group top n cache miss - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_group_top_n_total_query_cache_count')}[$__rate_interval])",
"group top n total lookups - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_group_top_n_appendonly_cache_miss_count')}[$__rate_interval])",
"group top n appendonly cache miss - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_group_top_n_appendonly_total_query_cache_count')}[$__rate_interval])",
"group top n appendonly total lookups - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_lookup_cache_miss_count')}[$__rate_interval])",
"lookup executor cache miss - table {{table_id}} actor {{actor_id}}",
),
panels.target(
f"rate({metric('stream_lookup_total_query_cache_count')}[$__rate_interval])",
"lookup executor total lookups - table {{table_id}} actor {{actor_id}}",
),
],
),
panels.timeseries_actor_ops(
"Aggregation Executor Cache Statistics For Each StreamChunk",
"",
Expand Down Expand Up @@ -1318,6 +1221,16 @@ def section_streaming_actors_tokio(outer_panels):
outer_panels.row_collapsed(
"Streaming Actors (Tokio)",
[
panels.timeseries_actor_latency(
"Actor Execution Time",
"",
[
panels.target(
f"rate({metric('stream_actor_actor_execution_time')}[$__rate_interval]) > 0",
"{{actor_id}}",
),
],
),
panels.timeseries_actor_latency_small(
"Tokio: Actor Fast Poll Time",
"",
Expand Down
2 changes: 1 addition & 1 deletion grafana/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions src/stream/src/executor/monitor/streaming_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ pub struct StreamingMetrics {

// Streaming actor metrics from tokio (disabled by default)
pub actor_execution_time: GenericGaugeVec<AtomicF64>,
pub actor_output_buffer_blocking_duration_ns: LabelGuardedIntCounterVec<3>,
pub actor_input_buffer_blocking_duration_ns: LabelGuardedIntCounterVec<3>,
pub actor_scheduled_duration: GenericGaugeVec<AtomicF64>,
pub actor_scheduled_cnt: GenericGaugeVec<AtomicI64>,
pub actor_fast_poll_duration: GenericGaugeVec<AtomicF64>,
Expand Down Expand Up @@ -72,6 +70,10 @@ pub struct StreamingMetrics {
// Exchange (see also `compute::ExchangeServiceMetrics`)
pub exchange_frag_recv_size: GenericCounterVec<AtomicU64>,

// Backpressure
pub actor_output_buffer_blocking_duration_ns: LabelGuardedIntCounterVec<3>,
pub actor_input_buffer_blocking_duration_ns: LabelGuardedIntCounterVec<3>,

// Streaming Join
pub join_lookup_miss_count: LabelGuardedIntCounterVec<5>,
pub join_lookup_total_count: LabelGuardedIntCounterVec<5>,
Expand Down Expand Up @@ -903,8 +905,6 @@ impl StreamingMetrics {
level,
executor_row_count,
actor_execution_time,
actor_output_buffer_blocking_duration_ns,
actor_input_buffer_blocking_duration_ns,
actor_scheduled_duration,
actor_scheduled_cnt,
actor_fast_poll_duration,
Expand All @@ -924,6 +924,8 @@ impl StreamingMetrics {
sink_input_row_count,
mview_input_row_count,
exchange_frag_recv_size,
actor_output_buffer_blocking_duration_ns,
actor_input_buffer_blocking_duration_ns,
join_lookup_miss_count,
join_lookup_total_count,
join_insert_cache_miss_count,
Expand Down

0 comments on commit 1211f9d

Please sign in to comment.