Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor StreamingMetrics with LabelGuarded ones #16728

Closed
Tracked by #14838
fuyufjh opened this issue May 13, 2024 · 0 comments · Fixed by #16737
Closed
Tracked by #14838

Refactor StreamingMetrics with LabelGuarded ones #16728

fuyufjh opened this issue May 13, 2024 · 0 comments · Fixed by #16737
Assignees
Milestone

Comments

@fuyufjh
Copy link
Member

fuyufjh commented May 13, 2024

See the background in #14838.

Many metrics in `StreamingMetrics` are still not LabelGuarded, such as the actor metrics and the aggregation metrics.

/// A cloneable bundle of metric handles, grouped by the section comments below
/// (executor, actor, source, sink, exchange, join, aggregation, TopN, lookup,
/// backfill, over window, barrier, log store, iceberg, memory, materialize).
///
/// Migration status: fields still typed as `GenericCounterVec`, `GenericGaugeVec`
/// or `IntCounterVec` have not been converted to the `LabelGuarded*` variants yet
/// (e.g. the actor and aggregation metrics); see issue #16728, tracked by #14838.
///
/// NOTE(review): the const generic in `LabelGuarded*Vec<N>` / `RelabeledGuarded*Vec<N>`
/// is presumably the number of labels of the metric — confirm against the type definitions.
/// NOTE(review): fields with a `_ns` suffix presumably hold nanoseconds and fields
/// with a `_ms` suffix milliseconds — confirm at the registration/update sites.
#[derive(Clone)]
pub struct StreamingMetrics {
// Metric level of this bundle.
// NOTE(review): presumably used to decide which metrics are reported — confirm against usages.
pub level: MetricLevel,
// Executor metrics (disabled by default)
pub executor_row_count: GenericCounterVec<AtomicU64>,
// Streaming actor metrics from tokio (disabled by default)
// Not LabelGuarded yet (see the migration note on the struct).
pub actor_execution_time: GenericGaugeVec<AtomicF64>,
pub actor_scheduled_duration: GenericGaugeVec<AtomicF64>,
pub actor_scheduled_cnt: GenericGaugeVec<AtomicI64>,
pub actor_fast_poll_duration: GenericGaugeVec<AtomicF64>,
pub actor_fast_poll_cnt: GenericGaugeVec<AtomicI64>,
pub actor_slow_poll_duration: GenericGaugeVec<AtomicF64>,
pub actor_slow_poll_cnt: GenericGaugeVec<AtomicI64>,
pub actor_poll_duration: GenericGaugeVec<AtomicF64>,
pub actor_poll_cnt: GenericGaugeVec<AtomicI64>,
pub actor_idle_duration: GenericGaugeVec<AtomicF64>,
pub actor_idle_cnt: GenericGaugeVec<AtomicI64>,
// Streaming actor
pub actor_memory_usage: GenericGaugeVec<AtomicI64>,
pub actor_in_record_cnt: LabelGuardedIntCounterVec<3>,
pub actor_out_record_cnt: LabelGuardedIntCounterVec<2>,
// Source
pub source_output_row_count: GenericCounterVec<AtomicU64>,
pub source_split_change_count: GenericCounterVec<AtomicU64>,
pub source_backfill_row_count: LabelGuardedIntCounterVec<4>,
// Sink & materialized view
pub sink_input_row_count: LabelGuardedIntCounterVec<3>,
pub mview_input_row_count: IntCounterVec,
pub sink_chunk_buffer_size: LabelGuardedIntGaugeVec<3>,
// Exchange (see also `compute::ExchangeServiceMetrics`)
pub exchange_frag_recv_size: GenericCounterVec<AtomicU64>,
// Backpressure
pub actor_output_buffer_blocking_duration_ns: LabelGuardedIntCounterVec<3>,
pub actor_input_buffer_blocking_duration_ns: LabelGuardedIntCounterVec<3>,
// Streaming Join
pub join_lookup_miss_count: LabelGuardedIntCounterVec<5>,
pub join_lookup_total_count: LabelGuardedIntCounterVec<5>,
pub join_insert_cache_miss_count: LabelGuardedIntCounterVec<5>,
pub join_actor_input_waiting_duration_ns: LabelGuardedIntCounterVec<2>,
pub join_match_duration_ns: LabelGuardedIntCounterVec<3>,
pub join_barrier_align_duration: RelabeledGuardedHistogramVec<3>,
pub join_cached_entry_count: LabelGuardedIntGaugeVec<3>,
pub join_matched_join_keys: RelabeledGuardedHistogramVec<3>,
// Streaming Aggregation
// Not LabelGuarded yet (see the migration note on the struct).
pub agg_lookup_miss_count: GenericCounterVec<AtomicU64>,
pub agg_total_lookup_count: GenericCounterVec<AtomicU64>,
pub agg_cached_entry_count: GenericGaugeVec<AtomicI64>,
pub agg_chunk_lookup_miss_count: GenericCounterVec<AtomicU64>,
pub agg_chunk_total_lookup_count: GenericCounterVec<AtomicU64>,
pub agg_distinct_cache_miss_count: GenericCounterVec<AtomicU64>,
pub agg_distinct_total_cache_count: GenericCounterVec<AtomicU64>,
pub agg_distinct_cached_entry_count: GenericGaugeVec<AtomicI64>,
pub agg_dirty_groups_count: GenericGaugeVec<AtomicI64>,
pub agg_dirty_groups_heap_size: GenericGaugeVec<AtomicI64>,
// Streaming TopN
pub group_top_n_cache_miss_count: GenericCounterVec<AtomicU64>,
pub group_top_n_total_query_cache_count: GenericCounterVec<AtomicU64>,
pub group_top_n_cached_entry_count: GenericGaugeVec<AtomicI64>,
pub group_top_n_appendonly_cache_miss_count: GenericCounterVec<AtomicU64>,
pub group_top_n_appendonly_total_query_cache_count: GenericCounterVec<AtomicU64>,
pub group_top_n_appendonly_cached_entry_count: GenericGaugeVec<AtomicI64>,
// Lookup executor
pub lookup_cache_miss_count: GenericCounterVec<AtomicU64>,
pub lookup_total_query_cache_count: GenericCounterVec<AtomicU64>,
pub lookup_cached_entry_count: GenericGaugeVec<AtomicI64>,
// Temporal join
pub temporal_join_cache_miss_count: GenericCounterVec<AtomicU64>,
pub temporal_join_total_query_cache_count: GenericCounterVec<AtomicU64>,
pub temporal_join_cached_entry_count: GenericGaugeVec<AtomicI64>,
// Backfill
pub backfill_snapshot_read_row_count: LabelGuardedIntCounterVec<2>,
pub backfill_upstream_output_row_count: LabelGuardedIntCounterVec<2>,
// CDC Backfill
pub cdc_backfill_snapshot_read_row_count: GenericCounterVec<AtomicU64>,
pub cdc_backfill_upstream_output_row_count: GenericCounterVec<AtomicU64>,
// Over Window
pub over_window_cached_entry_count: GenericGaugeVec<AtomicI64>,
pub over_window_cache_lookup_count: GenericCounterVec<AtomicU64>,
pub over_window_cache_miss_count: GenericCounterVec<AtomicU64>,
pub over_window_range_cache_entry_count: GenericGaugeVec<AtomicI64>,
pub over_window_range_cache_lookup_count: GenericCounterVec<AtomicU64>,
pub over_window_range_cache_left_miss_count: GenericCounterVec<AtomicU64>,
pub over_window_range_cache_right_miss_count: GenericCounterVec<AtomicU64>,
// Barrier (unlabeled, single-instance metrics)
/// The duration from the receipt of a barrier until it has been collected from all actors.
/// The maximum `barrier_inflight_latency` across all nodes is the latency for a barrier
/// to flow through the graph.
pub barrier_inflight_latency: Histogram,
/// The duration of sync to storage.
pub barrier_sync_latency: Histogram,
/// The progress made by the earliest in-flight barriers in the local barrier manager.
pub barrier_manager_progress: IntCounter,
// Sink related metrics
pub sink_commit_duration: LabelGuardedHistogramVec<3>,
pub connector_sink_rows_received: LabelGuardedIntCounterVec<2>,
pub log_store_first_write_epoch: LabelGuardedIntGaugeVec<3>,
pub log_store_latest_write_epoch: LabelGuardedIntGaugeVec<3>,
pub log_store_write_rows: LabelGuardedIntCounterVec<3>,
pub log_store_latest_read_epoch: LabelGuardedIntGaugeVec<3>,
pub log_store_read_rows: LabelGuardedIntCounterVec<3>,
pub log_store_reader_wait_new_future_duration_ns: LabelGuardedIntCounterVec<3>,
pub kv_log_store_storage_write_count: LabelGuardedIntCounterVec<3>,
pub kv_log_store_storage_write_size: LabelGuardedIntCounterVec<3>,
pub kv_log_store_rewind_count: LabelGuardedIntCounterVec<3>,
pub kv_log_store_rewind_delay: LabelGuardedHistogramVec<3>,
pub kv_log_store_storage_read_count: LabelGuardedIntCounterVec<4>,
pub kv_log_store_storage_read_size: LabelGuardedIntCounterVec<4>,
pub kv_log_store_buffer_unconsumed_item_count: LabelGuardedIntGaugeVec<3>,
pub kv_log_store_buffer_unconsumed_row_count: LabelGuardedIntGaugeVec<3>,
pub kv_log_store_buffer_unconsumed_epoch_count: LabelGuardedIntGaugeVec<3>,
pub kv_log_store_buffer_unconsumed_min_epoch: LabelGuardedIntGaugeVec<3>,
// Sink iceberg metrics
pub iceberg_write_qps: LabelGuardedIntCounterVec<2>,
pub iceberg_write_latency: LabelGuardedHistogramVec<2>,
pub iceberg_rolling_unflushed_data_file: LabelGuardedIntGaugeVec<2>,
pub iceberg_position_delete_cache_num: LabelGuardedIntGaugeVec<2>,
pub iceberg_partition_num: LabelGuardedIntGaugeVec<2>,
// Memory management
// FIXME(yuhao): use u64 here
pub lru_current_watermark_time_ms: IntGauge,
pub lru_physical_now_ms: IntGauge,
pub lru_runtime_loop_count: IntCounter,
pub lru_watermark_step: IntGauge,
pub lru_evicted_watermark_time_ms: LabelGuardedIntGaugeVec<3>,
pub jemalloc_allocated_bytes: IntGauge,
pub jemalloc_active_bytes: IntGauge,
pub jemalloc_resident_bytes: IntGauge,
pub jemalloc_metadata_bytes: IntGauge,
pub jvm_allocated_bytes: IntGauge,
pub jvm_active_bytes: IntGauge,
// Materialize
pub materialize_cache_hit_count: GenericCounterVec<AtomicU64>,
pub materialize_cache_total_count: GenericCounterVec<AtomicU64>,
// Memory
pub stream_memory_usage: LabelGuardedIntGaugeVec<3>,
}

@github-actions github-actions bot added this to the release-1.10 milestone May 13, 2024
@fuyufjh fuyufjh changed the title StreamingMetrics Refactor StreamingMetrics with LabelGuarded ones May 13, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants