From bb0e758adccb3ae949a8e04b950f68ac7d03cdfd Mon Sep 17 00:00:00 2001 From: Kyle Wong <37189875+kyle-a-wong@users.noreply.github.com> Date: Fri, 6 Dec 2024 13:07:14 -0500 Subject: [PATCH] ui,metric,server: fix custom chart to include histogram metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #135705 made changes to the `admin/metricmetadata` api to include tsdb recorded names and custom charts to be powered by this api instead of `_status/nodes_ui`. This introduced a bug where computed histogram metrics stopped appearing in custom charts. This is because these metrics are computed at record time and are not metrics that exist in the metrics registries. Now, `admin/metricmetadata` recorded names should include all computed histogram metrics. Benchmarks: ``` building benchmark binaries for 87e0d85: ui,metric,server: fix custom chart to include hist [bazel=true] 1/1 - pkg=1/1 iter=10/10 cockroachdb/cockroach/pkg/server/status | name old time/op new time/op delta ExtractValueAllocs-12 8.19µs ± 2% 8.09µs ± 0% -1.17% (p=0.000 n=10+8) name old alloc/op new alloc/op delta ExtractValueAllocs-12 17.3kB ± 0% 17.3kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta ExtractValueAllocs-12 566 ± 0% 566 ± 0% ~ (all equal) ``` Fixes: #136939 Epic: None Release note: None --- pkg/server/application_api/BUILD.bazel | 2 + pkg/server/application_api/metrics_test.go | 71 ++++++++++- pkg/server/serverpb/admin.go | 6 +- pkg/server/status/recorder.go | 30 +++-- pkg/server/status/recorder_test.go | 10 +- .../reports/containers/customChart/index.tsx | 2 +- pkg/util/metric/metric.go | 113 +++++++++++++++--- 7 files changed, 193 insertions(+), 41 deletions(-) diff --git a/pkg/server/application_api/BUILD.bazel b/pkg/server/application_api/BUILD.bazel index 420856a06faa..f38ce22758d7 100644 --- a/pkg/server/application_api/BUILD.bazel +++ b/pkg/server/application_api/BUILD.bazel @@ -85,6 +85,7 @@ go_test( "//pkg/util/httputil", "//pkg/util/leaktest", "//pkg/util/log", + "//pkg/util/metric", "//pkg/util/protoutil", "//pkg/util/randident", "//pkg/util/randutil", @@ -95,6 +96,7 @@ go_test( "@com_github_cockroachdb_errors//:errors", "@com_github_gogo_protobuf//proto", "@com_github_kr_pretty//:pretty", + "@com_github_prometheus_client_model//go", "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", "@org_golang_x_sync//errgroup", diff --git a/pkg/server/application_api/metrics_test.go b/pkg/server/application_api/metrics_test.go index 798612075563..8c9044818db6 100644 --- a/pkg/server/application_api/metrics_test.go +++ b/pkg/server/application_api/metrics_test.go @@ -20,6 +20,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" + prometheusgo "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" ) @@ -64,12 +66,79 @@ func TestGetRecordedMetricNames(t *testing.T) { metricsMetadata, _, _ := s.MetricsRecorder().GetMetricsMetadata(true /* combine */) recordedNames := s.MetricsRecorder().GetRecordedMetricNames(metricsMetadata) - require.Equal(t, len(metricsMetadata), len(recordedNames)) for _, v := range recordedNames { require.True(t, strings.HasPrefix(v, "cr.node") || strings.HasPrefix(v, "cr.store")) } } +func TestGetRecordedMetricNames_histogram(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + s := serverutils.StartServerOnly(t, base.TestServerArgs{}) + defer s.Stopper().Stop(context.Background()) + metricName := "my.metric" + metricsMetadata := map[string]metric.Metadata{ + metricName: { + Name: metricName, + Help: "help text", + Measurement: "measurement", + Unit: metric.Unit_COUNT, + MetricType: prometheusgo.MetricType_HISTOGRAM, + }, + } + + recordedNames := s.MetricsRecorder().GetRecordedMetricNames(metricsMetadata) + require.Equal(t, len(metric.HistogramMetricComputers), len(recordedNames)) + for _, histogramMetric := range metric.HistogramMetricComputers { + _, ok := recordedNames[metricName+histogramMetric.Suffix] + require.True(t, ok) + } +} + +func TestHistogramMetricComputers(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + metricName := "my.metric" + h := metric.NewHistogram(metric.HistogramOptions{ + Metadata: metric.Metadata{Name: metricName}, + Buckets: []float64{10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, + Mode: metric.HistogramModePrometheus, + }) + + sum := int64(0) + count := 0 + + for i := 1; i <= 10; i++ { + recordedVal := int64(i) * 10 + sum += recordedVal + count++ + h.RecordValue(recordedVal) + } + + avg := float64(sum) / float64(count) + snapshot := h.WindowedSnapshot() + results := make(map[string]float64, len(metric.HistogramMetricComputers)) + for _, c := range metric.HistogramMetricComputers { + results[metricName+c.Suffix] = c.ComputedMetric(snapshot) + } + + expected := map[string]float64{ + metricName + "-sum": float64(sum), + metricName + "-avg": avg, + metricName + "-count": float64(count), + metricName + "-max": 100, + metricName + "-p99.999": 100, + metricName + "-p99.99": 100, + metricName + "-p99.9": 100, + metricName + "-p99": 100, + metricName + "-p90": 90, + metricName + "-p75": 80, + metricName + "-p50": 50, + } + require.Equal(t, expected, results) +} + // TestStatusVars verifies that prometheus metrics are available via the // /_status/vars and /_status/load endpoints. func TestStatusVars(t *testing.T) { diff --git a/pkg/server/serverpb/admin.go b/pkg/server/serverpb/admin.go index fa58ad2c346e..d2d506fa9751 100644 --- a/pkg/server/serverpb/admin.go +++ b/pkg/server/serverpb/admin.go @@ -86,12 +86,10 @@ func GetInternalTimeseriesNamesFromServer( var sl []string for name, meta := range resp.Metadata { if meta.MetricType == io_prometheus_client.MetricType_HISTOGRAM { - // See usage of RecordHistogramQuantiles in pkg/server/status/recorder.go. - for _, q := range metric.RecordHistogramQuantiles { + // See usage of HistogramMetricComputers in pkg/server/status/recorder.go. + for _, q := range metric.HistogramMetricComputers { sl = append(sl, name+q.Suffix) } - sl = append(sl, name+"-avg") - sl = append(sl, name+"-count") } else { sl = append(sl, name) } diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index 2baa5c815142..4e0e4fb11cb1 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -498,13 +498,20 @@ func (mr *MetricsRecorder) GetRecordedMetricNames( storeMetricsMap := make(map[string]metric.Metadata) tsDbMetricNames := make(map[string]string, len(allMetadata)) mr.writeStoreMetricsMetadata(storeMetricsMap) - for k := range allMetadata { + for metricName, metadata := range allMetadata { prefix := nodeTimeSeriesPrefix - if _, ok := storeMetricsMap[k]; ok { + if _, ok := storeMetricsMap[metricName]; ok { prefix = storeTimeSeriesPrefix } + if metadata.MetricType == prometheusgo.MetricType_HISTOGRAM { + for _, metricComputer := range metric.HistogramMetricComputers { + computedMetricName := metricName + metricComputer.Suffix + tsDbMetricNames[computedMetricName] = fmt.Sprintf(prefix, computedMetricName) + } + } else { + tsDbMetricNames[metricName] = fmt.Sprintf(prefix, metricName) + } - tsDbMetricNames[k] = fmt.Sprintf(prefix, k) } return tsDbMetricNames } @@ -711,18 +718,15 @@ func extractValue(name string, mtr interface{}, fn func(string, float64)) error return errors.Newf(`extractValue called on histogram metric %q that does not implement the CumulativeHistogram interface. All histogram metrics are expected to implement this interface`, name) } - count, sum := cumulative.CumulativeSnapshot().Total() - fn(name+"-count", float64(count)) - fn(name+"-sum", sum) + cumulativeSnapshot := cumulative.CumulativeSnapshot() // Use windowed stats for avg and quantiles windowedSnapshot := mtr.WindowedSnapshot() - avg := windowedSnapshot.Mean() - if math.IsNaN(avg) || math.IsInf(avg, +1) || math.IsInf(avg, -1) { - avg = 0 - } - fn(name+"-avg", avg) - for _, pt := range metric.RecordHistogramQuantiles { - fn(name+pt.Suffix, windowedSnapshot.ValueAtQuantile(pt.Quantile)) + for _, c := range metric.HistogramMetricComputers { + if c.IsSummaryMetric { + fn(name+c.Suffix, c.ComputedMetric(windowedSnapshot)) + } else { + fn(name+c.Suffix, c.ComputedMetric(cumulativeSnapshot)) + } } case metric.PrometheusExportable: // NB: this branch is intentionally at the bottom since all metrics implement it. diff --git a/pkg/server/status/recorder_test.go b/pkg/server/status/recorder_test.go index 27d710026cc7..be520f81206e 100644 --- a/pkg/server/status/recorder_test.go +++ b/pkg/server/status/recorder_test.go @@ -515,7 +515,7 @@ func TestMetricsRecorder(t *testing.T) { {"testGauge", "gauge", 20}, {"testGaugeFloat64", "floatgauge", 20}, {"testCounter", "counter", 5}, - {"testHistogram", "histogram", 9}, + {"testHistogram", "histogram", 10}, {"testAggGauge", "agggauge", 4}, {"testAggCounter", "aggcounter", 7}, @@ -608,12 +608,14 @@ func TestMetricsRecorder(t *testing.T) { }) reg.reg.AddMetric(h) h.RecordValue(data.val) - for _, q := range metric.RecordHistogramQuantiles { + for _, q := range metric.HistogramMetricComputers { + if !q.IsSummaryMetric { + continue + } addExpected(reg.prefix, data.name+q.Suffix, reg.source, 100, 10, reg.isNode) } addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode) - addExpected(reg.prefix, data.name+"-sum", reg.source, 100, 9, reg.isNode) - addExpected(reg.prefix, data.name+"-avg", reg.source, 100, 9, reg.isNode) + addExpected(reg.prefix, data.name+"-sum", reg.source, 100, 10, reg.isNode) default: t.Fatalf("unexpected: %+v", data) } diff --git a/pkg/ui/workspaces/db-console/src/views/reports/containers/customChart/index.tsx b/pkg/ui/workspaces/db-console/src/views/reports/containers/customChart/index.tsx index c78b02a38ae1..e8252d5a48e4 100644 --- a/pkg/ui/workspaces/db-console/src/views/reports/containers/customChart/index.tsx +++ b/pkg/ui/workspaces/db-console/src/views/reports/containers/customChart/index.tsx @@ -139,7 +139,7 @@ export class CustomChart extends React.Component< return []; } - return keys(metricsMetadata.metadata).map(k => { + return keys(metricsMetadata.recordedNames).map(k => { const fullMetricName = metricsMetadata.recordedNames[k]; return { value: fullMetricName, diff --git a/pkg/util/metric/metric.go b/pkg/util/metric/metric.go index 63f79e42211f..4ffd6706c0c2 100644 --- a/pkg/util/metric/metric.go +++ b/pkg/util/metric/metric.go @@ -1126,24 +1126,101 @@ func MergeWindowedHistogram(cur *prometheusgo.Histogram, prev *prometheusgo.Hist *cur.SampleSum = sampleSum } -// Quantile is a quantile along with a string suffix to be attached to the metric -// name upon recording into the internal TSDB. -type Quantile struct { - Suffix string - Quantile float64 -} - -// RecordHistogramQuantiles are the quantiles at which (windowed) histograms -// are recorded into the internal TSDB. -var RecordHistogramQuantiles = []Quantile{ - {"-max", 100}, - {"-p99.999", 99.999}, - {"-p99.99", 99.99}, - {"-p99.9", 99.9}, - {"-p99", 99}, - {"-p90", 90}, - {"-p75", 75}, - {"-p50", 50}, +// HistogramMetricComputer is the computation function used to compute and +// store histogram metrics into the internal TSDB, along with the suffix +// to be attached to the metric. +type HistogramMetricComputer struct { + Suffix string + IsSummaryMetric bool + ComputedMetric func(h HistogramSnapshot) float64 +} + +// HistogramMetricComputers is a slice of all the HistogramMetricComputer +// that are used to record (windowed) histogram metrics into TSDB. +var HistogramMetricComputers = []HistogramMetricComputer{ + { + Suffix: "-max", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(100) + }, + }, + { + Suffix: "-p99.999", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(99.999) + }, + }, + { + Suffix: "-p99.99", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(99.99) + }, + }, + { + Suffix: "-p99.9", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(99.9) + }, + }, + { + Suffix: "-p99", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(99) + }, + }, + { + Suffix: "-p90", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(90) + }, + }, + { + Suffix: "-p75", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(75) + }, + }, + { + Suffix: "-p50", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + return h.ValueAtQuantile(50) + }, + }, + { + Suffix: "-avg", + IsSummaryMetric: true, + ComputedMetric: func(h HistogramSnapshot) float64 { + avg := h.Mean() + if math.IsNaN(avg) || math.IsInf(avg, +1) || math.IsInf(avg, -1) { + avg = 0 + } + return avg + }, + }, + { + Suffix: "-count", + IsSummaryMetric: false, + ComputedMetric: func(h HistogramSnapshot) float64 { + count, _ := h.Total() + return float64(count) + }, + }, + { + Suffix: "-sum", + IsSummaryMetric: false, + ComputedMetric: func(h HistogramSnapshot) float64 { + _, sum := h.Total() + return sum + }, + }, } // vector holds the base vector implementation. This is meant to be embedded