Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
136900: ui,metric,server: fix custom chart to include histogram metrics r=kyle-a-wong a=kyle-a-wong

PR cockroachdb#135705 made changes to the `admin/metricmetadata` api to include tsdb recorded names and custom charts to be powered by this api instead of `_status/nodes_ui`. This introduced a bug where computed histogram metrics stopped appearing in custom charts. This is because these metrics are computed at record time and are not metrics that exist in the metrics registries.

Now, `admin/metricmetadata` recorded names should include all computed histogram metrics.

Benchmarks:
```
building benchmark binaries for 87e0d85: ui,metric,server: fix custom chart to include hist [bazel=true] 1/1 -

  pkg=1/1 iter=10/10 cockroachdb/cockroach/pkg/server/status |

name                   old time/op    new time/op    delta
ExtractValueAllocs-12    8.19µs ± 2%    8.09µs ± 0%  -1.17%  (p=0.000 n=10+8)

name                   old alloc/op   new alloc/op   delta
ExtractValueAllocs-12    17.3kB ± 0%    17.3kB ± 0%    ~     (all equal)

name                   old allocs/op  new allocs/op  delta
ExtractValueAllocs-12       566 ± 0%       566 ± 0%    ~     (all equal)
```

Fixes: cockroachdb#136939
Epic: None
Release note: None

Co-authored-by: Kyle Wong <[email protected]>
  • Loading branch information
craig[bot] and kyle-a-wong committed Dec 13, 2024
2 parents a55295b + bb0e758 commit 603ff88
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 41 deletions.
2 changes: 2 additions & 0 deletions pkg/server/application_api/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ go_test(
"//pkg/util/httputil",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/protoutil",
"//pkg/util/randident",
"//pkg/util/randutil",
Expand All @@ -95,6 +96,7 @@ go_test(
"@com_github_cockroachdb_errors//:errors",
"@com_github_gogo_protobuf//proto",
"@com_github_kr_pretty//:pretty",
"@com_github_prometheus_client_model//go",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
"@org_golang_x_sync//errgroup",
Expand Down
71 changes: 70 additions & 1 deletion pkg/server/application_api/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/httputil"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
prometheusgo "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -64,12 +66,79 @@ func TestGetRecordedMetricNames(t *testing.T) {
metricsMetadata, _, _ := s.MetricsRecorder().GetMetricsMetadata(true /* combine */)
recordedNames := s.MetricsRecorder().GetRecordedMetricNames(metricsMetadata)

require.Equal(t, len(metricsMetadata), len(recordedNames))
for _, v := range recordedNames {
require.True(t, strings.HasPrefix(v, "cr.node") || strings.HasPrefix(v, "cr.store"))
}
}

func TestGetRecordedMetricNames_histogram(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
s := serverutils.StartServerOnly(t, base.TestServerArgs{})
defer s.Stopper().Stop(context.Background())
metricName := "my.metric"
metricsMetadata := map[string]metric.Metadata{
metricName: {
Name: metricName,
Help: "help text",
Measurement: "measurement",
Unit: metric.Unit_COUNT,
MetricType: prometheusgo.MetricType_HISTOGRAM,
},
}

recordedNames := s.MetricsRecorder().GetRecordedMetricNames(metricsMetadata)
require.Equal(t, len(metric.HistogramMetricComputers), len(recordedNames))
for _, histogramMetric := range metric.HistogramMetricComputers {
_, ok := recordedNames[metricName+histogramMetric.Suffix]
require.True(t, ok)
}
}

func TestHistogramMetricComputers(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

metricName := "my.metric"
h := metric.NewHistogram(metric.HistogramOptions{
Metadata: metric.Metadata{Name: metricName},
Buckets: []float64{10, 20, 30, 40, 50, 60, 70, 80, 90, 100},
Mode: metric.HistogramModePrometheus,
})

sum := int64(0)
count := 0

for i := 1; i <= 10; i++ {
recordedVal := int64(i) * 10
sum += recordedVal
count++
h.RecordValue(recordedVal)
}

avg := float64(sum) / float64(count)
snapshot := h.WindowedSnapshot()
results := make(map[string]float64, len(metric.HistogramMetricComputers))
for _, c := range metric.HistogramMetricComputers {
results[metricName+c.Suffix] = c.ComputedMetric(snapshot)
}

expected := map[string]float64{
metricName + "-sum": float64(sum),
metricName + "-avg": avg,
metricName + "-count": float64(count),
metricName + "-max": 100,
metricName + "-p99.999": 100,
metricName + "-p99.99": 100,
metricName + "-p99.9": 100,
metricName + "-p99": 100,
metricName + "-p90": 90,
metricName + "-p75": 80,
metricName + "-p50": 50,
}
require.Equal(t, expected, results)
}

// TestStatusVars verifies that prometheus metrics are available via the
// /_status/vars and /_status/load endpoints.
func TestStatusVars(t *testing.T) {
Expand Down
6 changes: 2 additions & 4 deletions pkg/server/serverpb/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,10 @@ func GetInternalTimeseriesNamesFromServer(
var sl []string
for name, meta := range resp.Metadata {
if meta.MetricType == io_prometheus_client.MetricType_HISTOGRAM {
// See usage of RecordHistogramQuantiles in pkg/server/status/recorder.go.
for _, q := range metric.RecordHistogramQuantiles {
// See usage of HistogramMetricComputers in pkg/server/status/recorder.go.
for _, q := range metric.HistogramMetricComputers {
sl = append(sl, name+q.Suffix)
}
sl = append(sl, name+"-avg")
sl = append(sl, name+"-count")
} else {
sl = append(sl, name)
}
Expand Down
30 changes: 17 additions & 13 deletions pkg/server/status/recorder.go
Original file line number Diff line number Diff line change
Expand Up @@ -498,13 +498,20 @@ func (mr *MetricsRecorder) GetRecordedMetricNames(
storeMetricsMap := make(map[string]metric.Metadata)
tsDbMetricNames := make(map[string]string, len(allMetadata))
mr.writeStoreMetricsMetadata(storeMetricsMap)
for k := range allMetadata {
for metricName, metadata := range allMetadata {
prefix := nodeTimeSeriesPrefix
if _, ok := storeMetricsMap[k]; ok {
if _, ok := storeMetricsMap[metricName]; ok {
prefix = storeTimeSeriesPrefix
}
if metadata.MetricType == prometheusgo.MetricType_HISTOGRAM {
for _, metricComputer := range metric.HistogramMetricComputers {
computedMetricName := metricName + metricComputer.Suffix
tsDbMetricNames[computedMetricName] = fmt.Sprintf(prefix, computedMetricName)
}
} else {
tsDbMetricNames[metricName] = fmt.Sprintf(prefix, metricName)
}

tsDbMetricNames[k] = fmt.Sprintf(prefix, k)
}
return tsDbMetricNames
}
Expand Down Expand Up @@ -711,18 +718,15 @@ func extractValue(name string, mtr interface{}, fn func(string, float64)) error
return errors.Newf(`extractValue called on histogram metric %q that does not implement the
CumulativeHistogram interface. All histogram metrics are expected to implement this interface`, name)
}
count, sum := cumulative.CumulativeSnapshot().Total()
fn(name+"-count", float64(count))
fn(name+"-sum", sum)
cumulativeSnapshot := cumulative.CumulativeSnapshot()
// Use windowed stats for avg and quantiles
windowedSnapshot := mtr.WindowedSnapshot()
avg := windowedSnapshot.Mean()
if math.IsNaN(avg) || math.IsInf(avg, +1) || math.IsInf(avg, -1) {
avg = 0
}
fn(name+"-avg", avg)
for _, pt := range metric.RecordHistogramQuantiles {
fn(name+pt.Suffix, windowedSnapshot.ValueAtQuantile(pt.Quantile))
for _, c := range metric.HistogramMetricComputers {
if c.IsSummaryMetric {
fn(name+c.Suffix, c.ComputedMetric(windowedSnapshot))
} else {
fn(name+c.Suffix, c.ComputedMetric(cumulativeSnapshot))
}
}
case metric.PrometheusExportable:
// NB: this branch is intentionally at the bottom since all metrics implement it.
Expand Down
10 changes: 6 additions & 4 deletions pkg/server/status/recorder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ func TestMetricsRecorder(t *testing.T) {
{"testGauge", "gauge", 20},
{"testGaugeFloat64", "floatgauge", 20},
{"testCounter", "counter", 5},
{"testHistogram", "histogram", 9},
{"testHistogram", "histogram", 10},
{"testAggGauge", "agggauge", 4},
{"testAggCounter", "aggcounter", 7},

Expand Down Expand Up @@ -608,12 +608,14 @@ func TestMetricsRecorder(t *testing.T) {
})
reg.reg.AddMetric(h)
h.RecordValue(data.val)
for _, q := range metric.RecordHistogramQuantiles {
for _, q := range metric.HistogramMetricComputers {
if !q.IsSummaryMetric {
continue
}
addExpected(reg.prefix, data.name+q.Suffix, reg.source, 100, 10, reg.isNode)
}
addExpected(reg.prefix, data.name+"-count", reg.source, 100, 1, reg.isNode)
addExpected(reg.prefix, data.name+"-sum", reg.source, 100, 9, reg.isNode)
addExpected(reg.prefix, data.name+"-avg", reg.source, 100, 9, reg.isNode)
addExpected(reg.prefix, data.name+"-sum", reg.source, 100, 10, reg.isNode)
default:
t.Fatalf("unexpected: %+v", data)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ export class CustomChart extends React.Component<
return [];
}

return keys(metricsMetadata.metadata).map(k => {
return keys(metricsMetadata.recordedNames).map(k => {
const fullMetricName = metricsMetadata.recordedNames[k];
return {
value: fullMetricName,
Expand Down
113 changes: 95 additions & 18 deletions pkg/util/metric/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -1126,24 +1126,101 @@ func MergeWindowedHistogram(cur *prometheusgo.Histogram, prev *prometheusgo.Hist
*cur.SampleSum = sampleSum
}

// Quantile is a quantile along with a string suffix to be attached to the metric
// name upon recording into the internal TSDB.
type Quantile struct {
Suffix string
Quantile float64
}

// RecordHistogramQuantiles are the quantiles at which (windowed) histograms
// are recorded into the internal TSDB.
var RecordHistogramQuantiles = []Quantile{
{"-max", 100},
{"-p99.999", 99.999},
{"-p99.99", 99.99},
{"-p99.9", 99.9},
{"-p99", 99},
{"-p90", 90},
{"-p75", 75},
{"-p50", 50},
// HistogramMetricComputer is the computation function used to compute and
// store histogram metrics into the internal TSDB, along with the suffix
// to be attached to the metric.
type HistogramMetricComputer struct {
Suffix string
IsSummaryMetric bool
ComputedMetric func(h HistogramSnapshot) float64
}

// HistogramMetricComputers is a slice of all the HistogramMetricComputer
// that are used to record (windowed) histogram metrics into TSDB.
var HistogramMetricComputers = []HistogramMetricComputer{
{
Suffix: "-max",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(100)
},
},
{
Suffix: "-p99.999",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(99.999)
},
},
{
Suffix: "-p99.99",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(99.99)
},
},
{
Suffix: "-p99.9",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(99.9)
},
},
{
Suffix: "-p99",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(99)
},
},
{
Suffix: "-p90",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(90)
},
},
{
Suffix: "-p75",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(75)
},
},
{
Suffix: "-p50",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
return h.ValueAtQuantile(50)
},
},
{
Suffix: "-avg",
IsSummaryMetric: true,
ComputedMetric: func(h HistogramSnapshot) float64 {
avg := h.Mean()
if math.IsNaN(avg) || math.IsInf(avg, +1) || math.IsInf(avg, -1) {
avg = 0
}
return avg
},
},
{
Suffix: "-count",
IsSummaryMetric: false,
ComputedMetric: func(h HistogramSnapshot) float64 {
count, _ := h.Total()
return float64(count)
},
},
{
Suffix: "-sum",
IsSummaryMetric: false,
ComputedMetric: func(h HistogramSnapshot) float64 {
_, sum := h.Total()
return sum
},
},
}

// vector holds the base vector implementation. This is meant to be embedded
Expand Down

0 comments on commit 603ff88

Please sign in to comment.