Skip to content

Commit

Permalink
metrics: add build duration metric
Browse files Browse the repository at this point in the history
This adds a build duration metric with a few easy attributes. We should
implement more attributes in the future, but these ones were the easiest
to implement in the current CLI.

The backend attribute gives a named backend for this build and the
status is either completed, failed, or canceled when the context
canceled error is returned.

This also modifies some aspects of how the resource is configured by
including `service.instance.id`. This id is used to uniquely identify
the CLI invocation for use in downstream aggregators. This allows
downstream aggregators to know that the metric for an instance has not
reset and that it is a unique invocation for future aggregation. Some
work will have to be done in the aggregator to prevent the storage of
this instance id as it can cause issues with cardinality limitations,
but it's necessary for the initial reporter to include this.

The temporality selector is still the same, but has been removed from
the otlp exporter options since that one doesn't seem to do anything.  I
also recently learned the temporality didn't do what I thought it did.
I thought it would allow for multiple different invocations to be
treated as the same instance for the purposes of aggregation. It does
not work this way as the metric sdk considers each of these a gap reset.
That's the reason the instance id was added. This makes the difference
between cumulative and delta mostly cosmetic, but delta temporality has
some benefits for downstream consumers for aggregation so it's likely
good to keep.

Signed-off-by: Jonathan A. Sternberg <[email protected]>
  • Loading branch information
jsternberg committed Jan 11, 2024
1 parent 78adfc8 commit 7ec0de2
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 44 deletions.
2 changes: 1 addition & 1 deletion commands/bake.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ import (
"github.com/docker/buildx/bake"
"github.com/docker/buildx/build"
"github.com/docker/buildx/builder"
"github.com/docker/buildx/internal/metrics"
"github.com/docker/buildx/localstate"
"github.com/docker/buildx/util/buildflags"
"github.com/docker/buildx/util/cobrautil/completion"
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/cli/cli/command"
Expand Down
53 changes: 30 additions & 23 deletions commands/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ import (
"github.com/docker/buildx/controller/control"
controllererrors "github.com/docker/buildx/controller/errdefs"
controllerapi "github.com/docker/buildx/controller/pb"
"github.com/docker/buildx/internal/env"
"github.com/docker/buildx/internal/metrics"
"github.com/docker/buildx/monitor"
"github.com/docker/buildx/store"
"github.com/docker/buildx/store/storeutil"
"github.com/docker/buildx/util/buildflags"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/ioset"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/buildx/version"
Expand Down Expand Up @@ -287,14 +288,7 @@ func runBuild(dockerCli command.Cli, options buildOptions) (err error) {
return err
}

var resp *client.SolveResponse
var retErr error
if isExperimental() {
resp, retErr = runControllerBuild(ctx, dockerCli, opts, options, printer)
} else {
resp, retErr = runBasicBuild(ctx, dockerCli, opts, options, printer)
}

resp, retErr := doBuild(ctx, dockerCli, opts, options, printer, mp)
if err := printer.Wait(); retErr == nil {
retErr = err
}
Expand Down Expand Up @@ -334,6 +328,31 @@ func getImageID(resp map[string]string) string {
return dgst
}

func doBuild(ctx context.Context, dockerCli command.Cli, opts *controllerapi.BuildOptions, options buildOptions, printer *progress.Printer, mp metric.MeterProvider) (resp *client.SolveResponse, err error) {
record := metrics.Measure(mp, "docker.build.duration",
metric.WithDescription("Measures the total build duration from the CLI."),
)

if isExperimental() {
resp, err = runControllerBuild(ctx, dockerCli, opts, options, printer)
} else {
resp, err = runBasicBuild(ctx, dockerCli, opts, options, printer)
}

status := "completed"
if errors.Is(err, context.Canceled) {
status = "canceled"
} else if err != nil {
status = "error"
}

record(ctx,
attribute.String("status", status),
attribute.String("backend", options.builder),
)
return resp, err
}

func runBasicBuild(ctx context.Context, dockerCli command.Cli, opts *controllerapi.BuildOptions, options buildOptions, printer *progress.Printer) (*client.SolveResponse, error) {
resp, res, err := cbuild.RunBuild(ctx, dockerCli, *opts, dockerCli.In(), printer, false)
if res != nil {
Expand Down Expand Up @@ -711,11 +730,7 @@ func (w *wrapped) Unwrap() error {
}

func isExperimental() bool {
if v, ok := os.LookupEnv("BUILDX_EXPERIMENTAL"); ok {
vv, _ := strconv.ParseBool(v)
return vv
}
return false
return env.IsExperimental()
}

func updateLastActivity(dockerCli command.Cli, ng *store.NodeGroup) error {
Expand Down Expand Up @@ -942,15 +957,7 @@ func maybeJSONArray(v string) []string {
}

func recordVersionInfo(mp metric.MeterProvider, command string) {
// Still in the process of testing/stabilizing these counters.
if !isExperimental() {
return
}

meter := mp.Meter("github.com/docker/buildx",
metric.WithInstrumentationVersion(version.Version),
)

meter := metrics.Meter(mp)
counter, err := meter.Int64Counter("docker.cli.count",
metric.WithDescription("Number of invocations of the docker buildx command."),
)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/mod v0.11.0
Expand Down Expand Up @@ -146,7 +147,6 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
Expand Down
15 changes: 15 additions & 0 deletions internal/env/env.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package env

import (
"os"
"strconv"
)

// IsExperimental checks if the experimental flag has been configured.
func IsExperimental() bool {
if v, ok := os.LookupEnv("BUILDX_EXPERIMENTAL"); ok {
vv, _ := strconv.ParseBool(v)
return vv
}
return false
}
85 changes: 72 additions & 13 deletions util/metrics/metrics.go → internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,26 @@ import (
"context"
"fmt"
"net/url"
"os"
"path"
"path/filepath"
"sync"
"time"

"github.com/docker/buildx/internal/env"
"github.com/docker/buildx/version"
"github.com/docker/cli/cli/command"
"github.com/moby/buildkit/util/tracing/detect"
"github.com/google/uuid"
"github.com/pkg/errors"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
"golang.org/x/sync/errgroup"
)

Expand All @@ -34,35 +43,43 @@ type ReportFunc func()
func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) {
var exps []sdkmetric.Exporter

if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}
// Only metric exporters if the experimental flag is set.
if env.IsExperimental() {
if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}

if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}
}

if len(exps) == 0 {
// No exporters are configured so use a noop provider.
return noop.NewMeterProvider(), func() {}, nil
}

// Use delta temporality because, since this is a CLI program, we can never
// know the cumulative value.
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
mp := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(detect.Resource()),
sdkmetric.WithResource(Resource()),
sdkmetric.WithReader(reader),
)
return mp, reportFunc(reader, exps), nil
}

// Meter returns a Meter from the MetricProvider that indicates the measurement
// comes from buildx with the appropriate version.
func Meter(mp metric.MeterProvider) metric.Meter {
return mp.Meter(version.Package,
metric.WithInstrumentationVersion(version.Version))
}

// reportFunc returns a ReportFunc for collecting ResourceMetrics and then
// exporting them to the configured Exporter.
func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc {
Expand Down Expand Up @@ -184,6 +201,48 @@ func otelExporterOtlpEndpoint(cli command.Cli) (string, error) {
}

// deltaTemporality sets the Temporality of every instrument to delta.
//
// This isn't really needed since we create a unique resource on each invocation,
// but it can help with cardinality concerns for downstream processors since they can
// perform aggregation for a time interval and then discard the data once that time
// period has passed. Cumulative temporality would imply to the downstream processor
// that they might receive a successive point and they may unnecessarily keep state
// they really shouldn't.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality
}

var (
res *resource.Resource
resOnce sync.Once
)

func Resource() *resource.Resource {
resOnce.Do(func() {
var err error
res, err = resource.New(context.Background(),
resource.WithDetectors(serviceNameDetector{}),
resource.WithAttributes(
attribute.Stringer("service.instance.id", uuid.New()),
),
resource.WithFromEnv(),
resource.WithTelemetrySDK(),
)
if err != nil {
otel.Handle(err)
}
})
return res
}

type serviceNameDetector struct{}

func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
return resource.StringDetector(
semconv.SchemaURL,
semconv.ServiceNameKey,
func() (string, error) {
return filepath.Base(os.Args[0]), nil
},
).Detect(ctx)
}
8 changes: 2 additions & 6 deletions util/metrics/otlp.go → internal/metrics/otlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,9 @@ func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) {

switch proto {
case "grpc":
return otlpmetricgrpc.New(ctx,
otlpmetricgrpc.WithTemporalitySelector(deltaTemporality),
)
return otlpmetricgrpc.New(ctx)
case "http/protobuf":
return otlpmetrichttp.New(ctx,
otlpmetrichttp.WithTemporalitySelector(deltaTemporality),
)
return otlpmetrichttp.New(ctx)
// case "http/json": // unsupported by library
default:
return nil, errors.Errorf("unsupported otlp protocol %v", proto)
Expand Down
35 changes: 35 additions & 0 deletions internal/metrics/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package metrics

import (
"context"
"time"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)

const (
TimeUnit string = "ms"
)

type RecordFunc func(ctx context.Context, attrs ...attribute.KeyValue)

func Measure(mp metric.MeterProvider, name string, opts ...metric.Int64HistogramOption) RecordFunc {
allOpts := []metric.Int64HistogramOption{
metric.WithUnit(TimeUnit),
}
if len(opts) > 0 {
allOpts = append(allOpts, opts...)
}
histogram, err := Meter(mp).Int64Histogram(name, allOpts...)
if err != nil {
otel.Handle(err)
}

start := time.Now()
return func(ctx context.Context, attrs ...attribute.KeyValue) {
dur := int64(time.Since(start) / time.Millisecond)
histogram.Record(ctx, dur, metric.WithAttributes(attrs...))
}
}

0 comments on commit 7ec0de2

Please sign in to comment.