Skip to content

Commit

Permalink
metrics: add build duration metric
Browse files Browse the repository at this point in the history
This adds a build duration metric with a few easy attributes. We should
implement more attributes in the future, but these ones were the easiest
to implement in the current CLI.

This also modifies some aspects of how the resource is configured by
including `service.instance.id`. This id is used to uniquely identify
the CLI invocation for use in downstream aggregators. This allows
downstream aggregators to know that the metric for an instance has not
reset and that it is a unique invocation for future aggregation. Some
work will have to be done in the aggregator to prevent the storage of
this instance id as it can cause issues with cardinality limitations,
but it's necessary for the initial reporter to include this.

The temporality selector is still the same, but has been removed from
the OTLP exporter options since it doesn't seem to have any effect there. I
also recently learned that the temporality didn't do what I thought it did.
I thought it would allow for multiple different invocations to be
treated as the same instance for the purposes of aggregation. It does
not work this way as the metric sdk considers each of these a gap reset.
That's the reason the instance id was added. This makes the difference
between cumulative and delta mostly cosmetic, but delta temporality has
some benefits for downstream consumers for aggregation so it's likely
good to keep.

Signed-off-by: Jonathan A. Sternberg <[email protected]>
  • Loading branch information
jsternberg committed Jan 11, 2024
1 parent 78adfc8 commit 6a79645
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 44 deletions.
24 changes: 24 additions & 0 deletions build/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/distribution/reference"
"github.com/docker/buildx/builder"
"github.com/docker/buildx/driver"
"github.com/docker/buildx/internal/metrics"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/imagetools"
Expand Down Expand Up @@ -55,6 +56,8 @@ import (
specs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
)
Expand Down Expand Up @@ -686,6 +689,10 @@ func BuildWithResultHandler(ctx context.Context, nodes []builder.Node, opt map[s
return err
}

record := metrics.Measure(ctx, "docker.build.duration",
metric.WithDescription("Measures the total build duration."),
)

frontendInputs := make(map[string]*pb.Definition)
for key, st := range so.FrontendInputs {
def, err := st.Marshal(ctx)
Expand Down Expand Up @@ -785,6 +792,13 @@ func BuildWithResultHandler(ctx context.Context, nodes []builder.Node, opt map[s
} else {
rr, err = c.Build(ctx, *so, "buildx", buildFunc, ch)
}

record(ctx,
attribute.String("status", errorToStatus(err)),
attribute.String("backend", dp.Node().Driver.Factory().Name()),
attribute.String("build.ref", so.Ref),
)

if desktop.BuildBackendEnabled() && node.Driver.HistoryAPISupported(ctx) {
buildRef := fmt.Sprintf("%s/%s/%s", node.Builder, node.Name, so.Ref)
if err != nil {
Expand Down Expand Up @@ -1523,3 +1537,13 @@ func ReadSourcePolicy() (*spb.Policy, error) {

return &pol, nil
}

// errorToStatus converts the result of a build into the string reported in
// the "status" attribute of the build duration metric:
//
//   - "completed" when err is nil,
//   - "canceled" when err is, or wraps, context.Canceled,
//   - "error" for every other failure.
func errorToStatus(err error) string {
	switch {
	case err == nil:
		return "completed"
	case errors.Is(err, context.Canceled):
		return "canceled"
	default:
		return "error"
	}
}
8 changes: 3 additions & 5 deletions commands/bake.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ import (
"github.com/docker/buildx/bake"
"github.com/docker/buildx/build"
"github.com/docker/buildx/builder"
"github.com/docker/buildx/internal/metrics"
"github.com/docker/buildx/localstate"
"github.com/docker/buildx/util/buildflags"
"github.com/docker/buildx/util/cobrautil/completion"
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/cli/cli/command"
Expand All @@ -44,15 +44,13 @@ type bakeOptions struct {
}

func runBake(dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) {
ctx := appcontext.Context()

mp, report, err := metrics.MeterProvider(dockerCli)
ctx, report, err := metrics.Initialize(appcontext.Context(), dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "bake")
recordVersionInfo(ctx, "bake")

ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake")
if err != nil {
Expand Down
20 changes: 5 additions & 15 deletions commands/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ import (
"github.com/docker/buildx/controller/control"
controllererrors "github.com/docker/buildx/controller/errdefs"
controllerapi "github.com/docker/buildx/controller/pb"
"github.com/docker/buildx/internal/metrics"
"github.com/docker/buildx/monitor"
"github.com/docker/buildx/store"
"github.com/docker/buildx/store/storeutil"
"github.com/docker/buildx/util/buildflags"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/ioset"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/buildx/version"
Expand Down Expand Up @@ -216,15 +216,13 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) {
}

func runBuild(dockerCli command.Cli, options buildOptions) (err error) {
ctx := appcontext.Context()

mp, report, err := metrics.MeterProvider(dockerCli)
ctx, report, err := metrics.Initialize(appcontext.Context(), dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "build")
recordVersionInfo(ctx, "build")

ctx, end, err := tracing.TraceCurrentCommand(ctx, "build")
if err != nil {
Expand Down Expand Up @@ -941,16 +939,8 @@ func maybeJSONArray(v string) []string {
return []string{v}
}

func recordVersionInfo(mp metric.MeterProvider, command string) {
// Still in the process of testing/stabilizing these counters.
if !isExperimental() {
return
}

meter := mp.Meter("github.com/docker/buildx",
metric.WithInstrumentationVersion(version.Version),
)

func recordVersionInfo(ctx context.Context, command string) {
meter := metrics.Meter(ctx)
counter, err := meter.Int64Counter("docker.cli.count",
metric.WithDescription("Number of invocations of the docker buildx command."),
)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/mod v0.11.0
Expand Down Expand Up @@ -146,7 +147,6 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
Expand Down
15 changes: 15 additions & 0 deletions internal/env/env.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package env

import (
"os"
"strconv"
)

// IsExperimental reports whether experimental features are enabled through
// the BUILDX_EXPERIMENTAL environment variable. The variable must be set to
// a value that strconv.ParseBool accepts as true; an unset variable or a
// value that fails to parse yields false.
func IsExperimental() bool {
	raw, found := os.LookupEnv("BUILDX_EXPERIMENTAL")
	if !found {
		return false
	}
	enabled, err := strconv.ParseBool(raw)
	// A malformed value is treated the same as "disabled".
	return err == nil && enabled
}
110 changes: 93 additions & 17 deletions util/metrics/metrics.go → internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,34 @@ import (
"context"
"fmt"
"net/url"
"os"
"path"
"path/filepath"
"sync"
"time"

"github.com/docker/buildx/internal/env"
"github.com/docker/buildx/version"
"github.com/docker/cli/cli/command"
"github.com/moby/buildkit/util/tracing/detect"
"github.com/google/uuid"
"github.com/pkg/errors"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
"golang.org/x/sync/errgroup"
)

// contextKey is an unexported type for the context value keys declared in
// this package.
type contextKey int

const (
// meterProviderKey is the context key under which the MeterProvider is
// stored and looked up.
meterProviderKey contextKey = iota

// otelConfigFieldName is the literal "otel"; presumably the name of the
// configuration field holding the OTEL settings — its use is outside this
// view, confirm against the caller.
otelConfigFieldName = "otel"
// shutdownTimeout is a two second bound; presumably applied while flushing
// and shutting down exporters — its use is outside this view, confirm.
shutdownTimeout = 2 * time.Second
)
Expand All @@ -27,40 +40,61 @@ const (
// desired endpoint. It should be invoked on application shutdown.
type ReportFunc func()

// MeterProvider returns a MeterProvider suitable for CLI usage.
// Initialize returns a context.Context with a MeterProvider suitable for CLI usage.
// The primary difference between this metric reader and a more typical
// usage is that metric reporting only happens once when ReportFunc
// is invoked.
func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) {
func Initialize(ctx context.Context, cli command.Cli) (context.Context, ReportFunc, error) {
var exps []sdkmetric.Exporter

if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}
// Only configure metric exporters if the experimental flag is set.
if env.IsExperimental() {
if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}

if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}
}

if len(exps) == 0 {
// No exporters are configured so use a noop provider.
return noop.NewMeterProvider(), func() {}, nil
return ctx, func() {}, nil
}

// Use delta temporality because, since this is a CLI program, we can never
// know the cumulative value.
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
mp := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(detect.Resource()),
sdkmetric.WithResource(Resource()),
sdkmetric.WithReader(reader),
)
return mp, reportFunc(reader, exps), nil
return withMeterProvider(ctx, mp), reportFunc(reader, exps), nil
}

// withMeterProvider derives a child context from ctx that carries mp under
// meterProviderKey.
func withMeterProvider(ctx context.Context, mp metric.MeterProvider) context.Context {
	child := context.WithValue(ctx, meterProviderKey, mp)
	return child
}

// MeterProvider returns the metric.MeterProvider stored in ctx under
// meterProviderKey. When ctx carries no provider (or the stored value is not
// a metric.MeterProvider) a noop provider is returned, so callers never have
// to check for nil.
func MeterProvider(ctx context.Context) metric.MeterProvider {
	if stored, ok := ctx.Value(meterProviderKey).(metric.MeterProvider); ok {
		return stored
	}
	return noop.NewMeterProvider()
}

// Meter returns a metric.Meter obtained from the MeterProvider carried by
// ctx. The meter is named version.Package and tagged with version.Version as
// its instrumentation version, so measurements are attributed to buildx at
// the running version.
func Meter(ctx context.Context) metric.Meter {
	return MeterProvider(ctx).Meter(
		version.Package,
		metric.WithInstrumentationVersion(version.Version),
	)
}

// reportFunc returns a ReportFunc for collecting ResourceMetrics and then
Expand Down Expand Up @@ -184,6 +218,48 @@ func otelExporterOtlpEndpoint(cli command.Cli) (string, error) {
}

// deltaTemporality is a temporality selector that answers
// metricdata.DeltaTemporality for every instrument kind.
//
// Strictly speaking this is optional: every invocation reports under its own
// unique resource, so no two invocations are aggregated as one stream anyway.
// Delta is still preferred because it is friendlier to downstream processors
// with cardinality concerns: they can aggregate over a time interval and drop
// the data when the interval ends. Cumulative temporality would suggest that
// another point may follow, which could lead them to hold on to state they
// have no reason to keep.
func deltaTemporality(sdkmetric.InstrumentKind) metricdata.Temporality {
	return metricdata.DeltaTemporality
}

// res caches the resource built by Resource, and resOnce guards its one-time
// construction.
var (
res *resource.Resource
resOnce sync.Once
)

// Resource returns the resource describing this CLI invocation. It is built
// on the first call and the same value is returned on every later call.
//
// The resource combines the service name reported by serviceNameDetector, a
// freshly generated UUID as "service.instance.id", attributes taken from the
// environment, and the telemetry SDK attributes. An error from resource.New
// is passed to otel.Handle; whatever resource.New returned is still stored
// and handed back.
func Resource() *resource.Resource {
	resOnce.Do(func() {
		// One random id per process so each invocation is distinguishable.
		instanceID := attribute.Stringer("service.instance.id", uuid.New())

		built, err := resource.New(context.Background(),
			resource.WithDetectors(serviceNameDetector{}),
			resource.WithAttributes(instanceID),
			resource.WithFromEnv(),
			resource.WithTelemetrySDK(),
		)
		res = built
		if err != nil {
			otel.Handle(err)
		}
	})
	return res
}

// serviceNameDetector is a resource detector that reports the service name
// of the running binary.
type serviceNameDetector struct{}

// Detect returns a resource whose semconv.ServiceNameKey attribute is the
// base name of os.Args[0], using semconv.SchemaURL as the schema URL.
func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
	binaryName := func() (string, error) {
		return filepath.Base(os.Args[0]), nil
	}
	detector := resource.StringDetector(semconv.SchemaURL, semconv.ServiceNameKey, binaryName)
	return detector.Detect(ctx)
}
8 changes: 2 additions & 6 deletions util/metrics/otlp.go → internal/metrics/otlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,9 @@ func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) {

switch proto {
case "grpc":
return otlpmetricgrpc.New(ctx,
otlpmetricgrpc.WithTemporalitySelector(deltaTemporality),
)
return otlpmetricgrpc.New(ctx)
case "http/protobuf":
return otlpmetrichttp.New(ctx,
otlpmetrichttp.WithTemporalitySelector(deltaTemporality),
)
return otlpmetrichttp.New(ctx)
// case "http/json": // unsupported by library
default:
return nil, errors.Errorf("unsupported otlp protocol %v", proto)
Expand Down
37 changes: 37 additions & 0 deletions internal/metrics/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package metrics

import (
"context"
"time"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)

const (
// TimeUnit is the unit string ("ms") that Measure sets on the histograms it
// creates; Measure records durations as whole milliseconds.
TimeUnit string = "ms"
)

// RecordFunc is the function returned by Measure. Calling it records the
// measurement, attaching the given attributes to the recorded value.
type RecordFunc func(ctx context.Context, attrs ...attribute.KeyValue)

// Measure starts timing an operation and returns a RecordFunc that records
// the elapsed time when called.
//
// An Int64Histogram named name is created on the Meter obtained from ctx. Its
// unit is TimeUnit unless one of opts overrides it, since opts are applied
// after the default. A failure to create the instrument is reported through
// otel.Handle. The clock starts once the instrument has been requested; the
// returned function records the whole milliseconds elapsed since then,
// together with the attributes it is given.
func Measure(ctx context.Context, name string, opts ...metric.Int64HistogramOption) RecordFunc {
	histogramOpts := make([]metric.Int64HistogramOption, 0, len(opts)+1)
	histogramOpts = append(histogramOpts, metric.WithUnit(TimeUnit))
	histogramOpts = append(histogramOpts, opts...)

	histogram, err := Meter(ctx).Int64Histogram(name, histogramOpts...)
	if err != nil {
		otel.Handle(err)
	}

	began := time.Now()
	return func(ctx context.Context, attrs ...attribute.KeyValue) {
		elapsed := time.Since(began).Milliseconds()
		histogram.Record(ctx, elapsed, metric.WithAttributes(attrs...))
	}
}

0 comments on commit 6a79645

Please sign in to comment.