Skip to content

Commit

Permalink
Merge pull request #2224 from jsternberg/otelsdk-package
Browse files Browse the repository at this point in the history
otel: include service instance id attribute to resource and move to metricutil package
  • Loading branch information
tonistiigi authored Jan 30, 2024
2 parents 2c3d7da + c65b7ed commit cb85668
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 101 deletions.
9 changes: 0 additions & 9 deletions commands/bake.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/cli/cli/command"
Expand All @@ -43,14 +42,6 @@ type bakeOptions struct {
}

func runBake(ctx context.Context, dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) {
mp, report, err := metrics.MeterProvider(dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "bake")

ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake")
if err != nil {
return err
Expand Down
39 changes: 3 additions & 36 deletions commands/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ import (
"github.com/docker/buildx/util/cobrautil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/ioset"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/metricutil"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/buildx/version"
"github.com/docker/cli-docs-tool/annotation"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
Expand All @@ -53,9 +52,6 @@ import (
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"google.golang.org/grpc/codes"
)

Expand Down Expand Up @@ -216,13 +212,11 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) {
}

func runBuild(ctx context.Context, dockerCli command.Cli, options buildOptions) (err error) {
mp, report, err := metrics.MeterProvider(dockerCli)
mp, err := metricutil.NewMeterProvider(ctx, dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "build")
defer mp.Report(context.Background())

ctx, end, err := tracing.TraceCurrentCommand(ctx, "build")
if err != nil {
Expand Down Expand Up @@ -936,30 +930,3 @@ func maybeJSONArray(v string) []string {
}
return []string{v}
}

func recordVersionInfo(mp metric.MeterProvider, command string) {
// Still in the process of testing/stabilizing these counters.
if !isExperimental() {
return
}

meter := mp.Meter("github.com/docker/buildx",
metric.WithInstrumentationVersion(version.Version),
)

counter, err := meter.Int64Counter("docker.cli.count",
metric.WithDescription("Number of invocations of the docker buildx command."),
)
if err != nil {
otel.Handle(err)
}

counter.Add(context.Background(), 1,
metric.WithAttributes(
attribute.String("command", command),
attribute.String("package", version.Package),
attribute.String("version", version.Version),
attribute.String("revision", version.Revision),
),
)
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/mod v0.13.0
Expand Down Expand Up @@ -147,7 +148,6 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
Expand Down
15 changes: 15 additions & 0 deletions util/confutil/exp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package confutil

import (
"os"
"strconv"
)

// IsExperimental checks if the experimental flag has been configured.
func IsExperimental() bool {
if v, ok := os.LookupEnv("BUILDX_EXPERIMENTAL"); ok {
vv, _ := strconv.ParseBool(v)
return vv
}
return false
}
122 changes: 74 additions & 48 deletions util/metrics/metrics.go → util/metricutil/metric.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package metrics
package metricutil

import (
"context"
Expand All @@ -7,8 +7,9 @@ import (
"path"
"time"

"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/version"
"github.com/docker/cli/cli/command"
"github.com/moby/buildkit/util/tracing/detect"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric"
Expand All @@ -20,75 +21,86 @@ import (

const (
otelConfigFieldName = "otel"
shutdownTimeout = 2 * time.Second
reportTimeout = 2 * time.Second
)

// ReportFunc is invoked to signal the metrics should be sent to the
// desired endpoint. It should be invoked on application shutdown.
type ReportFunc func()
// MeterProvider holds a MeterProvider for metric generation and the configured
// exporters for reporting metrics from the CLI.
type MeterProvider struct {
metric.MeterProvider
reader *sdkmetric.ManualReader
exporters []sdkmetric.Exporter
}

// MeterProvider returns a MeterProvider suitable for CLI usage.
// The primary difference between this metric reader and a more typical
// usage is that metric reporting only happens once when ReportFunc
// is invoked.
func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) {
// NewMeterProvider configures a MeterProvider from the CLI context.
func NewMeterProvider(ctx context.Context, cli command.Cli) (*MeterProvider, error) {
var exps []sdkmetric.Exporter

if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}
// Only metric exporters if the experimental flag is set.
if confutil.IsExperimental() {
if exp, err := dockerOtelExporter(cli); err != nil {
return nil, err
} else if exp != nil {
exps = append(exps, exp)
}

if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
if exp, err := detectOtlpExporter(ctx); err != nil {
return nil, err
} else if exp != nil {
exps = append(exps, exp)
}
}

if len(exps) == 0 {
// No exporters are configured so use a noop provider.
return noop.NewMeterProvider(), func() {}, nil
return &MeterProvider{
MeterProvider: noop.NewMeterProvider(),
}, nil
}

// Use delta temporality because, since this is a CLI program, we can never
// know the cumulative value.
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
mp := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(detect.Resource()),
sdkmetric.WithResource(Resource()),
sdkmetric.WithReader(reader),
)
return mp, reportFunc(reader, exps), nil
return &MeterProvider{
MeterProvider: mp,
reader: reader,
exporters: exps,
}, nil
}

// reportFunc returns a ReportFunc for collecting ResourceMetrics and then
// exporting them to the configured Exporter.
func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc {
return func() {
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
defer cancel()

var rm metricdata.ResourceMetrics
if err := reader.Collect(ctx, &rm); err != nil {
// Error when collecting metrics. Do not send any.
return
}
// Report exports metrics to the configured exporter. This should be done before the CLI
// exits.
func (m *MeterProvider) Report(ctx context.Context) {
if m.reader == nil {
// Not configured.
return
}

var eg errgroup.Group
for _, exp := range exps {
exp := exp
eg.Go(func() error {
_ = exp.Export(ctx, &rm)
_ = exp.Shutdown(ctx)
return nil
})
}
ctx, cancel := context.WithTimeout(ctx, reportTimeout)
defer cancel()

// Can't report an error because we don't allow it to.
_ = eg.Wait()
var rm metricdata.ResourceMetrics
if err := m.reader.Collect(ctx, &rm); err != nil {
// Error when collecting metrics. Do not send any.
return
}

var eg errgroup.Group
for _, exp := range m.exporters {
exp := exp
eg.Go(func() error {
_ = exp.Export(ctx, &rm)
_ = exp.Shutdown(ctx)
return nil
})
}

// Can't report an error because we don't allow it to.
_ = eg.Wait()
}

// dockerOtelExporter reads the CLI metadata to determine an OTLP exporter
Expand Down Expand Up @@ -184,6 +196,20 @@ func otelExporterOtlpEndpoint(cli command.Cli) (string, error) {
}

// deltaTemporality sets the Temporality of every instrument to delta.
//
// This isn't really needed since we create a unique resource on each invocation,
// but it can help with cardinality concerns for downstream processors since they can
// perform aggregation for a time interval and then discard the data once that time
// period has passed. Cumulative temporality would imply to the downstream processor
// that they might receive a successive point and they may unnecessarily keep state
// they really shouldn't.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality
}

// Meter returns a Meter from the MetricProvider that indicates the measurement
// comes from buildx with the appropriate version.
func Meter(mp metric.MeterProvider) metric.Meter {
return mp.Meter(version.Package,
metric.WithInstrumentationVersion(version.Version))
}
10 changes: 3 additions & 7 deletions util/metrics/otlp.go → util/metricutil/otlp.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package metrics
package metricutil

import (
"context"
Expand Down Expand Up @@ -35,13 +35,9 @@ func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) {

switch proto {
case "grpc":
return otlpmetricgrpc.New(ctx,
otlpmetricgrpc.WithTemporalitySelector(deltaTemporality),
)
return otlpmetricgrpc.New(ctx)
case "http/protobuf":
return otlpmetrichttp.New(ctx,
otlpmetrichttp.WithTemporalitySelector(deltaTemporality),
)
return otlpmetrichttp.New(ctx)
// case "http/json": // unsupported by library
default:
return nil, errors.Errorf("unsupported otlp protocol %v", proto)
Expand Down
53 changes: 53 additions & 0 deletions util/metricutil/resource.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package metricutil

import (
"context"
"os"
"path/filepath"
"sync"

"github.com/google/uuid"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
)

var (
res *resource.Resource
resOnce sync.Once
)

// Resource retrieves the OTEL resource for the buildx CLI.
func Resource() *resource.Resource {
resOnce.Do(func() {
var err error
res, err = resource.New(context.Background(),
resource.WithDetectors(serviceNameDetector{}),
resource.WithAttributes(
// Use a unique instance id so OTEL knows that each invocation
// of the CLI is its own instance. Without this, downstream
// OTEL processors may think the same process is restarting
// continuously and reset the metric counters.
semconv.ServiceInstanceID(uuid.New().String()),
),
resource.WithFromEnv(),
resource.WithTelemetrySDK(),
)
if err != nil {
otel.Handle(err)
}
})
return res
}

type serviceNameDetector struct{}

func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
return resource.StringDetector(
semconv.SchemaURL,
semconv.ServiceNameKey,
func() (string, error) {
return filepath.Base(os.Args[0]), nil
},
).Detect(ctx)
}

0 comments on commit cb85668

Please sign in to comment.