diff --git a/README.md b/README.md index 2be383ba..eba60072 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,8 @@ Metrics gathered from Google Stackdriver Monitoring are converted to Prometheus 4. the monitored resource labels (see [Monitored Resource Types][monitored-resources]) * For each timeseries, only the most recent data point is exported. * Stackdriver `GAUGE` metric kinds are reported as Prometheus `Gauge` metrics; Stackdriver `DELTA` and `CUMULATIVE` metric kinds are reported as Prometheus `Counter` metrics. -* Only `BOOL`, `INT64` and `DOUBLE` metric types are supported, other types (`STRING`, `DISTRIBUTION` and `MONEY`) are discarded. +* Only `BOOL`, `INT64`, `DOUBLE` and `DISTRIBUTION` metric types are supported, other types (`STRING` and `MONEY`) are discarded. +* `DISTRIBUTION` metric type is reported as a Prometheus `Histogram`, except the `_sum` time series is not supported. ### Example diff --git a/collectors/monitoring_collector.go b/collectors/monitoring_collector.go index eab3ca84..2b46fa56 100644 --- a/collectors/monitoring_collector.go +++ b/collectors/monitoring_collector.go @@ -1,7 +1,9 @@ package collectors import ( + "errors" "fmt" + "math" "sync" "time" @@ -226,6 +228,7 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics( var metricValue float64 var metricValueType prometheus.ValueType var newestTSPoint *monitoring.Point + var metricDesc *prometheus.Desc for _, timeSeries := range page.TimeSeries { newestEndTime := time.Unix(0, 0) @@ -240,6 +243,34 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics( } } + labelKeys := []string{"unit"} + labelValues := []string{metricDescriptor.Unit} + + // Add the metric labels + // @see https://cloud.google.com/monitoring/api/metrics + for key, value := range timeSeries.Metric.Labels { + labelKeys = append(labelKeys, key) + labelValues = append(labelValues, value) + } + + // Add the monitored resource labels + // @see https://cloud.google.com/monitoring/api/resources + for key, value := 
range timeSeries.Resource.Labels { + labelKeys = append(labelKeys, key) + labelValues = append(labelValues, value) + } + + // The metric name to report is composed by the 3 parts: + // 1. namespace is a constant prefix (stackdriver) + // 2. subsystem is the monitored resource type (ie gce_instance) + // 3. name is the metric type (ie compute.googleapis.com/instance/cpu/usage_time) + metricDesc = prometheus.NewDesc( + prometheus.BuildFQName("stackdriver", utils.NormalizeMetricName(timeSeries.Resource.Type), utils.NormalizeMetricName(timeSeries.Metric.Type)), + metricDescriptor.Description, + labelKeys, + prometheus.Labels{}, + ) + switch timeSeries.MetricKind { case "GAUGE": metricValueType = prometheus.GaugeValue @@ -261,39 +292,28 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics( metricValue = float64(*newestTSPoint.Value.Int64Value) case "DOUBLE": metricValue = *newestTSPoint.Value.DoubleValue + case "DISTRIBUTION": + dist := newestTSPoint.Value.DistributionValue + buckets, err := c.generateHistogramBuckets(dist) + if err == nil { + ch <- prometheus.MustNewConstHistogram( + metricDesc, + uint64(dist.Count), + 0, // Stackdriver does not provide the sum + buckets, + labelValues..., + ) + } else { + log.Debugf("Discarding resource %s metric %s: %s", timeSeries.Resource.Type, timeSeries.Metric.Type, err) + } + continue default: log.Debugf("Discarding `%s` metric: %+v", timeSeries.ValueType, timeSeries) continue } - labelKeys := []string{"unit"} - labelValues := []string{metricDescriptor.Unit} - - // Add the metric labels - // @see https://cloud.google.com/monitoring/api/metrics - for key, value := range timeSeries.Metric.Labels { - labelKeys = append(labelKeys, key) - labelValues = append(labelValues, value) - } - - // Add the monitored resource labels - // @see https://cloud.google.com/monitoring/api/resources - for key, value := range timeSeries.Resource.Labels { - labelKeys = append(labelKeys, key) - labelValues = append(labelValues, value) - } - - // The 
metric name to report is composed by the 3 parts:
-		// 1. namespace is a constant prefix (stackdriver)
-		// 2. subsystem is the monitored resource type (ie gce_instance)
-		// 3. name is the metric type (ie compute.googleapis.com/instance/cpu/usage_time)
 		ch <- prometheus.MustNewConstMetric(
-			prometheus.NewDesc(
-				prometheus.BuildFQName("stackdriver", utils.NormalizeMetricName(timeSeries.Resource.Type), utils.NormalizeMetricName(timeSeries.Metric.Type)),
-				metricDescriptor.Description,
-				labelKeys,
-				prometheus.Labels{},
-			),
+			metricDesc,
 			metricValueType,
 			metricValue,
 			labelValues...,
@@ -302,3 +322,77 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics(
 
 	return nil
 }
+
+// generateHistogramBuckets converts a Stackdriver distribution into the
+// cumulative bucket map that is passed to prometheus.MustNewConstHistogram.
+//
+// Bucket upper bounds are derived from dist.BucketOptions (explicit, linear or
+// exponential); the last bucket always has an upper bound of +Inf. Each value
+// in the returned map is the count of that bucket plus all preceding buckets.
+//
+// An error is returned, rather than panicking, when dist or its bucket options
+// are nil, when NumFiniteBuckets is negative, or when no known bucket layout is
+// set.
+func (c *MonitoringCollector) generateHistogramBuckets(
+	dist *monitoring.Distribution,
+) (map[float64]uint64, error) {
+	if dist == nil {
+		return nil, errors.New("missing distribution value")
+	}
+	opts := dist.BucketOptions
+	if opts == nil {
+		return nil, errors.New("missing distribution bucket options")
+	}
+
+	var bucketKeys []float64
+	switch {
+	case opts.ExplicitBuckets != nil:
+		// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#explicit
+		// One extra slot is reserved for the final (overflow) bucket.
+		bucketKeys = make([]float64, len(opts.ExplicitBuckets.Bounds)+1)
+		copy(bucketKeys, opts.ExplicitBuckets.Bounds)
+	case opts.LinearBuckets != nil:
+		// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#linear
+		// NumFiniteBuckets is inclusive so bucket count is num+2
+		num := int(opts.LinearBuckets.NumFiniteBuckets)
+		if num < 0 {
+			return nil, fmt.Errorf("invalid number of finite buckets: %d", num)
+		}
+		bucketKeys = make([]float64, num+2)
+		for i := 0; i <= num; i++ {
+			bucketKeys[i] = opts.LinearBuckets.Offset + (float64(i) * opts.LinearBuckets.Width)
+		}
+	case opts.ExponentialBuckets != nil:
+		// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#exponential
+		// NumFiniteBuckets is inclusive so bucket count is num+2
+		num := int(opts.ExponentialBuckets.NumFiniteBuckets)
+		if num < 0 {
+			return nil, fmt.Errorf("invalid number of finite buckets: %d", num)
+		}
+		bucketKeys = make([]float64, num+2)
+		for i := 0; i <= num; i++ {
+			bucketKeys[i] = opts.ExponentialBuckets.Scale * math.Pow(opts.ExponentialBuckets.GrowthFactor, float64(i))
+		}
+	default:
+		return nil, errors.New("Unknown distribution buckets")
+	}
+	// The last bucket is always infinity
+	// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#bucketoptions
+	bucketKeys[len(bucketKeys)-1] = math.Inf(1)
+
+	// Prometheus expects each bucket to have a lower bound of 0, but Google
+	// sends a bucket with a lower bound of the previous bucket's upper bound, so
+	// we need to store the last bucket and add it to the next bucket to make it
+	// 0-bound.
+	// Buckets beyond the end of dist.BucketCounts have no samples of their own,
+	// so they repeat the running total.
+	buckets := make(map[float64]uint64, len(bucketKeys))
+	var last uint64
+	for i, b := range bucketKeys {
+		if i < len(dist.BucketCounts) {
+			last += uint64(dist.BucketCounts[i])
+		}
+		buckets[b] = last
+	}
+	return buckets, nil
+}