Merge pull request #11 from levenlabs/master

Support for DISTRIBUTION metric type
prometheus-community · Aug 27, 2017 · 0a914da · 0a914da
2 parents 0f8f390 + 8c6108e
commit 0a914da
Show file tree

Hide file tree

Showing 2 changed files with 102 additions and 28 deletions.
diff --git a/README.md b/README.md
@@ -92,7 +92,8 @@ Metrics gathered from Google Stackdriver Monitoring are converted to Prometheus
   4. the monitored resource labels (see [Monitored Resource Types][monitored-resources])
 * For each timeseries, only the most recent data point is exported.
 * Stackdriver `GAUGE` metric kinds are reported as Prometheus `Gauge` metrics; Stackdriver `DELTA` and `CUMULATIVE` metric kinds are reported as Prometheus `Counter` metrics.
-* Only `BOOL`, `INT64` and `DOUBLE` metric types are supported, other types (`STRING`, `DISTRIBUTION` and `MONEY`) are discarded.
+* Only `BOOL`, `INT64`, `DOUBLE` and `DISTRIBUTION` metric types are supported; other types (`STRING` and `MONEY`) are discarded.
+* `DISTRIBUTION` metric types are reported as Prometheus `Histogram` metrics. The `_sum` time series is not supported: Stackdriver does not provide the sum, so it is always exported as `0`.
 
 ### Example
 

diff --git a/collectors/monitoring_collector.go b/collectors/monitoring_collector.go
@@ -1,7 +1,9 @@
 package collectors
 
 import (
+	"errors"
 	"fmt"
+	"math"
 	"sync"
 	"time"
 
@@ -226,6 +228,7 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics(
 	var metricValue float64
 	var metricValueType prometheus.ValueType
 	var newestTSPoint *monitoring.Point
+	var metricDesc *prometheus.Desc
 
 	for _, timeSeries := range page.TimeSeries {
 		newestEndTime := time.Unix(0, 0)
@@ -240,6 +243,34 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics(
 			}
 		}
 
+		labelKeys := []string{"unit"}
+		labelValues := []string{metricDescriptor.Unit}
+
+		// Add the metric labels
+		// @see https://cloud.google.com/monitoring/api/metrics
+		for key, value := range timeSeries.Metric.Labels {
+			labelKeys = append(labelKeys, key)
+			labelValues = append(labelValues, value)
+		}
+
+		// Add the monitored resource labels
+		// @see https://cloud.google.com/monitoring/api/resources
+		for key, value := range timeSeries.Resource.Labels {
+			labelKeys = append(labelKeys, key)
+			labelValues = append(labelValues, value)
+		}
+
+		// The metric name to report is composed by the 3 parts:
+		// 1. namespace is a constant prefix (stackdriver)
+		// 2. subsystem is the monitored resource type (ie gce_instance)
+		// 3. name is the metric type (ie compute.googleapis.com/instance/cpu/usage_time)
+		metricDesc = prometheus.NewDesc(
+			prometheus.BuildFQName("stackdriver", utils.NormalizeMetricName(timeSeries.Resource.Type), utils.NormalizeMetricName(timeSeries.Metric.Type)),
+			metricDescriptor.Description,
+			labelKeys,
+			prometheus.Labels{},
+		)
+
 		switch timeSeries.MetricKind {
 		case "GAUGE":
 			metricValueType = prometheus.GaugeValue
@@ -261,39 +292,28 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics(
 			metricValue = float64(*newestTSPoint.Value.Int64Value)
 		case "DOUBLE":
 			metricValue = *newestTSPoint.Value.DoubleValue
+		case "DISTRIBUTION":
+			dist := newestTSPoint.Value.DistributionValue
+			buckets, err := c.generateHistogramBuckets(dist)
+			if err == nil {
+				ch <- prometheus.MustNewConstHistogram(
+					metricDesc,
+					uint64(dist.Count),
+					0, // Stackdriver does not provide the sum
+					buckets,
+					labelValues...,
+				)
+			} else {
+				log.Debugf("Discarding resource %s metric %s: %s", timeSeries.Resource.Type, timeSeries.Metric.Type, err)
+			}
+			continue
 		default:
 			log.Debugf("Discarding `%s` metric: %+v", timeSeries.ValueType, timeSeries)
 			continue
 		}
 
-		labelKeys := []string{"unit"}
-		labelValues := []string{metricDescriptor.Unit}
-
-		// Add the metric labels
-		// @see https://cloud.google.com/monitoring/api/metrics
-		for key, value := range timeSeries.Metric.Labels {
-			labelKeys = append(labelKeys, key)
-			labelValues = append(labelValues, value)
-		}
-
-		// Add the monitored resource labels
-		// @see https://cloud.google.com/monitoring/api/resources
-		for key, value := range timeSeries.Resource.Labels {
-			labelKeys = append(labelKeys, key)
-			labelValues = append(labelValues, value)
-		}
-
-		// The metric name to report is composed by the 3 parts:
-		// 1. namespace is a constant prefix (stackdriver)
-		// 2. subsystem is the monitored resource type (ie gce_instance)
-		// 3. name is the metric type (ie compute.googleapis.com/instance/cpu/usage_time)
 		ch <- prometheus.MustNewConstMetric(
-			prometheus.NewDesc(
-				prometheus.BuildFQName("stackdriver", utils.NormalizeMetricName(timeSeries.Resource.Type), utils.NormalizeMetricName(timeSeries.Metric.Type)),
-				metricDescriptor.Description,
-				labelKeys,
-				prometheus.Labels{},
-			),
+			metricDesc,
 			metricValueType,
 			metricValue,
 			labelValues...,
@@ -302,3 +322,56 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics(
 
 	return nil
 }
+
+// generateHistogramBuckets converts a Stackdriver distribution into the bucket
+// map passed to prometheus.MustNewConstHistogram: every key is a bucket upper
+// bound and every value is the cumulative count of that bucket and all the
+// buckets before it.
+//
+// An error is returned when the distribution or its bucket options are missing,
+// or when none of the explicit, linear or exponential layouts is set.
+func (c *MonitoringCollector) generateHistogramBuckets(
+	dist *monitoring.Distribution,
+) (map[float64]uint64, error) {
+	// Guard against points that carry no distribution or no bucket options;
+	// dereferencing them below would panic instead of letting the caller
+	// discard the time series.
+	if dist == nil || dist.BucketOptions == nil {
+		return nil, errors.New("Missing distribution bucket options")
+	}
+	opts := dist.BucketOptions
+
+	// bucketKeys holds the upper bound of every bucket, in order. Each layout
+	// reserves one extra trailing slot for the overflow bucket, which is set
+	// to +Inf after the switch.
+	var bucketKeys []float64
+	switch {
+	case opts.ExplicitBuckets != nil:
+		// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#explicit
+		bucketKeys = make([]float64, len(opts.ExplicitBuckets.Bounds)+1)
+		copy(bucketKeys, opts.ExplicitBuckets.Bounds)
+	case opts.LinearBuckets != nil:
+		// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#linear
+		// NumFiniteBuckets excludes the underflow and overflow buckets, so
+		// the total bucket count is num+2.
+		linear := opts.LinearBuckets
+		num := int(linear.NumFiniteBuckets)
+		bucketKeys = make([]float64, num+2)
+		for i := 0; i <= num; i++ {
+			bucketKeys[i] = linear.Offset + (float64(i) * linear.Width)
+		}
+	case opts.ExponentialBuckets != nil:
+		// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#exponential
+		// NumFiniteBuckets excludes the underflow and overflow buckets, so
+		// the total bucket count is num+2.
+		exponential := opts.ExponentialBuckets
+		num := int(exponential.NumFiniteBuckets)
+		bucketKeys = make([]float64, num+2)
+		for i := 0; i <= num; i++ {
+			bucketKeys[i] = exponential.Scale * math.Pow(exponential.GrowthFactor, float64(i))
+		}
+	default:
+		return nil, errors.New("Unknown distribution buckets")
+	}
+	// The last bucket is always infinity
+	// @see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/TypedValue#bucketoptions
+	bucketKeys[len(bucketKeys)-1] = math.Inf(1)
+
+	// Prometheus histogram buckets are cumulative, whereas Google reports the
+	// count of each bucket on its own, so a running total is carried from one
+	// bucket to the next.
+	// BucketCounts may be shorter than the number of buckets; the missing
+	// trailing buckets hold no samples and therefore repeat the running total.
+	buckets := make(map[float64]uint64, len(bucketKeys))
+	var last uint64
+	for i, b := range bucketKeys {
+		if i < len(dist.BucketCounts) {
+			last += uint64(dist.BucketCounts[i])
+		}
+		buckets[b] = last
+	}
+	return buckets, nil
+}