Skip to content

Commit

Permalink
Fix bug where dimensions do not match between DescribeBaseMetrics and GetMonitorData
Browse files Browse the repository at this point in the history
  • Loading branch information
shitoumomo committed Jan 28, 2022
1 parent 044b83d commit 822ff84
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 51 deletions.
5 changes: 4 additions & 1 deletion pkg/collector/product.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ func (c *TcProductCollector) LoadMetricsByProductConf() error {
level.Error(c.logger).Log("msg", "create metric series err", "err", err, "Namespace", c.Namespace, "name", mname)
continue
}
level.Info(c.logger).Log("msg", "found instances", "count", len(series), "Namespace", c.Namespace, "name", mname)
err = nm.LoadSeries(series)
if err != nil {
level.Error(c.logger).Log("msg", "load metric series err", "err", err, "Namespace", c.Namespace, "name", mname)
Expand Down Expand Up @@ -227,7 +228,7 @@ func (c *TcProductCollector) initQuerys() (err error) {
return e
}
c.Querys = append(c.Querys, q)
numSeries += len(q.Metric.Series)
numSeries += len(q.Metric.SeriesCache.Series)
}
level.Info(c.logger).Log("msg", "Init all query ok", "Namespace", c.Namespace, "numMetric", len(c.Querys), "numSeries", numSeries)
return
Expand Down Expand Up @@ -276,11 +277,13 @@ func (r *TcProductCollectorReloader) Run() {
time.Sleep(r.relodInterval)

for {
level.Info(r.logger).Log("msg", "start reload product metadata", "Namespace", r.collector.Namespace)
e := r.reloadMetricsByProductConf()
if e != nil {
level.Error(r.logger).Log("msg", "reload product error", "err", e,
"namespace", r.collector.Namespace)
}
level.Info(r.logger).Log("msg", "complete reload product metadata", "Namespace", r.collector.Namespace)
select {
case <-r.ctx.Done():
return
Expand Down
5 changes: 1 addition & 4 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,10 @@ type TencentProduct struct {
}

func (p *TencentProduct) IsReloadEnable() bool {
if len(p.OnlyIncludeMetrics) > 0 {
return false
}
if util.IsStrInList(constant.NotSupportInstanceNamespaces, p.Namespace) {
return false
}
return p.AllInstances
return true
}

type TencentConfig struct {
Expand Down
13 changes: 3 additions & 10 deletions pkg/metric/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type TcmLabels struct {
}

// 根据标签名, 获取所有标签的值
func (l *TcmLabels) GetValues(filters map[string]string, ins instance.TcInstance) (values []string, err error) {
func (l *TcmLabels) GetValues(filters map[string]string, ins instance.TcInstance) map[string]string {
lowerKeyFilters := map[string]string{}
for k, v := range filters {
lowerKeyFilters[strings.ToLower(k)] = v
Expand All @@ -41,25 +41,18 @@ func (l *TcmLabels) GetValues(filters map[string]string, ins instance.TcInstance
v, ok := lowerKeyFilters[strings.ToLower(name)]
if ok {
nameValues[name] = v
} else {
nameValues[name] = ""
}
}
for _, name := range l.instanceLabelNames {
v, e := ins.GetFieldValueByName(name)
if e != nil {
nameValues[name] = ""
} else {
if e == nil && v != "" {
nameValues[name] = v
}
}
for name, value := range l.constLabels {
nameValues[name] = value
}
for _, name := range l.Names {
values = append(values, nameValues[name])
}
return
return nameValues
}

func NewTcmLabels(qln []string, iln []string, cl Labels) (*TcmLabels, error) {
Expand Down
88 changes: 59 additions & 29 deletions pkg/metric/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,31 @@ import (
"github.com/tencentyun/tencentcloud-exporter/pkg/util"
)

// SeriesCache bundles the time series loaded for a metric together with the
// set of query-label names seen across those series.
type SeriesCache struct {
Series map[string]*TcmSeries // the time series held by this cache, keyed by series Id
// LabelNames has to be cached because, in some cases, the dimensions returned
// by DescribeBaseMetrics and GetMonitorData do not match.
LabelNames map[string]struct{}
}

// newCache returns an empty SeriesCache whose Series and LabelNames maps are
// both initialized, so callers can write to them immediately.
func newCache() *SeriesCache {
	seriesByID := map[string]*TcmSeries{}
	labelNameSet := map[string]struct{}{}
	return &SeriesCache{Series: seriesByID, LabelNames: labelNameSet}
}

// Desc holds the fully-qualified metric name and help text from which a
// prometheus.Desc is built once the label names are known.
type Desc struct {
FQName string
Help string
}

// 代表一个指标, 包含多个时间线
type TcmMetric struct {
Id string
Meta *TcmMeta // 指标元数据
Labels *TcmLabels // 指标labels
Series map[string]*TcmSeries // 包含的多个时间线
StatPromDesc map[string]*prometheus.Desc // 按统计纬度的Desc, max、min、avg、last
Meta *TcmMeta // 指标元数据
Labels *TcmLabels // 指标labels
SeriesCache *SeriesCache
StatPromDesc map[string]Desc // 按统计纬度的Desc, max、min、avg、last
Conf *TcmMetricConfig
seriesLock sync.Mutex
}
Expand All @@ -25,11 +43,16 @@ func (m *TcmMetric) LoadSeries(series []*TcmSeries) error {
m.seriesLock.Lock()
defer m.seriesLock.Unlock()

newSeries := make(map[string]*TcmSeries)
newSeriesCache := newCache()

for _, s := range series {
newSeries[s.Id] = s
newSeriesCache.Series[s.Id] = s
// add label names
for key, _ := range s.QueryLabels {
newSeriesCache.LabelNames[key] = struct{}{}
}
}
m.Series = newSeries
m.SeriesCache = newSeriesCache
return nil
}

Expand Down Expand Up @@ -73,21 +96,34 @@ func (m *TcmMetric) GetLatestPromMetrics(repo TcmMetricRepository) (pms []promet
return nil, err
}
}
values, err := m.Labels.GetValues(samples.Series.QueryLabels, samples.Series.Instance)
if err != nil {
return nil, err
labels := m.Labels.GetValues(samples.Series.QueryLabels, samples.Series.Instance)
// add all dimensions from cloud monitor into prom labels
for _, dim := range point.Dimensions {
labels[*dim.Name] = *dim.Value
}
var names []string
var values []string
for k, v := range labels {
names = append(names, util.ToUnderlineLower(k))
values = append(values, v)
}
newDesc := prometheus.NewDesc(
desc.FQName,
desc.Help,
names,
nil,
)
var pm prometheus.Metric
if m.Conf.StatDelaySeconds > 0 {
pm = prometheus.NewMetricWithTimestamp(time.Unix(int64(point.Timestamp), 0), prometheus.MustNewConstMetric(
desc,
newDesc,
prometheus.GaugeValue,
point.Value,
values...,
))
} else {
pm = prometheus.MustNewConstMetric(
desc,
newDesc,
prometheus.GaugeValue,
point.Value,
values...,
Expand All @@ -102,7 +138,7 @@ func (m *TcmMetric) GetLatestPromMetrics(repo TcmMetricRepository) (pms []promet

func (m *TcmMetric) GetSeriesSplitByBatch(batch int) (steps [][]*TcmSeries) {
var series []*TcmSeries
for _, s := range m.Series {
for _, s := range m.SeriesCache.Series {
series = append(series, s)
}

Expand Down Expand Up @@ -130,7 +166,7 @@ func NewTcmMetric(meta *TcmMeta, conf *TcmMetricConfig) (*TcmMetric, error) {
return nil, err
}

statDescs := make(map[string]*prometheus.Desc)
statDescs := make(map[string]Desc)
statType, err := meta.GetStatType(conf.StatPeriodSeconds)
if err != nil {
return nil, err
Expand All @@ -142,10 +178,6 @@ func NewTcmMetric(meta *TcmMeta, conf *TcmMetricConfig) (*TcmMetric, error) {
statType,
*meta.m.Meaning.Zh,
)
var lnames []string
for _, name := range labels.Names {
lnames = append(lnames, util.ToUnderlineLower(name))
}
for _, s := range conf.StatTypes {
var st string
if s == "last" {
Expand Down Expand Up @@ -176,20 +208,18 @@ func NewTcmMetric(meta *TcmMeta, conf *TcmMetricConfig) (*TcmMetric, error) {
st,
)
fqName = strings.ToLower(fqName)
desc := prometheus.NewDesc(
fqName,
help,
lnames,
nil,
)
statDescs[strings.ToLower(s)] = desc
statDescs[strings.ToLower(s)] = Desc{
FQName: fqName,
Help: help,
}
}

m := &TcmMetric{
Id: id,
Meta: meta,
Labels: labels,
Series: map[string]*TcmSeries{},
Id: id,
Meta: meta,
Labels: labels,
SeriesCache: newCache(),

StatPromDesc: statDescs,
Conf: conf,
}
Expand Down
11 changes: 9 additions & 2 deletions pkg/metric/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,22 @@ func (repo *TcmMetricRepositoryImpl) buildSamples(
) (*TcmSamples, map[string]string, error) {
ql := map[string]string{}
for _, dimension := range points.Dimensions {
name := *dimension.Name
if *dimension.Value != "" {
ql[*dimension.Name] = *dimension.Value
_, ok := m.SeriesCache.LabelNames[name]
if !ok {
// if not in query label names, need ignore it
// because series id = query labels md5
continue
}
ql[name] = *dimension.Value
}
}
sid, e := GetTcmSeriesId(m, ql)
if e != nil {
return nil, ql, fmt.Errorf("get series id fail")
}
s, ok := m.Series[sid]
s, ok := m.SeriesCache.Series[sid]
if !ok {
return nil, ql, fmt.Errorf("response data point not match series")
}
Expand Down
16 changes: 11 additions & 5 deletions pkg/metric/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ import (

// 代表一个数据点
type TcmSample struct {
Timestamp float64
Value float64
Timestamp float64
Value float64
Dimensions []*monitor.Dimension
}

// 代表一个时间线的多个数据点
Expand Down Expand Up @@ -57,8 +58,9 @@ func (s *TcmSamples) GetAvgPoint() (point *TcmSample, err error) {
}
avg := sum / float64(len(s.Samples))
sample := &TcmSample{
Timestamp: s.Samples[len(s.Samples)-1].Timestamp,
Value: avg,
Timestamp: s.Samples[len(s.Samples)-1].Timestamp,
Value: avg,
Dimensions: s.Samples[len(s.Samples)-1].Dimensions,
}
return sample, nil
}
Expand All @@ -78,7 +80,11 @@ func NewTcmSamples(series *TcmSeries, p *monitor.DataPoint) (s *TcmSamples, err
}

for i := 0; i < len(p.Timestamps); i++ {
s.Samples = append(s.Samples, &TcmSample{*p.Timestamps[i], *p.Values[i]})
s.Samples = append(s.Samples, &TcmSample{
Timestamp: *p.Timestamps[i],
Value: *p.Values[i],
Dimensions: p.Dimensions,
})
}
return
}

0 comments on commit 822ff84

Please sign in to comment.