From 53af650f85bb9570788d4c67cd6cb49877c498ef Mon Sep 17 00:00:00 2001
From: Xu Deng
Date: Thu, 2 May 2024 13:02:00 -0400
Subject: [PATCH] Add emitting of e2e test instance metrics (#8089)

---
 internal/test/e2e/cloudwatch.go | 120 ++++++++++++++++++++++++++++++++
 internal/test/e2e/run.go        |   1 +
 2 files changed, 121 insertions(+)
 create mode 100644 internal/test/e2e/cloudwatch.go

diff --git a/internal/test/e2e/cloudwatch.go b/internal/test/e2e/cloudwatch.go
new file mode 100644
index 000000000000..fe0a7e65c54a
--- /dev/null
+++ b/internal/test/e2e/cloudwatch.go
@@ -0,0 +1,120 @@
+package e2e
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/session"
+	"github.com/aws/aws-sdk-go/service/cloudwatch"
+
+	"github.com/aws/eks-anywhere/pkg/logger"
+)
+
+const (
+	// metricsNamespace is the CloudWatch namespace the e2e test metrics are published under.
+	metricsNamespace = "EksaE2ETests"
+	// unknownProvider is the Provider dimension value used when a test regex names no known provider.
+	unknownProvider = "unknown"
+)
+
+// providerRe captures a known provider name that immediately follows "Test" in a test name.
+// It is compiled once at package scope instead of on every getProviderName call.
+var providerRe = regexp.MustCompile(`Test((?i:vsphere)|(?i:cloudstack)|(?i:snow)|(?i:docker)|(?i:nutanix)|(?i:tinkerbell))`)
+
+// svc is the CloudWatch client used to publish metrics. It is left nil when the
+// AWS session cannot be created, and metrics publishing is skipped in that case.
+var svc *cloudwatch.CloudWatch
+
+// init creates the package-level CloudWatch client from a new AWS session.
+// On failure it prints the error and leaves svc nil.
+func init() {
+	s, err := session.NewSession()
+	if err != nil {
+		fmt.Println("Cannot create CloudWatch service", err)
+		return
+	}
+	svc = cloudwatch.New(s)
+}
+
+// putInstanceTestResultMetrics publishes the outcome of a single test instance
+// to CloudWatch as three count metrics: ErroredInstanceTests,
+// FailedInstanceTests and SucceededInstanceTests. The metric matching the
+// outcome is 1 and the other two are 0.
+//
+// The zeros are emitted on purpose so the series can be aggregated. For
+// example, when the succeeded counts are [0, 1, 0, 1] the success rate is
+// 2 / 4 = 50%. If the zeros were excluded the series would become [1, 1] and
+// the success rate could not be calculated from it.
+//
+// The three data points are sent in a single PutMetricData request instead of
+// one request per metric. Failures are logged and not returned to the caller.
+func putInstanceTestResultMetrics(r instanceTestsResults) {
+	if svc == nil {
+		logger.Info("Cannot publish metrics as cloudwatch service was not initialized")
+		return
+	}
+
+	logger.Info("Publishing instance test result metrics")
+
+	erroredCount, failedCount, succeededCount := 0, 0, 0
+	switch {
+	case r.err != nil:
+		erroredCount = 1
+	case !r.testCommandResult.Successful():
+		failedCount = 1
+	default:
+		succeededCount = 1
+	}
+
+	dimensions := []*cloudwatch.Dimension{
+		{
+			Name:  aws.String("Provider"),
+			Value: aws.String(getProviderName(r.conf.Regex)),
+		},
+		{
+			Name:  aws.String("BranchName"),
+			Value: aws.String(r.conf.BranchName),
+		},
+	}
+	// All three data points share the same timestamp.
+	timestamp := aws.Time(time.Now())
+
+	input := &cloudwatch.PutMetricDataInput{
+		Namespace: aws.String(metricsNamespace),
+		MetricData: []*cloudwatch.MetricDatum{
+			countDatum("ErroredInstanceTests", erroredCount, dimensions, timestamp),
+			countDatum("FailedInstanceTests", failedCount, dimensions, timestamp),
+			countDatum("SucceededInstanceTests", succeededCount, dimensions, timestamp),
+		},
+	}
+
+	// TODO: publish time metrics
+	if _, err := svc.PutMetricData(input); err != nil {
+		logger.Error(err, "Cannot put metrics to cloudwatch")
+		return
+	}
+	logger.Info("Test instance metrics published")
+}
+
+// countDatum builds a "Count" data point for the named metric with the given
+// value, dimensions and timestamp.
+func countDatum(name string, value int, dimensions []*cloudwatch.Dimension, timestamp *time.Time) *cloudwatch.MetricDatum {
+	return &cloudwatch.MetricDatum{
+		MetricName: aws.String(name),
+		Unit:       aws.String("Count"),
+		Value:      aws.Float64(float64(value)),
+		Dimensions: dimensions,
+		Timestamp:  timestamp,
+	}
+}
+
+// getProviderName returns the lowercased provider name found in the test regex
+// testRe, or "unknown" when no known provider immediately follows "Test".
+func getProviderName(testRe string) string {
+	if m := providerRe.FindStringSubmatch(testRe); len(m) > 1 {
+		return strings.ToLower(m[1])
+	}
+	return unknownProvider
+}
diff --git a/internal/test/e2e/run.go b/internal/test/e2e/run.go
index d1417f5c1b69..595fae1db8bb 100644
--- a/internal/test/e2e/run.go
+++ b/internal/test/e2e/run.go
@@ -169,6 +169,7 @@ func RunTestsInParallel(conf ParallelRunConf) error {
 			"completedInstances", completedInstances,
 			"totalInstances", totalInstances,
 		)
+		putInstanceTestResultMetrics(r)
 	}
 
 	if failedInstances > 0 {