Skip to content

Commit

Permalink
Send custom metrics to Azure (#25)
Browse files Browse the repository at this point in the history
* Send metrics to Azure

* grant current user "Monitoring Metrics Publisher" on platform

* add config to generate custom metrics endpoint

* update readme

* remove Float64() metric until it may be needed

* add additional metrics
  • Loading branch information
simongottschlag authored Sep 12, 2022
1 parent f9f0a70 commit dcd5deb
Show file tree
Hide file tree
Showing 15 changed files with 325 additions and 12 deletions.
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ YAML-files can contain one or more documents (with `---` as a document separator
- Populate Container Apps registries with default registry credential
- Send notifications to the git commits
- Filter locations, making it possible to specify in the manifest what regions can run the app
- Push custom metrics to Azure Monitor

## Frequently Asked Questions

Expand Down Expand Up @@ -120,7 +121,7 @@ In GitHub, a successful notification looks like this:

It sure is! You can find an example for the setup using terraform [here](test/terraform-multi-region/main.tf). We've also recorded a short video showing it in action:

[![Watch the video](https://img.youtube.com/vi/9SwfSIfa6I0/maxresdefault.jpg)](https://youtu.be/9SwfSIfa6I0)
[![Watch the video](docs/multi-region-thumbnail.jpg)](https://youtu.be/9SwfSIfa6I0)

> What is the location filter feature?

Expand All @@ -135,13 +136,19 @@ It makes it possible to specify `spec.locationFilter` with an array of what Azur
- No change if `spec.locationFilter` contains the location of azcagit (defined with `--location`)
- If `spec.locationFilter` has a value, or values, and none of them match the location of azcagit - we'll skip it (only logged with `--debug` enabled)

> Where can I find the custom metrics?

If you open the `azcagit` container app (in the platform resource group) and go to Monitoring and then Metrics, you can choose the namespace `azcagit` and then the specific metrics you want to look at.

![custom-metrics](docs/custom-metrics.png "Example custom metrics in Azure")

## Things TODO in the future

- [x] Append secrets to Container Apps from KeyVault
- [x] ~~Better error handling of validation failures (should deletion be stopped?)~~ _stop reconciliation on any parsing error_
- [x] Push git commit status (like [Flux notification-controller](https://fluxcd.io/docs/components/notification/provider/#git-commit-status))
- [ ] Health checks
- [ ] Metrics
- [x] Metrics
- [x] Manually trigger reconcile
- [x] Enforce Location for app
- [x] Add Container Registry credentials by default
Expand Down
Binary file added docs/custom-metrics.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/multi-region-location-filter.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/multi-region-thumbnail.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions src/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ type Config struct {
SubscriptionID string `json:"subscription_id" arg:"-s,--subscription-id,env:AZURE_SUBSCRIPTION_ID,required" help:"Azure Subscription ID"`
ManagedEnvironmentID string `json:"managed_environment_id" arg:"-m,--managed-environment-id,env:MANAGED_ENVIRONMENT_ID,required" help:"Azure Container Apps Managed Environment ID"`
KeyVaultName string `json:"key_vault_name" arg:"-k,--key-vault-name,env:KEY_VAULT_NAME,required" help:"Azure KeyVault name to extract secrets from"`
OwnContainerAppName string `json:"own_container_app_name" arg:"--own-container-app-name,env:OWN_CONTAINER_APP_NAME" default:"azcagit" help:"The name of the Container App that is running azcagit"`
OwnResourceGroupName string `json:"own_resource_group" arg:"--own-resource-group-name,env:OWN_RESOURCE_GROUP_NAME,required" help:"The name of the resource group that the azcagit Container App is located in"`
ContainerRegistryServer string `json:"container_registry_server" arg:"--container-registry-server,env:CONTAINER_REGISTRY_SERVER" help:"The container registry server"`
ContainerRegistryUsername string `json:"container_registry_username" arg:"--container-registry-username,env:CONTAINER_REGISTRY_USERNAME" help:"The container registry username"`
ContainerRegistryPassword string `json:"container_registry_password" arg:"--container-registry-password,env:CONTAINER_REGISTRY_PASSWORD" help:"The container registry password"`
Expand Down
4 changes: 4 additions & 0 deletions src/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ func TestNewConfig(t *testing.T) {
"baz",
"--key-vault-name",
"ze-keyvault",
"--own-resource-group-name",
"platform",
"--location",
"westeurope",
"--git-url",
Expand All @@ -31,6 +33,8 @@ func TestNewConfig(t *testing.T) {
SubscriptionID: "bar",
ManagedEnvironmentID: "baz",
KeyVaultName: "ze-keyvault",
OwnContainerAppName: "azcagit",
OwnResourceGroupName: "platform",
Location: "westeurope",
ReconcileInterval: "5m",
CheckoutPath: "/tmp",
Expand Down
5 changes: 4 additions & 1 deletion src/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/xenitab/azcagit/src/cache"
"github.com/xenitab/azcagit/src/config"
"github.com/xenitab/azcagit/src/logger"
"github.com/xenitab/azcagit/src/metrics"
"github.com/xenitab/azcagit/src/notification"
"github.com/xenitab/azcagit/src/reconcile"
"github.com/xenitab/azcagit/src/remote"
Expand Down Expand Up @@ -77,10 +78,12 @@ func run(ctx context.Context, cfg config.Config) error {
return err
}

metricsClient := metrics.NewAzureMetrics(cfg, cred)

appCache := cache.NewAppCache()
secretCache := cache.NewSecretCache()

reconciler, err := reconcile.NewReconciler(cfg, sourceClient, remoteClient, secretClient, notificationClient, appCache, secretCache)
reconciler, err := reconcile.NewReconciler(cfg, sourceClient, remoteClient, secretClient, notificationClient, metricsClient, appCache, secretCache)
if err != nil {
return err
}
Expand Down
140 changes: 140 additions & 0 deletions src/metrics/azure.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package metrics

import (
"context"
"fmt"
"net/http"
"strings"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/xenitab/azcagit/src/config"
)

// AzureMetrics is a Metrics implementation that POSTs each reported value
// to a custom metrics endpoint through an azcore runtime pipeline.
type AzureMetrics struct {
	// pl is the pipeline used to send the HTTP requests.
	pl runtime.Pipeline
	// customMetricsEndpoint is the URL every metric is POSTed to.
	customMetricsEndpoint string
	// azureRegion is the sanitized location, sent as the value of the
	// "region" dimension.
	azureRegion string
}

// Compile-time check that *AzureMetrics satisfies the Metrics interface.
var _ Metrics = (*AzureMetrics)(nil)

// NewAzureMetrics builds an AzureMetrics client. Requests are authenticated
// with a bearer token obtained from credential, and the target endpoint and
// region are derived from cfg.
func NewAzureMetrics(cfg config.Config, credential azcore.TokenCredential) *AzureMetrics {
	// NOTE: the doubled slash in the scope is deliberate. The audience has to
	// be `https://monitoring.azure.com/`, but with a single `/` something
	// inside `runtime` currently turns it into `https://monitoring.azure.com`.
	scopes := []string{"https://monitoring.azure.com//.default"}
	bearer := runtime.NewBearerTokenPolicy(credential, scopes, nil)

	pipelineOpts := runtime.PipelineOptions{PerRetry: []policy.Policy{bearer}}
	pipeline := runtime.NewPipeline("azcagit", "undefined", pipelineOpts, &policy.ClientOptions{})

	client := &AzureMetrics{
		pl:                    pipeline,
		customMetricsEndpoint: generateCustomMetricsEndpoint(cfg),
		azureRegion:           sanitizeAzureLocation(cfg.Location),
	}
	return client
}

// generateCustomMetricsEndpoint returns the regional monitoring URL for the
// Container App identified by cfg (subscription, own resource group and own
// container app name), using the sanitized cfg.Location as the host prefix.
func generateCustomMetricsEndpoint(cfg config.Config) string {
	resourcePath := fmt.Sprintf(
		"subscriptions/%s/resourceGroups/%s/providers/Microsoft.App/containerApps/%s",
		cfg.SubscriptionID,
		cfg.OwnResourceGroupName,
		cfg.OwnContainerAppName,
	)
	region := sanitizeAzureLocation(cfg.Location)
	return fmt.Sprintf("https://%s.monitoring.azure.com/%s/metrics", region, resourcePath)
}

// sanitizeAzureLocation strips every space character from location and
// lowercases the result, e.g. "West Europe" becomes "westeurope".
func sanitizeAzureLocation(location string) string {
	return strings.ToLower(strings.ReplaceAll(location, " ", ""))
}

// Int sends an integer metric named metricName; the value is converted to
// float64 before being pushed.
func (m *AzureMetrics) Int(ctx context.Context, metricName string, metric int) error {
	payload := newCustomMetrics(m.azureRegion, metricName, float64(metric))
	return m.create(ctx, payload)
}

// Duration sends a duration metric named metricName, expressed in seconds.
func (m *AzureMetrics) Duration(ctx context.Context, metricName string, metric time.Duration) error {
	seconds := metric.Seconds()
	return m.create(ctx, newCustomMetrics(m.azureRegion, metricName, seconds))
}

// Success sends a boolean metric named metricName, encoded as 1 when metric
// is true and 0 when it is false.
func (m *AzureMetrics) Success(ctx context.Context, metricName string, metric bool) error {
	var value float64 // zero value covers the "false" case
	if metric {
		value = 1
	}
	return m.create(ctx, newCustomMetrics(m.azureRegion, metricName, value))
}

// newCustomMetrics wraps a single sample in a CustomMetrics payload stamped
// with the current time. The payload uses the "azcagit" namespace and one
// "region" dimension whose value is region. Because there is exactly one
// sample, min, max and sum all equal metric and count is 1.
func newCustomMetrics(region string, metricName string, metric float64) CustomMetrics {
	sample := CustomMetricsSeries{
		DimValues: []string{region},
		Min:       metric,
		Max:       metric,
		Sum:       metric,
		Count:     1,
	}

	base := CustomMetricsBaseData{
		Metric:    metricName,
		Namespace: "azcagit",
		DimNames:  []string{"region"},
		Series:    []CustomMetricsSeries{sample},
	}

	return CustomMetrics{
		Time: time.Now(),
		Data: CustomMetricsData{BaseData: base},
	}
}

// CustomMetricsSeries is one pre-aggregated series inside a custom metrics
// payload.
//
// The numeric fields deliberately do NOT use `omitempty`: a legitimate value
// of 0 (for example a failed reconcile reported as 0, or zero source apps)
// must still be serialized, otherwise the payload is sent without its
// min/max/sum/count aggregates.
type CustomMetricsSeries struct {
	// DimValues holds one value per dimension name, in the same order.
	DimValues []string `json:"dimValues,omitempty"`
	// Min is the smallest sample in the aggregation window.
	Min float64 `json:"min"`
	// Max is the largest sample in the aggregation window.
	Max float64 `json:"max"`
	// Sum is the total of all samples in the aggregation window.
	Sum float64 `json:"sum"`
	// Count is the number of samples that were aggregated.
	Count int `json:"count"`
}

// CustomMetricsBaseData names a metric and carries its series.
type CustomMetricsBaseData struct {
	// Metric is the name of the metric.
	Metric string `json:"metric,omitempty"`
	// Namespace is the namespace the metric is grouped under.
	Namespace string `json:"namespace,omitempty"`
	// DimNames lists the dimension names used by every entry in Series.
	DimNames []string `json:"dimNames,omitempty"`
	// Series holds the aggregated values.
	Series []CustomMetricsSeries `json:"series,omitempty"`
}

// CustomMetricsData is the "data" envelope of a custom metrics payload; it
// only wraps the base data.
type CustomMetricsData struct {
	// BaseData is serialized under the "baseData" key.
	BaseData CustomMetricsBaseData `json:"baseData,omitempty"`
}

// CustomMetrics is the top-level JSON body sent to the custom metrics
// endpoint.
type CustomMetrics struct {
	// Time is the timestamp attached to the sample.
	Time time.Time `json:"time,omitempty"`
	// Data carries the metric name, namespace, dimensions and series.
	Data CustomMetricsData `json:"data,omitempty"`
}

// create sends body to the custom metrics endpoint. It returns the request
// construction or transport error if one occurs, and a response error for
// any status code other than 200 OK.
func (m *AzureMetrics) create(ctx context.Context, body CustomMetrics) error {
	req, err := m.customCreateRequest(ctx, body)
	if err != nil {
		return err
	}

	resp, err := m.pl.Do(req)
	if err != nil {
		return err
	}

	if runtime.HasStatusCode(resp, http.StatusOK) {
		return nil
	}
	return runtime.NewResponseError(resp)
}

// customCreateRequest builds the POST request for the custom metrics
// endpoint, sets the Accept header to JSON and attaches body as the JSON
// payload. If marshalling fails, the error is returned together with the
// request that was built so far.
func (m *AzureMetrics) customCreateRequest(ctx context.Context, body CustomMetrics) (*policy.Request, error) {
	req, err := runtime.NewRequest(ctx, http.MethodPost, m.customMetricsEndpoint)
	if err != nil {
		return nil, err
	}

	req.Raw().Header["Accept"] = []string{"application/json"}

	marshalErr := runtime.MarshalAsJSON(req, body)
	return req, marshalErr
}
51 changes: 51 additions & 0 deletions src/metrics/inmem.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package metrics

import (
"context"
"time"
)

// InMemMetrics is a Metrics implementation that keeps every reported value
// in memory, in the order it was received, instead of sending it anywhere.
type InMemMetrics struct {
	// intMetrics holds the values passed to Int.
	intMetrics []int
	// durationMetrics holds the values passed to Duration.
	durationMetrics []time.Duration
	// successMetrics holds the values passed to Success.
	successMetrics []bool
}

// NewInMemMetrics returns an empty InMemMetrics with no recorded values.
func NewInMemMetrics() *InMemMetrics {
	return new(InMemMetrics)
}

// Compile-time check that *InMemMetrics satisfies the Metrics interface.
var _ Metrics = (*InMemMetrics)(nil)

// Int records metric in memory. ctx and metricName are not used, and the
// returned error is always nil.
func (m *InMemMetrics) Int(ctx context.Context, metricName string, metric int) error {
	recorded := append(m.intMetrics, metric)
	m.intMetrics = recorded
	return nil
}

// IntStats returns the values recorded through Int. The returned slice is
// the internal one, not a copy.
func (m *InMemMetrics) IntStats() []int {
	stats := m.intMetrics
	return stats
}

// Duration records metric in memory. ctx and metricName are not used, and
// the returned error is always nil.
func (m *InMemMetrics) Duration(ctx context.Context, metricName string, metric time.Duration) error {
	recorded := append(m.durationMetrics, metric)
	m.durationMetrics = recorded
	return nil
}

// DurationStats returns the values recorded through Duration. The returned
// slice is the internal one, not a copy.
func (m *InMemMetrics) DurationStats() []time.Duration {
	stats := m.durationMetrics
	return stats
}

// Success records metric in memory. ctx and metricName are not used, and
// the returned error is always nil.
func (m *InMemMetrics) Success(ctx context.Context, metricName string, metric bool) error {
	recorded := append(m.successMetrics, metric)
	m.successMetrics = recorded
	return nil
}

// SuccessStats returns the values recorded through Success. The returned
// slice is the internal one, not a copy.
func (m *InMemMetrics) SuccessStats() []bool {
	stats := m.successMetrics
	return stats
}

// Reset discards everything recorded so far, replacing each store with a
// fresh, empty (non-nil) slice.
func (m *InMemMetrics) Reset() {
	m.intMetrics = make([]int, 0)
	m.durationMetrics = make([]time.Duration, 0)
	m.successMetrics = make([]bool, 0)
}
12 changes: 12 additions & 0 deletions src/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package metrics

import (
"context"
"time"
)

// Metrics is the sink that named metric values are reported to.
type Metrics interface {
	// Int reports an integer value under metricName.
	Int(ctx context.Context, metricName string, metric int) error
	// Duration reports a time span under metricName.
	Duration(ctx context.Context, metricName string, metric time.Duration) error
	// Success reports a boolean outcome under metricName.
	Success(ctx context.Context, metricName string, metric bool) error
}
41 changes: 40 additions & 1 deletion src/reconcile/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"context"
"fmt"
"strings"
"time"

"github.com/go-logr/logr"
"github.com/hashicorp/go-multierror"
"github.com/xenitab/azcagit/src/cache"
"github.com/xenitab/azcagit/src/config"
"github.com/xenitab/azcagit/src/metrics"
"github.com/xenitab/azcagit/src/notification"
"github.com/xenitab/azcagit/src/remote"
"github.com/xenitab/azcagit/src/secret"
Expand All @@ -21,19 +23,21 @@ type Reconciler struct {
remoteClient remote.Remote
secretClient secret.Secret
notificationClient notification.Notification
metricsClient metrics.Metrics
appCache *cache.AppCache
secretCache *cache.SecretCache
previousNotificationEvent notification.NotificationEvent
}

func NewReconciler(cfg config.Config, sourceClient source.Source, remoteClient remote.Remote, secretClient secret.Secret, notificationClient notification.Notification, appCache *cache.AppCache, secretCache *cache.SecretCache) (*Reconciler, error) {
func NewReconciler(cfg config.Config, sourceClient source.Source, remoteClient remote.Remote, secretClient secret.Secret, notificationClient notification.Notification, metricsClient metrics.Metrics, appCache *cache.AppCache, secretCache *cache.SecretCache) (*Reconciler, error) {
previousNotificationEvent := notification.NotificationEvent{}
return &Reconciler{
cfg,
sourceClient,
remoteClient,
secretClient,
notificationClient,
metricsClient,
appCache,
secretCache,
previousNotificationEvent,
Expand All @@ -43,6 +47,7 @@ func NewReconciler(cfg config.Config, sourceClient source.Source, remoteClient r
func (r *Reconciler) Run(ctx context.Context) error {
var result *multierror.Error

startTime := time.Now()
revision, reconcileErr := r.run(ctx)
if reconcileErr != nil {
result = multierror.Append(reconcileErr, result)
Expand All @@ -53,9 +58,33 @@ func (r *Reconciler) Run(ctx context.Context) error {
result = multierror.Append(err, result)
}

r.reportReconcileMetrics(ctx, startTime, result)

return result.ErrorOrNil()
}

// reportReconcileMetrics pushes two metrics for a finished reconcile: the
// time elapsed since startTime and whether result carries no error. Failures
// to push are only logged; they never affect the caller.
func (r *Reconciler) reportReconcileMetrics(ctx context.Context, startTime time.Time, result *multierror.Error) {
	log := logr.FromContextOrDiscard(ctx)

	elapsed := time.Since(startTime)
	if err := r.metricsClient.Duration(ctx, "Reconcile Duration (s)", elapsed); err != nil {
		log.Error(err, "unable to push metrics for reconcile duration")
	}

	// The reconcile counts as successful only when no error was collected.
	succeeded := result.ErrorOrNil() == nil
	if err := r.metricsClient.Success(ctx, "Reconcile Result", succeeded); err != nil {
		log.Error(err, "unable to push metrics for reconcile result")
	}
}

func (r *Reconciler) run(ctx context.Context) (string, error) {
sourceApps, revision, err := r.getSourceApps(ctx)
if err != nil {
Expand All @@ -64,6 +93,8 @@ func (r *Reconciler) run(ctx context.Context) (string, error) {

r.filterSourceApps(ctx, sourceApps)

r.reportSourceAppsMetrics(ctx, sourceApps)

err = r.populateSourceAppsSecrets(ctx, sourceApps)
if err != nil {
return revision, err
Expand Down Expand Up @@ -97,6 +128,14 @@ func (r *Reconciler) run(ctx context.Context) (string, error) {
return revision, nil
}

// reportSourceAppsMetrics pushes the number of entries in sourceApps as the
// "Source App Count" metric. A failure to push is only logged.
func (r *Reconciler) reportSourceAppsMetrics(ctx context.Context, sourceApps *source.SourceApps) {
	appCount := len(*sourceApps)
	if err := r.metricsClient.Int(ctx, "Source App Count", appCount); err != nil {
		logr.FromContextOrDiscard(ctx).Error(err, "unable to push metrics for source app count")
	}
}

func (r *Reconciler) getSourceApps(ctx context.Context) (*source.SourceApps, string, error) {
sourceApps, revision, err := r.sourceClient.Get(ctx)
if err != nil {
Expand Down
Loading

0 comments on commit dcd5deb

Please sign in to comment.