Skip to content

Commit

Permalink
chore: add resource throughput metrics
Browse files Browse the repository at this point in the history
In addition to measuring operations, measure the number of resources
affected (important, e.g., for list/watch operations, where a single
operation might yield many resources).

<details>

```
# HELP omni_resource_throughput_total Number of resources processed by watches/reads/writes.
# TYPE omni_resource_throughput_total counter
omni_resource_throughput_total{kind="read",type="AuthConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="BackupDatas.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="ClusterBootstrapStatuses.omni.sidero.dev"} 3
omni_resource_throughput_total{kind="read",type="ClusterConfigVersions.omni.sidero.dev"} 10
omni_resource_throughput_total{kind="read",type="ClusterEndpoints.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="ClusterKubernetesNodes.omni.sidero.dev"} 5
omni_resource_throughput_total{kind="read",type="ClusterMachineConfigPatches.omni.sidero.dev"} 20
omni_resource_throughput_total{kind="read",type="ClusterMachineConfigStatuses.omni.sidero.dev"} 14
omni_resource_throughput_total{kind="read",type="ClusterMachineConfigs.omni.sidero.dev"} 46
omni_resource_throughput_total{kind="read",type="ClusterMachineEncryptionKeys.omni.sidero.dev"} 18
omni_resource_throughput_total{kind="read",type="ClusterMachineStatuses.omni.sidero.dev"} 12
omni_resource_throughput_total{kind="read",type="ClusterMachineTalosVersions.omni.sidero.dev"} 4
omni_resource_throughput_total{kind="read",type="ClusterMachines.omni.sidero.dev"} 22
omni_resource_throughput_total{kind="read",type="ClusterSecrets.omni.sidero.dev"} 4
omni_resource_throughput_total{kind="read",type="ClusterStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="ClusterUUIDs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="ClusterWorkloadProxyStatuses.omni.sidero.dev"} 3
omni_resource_throughput_total{kind="read",type="ConfigPatches.omni.sidero.dev"} 80
omni_resource_throughput_total{kind="read",type="Configs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="ConnectionParams.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="ControlPlaneStatuses.omni.sidero.dev"} 4
omni_resource_throughput_total{kind="read",type="DBVersions.system.sidero.dev"} 1
omni_resource_throughput_total{kind="read",type="EtcdBackupEncryptions.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="EtcdBackupOverallStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="EtcdBackupStoreStatuses.omni.sidero.dev"} 3
omni_resource_throughput_total{kind="read",type="ExtensionsConfigurations.omni.sidero.dev"} 0
omni_resource_throughput_total{kind="read",type="FeaturesConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="InstallationMedias.omni.sidero.dev"} 41
omni_resource_throughput_total{kind="read",type="JWTPublicKeys.system.sidero.dev"} 7
omni_resource_throughput_total{kind="read",type="Kubeconfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="KubernetesNodeAuditResults.omni.sidero.dev"} 3
omni_resource_throughput_total{kind="read",type="KubernetesUpgradeManifestStatuses.omni.sidero.dev"} 3
omni_resource_throughput_total{kind="read",type="KubernetesUpgradeStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="KubernetesVersions.omni.sidero.dev"} 178
omni_resource_throughput_total{kind="read",type="Links.omni.sidero.dev"} 8
omni_resource_throughput_total{kind="read",type="LoadBalancerConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="LoadBalancerStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="MachineConfigGenOptions.omni.sidero.dev"} 16
omni_resource_throughput_total{kind="read",type="MachineExtensionsStatuses.omni.sidero.dev"} 12
omni_resource_throughput_total{kind="read",type="MachineSetDestroyStatuses.omni.sidero.dev"} 0
omni_resource_throughput_total{kind="read",type="MachineSetNodes.omni.sidero.dev"} 5
omni_resource_throughput_total{kind="read",type="MachineSetStatuses.omni.sidero.dev"} 8
omni_resource_throughput_total{kind="read",type="MachineStatusLinks.omni.sidero.dev"} 24
omni_resource_throughput_total{kind="read",type="MachineStatuses.omni.sidero.dev"} 16
omni_resource_throughput_total{kind="read",type="Machines.omni.sidero.dev"} 36
omni_resource_throughput_total{kind="read",type="OngoingTasks.omni.sidero.dev"} 0
omni_resource_throughput_total{kind="read",type="PublicKeys.omni.sidero.dev"} 0
omni_resource_throughput_total{kind="read",type="RedactedClusterMachineConfigs.omni.sidero.dev"} 21
omni_resource_throughput_total{kind="read",type="ResourceDefinitions.meta.cosi.dev"} 91
omni_resource_throughput_total{kind="read",type="SchematicConfigurations.omni.sidero.dev"} 12
omni_resource_throughput_total{kind="read",type="TalosConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="read",type="TalosExtensions.omni.sidero.dev"} 32
omni_resource_throughput_total{kind="read",type="TalosUpgradeStatuses.omni.sidero.dev"} 4
omni_resource_throughput_total{kind="read",type="TalosVersions.omni.sidero.dev"} 118
omni_resource_throughput_total{kind="read",type="Users.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="BackupDatas.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ClusterDestroyStatuses.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="ClusterEndpoints.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ClusterMachineConfigPatches.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="ClusterMachineConfigStatuses.omni.sidero.dev"} 5
omni_resource_throughput_total{kind="watch",type="ClusterMachineConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ClusterMachineIdentities.omni.sidero.dev"} 10
omni_resource_throughput_total{kind="watch",type="ClusterMachineStatuses.omni.sidero.dev"} 6
omni_resource_throughput_total{kind="watch",type="ClusterMachineTalosVersions.omni.sidero.dev"} 6
omni_resource_throughput_total{kind="watch",type="ClusterMachines.omni.sidero.dev"} 7
omni_resource_throughput_total{kind="watch",type="ClusterSecrets.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ClusterStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ClusterUUIDs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="Clusters.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ConfigPatches.omni.sidero.dev"} 16
omni_resource_throughput_total{kind="watch",type="Configs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ConnectionParams.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="ControlPlaneStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="EtcdBackupEncryptions.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="EtcdBackupS3Configs.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="EtcdBackupStatuses.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="ExposedServices.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="ImagePullRequests.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="ImagePullStatuses.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="Kubeconfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="KubernetesStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="KubernetesUpgradeStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="KubernetesVersions.omni.sidero.dev"} 90
omni_resource_throughput_total{kind="watch",type="Links.omni.sidero.dev"} 9
omni_resource_throughput_total{kind="watch",type="LoadBalancerConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="LoadBalancerStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="MachineConfigGenOptions.omni.sidero.dev"} 9
omni_resource_throughput_total{kind="watch",type="MachineLabels.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="MachineSetNodes.omni.sidero.dev"} 6
omni_resource_throughput_total{kind="watch",type="MachineSetStatuses.omni.sidero.dev"} 3
omni_resource_throughput_total{kind="watch",type="MachineSets.omni.sidero.dev"} 3
omni_resource_throughput_total{kind="watch",type="MachineStatusSnapshots.omni.sidero.dev"} 9
omni_resource_throughput_total{kind="watch",type="MachineStatuses.omni.sidero.dev"} 18
omni_resource_throughput_total{kind="watch",type="Machines.omni.sidero.dev"} 18
omni_resource_throughput_total{kind="watch",type="RedactedClusterMachineConfigs.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="watch",type="SchematicConfigurations.omni.sidero.dev"} 6
omni_resource_throughput_total{kind="watch",type="TalosConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="TalosExtensions.omni.sidero.dev"} 38
omni_resource_throughput_total{kind="watch",type="TalosUpgradeStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="watch",type="TalosVersions.omni.sidero.dev"} 60
omni_resource_throughput_total{kind="write",type="ClusterMachineConfigPatches.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="write",type="ClusterMachineConfigs.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="write",type="ClusterMachines.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="write",type="ConfigPatches.omni.sidero.dev"} 8
omni_resource_throughput_total{kind="write",type="ConnectionParams.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="write",type="EtcdBackupStoreStatuses.omni.sidero.dev"} 2
omni_resource_throughput_total{kind="write",type="InstallationMedias.omni.sidero.dev"} 41
omni_resource_throughput_total{kind="write",type="JWTPublicKeys.system.sidero.dev"} 1
omni_resource_throughput_total{kind="write",type="MachineStatusMetrics.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="write",type="Namespaces.meta.cosi.dev"} 6
omni_resource_throughput_total{kind="write",type="RedactedClusterMachineConfigs.omni.sidero.dev"} 1
omni_resource_throughput_total{kind="write",type="ResourceDefinitions.meta.cosi.dev"} 91
omni_resource_throughput_total{kind="write",type="SysVersions.system.sidero.dev"} 1
```

</details>

Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
smira committed Jul 3, 2024
1 parent 9671551 commit b910c20
Showing 1 changed file with 95 additions and 8 deletions.
103 changes: 95 additions & 8 deletions internal/backend/runtime/omni/state_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type stateMetrics struct {
st state.CoreState

resourceOperations *prometheus.CounterVec
resourceThroughput *prometheus.CounterVec
}

// Check interfaces.
Expand All @@ -37,6 +38,13 @@ func wrapStateWithMetrics(st state.CoreState) *stateMetrics {
},
[]string{"operation", "type"},
),
resourceThroughput: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "omni_resource_throughput_total",
Help: "Number of resources processed by watches/reads/writes.",
},
[]string{"kind", "type"},
),
}
}

Expand All @@ -48,52 +56,131 @@ func (metrics *stateMetrics) Describe(ch chan<- *prometheus.Desc) {
// Collect implements prom.Collector interface.
//
// It forwards the collection request to every counter vector owned by the
// wrapper: first the per-operation counters, then the throughput counters.
func (metrics *stateMetrics) Collect(ch chan<- prometheus.Metric) {
	collectors := []prometheus.Collector{
		metrics.resourceOperations,
		metrics.resourceThroughput,
	}

	for _, collector := range collectors {
		collector.Collect(ch)
	}
}

// Get counts a "get" operation, reads the resource from the wrapped state and,
// when a resource is returned, adds one to the "read" throughput counter.
//
// The result and error of the wrapped state are returned unchanged.
func (metrics *stateMetrics) Get(ctx context.Context, r resource.Pointer, opts ...state.GetOption) (resource.Resource, error) {
	metrics.resourceOperations.WithLabelValues("get", r.Type()).Inc()

	result, err := metrics.st.Get(ctx, r, opts...)

	// Throughput is counted only when the wrapped state actually produced a resource.
	if result != nil {
		metrics.resourceThroughput.WithLabelValues("read", r.Type()).Inc()
	}

	return result, err
}

// List counts a "list" operation, lists resources from the wrapped state and
// adds the number of returned items to the "read" throughput counter.
//
// The result and error of the wrapped state are returned unchanged.
func (metrics *stateMetrics) List(ctx context.Context, r resource.Kind, opts ...state.ListOption) (resource.List, error) {
	metrics.resourceOperations.WithLabelValues("list", r.Type()).Inc()

	result, err := metrics.st.List(ctx, r, opts...)

	// A single list call may yield many resources, so account for every item returned.
	metrics.resourceThroughput.WithLabelValues("read", r.Type()).Add(float64(len(result.Items)))

	return result, err
}

// Create counts a "create" operation, creates the resource in the wrapped
// state and, if that succeeds, adds one to the "write" throughput counter.
//
// The error of the wrapped state is returned unchanged.
func (metrics *stateMetrics) Create(ctx context.Context, r resource.Resource, opts ...state.CreateOption) error {
	metrics.resourceOperations.WithLabelValues("create", r.Metadata().Type()).Inc()

	err := metrics.st.Create(ctx, r, opts...)

	// Only successful writes count towards throughput.
	if err == nil {
		metrics.resourceThroughput.WithLabelValues("write", r.Metadata().Type()).Inc()
	}

	return err
}

// Update counts an "update" operation, updates the resource in the wrapped
// state and, if that succeeds, adds one to the "write" throughput counter.
//
// The error of the wrapped state is returned unchanged.
func (metrics *stateMetrics) Update(ctx context.Context, newResource resource.Resource, opts ...state.UpdateOption) error {
	metrics.resourceOperations.WithLabelValues("update", newResource.Metadata().Type()).Inc()

	err := metrics.st.Update(ctx, newResource, opts...)

	// Only successful writes count towards throughput.
	if err == nil {
		metrics.resourceThroughput.WithLabelValues("write", newResource.Metadata().Type()).Inc()
	}

	return err
}

// Destroy counts a "destroy" operation, destroys the resource in the wrapped
// state and, if that succeeds, adds one to the "write" throughput counter.
//
// The error of the wrapped state is returned unchanged.
func (metrics *stateMetrics) Destroy(ctx context.Context, r resource.Pointer, opts ...state.DestroyOption) error {
	metrics.resourceOperations.WithLabelValues("destroy", r.Type()).Inc()

	err := metrics.st.Destroy(ctx, r, opts...)

	// Only successful writes count towards throughput.
	if err == nil {
		metrics.resourceThroughput.WithLabelValues("write", r.Type()).Inc()
	}

	return err
}

// Watch counts a "watch" operation and starts a watch on the wrapped state.
//
// The caller's channel is not handed to the wrapped state directly: events go
// through watchChannelWrapper, which counts each one as "watch" throughput
// before forwarding it to ch.
func (metrics *stateMetrics) Watch(ctx context.Context, r resource.Pointer, ch chan<- state.Event, opts ...state.WatchOption) error {
	metrics.resourceOperations.WithLabelValues("watch", r.Type()).Inc()

	return metrics.st.Watch(ctx, r, metrics.watchChannelWrapper(ctx, ch, r.Type()), opts...)
}

// WatchKind counts a "watch" operation and starts a kind-wide watch on the
// wrapped state.
//
// The caller's channel is not handed to the wrapped state directly: events go
// through watchChannelWrapper, which counts each one as "watch" throughput
// before forwarding it to ch.
func (metrics *stateMetrics) WatchKind(ctx context.Context, r resource.Kind, ch chan<- state.Event, opts ...state.WatchKindOption) error {
	metrics.resourceOperations.WithLabelValues("watch", r.Type()).Inc()

	return metrics.st.WatchKind(ctx, r, metrics.watchChannelWrapper(ctx, ch, r.Type()), opts...)
}

// WatchKindAggregated counts a "watch" operation and starts an aggregated
// kind-wide watch on the wrapped state.
//
// The caller's channel is not handed to the wrapped state directly: event
// batches go through watchAggregatedChannelWrapper, which adds the batch size
// to the "watch" throughput counter before forwarding the batch to c.
func (metrics *stateMetrics) WatchKindAggregated(ctx context.Context, r resource.Kind, c chan<- []state.Event, opts ...state.WatchKindOption) error {
	metrics.resourceOperations.WithLabelValues("watch", r.Type()).Inc()

	return metrics.st.WatchKindAggregated(ctx, r, metrics.watchAggregatedChannelWrapper(ctx, c, r.Type()), opts...)
}

// watchChannelWrapper returns a fresh unbuffered channel that stands in for out.
//
// A goroutine receives every event sent to the returned channel, increments
// the "watch" throughput counter for typ, and then forwards the event to out.
// The goroutine exits as soon as ctx is done, whether it is waiting for the
// next event or waiting to deliver one.
func (metrics *stateMetrics) watchChannelWrapper(ctx context.Context, out chan<- state.Event, typ resource.Type) chan<- state.Event {
	proxy := make(chan state.Event)

	// deliver hands the event to the consumer; it reports false when ctx ended first.
	deliver := func(event state.Event) bool {
		select {
		case <-ctx.Done():
			return false
		case out <- event:
			return true
		}
	}

	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case event := <-proxy:
				metrics.resourceThroughput.WithLabelValues("watch", typ).Inc()

				if !deliver(event) {
					return
				}
			}
		}
	}()

	return proxy
}

// watchAggregatedChannelWrapper returns a fresh unbuffered channel that stands
// in for out.
//
// A goroutine receives every event batch sent to the returned channel, adds
// the batch length to the "watch" throughput counter for typ, and then
// forwards the batch to out. The goroutine exits as soon as ctx is done,
// whether it is waiting for the next batch or waiting to deliver one.
func (metrics *stateMetrics) watchAggregatedChannelWrapper(ctx context.Context, out chan<- []state.Event, typ resource.Type) chan<- []state.Event {
	proxy := make(chan []state.Event)

	// deliver hands the batch to the consumer; it reports false when ctx ended first.
	deliver := func(batch []state.Event) bool {
		select {
		case <-ctx.Done():
			return false
		case out <- batch:
			return true
		}
	}

	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case batch := <-proxy:
				metrics.resourceThroughput.WithLabelValues("watch", typ).Add(float64(len(batch)))

				if !deliver(batch) {
					return
				}
			}
		}
	}()

	return proxy
}

0 comments on commit b910c20

Please sign in to comment.