Skip to content

Commit

Permalink
Merge pull request #1415 from carvel-dev/expose-metrics
Browse files Browse the repository at this point in the history
Expose metrics to report time taken in fetch/template/deploy phase of app, pkgi, pkgr
  • Loading branch information
sethiyash authored Jan 27, 2024
2 parents 7b7d396 + b292117 commit e82ab40
Show file tree
Hide file tree
Showing 27 changed files with 636 additions and 189 deletions.
13 changes: 8 additions & 5 deletions cmd/controller/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,6 @@ func Run(opts Options, runLog logr.Logger) error {
return fmt.Errorf("Building packaging client: %s", err)
}

runLog.Info("setting up metrics")
appMetrics := metrics.NewAppMetrics()
appMetrics.RegisterAllMetrics()

var server *apiserver.APIServer
if opts.StartAPIServer {
// assign bindPort to env var KAPPCTRL_API_PORT if available
Expand Down Expand Up @@ -187,6 +183,11 @@ func Run(opts Options, runLog logr.Logger) error {
kubeconf := kubeconfig.NewKubeconfig(coreClient, runLog)
compInfo := componentinfo.NewComponentInfo(coreClient, kubeconf, Version)

runLog.Info("setting up metrics")
appMetrics := metrics.NewMetrics()
appMetrics.ReconcileTimeMetrics.RegisterAllMetrics()
appMetrics.ReconcileCountMetrics.RegisterAllMetrics()

cacheFolderApps := memdir.NewTmpDir("cache-appcr")
err = cacheFolderApps.Create()
if err != nil {
Expand Down Expand Up @@ -227,7 +228,8 @@ func Run(opts Options, runLog logr.Logger) error {
pkgToPkgInstallHandler := pkginstall.NewPackageInstallVersionHandler(
kcClient, opts.PackagingGlobalNS, runLog.WithName("handler"))

reconciler := pkginstall.NewReconciler(kcClient, pkgClient, coreClient, pkgToPkgInstallHandler, runLog.WithName("pkgi"), compInfo, kcConfig)
reconciler := pkginstall.NewReconciler(kcClient, pkgClient, coreClient, pkgToPkgInstallHandler,
runLog.WithName("pkgi"), compInfo, kcConfig, appMetrics)

ctrl, err := controller.New("pkgi", mgr, controller.Options{
Reconciler: reconciler,
Expand All @@ -254,6 +256,7 @@ func Run(opts Options, runLog logr.Logger) error {
CoreClient: coreClient,
AppClient: kcClient,
KcConfig: kcConfig,
AppMetrics: appMetrics,
CmdRunner: sidecarCmdExec,
Kubeconf: kubeconf,
CacheFolder: cacheFolderPkgRepoApps,
Expand Down
5 changes: 5 additions & 0 deletions config/config/agg-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,10 @@ spec:
- port: 443
protocol: TCP
targetPort: api
name: main
- port: 8080
protocol: TCP
targetPort: metrics
name: metrics
selector:
app: kapp-controller
3 changes: 3 additions & 0 deletions config/config/deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ spec:
- containerPort: #@ data.values.apiPort
name: api
protocol: TCP
- containerPort: #@ data.values.metricsPort
name: metrics
protocol: TCP
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
Expand Down
2 changes: 2 additions & 0 deletions config/values-schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ dangerousEnablePprof: false
tlsCipherSuites: ""
#@schema/desc "API port"
apiPort: 8443
#@schema/desc "Metrics port"
metricsPort: 8080
#@schema/desc "The coreDNSIP will be injected into /etc/resolv.conf of kapp-controller pod"
coreDNSIP: ""
#@schema/desc "HostNetwork of kapp-controller deployment."
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ require (
github.com/cppforlife/go-cli-ui v0.0.0-20220425131040-94f26b16bc14
github.com/go-logr/logr v1.2.4
github.com/k14s/semver/v4 v4.0.1-0.20210701191048-266d47ac6115
github.com/prometheus/client_model v0.4.0
github.com/spf13/cobra v1.6.1
golang.org/x/sync v0.3.0
gopkg.in/yaml.v2 v2.4.0
Expand Down Expand Up @@ -86,7 +87,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
Expand Down
7 changes: 5 additions & 2 deletions pkg/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ type App struct {

log logr.Logger
opts Opts
appMetrics *metrics.AppMetrics
appMetrics *metrics.Metrics

pendingStatusUpdate bool
flushAllStatusUpdates bool
Expand All @@ -66,7 +66,7 @@ type App struct {

func NewApp(app v1alpha1.App, hooks Hooks,
fetchFactory fetch.Factory, templateFactory template.Factory,
deployFactory deploy.Factory, log logr.Logger, opts Opts, appMetrics *metrics.AppMetrics, compInfo ComponentInfo) *App {
deployFactory deploy.Factory, log logr.Logger, opts Opts, appMetrics *metrics.Metrics, compInfo ComponentInfo) *App {

return &App{app: app, appPrev: *(app.DeepCopy()), hooks: hooks,
fetchFactory: fetchFactory, templateFactory: templateFactory,
Expand All @@ -76,6 +76,9 @@ func NewApp(app v1alpha1.App, hooks Hooks,
// Name returns the Name field of the App resource held by this wrapper.
func (a *App) Name() string { return a.app.Name }

// Namespace returns the Namespace field of the App resource held by this wrapper.
func (a *App) Namespace() string { return a.app.Namespace }

// Kind returns the kind of this resource, which is always "App".
func (a *App) Kind() string { return "App" }

// Status returns the Status field of the App resource held by this wrapper.
func (a *App) Status() v1alpha1.AppStatus { return a.app.Status }

func (a *App) StatusAsYAMLBytes() ([]byte, error) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/app/app_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type CRDAppFactory struct {
CoreClient kubernetes.Interface
AppClient kcclient.Interface
KcConfig *config.Config
AppMetrics *metrics.AppMetrics
AppMetrics *metrics.Metrics
VendirConfigHook func(vendirconf.Config) vendirconf.Config
KbldAllowBuild bool
CmdRunner exec.CmdRunner
Expand Down
32 changes: 25 additions & 7 deletions pkg/app/app_reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func (a *App) Reconcile(force bool) (reconcile.Result, error) {

var err error

a.appMetrics.InitMetrics(a.Name(), a.Namespace())
a.appMetrics.ReconcileCountMetrics.InitMetrics(a.Kind(), a.Name(), a.Namespace())

timerOpts := ReconcileTimerOpts{
DefaultSyncPeriod: a.opts.DefaultSyncPeriod,
Expand Down Expand Up @@ -103,6 +103,13 @@ func (a *App) reconcileDeploy() error {
}

func (a *App) reconcileFetchTemplateDeploy() exec.CmdRunResult {
reconcileStartTime := time.Now()
a.appMetrics.IsFirstReconcile = a.appMetrics.ReconcileCountMetrics.GetReconcileAttemptCounterValue(a.Kind(), a.Name(), a.Namespace()) == 1
defer func() {
a.appMetrics.ReconcileTimeMetrics.RegisterOverallTime(a.Kind(), a.Name(), a.Namespace(), a.appMetrics.IsFirstReconcile,
time.Since(reconcileStartTime))
}()

tmpDir := memdir.NewTmpDir("fetch-template-deploy")

err := tmpDir.Create()
Expand All @@ -129,6 +136,9 @@ func (a *App) reconcileFetchTemplateDeploy() exec.CmdRunResult {
UpdatedAt: metav1.NewTime(time.Now().UTC()),
}

a.appMetrics.ReconcileTimeMetrics.RegisterFetchTime(a.Kind(), a.Name(), a.Namespace(), a.appMetrics.IsFirstReconcile,
a.app.Status.Fetch.UpdatedAt.Sub(a.app.Status.Fetch.StartedAt.Time))

err := a.updateStatus("marking fetch completed")
if err != nil {
return exec.NewCmdRunResultWithErr(err)
Expand All @@ -139,6 +149,8 @@ func (a *App) reconcileFetchTemplateDeploy() exec.CmdRunResult {
}
}

templateStartTime := time.Now()

tplResult := a.template(assetsPath)

a.app.Status.Template = &v1alpha1.AppStatusTemplate{
Expand All @@ -148,6 +160,9 @@ func (a *App) reconcileFetchTemplateDeploy() exec.CmdRunResult {
UpdatedAt: metav1.NewTime(time.Now().UTC()),
}

a.appMetrics.ReconcileTimeMetrics.RegisterTemplateTime(a.Kind(), a.Name(), a.Namespace(), a.appMetrics.IsFirstReconcile,
a.app.Status.Template.UpdatedAt.Sub(templateStartTime))

err = a.updateStatus("marking template completed")
if err != nil {
return exec.NewCmdRunResultWithErr(err)
Expand Down Expand Up @@ -202,6 +217,9 @@ func (a *App) updateLastDeploy(result exec.CmdRunResult) exec.CmdRunResult {
},
}

a.appMetrics.ReconcileTimeMetrics.RegisterDeployTime(a.Kind(), a.Name(), a.Namespace(), a.appMetrics.IsFirstReconcile,
a.Status().Deploy.UpdatedAt.Sub(a.Status().Deploy.StartedAt.Time))

return result
}

Expand Down Expand Up @@ -253,7 +271,7 @@ func (a *App) setReconciling() {
Status: corev1.ConditionTrue,
})

a.appMetrics.RegisterReconcileAttempt(a.app.Name, a.app.Namespace)
a.appMetrics.ReconcileCountMetrics.RegisterReconcileAttempt(a.Kind(), a.Name(), a.Namespace())
a.app.Status.FriendlyDescription = "Reconciling"
}

Expand All @@ -269,7 +287,7 @@ func (a *App) setReconcileCompleted(result exec.CmdRunResult) {
a.app.Status.ConsecutiveReconcileFailures++
a.app.Status.ConsecutiveReconcileSuccesses = 0
a.app.Status.FriendlyDescription = fmt.Sprintf("Reconcile failed: %s", result.ErrorStr())
a.appMetrics.RegisterReconcileFailure(a.app.Name, a.app.Namespace)
a.appMetrics.ReconcileCountMetrics.RegisterReconcileFailure(a.Kind(), a.Name(), a.Namespace())
a.setUsefulErrorMessage(result)
} else {
a.app.Status.Conditions = append(a.app.Status.Conditions, v1alpha1.Condition{
Expand All @@ -280,7 +298,7 @@ func (a *App) setReconcileCompleted(result exec.CmdRunResult) {
a.app.Status.ConsecutiveReconcileSuccesses++
a.app.Status.ConsecutiveReconcileFailures = 0
a.app.Status.FriendlyDescription = "Reconcile succeeded"
a.appMetrics.RegisterReconcileSuccess(a.app.Name, a.app.Namespace)
a.appMetrics.ReconcileCountMetrics.RegisterReconcileSuccess(a.Kind(), a.Name(), a.Namespace())
a.app.Status.UsefulErrorMessage = ""
}
}
Expand All @@ -293,7 +311,7 @@ func (a *App) setDeleting() {
Status: corev1.ConditionTrue,
})

a.appMetrics.RegisterReconcileDeleteAttempt(a.app.Name, a.app.Namespace)
a.appMetrics.ReconcileCountMetrics.RegisterReconcileDeleteAttempt(a.Kind(), a.Name(), a.Namespace())
a.app.Status.FriendlyDescription = "Deleting"
}

Expand All @@ -309,10 +327,10 @@ func (a *App) setDeleteCompleted(result exec.CmdRunResult) {
a.app.Status.ConsecutiveReconcileFailures++
a.app.Status.ConsecutiveReconcileSuccesses = 0
a.app.Status.FriendlyDescription = fmt.Sprintf("Delete failed: %s", result.ErrorStr())
a.appMetrics.RegisterReconcileDeleteFailed(a.app.Name, a.app.Namespace)
a.appMetrics.ReconcileCountMetrics.RegisterReconcileDeleteFailed(a.Kind(), a.Name(), a.Namespace())
a.setUsefulErrorMessage(result)
} else {
a.appMetrics.DeleteMetrics(a.app.Name, a.app.Namespace)
a.appMetrics.ReconcileCountMetrics.DeleteMetrics(a.Kind(), a.Name(), a.Namespace())
}
}

Expand Down
9 changes: 3 additions & 6 deletions pkg/app/app_reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (

func Test_NoInspectReconcile_IfNoDeployAttempted(t *testing.T) {
log := logf.Log.WithName("kc")
var appMetrics = metrics.NewAppMetrics()

// The url under fetch is invalid, which will cause this
// app to fail before deploy.
Expand All @@ -52,7 +51,7 @@ func Test_NoInspectReconcile_IfNoDeployAttempted(t *testing.T) {
tmpFac := template.NewFactory(k8scs, fetchFac, false, exec.NewPlainCmdRunner())
deployFac := deploy.NewFactory(k8scs, kubeconfig.NewKubeconfig(k8scs, log), nil, exec.NewPlainCmdRunner(), log)

crdApp := NewCRDApp(&app, log, appMetrics, kappcs, fetchFac, tmpFac, deployFac, FakeComponentInfo{}, Opts{MinimumSyncPeriod: 30 * time.Second})
crdApp := NewCRDApp(&app, log, metrics.NewMetrics(), kappcs, fetchFac, tmpFac, deployFac, FakeComponentInfo{}, Opts{MinimumSyncPeriod: 30 * time.Second})
_, err := crdApp.Reconcile(false)
assert.Nil(t, err, "unexpected error with reconciling", err)

Expand Down Expand Up @@ -86,7 +85,6 @@ func Test_NoInspectReconcile_IfNoDeployAttempted(t *testing.T) {

func Test_NoInspectReconcile_IfInspectNotEnabled(t *testing.T) {
log := logf.Log.WithName("kc")
var appMetrics = metrics.NewAppMetrics()

app := v1alpha1.App{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -119,7 +117,7 @@ func Test_NoInspectReconcile_IfInspectNotEnabled(t *testing.T) {
tmpFac := template.NewFactory(k8scs, fetchFac, false, exec.NewPlainCmdRunner())
deployFac := deploy.NewFactory(k8scs, kubeconfig.NewKubeconfig(k8scs, log), nil, exec.NewPlainCmdRunner(), log)

crdApp := NewCRDApp(&app, log, appMetrics, kappcs, fetchFac, tmpFac, deployFac, FakeComponentInfo{}, Opts{MinimumSyncPeriod: 30 * time.Second})
crdApp := NewCRDApp(&app, log, metrics.NewMetrics(), kappcs, fetchFac, tmpFac, deployFac, FakeComponentInfo{}, Opts{MinimumSyncPeriod: 30 * time.Second})
_, err := crdApp.Reconcile(false)
assert.Nil(t, err, "unexpected error with reconciling", err)

Expand Down Expand Up @@ -164,7 +162,6 @@ func Test_NoInspectReconcile_IfInspectNotEnabled(t *testing.T) {

func Test_TemplateError_DisplayedInStatus_UsefulErrorMessageProperty(t *testing.T) {
log := logf.Log.WithName("kc")
var appMetrics = metrics.NewAppMetrics()

fetchInline := map[string]string{
"file.yml": `foo: #@ data.values.nothere`,
Expand All @@ -191,7 +188,7 @@ func Test_TemplateError_DisplayedInStatus_UsefulErrorMessageProperty(t *testing.
tmpFac := template.NewFactory(k8scs, fetchFac, false, exec.NewPlainCmdRunner())
deployFac := deploy.NewFactory(k8scs, kubeconfig.NewKubeconfig(k8scs, log), nil, exec.NewPlainCmdRunner(), log)

crdApp := NewCRDApp(&app, log, appMetrics, kappcs, fetchFac, tmpFac, deployFac, FakeComponentInfo{}, Opts{MinimumSyncPeriod: 30 * time.Second})
crdApp := NewCRDApp(&app, log, metrics.NewMetrics(), kappcs, fetchFac, tmpFac, deployFac, FakeComponentInfo{}, Opts{MinimumSyncPeriod: 30 * time.Second})
_, err := crdApp.Reconcile(false)
assert.Nil(t, err, "Unexpected error with reconciling", err)

Expand Down
2 changes: 1 addition & 1 deletion pkg/app/app_template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func Test_BuildAdditionalDownwardAPIValues_MemoizedCallCount(t *testing.T) {
K8sAPIsCount: &k8sAPIsCallCount,
KCVersionCount: &kcVersionCallCount,
}
app := NewApp(appEmpty, Hooks{}, fetchFac, tmpFac, deployFac, log, Opts{}, metrics.NewAppMetrics(), fakeInfo)
app := NewApp(appEmpty, Hooks{}, fetchFac, tmpFac, deployFac, log, Opts{}, metrics.NewMetrics(), fakeInfo)

dir, err := os.MkdirTemp("", "temp")
assert.NoError(t, err)
Expand Down
9 changes: 5 additions & 4 deletions pkg/app/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/vmware-tanzu/carvel-kapp-controller/pkg/exec"
"github.com/vmware-tanzu/carvel-kapp-controller/pkg/fetch"
"github.com/vmware-tanzu/carvel-kapp-controller/pkg/kubeconfig"
"github.com/vmware-tanzu/carvel-kapp-controller/pkg/metrics"
"github.com/vmware-tanzu/carvel-kapp-controller/pkg/reftracker"
"github.com/vmware-tanzu/carvel-kapp-controller/pkg/template"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -64,7 +65,7 @@ func Test_SecretRefs_RetrievesAllSecretRefs(t *testing.T) {
tmpFac := template.NewFactory(k8scs, fetchFac, false, exec.NewPlainCmdRunner())
deployFac := deploy.NewFactory(k8scs, kubeconfig.NewKubeconfig(k8scs, log), nil, exec.NewPlainCmdRunner(), log)

app := apppkg.NewApp(appWithRefs, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, nil, FakeComponentInfo{})
app := apppkg.NewApp(appWithRefs, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, metrics.NewMetrics(), FakeComponentInfo{})

out := app.SecretRefs()
assert.Truef(t, reflect.DeepEqual(out, expected), "Expected: %s\nGot: %s\n", expected, out)
Expand All @@ -88,7 +89,7 @@ func Test_SecretRefs_RetrievesNoSecretRefs_WhenNonePresent(t *testing.T) {
tmpFac := template.NewFactory(k8scs, fetchFac, false, exec.NewPlainCmdRunner())
deployFac := deploy.NewFactory(k8scs, kubeconfig.NewKubeconfig(k8scs, log), nil, exec.NewPlainCmdRunner(), log)

app := apppkg.NewApp(appEmpty, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, nil, FakeComponentInfo{})
app := apppkg.NewApp(appEmpty, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, metrics.NewMetrics(), FakeComponentInfo{})

out := app.SecretRefs()
assert.Equal(t, 0, len(out), "No SecretRefs to be returned")
Expand Down Expand Up @@ -126,7 +127,7 @@ func Test_ConfigMapRefs_RetrievesAllConfigMapRefs(t *testing.T) {
tmpFac := template.NewFactory(k8scs, fetchFac, false, exec.NewPlainCmdRunner())
deployFac := deploy.NewFactory(k8scs, kubeconfig.NewKubeconfig(k8scs, log), nil, exec.NewPlainCmdRunner(), log)

app := apppkg.NewApp(appWithRefs, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, nil, FakeComponentInfo{})
app := apppkg.NewApp(appWithRefs, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, metrics.NewMetrics(), FakeComponentInfo{})

out := app.ConfigMapRefs()
assert.Truef(t, reflect.DeepEqual(out, expected), "Expected: %s\nGot: %s\n", expected, out)
Expand All @@ -150,7 +151,7 @@ func Test_ConfigMapRefs_RetrievesNoConfigMapRefs_WhenNonePresent(t *testing.T) {
tmpFac := template.NewFactory(k8scs, fetchFac, false, exec.NewPlainCmdRunner())
deployFac := deploy.NewFactory(k8scs, kubeconfig.NewKubeconfig(k8scs, log), nil, exec.NewPlainCmdRunner(), log)

app := apppkg.NewApp(appEmpty, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, nil, FakeComponentInfo{})
app := apppkg.NewApp(appEmpty, apppkg.Hooks{}, fetchFac, tmpFac, deployFac, log, apppkg.Opts{}, metrics.NewMetrics(), FakeComponentInfo{})

out := app.ConfigMapRefs()
assert.Lenf(t, out, 0, "Expected: %s\nGot: %s\n", "No ConfigMapRefs to be returned", out)
Expand Down
14 changes: 6 additions & 8 deletions pkg/app/crd_app.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,18 @@ import (
)

type CRDApp struct {
app *App
appModel *kcv1alpha1.App
log logr.Logger
appMetrics *metrics.AppMetrics
appClient kcclient.Interface
app *App
appModel *kcv1alpha1.App
log logr.Logger
appClient kcclient.Interface
}

// NewCRDApp creates new CRD app
func NewCRDApp(appModel *kcv1alpha1.App, log logr.Logger, appMetrics *metrics.AppMetrics,
appClient kcclient.Interface, fetchFactory fetch.Factory,
func NewCRDApp(appModel *kcv1alpha1.App, log logr.Logger, appMetrics *metrics.Metrics, appClient kcclient.Interface, fetchFactory fetch.Factory,
templateFactory template.Factory, deployFactory deploy.Factory,
compInfo ComponentInfo, opts Opts) *CRDApp {

crdApp := &CRDApp{appModel: appModel, log: log, appMetrics: appMetrics, appClient: appClient}
crdApp := &CRDApp{appModel: appModel, log: log, appClient: appClient}

crdApp.app = NewApp(*appModel, Hooks{
BlockDeletion: crdApp.blockDeletion,
Expand Down
Loading

0 comments on commit e82ab40

Please sign in to comment.