Skip to content

Commit

Permalink
add health checks
Browse files Browse the repository at this point in the history
Signed-off-by: Austin Abro <[email protected]>
  • Loading branch information
AustinAbro321 committed Aug 29, 2024
1 parent d7eb999 commit 730ba5d
Show file tree
Hide file tree
Showing 17 changed files with 343 additions and 15 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
github.com/hashicorp/golang-lru/arc/v2 v2.0.5 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.5 // indirect
github.com/onsi/gomega v1.32.0 // indirect
github.com/redis/go-redis/extra/rediscmd/v9 v9.0.5 // indirect
github.com/redis/go-redis/extra/redisotel/v9 v9.0.5 // indirect
github.com/redis/go-redis/v9 v9.3.0 // indirect
Expand Down
2 changes: 1 addition & 1 deletion site/src/content/docs/commands/zarf_dev_deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ zarf dev deploy [flags]
--registry-override stringToString Specify a map of domains to override on package create when pulling images (e.g. --registry-override docker.io=dockerio-reg.enterprise.intranet) (default [])
--retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3)
--skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed
--timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s)
--timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s)
```

### Options inherited from parent commands
Expand Down
2 changes: 1 addition & 1 deletion site/src/content/docs/commands/zarf_init.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ $ zarf init --artifact-push-password={PASSWORD} --artifact-push-username={USERNA
--set stringToString Specify deployment variables to set on the command line (KEY=value) (default [])
--skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed
--storage-class string Specify the storage class to use for the registry and git server. E.g. --storage-class=standard
--timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s)
--timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s)
```

### Options inherited from parent commands
Expand Down
2 changes: 1 addition & 1 deletion site/src/content/docs/commands/zarf_package_deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ zarf package deploy [ PACKAGE_SOURCE ] [flags]
--set stringToString Specify deployment variables to set on the command line (KEY=value) (default [])
--shasum string Shasum of the package to deploy. Required if deploying a remote package and "--insecure" is not provided
--skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed
--timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s)
--timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s)
```

### Options inherited from parent commands
Expand Down
18 changes: 18 additions & 0 deletions site/src/content/docs/ref/components.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,24 @@ When merging components together Zarf will adopt the following strategies depend

<ExampleYAML src={import("../../../../../examples/big-bang/zarf.yaml?raw")} component="bigbang" />

### Health Checks

<Properties item="ZarfComponent" include={["healthChecks"]} />

Health checks wait until the specified resources are fully reconciled, meaning that their desired and current states match. Internally, [kstatus](https://github.com/kubernetes-sigs/cli-utils/blob/master/pkg/kstatus/README.md#kstatus) is used to assess when reconciliation is complete. Health checks support all Kubernetes resources that implement the [status](https://kubernetes.io/docs/concepts/overview/working-with-objects/#object-spec-and-status) field, including custom resource definitions. If the status field is not implemented on a resource, it will automatically pass the health check.

```yaml
healthChecks:
- name: my-pod
namespace: my-namespace
apiVersion: v1
kind: Pod
- name: my-stateful-set
namespace: my-namespace
apiVersion: apps/v1
kind: StatefulSet
```
## Deploying Components
When deploying a Zarf package, components are deployed in the order they are defined in the `zarf.yaml`.
Expand Down
18 changes: 17 additions & 1 deletion src/api/v1alpha1/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ type ZarfComponent struct {

// Custom commands to run at various stages of a package lifecycle.
Actions ZarfComponentActions `json:"actions,omitempty"`

// List of resources to health check after deployment
HealthChecks []NamespacedObjectKindReference `json:"healthChecks,omitempty"`
}

// NamespacedObjectKindReference is a reference to a specific resource in a namespace using its kind and API version.
// None of the JSON tags use omitempty, so all four keys are always emitted when the value is marshaled.
type NamespacedObjectKindReference struct {
// API Version of the resource, e.g. "v1" or "apps/v1"
APIVersion string `json:"apiVersion"`
// Kind of the resource, e.g. "Pod" or "StatefulSet"
Kind string `json:"kind"`
// Namespace the resource lives in
Namespace string `json:"namespace"`
// Name of the resource within that namespace
Name string `json:"name"`
}

// RequiresCluster returns if the component requires a cluster connection to deploy.
Expand All @@ -70,8 +85,9 @@ func (c ZarfComponent) RequiresCluster() bool {
hasManifests := len(c.Manifests) > 0
hasRepos := len(c.Repos) > 0
hasDataInjections := len(c.DataInjections) > 0
hasHealthChecks := len(c.HealthChecks) > 0

if hasImages || hasCharts || hasManifests || hasRepos || hasDataInjections {
if hasImages || hasCharts || hasManifests || hasRepos || hasDataInjections || hasHealthChecks {
return true
}

Expand Down
15 changes: 15 additions & 0 deletions src/api/v1beta1/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ type ZarfComponent struct {

// Custom commands to run at various stages of a package lifecycle.
Actions ZarfComponentActions `json:"actions,omitempty"`

// List of resources to health check after deployment
HealthChecks []NamespacedObjectKindReference `json:"healthChecks,omitempty"`
}

// NamespacedObjectKindReference is a reference to a specific resource in a namespace using its kind and API version.
// None of the JSON tags use omitempty, so all four keys are always emitted when the value is marshaled.
type NamespacedObjectKindReference struct {
// API Version of the resource, e.g. "v1" or "apps/v1"
APIVersion string `json:"apiVersion"`
// Kind of the resource, e.g. "Pod" or "StatefulSet"
Kind string `json:"kind"`
// Namespace the resource lives in
Namespace string `json:"namespace"`
// Name of the resource within that namespace
Name string `json:"name"`
}

// RequiresCluster returns if the component requires a cluster connection to deploy.
Expand Down
2 changes: 1 addition & 1 deletion src/config/lang/english.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ $ zarf package mirror-resources <your-package.tar.zst> \
CmdPackageDeployFlagShasum = "Shasum of the package to deploy. Required if deploying a remote package and \"--insecure\" is not provided"
CmdPackageDeployFlagSget = "[Deprecated] Path to public sget key file for remote packages signed via cosign. This flag will be removed in v1.0.0 please use the --key flag instead."
CmdPackageDeployFlagSkipWebhooks = "[alpha] Skip waiting for external webhooks to execute as each package component is deployed"
CmdPackageDeployFlagTimeout = "Timeout for Helm operations such as installs and rollbacks"
CmdPackageDeployFlagTimeout = "Timeout for health checks and Helm operations such as installs and rollbacks"
CmdPackageDeployValidateArchitectureErr = "this package architecture is %s, but the target cluster only has the %s architecture(s). These architectures must be compatible when \"images\" are present"
CmdPackageDeployValidateLastNonBreakingVersionWarn = "The version of this Zarf binary '%s' is less than the LastNonBreakingVersion of '%s'. You may need to upgrade your Zarf version to at least '%s' to deploy this package"
CmdPackageDeployInvalidCLIVersionWarn = "CLIVersion is set to '%s' which can cause issues with package creation and deployment. To avoid such issues, please set the value to the valid semantic version for this version of Zarf."
Expand Down
1 change: 1 addition & 0 deletions src/pkg/packager/composer/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ func (ic *ImportChain) Compose(ctx context.Context) (composed *v1alpha1.ZarfComp
overrideDeprecated(composed, node.ZarfComponent)
overrideResources(composed, node.ZarfComponent)
overrideActions(composed, node.ZarfComponent)
composed.HealthChecks = append(composed.HealthChecks, node.ZarfComponent.HealthChecks...)

bigbang.Compose(composed, node.ZarfComponent, node.relativeToHead)

Expand Down
44 changes: 44 additions & 0 deletions src/pkg/packager/composer/list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,50 @@ func TestCompose(t *testing.T) {
Name: "no-import",
},
},
{
name: "Health Checks",
ic: createChainFromSlice(t, []v1alpha1.ZarfComponent{
{
Name: "base",
HealthChecks: []v1alpha1.NamespacedObjectKindReference{
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "base-ns",
Name: "base-pod",
},
},
},
{
Name: "import-one",
HealthChecks: []v1alpha1.NamespacedObjectKindReference{
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "import-ns",
Name: "import-pod",
},
},
},
}),
expectedComposed: v1alpha1.ZarfComponent{
Name: "base",
HealthChecks: []v1alpha1.NamespacedObjectKindReference{
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "import-ns",
Name: "import-pod",
},
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "base-ns",
Name: "base-pod",
},
},
},
},
{
name: "Multiple Components",
ic: createChainFromSlice(t, []v1alpha1.ZarfComponent{
Expand Down
57 changes: 48 additions & 9 deletions src/pkg/packager/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,15 @@ import (
"golang.org/x/sync/errgroup"

"github.com/avast/retry-go/v4"
"github.com/defenseunicorns/pkg/helpers/v2"
pkgkubernetes "github.com/defenseunicorns/pkg/kubernetes"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/cli-utils/pkg/kstatus/watcher"
"sigs.k8s.io/cli-utils/pkg/object"

"github.com/defenseunicorns/pkg/helpers/v2"
"github.com/zarf-dev/zarf/src/api/v1alpha1"
"github.com/zarf-dev/zarf/src/config"
"github.com/zarf-dev/zarf/src/internal/git"
Expand Down Expand Up @@ -233,6 +237,30 @@ func (p *Packager) deployComponents(ctx context.Context) (deployedComponents []t
return deployedComponents, nil
}

// runHealthChecks converts each health check reference into an object.ObjMetadata and
// hands the full set to pkgkubernetes.WaitForReady along with the supplied status watcher.
//
// The apiVersion of every reference is parsed with schema.ParseGroupVersion; only the
// resulting group is used (together with kind, namespace and name) to identify the object,
// the version component is discarded. If any apiVersion fails to parse, no wait is started
// and the returned error names the offending health check. Otherwise the result of
// WaitForReady is returned unchanged. Bounding the wait is left to the caller via ctx.
func runHealthChecks(ctx context.Context, watcher watcher.StatusWatcher, healthChecks []v1alpha1.NamespacedObjectKindReference) error {
	// The final length is known up front, so allocate once instead of growing on append.
	objs := make([]object.ObjMetadata, 0, len(healthChecks))
	for _, hc := range healthChecks {
		gv, err := schema.ParseGroupVersion(hc.APIVersion)
		if err != nil {
			// Identify which entry is malformed; the bare parse error only echoes the string.
			return fmt.Errorf("parsing apiVersion %q of health check %s %s/%s: %w", hc.APIVersion, hc.Kind, hc.Namespace, hc.Name, err)
		}
		objs = append(objs, object.ObjMetadata{
			GroupKind: schema.GroupKind{
				Group: gv.Group,
				Kind:  hc.Kind,
			},
			Namespace: hc.Namespace,
			Name:      hc.Name,
		})
	}
	return pkgkubernetes.WaitForReady(ctx, watcher, objs)
}

func (p *Packager) deployInitComponent(ctx context.Context, component v1alpha1.ZarfComponent) (charts []types.InstalledChart, err error) {
hasExternalRegistry := p.cfg.InitOpts.RegistryInfo.Address != ""
isSeedRegistry := component.Name == "zarf-seed-registry"
Expand Down Expand Up @@ -307,7 +335,7 @@ func (p *Packager) deployComponent(ctx context.Context, component v1alpha1.ZarfC
if p.state == nil {
err = p.setupState(ctx)
if err != nil {
return charts, err
return nil, err
}
}

Expand All @@ -323,28 +351,28 @@ func (p *Packager) deployComponent(ctx context.Context, component v1alpha1.ZarfC

err = p.populateComponentAndStateTemplates(component.Name)
if err != nil {
return charts, err
return nil, err
}

if err = actions.Run(ctx, onDeploy.Defaults, onDeploy.Before, p.variableConfig); err != nil {
return charts, fmt.Errorf("unable to run component before action: %w", err)
return nil, fmt.Errorf("unable to run component before action: %w", err)
}

if hasFiles {
if err := p.processComponentFiles(component, componentPath.Files); err != nil {
return charts, fmt.Errorf("unable to process the component files: %w", err)
return nil, fmt.Errorf("unable to process the component files: %w", err)
}
}

if hasImages {
if err := p.pushImagesToRegistry(ctx, component.Images, noImgChecksum); err != nil {
return charts, fmt.Errorf("unable to push images to the registry: %w", err)
return nil, fmt.Errorf("unable to push images to the registry: %w", err)
}
}

if hasRepos {
if err = p.pushReposToRepository(ctx, componentPath.Repos, component.Repos); err != nil {
return charts, fmt.Errorf("unable to push the repos to the repository: %w", err)
return nil, fmt.Errorf("unable to push the repos to the repository: %w", err)
}
}

Expand All @@ -357,12 +385,23 @@ func (p *Packager) deployComponent(ctx context.Context, component v1alpha1.ZarfC

if hasCharts || hasManifests {
if charts, err = p.installChartAndManifests(ctx, componentPath, component); err != nil {
return charts, err
return nil, err
}
}

if err = actions.Run(ctx, onDeploy.Defaults, onDeploy.After, p.variableConfig); err != nil {
return charts, fmt.Errorf("unable to run component after action: %w", err)
return nil, fmt.Errorf("unable to run component after action: %w", err)
}

if len(component.HealthChecks) > 0 {
healthCheckContext, cancel := context.WithTimeout(ctx, p.cfg.DeployOpts.Timeout)
defer cancel()
spinner := message.NewProgressSpinner("Running Health checks for %s", component.Name)
defer spinner.Stop()
if err = runHealthChecks(healthCheckContext, p.cluster.Watcher, component.HealthChecks); err != nil {
return nil, fmt.Errorf("health checks failed: %w", err)
}
spinner.Success()
}

err = g.Wait()
Expand Down
Loading

0 comments on commit 730ba5d

Please sign in to comment.