Skip to content

Commit

Permalink
implement health checks
Browse files Browse the repository at this point in the history
Signed-off-by: Austin Abro <[email protected]>
  • Loading branch information
AustinAbro321 committed Sep 3, 2024
1 parent e239857 commit a1739d3
Show file tree
Hide file tree
Showing 17 changed files with 336 additions and 7 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ require (
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
github.com/hashicorp/golang-lru/arc/v2 v2.0.5 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.5 // indirect
github.com/onsi/gomega v1.32.0 // indirect
github.com/redis/go-redis/extra/rediscmd/v9 v9.0.5 // indirect
github.com/redis/go-redis/extra/redisotel/v9 v9.0.5 // indirect
github.com/redis/go-redis/v9 v9.3.0 // indirect
Expand Down
2 changes: 1 addition & 1 deletion site/src/content/docs/commands/zarf_dev_deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ zarf dev deploy [flags]
--registry-override stringToString Specify a map of domains to override on package create when pulling images (e.g. --registry-override docker.io=dockerio-reg.enterprise.intranet) (default [])
--retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3)
--skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed
--timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s)
--timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s)
```

### Options inherited from parent commands
Expand Down
2 changes: 1 addition & 1 deletion site/src/content/docs/commands/zarf_init.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ $ zarf init --artifact-push-password={PASSWORD} --artifact-push-username={USERNA
--set stringToString Specify deployment variables to set on the command line (KEY=value) (default [])
--skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed
--storage-class string Specify the storage class to use for the registry and git server. E.g. --storage-class=standard
--timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s)
--timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s)
```

### Options inherited from parent commands
Expand Down
2 changes: 1 addition & 1 deletion site/src/content/docs/commands/zarf_package_deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ zarf package deploy [ PACKAGE_SOURCE ] [flags]
--set stringToString Specify deployment variables to set on the command line (KEY=value) (default [])
--shasum string Shasum of the package to deploy. Required if deploying a remote package and "--insecure" is not provided
--skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed
--timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s)
--timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s)
```

### Options inherited from parent commands
Expand Down
18 changes: 18 additions & 0 deletions site/src/content/docs/ref/components.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,24 @@ When merging components together Zarf will adopt the following strategies depend

<ExampleYAML src={import("../../../../../examples/big-bang/zarf.yaml?raw")} component="bigbang" />

### Health Checks

<Properties item="ZarfComponent" include={["healthChecks"]} />

Health checks wait until the specified resources are fully reconciled, meaning that their desired and current states match. Internally, [kstatus](https://github.com/kubernetes-sigs/cli-utils/blob/master/pkg/kstatus/README.md#kstatus) is used to assess when reconciliation is complete. Health checks support all Kubernetes resources that implement the [status](https://kubernetes.io/docs/concepts/overview/working-with-objects/#object-spec-and-status) field, including custom resource definitions. If the status field is not implemented on a resource, it will automatically pass the health check.

```yaml
healthChecks:
  - name: my-pod
    namespace: my-namespace
    apiVersion: v1
    kind: Pod
  - name: my-stateful-set
    namespace: my-namespace
    apiVersion: apps/v1
    kind: StatefulSet
```
## Deploying Components
When deploying a Zarf package, components are deployed in the order they are defined in the `zarf.yaml`.
Expand Down
18 changes: 17 additions & 1 deletion src/api/v1alpha1/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ type ZarfComponent struct {

// Custom commands to run at various stages of a package lifecycle.
Actions ZarfComponentActions `json:"actions,omitempty"`

// List of resources to health check after deployment
HealthChecks []NamespacedObjectKindReference `json:"healthChecks,omitempty"`
}

// NamespacedObjectKindReference is a reference to a specific resource in a namespace using its kind and API version.
// None of the fields use omitempty, so all four keys are always present when the reference is encoded as JSON.
type NamespacedObjectKindReference struct {
// API Version of the resource, for example "v1" or "apps/v1"
APIVersion string `json:"apiVersion"`
// Kind of the resource, for example "Pod" or "StatefulSet"
Kind string `json:"kind"`
// Namespace of the resource
Namespace string `json:"namespace"`
// Name of the resource
Name string `json:"name"`
}

// RequiresCluster returns if the component requires a cluster connection to deploy.
Expand All @@ -70,8 +85,9 @@ func (c ZarfComponent) RequiresCluster() bool {
hasManifests := len(c.Manifests) > 0
hasRepos := len(c.Repos) > 0
hasDataInjections := len(c.DataInjections) > 0
hasHealthChecks := len(c.HealthChecks) > 0

if hasImages || hasCharts || hasManifests || hasRepos || hasDataInjections {
if hasImages || hasCharts || hasManifests || hasRepos || hasDataInjections || hasHealthChecks {
return true
}

Expand Down
15 changes: 15 additions & 0 deletions src/api/v1beta1/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ type ZarfComponent struct {

// Custom commands to run at various stages of a package lifecycle.
Actions ZarfComponentActions `json:"actions,omitempty"`

// List of resources to health check after deployment
HealthChecks []NamespacedObjectKindReference `json:"healthChecks,omitempty"`
}

// NamespacedObjectKindReference is a reference to a specific resource in a namespace using its kind and API version.
// None of the fields use omitempty, so all four keys are always present when the reference is encoded as JSON.
type NamespacedObjectKindReference struct {
// API Version of the resource, for example "v1" or "apps/v1"
APIVersion string `json:"apiVersion"`
// Kind of the resource, for example "Pod" or "StatefulSet"
Kind string `json:"kind"`
// Namespace of the resource
Namespace string `json:"namespace"`
// Name of the resource
Name string `json:"name"`
}

// RequiresCluster returns if the component requires a cluster connection to deploy.
Expand Down
2 changes: 1 addition & 1 deletion src/config/lang/english.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ $ zarf package mirror-resources <your-package.tar.zst> \
CmdPackageDeployFlagShasum = "Shasum of the package to deploy. Required if deploying a remote package and \"--insecure\" is not provided"
CmdPackageDeployFlagSget = "[Deprecated] Path to public sget key file for remote packages signed via cosign. This flag will be removed in v1.0.0 please use the --key flag instead."
CmdPackageDeployFlagSkipWebhooks = "[alpha] Skip waiting for external webhooks to execute as each package component is deployed"
CmdPackageDeployFlagTimeout = "Timeout for Helm operations such as installs and rollbacks"
CmdPackageDeployFlagTimeout = "Timeout for health checks and Helm operations such as installs and rollbacks"
CmdPackageDeployValidateArchitectureErr = "this package architecture is %s, but the target cluster only has the %s architecture(s). These architectures must be compatible when \"images\" are present"
CmdPackageDeployValidateLastNonBreakingVersionWarn = "The version of this Zarf binary '%s' is less than the LastNonBreakingVersion of '%s'. You may need to upgrade your Zarf version to at least '%s' to deploy this package"
CmdPackageDeployInvalidCLIVersionWarn = "CLIVersion is set to '%s' which can cause issues with package creation and deployment. To avoid such issues, please set the value to the valid semantic version for this version of Zarf."
Expand Down
1 change: 1 addition & 0 deletions src/pkg/packager/composer/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ func (ic *ImportChain) Compose(ctx context.Context) (composed *v1alpha1.ZarfComp
overrideDeprecated(composed, node.ZarfComponent)
overrideResources(composed, node.ZarfComponent)
overrideActions(composed, node.ZarfComponent)
composed.HealthChecks = append(composed.HealthChecks, node.ZarfComponent.HealthChecks...)

bigbang.Compose(composed, node.ZarfComponent, node.relativeToHead)

Expand Down
44 changes: 44 additions & 0 deletions src/pkg/packager/composer/list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,50 @@ func TestCompose(t *testing.T) {
Name: "no-import",
},
},
{
name: "Health Checks",
ic: createChainFromSlice(t, []v1alpha1.ZarfComponent{
{
Name: "base",
HealthChecks: []v1alpha1.NamespacedObjectKindReference{
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "base-ns",
Name: "base-pod",
},
},
},
{
Name: "import-one",
HealthChecks: []v1alpha1.NamespacedObjectKindReference{
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "import-ns",
Name: "import-pod",
},
},
},
}),
expectedComposed: v1alpha1.ZarfComponent{
Name: "base",
HealthChecks: []v1alpha1.NamespacedObjectKindReference{
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "import-ns",
Name: "import-pod",
},
{
APIVersion: "v1",
Kind: "Pods",
Namespace: "base-ns",
Name: "base-pod",
},
},
},
},
{
name: "Multiple Components",
ic: createChainFromSlice(t, []v1alpha1.ZarfComponent{
Expand Down
41 changes: 40 additions & 1 deletion src/pkg/packager/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,15 @@ import (
"golang.org/x/sync/errgroup"

"github.com/avast/retry-go/v4"
"github.com/defenseunicorns/pkg/helpers/v2"
pkgkubernetes "github.com/defenseunicorns/pkg/kubernetes"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/cli-utils/pkg/kstatus/watcher"
"sigs.k8s.io/cli-utils/pkg/object"

"github.com/defenseunicorns/pkg/helpers/v2"
"github.com/zarf-dev/zarf/src/api/v1alpha1"
"github.com/zarf-dev/zarf/src/config"
"github.com/zarf-dev/zarf/src/internal/git"
Expand Down Expand Up @@ -234,6 +238,30 @@ func (p *Packager) deployComponents(ctx context.Context) ([]types.DeployedCompon
return deployedComponents, nil
}

// runHealthChecks converts each health check reference into an object.ObjMetadata and hands the
// full set to pkgkubernetes.WaitForReady together with ctx and the status watcher.
//
// Only the group part of a reference's APIVersion is used (the version is parsed but discarded),
// alongside its Kind, Namespace and Name.
//
// It returns an error when an APIVersion cannot be parsed, naming the offending health check, or
// whatever error WaitForReady returns. The WaitForReady error is returned unwrapped, and the
// parse error is wrapped with %w, so callers can still match either with errors.Is / errors.As.
func runHealthChecks(ctx context.Context, watcher watcher.StatusWatcher, healthChecks []v1alpha1.NamespacedObjectKindReference) error {
	// The final length is known up front, so allocate once instead of growing on append.
	objs := make([]object.ObjMetadata, 0, len(healthChecks))
	for _, hc := range healthChecks {
		gv, err := schema.ParseGroupVersion(hc.APIVersion)
		if err != nil {
			// Say which entry is malformed; a component may declare many health checks.
			return fmt.Errorf("parsing apiVersion %q of health check %s %s/%s: %w", hc.APIVersion, hc.Kind, hc.Namespace, hc.Name, err)
		}
		objs = append(objs, object.ObjMetadata{
			GroupKind: schema.GroupKind{
				Group: gv.Group,
				Kind:  hc.Kind,
			},
			Namespace: hc.Namespace,
			Name:      hc.Name,
		})
	}
	return pkgkubernetes.WaitForReady(ctx, watcher, objs)
}

func (p *Packager) deployInitComponent(ctx context.Context, component v1alpha1.ZarfComponent) ([]types.InstalledChart, error) {
hasExternalRegistry := p.cfg.InitOpts.RegistryInfo.Address != ""
isSeedRegistry := component.Name == "zarf-seed-registry"
Expand Down Expand Up @@ -370,6 +398,17 @@ func (p *Packager) deployComponent(ctx context.Context, component v1alpha1.ZarfC
return nil, fmt.Errorf("unable to run component after action: %w", err)
}

if len(component.HealthChecks) > 0 {
healthCheckContext, cancel := context.WithTimeout(ctx, p.cfg.DeployOpts.Timeout)
defer cancel()
spinner := message.NewProgressSpinner("Running Health checks for %s", component.Name)
defer spinner.Stop()
if err = runHealthChecks(healthCheckContext, p.cluster.Watcher, component.HealthChecks); err != nil {
return nil, fmt.Errorf("health checks failed: %w", err)
}
spinner.Success()
}

err = g.Wait()
if err != nil {
return nil, err
Expand Down
89 changes: 89 additions & 0 deletions src/pkg/packager/deploy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@
package packager

import (
"context"
"testing"
"time"

"github.com/stretchr/testify/require"
"github.com/zarf-dev/zarf/src/api/v1alpha1"
"github.com/zarf-dev/zarf/src/pkg/packager/sources"
"github.com/zarf-dev/zarf/src/types"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/yaml"
dynamicfake "k8s.io/client-go/dynamic/fake"
"k8s.io/kubectl/pkg/scheme"
"sigs.k8s.io/cli-utils/pkg/kstatus/watcher"
"sigs.k8s.io/cli-utils/pkg/testutil"
)

func TestGenerateValuesOverrides(t *testing.T) {
Expand Down Expand Up @@ -272,3 +282,82 @@ func TestServiceInfoFromServiceURL(t *testing.T) {
})
}
}

// podCurrentYaml is a Pod manifest named "good-pod" in namespace "ns" whose status carries a
// Ready=True condition and a Running phase. TestRunHealthChecks uses it for the case that is
// expected to pass.
//
// The indentation inside the raw string is significant: name/namespace must nest under
// metadata, and conditions/phase under status.
var podCurrentYaml = `
apiVersion: v1
kind: Pod
metadata:
  name: good-pod
  namespace: ns
status:
  conditions:
  - type: Ready
    status: "True"
  phase: Running
`

// podYaml is a Pod manifest named "in-progress-pod" in namespace "ns" with no status section.
// TestRunHealthChecks uses it for the case that is expected to time out.
var podYaml = `
apiVersion: v1
kind: Pod
metadata:
  name: in-progress-pod
  namespace: ns
`

// yamlToUnstructured decodes the YAML document yml into a generic map and wraps that map in an
// unstructured.Unstructured. Decoding errors fail the calling test immediately.
func yamlToUnstructured(t *testing.T, yml string) *unstructured.Unstructured {
	t.Helper()
	content := map[string]interface{}{}
	require.NoError(t, yaml.Unmarshal([]byte(yml), &content))
	return &unstructured.Unstructured{Object: content}
}

// TestRunHealthChecks runs runHealthChecks against a fake dynamic client seeded with a single
// Pod, under a 500ms deadline. A Pod with a Ready/Running status must pass; a Pod without any
// status must fail with context.DeadlineExceeded.
func TestRunHealthChecks(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name      string
		podYaml   string
		expectErr error
	}
	cases := []testCase{
		{name: "Pod is running", podYaml: podCurrentYaml, expectErr: nil},
		{name: "Pod is never ready", podYaml: podYaml, expectErr: context.DeadlineExceeded},
	}

	// Resource under which the fake tracker stores the seeded Pod.
	podGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			dynClient := dynamicfake.NewSimpleDynamicClient(scheme.Scheme)
			restMapper := testutil.NewFakeRESTMapper(v1.SchemeGroupVersion.WithKind("Pod"))

			// Short deadline so the "never ready" case finishes quickly.
			timeoutCtx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
			defer cancel()

			pod := yamlToUnstructured(t, tc.podYaml)
			statusWatcher := watcher.NewDefaultStatusWatcher(dynClient, restMapper)
			require.NoError(t, dynClient.Tracker().Create(podGVR, pod, pod.GetNamespace()))

			refs := []v1alpha1.NamespacedObjectKindReference{{
				APIVersion: pod.GetAPIVersion(),
				Kind:       pod.GetKind(),
				Namespace:  pod.GetNamespace(),
				Name:       pod.GetName(),
			}}

			err := runHealthChecks(timeoutCtx, statusWatcher, refs)
			if tc.expectErr == nil {
				require.NoError(t, err)
				return
			}
			require.ErrorIs(t, err, tc.expectErr)
		})
	}
}
1 change: 0 additions & 1 deletion src/pkg/packager/prepare.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ func (p *Packager) findImages(ctx context.Context) (map[string][]string, error)
// Break the manifest into separate resources
yamls, err := utils.SplitYAML(contents)
if err != nil {
fmt.Println("got this err")
return nil, err
}
resources = append(resources, yamls...)
Expand Down
34 changes: 34 additions & 0 deletions src/test/e2e/36_health_check_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: 2021-Present The Zarf Authors

// Package test provides e2e tests for Zarf.
package test

import (
"fmt"

"testing"

"github.com/stretchr/testify/require"
)

// TestHealthChecks builds the 36-health-checks test package, deploys it, and then asserts that
// the pod "ready-pod" in namespace "health-checks" reports phase Running. The package is removed
// again when the test function returns.
func TestHealthChecks(t *testing.T) {
	t.Log("E2E: Health Checks")

	_, _, createErr := e2e.Zarf(t, "package", "create", "src/test/packages/36-health-checks", "-o=build", "--confirm")
	require.NoError(t, createErr)

	pkgPath := fmt.Sprintf("build/zarf-package-health-checks-%s.tar.zst", e2e.Arch)

	_, _, deployErr := e2e.Zarf(t, "package", "deploy", pkgPath, "--confirm")
	require.NoError(t, deployErr)

	// Registered only after a successful deploy, so removal always has something to remove.
	defer func() {
		_, _, removeErr := e2e.Zarf(t, "package", "remove", "health-checks", "--confirm")
		require.NoError(t, removeErr)
	}()

	phase, _, kubectlErr := e2e.Kubectl(t, "get", "pod", "ready-pod", "-n", "health-checks", "-o", "jsonpath={.status.phase}")
	require.NoError(t, kubectlErr)
	require.Equal(t, "Running", phase)
}
Loading

0 comments on commit a1739d3

Please sign in to comment.