diff --git a/go.mod b/go.mod index f800815edc..89db4ffeeb 100644 --- a/go.mod +++ b/go.mod @@ -80,6 +80,7 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect github.com/hashicorp/golang-lru/arc/v2 v2.0.5 // indirect github.com/hashicorp/golang-lru/v2 v2.0.5 // indirect + github.com/onsi/gomega v1.32.0 // indirect github.com/redis/go-redis/extra/rediscmd/v9 v9.0.5 // indirect github.com/redis/go-redis/extra/redisotel/v9 v9.0.5 // indirect github.com/redis/go-redis/v9 v9.3.0 // indirect diff --git a/site/src/content/docs/commands/zarf_dev_deploy.md b/site/src/content/docs/commands/zarf_dev_deploy.md index 29293c1d9a..41ee7f0b85 100644 --- a/site/src/content/docs/commands/zarf_dev_deploy.md +++ b/site/src/content/docs/commands/zarf_dev_deploy.md @@ -31,7 +31,7 @@ zarf dev deploy [flags] --registry-override stringToString Specify a map of domains to override on package create when pulling images (e.g. --registry-override docker.io=dockerio-reg.enterprise.intranet) (default []) --retries int Number of retries to perform for Zarf deploy operations like git/image pushes or Helm installs (default 3) --skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed - --timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s) + --timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s) ``` ### Options inherited from parent commands diff --git a/site/src/content/docs/commands/zarf_init.md b/site/src/content/docs/commands/zarf_init.md index 8fc88244c3..5702caa72b 100644 --- a/site/src/content/docs/commands/zarf_init.md +++ b/site/src/content/docs/commands/zarf_init.md @@ -78,7 +78,7 @@ $ zarf init --artifact-push-password={PASSWORD} --artifact-push-username={USERNA --set stringToString Specify deployment variables to set on the command line (KEY=value) (default []) --skip-webhooks [alpha] Skip waiting for external webhooks to 
execute as each package component is deployed --storage-class string Specify the storage class to use for the registry and git server. E.g. --storage-class=standard - --timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s) + --timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s) ``` ### Options inherited from parent commands diff --git a/site/src/content/docs/commands/zarf_package_deploy.md b/site/src/content/docs/commands/zarf_package_deploy.md index 07f2fa46de..d89b0f1bbc 100644 --- a/site/src/content/docs/commands/zarf_package_deploy.md +++ b/site/src/content/docs/commands/zarf_package_deploy.md @@ -30,7 +30,7 @@ zarf package deploy [ PACKAGE_SOURCE ] [flags] --set stringToString Specify deployment variables to set on the command line (KEY=value) (default []) --shasum string Shasum of the package to deploy. Required if deploying a remote package and "--insecure" is not provided --skip-webhooks [alpha] Skip waiting for external webhooks to execute as each package component is deployed - --timeout duration Timeout for Helm operations such as installs and rollbacks (default 15m0s) + --timeout duration Timeout for health checks and Helm operations such as installs and rollbacks (default 15m0s) ``` ### Options inherited from parent commands diff --git a/site/src/content/docs/ref/components.mdx b/site/src/content/docs/ref/components.mdx index 50be327f1b..419d43bc6d 100644 --- a/site/src/content/docs/ref/components.mdx +++ b/site/src/content/docs/ref/components.mdx @@ -267,6 +267,24 @@ When merging components together Zarf will adopt the following strategies depend +### Health Checks + + + +Health checks wait until the specified resources are fully reconciled, meaning that their desired and current states match. Internally, [kstatus](https://github.com/kubernetes-sigs/cli-utils/blob/master/pkg/kstatus/README.md#kstatus) is used to assess when reconciliation is complete. 
Health checks support all Kubernetes resources that implement the [status](https://kubernetes.io/docs/concepts/overview/working-with-objects/#object-spec-and-status) field, including custom resource definitions. If the status field is not implemented on a resource, it will automatically pass the health check. + +```yaml + healthChecks: + - name: my-pod + namespace: my-namespace + apiVersion: v1 + kind: Pod + - name: my-stateful-set + namespace: my-namespace + apiVersion: apps/v1 + kind: StatefulSet +``` + ## Deploying Components When deploying a Zarf package, components are deployed in the order they are defined in the `zarf.yaml`. diff --git a/src/api/v1alpha1/component.go b/src/api/v1alpha1/component.go index 9739de8644..9827410b43 100644 --- a/src/api/v1alpha1/component.go +++ b/src/api/v1alpha1/component.go @@ -61,6 +61,21 @@ type ZarfComponent struct { // Custom commands to run at various stages of a package lifecycle. Actions ZarfComponentActions `json:"actions,omitempty"` + + // List of resources to health check after deployment + HealthChecks []NamespacedObjectKindReference `json:"healthChecks,omitempty"` +} + +// NamespacedObjectKindReference is a reference to a specific resource in a namespace using its kind and API version. +type NamespacedObjectKindReference struct { + // API Version of the resource + APIVersion string `json:"apiVersion"` + // Kind of the resource + Kind string `json:"kind"` + // Namespace of the resource + Namespace string `json:"namespace"` + // Name of the resource + Name string `json:"name"` } // RequiresCluster returns if the component requires a cluster connection to deploy. 
@@ -70,8 +85,9 @@ func (c ZarfComponent) RequiresCluster() bool { hasManifests := len(c.Manifests) > 0 hasRepos := len(c.Repos) > 0 hasDataInjections := len(c.DataInjections) > 0 + hasHealthChecks := len(c.HealthChecks) > 0 - if hasImages || hasCharts || hasManifests || hasRepos || hasDataInjections { + if hasImages || hasCharts || hasManifests || hasRepos || hasDataInjections || hasHealthChecks { return true } diff --git a/src/api/v1beta1/component.go b/src/api/v1beta1/component.go index 2d27777c54..aa8ec9bff4 100644 --- a/src/api/v1beta1/component.go +++ b/src/api/v1beta1/component.go @@ -49,6 +49,21 @@ type ZarfComponent struct { // Custom commands to run at various stages of a package lifecycle. Actions ZarfComponentActions `json:"actions,omitempty"` + + // List of resources to health check after deployment + HealthChecks []NamespacedObjectKindReference `json:"healthChecks,omitempty"` +} + +// NamespacedObjectKindReference is a reference to a specific resource in a namespace using its kind and API version. +type NamespacedObjectKindReference struct { + // API Version of the resource + APIVersion string `json:"apiVersion"` + // Kind of the resource + Kind string `json:"kind"` + // Namespace of the resource + Namespace string `json:"namespace"` + // Name of the resource + Name string `json:"name"` } // RequiresCluster returns if the component requires a cluster connection to deploy. diff --git a/src/config/lang/english.go b/src/config/lang/english.go index 50ce790c44..1afdfab83c 100644 --- a/src/config/lang/english.go +++ b/src/config/lang/english.go @@ -276,7 +276,7 @@ $ zarf package mirror-resources \ CmdPackageDeployFlagShasum = "Shasum of the package to deploy. Required if deploying a remote package and \"--insecure\" is not provided" CmdPackageDeployFlagSget = "[Deprecated] Path to public sget key file for remote packages signed via cosign. This flag will be removed in v1.0.0 please use the --key flag instead." 
CmdPackageDeployFlagSkipWebhooks = "[alpha] Skip waiting for external webhooks to execute as each package component is deployed" - CmdPackageDeployFlagTimeout = "Timeout for Helm operations such as installs and rollbacks" + CmdPackageDeployFlagTimeout = "Timeout for health checks and Helm operations such as installs and rollbacks" CmdPackageDeployValidateArchitectureErr = "this package architecture is %s, but the target cluster only has the %s architecture(s). These architectures must be compatible when \"images\" are present" CmdPackageDeployValidateLastNonBreakingVersionWarn = "The version of this Zarf binary '%s' is less than the LastNonBreakingVersion of '%s'. You may need to upgrade your Zarf version to at least '%s' to deploy this package" CmdPackageDeployInvalidCLIVersionWarn = "CLIVersion is set to '%s' which can cause issues with package creation and deployment. To avoid such issues, please set the value to the valid semantic version for this version of Zarf." diff --git a/src/pkg/packager/composer/list.go b/src/pkg/packager/composer/list.go index 291fdb71db..b021355739 100644 --- a/src/pkg/packager/composer/list.go +++ b/src/pkg/packager/composer/list.go @@ -337,6 +337,7 @@ func (ic *ImportChain) Compose(ctx context.Context) (composed *v1alpha1.ZarfComp overrideDeprecated(composed, node.ZarfComponent) overrideResources(composed, node.ZarfComponent) overrideActions(composed, node.ZarfComponent) + composed.HealthChecks = append(composed.HealthChecks, node.ZarfComponent.HealthChecks...) 
bigbang.Compose(composed, node.ZarfComponent, node.relativeToHead) diff --git a/src/pkg/packager/composer/list_test.go b/src/pkg/packager/composer/list_test.go index 1703348b3d..9cbeab2a3b 100644 --- a/src/pkg/packager/composer/list_test.go +++ b/src/pkg/packager/composer/list_test.go @@ -82,6 +82,50 @@ func TestCompose(t *testing.T) { Name: "no-import", }, }, + { + name: "Health Checks", + ic: createChainFromSlice(t, []v1alpha1.ZarfComponent{ + { + Name: "base", + HealthChecks: []v1alpha1.NamespacedObjectKindReference{ + { + APIVersion: "v1", + Kind: "Pods", + Namespace: "base-ns", + Name: "base-pod", + }, + }, + }, + { + Name: "import-one", + HealthChecks: []v1alpha1.NamespacedObjectKindReference{ + { + APIVersion: "v1", + Kind: "Pods", + Namespace: "import-ns", + Name: "import-pod", + }, + }, + }, + }), + expectedComposed: v1alpha1.ZarfComponent{ + Name: "base", + HealthChecks: []v1alpha1.NamespacedObjectKindReference{ + { + APIVersion: "v1", + Kind: "Pods", + Namespace: "import-ns", + Name: "import-pod", + }, + { + APIVersion: "v1", + Kind: "Pods", + Namespace: "base-ns", + Name: "base-pod", + }, + }, + }, + }, { name: "Multiple Components", ic: createChainFromSlice(t, []v1alpha1.ZarfComponent{ diff --git a/src/pkg/packager/deploy.go b/src/pkg/packager/deploy.go index 517bff7d60..1fb3f1bc14 100644 --- a/src/pkg/packager/deploy.go +++ b/src/pkg/packager/deploy.go @@ -19,11 +19,15 @@ import ( "golang.org/x/sync/errgroup" "github.com/avast/retry-go/v4" - "github.com/defenseunicorns/pkg/helpers/v2" + pkgkubernetes "github.com/defenseunicorns/pkg/kubernetes" corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/cli-utils/pkg/kstatus/watcher" + "sigs.k8s.io/cli-utils/pkg/object" + "github.com/defenseunicorns/pkg/helpers/v2" "github.com/zarf-dev/zarf/src/api/v1alpha1" "github.com/zarf-dev/zarf/src/config" 
"github.com/zarf-dev/zarf/src/internal/git" @@ -234,6 +238,30 @@ func (p *Packager) deployComponents(ctx context.Context) ([]types.DeployedCompon return deployedComponents, nil } +func runHealthChecks(ctx context.Context, watcher watcher.StatusWatcher, healthChecks []v1alpha1.NamespacedObjectKindReference) error { + objs := []object.ObjMetadata{} + for _, hc := range healthChecks { + gv, err := schema.ParseGroupVersion(hc.APIVersion) + if err != nil { + return err + } + obj := object.ObjMetadata{ + GroupKind: schema.GroupKind{ + Group: gv.Group, + Kind: hc.Kind, + }, + Namespace: hc.Namespace, + Name: hc.Name, + } + objs = append(objs, obj) + } + err := pkgkubernetes.WaitForReady(ctx, watcher, objs) + if err != nil { + return err + } + return nil +} + func (p *Packager) deployInitComponent(ctx context.Context, component v1alpha1.ZarfComponent) ([]types.InstalledChart, error) { hasExternalRegistry := p.cfg.InitOpts.RegistryInfo.Address != "" isSeedRegistry := component.Name == "zarf-seed-registry" @@ -370,6 +398,17 @@ func (p *Packager) deployComponent(ctx context.Context, component v1alpha1.ZarfC return nil, fmt.Errorf("unable to run component after action: %w", err) } + if len(component.HealthChecks) > 0 { + healthCheckContext, cancel := context.WithTimeout(ctx, p.cfg.DeployOpts.Timeout) + defer cancel() + spinner := message.NewProgressSpinner("Running Health checks for %s", component.Name) + defer spinner.Stop() + if err = runHealthChecks(healthCheckContext, p.cluster.Watcher, component.HealthChecks); err != nil { + return nil, fmt.Errorf("health checks failed: %w", err) + } + spinner.Success() + } + err = g.Wait() if err != nil { return nil, err diff --git a/src/pkg/packager/deploy_test.go b/src/pkg/packager/deploy_test.go index a1f32aa346..f5dba049e1 100644 --- a/src/pkg/packager/deploy_test.go +++ b/src/pkg/packager/deploy_test.go @@ -4,12 +4,22 @@ package packager import ( + "context" "testing" + "time" "github.com/stretchr/testify/require" 
"github.com/zarf-dev/zarf/src/api/v1alpha1" "github.com/zarf-dev/zarf/src/pkg/packager/sources" "github.com/zarf-dev/zarf/src/types" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/yaml" + dynamicfake "k8s.io/client-go/dynamic/fake" + "k8s.io/kubectl/pkg/scheme" + "sigs.k8s.io/cli-utils/pkg/kstatus/watcher" + "sigs.k8s.io/cli-utils/pkg/testutil" ) func TestGenerateValuesOverrides(t *testing.T) { @@ -272,3 +282,82 @@ func TestServiceInfoFromServiceURL(t *testing.T) { }) } } + +var podCurrentYaml = ` +apiVersion: v1 +kind: Pod +metadata: + name: good-pod + namespace: ns +status: + conditions: + - type: Ready + status: "True" + phase: Running +` + +var podYaml = ` +apiVersion: v1 +kind: Pod +metadata: + name: in-progress-pod + namespace: ns +` + +func yamlToUnstructured(t *testing.T, yml string) *unstructured.Unstructured { + t.Helper() + m := make(map[string]interface{}) + err := yaml.Unmarshal([]byte(yml), &m) + require.NoError(t, err) + return &unstructured.Unstructured{Object: m} +} + +func TestRunHealthChecks(t *testing.T) { + t.Parallel() + tests := []struct { + name string + podYaml string + expectErr error + }{ + { + name: "Pod is running", + podYaml: podCurrentYaml, + expectErr: nil, + }, + { + name: "Pod is never ready", + podYaml: podYaml, + expectErr: context.DeadlineExceeded, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + fakeClient := dynamicfake.NewSimpleDynamicClient(scheme.Scheme) + fakeMapper := testutil.NewFakeRESTMapper( + v1.SchemeGroupVersion.WithKind("Pod"), + ) + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel() + pod := yamlToUnstructured(t, tt.podYaml) + statusWatcher := watcher.NewDefaultStatusWatcher(fakeClient, fakeMapper) + podGVR := schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"} + require.NoError(t, 
fakeClient.Tracker().Create(podGVR, pod, pod.GetNamespace())) + objs := []v1alpha1.NamespacedObjectKindReference{ + { + APIVersion: pod.GetAPIVersion(), + Kind: pod.GetKind(), + Namespace: pod.GetNamespace(), + Name: pod.GetName(), + }, + } + err := runHealthChecks(ctx, statusWatcher, objs) + if tt.expectErr != nil { + require.ErrorIs(t, err, tt.expectErr) + return + } + require.NoError(t, err) + }) + } +} diff --git a/src/pkg/packager/prepare.go b/src/pkg/packager/prepare.go index 37b80f0ec4..1e43335898 100644 --- a/src/pkg/packager/prepare.go +++ b/src/pkg/packager/prepare.go @@ -245,7 +245,6 @@ func (p *Packager) findImages(ctx context.Context) (map[string][]string, error) // Break the manifest into separate resources yamls, err := utils.SplitYAML(contents) if err != nil { - fmt.Println("got this err") return nil, err } resources = append(resources, yamls...) diff --git a/src/test/e2e/36_health_check_test.go b/src/test/e2e/36_health_check_test.go new file mode 100644 index 0000000000..3ad84162bd --- /dev/null +++ b/src/test/e2e/36_health_check_test.go @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2021-Present The Zarf Authors + +// Package test provides e2e tests for Zarf. 
+package test + +import ( + "fmt" + + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHealthChecks(t *testing.T) { + t.Log("E2E: Health Checks") + + _, _, err := e2e.Zarf(t, "package", "create", "src/test/packages/36-health-checks", "-o=build", "--confirm") + require.NoError(t, err) + + path := fmt.Sprintf("build/zarf-package-health-checks-%s.tar.zst", e2e.Arch) + + _, _, err = e2e.Zarf(t, "package", "deploy", path, "--confirm") + require.NoError(t, err) + + defer func() { + _, _, err = e2e.Zarf(t, "package", "remove", "health-checks", "--confirm") + require.NoError(t, err) + }() + + stdOut, _, err := e2e.Kubectl(t, "get", "pod", "ready-pod", "-n", "health-checks", "-o", "jsonpath={.status.phase}") + require.NoError(t, err) + require.Equal(t, "Running", stdOut) +} diff --git a/src/test/packages/36-health-checks/ready-pod.yaml b/src/test/packages/36-health-checks/ready-pod.yaml new file mode 100644 index 0000000000..836ce373a6 --- /dev/null +++ b/src/test/packages/36-health-checks/ready-pod.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + name: ready-pod +spec: + # Init container delays startup to ensure the pod isn't ready before the health checks run + initContainers: + - name: init-wait + image: ghcr.io/stefanprodan/podinfo:6.4.0 + command: ["sh", "-c", "sleep 3"] + containers: + - name: podinfo + image: ghcr.io/stefanprodan/podinfo:6.4.0 diff --git a/src/test/packages/36-health-checks/zarf.yaml b/src/test/packages/36-health-checks/zarf.yaml new file mode 100644 index 0000000000..a34f125cd0 --- /dev/null +++ b/src/test/packages/36-health-checks/zarf.yaml @@ -0,0 +1,21 @@ +kind: ZarfPackageConfig +metadata: + name: health-checks + description: Deploys a simple pod to test health checks + +components: + - name: health-checks + required: true + manifests: + - name: ready-pod + namespace: health-checks + noWait: true + files: + - ready-pod.yaml + images: + - ghcr.io/stefanprodan/podinfo:6.4.0 + healthChecks: + - name: ready-pod + namespace: 
health-checks + apiVersion: v1 + kind: Pod diff --git a/zarf.schema.json b/zarf.schema.json index b1fca50208..d5067e42ad 100644 --- a/zarf.schema.json +++ b/zarf.schema.json @@ -169,6 +169,38 @@ "^x-": {} } }, + "NamespacedObjectKindReference": { + "properties": { + "apiVersion": { + "type": "string", + "description": "API Version of the resource" + }, + "kind": { + "type": "string", + "description": "Kind of the resource" + }, + "namespace": { + "type": "string", + "description": "Namespace of the resource" + }, + "name": { + "type": "string", + "description": "Name of the resource" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "apiVersion", + "kind", + "namespace", + "name" + ], + "description": "NamespacedObjectKindReference is a reference to a specific resource in a namespace using its kind and API version.", + "patternProperties": { + "^x-": {} + } + }, "Shell": { "properties": { "windows": { @@ -512,6 +544,13 @@ "actions": { "$ref": "#/$defs/ZarfComponentActions", "description": "Custom commands to run at various stages of a package lifecycle." + }, + "healthChecks": { + "items": { + "$ref": "#/$defs/NamespacedObjectKindReference" + }, + "type": "array", + "description": "List of resources to health check after deployment" } }, "additionalProperties": false,