From 726e58da912e524aa9e9fc9957094aebf1c0f3e9 Mon Sep 17 00:00:00 2001 From: eaudetcobello Date: Wed, 16 Oct 2024 12:50:33 -0400 Subject: [PATCH 1/6] Use new channel field instead of install script overwrite (#73) --- templates/aws/cluster-template.yaml | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/templates/aws/cluster-template.yaml b/templates/aws/cluster-template.yaml index 883270ec..e81314d7 100644 --- a/templates/aws/cluster-template.yaml +++ b/templates/aws/cluster-template.yaml @@ -51,14 +51,8 @@ spec: name: ${CLUSTER_NAME}-control-plane spec: nodeName: "{{ ds.meta_data.local_hostname }}" - files: - # note(ben): This is only required as long as k8s does not have a stable release. - - path: /capi/scripts/install.sh - permissions: "0500" - owner: "root:root" - content: | - #!/bin/bash -xe - snap install k8s --classic --edge + # note(ben): This is only required as long as k8s does not have a stable release. + channel: "1.31-classic/edge" controlPlane: cloudProvider: external replicas: ${CONTROL_PLANE_MACHINE_COUNT} @@ -128,14 +122,8 @@ spec: template: spec: nodeName: "{{ ds.meta_data.local_hostname }}" - files: - # note(ben): This is only required as long as k8s does not have a stable release. - - path: /capi/scripts/install.sh - permissions: "0500" - owner: "root:root" - content: | - #!/bin/bash -xe - snap install k8s --classic --edge + # note(ben): This is only required as long as k8s does not have a stable release. + channel: "1.31-classic/edge" --- apiVersion: addons.cluster.x-k8s.io/v1beta1 kind: ClusterResourceSet From cefee4cc380840f1bfdeb259c6211e60176dcf0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20Go=C5=82aszewski?= Date: Sat, 19 Oct 2024 15:46:16 +0200 Subject: [PATCH 2/6] Improve UX around setting Proxy configurations in the spec (#69) --- bootstrap/api/v1beta2/ck8sconfig_types.go | 12 ++++ ...ootstrap.cluster.x-k8s.io_ck8sconfigs.yaml | 15 +++- ....cluster.x-k8s.io_ck8sconfigtemplates.yaml | 15 +++- .../controllers/ck8sconfig_controller.go | 6 ++ ...ne.cluster.x-k8s.io_ck8scontrolplanes.yaml | 15 +++- ...er.x-k8s.io_ck8scontrolplanetemplates.yaml | 17 +++-- pkg/cloudinit/common.go | 46 +++++++++++- pkg/cloudinit/controlplane_init_test.go | 69 ++++++++++++++++++ pkg/cloudinit/controlplane_join_test.go | 69 ++++++++++++++++++ pkg/cloudinit/embed.go | 2 + pkg/cloudinit/scripts/configure-proxy.sh | 27 +++++++ pkg/cloudinit/worker_join_test.go | 70 +++++++++++++++++++ 12 files changed, 349 insertions(+), 14 deletions(-) create mode 100644 pkg/cloudinit/scripts/configure-proxy.sh diff --git a/bootstrap/api/v1beta2/ck8sconfig_types.go b/bootstrap/api/v1beta2/ck8sconfig_types.go index 3d936b7b..31acb1f8 100644 --- a/bootstrap/api/v1beta2/ck8sconfig_types.go +++ b/bootstrap/api/v1beta2/ck8sconfig_types.go @@ -69,6 +69,18 @@ type CK8sConfigSpec struct { // +optional SnapstoreProxyID string `json:"snapstoreProxyId,omitempty"` + // HTTPSProxy is optional https proxy configuration + // +optional + HTTPSProxy string `json:"httpsProxy,omitempty"` + + // HTTPProxy is optional http proxy configuration + // +optional + HTTPProxy string `json:"httpProxy,omitempty"` + + // NoProxy is optional no proxy configuration + // +optional + NoProxy string `json:"noProxy,omitempty"` + // Channel is the channel to use for the snap install. 
// +optional Channel string `json:"channel,omitempty"` diff --git a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml index b237b1aa..d0b175e2 100644 --- a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml +++ b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigs.yaml @@ -51,9 +51,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap install. - type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -87,6 +84,9 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. @@ -201,6 +201,12 @@ spec: - path type: object type: array + httpProxy: + description: HTTPProxy is optional http proxy configuration + type: string + httpsProxy: + description: HTTPSProxy is optional https proxy configuration + type: string initConfig: description: CK8sInitConfig is configuration for the initializing the cluster features. @@ -233,6 +239,9 @@ spec: LocalPath is the path of a local snap file in the workload cluster to use for the snap install. If Channel or Revision are set, this will be ignored. type: string + noProxy: + description: NoProxy is optional no proxy configuration + type: string nodeName: description: |- NodeName is the name to use for the kubelet of this node. It is needed for clouds diff --git a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml index c868cfd5..a9fd896b 100644 --- a/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml +++ b/bootstrap/config/crd/bases/bootstrap.cluster.x-k8s.io_ck8sconfigtemplates.yaml @@ -58,9 +58,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap install. - type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -94,6 +91,9 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. @@ -210,6 +210,12 @@ spec: - path type: object type: array + httpProxy: + description: HTTPProxy is optional http proxy configuration + type: string + httpsProxy: + description: HTTPSProxy is optional https proxy configuration + type: string initConfig: description: CK8sInitConfig is configuration for the initializing the cluster features. @@ -242,6 +248,9 @@ spec: LocalPath is the path of a local snap file in the workload cluster to use for the snap install. If Channel or Revision are set, this will be ignored. type: string + noProxy: + description: NoProxy is optional no proxy configuration + type: string nodeName: description: |- NodeName is the name to use for the kubelet of this node. 
It is needed for clouds diff --git a/bootstrap/controllers/ck8sconfig_controller.go b/bootstrap/controllers/ck8sconfig_controller.go index 82d8002e..60489ef8 100644 --- a/bootstrap/controllers/ck8sconfig_controller.go +++ b/bootstrap/controllers/ck8sconfig_controller.go @@ -378,6 +378,9 @@ func (r *CK8sConfigReconciler) joinWorker(ctx context.Context, scope *Scope) err ConfigFileContents: string(joinConfig), MicroclusterAddress: scope.Config.Spec.ControlPlaneConfig.MicroclusterAddress, MicroclusterPort: microclusterPort, + HTTPProxy: scope.Config.Spec.HTTPProxy, + HTTPSProxy: scope.Config.Spec.HTTPSProxy, + NoProxy: scope.Config.Spec.NoProxy, AirGapped: scope.Config.Spec.AirGapped, SnapstoreProxyScheme: scope.Config.Spec.SnapstoreProxyScheme, SnapstoreProxyDomain: scope.Config.Spec.SnapstoreProxyDomain, @@ -693,6 +696,9 @@ func (r *CK8sConfigReconciler) handleClusterNotInitialized(ctx context.Context, MicroclusterAddress: scope.Config.Spec.ControlPlaneConfig.MicroclusterAddress, MicroclusterPort: microclusterPort, NodeName: scope.Config.Spec.NodeName, + HTTPProxy: scope.Config.Spec.HTTPProxy, + HTTPSProxy: scope.Config.Spec.HTTPSProxy, + NoProxy: scope.Config.Spec.NoProxy, AirGapped: scope.Config.Spec.AirGapped, SnapstoreProxyScheme: scope.Config.Spec.SnapstoreProxyScheme, SnapstoreProxyDomain: scope.Config.Spec.SnapstoreProxyDomain, diff --git a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml index 7dc56812..bc1ad0bb 100644 --- a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml +++ b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml @@ -246,9 +246,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap install. - type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -282,6 +279,9 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. @@ -398,6 +398,12 @@ spec: - path type: object type: array + httpProxy: + description: HTTPProxy is optional http proxy configuration + type: string + httpsProxy: + description: HTTPSProxy is optional https proxy configuration + type: string initConfig: description: CK8sInitConfig is configuration for the initializing the cluster features. @@ -430,6 +436,9 @@ spec: LocalPath is the path of a local snap file in the workload cluster to use for the snap install. If Channel or Revision are set, this will be ignored. type: string + noProxy: + description: NoProxy is optional no proxy configuration + type: string nodeName: description: |- NodeName is the name to use for the kubelet of this node. It is needed for clouds diff --git a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml index ff6e0fcd..715a65e3 100644 --- a/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml +++ b/controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml @@ -221,10 +221,6 @@ spec: items: type: string type: array - channel: - description: Channel is the channel to use for the snap - install. 
- type: string bootstrapConfig: description: BootstrapConfig is the data to be passed to the bootstrap script. @@ -258,6 +254,10 @@ spec: - secret type: object type: object + channel: + description: Channel is the channel to use for the snap + install. + type: string controlPlane: description: CK8sControlPlaneConfig is configuration for the control plane node. @@ -376,6 +376,12 @@ spec: - path type: object type: array + httpProxy: + description: HTTPProxy is optional http proxy configuration + type: string + httpsProxy: + description: HTTPSProxy is optional https proxy configuration + type: string initConfig: description: CK8sInitConfig is configuration for the initializing the cluster features. @@ -408,6 +414,9 @@ spec: LocalPath is the path of a local snap file in the workload cluster to use for the snap install. If Channel or Revision are set, this will be ignored. type: string + noProxy: + description: NoProxy is optional no proxy configuration + type: string nodeName: description: |- NodeName is the name to use for the kubelet of this node. It is needed for clouds diff --git a/pkg/cloudinit/common.go b/pkg/cloudinit/common.go index 7f820350..bd3320b8 100644 --- a/pkg/cloudinit/common.go +++ b/pkg/cloudinit/common.go @@ -49,6 +49,12 @@ type BaseUserData struct { SnapstoreProxyDomain string // The snap store proxy ID SnapstoreProxyID string + // HTTPProxy is http_proxy configuration. + HTTPProxy string + // HTTPSProxy is https_proxy configuration. + HTTPSProxy string + // NoProxy is no_proxy configuration. + NoProxy string // MicroclusterAddress is the address to use for microcluster. MicroclusterAddress string // MicroclusterPort is the port to use for microcluster. @@ -95,6 +101,12 @@ func NewBaseCloudConfig(data BaseUserData) (CloudConfig, error) { config.RunCommands = append(config.RunCommands, "/capi/scripts/configure-snapstore-proxy.sh") } + // proxy configuration + if proxyConfigFiles := getProxyConfigFiles(data); proxyConfigFiles != nil { + config.WriteFiles = append(config.WriteFiles, proxyConfigFiles...) + config.RunCommands = append(config.RunCommands, "/capi/scripts/configure-proxy.sh") + } + var configFileContents string if data.BootstrapConfig != "" { configFileContents = data.BootstrapConfig @@ -139,7 +151,6 @@ func NewBaseCloudConfig(data BaseUserData) (CloudConfig, error) { }, )..., ) - // boot commands config.BootCommands = data.BootCommands @@ -190,3 +201,36 @@ func getSnapstoreProxyConfigFiles(data BaseUserData) []File { return []File{schemeFile, domainFile, storeIDFile} } + +// getProxyConfigFiles returns the proxy config files. +// Returns slice of files for each proxy parameters are present in data structure with corresponding value +// Nil indicates that no files are returned. 
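+//
+// For example (illustrative value only), BaseUserData{HTTPProxy: "http://proxy.internal"}
+// would yield a single entry:
+//
+//	File{
+//		Path:        "/capi/etc/http-proxy",
+//		Content:     "http://proxy.internal",
+//		Permissions: "0400",
+//		Owner:       "root:root",
+//	}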
+func getProxyConfigFiles(data BaseUserData) []File { + var files []File + if data.HTTPProxy != "" { + files = append(files, File{ + Path: "/capi/etc/http-proxy", + Content: data.HTTPProxy, + Permissions: "0400", + Owner: "root:root", + }) + } + if data.HTTPSProxy != "" { + files = append(files, File{ + Path: "/capi/etc/https-proxy", + Content: data.HTTPSProxy, + Permissions: "0400", + Owner: "root:root", + }) + } + if data.NoProxy != "" { + files = append(files, File{ + Path: "/capi/etc/no-proxy", + Content: data.NoProxy, + Permissions: "0400", + Owner: "root:root", + }) + } + + return files +} diff --git a/pkg/cloudinit/controlplane_init_test.go b/pkg/cloudinit/controlplane_init_test.go index d113658c..676fe9ae 100644 --- a/pkg/cloudinit/controlplane_init_test.go +++ b/pkg/cloudinit/controlplane_init_test.go @@ -23,6 +23,7 @@ import ( . "github.com/onsi/gomega" format "github.com/onsi/gomega/format" "github.com/onsi/gomega/gstruct" + "github.com/onsi/gomega/types" "github.com/canonical/cluster-api-k8s/pkg/cloudinit" ) @@ -88,6 +89,7 @@ func TestNewInitControlPlane(t *testing.T) { HaveField("Path", "/capi/scripts/wait-apiserver-ready.sh"), HaveField("Path", "/capi/scripts/deploy-manifests.sh"), HaveField("Path", "/capi/scripts/configure-auth-token.sh"), + HaveField("Path", "/capi/scripts/configure-proxy.sh"), HaveField("Path", "/capi/scripts/configure-node-token.sh"), HaveField("Path", "/capi/scripts/create-sentinel-bootstrap.sh"), HaveField("Path", "/capi/scripts/configure-snapstore-proxy.sh"), @@ -105,6 +107,73 @@ func TestNewInitControlPlane(t *testing.T) { ), "Some /capi/scripts files are missing") } +func TestNewInitControlPlaneWithOptionalProxySettings(t *testing.T) { + g := NewWithT(t) + for _, tc := range []struct { + name string + baseUserData cloudinit.BaseUserData + expectRunCommand bool + expectWriteFiles []types.GomegaMatcher + }{ + { + name: "AllProxyFieldsSet", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + HTTPProxy: "http://proxy.internal", + HTTPSProxy: "https://proxy.internal", + NoProxy: "10.0.0.0/8,10.152.183.1,192.168.0.0/16", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: true, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + HaveField("Path", "/capi/etc/http-proxy"), + HaveField("Path", "/capi/etc/https-proxy"), + HaveField("Path", "/capi/etc/no-proxy"), + }, + }, + { + name: "HTTPSProxyOnly", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + HTTPSProxy: "https://proxy.internal", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: true, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + HaveField("Path", "/capi/etc/https-proxy"), + }, + }, + { + name: "NoProxyFields", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: false, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + config, err := cloudinit.NewInitControlPlane(cloudinit.InitControlPlaneInput{BaseUserData: tc.baseUserData}) + + g.Expect(err).ToNot(HaveOccurred()) + // Verify proxy run command. + if tc.expectRunCommand { + g.Expect(config.RunCommands).To(ContainElement("/capi/scripts/configure-proxy.sh")) + } else { + g.Expect(config.RunCommands).NotTo(ContainElement("/capi/scripts/configure-proxy.sh")) + } + // Verify proxy files present. 
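+			// Note: /capi/scripts/configure-proxy.sh is always written together with the
+			// other cloud-init scripts; only its RunCommands entry depends on the proxy
+			// fields, which is why the NoProxyFields case still expects the script file
+			// while expectRunCommand is false.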
+ g.Expect(config.WriteFiles).To(ContainElements(tc.expectWriteFiles), + "Required files in /capi directory are missing") + }) + } +} + func TestUserSuppliedBootstrapConfig(t *testing.T) { g := NewWithT(t) diff --git a/pkg/cloudinit/controlplane_join_test.go b/pkg/cloudinit/controlplane_join_test.go index a9a009ae..57eca5c5 100644 --- a/pkg/cloudinit/controlplane_join_test.go +++ b/pkg/cloudinit/controlplane_join_test.go @@ -6,6 +6,7 @@ import ( . "github.com/onsi/gomega" "github.com/onsi/gomega/gstruct" + "github.com/onsi/gomega/types" "github.com/canonical/cluster-api-k8s/pkg/cloudinit" ) @@ -66,6 +67,7 @@ func TestNewJoinControlPlane(t *testing.T) { HaveField("Path", "/capi/scripts/wait-apiserver-ready.sh"), HaveField("Path", "/capi/scripts/deploy-manifests.sh"), HaveField("Path", "/capi/scripts/configure-auth-token.sh"), + HaveField("Path", "/capi/scripts/configure-proxy.sh"), HaveField("Path", "/capi/scripts/configure-node-token.sh"), HaveField("Path", "/capi/scripts/create-sentinel-bootstrap.sh"), HaveField("Path", "/capi/scripts/configure-snapstore-proxy.sh"), @@ -82,6 +84,73 @@ func TestNewJoinControlPlane(t *testing.T) { ), "Some /capi/scripts files are missing") } +func TestNewJoinControlPlaneOptionalProxySettings(t *testing.T) { + g := NewWithT(t) + for _, tc := range []struct { + name string + baseUserData cloudinit.BaseUserData + expectRunCommand bool + expectWriteFiles []types.GomegaMatcher + }{ + { + name: "AllProxyFieldsSet", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + HTTPProxy: "http://proxy.internal", + HTTPSProxy: "https://proxy.internal", + NoProxy: "10.0.0.0/8,10.152.183.1,192.168.0.0/16", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: true, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + HaveField("Path", "/capi/etc/http-proxy"), + HaveField("Path", "/capi/etc/https-proxy"), + HaveField("Path", "/capi/etc/no-proxy"), + }, + }, + { + name: "HTTPSProxyOnly", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + HTTPSProxy: "https://proxy.internal", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: true, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + HaveField("Path", "/capi/etc/https-proxy"), + }, + }, + { + name: "NoProxyFields", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: false, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + config, err := cloudinit.NewJoinControlPlane(cloudinit.JoinControlPlaneInput{BaseUserData: tc.baseUserData}) + + g.Expect(err).ToNot(HaveOccurred()) + // Verify proxy run command. + if tc.expectRunCommand { + g.Expect(config.RunCommands).To(ContainElement("/capi/scripts/configure-proxy.sh")) + } else { + g.Expect(config.RunCommands).NotTo(ContainElement("/capi/scripts/configure-proxy.sh")) + } + // Verify proxy files present. 
+ g.Expect(config.WriteFiles).To(ContainElements(tc.expectWriteFiles), + "Required files in /capi directory are missing") + }) + } +} + func TestNewJoinControlPlaneInvalidVersionError(t *testing.T) { g := NewWithT(t) diff --git a/pkg/cloudinit/embed.go b/pkg/cloudinit/embed.go index 1d00a962..dafaa308 100644 --- a/pkg/cloudinit/embed.go +++ b/pkg/cloudinit/embed.go @@ -20,6 +20,7 @@ var ( scriptBootstrap script = "bootstrap.sh" scriptLoadImages script = "load-images.sh" scriptConfigureAuthToken script = "configure-auth-token.sh" // #nosec G101 + scriptConfigureProxy script = "configure-proxy.sh" scriptConfigureNodeToken script = "configure-node-token.sh" // #nosec G101 scriptJoinCluster script = "join-cluster.sh" scriptWaitAPIServerReady script = "wait-apiserver-ready.sh" @@ -44,6 +45,7 @@ var ( scriptBootstrap: mustEmbed(scriptBootstrap), scriptLoadImages: mustEmbed(scriptLoadImages), scriptConfigureAuthToken: mustEmbed(scriptConfigureAuthToken), + scriptConfigureProxy: mustEmbed(scriptConfigureProxy), scriptConfigureNodeToken: mustEmbed(scriptConfigureNodeToken), scriptJoinCluster: mustEmbed(scriptJoinCluster), scriptWaitAPIServerReady: mustEmbed(scriptWaitAPIServerReady), diff --git a/pkg/cloudinit/scripts/configure-proxy.sh b/pkg/cloudinit/scripts/configure-proxy.sh new file mode 100644 index 00000000..f473e2a8 --- /dev/null +++ b/pkg/cloudinit/scripts/configure-proxy.sh @@ -0,0 +1,27 @@ +#!/bin/bash -e + +# Assumptions: +# - runs before install k8s + +HTTP_PROXY_FILE="/tmp/capi/etc/http-proxy" +HTTPS_PROXY_FILE="/tmp/capi/etc/https-proxy" +NO_PROXY_FILE="/tmp/capi/etc/no-proxy" +ENVIRONMENT_FILE="/tmp/etc/environment" + +if [ -f ${HTTP_PROXY_FILE} ]; then + HTTP_PROXY=$(cat ${HTTP_PROXY_FILE}) + echo "http_proxy=${HTTP_PROXY}" >> "${ENVIRONMENT_FILE}" + echo "HTTP_PROXY=${HTTP_PROXY}" >> "${ENVIRONMENT_FILE}" +fi + +if [ -f ${HTTPS_PROXY_FILE} ]; then + HTTPS_PROXY=$(cat ${HTTPS_PROXY_FILE}) + echo "https_proxy=${HTTPS_PROXY}" >> "${ENVIRONMENT_FILE}" + echo "HTTPS_PROXY=${HTTPS_PROXY}" >> "${ENVIRONMENT_FILE}" +fi + +if [ -f ${NO_PROXY_FILE} ]; then + NO_PROXY=$(cat ${NO_PROXY_FILE}) + echo "no_proxy=${NO_PROXY}" >> "${ENVIRONMENT_FILE}" + echo "NO_PROXY=${NO_PROXY}" >> "${ENVIRONMENT_FILE}" +fi diff --git a/pkg/cloudinit/worker_join_test.go b/pkg/cloudinit/worker_join_test.go index addadc38..3fbd06f6 100644 --- a/pkg/cloudinit/worker_join_test.go +++ b/pkg/cloudinit/worker_join_test.go @@ -6,6 +6,7 @@ import ( . 
"github.com/onsi/gomega" "github.com/onsi/gomega/gstruct" + "github.com/onsi/gomega/types" "github.com/canonical/cluster-api-k8s/pkg/cloudinit" ) @@ -66,6 +67,7 @@ func TestNewJoinWorker(t *testing.T) { HaveField("Path", "/capi/scripts/wait-apiserver-ready.sh"), HaveField("Path", "/capi/scripts/deploy-manifests.sh"), HaveField("Path", "/capi/scripts/configure-auth-token.sh"), + HaveField("Path", "/capi/scripts/configure-proxy.sh"), HaveField("Path", "/capi/scripts/configure-node-token.sh"), HaveField("Path", "/capi/scripts/create-sentinel-bootstrap.sh"), HaveField("Path", "/capi/scripts/configure-snapstore-proxy.sh"), @@ -82,6 +84,74 @@ func TestNewJoinWorker(t *testing.T) { ), "Some /capi/scripts files are missing") } +func TestNewJoinWorkerWithProxySettings(t *testing.T) { + g := NewWithT(t) + for _, tc := range []struct { + name string + baseUserData cloudinit.BaseUserData + expectRunCommand bool + expectWriteFiles []types.GomegaMatcher + }{ + { + name: "AllProxyFieldsSet", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + HTTPProxy: "http://proxy.internal", + HTTPSProxy: "https://proxy.internal", + NoProxy: "10.0.0.0/8,10.152.183.1,192.168.0.0/16", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: true, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + HaveField("Path", "/capi/etc/http-proxy"), + HaveField("Path", "/capi/etc/https-proxy"), + HaveField("Path", "/capi/etc/no-proxy"), + }, + }, + { + name: "HTTPSProxyOnly", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + HTTPSProxy: "https://proxy.internal", + NoProxy: "10.0.0.0/8,10.152.183.1,192.168.0.0/16", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: true, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + HaveField("Path", "/capi/etc/https-proxy"), + }, + }, + { + name: "NoProxyFields", + baseUserData: cloudinit.BaseUserData{ + KubernetesVersion: "v1.30.0", + MicroclusterAddress: "10.0.0.0/8", + }, + expectRunCommand: false, + expectWriteFiles: []types.GomegaMatcher{ + HaveField("Path", "/capi/scripts/configure-proxy.sh"), + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + config, err := cloudinit.NewJoinWorker(cloudinit.JoinWorkerInput{BaseUserData: tc.baseUserData}) + + g.Expect(err).ToNot(HaveOccurred()) + // Verify proxy run command. + if tc.expectRunCommand { + g.Expect(config.RunCommands).To(ContainElement("/capi/scripts/configure-proxy.sh")) + } else { + g.Expect(config.RunCommands).NotTo(ContainElement("/capi/scripts/configure-proxy.sh")) + } + // Verify proxy files present. 
+ g.Expect(config.WriteFiles).To(ContainElements(tc.expectWriteFiles), + "Required files in /capi directory are missing") + }) + } +} + func TestNewJoinWorkerInvalidVersionError(t *testing.T) { g := NewWithT(t) From 183bfa9a6b20c36f58f6096401fa19320cb43df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20Tekin=20=C3=96z?= Date: Sun, 20 Oct 2024 18:56:50 +0300 Subject: [PATCH 3/6] Use updated annotations from k8s api repo (#72) --- go.mod | 2 +- go.sum | 4 ++-- pkg/ck8s/config_init.go | 9 +++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/go.mod b/go.mod index 52c72399..4e11c408 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/canonical/cluster-api-k8s go 1.22.6 require ( - github.com/canonical/k8s-snap-api v1.0.9 + github.com/canonical/k8s-snap-api v1.0.11 github.com/go-logr/logr v1.4.1 github.com/google/uuid v1.4.0 github.com/onsi/ginkgo v1.16.5 diff --git a/go.sum b/go.sum index 193817d4..33ea1288 100644 --- a/go.sum +++ b/go.sum @@ -29,8 +29,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bwesterb/go-ristretto v1.2.0/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= -github.com/canonical/k8s-snap-api v1.0.9 h1:WhbyVtnR0GIAdY1UYBIzkspfgodxrHjlpT9FbG4NIu4= -github.com/canonical/k8s-snap-api v1.0.9/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= +github.com/canonical/k8s-snap-api v1.0.11 h1:nGtwrUQBLiaL3HUXFx2gb4kq6qVpl2yNwMwHVX0dEok= +github.com/canonical/k8s-snap-api v1.0.11/go.mod h1:LDPoIYCeYnfgOFrwVPJ/4edGU264w7BB7g0GsVi36AY= github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= diff --git a/pkg/ck8s/config_init.go b/pkg/ck8s/config_init.go index 2ceff580..b6458e64 100644 --- a/pkg/ck8s/config_init.go +++ b/pkg/ck8s/config_init.go @@ -5,6 +5,7 @@ import ( "strings" apiv1 "github.com/canonical/k8s-snap-api/api/v1" + apiv1_annotations "github.com/canonical/k8s-snap-api/api/v1/annotations" "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -87,12 +88,12 @@ func GenerateInitControlPlaneConfig(cfg InitControlPlaneConfig) (apiv1.Bootstrap out.ClusterConfig.Annotations = map[string]string{} } - if _, ok := out.ClusterConfig.Annotations[apiv1.AnnotationSkipCleanupKubernetesNodeOnRemove]; !ok { - out.ClusterConfig.Annotations[apiv1.AnnotationSkipCleanupKubernetesNodeOnRemove] = "true" + if _, ok := out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipCleanupKubernetesNodeOnRemove]; !ok { + out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipCleanupKubernetesNodeOnRemove] = "true" } - if _, ok := out.ClusterConfig.Annotations[apiv1.AnnotationSkipStopServicesOnRemove]; !ok { - out.ClusterConfig.Annotations[apiv1.AnnotationSkipStopServicesOnRemove] = "true" + if _, ok := out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipStopServicesOnRemove]; !ok { + out.ClusterConfig.Annotations[apiv1_annotations.AnnotationSkipStopServicesOnRemove] = "true" } // features From 1012cfcdbd368fae878a8b1a67c962625650d30d Mon Sep 17 00:00:00 2001 From: Homayoon Alimohammadi Date: Mon, 21 Oct 2024 14:45:35 +0400 Subject: [PATCH 4/6] Add E2E tests for MachineDeployment orchestrated in-place upgrades 
(#74) --- test/e2e/helpers.go | 97 +++++++++++++++++++----- test/e2e/machine_deployment_test.go | 112 ++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 18 deletions(-) create mode 100644 test/e2e/machine_deployment_test.go diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 42ad619a..23a521b7 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -555,44 +555,50 @@ func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForContr } type ApplyInPlaceUpgradeAndWaitInput struct { - Getter framework.Getter - Machine *clusterv1.Machine + Getter framework.Getter + Obj client.Object + // DestinationObj is used as a destination to decode whatever is retrieved from the client. + // e.g: + // {DestinationObj: &clusterv1.Machine{}, ...} + // client.Get(ctx, objKey, DestinationObj) + DestinationObj client.Object ClusterProxy framework.ClusterProxy UpgradeOption string WaitForUpgradeIntervals []interface{} } +// ApplyInPlaceUpgradeAndWait applies an in-place upgrade to an object and waits for the upgrade to complete. func ApplyInPlaceUpgradeAndWait(ctx context.Context, input ApplyInPlaceUpgradeAndWaitInput) { Expect(ctx).NotTo(BeNil()) - Expect(input.Machine).ToNot(BeNil()) + Expect(input.Obj).ToNot(BeNil()) + Expect(input.DestinationObj).ToNot(BeNil()) Expect(input.ClusterProxy).ToNot(BeNil()) Expect(input.UpgradeOption).ToNot(BeEmpty()) mgmtClient := input.ClusterProxy.GetClient() - patchHelper, err := patch.NewHelper(input.Machine, mgmtClient) + patchHelper, err := patch.NewHelper(input.Obj, mgmtClient) Expect(err).ToNot(HaveOccurred()) - mAnnotations := input.Machine.GetAnnotations() + annotations := input.Obj.GetAnnotations() - if mAnnotations == nil { - mAnnotations = map[string]string{} + if annotations == nil { + annotations = map[string]string{} } - mAnnotations[bootstrapv1.InPlaceUpgradeToAnnotation] = input.UpgradeOption - input.Machine.SetAnnotations(mAnnotations) - err = patchHelper.Patch(ctx, input.Machine) + annotations[bootstrapv1.InPlaceUpgradeToAnnotation] = input.UpgradeOption + input.Obj.SetAnnotations(annotations) + err = patchHelper.Patch(ctx, input.Obj) Expect(err).ToNot(HaveOccurred()) By("Checking for in-place upgrade status to be equal to done") Eventually(func() (bool, error) { - um := &clusterv1.Machine{} - if err := input.Getter.Get(ctx, client.ObjectKey{Namespace: input.Machine.Namespace, Name: input.Machine.Name}, um); err != nil { + if err := input.Getter.Get(ctx, client.ObjectKeyFromObject(input.Obj), input.DestinationObj); err != nil { Byf("Failed to get the machine: %+v", err) return false, err } - mAnnotations := um.GetAnnotations() + mAnnotations := input.DestinationObj.GetAnnotations() status, ok := mAnnotations[bootstrapv1.InPlaceUpgradeStatusAnnotation] if !ok { @@ -600,7 +606,7 @@ func ApplyInPlaceUpgradeAndWait(ctx context.Context, input ApplyInPlaceUpgradeAn } return status == bootstrapv1.InPlaceUpgradeDoneStatus, nil - }, input.WaitForUpgradeIntervals...).Should(BeTrue(), "In-place upgrade failed for %s", input.Machine.Name) + }, input.WaitForUpgradeIntervals...).Should(BeTrue(), "In-place upgrade failed for %s", input.Obj.GetName()) } type ApplyInPlaceUpgradeForControlPlaneInput struct { @@ -633,7 +639,8 @@ func ApplyInPlaceUpgradeForControlPlane(ctx context.Context, input ApplyInPlaceU for _, machine := range machineList.Items { ApplyInPlaceUpgradeAndWait(ctx, ApplyInPlaceUpgradeAndWaitInput{ Getter: input.Getter, - Machine: &machine, + Obj: &machine, + DestinationObj: &clusterv1.Machine{}, ClusterProxy: 
input.ClusterProxy, UpgradeOption: input.UpgradeOption, WaitForUpgradeIntervals: input.WaitForUpgradeIntervals, @@ -659,7 +666,7 @@ func ApplyInPlaceUpgradeForWorker(ctx context.Context, input ApplyInPlaceUpgrade Expect(input.UpgradeOption).ToNot(BeEmpty()) for _, md := range input.MachineDeployments { - // Look up all the control plane machines. + // Look up all the worker machines. inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace) matchClusterListOption := client.MatchingLabels{ clusterv1.ClusterNameLabel: input.Cluster.Name, @@ -669,12 +676,13 @@ func ApplyInPlaceUpgradeForWorker(ctx context.Context, input ApplyInPlaceUpgrade machineList := &clusterv1.MachineList{} Eventually(func() error { return input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption) - }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list control-plane machines for the cluster %q", input.Cluster.Name) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list worker machines for the cluster %q", input.Cluster.Name) for _, machine := range machineList.Items { ApplyInPlaceUpgradeAndWait(ctx, ApplyInPlaceUpgradeAndWaitInput{ Getter: input.Getter, - Machine: &machine, + Obj: &machine, + DestinationObj: &clusterv1.Machine{}, ClusterProxy: input.ClusterProxy, UpgradeOption: input.UpgradeOption, WaitForUpgradeIntervals: input.WaitForUpgradeIntervals, @@ -683,6 +691,59 @@ func ApplyInPlaceUpgradeForWorker(ctx context.Context, input ApplyInPlaceUpgrade } } +type ApplyInPlaceUpgradeForMachineDeploymentInput struct { + Lister framework.Lister + Getter framework.Getter + ClusterProxy framework.ClusterProxy + Cluster *clusterv1.Cluster + MachineDeployments []*clusterv1.MachineDeployment + UpgradeOption string + WaitForUpgradeIntervals []interface{} +} + +func ApplyInPlaceUpgradeForMachineDeployment(ctx context.Context, input ApplyInPlaceUpgradeForMachineDeploymentInput) { + Expect(ctx).NotTo(BeNil()) + Expect(input.ClusterProxy).ToNot(BeNil()) + Expect(input.Cluster).ToNot(BeNil()) + Expect(input.MachineDeployments).ToNot(BeNil()) + Expect(input.UpgradeOption).ToNot(BeEmpty()) + + var machineDeployment *clusterv1.MachineDeployment + for _, md := range input.MachineDeployments { + if md.Labels[clusterv1.ClusterNameLabel] == input.Cluster.Name { + machineDeployment = md + break + } + } + Expect(machineDeployment).ToNot(BeNil()) + + ApplyInPlaceUpgradeAndWait(ctx, ApplyInPlaceUpgradeAndWaitInput{ + Getter: input.Getter, + Obj: machineDeployment, + DestinationObj: &clusterv1.MachineDeployment{}, + ClusterProxy: input.ClusterProxy, + UpgradeOption: input.UpgradeOption, + WaitForUpgradeIntervals: input.WaitForUpgradeIntervals, + }) + + // Make sure all the machines are upgraded + inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace) + matchClusterListOption := client.MatchingLabels{ + clusterv1.ClusterNameLabel: input.Cluster.Name, + clusterv1.MachineDeploymentNameLabel: machineDeployment.Name, + } + + machineList := &clusterv1.MachineList{} + Eventually(func() error { + return input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list machines for the machineDeployment %q", machineDeployment.Name) + + for _, machine := range machineList.Items { + 
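+		// Every Machine owned by the MachineDeployment should report the in-place
+		// upgrade as done and record the release it was upgraded to.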
Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation]).To(Equal(bootstrapv1.InPlaceUpgradeDoneStatus)) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeReleaseAnnotation]).To(Equal(input.UpgradeOption)) + } +} + // UpgradeControlPlaneAndWaitForUpgradeInput is the input type for UpgradeControlPlaneAndWaitForUpgrade. type UpgradeControlPlaneAndWaitForUpgradeInput struct { ClusterProxy framework.ClusterProxy diff --git a/test/e2e/machine_deployment_test.go b/test/e2e/machine_deployment_test.go new file mode 100644 index 00000000..a728f0ae --- /dev/null +++ b/test/e2e/machine_deployment_test.go @@ -0,0 +1,112 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/cluster-api/util" +) + +var _ = Describe("Machine Deployment Orchestrated In place upgrades", func() { + var ( + ctx = context.TODO() + specName = "workload-cluster-md-inplace" + namespace *corev1.Namespace + cancelWatches context.CancelFunc + result *ApplyClusterTemplateAndWaitResult + clusterName string + clusterctlLogFolder string + infrastructureProvider string + ) + + BeforeEach(func() { + Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) + + clusterName = fmt.Sprintf("capick8s-md-in-place-%s", util.RandomString(6)) + infrastructureProvider = clusterctl.DefaultInfrastructureProvider + + // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. 
+ namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) + + result = new(ApplyClusterTemplateAndWaitResult) + + clusterctlLogFolder = filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()) + }) + + AfterEach(func() { + cleanInput := cleanupInput{ + SpecName: specName, + Cluster: result.Cluster, + ClusterProxy: bootstrapClusterProxy, + Namespace: namespace, + CancelWatches: cancelWatches, + IntervalsGetter: e2eConfig.GetIntervals, + SkipCleanup: skipCleanup, + ArtifactFolder: artifactFolder, + } + + dumpSpecResourcesAndCleanup(ctx, cleanInput) + }) + + Context("Performing Machine Deployment Orchestrated in-place upgrades", func() { + It("Creating a workload cluster and applying in-place upgrade to Machine Deployment [MD-InPlace] [PR-Blocking]", func() { + By("Creating a workload cluster of 1 control plane and 3 worker nodes") + ApplyClusterTemplateAndWait(ctx, ApplyClusterTemplateAndWaitInput{ + ClusterProxy: bootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: clusterctlLogFolder, + ClusterctlConfigPath: clusterctlConfigPath, + KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: infrastructureProvider, + Namespace: namespace.Name, + ClusterName: clusterName, + KubernetesVersion: e2eConfig.GetVariable(KubernetesVersion), + ControlPlaneMachineCount: ptr.To(int64(1)), + WorkerMachineCount: ptr.To(int64(3)), + }, + WaitForClusterIntervals: e2eConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"), + }, result) + + bootstrapProxyClient := bootstrapClusterProxy.GetClient() + + By("Applying in place upgrade with local path for worker nodes") + ApplyInPlaceUpgradeForMachineDeployment(ctx, ApplyInPlaceUpgradeForMachineDeploymentInput{ + Lister: bootstrapProxyClient, + Getter: bootstrapProxyClient, + ClusterProxy: bootstrapClusterProxy, + Cluster: result.Cluster, + WaitForUpgradeIntervals: e2eConfig.GetIntervals(specName, "wait-machine-upgrade"), + UpgradeOption: e2eConfig.GetVariable(InPlaceUpgradeOption), + MachineDeployments: result.MachineDeployments, + }) + }) + }) + +}) From 9c70379d7922e4d7ed203755515a3d196aaa462c Mon Sep 17 00:00:00 2001 From: eaudetcobello Date: Wed, 23 Oct 2024 13:55:40 -0400 Subject: [PATCH 5/6] Support running e2e tests on AWS (#45) --- Makefile | 3 +- test/e2e/README.md | 67 ++++ test/e2e/cluster_upgrade_test.go | 3 +- test/e2e/config/ck8s-aws.yaml | 128 +++++++ test/e2e/config/ck8s-docker.yaml | 8 +- test/e2e/create_test.go | 2 +- .../infrastructure-aws/cluster-template.yaml | 315 ++++++++++++++++++ test/e2e/data/shared/v1beta1/metadata.yaml | 9 +- .../e2e/data/shared/v1beta1_aws/metadata.yaml | 5 + test/e2e/helpers.go | 2 +- test/e2e/kcp_remediation_test.go | 10 +- test/e2e/md_remediation_test.go | 8 +- test/e2e/node_scale_test.go | 2 +- tilt-provider.yaml | 24 ++ 14 files changed, 572 insertions(+), 14 deletions(-) create mode 100644 test/e2e/config/ck8s-aws.yaml create mode 100644 test/e2e/data/infrastructure-aws/cluster-template.yaml create mode 100644 test/e2e/data/shared/v1beta1_aws/metadata.yaml create mode 100644 tilt-provider.yaml diff --git a/Makefile b/Makefile index 74a1612b..b1de8246 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,8 @@ GINKGO_NODES ?= 1 # GINKGO_NODES is the number of parallel nodes to run GINKGO_TIMEOUT ?= 2h GINKGO_POLL_PROGRESS_AFTER ?= 60m 
GINKGO_POLL_PROGRESS_INTERVAL ?= 5m -E2E_CONF_FILE ?= $(TEST_DIR)/e2e/config/ck8s-docker.yaml +E2E_INFRA ?= docker +E2E_CONF_FILE ?= $(TEST_DIR)/e2e/config/ck8s-$(E2E_INFRA).yaml SKIP_RESOURCE_CLEANUP ?= false USE_EXISTING_CLUSTER ?= false GINKGO_NOCOLOR ?= false diff --git a/test/e2e/README.md b/test/e2e/README.md index 849be79c..5c617bf5 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -21,6 +21,73 @@ To run a specific e2e test, such as `[PR-Blocking]`, use the `GINKGO_FOCUS` envi make GINKGO_FOCUS="\\[PR-Blocking\\]" test-e2e # only run e2e test with `[PR-Blocking]` in its spec name ``` +### Use an existing cluster as the management cluster + +This is useful if you want to use a cluster managed by Tilt. + +```shell +make USE_EXISTING_CLUSTER=true test-e2e +``` + +### Run e2e tests on AWS + +To run the tests on AWS you will need to set the AWS_B64ENCODED_CREDENTIALS environment variable. + +Then, you can run: + +```shell +make E2E_INFRA=aws test-e2e +``` + +Note: The remediation tests currently do not pass on cloud providers. We recommend excluding these tests from your test runs. + +For more information, please refer to the following: + +[Kubernetes Slack Discussion](kubernetes.slack.com/archives/C8TSNPY4T/p1680525266510109) + +[Github Issue #4198](github.com/kubernetes-sigs/cluster-api-provider-aws/issues/4198) + +### Running the tests with Tilt + +This section explains how to run the E2E tests on AWS using a management cluster run by Tilt. + +This section assumes you have *kind* and *Docker* installed. (See [Prerequisites](https://cluster-api.sigs.k8s.io/developer/tilt#prerequisites)) + +First, clone the upstream cluster-api and cluster-api-provider-aws repositories. +```shell +git clone https://github.com/kubernetes-sigs/cluster-api.git +git clone https://github.com/kubernetes-sigs/cluster-api-provider-aws.git +``` + +Next, you need to create a `tilt-settings.yaml` file inside the `cluster-api` directory. +The kustomize_substitutions you provide here are automatically applied to the *management cluster*. +```shell +default_registry: "ghcr.io/canonical/cluster-api-k8s" +provider_repos: +- ../cluster-api-k8s +- ../cluster-api-provider-aws +enable_providers: +- aws +- ck8s-bootstrap +- ck8s-control-plane +``` + +Tilt will know how to run the aws provider controllers because the `cluster-api-provider-aws` repository has a `tilt-provider.yaml` file at it's root. Canonical Kubernetes also provides this file at the root of the repository. The CK8s provider names, ck8s-bootstrap and ck8s-control-plane, are defined in CK8's `tilt-provider.yaml` file. + +Next, you have to customize the variables that will be substituted into the cluster templates applied by the tests (these are under `test/e2e/data/infrastructure-aws`). You can customize the variables in the `test/e2e/config/ck8s-aws.yaml` file under the `variables` key. + +Finally, in one terminal, go into the `cluster-api` directory and run `make tilt-up`. You should see a kind cluster be created, and finally a message indicating that Tilt is available at a certain address. + +In a second terminal in the `cluster-api-k8s` directory, run `make USE_EXISTING_CLUSTER=true test-e2e`. + +### Cleaning up after an e2e test + +The test framework tries it's best to cleanup resources after a test suite, but it is possible that +cloud resources are left over. 
This can be very problematic especially if you run the tests multiple times +while iterating on development (see [Cluster API Book - Tear down](https://cluster-api.sigs.k8s.io/developer/e2e#tear-down)). + +You can use a tool like [aws-nuke](https://github.com/eriksten/aws-nuke) to cleanup your AWS account after a test. + ## Develop an e2e test Refer to [Developing E2E tests](https://cluster-api.sigs.k8s.io/developer/e2e) for a complete guide for developing e2e tests. diff --git a/test/e2e/cluster_upgrade_test.go b/test/e2e/cluster_upgrade_test.go index 80dc7052..5b9841c1 100644 --- a/test/e2e/cluster_upgrade_test.go +++ b/test/e2e/cluster_upgrade_test.go @@ -22,6 +22,7 @@ package e2e import ( . "github.com/onsi/ginkgo/v2" "k8s.io/utils/ptr" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" ) var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { @@ -33,7 +34,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { BootstrapClusterProxy: bootstrapClusterProxy, ArtifactFolder: artifactFolder, SkipCleanup: skipCleanup, - InfrastructureProvider: ptr.To("docker"), + InfrastructureProvider: ptr.To(clusterctl.DefaultInfrastructureProvider), ControlPlaneMachineCount: ptr.To[int64](3), WorkerMachineCount: ptr.To[int64](1), } diff --git a/test/e2e/config/ck8s-aws.yaml b/test/e2e/config/ck8s-aws.yaml new file mode 100644 index 00000000..495b0d20 --- /dev/null +++ b/test/e2e/config/ck8s-aws.yaml @@ -0,0 +1,128 @@ +--- +managementClusterName: capi-test + +# E2E test scenario using local dev images and manifests built from the source tree for following providers: +# - bootstrap ck8s +# - control-plane ck8s +images: + # Use local dev images built source tree; + - name: ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev + loadBehavior: mustLoad + - name: ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev + loadBehavior: mustLoad + +# List of providers that will be installed into the management cluster +# See InitManagementClusterAndWatchControllerLogs function call +providers: + - name: cluster-api + type: CoreProvider + versions: + - name: v1.8.4 + value: https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.8.4/core-components.yaml + type: url + contract: v1beta1 + files: + - sourcePath: "../data/shared/v1beta1/metadata.yaml" + replacements: + - old: "imagePullPolicy: Always" + new: "imagePullPolicy: IfNotPresent" + - name: aws + type: InfrastructureProvider + versions: + - name: v2.6.1 + value: "https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/download/v2.6.1/infrastructure-components.yaml" + type: url + contract: v1beta2 + files: + - sourcePath: "../data/shared/v1beta1_aws/metadata.yaml" + replacements: + - old: "imagePullPolicy: Always" + new: "imagePullPolicy: IfNotPresent" + + # when bootstrapping with tilt, it will use + # https://github.com/kubernetes-sigs/cluster-api/blob/main/hack/tools/internal/tilt-prepare/main.go + # name here should match defaultProviderVersion + - name: v1.9.99 + value: "https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/download/v2.6.1/infrastructure-components.yaml" + type: url + contract: v1beta2 + files: + - sourcePath: "../data/shared/v1beta1_aws/metadata.yaml" + replacements: + - old: "imagePullPolicy: Always" + new: "imagePullPolicy: IfNotPresent" + files: + - sourcePath: "../data/infrastructure-aws/cluster-template.yaml" + - name: ck8s + type: BootstrapProvider + versions: + # Could add older release version for upgrading test, but + # by default, will only use 
the latest version defined in + # ${ProjectRoot}/metadata.yaml to init the management cluster + # this version should be updated when ${ProjectRoot}/metadata.yaml + # is modified + - name: v0.1.99 # next; use manifest from source files + value: "../../../bootstrap/config/default" + replacements: + - old: "ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:latest" + new: "ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev" + files: + - sourcePath: "../../../metadata.yaml" + - name: ck8s + type: ControlPlaneProvider + versions: + - name: v0.1.99 # next; use manifest from source files + value: "../../../controlplane/config/default" + replacements: + - old: "ghcr.io/canonical/cluster-api-k8s/controlplane-controller:latest" + new: "ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev" + files: + - sourcePath: "../../../metadata.yaml" + +# These variables replace the variables in test/e2e/data/infrastructure-aws manifests +# They are used during clusterctl generate cluster +variables: + KUBERNETES_VERSION_MANAGEMENT: "v1.30.0" + KUBERNETES_VERSION: "v1.30.0" + KUBERNETES_VERSION_UPGRADE_TO: "v1.30.1" + IP_FAMILY: "IPv4" + KIND_IMAGE_VERSION: "v1.30.0" + AWS_CONTROL_PLANE_INSTANCE_TYPE: t3.large + AWS_NODE_INSTANCE_TYPE: t3.large + AWS_PUBLIC_IP: true + AWS_CREATE_BASTION: true + AWS_SSH_KEY_NAME: "default" + AWS_AMI_ID: "ami-01b139e6226d65e4f" + AWS_CONTROL_PLANE_ROOT_VOLUME_SIZE: 16 + AWS_NODE_ROOT_VOLUME_SIZE: 16 + AWS_REGION: "us-east-2" + AWS_CCM_IMAGE: "registry.k8s.io/provider-aws/cloud-controller-manager:v1.28.3" + # https://github.com/kubernetes-sigs/cluster-api-provider-aws/blob/main/test/e2e/data/e2e_conf.yaml#L203C1-L205C27 + # There is some work to be done here on figuring out which experimental features + # we want to enable/disable. 
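+  # The AWS cluster template in this patch installs the cloud-controller-manager through a
+  # ClusterResourceSet (crs-ccm), so the ClusterResourceSet feature gate is kept enabled here.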
+ EXP_CLUSTER_RESOURCE_SET: "true" + EXP_MACHINE_SET_PREFLIGHT_CHECKS: "false" + CLUSTER_TOPOLOGY: "true" + CAPA_LOGLEVEL: "4" + +intervals: + # Ref: https://github.com/kubernetes-sigs/cluster-api-provider-aws/blob/main/test/e2e/data/e2e_conf.yaml + default/wait-machines: [ "35m", "10s" ] + default/wait-cluster: [ "35m", "10s" ] + default/wait-control-plane: [ "35m", "10s" ] + default/wait-worker-nodes: [ "35m", "10s" ] + conformance/wait-control-plane: [ "35m", "10s" ] + conformance/wait-worker-nodes: [ "35m", "10s" ] + default/wait-controllers: [ "35m", "10s" ] + default/wait-delete-cluster: [ "35m", "10s" ] + default/wait-machine-upgrade: [ "35m", "10s" ] + default/wait-contolplane-upgrade: [ "35m", "10s" ] + default/wait-machine-status: [ "35m", "10s" ] + default/wait-failed-machine-status: [ "35m", "10s" ] + default/wait-infra-subnets: [ "5m", "30s" ] + default/wait-machine-pool-nodes: [ "35m", "10s" ] + default/wait-machine-pool-upgrade: [ "35m", "10s" ] + default/wait-create-identity: [ "3m", "10s" ] + default/wait-job: [ "35m", "10s" ] + default/wait-deployment-ready: [ "35m", "10s" ] + default/wait-loadbalancer-ready: [ "5m", "30s" ] diff --git a/test/e2e/config/ck8s-docker.yaml b/test/e2e/config/ck8s-docker.yaml index 8dba00e0..a5175491 100644 --- a/test/e2e/config/ck8s-docker.yaml +++ b/test/e2e/config/ck8s-docker.yaml @@ -15,8 +15,8 @@ providers: - name: cluster-api type: CoreProvider versions: - - name: v1.6.2 - value: https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.6.2/core-components.yaml + - name: v1.8.4 + value: https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.8.4/core-components.yaml type: url files: - sourcePath: "../data/shared/v1beta1/metadata.yaml" @@ -28,8 +28,8 @@ providers: versions: # By default, will use the latest version defined in ../data/shared/v1beta1/metadata.yaml # to init the management cluster - - name: v1.6.2 # used during e2e-test - value: https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.6.2/infrastructure-components-development.yaml + - name: v1.8.4 # used during e2e-test + value: https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.8.4/infrastructure-components-development.yaml type: url files: - sourcePath: "../data/shared/v1beta1/metadata.yaml" diff --git a/test/e2e/create_test.go b/test/e2e/create_test.go index 81dd2b8f..41c0e762 100644 --- a/test/e2e/create_test.go +++ b/test/e2e/create_test.go @@ -48,7 +48,7 @@ var _ = Describe("Workload cluster creation", func() { Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) clusterName = fmt.Sprintf("capick8s-create-%s", util.RandomString(6)) - infrastructureProvider = "docker" + infrastructureProvider = clusterctl.DefaultInfrastructureProvider // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. 
namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) diff --git a/test/e2e/data/infrastructure-aws/cluster-template.yaml b/test/e2e/data/infrastructure-aws/cluster-template.yaml new file mode 100644 index 00000000..e0a96714 --- /dev/null +++ b/test/e2e/data/infrastructure-aws/cluster-template.yaml @@ -0,0 +1,315 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: ${CLUSTER_NAME} + labels: + ccm: external +spec: + clusterNetwork: + pods: + cidrBlocks: + - 10.1.0.0/16 + services: + cidrBlocks: + - 10.152.183.0/24 + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: CK8sControlPlane + name: ${CLUSTER_NAME}-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: AWSCluster + name: ${CLUSTER_NAME} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 +kind: AWSCluster +metadata: + name: ${CLUSTER_NAME} +spec: + region: ${AWS_REGION} + sshKeyName: ${AWS_SSH_KEY_NAME} + bastion: + enabled: ${AWS_CREATE_BASTION} + controlPlaneLoadBalancer: + healthCheckProtocol: TCP + network: + cni: + cniIngressRules: + - description: microcluster + protocol: tcp + toPort: 2380 +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta2 +kind: CK8sControlPlane +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + machineTemplate: + infrastructureTemplate: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: AWSMachineTemplate + name: ${CLUSTER_NAME}-control-plane + spec: + nodeName: "{{ ds.meta_data.local_hostname }}" + channel: "${KUBERNETES_VERSION}-classic/edge" + controlPlane: + cloudProvider: external + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 +kind: AWSMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane +spec: + template: + spec: + ami: + id: ${AWS_AMI_ID} + iamInstanceProfile: control-plane.cluster-api-provider-aws.sigs.k8s.io + instanceType: ${AWS_CONTROL_PLANE_INSTANCE_TYPE} + publicIP: ${AWS_PUBLIC_IP} + sshKeyName: ${AWS_SSH_KEY_NAME} + rootVolume: + size: ${AWS_CONTROL_PLANE_ROOT_VOLUME_SIZE} +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: ${CLUSTER_NAME}-worker-md-0 +spec: + clusterName: ${CLUSTER_NAME} + replicas: ${WORKER_MACHINE_COUNT} + selector: + matchLabels: + cluster.x-k8s.io/cluster-name: ${CLUSTER_NAME} + template: + spec: + version: ${KUBERNETES_VERSION} + clusterName: ${CLUSTER_NAME} + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 + kind: CK8sConfigTemplate + name: ${CLUSTER_NAME}-md-0 + infrastructureRef: + name: "${CLUSTER_NAME}-md-0" + apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 + kind: AWSMachineTemplate +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta2 +kind: AWSMachineTemplate +metadata: + name: ${CLUSTER_NAME}-md-0 +spec: + template: + spec: + ami: + id: ${AWS_AMI_ID} + iamInstanceProfile: nodes.cluster-api-provider-aws.sigs.k8s.io + instanceType: ${AWS_NODE_INSTANCE_TYPE} + publicIP: ${AWS_PUBLIC_IP} + sshKeyName: ${AWS_SSH_KEY_NAME} + rootVolume: + size: ${AWS_NODE_ROOT_VOLUME_SIZE} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 +kind: CK8sConfigTemplate +metadata: + name: ${CLUSTER_NAME}-md-0 +spec: + template: + spec: + nodeName: "{{ ds.meta_data.local_hostname }}" + channel: "${KUBERNETES_VERSION}-classic/edge" +--- +apiVersion: addons.cluster.x-k8s.io/v1beta1 +kind: ClusterResourceSet +metadata: + name: crs-ccm +spec: + 
clusterSelector: + matchLabels: + ccm: external + resources: + - kind: ConfigMap + name: cloud-controller-manager-addon + strategy: ApplyOnce +--- +apiVersion: v1 +data: + aws-ccm-external.yaml: | + --- + apiVersion: apps/v1 + kind: DaemonSet + metadata: + name: aws-cloud-controller-manager + namespace: kube-system + labels: + k8s-app: aws-cloud-controller-manager + spec: + selector: + matchLabels: + k8s-app: aws-cloud-controller-manager + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + k8s-app: aws-cloud-controller-manager + spec: + nodeSelector: + node-role.kubernetes.io/control-plane: "" + tolerations: + - key: node.cloudprovider.kubernetes.io/uninitialized + value: "true" + effect: NoSchedule + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + serviceAccountName: cloud-controller-manager + containers: + - name: aws-cloud-controller-manager + image: ${AWS_CCM_IMAGE} + args: + - --v=2 + - --cloud-provider=aws + - --use-service-account-credentials=true + - --configure-cloud-routes=false + resources: + requests: + cpu: 200m + hostNetwork: true + --- + apiVersion: v1 + kind: ServiceAccount + metadata: + name: cloud-controller-manager + namespace: kube-system + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: cloud-controller-manager:apiserver-authentication-reader + namespace: kube-system + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader + subjects: + - apiGroup: "" + kind: ServiceAccount + name: cloud-controller-manager + namespace: kube-system + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: system:cloud-controller-manager + rules: + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - "" + resources: + - nodes + verbs: + - '*' + - apiGroups: + - "" + resources: + - nodes/status + verbs: + - patch + - apiGroups: + - "" + resources: + - services + verbs: + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - services/status + verbs: + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - get + - list + - watch + - apiGroups: + - "" + resources: + - persistentvolumes + verbs: + - get + - list + - update + - watch + - apiGroups: + - "" + resources: + - endpoints + verbs: + - create + - get + - list + - watch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - list + - watch + - update + - apiGroups: + - "" + resources: + - serviceaccounts/token + verbs: + - create + --- + kind: ClusterRoleBinding + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + name: system:cloud-controller-manager + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:cloud-controller-manager + subjects: + - apiGroup: "" + kind: ServiceAccount + name: cloud-controller-manager + namespace: kube-system +kind: ConfigMap +metadata: + name: cloud-controller-manager-addon diff --git a/test/e2e/data/shared/v1beta1/metadata.yaml b/test/e2e/data/shared/v1beta1/metadata.yaml index 92b9968a..b7d38077 100644 --- a/test/e2e/data/shared/v1beta1/metadata.yaml +++ b/test/e2e/data/shared/v1beta1/metadata.yaml @@ -1,9 +1,12 @@ -# maps release series of 
major.minor to cluster-api contract version, -# update this file only when you update the version for cluster-api -# CoreProvider and docker InfrastructureProvider in test/e2e/config/k3s-docker.yaml apiVersion: clusterctl.cluster.x-k8s.io/v1alpha3 kind: Metadata releaseSeries: + - major: 1 + minor: 8 + contract: v1beta1 + - major: 1 + minor: 7 + contract: v1beta1 - major: 1 minor: 6 contract: v1beta1 diff --git a/test/e2e/data/shared/v1beta1_aws/metadata.yaml b/test/e2e/data/shared/v1beta1_aws/metadata.yaml new file mode 100644 index 00000000..8e288cef --- /dev/null +++ b/test/e2e/data/shared/v1beta1_aws/metadata.yaml @@ -0,0 +1,5 @@ +apiVersion: clusterctl.cluster.x-k8s.io/v1alpha3 +releaseSeries: + - major: 2 + minor: 6 + contract: v1beta1 diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 23a521b7..4f3284d1 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -525,7 +525,7 @@ type WaitForControlPlaneAndMachinesReadyInput struct { ControlPlane *controlplanev1.CK8sControlPlane } -// WaitForControlPlaneAndMachinesReady waits for a KThreeControlPlane object to be ready (all the machine provisioned and one node ready). +// WaitForControlPlaneAndMachinesReady waits for a CK8sControlPlane object to be ready (all the machine provisioned and one node ready). func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForControlPlaneAndMachinesReadyInput, intervals ...interface{}) { Expect(ctx).NotTo(BeNil(), "ctx is required for WaitForControlPlaneReady") Expect(input.GetLister).ToNot(BeNil(), "Invalid argument. input.GetLister can't be nil when calling WaitForControlPlaneReady") diff --git a/test/e2e/kcp_remediation_test.go b/test/e2e/kcp_remediation_test.go index 881f1907..3c3b220e 100644 --- a/test/e2e/kcp_remediation_test.go +++ b/test/e2e/kcp_remediation_test.go @@ -23,9 +23,16 @@ import ( . 
"github.com/onsi/ginkgo/v2" "k8s.io/utils/ptr" capi_e2e "sigs.k8s.io/cluster-api/test/e2e" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" ) var _ = Describe("When testing KCP remediation", func() { + // See kubernetes.slack.com/archives/C8TSNPY4T/p1680525266510109 + // And github.com/kubernetes-sigs/cluster-api-provider-aws/issues/4198 + if clusterctl.DefaultInfrastructureProvider == "aws" { + Skip("Skipping KCP remediation test for AWS") + } + capi_e2e.KCPRemediationSpec(ctx, func() capi_e2e.KCPRemediationSpecInput { return capi_e2e.KCPRemediationSpecInput{ E2EConfig: e2eConfig, @@ -33,6 +40,7 @@ var _ = Describe("When testing KCP remediation", func() { BootstrapClusterProxy: bootstrapClusterProxy, ArtifactFolder: artifactFolder, SkipCleanup: skipCleanup, - InfrastructureProvider: ptr.To("docker")} + InfrastructureProvider: ptr.To(clusterctl.DefaultInfrastructureProvider), + } }) }) diff --git a/test/e2e/md_remediation_test.go b/test/e2e/md_remediation_test.go index 4f707ba2..5f40620e 100644 --- a/test/e2e/md_remediation_test.go +++ b/test/e2e/md_remediation_test.go @@ -34,6 +34,12 @@ import ( ) var _ = Describe("When testing MachineDeployment remediation", func() { + // See kubernetes.slack.com/archives/C8TSNPY4T/p1680525266510109 + // And github.com/kubernetes-sigs/cluster-api-provider-aws/issues/4198 + if clusterctl.DefaultInfrastructureProvider == "aws" { + Skip("Skipping KCP remediation test for AWS") + } + var ( ctx = context.TODO() specName = "machine-deployment-remediation" @@ -49,7 +55,7 @@ var _ = Describe("When testing MachineDeployment remediation", func() { Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) clusterName = fmt.Sprintf("capick8s-md-remediation-%s", util.RandomString(6)) - infrastructureProvider = "docker" + infrastructureProvider = clusterctl.DefaultInfrastructureProvider // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) diff --git a/test/e2e/node_scale_test.go b/test/e2e/node_scale_test.go index e295b450..4652e2f5 100644 --- a/test/e2e/node_scale_test.go +++ b/test/e2e/node_scale_test.go @@ -48,7 +48,7 @@ var _ = Describe("Workload cluster scaling", func() { Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) clusterName = fmt.Sprintf("capick8s-node-scale-%s", util.RandomString(6)) - infrastructureProvider = "docker" + infrastructureProvider = clusterctl.DefaultInfrastructureProvider // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. 
namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) diff --git a/tilt-provider.yaml b/tilt-provider.yaml new file mode 100644 index 00000000..d715c79b --- /dev/null +++ b/tilt-provider.yaml @@ -0,0 +1,24 @@ +- name: ck8s-bootstrap + config: + context: bootstrap + image: ghcr.io/canonical/cluster-api-k8s/bootstrap-controller + live_reload_deps: + - main.go + - api + - controllers + - ../go.mod + - ../go.sum + - ../pkg + label: CABPCK +- name: ck8s-control-plane + config: + context: controlplane + image: ghcr.io/canonical/cluster-api-k8s/controlplane-controller + live_reload_deps: + - main.go + - api + - controllers + - ../go.mod + - ../go.sum + - ../pkg + label: CACPPCK From 05b035234cfc7a170a9575ca79de2fd53bdec5c6 Mon Sep 17 00:00:00 2001 From: Homayoon Alimohammadi Date: Thu, 24 Oct 2024 10:42:58 +0400 Subject: [PATCH 6/6] Add CP orchestrated in-place upgrade controller (#71) * Add CK8sControlPlane orchestrated in-place upgrade controller * Add E2E tests for CK8sControlPlane orchestrated in-place upgrades * Include e2e tests in the CI --- .github/workflows/e2e.yaml | 1 + controlplane/config/rbac/role.yaml | 19 ++ ...orchestrated_inplace_upgrade_controller.go | 301 ++++++++++++++++++ controlplane/main.go | 8 + pkg/upgrade/inplace/inplace.go | 44 +++ pkg/upgrade/inplace/interface.go | 19 ++ pkg/upgrade/inplace/lock.go | 192 +++++++++++ pkg/upgrade/inplace/mark.go | 102 ++++++ test/e2e/helpers.go | 110 ++++++- test/e2e/machine_deployment_test.go | 2 +- .../orchestrated_cp_in_place_upgrade_test.go | 108 +++++++ 11 files changed, 900 insertions(+), 6 deletions(-) create mode 100644 controlplane/controllers/orchestrated_inplace_upgrade_controller.go create mode 100644 pkg/upgrade/inplace/inplace.go create mode 100644 pkg/upgrade/inplace/interface.go create mode 100644 pkg/upgrade/inplace/lock.go create mode 100644 pkg/upgrade/inplace/mark.go create mode 100644 test/e2e/orchestrated_cp_in_place_upgrade_test.go diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 2fbbd3a3..b4ec01ae 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -65,6 +65,7 @@ jobs: - "Workload cluster creation" - "Workload cluster scaling" - "Workload cluster upgrade" + - "Orchestrated In place upgrades" # TODO(ben): Remove once all tests are running stable. 
fail-fast: false steps: diff --git a/controlplane/config/rbac/role.yaml b/controlplane/config/rbac/role.yaml index ec2334e9..8795b487 100644 --- a/controlplane/config/rbac/role.yaml +++ b/controlplane/config/rbac/role.yaml @@ -48,6 +48,25 @@ rules: - patch - update - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinesets + - machinesets/status + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + verbs: + - create + - delete + - get + - list + - watch - apiGroups: - "" resources: diff --git a/controlplane/controllers/orchestrated_inplace_upgrade_controller.go b/controlplane/controllers/orchestrated_inplace_upgrade_controller.go new file mode 100644 index 00000000..3b04d09f --- /dev/null +++ b/controlplane/controllers/orchestrated_inplace_upgrade_controller.go @@ -0,0 +1,301 @@ +package controllers + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/collections" + "sigs.k8s.io/cluster-api/util/patch" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2" + controlplanev1 "github.com/canonical/cluster-api-k8s/controlplane/api/v1beta2" + "github.com/canonical/cluster-api-k8s/pkg/ck8s" + "github.com/canonical/cluster-api-k8s/pkg/trace" + "github.com/canonical/cluster-api-k8s/pkg/upgrade/inplace" +) + +// OrchestratedInPlaceUpgradeController reconciles a CK8sControlPlane object and manages the in-place upgrades. +type OrchestratedInPlaceUpgradeController struct { + scheme *runtime.Scheme + recorder record.EventRecorder + machineGetter inplace.MachineGetter + + client.Client + Log logr.Logger + lock inplace.UpgradeLock +} + +// OrchestratedInPlaceUpgradeScope is a struct that holds the context of the upgrade process. +type OrchestratedInPlaceUpgradeScope struct { + cluster *clusterv1.Cluster + ck8sControlPlane *controlplanev1.CK8sControlPlane + ck8sPatcher inplace.Patcher + upgradeTo string + ownedMachines collections.Machines +} + +// SetupWithManager sets up the controller with the Manager. +func (r *OrchestratedInPlaceUpgradeController) SetupWithManager(mgr ctrl.Manager) error { + r.scheme = mgr.GetScheme() + r.recorder = mgr.GetEventRecorderFor("ck8s-cp-orchestrated-inplace-upgrade-controller") + r.machineGetter = &ck8s.Management{ + Client: r.Client, + } + r.lock = inplace.NewUpgradeLock(r.Client) + + if err := ctrl.NewControllerManagedBy(mgr). + For(&controlplanev1.CK8sControlPlane{}). + Owns(&clusterv1.Machine{}). + Complete(r); err != nil { + return fmt.Errorf("failed to get new controller builder: %w", err) + } + + return nil +} + +// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;create;delete;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets;machinesets/status,verbs=get;list;watch +// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch + +// Reconcile handles the reconciliation of a CK8sControlPlane object. 
+func (r *OrchestratedInPlaceUpgradeController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + traceID := trace.NewID() + log := r.Log.WithValues("orchestrated_inplace_upgrade", req.NamespacedName, "trace_id", traceID) + log.V(1).Info("Reconciliation started...") + + ck8sCP := &controlplanev1.CK8sControlPlane{} + if err := r.Get(ctx, req.NamespacedName, ck8sCP); err != nil { + if apierrors.IsNotFound(err) { + log.V(1).Info("CK8sControlPlane resource not found. Ignoring since the object must be deleted.") + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("failed to get CK8sControlPlane: %w", err) + } + + if inplace.GetUpgradeInstructions(ck8sCP) == "" { + log.V(1).Info("CK8sControlPlane has no upgrade instructions, skipping reconciliation") + return ctrl.Result{}, nil + } + + if isDeleted(ck8sCP) { + log.V(1).Info("CK8sControlPlane is being deleted, skipping reconciliation") + return ctrl.Result{}, nil + } + + if !r.machinesAreReady(ck8sCP) { + log.V(1).Info("Machines are not ready, requeuing...") + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + + scope, err := r.createScope(ctx, ck8sCP) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to create scope: %w", err) + } + + upgradingMachine, err := r.lock.IsLocked(ctx, scope.cluster) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to check if upgrade is locked: %w", err) + } + + // Upgrade is locked and a machine is already upgrading + if upgradingMachine != nil { + // NOTE(Hue): Maybe none of the `upgrade-to` and `release` annotations are set on the machine. + // If that's the case, the machine will never get upgraded. + // We consider this a stale lock and unlock the upgrade process. + if inplace.GetUpgradeInstructions(upgradingMachine) != scope.upgradeTo { + log.V(1).Info("Machine does not have expected upgrade instructions, unlocking...", "machine", upgradingMachine.Name) + if err := r.lock.Unlock(ctx, scope.cluster); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to unlock upgrade: %w", err) + } + return ctrl.Result{Requeue: true}, nil + } + + if inplace.IsUpgraded(upgradingMachine, scope.upgradeTo) { + if err := r.lock.Unlock(ctx, scope.cluster); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to unlock upgrade: %w", err) + } + + return ctrl.Result{Requeue: true}, nil + } + + if inplace.IsMachineUpgradeFailed(upgradingMachine) { + log.Info("Machine upgrade failed for machine, requeuing...", "machine", upgradingMachine.Name) + if err := r.markUpgradeFailed(ctx, scope, upgradingMachine); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to mark upgrade as failed: %w", err) + } + + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + + log.V(1).Info("Upgrade is locked, a machine is upgrading, requeuing...", "machine", upgradingMachine.Name) + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + + // Check if there are machines to upgrade + var upgradedMachines int + for _, m := range scope.ownedMachines { + if inplace.IsUpgraded(m, scope.upgradeTo) { + log.V(1).Info("Machine is already upgraded", "machine", m.Name) + upgradedMachines++ + continue + } + + if isDeleted(m) { + log.V(1).Info("Machine is being deleted, requeuing...", "machine", m.Name) + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + + // Lock the process for the machine and start the upgrade + if err := r.lock.Lock(ctx, scope.cluster, m); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to lock upgrade for machine %q: %w", m.Name, err) + 
} + + if err := r.markMachineToUpgrade(ctx, scope, m); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to mark machine to upgrade: %w", err) + } + + log.V(1).Info("Machine marked for upgrade", "machine", m.Name) + + if err := r.markUpgradeInProgress(ctx, scope, m); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to mark upgrade as in-progress: %w", err) + } + + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + + if upgradedMachines == len(scope.ownedMachines) { + if err := r.markUpgradeDone(ctx, scope); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to mark upgrade as done: %w", err) + } + + log.V(1).Info("All machines are upgraded") + return ctrl.Result{}, nil + } + + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil +} + +// markUpgradeInProgress annotates the CK8sControlPlane with in-place upgrade in-progress. +func (r *OrchestratedInPlaceUpgradeController) markUpgradeInProgress(ctx context.Context, scope *OrchestratedInPlaceUpgradeScope, upgradingMachine *clusterv1.Machine) error { + if err := inplace.MarkUpgradeInProgress(ctx, scope.ck8sControlPlane, scope.upgradeTo, scope.ck8sPatcher); err != nil { + return fmt.Errorf("failed to mark object with upgrade in-progress: %w", err) + } + + r.recorder.Eventf( + scope.ck8sControlPlane, + corev1.EventTypeNormal, + bootstrapv1.InPlaceUpgradeInProgressEvent, + "In-place upgrade is in-progress for %q", + upgradingMachine.Name, + ) + return nil +} + +// markUpgradeDone annotates the CK8sControlPlane with in-place upgrade done. +func (r *OrchestratedInPlaceUpgradeController) markUpgradeDone(ctx context.Context, scope *OrchestratedInPlaceUpgradeScope) error { + if err := inplace.MarkUpgradeDone(ctx, scope.ck8sControlPlane, scope.upgradeTo, scope.ck8sPatcher); err != nil { + return fmt.Errorf("failed to mark object with upgrade done: %w", err) + } + + r.recorder.Eventf( + scope.ck8sControlPlane, + corev1.EventTypeNormal, + bootstrapv1.InPlaceUpgradeDoneEvent, + "In-place upgrade is done", + ) + return nil +} + +// markUpgradeFailed annotates the CK8sControlPlane with in-place upgrade failed. +func (r *OrchestratedInPlaceUpgradeController) markUpgradeFailed(ctx context.Context, scope *OrchestratedInPlaceUpgradeScope, failedM *clusterv1.Machine) error { + if err := inplace.MarkUpgradeFailed(ctx, scope.ck8sControlPlane, scope.ck8sPatcher); err != nil { + return fmt.Errorf("failed to mark object with upgrade failed: %w", err) + } + + r.recorder.Eventf( + scope.ck8sControlPlane, + corev1.EventTypeWarning, + bootstrapv1.InPlaceUpgradeFailedEvent, + "In-place upgrade failed for machine %q.", + failedM.Name, + ) + return nil +} + +// createScope creates a new OrchestratedInPlaceUpgradeScope. 
+func (r *OrchestratedInPlaceUpgradeController) createScope(ctx context.Context, ck8sCP *controlplanev1.CK8sControlPlane) (*OrchestratedInPlaceUpgradeScope, error) { + patchHelper, err := patch.NewHelper(ck8sCP, r.Client) + if err != nil { + return nil, fmt.Errorf("failed to create new patch helper: %w", err) + } + + cluster, err := util.GetOwnerCluster(ctx, r.Client, ck8sCP.ObjectMeta) + if err != nil { + return nil, fmt.Errorf("failed to get cluster: %w", err) + } + + ownedMachines, err := r.getControlPlaneMachines(ctx, cluster) + if err != nil { + return nil, fmt.Errorf("failed to get owned machines: %w", err) + } + + return &OrchestratedInPlaceUpgradeScope{ + cluster: cluster, + ck8sControlPlane: ck8sCP, + upgradeTo: inplace.GetUpgradeInstructions(ck8sCP), + ownedMachines: ownedMachines, + ck8sPatcher: patchHelper, + }, nil +} + +// getControlPlaneMachines gets the control plane machines of the cluster. +func (r *OrchestratedInPlaceUpgradeController) getControlPlaneMachines(ctx context.Context, cluster *clusterv1.Cluster) (collections.Machines, error) { + ownedMachines, err := r.machineGetter.GetMachinesForCluster(ctx, client.ObjectKeyFromObject(cluster), collections.ControlPlaneMachines(cluster.Name)) + if err != nil { + return nil, fmt.Errorf("failed to get cluster machines: %w", err) + } + + return ownedMachines, nil +} + +// markMachineToUpgrade marks the machine to upgrade. +func (r *OrchestratedInPlaceUpgradeController) markMachineToUpgrade(ctx context.Context, scope *OrchestratedInPlaceUpgradeScope, m *clusterv1.Machine) error { + if err := inplace.MarkMachineToUpgrade(ctx, m, scope.upgradeTo, r.Client); err != nil { + return fmt.Errorf("failed to mark machine to inplace upgrade: %w", err) + } + + r.recorder.Eventf( + scope.ck8sControlPlane, + corev1.EventTypeNormal, + bootstrapv1.InPlaceUpgradeInProgressEvent, + "Machine %q is upgrading to %q", + m.Name, + scope.upgradeTo, + ) + + return nil +} + +func (r *OrchestratedInPlaceUpgradeController) machinesAreReady(ck8sCP *controlplanev1.CK8sControlPlane) bool { + if ck8sCP == nil || ck8sCP.Spec.Replicas == nil { + return false + } + return ck8sCP.Status.ReadyReplicas == *ck8sCP.Spec.Replicas +} + +// isDeleted returns true if the object is being deleted. 
+func isDeleted(obj client.Object) bool {
+	return !obj.GetDeletionTimestamp().IsZero()
+}
diff --git a/controlplane/main.go b/controlplane/main.go
index 06dd60d1..a4907c98 100644
--- a/controlplane/main.go
+++ b/controlplane/main.go
@@ -116,6 +116,14 @@ func main() {
 		os.Exit(1)
 	}
 
+	inplaceUpgradeLogger := ctrl.Log.WithName("controllers").WithName("OrchestratedInPlaceUpgrade")
+	if err = (&controllers.OrchestratedInPlaceUpgradeController{
+		Client: mgr.GetClient(),
+		Log:    inplaceUpgradeLogger,
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "failed to create controller", "controller", "OrchestratedInPlaceUpgrade")
+	}
+
 	if os.Getenv("ENABLE_WEBHOOKS") != "false" {
 		if err = (&controlplanev1.CK8sControlPlane{}).SetupWebhookWithManager(mgr); err != nil {
 			setupLog.Error(err, "unable to create webhook", "webhook", "CK8sControlPlane")
diff --git a/pkg/upgrade/inplace/inplace.go b/pkg/upgrade/inplace/inplace.go
new file mode 100644
index 00000000..e40ced90
--- /dev/null
+++ b/pkg/upgrade/inplace/inplace.go
@@ -0,0 +1,44 @@
+package inplace
+
+import (
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
+)
+
+// IsUpgraded checks if the object is already upgraded to the specified release.
+func IsUpgraded(obj client.Object, release string) bool {
+	return obj.GetAnnotations()[bootstrapv1.InPlaceUpgradeReleaseAnnotation] == release
+}
+
+// GetUpgradeInstructions returns the in-place upgrade instructions set on the object.
+func GetUpgradeInstructions(obj client.Object) string {
+	// NOTE(Hue): The reason we are checking the `release` annotation as well is that we want to make sure
+	// we upgrade the new machines that joined after the initial upgrade process.
+	// The `upgrade-to` overwrites the `release` annotation, because we might have both in case
+	// the user decides to do another in-place upgrade after a successful one.
+	upgradeTo := obj.GetAnnotations()[bootstrapv1.InPlaceUpgradeReleaseAnnotation]
+	if to, ok := obj.GetAnnotations()[bootstrapv1.InPlaceUpgradeToAnnotation]; ok {
+		upgradeTo = to
+	}
+
+	return upgradeTo
+}
+
+// IsMachineUpgradeFailed checks if the machine upgrade failed.
+// The upgrade might be getting retried at the moment of the check. This check ensures that the upgrade failed *at some point*.
+func IsMachineUpgradeFailed(m *clusterv1.Machine) bool {
+	return m.Annotations[bootstrapv1.InPlaceUpgradeLastFailedAttemptAtAnnotation] != ""
+}
+
+// IsMachineUpgrading checks if the object is upgrading.
+func IsMachineUpgrading(m *clusterv1.Machine) bool {
+	// NOTE(Hue): We can't easily generalize this function to check for all objects.
+	// Generally speaking, the `status == in-progress` should indicate that the object is upgrading.
+	// But from the orchestrated upgrade perspective, we need to also check the `upgrade-to` annotation
+	// so that we know if the single machine inplace upgrade
+	// controller is going to handle the upgrade process, hence "in-progress".
+ return m.Annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation] == bootstrapv1.InPlaceUpgradeInProgressStatus || + m.Annotations[bootstrapv1.InPlaceUpgradeToAnnotation] != "" +} diff --git a/pkg/upgrade/inplace/interface.go b/pkg/upgrade/inplace/interface.go new file mode 100644 index 00000000..3d215d1a --- /dev/null +++ b/pkg/upgrade/inplace/interface.go @@ -0,0 +1,19 @@ +package inplace + +import ( + "context" + + "sigs.k8s.io/cluster-api/util/collections" + "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// MachineGetter is an interface that defines the methods used to get machines. +type MachineGetter interface { + GetMachinesForCluster(ctx context.Context, cluster client.ObjectKey, filters ...collections.Func) (collections.Machines, error) +} + +// Patcher is an interface that knows how to patch an object. +type Patcher interface { + Patch(ctx context.Context, obj client.Object, opts ...patch.Option) error +} diff --git a/pkg/upgrade/inplace/lock.go b/pkg/upgrade/inplace/lock.go new file mode 100644 index 00000000..eb4bd499 --- /dev/null +++ b/pkg/upgrade/inplace/lock.go @@ -0,0 +1,192 @@ +package inplace + +import ( + "context" + "encoding/json" + "errors" + "fmt" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// UpgradeLock is an interface that defines the methods used to lock and unlock the inplace upgrade process. +type UpgradeLock interface { + // IsLocked checks if the upgrade process is locked and (if locked) returns the machine that the process is locked for. + IsLocked(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.Machine, error) + // Lock is a non-blocking call that tries to lock the upgrade process for the given machine. + Lock(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) error + // Unlock unlocks the upgrade process. 
+ Unlock(ctx context.Context, cluster *clusterv1.Cluster) error +} + +const ( + lockConfigMapNameSuffix = "cp-inplace-upgrade-lock" + lockInformationKey = "lock-information" +) + +func NewUpgradeLock(c client.Client) *upgradeLock { + return &upgradeLock{ + c: c, + semaphore: &semaphore{}, + } +} + +type upgradeLock struct { + c client.Client + semaphore *semaphore +} + +type semaphore struct { + configMap *corev1.ConfigMap +} + +func newSemaphore() *semaphore { + return &semaphore{configMap: &corev1.ConfigMap{}} +} + +func (s *semaphore) getLockInfo() (*lockInformation, error) { + if s.configMap == nil { + return nil, errors.New("configmap is nil") + } + if s.configMap.Data == nil { + return nil, errors.New("configmap data is nil") + } + liStr, ok := s.configMap.Data[lockInformationKey] + if !ok { + return nil, errors.New("lock information key not found") + } + + li := &lockInformation{} + if err := json.Unmarshal([]byte(liStr), li); err != nil { + return nil, fmt.Errorf("failed to unmarshal lock information: %w", err) + } + + return li, nil +} + +func (s *semaphore) setLockInfo(li lockInformation) error { + if s.configMap == nil { + s.configMap = &corev1.ConfigMap{} + } + if s.configMap.Data == nil { + s.configMap.Data = make(map[string]string) + } + + liStr, err := json.Marshal(li) + if err != nil { + return fmt.Errorf("failed to marshal lock information: %w", err) + } + + s.configMap.Data[lockInformationKey] = string(liStr) + return nil +} + +func (s *semaphore) setMetadata(cluster *clusterv1.Cluster) { + if s.configMap == nil { + s.configMap = &corev1.ConfigMap{} + } + + s.configMap.ObjectMeta = metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: configMapName(cluster.Name), + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: clusterv1.ClusterKind, + Name: cluster.Name, + UID: cluster.UID, + }, + }, + } +} + +type lockInformation struct { + MachineName string `json:"machineName"` + MachineNamespace string `json:"machineNamespace"` +} + +func configMapName(clusterName string) string { + return fmt.Sprintf("%s-%s", clusterName, lockConfigMapNameSuffix) +} + +// IsLocked checks if the upgrade process is locked and (if locked) returns the machine that the process is locked for. +func (l *upgradeLock) IsLocked(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.Machine, error) { + l.semaphore = newSemaphore() + name := configMapName(cluster.Name) + if err := l.c.Get(ctx, client.ObjectKey{Name: name, Namespace: cluster.Namespace}, l.semaphore.configMap); err != nil { + if apierrors.IsNotFound(err) { + return nil, nil + } + return nil, fmt.Errorf("failed to get configmap %q: %w", name, err) + } + + li, err := l.semaphore.getLockInfo() + if err != nil { + return nil, fmt.Errorf("failed to get lock information: %w", err) + } + + machine := &clusterv1.Machine{} + if err := l.c.Get(ctx, client.ObjectKey{Name: li.MachineName, Namespace: li.MachineNamespace}, machine); err != nil { + // must be a stale lock from a deleted machine, unlock. + if apierrors.IsNotFound(err) { + if err := l.Unlock(ctx, cluster); err != nil { + return nil, fmt.Errorf("failed to unlock: %w", err) + } + return nil, nil + } + return nil, fmt.Errorf("failed to get machine %q: %w", li.MachineName, err) + } + + return machine, nil +} + +// Unlock unlocks the upgrade process. 
+func (l *upgradeLock) Unlock(ctx context.Context, cluster *clusterv1.Cluster) error { + cm := &corev1.ConfigMap{} + name := configMapName(cluster.Name) + if err := l.c.Get(ctx, client.ObjectKey{Name: name, Namespace: cluster.Namespace}, cm); err != nil { + // if the configmap is not found, it means the lock is already released. + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get configmap %q: %w", name, err) + } + + if err := l.c.Delete(ctx, cm); err != nil { + // if the configmap is not found, it means the lock is already released. + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete configmap %q: %w", name, err) + } + + return nil +} + +// Lock locks the upgrade process for the given machine. +func (l *upgradeLock) Lock(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error { + l.semaphore = newSemaphore() + li := lockInformation{ + MachineName: machine.Name, + MachineNamespace: machine.Namespace, + } + if err := l.semaphore.setLockInfo(li); err != nil { + return fmt.Errorf("failed to set lock information: %w", err) + } + l.semaphore.setMetadata(cluster) + + if err := l.c.Create(ctx, l.semaphore.configMap); err != nil { + return fmt.Errorf("failed to create configmap: %w", err) + } + + return nil +} + +var _ UpgradeLock = &upgradeLock{} diff --git a/pkg/upgrade/inplace/mark.go b/pkg/upgrade/inplace/mark.go new file mode 100644 index 00000000..c6281b0a --- /dev/null +++ b/pkg/upgrade/inplace/mark.go @@ -0,0 +1,102 @@ +package inplace + +import ( + "context" + "fmt" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/controller-runtime/pkg/client" + + bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2" +) + +// MarkMachineToUpgrade marks the machine to upgrade. +func MarkMachineToUpgrade(ctx context.Context, m *clusterv1.Machine, to string, c client.Client) error { + patchHelper, err := patch.NewHelper(m, c) + if err != nil { + return fmt.Errorf("failed to create new patch helper: %w", err) + } + + if m.Annotations == nil { + m.Annotations = make(map[string]string) + } + + // clean up + delete(m.Annotations, bootstrapv1.InPlaceUpgradeReleaseAnnotation) + delete(m.Annotations, bootstrapv1.InPlaceUpgradeStatusAnnotation) + delete(m.Annotations, bootstrapv1.InPlaceUpgradeChangeIDAnnotation) + delete(m.Annotations, bootstrapv1.InPlaceUpgradeLastFailedAttemptAtAnnotation) + + m.Annotations[bootstrapv1.InPlaceUpgradeToAnnotation] = to + + if err := patchHelper.Patch(ctx, m); err != nil { + return fmt.Errorf("failed to patch: %w", err) + } + + return nil +} + +// MarkUpgradeFailed annotates the object with in-place upgrade failed. +func MarkUpgradeFailed(ctx context.Context, obj client.Object, patcher Patcher) error { + annotations := obj.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + + // clean up + delete(annotations, bootstrapv1.InPlaceUpgradeReleaseAnnotation) + + annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation] = bootstrapv1.InPlaceUpgradeFailedStatus + obj.SetAnnotations(annotations) + + if err := patcher.Patch(ctx, obj); err != nil { + return fmt.Errorf("failed to patch: %w", err) + } + + return nil +} + +// MarkUpgradeInProgress annotates the object with in-place upgrade in-progress. 
+func MarkUpgradeInProgress(ctx context.Context, obj client.Object, to string, patcher Patcher) error { + annotations := obj.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + + // clean up + delete(annotations, bootstrapv1.InPlaceUpgradeReleaseAnnotation) + + annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation] = bootstrapv1.InPlaceUpgradeInProgressStatus + annotations[bootstrapv1.InPlaceUpgradeToAnnotation] = to + + obj.SetAnnotations(annotations) + + if err := patcher.Patch(ctx, obj); err != nil { + return fmt.Errorf("failed to patch: %w", err) + } + + return nil +} + +// MarkUpgradeDone annotates the object with in-place upgrade done. +func MarkUpgradeDone(ctx context.Context, obj client.Object, to string, patcher Patcher) error { + annotations := obj.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + + // clean up + delete(annotations, bootstrapv1.InPlaceUpgradeToAnnotation) + + annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation] = bootstrapv1.InPlaceUpgradeDoneStatus + annotations[bootstrapv1.InPlaceUpgradeReleaseAnnotation] = to + + obj.SetAnnotations(annotations) + + if err := patcher.Patch(ctx, obj); err != nil { + return fmt.Errorf("failed to patch: %w", err) + } + + return nil +} diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 4f3284d1..66738ff2 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -594,7 +594,7 @@ func ApplyInPlaceUpgradeAndWait(ctx context.Context, input ApplyInPlaceUpgradeAn Eventually(func() (bool, error) { if err := input.Getter.Get(ctx, client.ObjectKeyFromObject(input.Obj), input.DestinationObj); err != nil { - Byf("Failed to get the machine: %+v", err) + Byf("Failed to get the object: %+v", err) return false, err } @@ -728,20 +728,107 @@ func ApplyInPlaceUpgradeForMachineDeployment(ctx context.Context, input ApplyInP // Make sure all the machines are upgraded inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace) - matchClusterListOption := client.MatchingLabels{ + belongsToMDListOption := client.MatchingLabels{ clusterv1.ClusterNameLabel: input.Cluster.Name, clusterv1.MachineDeploymentNameLabel: machineDeployment.Name, } - machineList := &clusterv1.MachineList{} + mdMachineList := &clusterv1.MachineList{} Eventually(func() error { - return input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption) + return input.Lister.List(ctx, mdMachineList, inClustersNamespaceListOption, belongsToMDListOption) }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list machines for the machineDeployment %q", machineDeployment.Name) - for _, machine := range machineList.Items { + for _, machine := range mdMachineList.Items { Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation]).To(Equal(bootstrapv1.InPlaceUpgradeDoneStatus)) Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeReleaseAnnotation]).To(Equal(input.UpgradeOption)) } + + // Make sure other machines are not upgraded + allMachines := &clusterv1.MachineList{} + Eventually(func() error { + return input.Lister.List(ctx, allMachines, inClustersNamespaceListOption) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list all machines") + + for _, machine := range allMachines.Items { + // skip the ones belong to the MD under test machines + if isMachineInList(machine, mdMachineList) { + continue + } + + 
Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeToAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeLastFailedAttemptAtAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeChangeIDAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeReleaseAnnotation]).To(BeEmpty()) + } +} + +type ApplyInPlaceUpgradeForCK8sControlPlaneInput struct { + Lister framework.Lister + Getter framework.Getter + ClusterProxy framework.ClusterProxy + Cluster *clusterv1.Cluster + UpgradeOption string + WaitForUpgradeIntervals []interface{} +} + +func ApplyInPlaceUpgradeForCK8sControlPlane(ctx context.Context, input ApplyInPlaceUpgradeForCK8sControlPlaneInput) { + Expect(ctx).NotTo(BeNil()) + Expect(input.ClusterProxy).ToNot(BeNil()) + Expect(input.Cluster).ToNot(BeNil()) + Expect(input.UpgradeOption).ToNot(BeEmpty()) + + ck8sCP := GetCK8sControlPlaneByCluster(ctx, GetCK8sControlPlaneByClusterInput{ + Lister: input.Lister, + ClusterName: input.Cluster.Name, + Namespace: input.Cluster.Namespace, + }) + Expect(ck8sCP).ToNot(BeNil()) + + ApplyInPlaceUpgradeAndWait(ctx, ApplyInPlaceUpgradeAndWaitInput{ + Getter: input.Getter, + Obj: ck8sCP, + DestinationObj: &controlplanev1.CK8sControlPlane{}, + ClusterProxy: input.ClusterProxy, + UpgradeOption: input.UpgradeOption, + WaitForUpgradeIntervals: input.WaitForUpgradeIntervals, + }) + + // Make sure all the machines are upgraded + inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace) + cpMatchLabelsListOption := client.MatchingLabels{ + clusterv1.ClusterNameLabel: input.Cluster.Name, + clusterv1.MachineControlPlaneLabel: "", + } + + cpMachineList := &clusterv1.MachineList{} + Eventually(func() error { + return input.Lister.List(ctx, cpMachineList, inClustersNamespaceListOption, cpMatchLabelsListOption) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list machines for the CK8sControlPlane %q", ck8sCP.Name) + + for _, machine := range cpMachineList.Items { + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation]).To(Equal(bootstrapv1.InPlaceUpgradeDoneStatus)) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeReleaseAnnotation]).To(Equal(input.UpgradeOption)) + } + + // Make sure other machines (non-cp ones) are not upgraded + allMachines := &clusterv1.MachineList{} + Eventually(func() error { + return input.Lister.List(ctx, allMachines, inClustersNamespaceListOption) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Couldn't list all machines") + + for _, machine := range allMachines.Items { + // skip the control plane machines + if isMachineInList(machine, cpMachineList) { + continue + } + + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeToAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeLastFailedAttemptAtAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeChangeIDAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeStatusAnnotation]).To(BeEmpty()) + Expect(machine.Annotations[bootstrapv1.InPlaceUpgradeReleaseAnnotation]).To(BeEmpty()) + } } // UpgradeControlPlaneAndWaitForUpgradeInput is the input type for UpgradeControlPlaneAndWaitForUpgrade. 
@@ -877,3 +964,16 @@ func byClusterOptions(name, namespace string) []client.ListOption { }, } } + +func isMachineInList(machine clusterv1.Machine, list *clusterv1.MachineList) bool { + if list == nil { + return false + } + + for _, m := range list.Items { + if m.Name == machine.Name { + return true + } + } + return false +} diff --git a/test/e2e/machine_deployment_test.go b/test/e2e/machine_deployment_test.go index a728f0ae..2eaf4bda 100644 --- a/test/e2e/machine_deployment_test.go +++ b/test/e2e/machine_deployment_test.go @@ -96,7 +96,7 @@ var _ = Describe("Machine Deployment Orchestrated In place upgrades", func() { bootstrapProxyClient := bootstrapClusterProxy.GetClient() - By("Applying in place upgrade with local path for worker nodes") + By("Applying in place upgrade with local path for MachineDeployment object") ApplyInPlaceUpgradeForMachineDeployment(ctx, ApplyInPlaceUpgradeForMachineDeploymentInput{ Lister: bootstrapProxyClient, Getter: bootstrapProxyClient, diff --git a/test/e2e/orchestrated_cp_in_place_upgrade_test.go b/test/e2e/orchestrated_cp_in_place_upgrade_test.go new file mode 100644 index 00000000..88b6b198 --- /dev/null +++ b/test/e2e/orchestrated_cp_in_place_upgrade_test.go @@ -0,0 +1,108 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2021 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/cluster-api/util" +) + +var _ = Describe("CK8sControlPlane Orchestrated In place upgrades", func() { + var ( + ctx = context.TODO() + specName = "workload-cluster-ck8scp-inplace" + namespace *corev1.Namespace + cancelWatches context.CancelFunc + result *ApplyClusterTemplateAndWaitResult + clusterName string + clusterctlLogFolder string + infrastructureProvider string + ) + + BeforeEach(func() { + Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion)) + + clusterName = fmt.Sprintf("capick8s-ck8scp-in-place-%s", util.RandomString(6)) + infrastructureProvider = clusterctl.DefaultInfrastructureProvider + + // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. 
+ namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) + + result = new(ApplyClusterTemplateAndWaitResult) + + clusterctlLogFolder = filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()) + }) + + AfterEach(func() { + cleanInput := cleanupInput{ + SpecName: specName, + Cluster: result.Cluster, + ClusterProxy: bootstrapClusterProxy, + Namespace: namespace, + CancelWatches: cancelWatches, + IntervalsGetter: e2eConfig.GetIntervals, + SkipCleanup: skipCleanup, + ArtifactFolder: artifactFolder, + } + + dumpSpecResourcesAndCleanup(ctx, cleanInput) + }) + + Context("Performing CK8sControlPlane Orchestrated in-place upgrades", func() { + It("Creating a workload cluster and applying in-place upgrade to CK8sControlPlane [CK8SCP-InPlace] [PR-Blocking]", func() { + By("Creating a workload cluster of 3 control plane and 1 worker nodes") + ApplyClusterTemplateAndWait(ctx, ApplyClusterTemplateAndWaitInput{ + ClusterProxy: bootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: clusterctlLogFolder, + ClusterctlConfigPath: clusterctlConfigPath, + KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: infrastructureProvider, + Namespace: namespace.Name, + ClusterName: clusterName, + KubernetesVersion: e2eConfig.GetVariable(KubernetesVersion), + ControlPlaneMachineCount: ptr.To(int64(3)), + WorkerMachineCount: ptr.To(int64(1)), + }, + WaitForClusterIntervals: e2eConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"), + }, result) + + bootstrapProxyClient := bootstrapClusterProxy.GetClient() + + By("Applying in place upgrade with local path for CK8sControlPlane object") + ApplyInPlaceUpgradeForCK8sControlPlane(ctx, ApplyInPlaceUpgradeForCK8sControlPlaneInput{ + Lister: bootstrapProxyClient, + Getter: bootstrapProxyClient, + ClusterProxy: bootstrapClusterProxy, + Cluster: result.Cluster, + WaitForUpgradeIntervals: e2eConfig.GetIntervals(specName, "wait-machine-upgrade"), + UpgradeOption: e2eConfig.GetVariable(InPlaceUpgradeOption), + }) + }) + }) + +})
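For reference, a minimal sketch of how a client might trigger the orchestrated in-place upgrade added in this series: annotate the CK8sControlPlane with the upgrade-to instruction and let OrchestratedInPlaceUpgradeController roll it out one control plane machine at a time. The helper name and package are hypothetical, the value format of upgradeTo (for example a channel or a local snap path) is an assumption about what the single-machine upgrade controller accepts, and the client is assumed to have the CK8s API types registered in its scheme; only the annotation constant comes from this patch.

package example

import (
	"context"
	"fmt"

	"sigs.k8s.io/controller-runtime/pkg/client"

	bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
	controlplanev1 "github.com/canonical/cluster-api-k8s/controlplane/api/v1beta2"
)

// triggerOrchestratedUpgrade (hypothetical helper) sets the in-place "upgrade-to"
// annotation on a CK8sControlPlane so the orchestrated in-place upgrade controller
// introduced in this patch series picks it up and upgrades the owned control
// plane machines one at a time.
func triggerOrchestratedUpgrade(ctx context.Context, c client.Client, namespace, name, upgradeTo string) error {
	cp := &controlplanev1.CK8sControlPlane{}
	if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, cp); err != nil {
		return fmt.Errorf("failed to get CK8sControlPlane: %w", err)
	}

	// Build a merge patch from the current state before mutating annotations.
	patch := client.MergeFrom(cp.DeepCopy())
	if cp.Annotations == nil {
		cp.Annotations = map[string]string{}
	}
	// upgradeTo is whatever the single-machine in-place upgrade controller accepts
	// (assumed here, e.g. a channel or local snap path; not defined by this patch).
	cp.Annotations[bootstrapv1.InPlaceUpgradeToAnnotation] = upgradeTo

	return c.Patch(ctx, cp, patch)
}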