diff --git a/README.md b/README.md index 6ced325..32c94a5 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ make run As soon as the bootstrap and control-plane controllers are up and running you can apply the cluster manifests describing the desired specs of the cluster you want to provision. Each machine is associated with a MicroK8sConfig through which you can set the cluster's properties. Please review the available options in [the respective definitions file](./apis/v1beta1/microk8sconfig_types.go). You may also find useful the example manifests found under the [examples](./examples/) directory. Note that the configuration structure followed is similar to the the one of kubeadm, in the MicroK8sConfig you will find a CLusterConfiguration and an InitConfiguration sections. When targeting a specific infrastructure you should be aware of which ports are used by MicroK8s and allow them in the network security groups on your deployment. -Two workload cluster templates are available under the [templates](./templates/) folder, which are actively used to validate releases: +Two workload cluster templates are available under the [templates](./templates) folder, which are actively used to validate releases: - [AWS](./templates/cluster-template-aws.yaml), using the [AWS Infrastructure Provider](https://github.com/kubernetes-sigs/cluster-api-provider-aws) - [OpenStack](./templates/cluster-template-openstack.yaml), using the [OpenStack Infrastructure Provider](https://github.com/kubernetes-sigs/cluster-api-provider-openstack) diff --git a/integration/README.md b/integration/README.md index a905784..8afc059 100644 --- a/integration/README.md +++ b/integration/README.md @@ -4,40 +4,10 @@ The integration/e2e tests have the following prerequisites: - * an environment variable `CLUSTER_MANIFEST_FILE` pointing to the cluster manifest. Cluster manifests can be produced with the help of the templates found under `templates`. 
For example: - ``` - export AWS_REGION=us-east-1 - export AWS_SSH_KEY_NAME=capi - export CONTROL_PLANE_MACHINE_COUNT=3 - export WORKER_MACHINE_COUNT=3 - export AWS_CREATE_BASTION=false - export AWS_PUBLIC_IP=false - export AWS_CONTROL_PLANE_MACHINE_FLAVOR=t3.large - export AWS_NODE_MACHINE_FLAVOR=t3.large - export CLUSTER_NAME=test-ci-cluster - clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.25.0 > cluster.yaml - export CLUSTER_MANIFEST_FILE=$PWD/cluster.yaml - ``` - - * Additional environment variables when testing cluster upgrades: - ``` - export CAPI_UPGRADE_VERSION=v1.26.0 - export CAPI_UPGRADE_MD_NAME=${CLUSTER_NAME}-md-0 - export CAPI_UPGRADE_MD_TYPE=machinedeployments.cluster.x-k8s.io - export CAPI_UPGRADE_CP_NAME=${CLUSTER_NAME}-control-plane - export CAPI_UPGRADE_CP_TYPE=microk8scontrolplanes.controlplane.cluster.x-k8s.io - - # Change the control plane and worker machine count to desired values for in-place upgrades tests and create a new cluster manifest. 
- CONTROL_PLANE_MACHINE_COUNT=1 - WORKER_MACHINE_COUNT=1 - clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.25.0 > cluster-inplace.yaml - export CLUSTER_INPLACE_MANIFEST_FILE=$PWD/cluster-inplace.yaml - - ``` - - * `clusterctl` available in the PATH - - * `kubectl` available in the PATH + * make sure to have an SSH key named `capi` in AWS in the `us-east-1` region; if you do not have a key, refer + to CAPI on [AWS prerequisites documentation](https://cluster-api-aws.sigs.k8s.io/topics/using-clusterawsadm-to-fulfill-prerequisites#ssh-key-pair) + + * local testing requires the following to be available in the PATH: `clusterctl`, `kubectl`, `helm` * a management cluster initialised via `clusterctl` with the infrastructure targeted as well as the version of the MicroK8s providers we want to be tested @@ -67,7 +37,7 @@ microk8s config > ~/.kube/config #### Initialize infrastructure provider -Visit [here](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for a list of common infrasturture providers. +Visit [here](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for a list of common infrastructure providers. ```bash clusterctl init --infrastructure --bootstrap - --control-plane - @@ -83,7 +53,7 @@ docker push /capi-bootstrap-provider-microk8s: sed "s,docker.io/cdkbot/capi-bootstrap-provider-microk8s:latest,docker.io//capi-bootstrap-provider-microk8s:," -i bootstrap-components.yaml ``` -Similarly for control-plane provider +Similarly, for control-plane provider ```bash cd control-plane docker build -t /capi-control-plane-provider-microk8s: . 
@@ -96,6 +66,9 @@ sed "s,docker.io/cdkbot/capi-control-plane-provider-microk8s:latest,docker.io/= maxAttempts { - t.Fatal(err) - } else { - t.Logf("Retrying...") - attempt++ - time.Sleep(10 * time.Second) - } - } else { - break - } +func waitForPod(ctx context.Context, t testing.TB, pod string, ns string) { + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + _, err := execCommand(t, "kubectl", "wait", "--timeout=15s", "--for=condition=available", "deploy/"+pod, "-n", ns) + return err + }); err != nil { + t.Fatal(err) } } @@ -148,289 +157,252 @@ func teardownCluster() { } // deployCluster deploys a cluster using the manifest in CLUSTER_MANIFEST_FILE. -func deployCluster(t testing.TB, cluster_manifest_file string) { - t.Log("Setting up the cluster") - command := []string{"kubectl", "apply", "-f", cluster_manifest_file} - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Error(string(outputBytes)) - t.Fatalf("Failed to create the requested cluster. %s", err) +func deployCluster(ctx context.Context, t testing.TB, clusterManifestFile string) { + t.Logf("Setting up the cluster using %s", clusterManifestFile) + if _, err := execCommand(t, "kubectl", "apply", "-f", clusterManifestFile); err != nil { + t.Fatalf("Failed to get the name of the cluster. %s", err) } time.Sleep(30 * time.Second) - command = []string{"kubectl", "get", "cluster", "--no-headers", "-o", "custom-columns=:metadata.name"} - cmd = exec.Command(command[0], command[1:]...) - outputBytes, err = cmd.CombinedOutput() + output, err := execCommand(t, "kubectl", "get", "cluster", "--no-headers", "-o", "custom-columns=:metadata.name") if err != nil { - t.Error(string(outputBytes)) t.Fatalf("Failed to get the name of the cluster. 
%s", err) } - cluster := strings.Trim(string(outputBytes), "\n") + cluster := strings.Trim(output, "\n") t.Logf("Cluster name is %s", cluster) - attempt := 0 - maxAttempts := 60 - command = []string{"clusterctl", "get", "kubeconfig", cluster} - for { - cmd = exec.Command(command[0], command[1:]...) - outputBytes, err = cmd.Output() + if err = retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err = execCommand(t, "clusterctl", "get", "kubeconfig", cluster) if err != nil { - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Failed to get the target's kubeconfig, retrying.") - time.Sleep(20 * time.Second) - } - } else { - cfg := strings.Trim(string(outputBytes), "\n") - err = os.WriteFile(KUBECONFIG, []byte(cfg), 0644) - if err != nil { - t.Fatalf("Could not persist the targets kubeconfig file. %s", err) - } - t.Logf("Target's kubeconfig file is at %s", KUBECONFIG) - t.Log(cfg) - break + return err + } + + cfg := strings.Trim(output, "\n") + err = os.WriteFile(KUBECONFIG, []byte(cfg), 0644) + if err != nil { + t.Fatalf("Could not persist the targets kubeconfig file. %s", err) } + t.Logf("Target's kubeconfig file is at %s", KUBECONFIG) + return nil + + }); err != nil { + t.Fatal(err) } // Wait until the cluster is provisioned - attempt = 0 - maxAttempts = 60 - command = []string{"kubectl", "get", "cluster", cluster} - for { - cmd = exec.Command(command[0], command[1:]...) 
- outputBytes, err = cmd.CombinedOutput() + if err = retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err = execCommand(t, "kubectl", "get", "cluster", cluster) if err != nil { - t.Log(string(outputBytes)) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - if strings.Contains(string(outputBytes), "Provisioned") { - break - } else { - attempt++ - time.Sleep(20 * time.Second) - t.Log("Waiting for the cluster to be provisioned") - } + return err + } + if strings.Contains(output, "Provisioned") { + return nil } + return errors.New("cluster not provisioned") + }); err != nil { + t.Fatal(err) } +} +// verifyCluster check if cluster is functional +func verifyCluster(ctx context.Context, t testing.TB) { // Wait until all machines are running - attempt = 0 - maxAttempts = 60 + t.Log("Verify cluster deployment") + machines := 0 - command = []string{"kubectl", "get", "machine", "--no-headers"} - for { - cmd = exec.Command(command[0], command[1:]...) 
- outputBytes, err = cmd.CombinedOutput() - output := string(outputBytes) + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "get", "machine", "--no-headers") if err != nil { - t.Log(output) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - machines = strings.Count(output, "\n") - running := strings.Count(output, "Running") - t.Logf("Machines %d out of which %d are Running", machines, running) - if machines == running { - break - } else { - attempt++ - time.Sleep(10 * time.Second) - t.Log("Waiting for machines to start running") - } + return err + } + machines = strings.Count(output, "\n") + running := strings.Count(output, "Running") + msg := fmt.Sprintf("Machines %d out of which %d are Running", machines, running) + t.Logf(msg) + if machines == running { + return nil } + return errors.New(msg) + }); err != nil { + t.Fatal(err) } // Make sure we have as many nodes as machines - attempt = 0 - maxAttempts = 60 - command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "get", "no", "--no-headers"} - for { - cmd = exec.Command(command[0], command[1:]...) 
- outputBytes, err = cmd.CombinedOutput() - output := string(outputBytes) + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "get", "no", "--no-headers") if err != nil { - t.Log(output) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - time.Sleep(10 * time.Second) - t.Log("Retrying") - } - } else { - nodes := strings.Count(output, "\n") - ready := strings.Count(output, " Ready") - t.Logf("Machines are %d, Nodes are %d out of which %d are Ready", machines, nodes, ready) - if machines == nodes && ready == nodes { - break - } else { - attempt++ - time.Sleep(20 * time.Second) - t.Log("Waiting for nodes to become ready") - } + return err + } + nodes := strings.Count(output, "\n") + ready := strings.Count(output, " Ready") + msg := fmt.Sprintf("Machines are %d, Nodes are %d out of which %d are Ready", machines, nodes, ready) + t.Log(msg) + if machines == nodes && ready == nodes { + return nil } + return errors.New(msg) + }); err != nil { + t.Fatal(err) } } // deployMicrobot deploys a deployment of microbot. -func deployMicrobot(t testing.TB) { +func deployMicrobot(ctx context.Context, t testing.TB) { t.Log("Deploying microbot") - command := []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "create", "deploy", "--image=cdkbot/microbot:1", "--replicas=30", "bot"} - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Error(string(outputBytes)) + if output, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "create", "deploy", "--image=cdkbot/microbot:1", "--replicas=30", "bot"); err != nil { + t.Error(output) t.Fatalf("Failed to create the requested microbot deployment. 
%s", err) } // Make sure we have as many nodes as machines - attempt := 0 - maxAttempts := 60 t.Log("Waiting for the deployment to complete") - command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "wait", "deploy/bot", "--for=jsonpath={.status.readyReplicas}=30"} - for { - cmd = exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Log(string(outputBytes)) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - break - } + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + _, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "wait", "deploy/bot", "--for=jsonpath={.status.readyReplicas}=30") + return err + }); err != nil { + t.Fatal(err) } + } -// upgradeCluster upgrades the cluster to a new version based on the upgrade strategy. -func upgradeCluster(t testing.TB, upgrade_strategy string) { +// validateNoCalico Checks if calico daemon set is not deployed on the cluster. +func validateNoCalico(ctx context.Context, t testing.TB) { + t.Log("Validate no Calico daemon set") - version, control_plane_name, control_plane_type, worker_deployment_name, - worker_deployment_type := getUpgradeEnvVars(t) + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "-n", "kube-system", "get", "ds") + if err != nil { + return err + } + if strings.Contains(output, "calico") { + return errors.New("there is calico daemon set") - t.Logf("Upgrading cluster to %s via %s", version, upgrade_strategy) - // Patch contol plane machine upgrades based on type of upgrade strategy. - outputBytes, err := controlPlanePatch(control_plane_name, control_plane_type, version, upgrade_strategy) - if err != nil { - t.Error(string(outputBytes)) - t.Fatalf("Failed to merge the patch to control plane. 
%s", err) + } + return nil + }); err != nil { + t.Fatal(err) } + t.Log("No calico daemon set") +} - // Patch worker machine upgrades. - outputBytes, err = workerPatch(worker_deployment_name, worker_deployment_type, version) +// installCilium installs cilium from helm chart +func installCilium(t testing.TB) { + t.Log("Deploy Cilium") + + command := []string{"helm", "install", "cilium", "--repo", "https://helm.cilium.io/", + "cilium", "--namespace", "kube-system", "--set", "cni.confPath=/var/snap/microk8s/current/args/cni-network", + "--set", "cni.binPath=/var/snap/microk8s/current/opt/cni/bin", + "--set", "daemon.runPath=/var/snap/microk8s/current/var/run/cilium", + "--set", "operator.replicas=1", + "--set", "ipam.operator.clusterPoolIPv4PodCIDRList=10.1.0.0/16", + "--set", "nodePort.enabled=true", + } + t.Logf("running command: %s", strings.Join(command, " ")) + cmd := exec.Command(command[0], command[1:]...) + cmd.Env = append(cmd.Env, "KUBECONFIG="+KUBECONFIG) + outputBytes, err := cmd.CombinedOutput() if err != nil { t.Error(string(outputBytes)) - t.Fatalf("Failed to merge the patch to the machine deployments. %s", err) + t.Fatalf("Failed to deploy cilium from helm chart. %s", err) } +} - time.Sleep(30 * time.Second) +// validateCilium checks a deployment of cilium daemon set. +func validateCilium(ctx context.Context, t testing.TB) { + t.Log("Validate Cilium") - // Now all the machines should be upgraded to the new version. - attempt := 0 - maxAttempts := 60 - command := []string{"kubectl", "get", "machine", "--no-headers"} - for { - cmd := exec.Command(command[0], command[1:]...) 
- outputBytes, err := cmd.CombinedOutput() - output := string(outputBytes) + machines := 0 + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "get", "machine", "--no-headers") if err != nil { - t.Log(output) - if attempt >= maxAttempts { - t.Fatal(err) - } - - attempt++ - t.Log("Retrying") - time.Sleep(20 * time.Second) - } else { - totalMachines := strings.Count(output, "Running") - - // We count all the "Running" machines with the new version. - re := regexp.MustCompile("Running .* " + version) - upgradedMachines := len(re.FindAllString(output, -1)) - t.Logf("Total machines %d out of which %d are upgraded", totalMachines, upgradedMachines) - if totalMachines == upgradedMachines { - break - } else { - attempt++ - time.Sleep(20 * time.Second) - t.Log("Waiting for machines to upgrade and start running") - } + return err + } + machines = strings.Count(output, "\n") + if machines == 0 { + return errors.New("machines haven't started yet") } + return err + }); err != nil { + t.Fatal(err) } -} - -// controlPlanePatch patches the control plane machines based on the upgrade strategy and version. -func controlPlanePatch(control_plane_name, control_plane_type, version, upgrade_strategy string) ([]byte, error) { - command := []string{"kubectl", "patch", "--type=merge", control_plane_type, control_plane_name, "--patch", - fmt.Sprintf(`{"spec":{"version":"%s","upgradeStrategy":"%s"}}`, version, upgrade_strategy)} - cmd := exec.Command(command[0], command[1:]...) 
- return cmd.CombinedOutput() + t.Log("Checking Cilium daemon set") + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + _, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "-n", "kube-system", "wait", "ds/cilium", fmt.Sprintf("--for=jsonpath={.status.numberAvailable}=%d", machines)) + return err + }); err != nil { + t.Fatal(err) + } } -// workerPatch patches a given worker machines with the given version. -func workerPatch(worker_deployment_name, worker_deployment_type, version string) ([]byte, error) { - command := []string{"kubectl", "patch", "--type=merge", worker_deployment_type, worker_deployment_name, "--patch", - fmt.Sprintf(`{"spec":{"template":{"spec":{"version":"%s"}}}}`, version)} +// execCommand executes command transforms output bytes to string and reruns error from exec +func execCommand(t testing.TB, command ...string) (string, error) { + t.Logf("running command: %s", strings.Join(command, " ")) cmd := exec.Command(command[0], command[1:]...) - - return cmd.CombinedOutput() + outputBytes, err := cmd.CombinedOutput() + output := string(outputBytes) + t.Logf(output) + return output, err } -// getUpgradeEnvVars returns the environment variables needed for the upgrade test. -func getUpgradeEnvVars(t testing.TB) (version string, control_plane_name string, control_plane_type string, - worker_deployment_name string, worker_deployment_type string) { - version = os.Getenv("CAPI_UPGRADE_VERSION") - if version == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_VERSION is not set." + - "Please set it to the version you want to upgrade to.") +// upgradeCluster upgrades the cluster to a new version based on the upgrade strategy. +func upgradeCluster(ctx context.Context, t testing.TB, upgradeStrategy string) { + version := "v1.28.0" + t.Logf("Upgrading cluster to %s via %s", version, upgradeStrategy) + + // Patch control plane machine upgrades based on type of upgrade strategy. 
+ if _, err := execCommand(t, "kubectl", "patch", "--type=merge", + "microk8scontrolplanes.controlplane.cluster.x-k8s.io", "test-ci-cluster-control-plane", "--patch", + fmt.Sprintf(`{"spec":{"version":"%s","upgradeStrategy":"%s"}}`, version, upgradeStrategy)); err != nil { + t.Fatalf("Failed to merge the patch to control plane. %s", err) } - control_plane_name = os.Getenv("CAPI_UPGRADE_CP_NAME") - if control_plane_name == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_CP_NAME is not set." + - "Please set it to the name of the control plane you want to upgrade.") + // Patch worker machine upgrades. + if _, err := execCommand(t, "kubectl", "patch", "--type=merge", + "machinedeployments.cluster.x-k8s.io", "test-ci-cluster-md-0", "--patch", + fmt.Sprintf(`{"spec":{"template":{"spec":{"version":"%s"}}}}`, version)); err != nil { + t.Fatalf("Failed to merge the patch to the machine deployments. %s", err) } - control_plane_type = os.Getenv("CAPI_UPGRADE_CP_TYPE") - if control_plane_type == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_CP_TYPE is not set." + - "Please set it to the type of the control plane you want to upgrade.") - } + time.Sleep(30 * time.Second) - worker_deployment_name = os.Getenv("CAPI_UPGRADE_MD_NAME") - if worker_deployment_name == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_MD_NAME is not set." + - "Please set it to the name of the machine deployment you want to upgrade.") + // Now all the machines should be upgraded to the new version. 
+ if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "get", "machine", "--no-headers") + if err != nil { + return err + } + totalMachines := strings.Count(output, "Running") + re := regexp.MustCompile("Running .* " + version) + upgradedMachines := len(re.FindAllString(output, -1)) + msg := fmt.Sprintf("Total machines %d out of which %d are upgraded", totalMachines, upgradedMachines) + t.Logf(msg) + if totalMachines == upgradedMachines { + return nil + } + return errors.New(msg) + }); err != nil { + t.Fatal(err) } +} - worker_deployment_type = os.Getenv("CAPI_UPGRADE_MD_TYPE") - if worker_deployment_type == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_MD_TYPE is not set." + - "Please set it to the type of the machine deployment you want to upgrade.") +// retryFor will retry a given function for the given amount of times. +// retryFor will wait for backoff between retries. +func retryFor(ctx context.Context, retryCount int, delayBetweenRetry time.Duration, retryFunc func() error) error { + var err error = nil + for i := 0; i < retryCount; i++ { + if err = retryFunc(); err != nil { + select { + case <-ctx.Done(): + return context.Canceled + case <-time.After(delayBetweenRetry): + continue + } + } + break } - - return version, control_plane_name, control_plane_type, worker_deployment_name, worker_deployment_type + return err }