From 72c5fb89461a2fa145ca243e429a6eeb33c26d38 Mon Sep 17 00:00:00 2001 From: Joe Kratzat Date: Thu, 2 Feb 2023 15:47:49 -0500 Subject: [PATCH 1/3] feat: add windows support --- Makefile | 1 + scripts/ci-e2e.sh | 1 + .../cluster-template-windows-calico.yaml | 156 ++++++++++++++++++ test/e2e/cluster_test.go | 50 ++++++ test/e2e/config/e2e_conf.yaml | 2 + .../kustomization.yaml | 7 + .../cluster-template-windows-calico/md.yaml | 46 ++++++ 7 files changed, 263 insertions(+) create mode 100644 templates/cluster-template-windows-calico.yaml create mode 100644 test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/kustomization.yaml create mode 100644 test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/md.yaml diff --git a/Makefile b/Makefile index 1e63227a..d34baf1a 100644 --- a/Makefile +++ b/Makefile @@ -255,6 +255,7 @@ generate-e2e-templates: $(KUSTOMIZE) $(KUSTOMIZE) build $(OCI_TEMPLATES)/v1beta1/cluster-template-managed --load-restrictor LoadRestrictionsNone > $(OCI_TEMPLATES)/v1beta1/cluster-template-managed.yaml $(KUSTOMIZE) build $(OCI_TEMPLATES)/v1beta1/cluster-template-managed-cluster-identity --load-restrictor LoadRestrictionsNone > $(OCI_TEMPLATES)/v1beta1/cluster-template-managed-cluster-identity.yaml $(KUSTOMIZE) build $(OCI_TEMPLATES)/v1beta1/cluster-template-cluster-identity --load-restrictor LoadRestrictionsNone > $(OCI_TEMPLATES)/v1beta1/cluster-template-cluster-identity.yaml + $(KUSTOMIZE) build $(OCI_TEMPLATES)/v1beta1/cluster-template-windows-calico --load-restrictor LoadRestrictionsNone > $(OCI_TEMPLATES)/v1beta1/cluster-template-windows-calico.yaml .PHONY: test-e2e-run test-e2e-run: generate-e2e-templates $(GINKGO) $(ENVSUBST) ## Run e2e tests diff --git a/scripts/ci-e2e.sh b/scripts/ci-e2e.sh index b41bd64f..14407336 100755 --- a/scripts/ci-e2e.sh +++ b/scripts/ci-e2e.sh @@ -24,6 +24,7 @@ source "${REPO_ROOT}/hack/ensure-tags.sh" : "${OCI_UPGRADE_IMAGE_ID:?Environment variable empty or not defined.}" : 
"${OCI_ALTERNATIVE_REGION_IMAGE_ID:?Environment variable empty or not defined.}" : OCI_MANAGED_NODE_IMAGE_ID +: OCI_WINDOWS_IMAGE_ID export LOCAL_ONLY=${LOCAL_ONLY:-"true"} diff --git a/templates/cluster-template-windows-calico.yaml b/templates/cluster-template-windows-calico.yaml new file mode 100644 index 00000000..a036fd61 --- /dev/null +++ b/templates/cluster-template-windows-calico.yaml @@ -0,0 +1,156 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + labels: + cluster.x-k8s.io/cluster-name: "${CLUSTER_NAME}" + cni: calico + csi-proxy: enabled + windows: enabled + name: "${CLUSTER_NAME}" + namespace: "${NAMESPACE}" +spec: + clusterNetwork: + pods: + cidrBlocks: + - ${POD_CIDR:="192.168.0.0/16"} + serviceDomain: ${SERVICE_DOMAIN:="cluster.local"} + services: + cidrBlocks: + - ${SERVICE_CIDR:="10.128.0.0/12"} + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: OCICluster + name: "${CLUSTER_NAME}" + namespace: "${NAMESPACE}" + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: "${CLUSTER_NAME}-control-plane" + namespace: "${NAMESPACE}" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: OCICluster +metadata: + labels: + cluster.x-k8s.io/cluster-name: "${CLUSTER_NAME}" + name: "${CLUSTER_NAME}" +spec: + compartmentId: "${OCI_COMPARTMENT_ID}" +--- +kind: KubeadmControlPlane +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +metadata: + name: "${CLUSTER_NAME}-control-plane" + namespace: "${NAMESPACE}" +spec: + version: "${KUBERNETES_VERSION}" + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + machineTemplate: + infrastructureRef: + kind: OCIMachineTemplate + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + name: "${CLUSTER_NAME}-control-plane" + namespace: "${NAMESPACE}" + kubeadmConfigSpec: + clusterConfiguration: + kubernetesVersion: ${KUBERNETES_VERSION} + apiServer: + certSANs: [localhost, 127.0.0.1] + extraArgs: + cloud-provider: oci + dns: {} + 
etcd: {} + networking: {} + scheduler: {} + initConfiguration: + nodeRegistration: + criSocket: /var/run/containerd/containerd.sock + kubeletExtraArgs: + cloud-provider: external + provider-id: oci://{{ ds["id"] }} + joinConfiguration: + discovery: {} + nodeRegistration: + criSocket: /var/run/containerd/containerd.sock + kubeletExtraArgs: + cloud-provider: external + provider-id: oci://{{ ds["id"] }} +--- +kind: OCIMachineTemplate +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +metadata: + name: "${CLUSTER_NAME}-control-plane" +spec: + template: + spec: + imageId: "${OCI_CONTROL_PLANE_IMAGE_ID}" + compartmentId: "${OCI_COMPARTMENT_ID}" + shape: "${OCI_CONTROL_PLANE_MACHINE_TYPE=VM.Standard.E4.Flex}" + shapeConfig: + ocpus: "${OCI_CONTROL_PLANE_MACHINE_TYPE_OCPUS=1}" + metadata: + ssh_authorized_keys: "${OCI_SSH_KEY}" + isPvEncryptionInTransitEnabled: ${OCI_CONTROL_PLANE_PV_TRANSIT_ENCRYPTION=true} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: OCIMachineTemplate +metadata: + name: "${CLUSTER_NAME}-md-win" +spec: + template: + spec: + imageId: "${OCI_NODE_IMAGE_ID}" + compartmentId: "${OCI_COMPARTMENT_ID}" + shape: "${OCI_NODE_MACHINE_TYPE=VM.Standard.E4.Flex}" + shapeConfig: + ocpus: "${OCI_NODE_MACHINE_TYPE_OCPUS=1}" + vnicAttachments: + - displayName: "CalicoNic" + nicIndex: 1 # second nic must be used for hyper-v + metadata: + ssh_authorized_keys: "${OCI_SSH_KEY}" + isPvEncryptionInTransitEnabled: ${OCI_NODE_PV_TRANSIT_ENCRYPTION=true} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1alpha4 +kind: KubeadmConfigTemplate +metadata: + name: "${CLUSTER_NAME}-md-win" +spec: + template: + spec: + joinConfiguration: + nodeRegistration: + criSocket: npipe:////./pipe/containerd-containerd + kubeletExtraArgs: + cloud-provider: external + provider-id: oci://{{ ds.meta_data["instance_id"] }} + feature-gates: WindowsHostProcessContainers=true + v: "2" + windows-priorityclass: ABOVE_NORMAL_PRIORITY_CLASS + name: '{{ ds.meta_data["local_hostname"] }}' + 
preKubeadmCommands: + - powershell C:\Windows\Setup\Scripts\enable_second_nic.ps1 + - powershell C:\Users\opc\attach_secondary_vnic.ps1 > C:\Users\opc\attach_secondary_vnic_log.txt +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: "${CLUSTER_NAME}-md-win" +spec: + clusterName: "${CLUSTER_NAME}" + replicas: ${NODE_MACHINE_COUNT} + selector: + matchLabels: + template: + spec: + clusterName: "${CLUSTER_NAME}" + version: "${KUBERNETES_VERSION}" + bootstrap: + configRef: + name: "${CLUSTER_NAME}-md-win" + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + infrastructureRef: + name: "${CLUSTER_NAME}-md-win" + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: OCIMachineTemplate \ No newline at end of file diff --git a/test/e2e/cluster_test.go b/test/e2e/cluster_test.go index 1b442858..fb1a97f8 100644 --- a/test/e2e/cluster_test.go +++ b/test/e2e/cluster_test.go @@ -199,6 +199,29 @@ var _ = Describe("Workload cluster creation", func() { validateOLImage(namespace.Name, clusterName) }) + It("Windows - With 1 Linux control-plane nodes and with 1 Windows worker nodes using Calico CNI", func() { + clusterName = getClusterName(clusterNamePrefix, "windows-calico") + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: bootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()), + ClusterctlConfigPath: clusterctlConfigPath, + KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, + Flavor: "windows-calico", + Namespace: namespace.Name, + ClusterName: clusterName, + KubernetesVersion: e2eConfig.GetVariable(capi_e2e.KubernetesVersion), + ControlPlaneMachineCount: pointer.Int64Ptr(1), + WorkerMachineCount: pointer.Int64Ptr(1), + }, + WaitForClusterIntervals: e2eConfig.GetIntervals(specName, 
"wait-cluster"), + WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-windows-worker-nodes"), + }, result) + validateWindowsImage(namespace.Name, clusterName) + }) + It("Cloud Provider OCI testing [PRBlocking]", func() { clusterName = getClusterName(clusterNamePrefix, "ccm-testing") clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ @@ -983,6 +1006,33 @@ func validateOLImage(nameSpace string, clusterName string) { } } +func validateWindowsImage(nameSpace string, clusterName string) { + lister := bootstrapClusterProxy.GetClient() + inClustersNamespaceListOption := client.InNamespace(nameSpace) + matchClusterListOption := client.MatchingLabels{ + clusterv1.ClusterLabelName: clusterName, + } + + machineList := &clusterv1.MachineList{} + Expect(lister.List(context.Background(), machineList, inClustersNamespaceListOption, matchClusterListOption)). + To(Succeed(), "Couldn't list machines for the cluster %q", clusterName) + + Expect(len(machineList.Items)).To(Equal(2)) + for _, machine := range machineList.Items { + if machine.Labels["os"] == "windows" { + instanceOcid := strings.Split(*machine.Spec.ProviderID, "//")[1] + Log(fmt.Sprintf("Instance OCID is %s", instanceOcid)) + resp, err := computeClient.GetInstance(context.Background(), core.GetInstanceRequest{ + InstanceId: common.String(instanceOcid), + }) + Expect(err).NotTo(HaveOccurred()) + instanceSourceDetails, ok := resp.SourceDetails.(core.InstanceSourceViaImageDetails) + Expect(ok).To(BeTrue()) + Expect(*instanceSourceDetails.ImageId).To(Equal(os.Getenv("OCI_WINDOWS_IMAGE_ID"))) + } + } +} + func getClusterName(prefix, specName string) string { clusterName := os.Getenv("CLUSTER_NAME") if clusterName == "" { diff --git a/test/e2e/config/e2e_conf.yaml b/test/e2e/config/e2e_conf.yaml index 1e6193a4..f3300655 100644 --- a/test/e2e/config/e2e_conf.yaml +++ 
b/test/e2e/config/e2e_conf.yaml @@ -73,6 +73,7 @@ providers: - sourcePath: "../data/infrastructure-oci/v1beta1/cluster-template-managed.yaml" - sourcePath: "../data/infrastructure-oci/v1beta1/cluster-template-managed-cluster-identity.yaml" - sourcePath: "../data/infrastructure-oci/v1beta1/cluster-template-cluster-identity.yaml" + - sourcePath: "../data/infrastructure-oci/v1beta1/cluster-template-windows-calico.yaml" - sourcePath: "../data/infrastructure-oci/v1beta1/metadata.yaml" variables: @@ -111,6 +112,7 @@ intervals: default/wait-cluster: ["30m", "10s"] default/wait-control-plane: ["30m", "10s"] default/wait-worker-nodes: ["30m", "10s"] + default/wait-windows-worker-nodes: ["60m", "30s"] default/wait-cluster-bare-metal: [ "60m", "10s" ] default/wait-control-plane-bare-metal: [ "60m", "10s" ] default/wait-worker-nodes-bare-metal: [ "60m", "10s" ] diff --git a/test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/kustomization.yaml b/test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/kustomization.yaml new file mode 100644 index 00000000..d1f72456 --- /dev/null +++ b/test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/kustomization.yaml @@ -0,0 +1,7 @@ +bases: + - ../bases/cluster.yaml + - ../bases/md.yaml + - ../bases/crs.yaml + - ../bases/ccm.yaml +patchesStrategicMerge: + - ./md.yaml \ No newline at end of file diff --git a/test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/md.yaml b/test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/md.yaml new file mode 100644 index 00000000..9f291624 --- /dev/null +++ b/test/e2e/data/infrastructure-oci/v1beta1/cluster-template-windows-calico/md.yaml @@ -0,0 +1,46 @@ +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: OCIMachineTemplate +metadata: + name: "${CLUSTER_NAME}-md-0" + labels: + os: windows +spec: + template: + spec: + imageId: "${OCI_WINDOWS_IMAGE_ID}" + shape: "BM.Standard.E4.128" + shapeConfig: + 
ocpus: "128" + vnicAttachments: + - displayName: "CalicoNic" + nicIndex: 1 # second nic must be used for hyper-v + isPvEncryptionInTransitEnabled: ${OCI_NODE_PV_TRANSIT_ENCRYPTION=false} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1alpha4 +kind: KubeadmConfigTemplate +metadata: + name: "${CLUSTER_NAME}-md-0" +spec: + template: + spec: + joinConfiguration: + nodeRegistration: + criSocket: npipe:////./pipe/containerd-containerd + kubeletExtraArgs: + provider-id: oci://{{ ds.meta_data["instance_id"] }} + feature-gates: WindowsHostProcessContainers=true + v: "2" + windows-priorityclass: ABOVE_NORMAL_PRIORITY_CLASS + name: '{{ ds.meta_data["local_hostname"] }}' + preKubeadmCommands: + - powershell C:\Windows\Setup\Scripts\enable_second_nic.ps1 + - powershell C:\Users\opc\attach_secondary_vnic.ps1 > C:\Users\opc\attach_secondary_vnic_log.txt +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: "${CLUSTER_NAME}-md-0" +spec: + template: + spec: + failureDomain: "2" \ No newline at end of file From d0888533ee7275d3922510ee49b2ded839ab0fc1 Mon Sep 17 00:00:00 2001 From: Joe Kratzat Date: Tue, 7 Feb 2023 14:01:24 -0500 Subject: [PATCH 2/3] Add NSG rules for VXLAN to windows template --- .../cluster-template-windows-calico.yaml | 286 ++++++++++++++++++ 1 file changed, 286 insertions(+) diff --git a/templates/cluster-template-windows-calico.yaml b/templates/cluster-template-windows-calico.yaml index a036fd61..b7b5e24b 100644 --- a/templates/cluster-template-windows-calico.yaml +++ b/templates/cluster-template-windows-calico.yaml @@ -36,6 +36,292 @@ metadata: name: "${CLUSTER_NAME}" spec: compartmentId: "${OCI_COMPARTMENT_ID}" + networkSpec: + vcn: + name: ${CLUSTER_NAME} + cidr: "10.0.0.0/16" + networkSecurityGroups: + - name: control-plane-endpoint + role: control-plane-endpoint + egressRules: + - egressRule: + isStateless: false + destination: "10.0.0.0/29" + protocol: "6" + destinationType: "CIDR_BLOCK" + description: "Kubernetes API 
traffic to Control Plane" + tcpOptions: + destinationPortRange: + max: 6443 + min: 6443 + ingressRules: + - ingressRule: + isStateless: false + source: "0.0.0.0/0" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "External access to Kubernetes API endpoint" + tcpOptions: + destinationPortRange: + max: 6443 + min: 6443 + - ingressRule: + isStateless: false + source: "10.0.0.0/16" + protocol: "1" + sourceType: "CIDR_BLOCK" + description: "Path discovery" + icmpOptions: + code: 4 + type: 3 + - name: control-plane + role: control-plane + egressRules: + - egressRule: + isStateless: false + destination: "0.0.0.0/0" + protocol: "all" + destinationType: "CIDR_BLOCK" + description: "Control Plane access to Internet" + ingressRules: + - ingressRule: + isStateless: false + source: "10.0.0.8/29" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Kubernetes API endpoint to Control Plane(apiserver port)" + tcpOptions: + destinationPortRange: + max: 6443 + min: 6443 + - ingressRule: + isStateless: false + source: "10.0.0.0/29" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Control plane node to Control Plane(apiserver port) communication" + tcpOptions: + destinationPortRange: + max: 6443 + min: 6443 + - ingressRule: + isStateless: false + source: "10.0.64.0/20" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Worker Node to Control Plane(apiserver port) communication" + tcpOptions: + destinationPortRange: + max: 6443 + min: 6443 + - ingressRule: + isStateless: false + source: "10.0.0.0/29" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "etcd client communication" + tcpOptions: + destinationPortRange: + max: 2379 + min: 2379 + - ingressRule: + isStateless: false + source: "10.0.0.0/29" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "etcd peer" + tcpOptions: + destinationPortRange: + max: 2380 + min: 2380 + - ingressRule: + isStateless: false + source: "10.0.0.0/16" + protocol: "1" + sourceType: "CIDR_BLOCK" + 
description: "Path discovery" + icmpOptions: + code: 4 + type: 3 + - ingressRule: + isStateless: false + source: "0.0.0.0/0" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Inbound SSH traffic to Control Plane" + tcpOptions: + destinationPortRange: + max: 22 + min: 22 + - ingressRule: + isStateless: false + source: "10.0.0.0/29" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Control Plane to Control Plane Kubelet Communication" + tcpOptions: + destinationPortRange: + max: 10250 + min: 10250 + - ingressRule: + isStateless: false + source: "10.0.64.0/20" + protocol: "17" + sourceType: "CIDR_BLOCK" + description: "Calico VXLAN" + udpOptions: + destinationPortRange: + max: 4789 + min: 4789 + - ingressRule: + isStateless: false + source: "10.0.0.0/29" + protocol: "17" + sourceType: "CIDR_BLOCK" + description: "Calico VXLAN" + udpOptions: + destinationPortRange: + max: 4789 + min: 4789 + - name: worker + role: worker + egressRules: + - egressRule: + isStateless: false + destination: "0.0.0.0/0" + protocol: "all" + destinationType: "CIDR_BLOCK" + description: "Worker node access to Internet" + ingressRules: + - ingressRule: + isStateless: false + source: "0.0.0.0/0" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Inbound SSH traffic to worker node" + tcpOptions: + destinationPortRange: + max: 22 + min: 22 + - ingressRule: + isStateless: false + source: "10.0.0.0/16" + protocol: "1" + sourceType: "CIDR_BLOCK" + description: "Path discovery" + icmpOptions: + code: 4 + type: 3 + - ingressRule: + isStateless: false + source: "10.0.0.0/29" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Control Plane to worker node Kubelet Communication" + tcpOptions: + destinationPortRange: + max: 10250 + min: 10250 + - ingressRule: + isStateless: false + source: "10.0.64.0/20" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Worker node to worker node Kubelet Communication" + tcpOptions: + destinationPortRange: + max: 10250 + min: 10250 
+ - ingressRule: + isStateless: false + source: "10.0.64.0/20" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Worker node to default NodePort ingress communication" + tcpOptions: + destinationPortRange: + max: 32767 + min: 30000 + - ingressRule: + isStateless: false + source: "10.0.64.0/20" + protocol: "17" + sourceType: "CIDR_BLOCK" + description: "Calico VXLAN" + udpOptions: + destinationPortRange: + max: 4789 + min: 4789 + - ingressRule: + isStateless: false + source: "10.0.0.0/29" + protocol: "17" + sourceType: "CIDR_BLOCK" + description: "Calico VXLAN" + udpOptions: + destinationPortRange: + max: 4789 + min: 4789 + - name: service-lb + role: service-lb + egressRules: + - egressRule: + isStateless: false + destination: "10.0.64.0/20" + protocol: "6" + destinationType: "CIDR_BLOCK" + description: "Service LoadBalancer to default NodePort egress communication" + tcpOptions: + destinationPortRange: + max: 32767 + min: 30000 + ingressRules: + - ingressRule: + isStateless: false + source: "10.0.0.0/16" + protocol: "1" + sourceType: "CIDR_BLOCK" + description: "Path discovery" + icmpOptions: + code: 4 + type: 3 + - ingressRule: + isStateless: false + source: "0.0.0.0/0" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Accept http traffic on port 80" + tcpOptions: + destinationPortRange: + max: 80 + min: 80 + - ingressRule: + isStateless: false + source: "0.0.0.0/0" + protocol: "6" + sourceType: "CIDR_BLOCK" + description: "Accept http traffic on port 443" + tcpOptions: + destinationPortRange: + max: 443 + min: 443 + subnets: + - name: control-plane-endpoint + role: control-plane-endpoint + cidr: "10.0.0.8/29" + type: public + - name: control-plane + role: control-plane + cidr: "10.0.0.0/29" + type: private + - name: service-lb + role: service-lb + cidr: "10.0.0.32/27" + type: public + - name: worker + role: worker + cidr: "10.0.64.0/20" + type: private --- kind: KubeadmControlPlane apiVersion: controlplane.cluster.x-k8s.io/v1beta1 From 
47b4f1c494cdb00dbc9078496a9673f75b98afa2 Mon Sep 17 00:00:00 2001 From: Joe Kratzat Date: Thu, 9 Feb 2023 20:26:45 -0500 Subject: [PATCH 3/3] add windows doc book --- docs/src/SUMMARY.md | 1 + .../src/gs/create-windows-workload-cluster.md | 219 ++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 docs/src/gs/create-windows-workload-cluster.md diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 89cb5195..1cf6ab9f 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -20,6 +20,7 @@ - [Create Workload Cluster](./gs/create-workload-cluster.md) - [MachineHealthChecks](./gs/create-mhc-workload-cluster.md) - [Create GPU Workload Cluster](./gs/create-gpu-workload-cluster.md) + - [Create Windows Workload Cluster](./gs/create-windows-workload-cluster.md) - [Create Workload Templates](./gs/create-workload-templates.md) - [Using externally managed infrastructure](./gs/externally-managed-cluster-infrastructure.md) - [Install Oracle Cloud Infrastructure Cloud Controller Manager](./gs/install-oci-ccm.md) diff --git a/docs/src/gs/create-windows-workload-cluster.md b/docs/src/gs/create-windows-workload-cluster.md new file mode 100644 index 00000000..1cf616ae --- /dev/null +++ b/docs/src/gs/create-windows-workload-cluster.md @@ -0,0 +1,219 @@ + + +# Create a Windows workload cluster + +## Overview + +CAPOCI enables users to create and manage Windows workload clusters in Oracle Cloud Infrastructure (OCI). +This means that the [Kubernetes Control Plane][kubernetes-control-plane] will be Linux and the nodes will be Windows. +First, users build the [Windows image using image-builder][image-builder], then use the Windows flavor +template from the [latest release][capoci-latest-release]. Finally, install the [Calico CNI Provider +and OCI Cloud Controller Manager](#install-calico-cni-provider-and-oci-cloud-controller-manager). 
+ +## Known Limitations + +The Windows workload cluster has known limitations: + +- Limited to [Standard Bare Metal (BM) shapes][bm-shapes] +- Limited to OCI provided platform images. See [image-build documentation][image-builder] for more details +- Custom image MUST be built using the same shape of Bare Metal the worker nodes will run +- CNI provider support is [Calico in VXLAN mode][calico-windows] +- [Block volumes][block-volume] are not currently supported +- Bring Your Own License (BYOL) is not supported +- See [Calico windows docs][calico-limitations] for their limitations + +## Licensing + +BYOL is currently not supported using CAPOCI. For more info on Windows Licensing +see the [Compute FAQ documentation][compute-windows-faq]. + +## Build Windows image + +> NOTE: It is recommended to [check shape availability](#check-shape-availability) before building image(s) + +In order to launch Windows instances for the cluster a Windows image, [using image-builder][image-builder], +will need to be built. It is **important** to make sure the same shape is used to build and launch the instance. + +Example: If a `BM.Standard2.52` is used to build then the `OCI_NODE_MACHINE_TYPE` MUST +be `BM.Standard2.52` + + +## Check shape availability + +Make sure the [OCI CLI][install-oci-cli] is installed. Then set the AD information if using +multi-AD regions. + +> NOTE: Use the [OCI Regions and Availability Domains][regions] page to figure out which +regions have multiple ADs. + +```bash +oci iam availability-domain list --compartment-id= --region= +``` + +Using the AD `name` from the output above start searching for BM shape availability. 
+ +```bash +oci compute shape list --compartment-id= --profile=DEFAULT --region=us-ashburn-1 --availability-domain= | grep BM + +"shape": "BM.Standard.E3.128" +"shape-name": "BM.Standard2.52" +"shape-name": "BM.Standard.E3.128" +"shape": "BM.Standard.E2.64" +"shape-name": "BM.Standard2.52" +"shape-name": "BM.Standard3.64" +"shape": "BM.Standard1.36" +``` + +> NOTE: If the output is empty then the compartment for that region/AD doesn't have BM shapes. +If you are unable to locate any shapes you may need to submit a +[service limit increase request][compute-service-limit] + + +## Create a new Windows workload cluster + +It is recommended to have the following guides handy: + +- [Windows Cluster Debugging][windows-cluster-debug] +- [Windows Container Debugging][windows-containers-debug] + +When using `clusterctl` to generate the cluster use the `windows-calico` example flavor. + +The following command uses the `OCI_CONTROL_PLANE_MACHINE_TYPE` and `OCI_NODE_MACHINE_TYPE` +parameters to specify bare metal shapes instead of using CAPOCI's default virtual +instance shape. The `OCI_CONTROL_PLANE_PV_TRANSIT_ENCRYPTION` and `OCI_NODE_PV_TRANSIT_ENCRYPTION` +parameters disable encryption of data in flight between the bare metal instance and the block storage resources. + +> NOTE: The `OCI_NODE_MACHINE_TYPE_OCPUS` must match the OCPU count of the BM shape. +See the [Compute Shapes][bm-shapes] page to get the OCPU count for the specific shape. 
+ +```bash +OCI_COMPARTMENT_ID= \ +OCI_CONTROL_PLANE_IMAGE_ID= \ +OCI_NODE_IMAGE_ID= \ +OCI_SSH_KEY= \ +NODE_MACHINE_COUNT=1 \ +OCI_NODE_MACHINE_TYPE=BM.Standard.E4.128 \ +OCI_NODE_MACHINE_TYPE_OCPUS=128 \ +OCI_NODE_PV_TRANSIT_ENCRYPTION=false \ +OCI_CONTROL_PLANE_MACHINE_TYPE_OCPUS=3 \ +OCI_CONTROL_PLANE_MACHINE_TYPE=VM.Standard3.Flex \ +CONTROL_PLANE_MACHINE_COUNT=3 \ +OCI_SHAPE_MEMORY_IN_GBS= \ +KUBERNETES_VERSION= \ +clusterctl generate cluster \ +--target-namespace default \ +--flavor windows-calico | kubectl apply -f - +``` + +### Access workload cluster Kubeconfig + +Execute the following command to list all the workload clusters present: + +```bash +kubectl get clusters -A +``` + +Execute the following command to access the kubeconfig of a workload cluster: + +```bash +clusterctl get kubeconfig -n default > .kubeconfig +``` + +### Install Calico CNI Provider and OCI Cloud Controller Manager + +#### Install Calico + +The [Calico for Windows][calico-windows] getting started guide should be read for a better understanding of the CNI on Windows. +It is recommended to have the following guides handy: + +- [Windows Calico Troubleshooting][calico-windows-debug] + +##### The steps to follow: + +**On the management cluster** + +1. Run + ``` + kubectl get OCICluster -o jsonpath='{.spec.controlPlaneEndpoint.host}' + ``` + to get the `KUBERNETES_SERVICE_HOST` info that will be used in later steps + +**On the workload cluster** + +1. Download the [v3.24.5 calico release][calico-release] + ``` + curl -L https://github.com/projectcalico/calico/releases/download/v3.24.5/release-v3.24.5.tgz -o calico-v3.24.5.tgz + ``` +1. Uncompress the downloaded file and locate the `calico-vxlan.yaml`, `calico-windows-vxlan.yaml` and `windows-kube-proxy.yaml` +files in the `manifests` dir +1. 
Edit the `calico-vxlan.yaml` and modify the following variables to allow Calico running on the nodes to use VXLAN + - `CALICO_IPV4POOL_IPIP` - set to `"Never"` + - `CALICO_IPV4POOL_VXLAN` - set to `"Always"` +1. ``` + kubectl apply -f calico-vxlan.yaml + ``` +1. Wait for the IPAMConfig to be loaded +1. ``` + kubectl patch IPAMConfig default --type merge --patch='{"spec": {"strictAffinity": true}}' + ``` +1. Edit the `calico-windows-vxlan.yaml` and modify the following variables to allow Calico running on the nodes to talk +to the Kubernetes control plane + - `KUBERNETES_SERVICE_HOST` - the IP address from the management cluster step + - `KUBERNETES_SERVICE_PORT` - the port from the management cluster step + - `K8S_SERVICE_CIDR` - The service CIDR set in the cluster template + - `DNS_NAME_SERVERS` - the IP address from dns service + ``` + kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}' + ``` + - Change the namespace from `calico-system` to `kube-system` + - add the following `env` to the container named `node` + ```yaml + - name: VXLAN_ADAPTER + value: "Ethernet 2" + ``` +1. ``` + kubectl apply -f calico-windows-vxlan.yaml + ``` + (it takes a bit for this to pass livenessprobe) +1. Edit the `windows-kube-proxy.yaml` + - update the `kube-proxy` container environment variable `K8S_VERSION` to the version of kubernetes you are deploying + - update the `image` version for the container named `kube-proxy` and make sure to set the + correct [windows nanoserver version][docker-hub-nanoserver] example: `ltsc2019` +1. ``` + kubectl apply -f windows-kube-proxy.yaml + ``` + +#### Install OCI Cloud Controller Manager + +By default, the [OCI Cloud Controller Manager (CCM)][oci-ccm] is not installed into a workload cluster. To install the OCI CCM, follow [these instructions][install-oci-ccm]. 
+ +### Scheduling Windows containers + +With the cluster in a ready state and CCM installed, an [example deployment][win-webserver-deployment] +can be used to test that pods are scheduled. Accessing the deployed pods and using `nslookup` you can +[test that the cluster DNS is working][win-webserver-dns]. + + +[block-volume]: https://docs.oracle.com/en-us/iaas/Content/GSG/Tasks/addingstorage.htm +[bm-shapes]: https://docs.oracle.com/en-us/iaas/Content/Compute/References/computeshapes.htm#bm-standard +[calico-limitations]: https://docs.tigera.io/calico/3.24/getting-started/kubernetes/windows-calico/limitations +[calico-release]: https://github.com/projectcalico/calico/releases/download/v3.24.5/release-v3.24.5.tgz +[calico-windows]: https://docs.tigera.io/calico/3.24/getting-started/kubernetes/windows-calico/ +[calico-windows-debug]: https://docs.tigera.io/calico/3.24/getting-started/kubernetes/windows-calico/troubleshoot +[capoci-latest-release]: https://github.com/oracle/cluster-api-provider-oci/releases/latest +[compute-service-limit]: https://docs.oracle.com/en-us/iaas/Content/General/Concepts/servicelimits.htm#computelimits +[compute-windows-faq]: https://www.oracle.com/cloud/compute/faq/#category-windows +[docker-hub-nanoserver]: https://hub.docker.com/_/microsoft-windows-nanoserver +[image-builder]: https://image-builder.sigs.k8s.io/capi/providers/oci.html#building-a-windows-image +[install-ccm]: ../gs/create-workload-cluster.md#install-oci-cloud-controller-manager-and-csi-in-a-self-provisioned-cluster +[install-oci-ccm]: ./install-oci-ccm.md +[install-oci-cli]: https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm +[kubernetes-control-plane]: https://kubernetes.io/docs/concepts/overview/components/#control-plane-components +[oci-ccm]: https://github.com/oracle/oci-cloud-controller-manager +[regions]: https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm +[win-webserver-deployment]: 
https://kubernetes.io/docs/concepts/windows/user-guide/ +[win-webserver-dns]: https://kubernetes.io/docs/tutorials/services/connect-applications-service/#dns +[windows-cluster-debug]: https://kubernetes.io/docs/tasks/debug/debug-cluster/windows/ +[windows-containers-debug]: https://learn.microsoft.com/en-us/virtualization/windowscontainers/kubernetes/common-problems + +