From 57aff53fc264d0917850a0028337ecf3e5c94274 Mon Sep 17 00:00:00 2001 From: Ilya Alekseyev Date: Mon, 14 Oct 2024 21:38:50 +0200 Subject: [PATCH] Add Failure Domains for Worker nodes feature for Nutanix provider (#8837) * Nutanix Failure Domains for Worker nodes * Add unit tests and fix failures * Add missing test manifest * Fix comments and rebase --- ...mazonaws.com_nutanixdatacenterconfigs.yaml | 6 + ...s.amazonaws.com_nutanixmachineconfigs.yaml | 22 + go.mod | 2 +- .../v1alpha1/nutanixdatacenterconfig_types.go | 6 +- pkg/api/v1alpha1/zz_generated.deepcopy.go | 5 + pkg/providers/nutanix/config/md-template.yaml | 104 +++ pkg/providers/nutanix/provider_test.go | 12 +- pkg/providers/nutanix/template.go | 45 ++ pkg/providers/nutanix/template_test.go | 80 ++- ...onfig_with_failure_domains_invalid_wg.yaml | 31 + .../eksa-cluster-multi-worker-fds.yaml | 177 +++++ .../testdata/eksa-cluster-worker-fds.yaml | 89 +++ .../expected_results_multi_worker_fds.yaml | 627 ++++++++++++++++++ .../expected_results_multi_worker_fds_md.yaml | 356 ++++++++++ .../testdata/expected_results_worker_fds.yaml | 627 ++++++++++++++++++ .../expected_results_worker_fds_md.yaml | 137 ++++ pkg/providers/nutanix/validator.go | 48 +- pkg/providers/nutanix/validator_test.go | 59 +- 18 files changed, 2414 insertions(+), 19 deletions(-) create mode 100644 pkg/providers/nutanix/testdata/datacenterConfig_with_failure_domains_invalid_wg.yaml create mode 100644 pkg/providers/nutanix/testdata/eksa-cluster-multi-worker-fds.yaml create mode 100644 pkg/providers/nutanix/testdata/eksa-cluster-worker-fds.yaml create mode 100644 pkg/providers/nutanix/testdata/expected_results_multi_worker_fds.yaml create mode 100644 pkg/providers/nutanix/testdata/expected_results_multi_worker_fds_md.yaml create mode 100644 pkg/providers/nutanix/testdata/expected_results_worker_fds.yaml create mode 100644 pkg/providers/nutanix/testdata/expected_results_worker_fds_md.yaml diff --git 
a/config/crd/bases/anywhere.eks.amazonaws.com_nutanixdatacenterconfigs.yaml b/config/crd/bases/anywhere.eks.amazonaws.com_nutanixdatacenterconfigs.yaml index d3e65adcd4f8..bdddde066fdd 100644 --- a/config/crd/bases/anywhere.eks.amazonaws.com_nutanixdatacenterconfigs.yaml +++ b/config/crd/bases/anywhere.eks.amazonaws.com_nutanixdatacenterconfigs.yaml @@ -115,6 +115,12 @@ spec: - type type: object type: array + workerMachineGroups: + description: Worker Machine Groups holds the list of worker + machine group names that will use this failure domain. + items: + type: string + type: array required: - name type: object diff --git a/config/crd/bases/anywhere.eks.amazonaws.com_nutanixmachineconfigs.yaml b/config/crd/bases/anywhere.eks.amazonaws.com_nutanixmachineconfigs.yaml index 65ac440b26c8..113b00ef288e 100644 --- a/config/crd/bases/anywhere.eks.amazonaws.com_nutanixmachineconfigs.yaml +++ b/config/crd/bases/anywhere.eks.amazonaws.com_nutanixmachineconfigs.yaml @@ -74,6 +74,28 @@ spec: required: - type type: object + gpus: + description: List of GPU devices that should be added to the VMs. + items: + description: NutanixGPUIdentifier holds VM GPU device configuration. + properties: + deviceID: + description: deviceID is the device ID of the GPU device. + format: int64 + type: integer + name: + description: vendorID is the vendor ID of the GPU device. + type: string + type: + description: type is the type of the GPU device. 
+ enum: + - deviceID + - name + type: string + required: + - type + type: object + type: array image: description: image is to identify the OS image uploaded to the Prism Central (PC) The image identifier (uuid or name) can be obtained diff --git a/go.mod b/go.mod index bdc3732727a2..914f19af3414 100644 --- a/go.mod +++ b/go.mod @@ -181,7 +181,7 @@ require ( go.opentelemetry.io/otel/trace v1.20.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/mod v0.14.0 // indirect - golang.org/x/sync v0.6.0 // indirect + golang.org/x/sync v0.6.0 golang.org/x/sys v0.18.0 // indirect golang.org/x/term v0.18.0 // indirect golang.org/x/time v0.5.0 // indirect diff --git a/pkg/api/v1alpha1/nutanixdatacenterconfig_types.go b/pkg/api/v1alpha1/nutanixdatacenterconfig_types.go index 2ab1b77467f3..7361f887e309 100644 --- a/pkg/api/v1alpha1/nutanixdatacenterconfig_types.go +++ b/pkg/api/v1alpha1/nutanixdatacenterconfig_types.go @@ -68,6 +68,10 @@ type NutanixDatacenterFailureDomain struct { // Subnets holds the list of subnets identifiers cluster's network subnets. // +kubebuilder:validation:Required Subnets []NutanixResourceIdentifier `json:"subnets,omitempty"` + + // Worker Machine Groups holds the list of worker machine group names that will use this failure domain. + // +optional + WorkerMachineGroups []string `json:"workerMachineGroups,omitempty"` } // NutanixDatacenterConfigStatus defines the observed state of NutanixDatacenterConfig. 
@@ -165,7 +169,7 @@ func (in *NutanixDatacenterConfig) Validate() error { } } - if in.Spec.FailureDomains != nil && len(in.Spec.FailureDomains) != 0 { + if len(in.Spec.FailureDomains) != 0 { dccName := in.Namespace + "/" + in.Name validateClusterResourceIdentifier := createValidateNutanixResourceFunc("NutanixDatacenterConfig.Spec.FailureDomains.Cluster", "cluster", dccName) validateSubnetResourceIdentifier := createValidateNutanixResourceFunc("NutanixDatacenterConfig.Spec.FailureDomains.Subnets", "subnet", dccName) diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 18e9c4196df7..fc7a2f3dddee 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -2057,6 +2057,11 @@ func (in *NutanixDatacenterFailureDomain) DeepCopyInto(out *NutanixDatacenterFai (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.WorkerMachineGroups != nil { + in, out := &in.WorkerMachineGroups, &out.WorkerMachineGroups + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixDatacenterFailureDomain. 
diff --git a/pkg/providers/nutanix/config/md-template.yaml b/pkg/providers/nutanix/config/md-template.yaml index 8cb017650596..90e7b91c33c2 100644 --- a/pkg/providers/nutanix/config/md-template.yaml +++ b/pkg/providers/nutanix/config/md-template.yaml @@ -1,3 +1,106 @@ +{{- if $.failureDomains -}}{{ range $fd := $.failureDomains -}} +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "{{$.clusterName}}" + name: "{{$.workerNodeGroupName}}-{{$fd.Name}}" + namespace: "{{$.eksaSystemNamespace}}" +{{- if $.autoscalingConfig }} + annotations: + cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size: "{{ $.autoscalingConfig.MinCount }}" + cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size: "{{ $.autoscalingConfig.MaxCount }}" +{{- end }} +spec: + clusterName: "{{$.clusterName}}" +{{- if not $.autoscalingConfig }} + replicas: {{ index $.failureDomainsReplicas $fd.Name }} +{{- end }} + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "{{$.clusterName}}" + spec: + failureDomain: "{{$fd.Name}}" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "{{$.workloadkubeadmconfigTemplateName}}" + clusterName: "{{$.clusterName}}" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "{{$.workloadTemplateName}}-{{$fd.Name}}" + version: "{{$.kubernetesVersion}}" +{{- if $.upgradeRolloutStrategy }} + strategy: + rollingUpdate: + maxSurge: {{$.maxSurge}} + maxUnavailable: {{$.maxUnavailable}} +{{- end }} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "{{$.workloadTemplateName}}-{{$fd.Name}}" + namespace: "{{$.eksaSystemNamespace}}" +spec: + template: + spec: + providerID: "nutanix://{{$.clusterName}}-m1" + vcpusPerSocket: {{$.vcpusPerSocket}} + vcpuSockets: {{$.vcpuSockets}} + memorySize: 
{{$.memorySize}} + systemDiskSize: {{$.systemDiskSize}} + image: +{{- if (eq $.imageIDType "name") }} + type: name + name: "{{$.imageName}}" +{{ else if (eq $.imageIDType "uuid") }} + type: uuid + uuid: "{{$.imageUUID}}" +{{ end }} + cluster: +{{- if (eq $fd.Cluster.Type "name") }} + type: name + name: "{{ $fd.Cluster.Name }}" +{{- else if (eq $fd.Cluster.Type "uuid") }} + type: uuid + uuid: "{{ $fd.Cluster.UUID }}" +{{ end }} + subnet: +{{- range $subnet := $fd.Subnets }} +{{- if (eq $subnet.Type "name") }} + - type: name + name: "{{ $subnet.Name }}" +{{- else if (eq $subnet.Type "uuid") }} + - type: uuid + uuid: "{{ $subnet.UUID }}" +{{- end }} +{{- end }} +{{- if $.projectIDType}} + project: +{{- if (eq $.projectIDType "name") }} + type: name + name: "{{$.projectName}}" +{{- else if (eq $.projectIDType "uuid") }} + type: uuid + uuid: "{{$.projectUUID}}" +{{ end }} +{{ end }} +{{- if $.additionalCategories}} + additionalCategories: +{{- range $.additionalCategories}} + - key: "{{ $.Key }}" + value: "{{ $.Value }}" +{{- end }} +{{- end }} +--- +{{ end -}} +{{- else -}} apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment metadata: @@ -107,6 +210,7 @@ spec: {{- end }} {{- end }} --- +{{ end -}} apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 kind: KubeadmConfigTemplate metadata: diff --git a/pkg/providers/nutanix/provider_test.go b/pkg/providers/nutanix/provider_test.go index edd67448282c..97e2650f766f 100644 --- a/pkg/providers/nutanix/provider_test.go +++ b/pkg/providers/nutanix/provider_test.go @@ -312,13 +312,13 @@ func TestNutanixProviderSetupAndValidateCreate(t *testing.T) { name: "cluster config with unsupported upgrade strategy configuration for cp", clusterConfFile: "testdata/cluster_nutanix_with_upgrade_strategy_cp.yaml", expectErr: true, - expectErrStr: "failed setup and validations: Upgrade rollout strategy customization is not supported for nutanix provider", + expectErrStr: "failed setup and validations: upgrade rollout strategy customization 
is not supported for nutanix provider", }, { name: "cluster config with unsupported upgrade strategy configuration for md", clusterConfFile: "testdata/cluster_nutanix_with_upgrade_strategy_md.yaml", expectErr: true, - expectErrStr: "failed setup and validations: Upgrade rollout strategy customization is not supported for nutanix provider", + expectErrStr: "failed setup and validations: upgrade rollout strategy customization is not supported for nutanix provider", }, } @@ -508,13 +508,13 @@ func TestNutanixProviderSetupAndValidateDeleteCluster(t *testing.T) { name: "cluster config with unsupported upgrade strategy configuration for cp", clusterConfFile: "testdata/cluster_nutanix_with_upgrade_strategy_cp.yaml", expectErr: true, - expectErrStr: "failed setup and validations: Upgrade rollout strategy customization is not supported for nutanix provider", + expectErrStr: "failed setup and validations: upgrade rollout strategy customization is not supported for nutanix provider", }, { name: "cluster config with unsupported upgrade strategy configuration for md", clusterConfFile: "testdata/cluster_nutanix_with_upgrade_strategy_md.yaml", expectErr: true, - expectErrStr: "failed setup and validations: Upgrade rollout strategy customization is not supported for nutanix provider", + expectErrStr: "failed setup and validations: upgrade rollout strategy customization is not supported for nutanix provider", }, } @@ -560,13 +560,13 @@ func TestNutanixProviderSetupAndValidateUpgradeCluster(t *testing.T) { name: "cluster config with unsupported upgrade strategy configuration for cp", clusterConfFile: "testdata/cluster_nutanix_with_upgrade_strategy_cp.yaml", expectErr: true, - expectErrStr: "failed setup and validations: Upgrade rollout strategy customization is not supported for nutanix provider", + expectErrStr: "failed setup and validations: upgrade rollout strategy customization is not supported for nutanix provider", }, { name: "cluster config with unsupported upgrade strategy 
configuration for md", clusterConfFile: "testdata/cluster_nutanix_with_upgrade_strategy_md.yaml", expectErr: true, - expectErrStr: "failed setup and validations: Upgrade rollout strategy customization is not supported for nutanix provider", + expectErrStr: "failed setup and validations: upgrade rollout strategy customization is not supported for nutanix provider", }, } diff --git a/pkg/providers/nutanix/template.go b/pkg/providers/nutanix/template.go index 507d2ec32c47..86f3d2eb4913 100644 --- a/pkg/providers/nutanix/template.go +++ b/pkg/providers/nutanix/template.go @@ -346,10 +346,53 @@ func buildTemplateMapCP( return values, nil } +func calcFailureDomainReplicas(workerNodeGroupConfiguration v1alpha1.WorkerNodeGroupConfiguration, failureDomains []v1alpha1.NutanixDatacenterFailureDomain) map[string]int { + replicasPerFailureDomain := make(map[string]int) + failureDomainCount := len(failureDomains) + + if workerNodeGroupConfiguration.AutoScalingConfiguration != nil { + return replicasPerFailureDomain + } + + if failureDomainCount == 0 { + return replicasPerFailureDomain + } + + workerNodeGroupCount := failureDomainCount + if workerNodeGroupConfiguration.Count != nil { + workerNodeGroupCount = int(*workerNodeGroupConfiguration.Count) + } + + minCount := int(workerNodeGroupCount / failureDomainCount) + + for i := 0; i < len(failureDomains); i++ { + replicasPerFailureDomain[failureDomains[i].Name] = minCount + } + replicasPerFailureDomain[failureDomains[0].Name] = workerNodeGroupCount - (failureDomainCount-1)*minCount + + return replicasPerFailureDomain +} + +func getFailureDomainsForWorkerNodeGroup(allFailureDomains []v1alpha1.NutanixDatacenterFailureDomain, workerNodeGroupConfigurationName string) []v1alpha1.NutanixDatacenterFailureDomain { + result := make([]v1alpha1.NutanixDatacenterFailureDomain, 0) + for _, fd := range allFailureDomains { + for _, workerMachineGroup := range fd.WorkerMachineGroups { + if workerMachineGroup == workerNodeGroupConfigurationName { 
+ result = append(result, fd) + } + } + } + + return result +} + func buildTemplateMapMD(clusterSpec *cluster.Spec, workerNodeGroupMachineSpec v1alpha1.NutanixMachineConfigSpec, workerNodeGroupConfiguration v1alpha1.WorkerNodeGroupConfiguration) (map[string]interface{}, error) { versionsBundle := clusterSpec.WorkerNodeGroupVersionsBundle(workerNodeGroupConfiguration) format := "cloud-config" + failureDomainsForWorkerNodeGroup := getFailureDomainsForWorkerNodeGroup(clusterSpec.NutanixDatacenter.Spec.FailureDomains, workerNodeGroupConfiguration.Name) + replicasPerFailureDomain := calcFailureDomainReplicas(workerNodeGroupConfiguration, failureDomainsForWorkerNodeGroup) + values := map[string]interface{}{ "clusterName": clusterSpec.Cluster.Name, "eksaSystemNamespace": constants.EksaSystemNamespace, @@ -374,6 +417,8 @@ func buildTemplateMapMD(clusterSpec *cluster.Spec, workerNodeGroupMachineSpec v1 "subnetUUID": workerNodeGroupMachineSpec.Subnet.UUID, "workerNodeGroupName": fmt.Sprintf("%s-%s", clusterSpec.Cluster.Name, workerNodeGroupConfiguration.Name), "workerNodeGroupTaints": workerNodeGroupConfiguration.Taints, + "failureDomains": failureDomainsForWorkerNodeGroup, + "failureDomainsReplicas": replicasPerFailureDomain, } if clusterSpec.Cluster.Spec.RegistryMirrorConfiguration != nil { diff --git a/pkg/providers/nutanix/template_test.go b/pkg/providers/nutanix/template_test.go index 71045105a515..eafeaeb4d291 100644 --- a/pkg/providers/nutanix/template_test.go +++ b/pkg/providers/nutanix/template_test.go @@ -577,9 +577,10 @@ func TestNewNutanixTemplateBuilderProxy(t *testing.T) { assert.NoError(t, err) assert.NotNil(t, workerSpec) - expectedWorkersSpec, err := os.ReadFile("testdata/expected_results_proxy_md.yaml") + resultMdFileName := "testdata/expected_results_proxy_md.yaml" + expectedWorkersSpec, err := os.ReadFile(resultMdFileName) require.NoError(t, err) - assert.Equal(t, expectedWorkersSpec, workerSpec) + test.AssertContentToFile(t, string(expectedWorkersSpec), 
resultMdFileName) } func TestTemplateBuilder_CertSANs(t *testing.T) { @@ -726,6 +727,81 @@ func TestTemplateBuilderFailureDomains(t *testing.T) { } } +func TestTemplateBuilderWorkersFailureDomains(t *testing.T) { + for _, tc := range []struct { + Input string + OutputCP string + OutputMD string + workloadTemplateNames map[string]string + kubeadmconfigTemplateNames map[string]string + }{ + { + Input: "testdata/eksa-cluster-worker-fds.yaml", + OutputCP: "testdata/expected_results_worker_fds.yaml", + OutputMD: "testdata/expected_results_worker_fds_md.yaml", + workloadTemplateNames: map[string]string{ + "eksa-unit-test": "eksa-unit-test", + }, + kubeadmconfigTemplateNames: map[string]string{ + "eksa-unit-test": "eksa-unit-test", + }, + }, + { + Input: "testdata/eksa-cluster-multi-worker-fds.yaml", + OutputCP: "testdata/expected_results_multi_worker_fds.yaml", + OutputMD: "testdata/expected_results_multi_worker_fds_md.yaml", + workloadTemplateNames: map[string]string{ + "eksa-unit-test-1": "eksa-unit-test-1", + "eksa-unit-test-2": "eksa-unit-test-2", + "eksa-unit-test-3": "eksa-unit-test-3", + }, + kubeadmconfigTemplateNames: map[string]string{ + "eksa-unit-test-1": "eksa-unit-test", + "eksa-unit-test-2": "eksa-unit-test", + "eksa-unit-test-3": "eksa-unit-test", + }, + }, + } { + clusterSpec := test.NewFullClusterSpec(t, tc.Input) + machineConf := clusterSpec.NutanixMachineConfig(clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name) + workerConfSpecs := make(map[string]anywherev1.NutanixMachineConfigSpec) + for _, worker := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { + workerConf := clusterSpec.NutanixMachineConfig(worker.MachineGroupRef.Name) + workerConfSpecs[worker.MachineGroupRef.Name] = workerConf.Spec + } + dcConf := clusterSpec.NutanixDatacenter + + // workerMachineConfigs := clusterSpec.NutanixMachineConfigs + + t.Setenv(constants.EksaNutanixUsernameKey, "admin") + t.Setenv(constants.EksaNutanixPasswordKey, "password") + 
creds := GetCredsFromEnv() + builder := NewNutanixTemplateBuilder(&dcConf.Spec, &machineConf.Spec, &machineConf.Spec, workerConfSpecs, creds, time.Now) + assert.NotNil(t, builder) + + buildSpec := test.NewFullClusterSpec(t, tc.Input) + + cpSpec, err := builder.GenerateCAPISpecControlPlane(buildSpec) + assert.NoError(t, err) + assert.NotNil(t, cpSpec) + test.AssertContentToFile(t, string(cpSpec), tc.OutputCP) + + // workloadTemplateNames, kubeadmconfigTemplateNames := getTemplateNames(clusterSpec, builder, workerMachineConfigs) + // workloadTemplateNames := map[string]string{ + // "eksa-unit-test-1": "eksa-unit-test-1", + // "eksa-unit-test-2": "eksa-unit-test-2", + // "eksa-unit-test-3": "eksa-unit-test-3", + // } + // kubeadmconfigTemplateNames := map[string]string{ + // "eksa-unit-test": "eksa-unit-test", + // } + workerSpec, err := builder.GenerateCAPISpecWorkers(buildSpec, tc.workloadTemplateNames, tc.kubeadmconfigTemplateNames) + assert.NoError(t, err) + assert.NotNil(t, workerSpec) + test.AssertContentToFile(t, string(workerSpec), tc.OutputMD) + } +} + func TestTemplateBuilderGPUs(t *testing.T) { for _, tc := range []struct { Input string diff --git a/pkg/providers/nutanix/testdata/datacenterConfig_with_failure_domains_invalid_wg.yaml b/pkg/providers/nutanix/testdata/datacenterConfig_with_failure_domains_invalid_wg.yaml new file mode 100644 index 000000000000..89bb2147cf42 --- /dev/null +++ b/pkg/providers/nutanix/testdata/datacenterConfig_with_failure_domains_invalid_wg.yaml @@ -0,0 +1,31 @@ +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixDatacenterConfig +metadata: + name: eksa-unit-test + namespace: default +spec: + endpoint: "prism.nutanix.com" + port: 9440 + credentialRef: + kind: Secret + name: "nutanix-credentials" + insecure: true + failureDomains: + - name: "pe1" + cluster: + type: name + name: "prism-cluster" + subnets: + - type: uuid + uuid: "2d166190-7759-4dc6-b835-923262d6b497" + workerMachineGroups: + - eksa-unit-test-1 + - name: 
"pe2" + cluster: + type: uuid + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + subnets: + - type: name + name: "prism-subnet" + workerMachineGroups: + - eksa-unit-test-1 diff --git a/pkg/providers/nutanix/testdata/eksa-cluster-multi-worker-fds.yaml b/pkg/providers/nutanix/testdata/eksa-cluster-multi-worker-fds.yaml new file mode 100644 index 000000000000..70034a827cb4 --- /dev/null +++ b/pkg/providers/nutanix/testdata/eksa-cluster-multi-worker-fds.yaml @@ -0,0 +1,177 @@ +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: Cluster +metadata: + name: eksa-unit-test + namespace: default +spec: + kubernetesVersion: "1.19" + controlPlaneConfiguration: + name: eksa-unit-test + count: 3 + endpoint: + host: test-ip + machineGroupRef: + name: eksa-unit-test + kind: NutanixMachineConfig + workerNodeGroupConfigurations: + - count: 4 + name: eksa-unit-test-1 + machineGroupRef: + name: eksa-unit-test-1 + kind: NutanixMachineConfig + - count: 3 + name: eksa-unit-test-2 + machineGroupRef: + name: eksa-unit-test-2 + kind: NutanixMachineConfig + - count: 2 + name: eksa-unit-test-3 + machineGroupRef: + name: eksa-unit-test-3 + kind: NutanixMachineConfig + datacenterRef: + kind: NutanixDatacenterConfig + name: eksa-unit-test + clusterNetwork: + cni: "cilium" + pods: + cidrBlocks: + - 192.168.0.0/16 + services: + cidrBlocks: + - 10.96.0.0/12 +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixDatacenterConfig +metadata: + name: eksa-unit-test + namespace: default +spec: + endpoint: "prism.nutanix.com" + port: 9440 + credentialRef: + kind: Secret + name: "nutanix-credentials" + failureDomains: + - name: "pe1" + workerMachineGroups: + - eksa-unit-test-1 + - eksa-unit-test-2 + - eksa-unit-test-3 + cluster: + type: name + name: "prism-cluster" + subnets: + - type: uuid + uuid: "2d166190-7759-4dc6-b835-923262d6b497" + - name: "pe2" + workerMachineGroups: + - eksa-unit-test-1 + - eksa-unit-test-2 + cluster: + type: uuid + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + 
subnets: + - type: name + name: "prism-subnet" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixMachineConfig +metadata: + name: eksa-unit-test + namespace: default +spec: + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + image: + type: "name" + name: "prism-image" + cluster: + type: "name" + name: "prism-cluster" + subnet: + type: "name" + name: "prism-subnet" + systemDiskSize: 40Gi + osFamily: "ubuntu" + users: + - name: "mySshUsername" + sshAuthorizedKeys: + - "mySshAuthorizedKey" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixMachineConfig +metadata: + name: eksa-unit-test-1 + namespace: default +spec: + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + image: + type: "name" + name: "prism-image" + cluster: + type: "name" + name: "prism-cluster" + subnet: + type: "name" + name: "prism-subnet" + systemDiskSize: 40Gi + osFamily: "ubuntu" + users: + - name: "mySshUsername" + sshAuthorizedKeys: + - "mySshAuthorizedKey" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixMachineConfig +metadata: + name: eksa-unit-test-2 + namespace: default +spec: + vcpusPerSocket: 1 + vcpuSockets: 2 + memorySize: 16Gi + image: + type: "name" + name: "prism-image" + cluster: + type: "name" + name: "prism-cluster" + subnet: + type: "name" + name: "prism-subnet" + systemDiskSize: 40Gi + osFamily: "ubuntu" + users: + - name: "mySshUsername" + sshAuthorizedKeys: + - "mySshAuthorizedKey" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixMachineConfig +metadata: + name: eksa-unit-test-3 + namespace: default +spec: + vcpusPerSocket: 2 + vcpuSockets: 4 + memorySize: 4Gi + image: + type: "name" + name: "prism-image" + cluster: + type: "name" + name: "prism-cluster" + subnet: + type: "name" + name: "prism-subnet" + systemDiskSize: 40Gi + osFamily: "ubuntu" + users: + - name: "mySshUsername" + sshAuthorizedKeys: + - "mySshAuthorizedKey" +--- diff --git 
a/pkg/providers/nutanix/testdata/eksa-cluster-worker-fds.yaml b/pkg/providers/nutanix/testdata/eksa-cluster-worker-fds.yaml new file mode 100644 index 000000000000..2601d0382b4a --- /dev/null +++ b/pkg/providers/nutanix/testdata/eksa-cluster-worker-fds.yaml @@ -0,0 +1,89 @@ +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: Cluster +metadata: + name: eksa-unit-test + namespace: default +spec: + kubernetesVersion: "1.19" + controlPlaneConfiguration: + name: eksa-unit-test + count: 3 + endpoint: + host: test-ip + machineGroupRef: + name: eksa-unit-test + kind: NutanixMachineConfig + workerNodeGroupConfigurations: + - count: 4 + name: eksa-unit-test + machineGroupRef: + name: eksa-unit-test + kind: NutanixMachineConfig + datacenterRef: + kind: NutanixDatacenterConfig + name: eksa-unit-test + clusterNetwork: + cni: "cilium" + pods: + cidrBlocks: + - 192.168.0.0/16 + services: + cidrBlocks: + - 10.96.0.0/12 +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixDatacenterConfig +metadata: + name: eksa-unit-test + namespace: default +spec: + endpoint: "prism.nutanix.com" + port: 9440 + credentialRef: + kind: Secret + name: "nutanix-credentials" + failureDomains: + - name: "pe1" + workerMachineGroups: + - eksa-unit-test + cluster: + type: name + name: "prism-cluster" + subnets: + - type: uuid + uuid: "2d166190-7759-4dc6-b835-923262d6b497" + - name: "pe2" + workerMachineGroups: + - eksa-unit-test + cluster: + type: uuid + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + subnets: + - type: name + name: "prism-subnet" +--- +apiVersion: anywhere.eks.amazonaws.com/v1alpha1 +kind: NutanixMachineConfig +metadata: + name: eksa-unit-test + namespace: default +spec: + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + image: + type: "name" + name: "prism-image" + cluster: + type: "name" + name: "prism-cluster" + subnet: + type: "name" + name: "prism-subnet" + systemDiskSize: 40Gi + osFamily: "ubuntu" + users: + - name: "mySshUsername" + sshAuthorizedKeys: + - 
"mySshAuthorizedKey" +--- diff --git a/pkg/providers/nutanix/testdata/expected_results_multi_worker_fds.yaml b/pkg/providers/nutanix/testdata/expected_results_multi_worker_fds.yaml new file mode 100644 index 000000000000..68739c610f69 --- /dev/null +++ b/pkg/providers/nutanix/testdata/expected_results_multi_worker_fds.yaml @@ -0,0 +1,627 @@ +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixCluster +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + failureDomains: + - name: "pe1" + cluster: + type: "name" + name: "prism-cluster" + subnets: + - type: "uuid" + uuid: "2d166190-7759-4dc6-b835-923262d6b497" + controlPlane: true + - name: "pe2" + cluster: + type: "uuid" + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + subnets: + - type: "name" + name: "prism-subnet" + controlPlane: true + prismCentral: + address: "prism.nutanix.com" + port: 9440 + insecure: false + credentialRef: + name: "capx-eksa-unit-test" + kind: Secret + controlPlaneEndpoint: + host: "test-ip" + port: 6443 +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + clusterNetwork: + services: + cidrBlocks: [10.96.0.0/12] + pods: + cidrBlocks: [192.168.0.0/16] + serviceDomain: "cluster.local" + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixCluster + name: "eksa-unit-test" +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + replicas: 3 + version: "v1.19.8-eks-1-19-4" + machineTemplate: + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "" + kubeadmConfigSpec: + clusterConfiguration: + imageRepository: 
"public.ecr.aws/eks-distro/kubernetes" + apiServer: + certSANs: + - localhost + - 127.0.0.1 + - 0.0.0.0 + extraArgs: + cloud-provider: external + audit-policy-file: /etc/kubernetes/audit-policy.yaml + audit-log-path: /var/log/kubernetes/api-audit.log + audit-log-maxage: "30" + audit-log-maxbackup: "10" + audit-log-maxsize: "512" + extraVolumes: + - hostPath: /etc/kubernetes/audit-policy.yaml + mountPath: /etc/kubernetes/audit-policy.yaml + name: audit-policy + pathType: File + readOnly: true + - hostPath: /var/log/kubernetes + mountPath: /var/log/kubernetes + name: audit-log-dir + pathType: DirectoryOrCreate + readOnly: false + controllerManager: + extraArgs: + cloud-provider: external + enable-hostpath-provisioner: "true" + dns: + imageRepository: public.ecr.aws/eks-distro/coredns + imageTag: v1.8.0-eks-1-19-4 + etcd: + local: + imageRepository: public.ecr.aws/eks-distro/etcd-io + imageTag: v3.4.14-eks-1-19-4 + files: + - content: | + apiVersion: v1 + kind: Pod + metadata: + creationTimestamp: null + name: kube-vip + namespace: kube-system + spec: + containers: + - name: kube-vip + image: + imagePullPolicy: IfNotPresent + args: + - manager + env: + - name: vip_arp + value: "true" + - name: address + value: "test-ip" + - name: port + value: "6443" + - name: vip_cidr + value: "32" + - name: cp_enable + value: "true" + - name: cp_namespace + value: kube-system + - name: vip_ddns + value: "false" + - name: vip_leaderelection + value: "true" + - name: vip_leaseduration + value: "15" + - name: vip_renewdeadline + value: "10" + - name: vip_retryperiod + value: "2" + - name: svc_enable + value: "false" + - name: lb_enable + value: "false" + securityContext: + capabilities: + add: + - NET_ADMIN + - SYS_TIME + - NET_RAW + volumeMounts: + - mountPath: /etc/kubernetes/admin.conf + name: kubeconfig + resources: {} + hostNetwork: true + volumes: + - name: kubeconfig + hostPath: + type: FileOrCreate + path: /etc/kubernetes/admin.conf + status: {} + owner: root:root + path: 
/etc/kubernetes/manifests/kube-vip.yaml + - content: | + apiVersion: audit.k8s.io/v1beta1 + kind: Policy + rules: + # Log aws-auth configmap changes + - level: RequestResponse + namespaces: ["kube-system"] + verbs: ["update", "patch", "delete"] + resources: + - group: "" # core + resources: ["configmaps"] + resourceNames: ["aws-auth"] + omitStages: + - "RequestReceived" + # The following requests were manually identified as high-volume and low-risk, + # so drop them. + - level: None + users: ["system:kube-proxy"] + verbs: ["watch"] + resources: + - group: "" # core + resources: ["endpoints", "services", "services/status"] + - level: None + users: ["kubelet"] # legacy kubelet identity + verbs: ["get"] + resources: + - group: "" # core + resources: ["nodes", "nodes/status"] + - level: None + userGroups: ["system:nodes"] + verbs: ["get"] + resources: + - group: "" # core + resources: ["nodes", "nodes/status"] + - level: None + users: + - system:kube-controller-manager + - system:kube-scheduler + - system:serviceaccount:kube-system:endpoint-controller + verbs: ["get", "update"] + namespaces: ["kube-system"] + resources: + - group: "" # core + resources: ["endpoints"] + - level: None + users: ["system:apiserver"] + verbs: ["get"] + resources: + - group: "" # core + resources: ["namespaces", "namespaces/status", "namespaces/finalize"] + # Don't log HPA fetching metrics. + - level: None + users: + - system:kube-controller-manager + verbs: ["get", "list"] + resources: + - group: "metrics.k8s.io" + # Don't log these read-only URLs. + - level: None + nonResourceURLs: + - /healthz* + - /version + - /swagger* + # Don't log events requests. 
+ - level: None + resources: + - group: "" # core + resources: ["events"] + # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes + - level: Request + users: ["kubelet", "system:node-problem-detector", "system:serviceaccount:kube-system:node-problem-detector"] + verbs: ["update","patch"] + resources: + - group: "" # core + resources: ["nodes/status", "pods/status"] + omitStages: + - "RequestReceived" + - level: Request + userGroups: ["system:nodes"] + verbs: ["update","patch"] + resources: + - group: "" # core + resources: ["nodes/status", "pods/status"] + omitStages: + - "RequestReceived" + # deletecollection calls can be large, don't log responses for expected namespace deletions + - level: Request + users: ["system:serviceaccount:kube-system:namespace-controller"] + verbs: ["deletecollection"] + omitStages: + - "RequestReceived" + # Secrets, ConfigMaps, and TokenReviews can contain sensitive & binary data, + # so only log at the Metadata level. + - level: Metadata + resources: + - group: "" # core + resources: ["secrets", "configmaps"] + - group: authentication.k8s.io + resources: ["tokenreviews"] + omitStages: + - "RequestReceived" + - level: Request + resources: + - group: "" + resources: ["serviceaccounts/token"] + # Get repsonses can be large; skip them. 
+ - level: Request + verbs: ["get", "list", "watch"] + resources: + - group: "" # core + - group: "admissionregistration.k8s.io" + - group: "apiextensions.k8s.io" + - group: "apiregistration.k8s.io" + - group: "apps" + - group: "authentication.k8s.io" + - group: "authorization.k8s.io" + - group: "autoscaling" + - group: "batch" + - group: "certificates.k8s.io" + - group: "extensions" + - group: "metrics.k8s.io" + - group: "networking.k8s.io" + - group: "policy" + - group: "rbac.authorization.k8s.io" + - group: "scheduling.k8s.io" + - group: "settings.k8s.io" + - group: "storage.k8s.io" + omitStages: + - "RequestReceived" + # Default level for known APIs + - level: RequestResponse + resources: + - group: "" # core + - group: "admissionregistration.k8s.io" + - group: "apiextensions.k8s.io" + - group: "apiregistration.k8s.io" + - group: "apps" + - group: "authentication.k8s.io" + - group: "authorization.k8s.io" + - group: "autoscaling" + - group: "batch" + - group: "certificates.k8s.io" + - group: "extensions" + - group: "metrics.k8s.io" + - group: "networking.k8s.io" + - group: "policy" + - group: "rbac.authorization.k8s.io" + - group: "scheduling.k8s.io" + - group: "settings.k8s.io" + - group: "storage.k8s.io" + omitStages: + - "RequestReceived" + # Default level for all other requests. 
+ - level: Metadata + omitStages: + - "RequestReceived" + owner: root:root + path: /etc/kubernetes/audit-policy.yaml + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd + # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726 + #cgroup-driver: cgroupfs + eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0% + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + joinConfiguration: + nodeRegistration: + criSocket: /var/run/containerd/containerd.sock + kubeletExtraArgs: + cloud-provider: external + read-only-port: "0" + anonymous-auth: "false" + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + name: "{{ ds.meta_data.hostname }}" + users: + - name: "mySshUsername" + lockPassword: false + sudo: ALL=(ALL) NOPASSWD:ALL + sshAuthorizedKeys: + - "mySshAuthorizedKey" + preKubeadmCommands: + - hostnamectl set-hostname "{{ ds.meta_data.hostname }}" + - echo "::1 ipv6-localhost ipv6-loopback" >/etc/hosts + - echo "127.0.0.1 localhost" >>/etc/hosts + - echo "127.0.0.1 {{ ds.meta_data.hostname }}" >> /etc/hosts + postKubeadmCommands: + - echo export KUBECONFIG=/etc/kubernetes/admin.conf >> /root/.bashrc + useExperimentalRetryJoin: true +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: name + name: "prism-cluster" + subnet: + - type: name + name: "prism-subnet" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: eksa-unit-test-nutanix-ccm + namespace: "eksa-system" +data: + nutanix-ccm.yaml: | + --- + apiVersion: v1 + kind: ServiceAccount + metadata: + name: cloud-controller-manager + 
namespace: kube-system + --- + kind: ConfigMap + apiVersion: v1 + metadata: + name: nutanix-config + namespace: kube-system + data: + nutanix_config.json: |- + { + "prismCentral": { + "address": "prism.nutanix.com", + "port": 9440, + "insecure": false, + "credentialRef": { + "kind": "secret", + "name": "nutanix-creds", + "namespace": "kube-system" + } + }, + "enableCustomLabeling": false, + "topologyDiscovery": { + "type": "Prism" + } + } + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + annotations: + rbac.authorization.kubernetes.io/autoupdate: "true" + name: system:cloud-controller-manager + rules: + - apiGroups: + - "" + resources: + - secrets + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - "" + resources: + - nodes + verbs: + - "*" + - apiGroups: + - "" + resources: + - nodes/status + verbs: + - patch + - apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - apiGroups: + - "" + resources: + - endpoints + verbs: + - create + - get + - list + - watch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + --- + kind: ClusterRoleBinding + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + name: system:cloud-controller-manager + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:cloud-controller-manager + subjects: + - kind: ServiceAccount + name: cloud-controller-manager + namespace: kube-system + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + labels: + k8s-app: nutanix-cloud-controller-manager + name: nutanix-cloud-controller-manager + namespace: kube-system + spec: + replicas: 1 + selector: + matchLabels: + k8s-app: nutanix-cloud-controller-manager + strategy: + type: Recreate + template: 
+ metadata: + labels: + k8s-app: nutanix-cloud-controller-manager + spec: + hostNetwork: true + priorityClassName: system-cluster-critical + nodeSelector: + node-role.kubernetes.io/control-plane: "" + serviceAccountName: cloud-controller-manager + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + k8s-app: nutanix-cloud-controller-manager + topologyKey: kubernetes.io/hostname + dnsPolicy: Default + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Exists + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 120 + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 120 + - effect: NoSchedule + key: node.cloudprovider.kubernetes.io/uninitialized + operator: Exists + - effect: NoSchedule + key: node.kubernetes.io/not-ready + operator: Exists + containers: + - image: "" + imagePullPolicy: IfNotPresent + name: nutanix-cloud-controller-manager + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + args: + - "--leader-elect=true" + - "--cloud-config=/etc/cloud/nutanix_config.json" + resources: + requests: + cpu: 100m + memory: 50Mi + volumeMounts: + - mountPath: /etc/cloud + name: nutanix-config-volume + readOnly: true + volumes: + - name: nutanix-config-volume + configMap: + name: nutanix-config +--- +apiVersion: addons.cluster.x-k8s.io/v1beta1 +kind: ClusterResourceSet +metadata: + name: eksa-unit-test-nutanix-ccm-crs + namespace: "eksa-system" +spec: + clusterSelector: + matchLabels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + resources: + - kind: ConfigMap + name: eksa-unit-test-nutanix-ccm + - kind: Secret + name: eksa-unit-test-nutanix-ccm-secret + strategy: Reconcile +--- +apiVersion: v1 +kind: Secret +metadata: + name: 
"eksa-unit-test-nutanix-ccm-secret" + namespace: "eksa-system" +stringData: + nutanix-ccm-secret.yaml: | + apiVersion: v1 + kind: Secret + metadata: + name: nutanix-creds + namespace: kube-system + stringData: + credentials: |- + [ + { + "type": "basic_auth", + "data": { + "prismCentral": { + "username": "admin", + "password": "password" + }, + "prismElements": null + } + } + ] +type: addons.cluster.x-k8s.io/resource-set diff --git a/pkg/providers/nutanix/testdata/expected_results_multi_worker_fds_md.yaml b/pkg/providers/nutanix/testdata/expected_results_multi_worker_fds_md.yaml new file mode 100644 index 000000000000..868e4d1a4ce1 --- /dev/null +++ b/pkg/providers/nutanix/testdata/expected_results_multi_worker_fds_md.yaml @@ -0,0 +1,356 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test-eksa-unit-test-1-pe1" + namespace: "eksa-system" +spec: + clusterName: "eksa-unit-test" + replicas: 2 + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + spec: + failureDomain: "pe1" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "eksa-unit-test" + clusterName: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "eksa-unit-test-1-pe1" + version: "v1.19.8-eks-1-19-4" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "eksa-unit-test-1-pe1" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: name + name: "prism-cluster" + subnet: + - type: uuid + uuid: "2d166190-7759-4dc6-b835-923262d6b497" +--- +apiVersion: 
cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test-eksa-unit-test-1-pe2" + namespace: "eksa-system" +spec: + clusterName: "eksa-unit-test" + replicas: 2 + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + spec: + failureDomain: "pe2" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "eksa-unit-test" + clusterName: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "eksa-unit-test-1-pe2" + version: "v1.19.8-eks-1-19-4" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "eksa-unit-test-1-pe2" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: uuid + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + + subnet: + - type: name + name: "prism-subnet" +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfigTemplate +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + template: + spec: + preKubeadmCommands: + - hostnamectl set-hostname "{{ ds.meta_data.hostname }}" + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd + # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726 + #cgroup-driver: cgroupfs + eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0% + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + name: '{{ ds.meta_data.hostname }}' + users: + - name: "mySshUsername" + lockPassword: false + sudo: ALL=(ALL) 
NOPASSWD:ALL + sshAuthorizedKeys: + - "mySshAuthorizedKey" + +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test-eksa-unit-test-2-pe1" + namespace: "eksa-system" +spec: + clusterName: "eksa-unit-test" + replicas: 2 + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + spec: + failureDomain: "pe1" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "eksa-unit-test" + clusterName: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "eksa-unit-test-2-pe1" + version: "v1.19.8-eks-1-19-4" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "eksa-unit-test-2-pe1" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 2 + memorySize: 16Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: name + name: "prism-cluster" + subnet: + - type: uuid + uuid: "2d166190-7759-4dc6-b835-923262d6b497" +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test-eksa-unit-test-2-pe2" + namespace: "eksa-system" +spec: + clusterName: "eksa-unit-test" + replicas: 1 + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + spec: + failureDomain: "pe2" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "eksa-unit-test" + clusterName: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "eksa-unit-test-2-pe2" + version: 
"v1.19.8-eks-1-19-4" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "eksa-unit-test-2-pe2" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 2 + memorySize: 16Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: uuid + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + + subnet: + - type: name + name: "prism-subnet" +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfigTemplate +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + template: + spec: + preKubeadmCommands: + - hostnamectl set-hostname "{{ ds.meta_data.hostname }}" + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd + # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726 + #cgroup-driver: cgroupfs + eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0% + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + name: '{{ ds.meta_data.hostname }}' + users: + - name: "mySshUsername" + lockPassword: false + sudo: ALL=(ALL) NOPASSWD:ALL + sshAuthorizedKeys: + - "mySshAuthorizedKey" + +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test-eksa-unit-test-3-pe1" + namespace: "eksa-system" +spec: + clusterName: "eksa-unit-test" + replicas: 2 + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + spec: + failureDomain: "pe1" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "eksa-unit-test" + clusterName: "eksa-unit-test" + infrastructureRef: + apiVersion: 
infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "eksa-unit-test-3-pe1" + version: "v1.19.8-eks-1-19-4" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "eksa-unit-test-3-pe1" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 2 + vcpuSockets: 4 + memorySize: 4Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: name + name: "prism-cluster" + subnet: + - type: uuid + uuid: "2d166190-7759-4dc6-b835-923262d6b497" +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfigTemplate +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + template: + spec: + preKubeadmCommands: + - hostnamectl set-hostname "{{ ds.meta_data.hostname }}" + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd + # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726 + #cgroup-driver: cgroupfs + eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0% + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + name: '{{ ds.meta_data.hostname }}' + users: + - name: "mySshUsername" + lockPassword: false + sudo: ALL=(ALL) NOPASSWD:ALL + sshAuthorizedKeys: + - "mySshAuthorizedKey" + +--- diff --git a/pkg/providers/nutanix/testdata/expected_results_worker_fds.yaml b/pkg/providers/nutanix/testdata/expected_results_worker_fds.yaml new file mode 100644 index 000000000000..68739c610f69 --- /dev/null +++ b/pkg/providers/nutanix/testdata/expected_results_worker_fds.yaml @@ -0,0 +1,627 @@ +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixCluster +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + failureDomains: + - name: "pe1" + cluster: + type: "name" + name: 
"prism-cluster" + subnets: + - type: "uuid" + uuid: "2d166190-7759-4dc6-b835-923262d6b497" + controlPlane: true + - name: "pe2" + cluster: + type: "uuid" + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + subnets: + - type: "name" + name: "prism-subnet" + controlPlane: true + prismCentral: + address: "prism.nutanix.com" + port: 9440 + insecure: false + credentialRef: + name: "capx-eksa-unit-test" + kind: Secret + controlPlaneEndpoint: + host: "test-ip" + port: 6443 +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + clusterNetwork: + services: + cidrBlocks: [10.96.0.0/12] + pods: + cidrBlocks: [192.168.0.0/16] + serviceDomain: "cluster.local" + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixCluster + name: "eksa-unit-test" +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + replicas: 3 + version: "v1.19.8-eks-1-19-4" + machineTemplate: + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "" + kubeadmConfigSpec: + clusterConfiguration: + imageRepository: "public.ecr.aws/eks-distro/kubernetes" + apiServer: + certSANs: + - localhost + - 127.0.0.1 + - 0.0.0.0 + extraArgs: + cloud-provider: external + audit-policy-file: /etc/kubernetes/audit-policy.yaml + audit-log-path: /var/log/kubernetes/api-audit.log + audit-log-maxage: "30" + audit-log-maxbackup: "10" + audit-log-maxsize: "512" + extraVolumes: + - hostPath: /etc/kubernetes/audit-policy.yaml + mountPath: /etc/kubernetes/audit-policy.yaml + name: audit-policy + pathType: File + readOnly: true + - hostPath: /var/log/kubernetes + mountPath: /var/log/kubernetes + 
name: audit-log-dir + pathType: DirectoryOrCreate + readOnly: false + controllerManager: + extraArgs: + cloud-provider: external + enable-hostpath-provisioner: "true" + dns: + imageRepository: public.ecr.aws/eks-distro/coredns + imageTag: v1.8.0-eks-1-19-4 + etcd: + local: + imageRepository: public.ecr.aws/eks-distro/etcd-io + imageTag: v3.4.14-eks-1-19-4 + files: + - content: | + apiVersion: v1 + kind: Pod + metadata: + creationTimestamp: null + name: kube-vip + namespace: kube-system + spec: + containers: + - name: kube-vip + image: + imagePullPolicy: IfNotPresent + args: + - manager + env: + - name: vip_arp + value: "true" + - name: address + value: "test-ip" + - name: port + value: "6443" + - name: vip_cidr + value: "32" + - name: cp_enable + value: "true" + - name: cp_namespace + value: kube-system + - name: vip_ddns + value: "false" + - name: vip_leaderelection + value: "true" + - name: vip_leaseduration + value: "15" + - name: vip_renewdeadline + value: "10" + - name: vip_retryperiod + value: "2" + - name: svc_enable + value: "false" + - name: lb_enable + value: "false" + securityContext: + capabilities: + add: + - NET_ADMIN + - SYS_TIME + - NET_RAW + volumeMounts: + - mountPath: /etc/kubernetes/admin.conf + name: kubeconfig + resources: {} + hostNetwork: true + volumes: + - name: kubeconfig + hostPath: + type: FileOrCreate + path: /etc/kubernetes/admin.conf + status: {} + owner: root:root + path: /etc/kubernetes/manifests/kube-vip.yaml + - content: | + apiVersion: audit.k8s.io/v1beta1 + kind: Policy + rules: + # Log aws-auth configmap changes + - level: RequestResponse + namespaces: ["kube-system"] + verbs: ["update", "patch", "delete"] + resources: + - group: "" # core + resources: ["configmaps"] + resourceNames: ["aws-auth"] + omitStages: + - "RequestReceived" + # The following requests were manually identified as high-volume and low-risk, + # so drop them. 
+ - level: None + users: ["system:kube-proxy"] + verbs: ["watch"] + resources: + - group: "" # core + resources: ["endpoints", "services", "services/status"] + - level: None + users: ["kubelet"] # legacy kubelet identity + verbs: ["get"] + resources: + - group: "" # core + resources: ["nodes", "nodes/status"] + - level: None + userGroups: ["system:nodes"] + verbs: ["get"] + resources: + - group: "" # core + resources: ["nodes", "nodes/status"] + - level: None + users: + - system:kube-controller-manager + - system:kube-scheduler + - system:serviceaccount:kube-system:endpoint-controller + verbs: ["get", "update"] + namespaces: ["kube-system"] + resources: + - group: "" # core + resources: ["endpoints"] + - level: None + users: ["system:apiserver"] + verbs: ["get"] + resources: + - group: "" # core + resources: ["namespaces", "namespaces/status", "namespaces/finalize"] + # Don't log HPA fetching metrics. + - level: None + users: + - system:kube-controller-manager + verbs: ["get", "list"] + resources: + - group: "metrics.k8s.io" + # Don't log these read-only URLs. + - level: None + nonResourceURLs: + - /healthz* + - /version + - /swagger* + # Don't log events requests. 
+ - level: None + resources: + - group: "" # core + resources: ["events"] + # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes + - level: Request + users: ["kubelet", "system:node-problem-detector", "system:serviceaccount:kube-system:node-problem-detector"] + verbs: ["update","patch"] + resources: + - group: "" # core + resources: ["nodes/status", "pods/status"] + omitStages: + - "RequestReceived" + - level: Request + userGroups: ["system:nodes"] + verbs: ["update","patch"] + resources: + - group: "" # core + resources: ["nodes/status", "pods/status"] + omitStages: + - "RequestReceived" + # deletecollection calls can be large, don't log responses for expected namespace deletions + - level: Request + users: ["system:serviceaccount:kube-system:namespace-controller"] + verbs: ["deletecollection"] + omitStages: + - "RequestReceived" + # Secrets, ConfigMaps, and TokenReviews can contain sensitive & binary data, + # so only log at the Metadata level. + - level: Metadata + resources: + - group: "" # core + resources: ["secrets", "configmaps"] + - group: authentication.k8s.io + resources: ["tokenreviews"] + omitStages: + - "RequestReceived" + - level: Request + resources: + - group: "" + resources: ["serviceaccounts/token"] + # Get repsonses can be large; skip them. 
+ - level: Request + verbs: ["get", "list", "watch"] + resources: + - group: "" # core + - group: "admissionregistration.k8s.io" + - group: "apiextensions.k8s.io" + - group: "apiregistration.k8s.io" + - group: "apps" + - group: "authentication.k8s.io" + - group: "authorization.k8s.io" + - group: "autoscaling" + - group: "batch" + - group: "certificates.k8s.io" + - group: "extensions" + - group: "metrics.k8s.io" + - group: "networking.k8s.io" + - group: "policy" + - group: "rbac.authorization.k8s.io" + - group: "scheduling.k8s.io" + - group: "settings.k8s.io" + - group: "storage.k8s.io" + omitStages: + - "RequestReceived" + # Default level for known APIs + - level: RequestResponse + resources: + - group: "" # core + - group: "admissionregistration.k8s.io" + - group: "apiextensions.k8s.io" + - group: "apiregistration.k8s.io" + - group: "apps" + - group: "authentication.k8s.io" + - group: "authorization.k8s.io" + - group: "autoscaling" + - group: "batch" + - group: "certificates.k8s.io" + - group: "extensions" + - group: "metrics.k8s.io" + - group: "networking.k8s.io" + - group: "policy" + - group: "rbac.authorization.k8s.io" + - group: "scheduling.k8s.io" + - group: "settings.k8s.io" + - group: "storage.k8s.io" + omitStages: + - "RequestReceived" + # Default level for all other requests. 
+ - level: Metadata + omitStages: + - "RequestReceived" + owner: root:root + path: /etc/kubernetes/audit-policy.yaml + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd + # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726 + #cgroup-driver: cgroupfs + eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0% + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + joinConfiguration: + nodeRegistration: + criSocket: /var/run/containerd/containerd.sock + kubeletExtraArgs: + cloud-provider: external + read-only-port: "0" + anonymous-auth: "false" + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + name: "{{ ds.meta_data.hostname }}" + users: + - name: "mySshUsername" + lockPassword: false + sudo: ALL=(ALL) NOPASSWD:ALL + sshAuthorizedKeys: + - "mySshAuthorizedKey" + preKubeadmCommands: + - hostnamectl set-hostname "{{ ds.meta_data.hostname }}" + - echo "::1 ipv6-localhost ipv6-loopback" >/etc/hosts + - echo "127.0.0.1 localhost" >>/etc/hosts + - echo "127.0.0.1 {{ ds.meta_data.hostname }}" >> /etc/hosts + postKubeadmCommands: + - echo export KUBECONFIG=/etc/kubernetes/admin.conf >> /root/.bashrc + useExperimentalRetryJoin: true +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: name + name: "prism-cluster" + subnet: + - type: name + name: "prism-subnet" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: eksa-unit-test-nutanix-ccm + namespace: "eksa-system" +data: + nutanix-ccm.yaml: | + --- + apiVersion: v1 + kind: ServiceAccount + metadata: + name: cloud-controller-manager + 
namespace: kube-system + --- + kind: ConfigMap + apiVersion: v1 + metadata: + name: nutanix-config + namespace: kube-system + data: + nutanix_config.json: |- + { + "prismCentral": { + "address": "prism.nutanix.com", + "port": 9440, + "insecure": false, + "credentialRef": { + "kind": "secret", + "name": "nutanix-creds", + "namespace": "kube-system" + } + }, + "enableCustomLabeling": false, + "topologyDiscovery": { + "type": "Prism" + } + } + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + annotations: + rbac.authorization.kubernetes.io/autoupdate: "true" + name: system:cloud-controller-manager + rules: + - apiGroups: + - "" + resources: + - secrets + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - "" + resources: + - nodes + verbs: + - "*" + - apiGroups: + - "" + resources: + - nodes/status + verbs: + - patch + - apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - apiGroups: + - "" + resources: + - endpoints + verbs: + - create + - get + - list + - watch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + --- + kind: ClusterRoleBinding + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + name: system:cloud-controller-manager + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:cloud-controller-manager + subjects: + - kind: ServiceAccount + name: cloud-controller-manager + namespace: kube-system + --- + apiVersion: apps/v1 + kind: Deployment + metadata: + labels: + k8s-app: nutanix-cloud-controller-manager + name: nutanix-cloud-controller-manager + namespace: kube-system + spec: + replicas: 1 + selector: + matchLabels: + k8s-app: nutanix-cloud-controller-manager + strategy: + type: Recreate + template: 
+ metadata: + labels: + k8s-app: nutanix-cloud-controller-manager + spec: + hostNetwork: true + priorityClassName: system-cluster-critical + nodeSelector: + node-role.kubernetes.io/control-plane: "" + serviceAccountName: cloud-controller-manager + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + k8s-app: nutanix-cloud-controller-manager + topologyKey: kubernetes.io/hostname + dnsPolicy: Default + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Exists + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 120 + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 120 + - effect: NoSchedule + key: node.cloudprovider.kubernetes.io/uninitialized + operator: Exists + - effect: NoSchedule + key: node.kubernetes.io/not-ready + operator: Exists + containers: + - image: "" + imagePullPolicy: IfNotPresent + name: nutanix-cloud-controller-manager + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + args: + - "--leader-elect=true" + - "--cloud-config=/etc/cloud/nutanix_config.json" + resources: + requests: + cpu: 100m + memory: 50Mi + volumeMounts: + - mountPath: /etc/cloud + name: nutanix-config-volume + readOnly: true + volumes: + - name: nutanix-config-volume + configMap: + name: nutanix-config +--- +apiVersion: addons.cluster.x-k8s.io/v1beta1 +kind: ClusterResourceSet +metadata: + name: eksa-unit-test-nutanix-ccm-crs + namespace: "eksa-system" +spec: + clusterSelector: + matchLabels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + resources: + - kind: ConfigMap + name: eksa-unit-test-nutanix-ccm + - kind: Secret + name: eksa-unit-test-nutanix-ccm-secret + strategy: Reconcile +--- +apiVersion: v1 +kind: Secret +metadata: + name: 
"eksa-unit-test-nutanix-ccm-secret" + namespace: "eksa-system" +stringData: + nutanix-ccm-secret.yaml: | + apiVersion: v1 + kind: Secret + metadata: + name: nutanix-creds + namespace: kube-system + stringData: + credentials: |- + [ + { + "type": "basic_auth", + "data": { + "prismCentral": { + "username": "admin", + "password": "password" + }, + "prismElements": null + } + } + ] +type: addons.cluster.x-k8s.io/resource-set diff --git a/pkg/providers/nutanix/testdata/expected_results_worker_fds_md.yaml b/pkg/providers/nutanix/testdata/expected_results_worker_fds_md.yaml new file mode 100644 index 000000000000..d2caf0f9ec5d --- /dev/null +++ b/pkg/providers/nutanix/testdata/expected_results_worker_fds_md.yaml @@ -0,0 +1,137 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test-eksa-unit-test-pe1" + namespace: "eksa-system" +spec: + clusterName: "eksa-unit-test" + replicas: 2 + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + spec: + failureDomain: "pe1" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "eksa-unit-test" + clusterName: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "eksa-unit-test-pe1" + version: "v1.19.8-eks-1-19-4" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "eksa-unit-test-pe1" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: name + name: "prism-cluster" + subnet: + - type: uuid + uuid: "2d166190-7759-4dc6-b835-923262d6b497" +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: 
MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + name: "eksa-unit-test-eksa-unit-test-pe2" + namespace: "eksa-system" +spec: + clusterName: "eksa-unit-test" + replicas: 2 + selector: + matchLabels: {} + template: + metadata: + labels: + cluster.x-k8s.io/cluster-name: "eksa-unit-test" + spec: + failureDomain: "pe2" + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: "eksa-unit-test" + clusterName: "eksa-unit-test" + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: NutanixMachineTemplate + name: "eksa-unit-test-pe2" + version: "v1.19.8-eks-1-19-4" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "eksa-unit-test-pe2" + namespace: "eksa-system" +spec: + template: + spec: + providerID: "nutanix://eksa-unit-test-m1" + vcpusPerSocket: 1 + vcpuSockets: 4 + memorySize: 8Gi + systemDiskSize: 40Gi + image: + type: name + name: "prism-image" + + cluster: + type: uuid + uuid: "4d69ca7d-022f-49d1-a454-74535993bda4" + + subnet: + - type: name + name: "prism-subnet" +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfigTemplate +metadata: + name: "eksa-unit-test" + namespace: "eksa-system" +spec: + template: + spec: + preKubeadmCommands: + - hostnamectl set-hostname "{{ ds.meta_data.hostname }}" + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd + # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726 + #cgroup-driver: cgroupfs + eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0% + tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + name: '{{ ds.meta_data.hostname }}' + users: + - name: "mySshUsername" + lockPassword: false + sudo: ALL=(ALL) NOPASSWD:ALL + sshAuthorizedKeys: + - 
"mySshAuthorizedKey" + +--- diff --git a/pkg/providers/nutanix/validator.go b/pkg/providers/nutanix/validator.go index 3a14d6451da1..4723f3ee46f4 100644 --- a/pkg/providers/nutanix/validator.go +++ b/pkg/providers/nutanix/validator.go @@ -57,7 +57,7 @@ func (v *Validator) ValidateClusterSpec(ctx context.Context, spec *cluster.Spec, return err } - if err := v.ValidateDatacenterConfig(ctx, client, spec.NutanixDatacenter); err != nil { + if err := v.ValidateDatacenterConfig(ctx, client, spec); err != nil { return err } @@ -110,7 +110,8 @@ func (v *Validator) checkImageNameMatchesKubernetesVersion(ctx context.Context, } // ValidateDatacenterConfig validates the datacenter config. -func (v *Validator) ValidateDatacenterConfig(ctx context.Context, client Client, config *anywherev1.NutanixDatacenterConfig) error { +func (v *Validator) ValidateDatacenterConfig(ctx context.Context, client Client, spec *cluster.Spec) error { + config := spec.NutanixDatacenter if config.Spec.Insecure { logger.Info("Warning: Skipping TLS validation for insecure connection to Nutanix Prism Central; this is not recommended for production use") } @@ -131,23 +132,26 @@ func (v *Validator) ValidateDatacenterConfig(ctx context.Context, client Client, return err } - if err := v.validateFailureDomains(ctx, client, config); err != nil { + if err := v.validateFailureDomains(ctx, client, spec); err != nil { return err } return nil } -func (v *Validator) validateFailureDomains(ctx context.Context, client Client, config *anywherev1.NutanixDatacenterConfig) error { +func (v *Validator) validateFailureDomains(ctx context.Context, client Client, spec *cluster.Spec) error { + config := spec.NutanixDatacenter + regexName, err := regexp.Compile("^[a-z0-9]([-a-z0-9]*[a-z0-9])?$") if err != nil { return err } + failureDomainCount := len(config.Spec.FailureDomains) for _, fd := range config.Spec.FailureDomains { if res := regexName.MatchString(fd.Name); !res { errorStr := `failure domain name should contains only 
small letters, digits, and hyphens. - It should start with small letter or digit` + it should start with small letter or digit` return fmt.Errorf(errorStr) } @@ -160,6 +164,25 @@ func (v *Validator) validateFailureDomains(ctx context.Context, client Client, c return err } } + + workerMachineGroups := getWorkerMachineGroups(spec) + for _, workerMachineGroupName := range fd.WorkerMachineGroups { + if err := v.validateWorkerMachineGroup(workerMachineGroups, workerMachineGroupName, failureDomainCount); err != nil { + return err + } + } + } + + return nil +} + +func (v *Validator) validateWorkerMachineGroup(workerMachineGroups map[string]anywherev1.WorkerNodeGroupConfiguration, workerMachineGroupName string, fdCount int) error { + if _, ok := workerMachineGroups[workerMachineGroupName]; !ok { + return fmt.Errorf("worker machine group %s not found in the cluster worker node group definitions", workerMachineGroupName) + } + + if workerMachineGroups[workerMachineGroupName].Count != nil && *workerMachineGroups[workerMachineGroupName].Count > fdCount { + return fmt.Errorf("count %d of machines in workerNodeGroupConfiguration %s shouldn't be greater than the failure domain count %d where those machines should be spreaded accross", *workerMachineGroups[workerMachineGroupName].Count, workerMachineGroupName, fdCount) } return nil @@ -692,11 +715,11 @@ func (v *Validator) validateFreeGPU(ctx context.Context, v3Client Client, cluste func (v *Validator) validateUpgradeRolloutStrategy(clusterSpec *cluster.Spec) error { if clusterSpec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil { - return fmt.Errorf("Upgrade rollout strategy customization is not supported for nutanix provider") + return fmt.Errorf("upgrade rollout strategy customization is not supported for nutanix provider") } for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { if workerNodeGroupConfiguration.UpgradeRolloutStrategy != nil { - return 
fmt.Errorf("Upgrade rollout strategy customization is not supported for nutanix provider") + return fmt.Errorf("upgrade rollout strategy customization is not supported for nutanix provider") } } return nil @@ -736,6 +759,17 @@ func findSubnetUUIDByName(ctx context.Context, v3Client Client, clusterUUID, sub return res.Entities[0].Metadata.UUID, nil } +// getWorkerMachineGroups retrieves the worker machine group names from the cluster spec. +func getWorkerMachineGroups(spec *cluster.Spec) map[string]anywherev1.WorkerNodeGroupConfiguration { + result := make(map[string]anywherev1.WorkerNodeGroupConfiguration) + + for _, workerNodeGroupConf := range spec.Cluster.Spec.WorkerNodeGroupConfigurations { + result[workerNodeGroupConf.MachineGroupRef.Name] = workerNodeGroupConf + } + + return result +} + // getClusterUUID retrieves the cluster uuid by the given cluster identifier. func getClusterUUID(ctx context.Context, v3Client Client, cluster anywherev1.NutanixResourceIdentifier) (string, error) { var clusterUUID string diff --git a/pkg/providers/nutanix/validator_test.go b/pkg/providers/nutanix/validator_test.go index 7ef81a156aa3..be2606fc9561 100644 --- a/pkg/providers/nutanix/validator_test.go +++ b/pkg/providers/nutanix/validator_test.go @@ -61,6 +61,9 @@ var nutanixDatacenterConfigSpecWithFailureDomainInvalidCluster string //go:embed testdata/datacenterConfig_with_failure_domains_invalid_subnet.yaml var nutanixDatacenterConfigSpecWithFailureDomainInvalidSubnet string +//go:embed testdata/datacenterConfig_with_failure_domains_invalid_wg.yaml +var nutanixDatacenterConfigSpecWithFailureDomainInvalidWorkerMachineGroups string + func fakeClusterList() *v3.ClusterListIntentResponse { return &v3.ClusterListIntentResponse{ Entities: []*v3.ClusterIntentResponse{ @@ -1460,6 +1463,11 @@ func TestNutanixValidatorValidateDatacenterConfig(t *testing.T) { dcConfFile: nutanixDatacenterConfigSpecWithFailureDomainInvalidSubnet, expectErr: true, }, + { + name: "failure domains with 
invalid workerMachineGroups", + dcConfFile: nutanixDatacenterConfigSpecWithFailureDomainInvalidWorkerMachineGroups, + expectErr: true, + }, } ctrl := gomock.NewController(t) @@ -1494,10 +1502,13 @@ func TestNutanixValidatorValidateDatacenterConfig(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { dcConf := &anywherev1.NutanixDatacenterConfig{} + clusterSpec := test.NewFullClusterSpec(t, "testdata/eksa-cluster.yaml") + clusterSpec.NutanixDatacenter = dcConf + err := yaml.Unmarshal([]byte(tc.dcConfFile), dcConf) require.NoError(t, err) - err = validator.ValidateDatacenterConfig(context.Background(), clientCache.clients["test"], dcConf) + err = validator.ValidateDatacenterConfig(context.Background(), clientCache.clients["test"], clusterSpec) if tc.expectErr { assert.Error(t, err, tc.name) } else { @@ -1538,10 +1549,13 @@ func TestNutanixValidatorValidateDatacenterConfigWithInvalidCreds(t *testing.T) for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { dcConf := &anywherev1.NutanixDatacenterConfig{} + clusterSpec := test.NewFullClusterSpec(t, "testdata/eksa-cluster.yaml") + clusterSpec.NutanixDatacenter = dcConf + err := yaml.Unmarshal([]byte(tc.dcConfFile), dcConf) require.NoError(t, err) - err = validator.ValidateDatacenterConfig(context.Background(), clientCache.clients["test"], dcConf) + err = validator.ValidateDatacenterConfig(context.Background(), clientCache.clients["test"], clusterSpec) if tc.expectErr { assert.Error(t, err, tc.name) } else { @@ -1745,3 +1759,44 @@ func TestValidateClusterMachineConfigsSuccess(t *testing.T) { t.Fatalf("validation should pass: %v", err) } } + +func TestValidateMachineConfigFailureDomainsWrongCount(t *testing.T) { + ctx := context.Background() + clusterConfigFile := "testdata/eksa-cluster-multi-worker-fds.yaml" + clusterSpec := test.NewFullClusterSpec(t, clusterConfigFile) + + ctrl := gomock.NewController(t) + mockClient := mocknutanix.NewMockClient(ctrl) + 
mockClient.EXPECT().GetCurrentLoggedInUser(gomock.Any()).Return(&v3.UserIntentResponse{}, nil).AnyTimes() + mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).DoAndReturn( + func(_ context.Context, filters *v3.DSMetadata) (*v3.ClusterListIntentResponse, error) { + return fakeClusterListForDCTest(filters.Filter) + }, + ).AnyTimes() + mockClient.EXPECT().ListSubnet(gomock.Any(), gomock.Any()).DoAndReturn( + func(_ context.Context, filters *v3.DSMetadata) (*v3.SubnetListIntentResponse, error) { + return fakeSubnetListForDCTest(filters.Filter) + }, + ).AnyTimes() + mockClient.EXPECT().GetSubnet(gomock.Any(), gomock.Eq("2d166190-7759-4dc6-b835-923262d6b497")).Return(nil, nil).AnyTimes() + mockClient.EXPECT().GetCluster(gomock.Any(), gomock.Eq("4d69ca7d-022f-49d1-a454-74535993bda4")).Return(nil, nil).AnyTimes() + + mockTLSValidator := mockCrypto.NewMockTlsValidator(ctrl) + mockTLSValidator.EXPECT().ValidateCert(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + + mockTransport := mocknutanix.NewMockRoundTripper(ctrl) + mockTransport.EXPECT().RoundTrip(gomock.Any()).Return(&http.Response{}, nil).AnyTimes() + + mockHTTPClient := &http.Client{Transport: mockTransport} + clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}} + validator := NewValidator(clientCache, mockTLSValidator, mockHTTPClient) + + for i := 0; i < len(clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations); i++ { + clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations[i].Count = utils.IntPtr(20) + } + + err := validator.validateFailureDomains(ctx, clientCache.clients["test"], clusterSpec) + if err == nil { + t.Fatalf("validation should not pass: %v", err) + } +}