From 4236a56b15deeb2f4365fa6ad7355755edd27967 Mon Sep 17 00:00:00 2001 From: Guillermo Gaston Date: Fri, 15 Sep 2023 14:24:57 -0500 Subject: [PATCH 1/3] Validate Memory Availability for machine configs for Vsphere provider as a pre-flight check for create and upgrade operations. Signed-off-by: Rahul Ganesh --- pkg/executables/govc.go | 74 ++++++++++++ pkg/executables/govc_test.go | 79 +++++++++++++ pkg/providers/vsphere/mocks/client.go | 20 ++++ pkg/providers/vsphere/vsphere.go | 156 +++++++++++++++++++++++++- pkg/providers/vsphere/vsphere_test.go | 153 ++++++++++++++++++++++--- 5 files changed, 467 insertions(+), 15 deletions(-) diff --git a/pkg/executables/govc.go b/pkg/executables/govc.go index c60aff0d9380..96035be71a38 100644 --- a/pkg/executables/govc.go +++ b/pkg/executables/govc.go @@ -11,6 +11,7 @@ import ( "net/http" "os" "path/filepath" + "regexp" "strconv" "strings" "time" @@ -39,6 +40,7 @@ const ( DeployOptsFile = "deploy-opts.json" disk1 = "Hard disk 1" disk2 = "Hard disk 2" + MemoryAvailable = "Memory_Available" ) var requiredEnvs = []string{govcUsernameKey, govcPasswordKey, govcURLKey, govcInsecure, govcDatacenterKey} @@ -1143,3 +1145,75 @@ func (g *Govc) SetGroupRoleOnObject(ctx context.Context, principal string, role return nil } + +type resourcePoolInfo struct { + ResourcePoolIdentifier *resourcePool +} + +type resourcePool struct { + memoryUsage string + memoryLimit string +} + +// GetResourcePoolInfo returns the pool info for the provided resource pool. +func (g *Govc) GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool string, args ...string) (map[string]int, error) { + params := []string{"pool.info", "-dc", datacenter, resourcepool} + params = append(params, args...) + response, err := g.exec(ctx, params...) + if err != nil { + return nil, fmt.Errorf("getting resource pool information: %v", err) + } + + scanner := bufio.NewScanner(strings.NewReader(response.String())) + var resourcePoolInfoResponse resourcePoolInfo + resourcePoolInfoResponse.ResourcePoolIdentifier = new(resourcePool) + for scanner.Scan() { + metaData := scanner.Text() + if strings.Contains(metaData, "Mem Usage") { + resourcePoolInfoResponse.ResourcePoolIdentifier.memoryUsage = strings.Split(metaData, ":")[1] + } + if strings.Contains(metaData, "Mem Limit") { + resourcePoolInfoResponse.ResourcePoolIdentifier.memoryLimit = strings.Split(metaData, ":")[1] + } + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("failure reading memory allocation for resource pool") + } + + poolInfo, err := getPoolInfo(resourcePoolInfoResponse.ResourcePoolIdentifier) + if err != nil { + return nil, err + } + return poolInfo, nil +} + +// helper function that parses the resource pool response and returns memory requirements. 
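For context on what the scanner above pulls out of the "govc pool.info" response, here is a minimal, standalone sketch of the same parsing idea. It is illustrative only: the sample text, the memValue helper, and the hard-coded values are not part of the patch, whose real implementation is GetResourcePoolInfo/getPoolInfo.

package main

import (
	"bufio"
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

// sample is made-up text in the shape of a `govc pool.info` response.
const sample = `Name:           Test-ResourcePool
  Path:         /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool
  Mem Usage:    100MB (11.3%)
  Mem Limit:    1000MB`

// memValue keeps only digits and a leading minus sign from the first token,
// mirroring the getValueFromString approach above.
func memValue(s string) (int, error) {
	first := strings.Split(strings.TrimSpace(s), " ")[0]
	cleaned := regexp.MustCompile(`[^0-9-]+`).ReplaceAllString(first, "")
	return strconv.Atoi(cleaned)
}

func main() {
	usage, limit := 0, -1
	scanner := bufio.NewScanner(strings.NewReader(sample))
	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case strings.Contains(line, "Mem Usage"):
			usage, _ = memValue(strings.SplitN(line, ":", 2)[1]) // errors ignored for brevity in this sketch
		case strings.Contains(line, "Mem Limit"):
			limit, _ = memValue(strings.SplitN(line, ":", 2)[1])
		}
	}
	available := limit // govc reports -1 when no memory limit is configured
	if limit != -1 {
		available = limit - usage
	}
	fmt.Println("Memory_Available:", available) // 900 for this sample
}

The provider-side checks added later in this patch compare that Memory_Available figure against MemoryMiB * NumCPUs * count for every machine group sharing the resource pool; during upgrades the memory of machines being replaced is credited back, minus the one old machine that lingers until the rollout completes, and a limit of -1 (no limit) skips the comparison entirely.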
+func getPoolInfo(rp *resourcePool) (map[string]int, error) { + memoryUsed, err := getValueFromString(rp.memoryUsage) + if err != nil { + return nil, fmt.Errorf("unable to obtain memory usage for resource pool %s: %v", rp.memoryUsage, err) + } + memoryLimit, err := getValueFromString(rp.memoryLimit) + if err != nil { + return nil, fmt.Errorf("unable to obtain memory limit for resource pool %s: %v", rp.memoryLimit, err) + } + poolInfo := make(map[string]int) + if memoryLimit != -1 { + poolInfo[MemoryAvailable] = memoryLimit - memoryUsed + } else { + poolInfo[MemoryAvailable] = memoryLimit + } + return poolInfo, nil +} + +func getValueFromString(str string) (int, error) { + splitResponse := strings.Split(strings.TrimSpace(str), " ") + nonNumericRegex := regexp.MustCompile(`[^0-9- ]+`) + cleanedString := nonNumericRegex.ReplaceAllString(splitResponse[0], "") + numValue, err := strconv.Atoi(cleanedString) + if err != nil { + return 0, err + } + return numValue, nil +} diff --git a/pkg/executables/govc_test.go b/pkg/executables/govc_test.go index 424fdde9834c..f3c03109167e 100644 --- a/pkg/executables/govc_test.go +++ b/pkg/executables/govc_test.go @@ -1643,3 +1643,82 @@ func TestGovcGetHardDiskSizeError(t *testing.T) { }) } } + +func TestGovcGetResourcePoolInfo(t *testing.T) { + datacenter := "SDDC-Datacenter" + resourcePool := "*/Resources/Test-ResourcePool" + govcErr := errors.New("error PoolInfo()") + ctx := context.Background() + _, g, executable, env := setup(t) + + tests := []struct { + testName string + response string + govcErr error + wantErr error + wantMemInfo map[string]int + }{ + { + testName: "pool_info_memory_limit_set", + response: `Name: Test-ResourcePool + Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool + Mem Usage: 100MB (11.3%) + Mem Shares: normal + Mem Reservation: 0MB (expandable=true) + Mem Limit: 1000MB`, + govcErr: nil, + wantErr: nil, + wantMemInfo: map[string]int{executables.MemoryAvailable: 900}, + }, + { + testName: "pool_info_memory_limit_unset", + response: `Name: Test-ResourcePool + Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool + Mem Usage: 100MB (11.3%) + Mem Shares: normal + Mem Reservation: 0MB (expandable=true) + Mem Limit: -1MB`, + govcErr: nil, + wantErr: nil, + wantMemInfo: map[string]int{executables.MemoryAvailable: -1}, + }, + { + testName: "pool_info_memory_usage_corrupt", + response: `Name: Test-ResourcePool + Mem Usage:corrupt-val + Mem Limit:-1MB`, + govcErr: nil, + wantErr: fmt.Errorf("unable to obtain memory usage for resource pool corrupt-val: strconv.Atoi: parsing \"-\": invalid syntax"), + wantMemInfo: nil, + }, + { + testName: "pool_info_memory_limit_corrupt", + response: `Name: Test-ResourcePool + Mem Usage:100 + Mem Limit:corrupt-val`, + govcErr: nil, + wantErr: fmt.Errorf("unable to obtain memory limit for resource pool corrupt-val: strconv.Atoi: parsing \"-\": invalid syntax"), + wantMemInfo: nil, + }, + { + testName: "pool_info_error", + response: "", + govcErr: govcErr, + wantErr: fmt.Errorf("getting resource pool information: %v", govcErr), + wantMemInfo: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.testName, func(t *testing.T) { + gt := NewWithT(t) + responseBytes := bytes.NewBuffer([]byte(tt.response)) + executable.EXPECT().ExecuteWithEnv(ctx, env, "pool.info", "-dc", datacenter, resourcePool).Return(*responseBytes, tt.govcErr) + poolMemInfo, err := g.GetResourcePoolInfo(ctx, datacenter, resourcePool) + if tt.wantErr != nil { + gt.Expect(err.Error()).To(Equal(tt.wantErr.Error())) + } + 
gt.Expect(poolMemInfo).To(Equal(tt.wantMemInfo)) + }) + } +} diff --git a/pkg/providers/vsphere/mocks/client.go b/pkg/providers/vsphere/mocks/client.go index 886647f29002..ac5ff8d60e7b 100644 --- a/pkg/providers/vsphere/mocks/client.go +++ b/pkg/providers/vsphere/mocks/client.go @@ -258,6 +258,26 @@ func (mr *MockProviderGovcClientMockRecorder) GetLibraryElementContentVersion(ar return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetLibraryElementContentVersion", reflect.TypeOf((*MockProviderGovcClient)(nil).GetLibraryElementContentVersion), arg0, arg1) } +// GetResourcePoolInfo mocks base method. +func (m *MockProviderGovcClient) GetResourcePoolInfo(arg0 context.Context, arg1, arg2 string, arg3 ...string) (map[string]int, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1, arg2} + for _, a := range arg3 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "GetResourcePoolInfo", varargs...) + ret0, _ := ret[0].(map[string]int) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetResourcePoolInfo indicates an expected call of GetResourcePoolInfo. +func (mr *MockProviderGovcClientMockRecorder) GetResourcePoolInfo(arg0, arg1, arg2 interface{}, arg3 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1, arg2}, arg3...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetResourcePoolInfo", reflect.TypeOf((*MockProviderGovcClient)(nil).GetResourcePoolInfo), varargs...) +} + // GetTags mocks base method. func (m *MockProviderGovcClient) GetTags(arg0 context.Context, arg1 string) ([]string, error) { m.ctrl.T.Helper() diff --git a/pkg/providers/vsphere/vsphere.go b/pkg/providers/vsphere/vsphere.go index 84b9fa342bfe..b1ebb2e8043b 100644 --- a/pkg/providers/vsphere/vsphere.go +++ b/pkg/providers/vsphere/vsphere.go @@ -52,6 +52,7 @@ const ( backOffPeriod = 5 * time.Second disk1 = "Hard disk 1" disk2 = "Hard disk 2" + MemoryAvailable = "Memory_Available" ethtoolDaemonSetName = "vsphere-disable-udp-offload" ) @@ -122,6 +123,7 @@ type ProviderGovcClient interface { CreateRole(ctx context.Context, name string, privileges []string) error SetGroupRoleOnObject(ctx context.Context, principal string, role string, object string, domain string) error GetHardDiskSize(ctx context.Context, vm, datacenter string) (map[string]float64, error) + GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool string, args ...string) (map[string]int, error) } type ProviderKubectlClient interface { @@ -338,7 +340,9 @@ func (p *vsphereProvider) SetupAndValidateCreateCluster(ctx context.Context, clu if err := p.validateDatastoreUsageForCreate(ctx, vSphereClusterSpec); err != nil { return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err) } - + if err := p.validateMemoryUsageForCreate(ctx, vSphereClusterSpec); err != nil { + return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err) + } if err := p.generateSSHKeysIfNotSet(clusterSpec.VSphereMachineConfigs); err != nil { return fmt.Errorf("failed setup and validations: %v", err) } @@ -419,6 +423,10 @@ func (p *vsphereProvider) SetupAndValidateUpgradeCluster(ctx context.Context, cl return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err) } + if err := p.validateMemoryUsageForUpgrade(ctx, vSphereClusterSpec, cluster); err != nil { + return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err) + } + if !p.skippedValidations[validations.VSphereUserPriv] { if err := 
p.validator.validateVsphereUserPrivs(ctx, vSphereClusterSpec); err != nil { return fmt.Errorf("validating vsphere user privileges: %v", err) @@ -592,6 +600,152 @@ func (p *vsphereProvider) validateDatastoreUsageForCreate(ctx context.Context, v return nil } +type memoryUsage struct { + availableMemoryMiB int + needMemoryMiB int +} + +func (p *vsphereProvider) getPrevMachineConfigMemoryUsage(ctx context.Context, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, count int) (memoryMiB int, err error) { + em, err := p.providerKubectlClient.GetEksaVSphereMachineConfig(ctx, mc.Name, cluster.KubeconfigFile, mc.GetNamespace()) + if err != nil { + return 0, err + } + if em != nil && em.Spec.ResourcePool == mc.Spec.ResourcePool { + return em.Spec.MemoryMiB * em.Spec.NumCPUs * count, nil + } + return 0, nil +} + +func (p *vsphereProvider) getMachineConfigMemoryRequirements(ctx context.Context, dc string, mc *v1alpha1.VSphereMachineConfig, count int) (available int, need int, err error) { + poolInfo, err := p.providerGovcClient.GetResourcePoolInfo(ctx, dc, mc.Spec.ResourcePool) + if err != nil { + return 0, 0, err + } + needMemoryMiB := mc.Spec.MemoryMiB * mc.Spec.NumCPUs * count + return poolInfo[MemoryAvailable], needMemoryMiB, nil +} + +func (p *vsphereProvider) calculateResourcePoolMemoryUsage(ctx context.Context, dc string, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, mu map[string]*memoryUsage, prevCount, newCount int) error { + availableMemoryMiB, needMemoryMiB, err := p.getMachineConfigMemoryRequirements(ctx, dc, mc, newCount) + if err != nil { + return err + } + + // the last old machine is deleted only after the desired number of new machines are rolled out, so reduce 1 count to accommodate for the last old machine which is not deleted until the end of rollout + prevUsage, err := p.getPrevMachineConfigMemoryUsage(ctx, mc, cluster, prevCount-1) + if err != nil { + return err + } + // update the available memory with previous usage only when memory limit is set + if availableMemoryMiB != -1 { + availableMemoryMiB += prevUsage + } + updateMemoryUsageMap(mc, needMemoryMiB, availableMemoryMiB, prevUsage, mu) + return nil +} + +func updateMemoryUsageMap(mc *v1alpha1.VSphereMachineConfig, needMiB, availableMiB, prevUsage int, mu map[string]*memoryUsage) { + if _, ok := mu[mc.Spec.ResourcePool]; ok && mu[mc.Spec.ResourcePool].availableMemoryMiB != -1 { + mu[mc.Spec.ResourcePool].needMemoryMiB += needMiB + mu[mc.Spec.ResourcePool].availableMemoryMiB += prevUsage + } else { + mu[mc.Spec.ResourcePool] = &memoryUsage{ + availableMemoryMiB: availableMiB, + needMemoryMiB: needMiB, + } + } +} + +func (p *vsphereProvider) validateMemoryUsageForCreate(ctx context.Context, clusterSpec *Spec) error { + memoryUsage := make(map[string]*memoryUsage) + datacenter := clusterSpec.VSphereDatacenter.Spec.Datacenter + cpMachineConfig := clusterSpec.controlPlaneMachineConfig() + controlPlaneAvailableMiB, controlPlaneNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, cpMachineConfig, clusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count) + if err != nil { + return fmt.Errorf("calculating memory usage for control plane: %v", err) + } + updateMemoryUsageMap(cpMachineConfig, controlPlaneNeedMiB, controlPlaneAvailableMiB, 0, memoryUsage) + for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { + workerMachineConfig := clusterSpec.workerMachineConfig(workerNodeGroupConfiguration) + workerAvailableMiB, workerNeedMiB, err := 
p.getMachineConfigMemoryRequirements(ctx, datacenter, workerMachineConfig, *workerNodeGroupConfiguration.Count) + if err != nil { + return fmt.Errorf("calculating memory usage for worker node groups: %v", err) + } + updateMemoryUsageMap(workerMachineConfig, workerNeedMiB, workerAvailableMiB, 0, memoryUsage) + } + etcdMachineConfig := clusterSpec.etcdMachineConfig() + if etcdMachineConfig != nil { + etcdAvailableMiB, etcdNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, etcdMachineConfig, clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.Count) + if err != nil { + return err + } + updateMemoryUsageMap(etcdMachineConfig, etcdNeedMiB, etcdAvailableMiB, 0, memoryUsage) + } + for resourcePool, usage := range memoryUsage { + if usage.availableMemoryMiB != -1 { + if usage.needMemoryMiB > usage.availableMemoryMiB { + return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool) + } + } + } + logger.V(5).Info("Memory availability for machine configs in requested resource pool validated") + return nil +} + +func (p *vsphereProvider) validateMemoryUsageForUpgrade(ctx context.Context, currentClusterSpec *Spec, cluster *types.Cluster) error { + memoryUsage := make(map[string]*memoryUsage) + prevEksaCluster, err := p.providerKubectlClient.GetEksaCluster(ctx, cluster, currentClusterSpec.Cluster.GetName()) + if err != nil { + return err + } + + datacenter := currentClusterSpec.VSphereDatacenter.Spec.Datacenter + cpMachineConfig := currentClusterSpec.controlPlaneMachineConfig() + if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, cpMachineConfig, cluster, memoryUsage, prevEksaCluster.Spec.ControlPlaneConfiguration.Count, currentClusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count); err != nil { + return fmt.Errorf("calculating memory usage for control plane: %v", err) + } + + prevMachineConfigRefs := machineRefSliceToMap(prevEksaCluster.MachineConfigRefs()) + if err := p.getWorkerNodeGroupMemoryUsage(ctx, datacenter, currentClusterSpec, cluster, memoryUsage, prevMachineConfigRefs); err != nil { + return fmt.Errorf("calculating memory usage for worker node groups: %v", err) + } + + etcdMachineConfig := currentClusterSpec.etcdMachineConfig() + if etcdMachineConfig != nil { + etcdAvailableMiB, etcdNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, etcdMachineConfig, currentClusterSpec.Cluster.Spec.ExternalEtcdConfiguration.Count) + if err != nil { + return err + } + // older etcd machines are not deleted until the end of rollout so do not account for the previous usage + updateMemoryUsageMap(etcdMachineConfig, etcdNeedMiB, etcdAvailableMiB, 0, memoryUsage) + } + + for resourcePool, usage := range memoryUsage { + if usage.availableMemoryMiB != -1 && usage.needMemoryMiB > usage.availableMemoryMiB { + return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool) + } + } + logger.V(5).Info("Memory availability for machine configs in requested resource pool validated") + return nil +} + +func (p *vsphereProvider) getWorkerNodeGroupMemoryUsage(ctx context.Context, datacenter string, currentClusterSpec *Spec, cluster *types.Cluster, memoryUsage map[string]*memoryUsage, prevMachineConfigRefs map[string]v1alpha1.Ref) error { + for _, workerNodeGroupConfiguration := range currentClusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { + prevCount := 0 + workerMachineConfig := 
currentClusterSpec.workerMachineConfig(workerNodeGroupConfiguration) + if _, ok := prevMachineConfigRefs[workerNodeGroupConfiguration.MachineGroupRef.Name]; ok { + prevCount = *workerNodeGroupConfiguration.Count + } else { + // set count to 1 when no previous machine config was found for the current worker node group in the spec to avoid a negative count in calculation + prevCount = 1 + } + if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, workerMachineConfig, cluster, memoryUsage, prevCount, *workerNodeGroupConfiguration.Count); err != nil { + return fmt.Errorf("calculating memory usage: %v", err) + } + } + return nil +} + func (p *vsphereProvider) UpdateSecrets(ctx context.Context, cluster *types.Cluster, _ *cluster.Spec) error { var contents bytes.Buffer err := p.createSecret(ctx, cluster, &contents) diff --git a/pkg/providers/vsphere/vsphere_test.go b/pkg/providers/vsphere/vsphere_test.go index 2d0488e2b5e0..97d090f3c2e4 100644 --- a/pkg/providers/vsphere/vsphere_test.go +++ b/pkg/providers/vsphere/vsphere_test.go @@ -155,6 +155,10 @@ func (pc *DummyProviderGovcClient) GetHardDiskSize(ctx context.Context, vm, data return map[string]float64{"Hard disk 1": 23068672}, nil } +func (pc *DummyProviderGovcClient) GetResourcePoolInfo(ctx context.Context, datacenter, resourcePool string, args ...string) (map[string]int, error) { + return map[string]int{"Memory_Available": -1}, nil +} + func (pc *DummyProviderGovcClient) GetTags(ctx context.Context, path string) (tags []string, err error) { return []string{eksd119ReleaseTag, eksd121ReleaseTag, pc.osTag}, nil } @@ -1469,8 +1473,8 @@ func TestSetupAndValidateUpgradeClusterMissingPrivError(t *testing.T) { provider.providerKubectlClient = kubectl setupContext(t) - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(1) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) vscb := mocks.NewMockVSphereClientBuilder(mockCtrl) vscb.EXPECT().Build(ctx, gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), clusterSpec.VSphereDatacenter.Spec.Datacenter).Return(nil, fmt.Errorf("error")) @@ -1735,8 +1739,8 @@ func TestSetupAndValidateUpgradeCluster(t *testing.T) { provider.providerKubectlClient = kubectl setupContext(t) - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { t.Fatalf("unexpected failure %v", err) @@ -1799,8 +1803,8 @@ func TestSetupAndValidateUpgradeClusterCPSshNotExists(t *testing.T) { provider.providerKubectlClient = kubectl cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, 
clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { t.Fatalf("unexpected failure %v", err) @@ -1820,8 +1824,8 @@ func TestSetupAndValidateUpgradeClusterWorkerSshNotExists(t *testing.T) { provider.providerKubectlClient = kubectl cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { @@ -1842,8 +1846,8 @@ func TestSetupAndValidateUpgradeClusterEtcdSshNotExists(t *testing.T) { provider.providerKubectlClient = kubectl cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { @@ -1862,13 +1866,15 @@ func TestSetupAndValidateUpgradeClusterSameMachineConfigforCPandEtcd(t *testing. 
mockCtrl := gomock.NewController(t) kubectl := mocks.NewMockProviderKubectlClient(mockCtrl) provider.providerKubectlClient = kubectl - cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) for _, mc := range clusterSpec.VSphereMachineConfigs { - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Return(mc, nil) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Return(mc, nil).AnyTimes() } - + // resourcePoolResponse := map[string]int{ + // "Memory_Available": -1, + // } + // provider.govc.EXPECT().GetResourcePoolInfo(tt.ctx, tt.clusterSpec.VSphereDatacenter.Spec.Datacenter, tt.clusterSpec.VSphereMachineConfigs[controlPlaneMachineConfigName].Spec.ResourcePool).Return(resourcePoolResponse, nil) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { t.Fatalf("unexpected failure %v", err) @@ -2338,6 +2344,10 @@ func TestSetupAndValidateCreateClusterFullCloneDiskGiBLessThan20TemplateDiskSize tt.govc.EXPECT().GetWorkloadAvailableSpace(tt.ctx, tt.clusterSpec.VSphereMachineConfigs[controlPlaneMachineConfigName].Spec.Datastore).Return(100.0, nil) tt.ipValidator.EXPECT().ValidateControlPlaneIPUniqueness(tt.cluster) + resourcePoolResponse := map[string]int{ + "Memory_Available": -1, + } + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, tt.clusterSpec.VSphereDatacenter.Spec.Datacenter, tt.clusterSpec.VSphereMachineConfigs[controlPlaneMachineConfigName].Spec.ResourcePool).Return(resourcePoolResponse, nil) err := tt.provider.SetupAndValidateCreateCluster(context.Background(), tt.clusterSpec) assert.NoError(t, err, "No error expected for provider.SetupAndValidateCreateCluster()") @@ -3472,6 +3482,121 @@ func TestValidateMachineConfigsDatastoreUsageUpgradeError(t *testing.T) { thenErrorExpected(t, fmt.Sprintf("not enough space in datastore %s for given diskGiB and count for respective machine groups", tt.clusterSpec.VSphereMachineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name].Spec.Datastore), err) } +func TestValidateMachineConfigsMemoryUsageCreateSuccess(t *testing.T) { + tt := newProviderTest(t) + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool = "test-resourcepool" + for _, config := range machineConfigs { + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: -1}, nil) + } + vSpec := NewSpec(tt.clusterSpec) + err := tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec) + if err != nil { + t.Fatalf("unexpected failure %v", err) + } +} + +func TestValidateMachineConfigsMemoryUsageCreateError(t *testing.T) { + tt := newProviderTest(t) + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + for _, config := range machineConfigs { + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: 100000}, nil) + } + vSpec := NewSpec(tt.clusterSpec) + err := 
tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec) + resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool + thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err) +} + +func TestSetupAndValidateCreateClusterMemoryUsageError(t *testing.T) { + tt := newProviderTest(t) + tt.setExpectationForSetup() + tt.setExpectationForVCenterValidation() + tt.setExpectationsForDefaultDiskAndCloneModeGovcCalls() + tt.setExpectationsForMachineConfigsVCenterValidation() + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + cpMachineConfig := tt.machineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name] + for _, mc := range tt.machineConfigs { + tt.govc.EXPECT().SearchTemplate(tt.ctx, tt.datacenterConfig.Spec.Datacenter, mc.Spec.Template).Return(mc.Spec.Template, nil).AnyTimes() + } + tt.govc.EXPECT().GetTags(tt.ctx, cpMachineConfig.Spec.Template).Return([]string{eksd119ReleaseTag, ubuntuOSTag}, nil) + tt.govc.EXPECT().ListTags(tt.ctx) + tt.govc.EXPECT().GetWorkloadAvailableSpace(tt.ctx, cpMachineConfig.Spec.Datastore).Return(1000.0, nil).AnyTimes() + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, cpMachineConfig.Spec.ResourcePool).Return(nil, fmt.Errorf("error")) + err := tt.provider.SetupAndValidateCreateCluster(tt.ctx, tt.clusterSpec) + thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for control plane: error", err) +} + +func TestValidateMachineConfigsMemoryUsageUpgradeSuccess(t *testing.T) { + tt := newProviderTest(t) + cluster := &types.Cluster{ + Name: "test", + } + tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil) + vSpec := NewSpec(tt.clusterSpec) + vSpec.Cluster.Spec.ControlPlaneConfiguration.Count += 2 + // change the worker node group to test there is no negative count scenario + wnMachineConfig := getMachineConfig(vSpec.Spec, "test-wn") + newMachineConfigName := "new-test-wn" + newWorkerMachineConfig := wnMachineConfig.DeepCopy() + newWorkerMachineConfig.Name = newMachineConfigName + vSpec.VSphereMachineConfigs[newMachineConfigName] = newWorkerMachineConfig + vSpec.Cluster.Spec.WorkerNodeGroupConfigurations[0].MachineGroupRef.Name = newMachineConfigName + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + for _, config := range machineConfigs { + tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, config.Name, cluster.KubeconfigFile, config.Namespace).Return(config, nil).AnyTimes() + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: -1}, nil).AnyTimes() + } + err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster) + if err != nil { + t.Fatalf("unexpected failure %v", err) + } +} + +func TestValidateMachineConfigsMemoryUsageUpgradeError(t *testing.T) { + tt := newProviderTest(t) + cluster := &types.Cluster{ + Name: "test", + } + tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil) + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + for _, config := range machineConfigs { + 
tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, config.Name, cluster.KubeconfigFile, config.Namespace).AnyTimes() + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: 10000}, nil) + } + vSpec := NewSpec(tt.clusterSpec) + vSpec.Cluster.Spec.ControlPlaneConfiguration.Count += 2 + err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster) + resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool + thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err) +} + +func TestSetupAndValidateUpgradeClusterMemoryUsageError(t *testing.T) { + tt := newProviderTest(t) + cluster := &types.Cluster{ + Name: "test", + } + tt.setExpectationForSetup() + tt.setExpectationForVCenterValidation() + tt.setExpectationsForDefaultDiskAndCloneModeGovcCalls() + tt.setExpectationsForMachineConfigsVCenterValidation() + tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil).Times(2) + tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, gomock.Any(), cluster.KubeconfigFile, tt.clusterSpec.Cluster.GetNamespace()).AnyTimes() + cpMachineConfig := tt.machineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name] + tt.govc.EXPECT().SearchTemplate(tt.ctx, tt.datacenterConfig.Spec.Datacenter, cpMachineConfig.Spec.Template).Return(cpMachineConfig.Spec.Template, nil).AnyTimes() + tt.govc.EXPECT().GetTags(tt.ctx, cpMachineConfig.Spec.Template).Return([]string{eksd119ReleaseTag, ubuntuOSTag}, nil) + tt.govc.EXPECT().ListTags(tt.ctx) + tt.govc.EXPECT().GetWorkloadAvailableSpace(tt.ctx, cpMachineConfig.Spec.Datastore).Return(1000.0, nil).AnyTimes() + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, cpMachineConfig.Spec.ResourcePool).Return(nil, fmt.Errorf("error")) + err := tt.provider.SetupAndValidateUpgradeCluster(tt.ctx, cluster, tt.clusterSpec, tt.clusterSpec) + thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for control plane: error", err) +} + func TestValidateMachineConfigsNameUniquenessSuccess(t *testing.T) { tt := newProviderTest(t) cluster := &types.Cluster{ From 4ba484312e7474d19c2cbb519d7cdab55d0ecaf8 Mon Sep 17 00:00:00 2001 From: Rahul Ganesh Date: Sun, 17 Sep 2023 22:19:27 -0700 Subject: [PATCH 2/3] remove commented lines and add functional comments Signed-off-by: Rahul Ganesh --- pkg/executables/govc.go | 3 ++- pkg/providers/vsphere/vsphere_test.go | 4 ---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pkg/executables/govc.go b/pkg/executables/govc.go index 96035be71a38..6f3c653ae224 100644 --- a/pkg/executables/govc.go +++ b/pkg/executables/govc.go @@ -1188,7 +1188,7 @@ func (g *Govc) GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool return poolInfo, nil } -// helper function that parses the resource pool response and returns memory requirements. +// getPoolInfo parses resource pool response and returns memory requirements. 
func getPoolInfo(rp *resourcePool) (map[string]int, error) { memoryUsed, err := getValueFromString(rp.memoryUsage) if err != nil { @@ -1207,6 +1207,7 @@ func getPoolInfo(rp *resourcePool) (map[string]int, error) { return poolInfo, nil } +// getValueFromString cleans the input string and returns the extracted numerical value. func getValueFromString(str string) (int, error) { splitResponse := strings.Split(strings.TrimSpace(str), " ") nonNumericRegex := regexp.MustCompile(`[^0-9- ]+`) diff --git a/pkg/providers/vsphere/vsphere_test.go b/pkg/providers/vsphere/vsphere_test.go index 97d090f3c2e4..73c6cacd618b 100644 --- a/pkg/providers/vsphere/vsphere_test.go +++ b/pkg/providers/vsphere/vsphere_test.go @@ -1871,10 +1871,6 @@ func TestSetupAndValidateUpgradeClusterSameMachineConfigforCPandEtcd(t *testing. for _, mc := range clusterSpec.VSphereMachineConfigs { kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Return(mc, nil).AnyTimes() } - // resourcePoolResponse := map[string]int{ - // "Memory_Available": -1, - // } - // provider.govc.EXPECT().GetResourcePoolInfo(tt.ctx, tt.clusterSpec.VSphereDatacenter.Spec.Datacenter, tt.clusterSpec.VSphereMachineConfigs[controlPlaneMachineConfigName].Spec.ResourcePool).Return(resourcePoolResponse, nil) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { t.Fatalf("unexpected failure %v", err) From a554fe6a58823f1bb51b5fc4f5fa11dd4b0b33d9 Mon Sep 17 00:00:00 2001 From: Rahul Ganesh Date: Tue, 26 Sep 2023 16:02:15 -0700 Subject: [PATCH 3/3] Facilitate modular upgrades for tinkerbell provider Signed-off-by: Rahul Ganesh --- pkg/api/v1alpha1/cluster_webhook.go | 7 --- pkg/api/v1alpha1/cluster_webhook_test.go | 52 +++++++++---------- pkg/api/v1alpha1/tinkerbellmachineconfig.go | 7 +++ .../v1alpha1/tinkerbellmachineconfig_types.go | 1 + pkg/providers/tinkerbell/assert.go | 5 ++ pkg/providers/tinkerbell/cluster.go | 1 + pkg/providers/tinkerbell/template.go | 24 +++++++-- pkg/providers/tinkerbell/validate.go | 23 +++++++- 8 files changed, 83 insertions(+), 37 deletions(-) diff --git a/pkg/api/v1alpha1/cluster_webhook.go b/pkg/api/v1alpha1/cluster_webhook.go index 703fbcd0ef44..080693223434 100644 --- a/pkg/api/v1alpha1/cluster_webhook.go +++ b/pkg/api/v1alpha1/cluster_webhook.go @@ -473,8 +473,6 @@ func validateKubeVersionSkew(newVersion, oldVersion KubernetesVersion, path *fie // ValidateWorkerKubernetesVersionSkew validates worker node group Kubernetes version skew between upgrades. 
func ValidateWorkerKubernetesVersionSkew(new, old *Cluster) field.ErrorList { var allErrs field.ErrorList - path := field.NewPath("spec").Child("WorkerNodeConfiguration.kubernetesVersion") - newClusterVersion := new.Spec.KubernetesVersion oldClusterVersion := old.Spec.KubernetesVersion @@ -485,11 +483,6 @@ func ValidateWorkerKubernetesVersionSkew(new, old *Cluster) field.ErrorList { for _, nodeGroupNewSpec := range new.Spec.WorkerNodeGroupConfigurations { newVersion := nodeGroupNewSpec.KubernetesVersion - if newVersion != nil && nodeGroupNewSpec.MachineGroupRef.Kind == TinkerbellMachineConfigKind { - allErrs = append(allErrs, field.Forbidden(path, "worker node group level kubernetesVersion is not supported for Tinkerbell")) - return allErrs - } - if workerNodeGrpOldSpec, ok := workerNodeGroupMap[nodeGroupNewSpec.Name]; ok { oldVersion := workerNodeGrpOldSpec.KubernetesVersion allErrs = append(allErrs, performWorkerKubernetesValidations(oldVersion, newVersion, oldClusterVersion, newClusterVersion)...) diff --git a/pkg/api/v1alpha1/cluster_webhook_test.go b/pkg/api/v1alpha1/cluster_webhook_test.go index feefe38afae1..73efd21cefd8 100644 --- a/pkg/api/v1alpha1/cluster_webhook_test.go +++ b/pkg/api/v1alpha1/cluster_webhook_test.go @@ -2317,29 +2317,29 @@ func TestValidateWorkerVersionSkewAddNodeGroup(t *testing.T) { g.Expect(err).To(Succeed()) } -func TestValidateWorkerVersionBlockTinkerbell(t *testing.T) { - kube119 := v1alpha1.KubernetesVersion("1.19") - - newCluster := baseCluster() - newCluster.Spec.KubernetesVersion = kube119 - newCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119 - newCluster.Spec.WorkerNodeGroupConfigurations[0].MachineGroupRef.Kind = v1alpha1.TinkerbellMachineConfigKind - newWorker := v1alpha1.WorkerNodeGroupConfiguration{ - Name: "md-1", - Count: ptr.Int(1), - MachineGroupRef: &v1alpha1.Ref{ - Kind: v1alpha1.TinkerbellMachineConfigKind, - Name: "eksa-unit-test", - }, - KubernetesVersion: &kube119, - } - newCluster.Spec.WorkerNodeGroupConfigurations = append(newCluster.Spec.WorkerNodeGroupConfigurations, newWorker) - - oldCluster := baseCluster() - oldCluster.Spec.KubernetesVersion = kube119 - oldCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119 - - err := newCluster.ValidateUpdate(oldCluster) - g := NewWithT(t) - g.Expect(err).ToNot(BeNil()) -} +// func TestValidateWorkerVersionBlockTinkerbell(t *testing.T) { +// kube119 := v1alpha1.KubernetesVersion("1.19") + +// newCluster := baseCluster() +// newCluster.Spec.KubernetesVersion = kube119 +// newCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119 +// newCluster.Spec.WorkerNodeGroupConfigurations[0].MachineGroupRef.Kind = v1alpha1.TinkerbellMachineConfigKind +// newWorker := v1alpha1.WorkerNodeGroupConfiguration{ +// Name: "md-1", +// Count: ptr.Int(1), +// MachineGroupRef: &v1alpha1.Ref{ +// Kind: v1alpha1.TinkerbellMachineConfigKind, +// Name: "eksa-unit-test", +// }, +// KubernetesVersion: &kube119, +// } +// newCluster.Spec.WorkerNodeGroupConfigurations = append(newCluster.Spec.WorkerNodeGroupConfigurations, newWorker) + +// oldCluster := baseCluster() +// oldCluster.Spec.KubernetesVersion = kube119 +// oldCluster.Spec.WorkerNodeGroupConfigurations[0].KubernetesVersion = &kube119 + +// err := newCluster.ValidateUpdate(oldCluster) +// g := NewWithT(t) +// g.Expect(err).ToNot(BeNil()) +// } diff --git a/pkg/api/v1alpha1/tinkerbellmachineconfig.go b/pkg/api/v1alpha1/tinkerbellmachineconfig.go index 20cd5a56e4e3..1750e721d7bd 100644 --- 
a/pkg/api/v1alpha1/tinkerbellmachineconfig.go +++ b/pkg/api/v1alpha1/tinkerbellmachineconfig.go @@ -2,6 +2,7 @@ package v1alpha1 import ( "fmt" + "net/url" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -91,6 +92,12 @@ func validateTinkerbellMachineConfig(config *TinkerbellMachineConfig) error { ) } + if config.Spec.OSImageURL != "" { + if _, err := url.ParseRequestURI(config.Spec.OSImageURL); err != nil { + return fmt.Errorf("parsing osImageOverride: %v", err) + } + } + if len(config.Spec.Users) == 0 { return fmt.Errorf("TinkerbellMachineConfig: missing spec.Users: %s", config.Name) } diff --git a/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go b/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go index 059452823297..e81649705938 100644 --- a/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go +++ b/pkg/api/v1alpha1/tinkerbellmachineconfig_types.go @@ -13,6 +13,7 @@ type TinkerbellMachineConfigSpec struct { HardwareSelector HardwareSelector `json:"hardwareSelector"` TemplateRef Ref `json:"templateRef,omitempty"` OSFamily OSFamily `json:"osFamily"` + OSImageURL string `json:"osImageURL"` Users []UserConfiguration `json:"users,omitempty"` HostOSConfiguration *HostOSConfiguration `json:"hostOSConfiguration,omitempty"` } diff --git a/pkg/providers/tinkerbell/assert.go b/pkg/providers/tinkerbell/assert.go index 99018e6c72b9..f5d4aef3f256 100644 --- a/pkg/providers/tinkerbell/assert.go +++ b/pkg/providers/tinkerbell/assert.go @@ -90,6 +90,11 @@ func AssertOsFamilyValid(spec *ClusterSpec) error { return validateOsFamily(spec) } +// AssertOSImageURLDontOverlap ensures that the OSImageURL value is either set at the datacenter config level or set for each machine config and not at both levels. +func AssertOSImageURLDontOverlap(spec *ClusterSpec) error { + return validateOSImageURLDontOverlap(spec) +} + // AssertcontrolPlaneIPNotInUse ensures the endpoint host for the control plane isn't in use. // The check may be unreliable due to its implementation. 
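The field-level check added to validateTinkerbellMachineConfig above leans on url.ParseRequestURI, which accepts an absolute URI or an absolute path and rejects bare host/file strings. A short standalone illustration, with made-up candidate values:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Candidate values a user might put in spec.osImageURL.
	candidates := []string{
		"https://image-server.example/ubuntu-2204-kube-1-27.gz", // accepted: absolute URL
		"http://10.0.0.5:8080/images/os.img",                    // accepted: IP and port are fine
		"image-server/os.img",                                   // rejected: no scheme, not an absolute path
	}
	for _, c := range candidates {
		if _, err := url.ParseRequestURI(c); err != nil {
			fmt.Printf("%q -> invalid: %v\n", c, err)
			continue
		}
		fmt.Printf("%q -> ok\n", c)
	}
}

A malformed value therefore fails fast at validation time instead of surfacing later as an unreachable image during provisioning.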
func NewIPNotInUseAssertion(client networkutils.NetClient) ClusterSpecAssertion { diff --git a/pkg/providers/tinkerbell/cluster.go b/pkg/providers/tinkerbell/cluster.go index fcc61f5bc36f..f14caf28f320 100644 --- a/pkg/providers/tinkerbell/cluster.go +++ b/pkg/providers/tinkerbell/cluster.go @@ -108,6 +108,7 @@ func NewClusterSpecValidator(assertions ...ClusterSpecAssertion) *ClusterSpecVal AssertMachineConfigsValid, AssertMachineConfigNamespaceMatchesDatacenterConfig, AssertOsFamilyValid, + AssertOSImageURLDontOverlap, AssertTinkerbellIPAndControlPlaneIPNotSame, AssertHookRetrievableWithoutProxy, ) diff --git a/pkg/providers/tinkerbell/template.go b/pkg/providers/tinkerbell/template.go index e3ca2a529cbf..a345f021ef70 100644 --- a/pkg/providers/tinkerbell/template.go +++ b/pkg/providers/tinkerbell/template.go @@ -69,9 +69,16 @@ func (tb *TemplateBuilder) GenerateCAPISpecControlPlane(clusterSpec *cluster.Spe if err != nil { return nil, err } + var OSImageURL string + if clusterSpec.TinkerbellDatacenter.Spec.OSImageURL != "" { + OSImageURL = clusterSpec.TinkerbellDatacenter.Spec.OSImageURL + } else { + OSImageURL = tb.controlPlaneMachineSpec.OSImageURL + } + if cpTemplateConfig == nil { versionBundle := bundle.VersionsBundle - cpTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, tb.datacenterSpec.OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.controlPlaneMachineSpec.OSFamily) + cpTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.controlPlaneMachineSpec.OSFamily) } cpTemplateString, err := cpTemplateConfig.ToTemplateString() @@ -81,12 +88,16 @@ func (tb *TemplateBuilder) GenerateCAPISpecControlPlane(clusterSpec *cluster.Spe var etcdMachineSpec v1alpha1.TinkerbellMachineConfigSpec var etcdTemplateString string + if clusterSpec.Cluster.Spec.ExternalEtcdConfiguration != nil { etcdMachineSpec = *tb.etcdMachineSpec + if etcdMachineSpec.OSImageURL != "" { + OSImageURL = etcdMachineSpec.OSImageURL + } etcdTemplateConfig := clusterSpec.TinkerbellTemplateConfigs[tb.etcdMachineSpec.TemplateRef.Name] if etcdTemplateConfig == nil { versionBundle := bundle.VersionsBundle - etcdTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, tb.datacenterSpec.OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.etcdMachineSpec.OSFamily) + etcdTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, tb.etcdMachineSpec.OSFamily) } etcdTemplateString, err = etcdTemplateConfig.ToTemplateString() if err != nil { @@ -111,12 +122,19 @@ func (tb *TemplateBuilder) GenerateCAPISpecControlPlane(clusterSpec *cluster.Spe func (tb *TemplateBuilder) GenerateCAPISpecWorkers(clusterSpec *cluster.Spec, workloadTemplateNames, kubeadmconfigTemplateNames map[string]string) (content []byte, err error) { workerSpecs := make([][]byte, 0, len(clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations)) bundle := clusterSpec.RootVersionsBundle() + var OSImageURL string + if clusterSpec.TinkerbellDatacenter.Spec.OSImageURL != "" { + OSImageURL = clusterSpec.TinkerbellDatacenter.Spec.OSImageURL + } for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { workerNodeMachineSpec := 
tb.WorkerNodeGroupMachineSpecs[workerNodeGroupConfiguration.MachineGroupRef.Name] wTemplateConfig := clusterSpec.TinkerbellTemplateConfigs[workerNodeMachineSpec.TemplateRef.Name] if wTemplateConfig == nil { versionBundle := bundle.VersionsBundle - wTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, tb.datacenterSpec.OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, workerNodeMachineSpec.OSFamily) + if workerNodeMachineSpec.OSImageURL != "" { + OSImageURL = workerNodeMachineSpec.OSImageURL + } + wTemplateConfig = v1alpha1.NewDefaultTinkerbellTemplateConfigCreate(clusterSpec.Cluster, *versionBundle, OSImageURL, tb.tinkerbellIP, tb.datacenterSpec.TinkerbellIP, workerNodeMachineSpec.OSFamily) } wTemplateString, err := wTemplateConfig.ToTemplateString() diff --git a/pkg/providers/tinkerbell/validate.go b/pkg/providers/tinkerbell/validate.go index 2ddcccc97d81..1a1a22c1ab12 100644 --- a/pkg/providers/tinkerbell/validate.go +++ b/pkg/providers/tinkerbell/validate.go @@ -29,12 +29,33 @@ func validateOsFamily(spec *ClusterSpec) error { } } - if controlPlaneOsFamily != v1alpha1.Bottlerocket && spec.DatacenterConfig.Spec.OSImageURL == "" { + if controlPlaneOsFamily != v1alpha1.Bottlerocket && spec.DatacenterConfig.Spec.OSImageURL == "" && spec.ControlPlaneMachineConfig().Spec.OSImageURL == "" { return fmt.Errorf("please use bottlerocket as osFamily for auto-importing or provide a valid osImageURL") } return nil } +func validateOSImageURLDontOverlap(spec *ClusterSpec) error { + var dcOSImageURLfound bool + if spec.TinkerbellDatacenter.Spec.OSImageURL != "" { + dcOSImageURLfound = true + } + return validateMachineCfgOSImageURL(spec.TinkerbellMachineConfigs, dcOSImageURLfound) +} + +func validateMachineCfgOSImageURL(machineConfigs map[string]*v1alpha1.TinkerbellMachineConfig, dataCenterOSImageURLfound bool) error { + for _, mc := range machineConfigs { + if mc.Spec.OSImageURL != "" && dataCenterOSImageURLfound { + return fmt.Errorf("overlapping OSImageURL found, OSImageURL can either be set at datacenter config or for each machine config not both") + } + if mc.Spec.OSImageURL == "" && !dataCenterOSImageURLfound && mc.Spec.OSFamily != v1alpha1.Bottlerocket { + return fmt.Errorf("OSImageURL should be set for each machine config not found for: %s", mc.ObjectMeta.Name) + } + } + + return nil +} + func validateMachineRefExists( ref *v1alpha1.Ref, machineConfigs map[string]*v1alpha1.TinkerbellMachineConfig,
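To make the new overlap rule concrete, here is a sketch of a unit test that could sit next to validateMachineCfgOSImageURL. It assumes the function signature shown above and the existing v1alpha1 API types; the test name and the helper are illustrative and not part of this PR.

package tinkerbell

import (
	"testing"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
)

func machineConfigWithImage(name, imageURL string, family v1alpha1.OSFamily) *v1alpha1.TinkerbellMachineConfig {
	return &v1alpha1.TinkerbellMachineConfig{
		ObjectMeta: metav1.ObjectMeta{Name: name},
		Spec: v1alpha1.TinkerbellMachineConfigSpec{
			OSImageURL: imageURL,
			OSFamily:   family,
		},
	}
}

func TestValidateMachineCfgOSImageURLSketch(t *testing.T) {
	// OSImageURL set on both the datacenter config and a machine config: overlap error.
	overlapping := map[string]*v1alpha1.TinkerbellMachineConfig{
		"cp": machineConfigWithImage("cp", "https://images.example/ubuntu.gz", v1alpha1.Ubuntu),
	}
	if err := validateMachineCfgOSImageURL(overlapping, true); err == nil {
		t.Error("expected overlap error when OSImageURL is set at both levels")
	}

	// No OSImageURL at either level for a non-Bottlerocket config: error.
	missing := map[string]*v1alpha1.TinkerbellMachineConfig{
		"worker": machineConfigWithImage("worker", "", v1alpha1.Ubuntu),
	}
	if err := validateMachineCfgOSImageURL(missing, false); err == nil {
		t.Error("expected error when OSImageURL is missing for a non-Bottlerocket machine config")
	}

	// Bottlerocket may omit the URL at both levels because it supports auto-importing.
	bottlerocket := map[string]*v1alpha1.TinkerbellMachineConfig{
		"etcd": machineConfigWithImage("etcd", "", v1alpha1.Bottlerocket),
	}
	if err := validateMachineCfgOSImageURL(bottlerocket, false); err != nil {
		t.Errorf("unexpected error for Bottlerocket without OSImageURL: %v", err)
	}
}

The intent of the rule is to keep the image source unambiguous: either the datacenter config supplies one OSImageURL for every node, or each machine config names its own, and only Bottlerocket may omit both because it can fall back to auto-importing.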