From a30e86a9bd38c55995a9f7213d2cf5c9b07919dd Mon Sep 17 00:00:00 2001
From: Rahul Ganesh
Date: Wed, 20 Sep 2023 14:26:11 -0700
Subject: [PATCH] Add a helper function that returns Machine Config with count.

Use the helper function in validation to simplify the algorithm.

Signed-off-by: Rahul Ganesh
---
 pkg/providers/vsphere/spec.go         |  30 ++++++
 pkg/providers/vsphere/vsphere.go      | 149 +++++++++-----------------
 pkg/providers/vsphere/vsphere_test.go |  16 +--
 3 files changed, 88 insertions(+), 107 deletions(-)

diff --git a/pkg/providers/vsphere/spec.go b/pkg/providers/vsphere/spec.go
index abc3114a2883..3e24135cb347 100644
--- a/pkg/providers/vsphere/spec.go
+++ b/pkg/providers/vsphere/spec.go
@@ -37,6 +37,36 @@ func (s *Spec) machineConfigs() []*anywherev1.VSphereMachineConfig {
 	return machineConfigs
 }
 
+// MachineConfigCount represents a machineConfig with its associated count.
+type MachineConfigCount struct {
+	*anywherev1.VSphereMachineConfig
+	Count int
+}
+
+func (s *Spec) machineConfigsWithCount() []MachineConfigCount {
+	machineConfigs := make([]MachineConfigCount, 0, len(s.VSphereMachineConfigs))
+	cpMachineConfig := MachineConfigCount{
+		VSphereMachineConfig: s.controlPlaneMachineConfig(),
+		Count:                s.Cluster.Spec.ControlPlaneConfiguration.Count,
+	}
+	machineConfigs = append(machineConfigs, cpMachineConfig)
+	if s.etcdMachineConfig() != nil {
+		etcdMachineConfig := MachineConfigCount{
+			VSphereMachineConfig: s.etcdMachineConfig(),
+			Count:                s.Cluster.Spec.ExternalEtcdConfiguration.Count,
+		}
+		machineConfigs = append(machineConfigs, etcdMachineConfig)
+	}
+	for _, wc := range s.Cluster.Spec.WorkerNodeGroupConfigurations {
+		workerNodeGroupConfig := MachineConfigCount{
+			VSphereMachineConfig: s.workerMachineConfig(wc),
+			Count:                *wc.Count,
+		}
+		machineConfigs = append(machineConfigs, workerNodeGroupConfig)
+	}
+	return machineConfigs
+}
+
 func etcdMachineConfig(s *cluster.Spec) *anywherev1.VSphereMachineConfig {
 	if s.Cluster.Spec.ExternalEtcdConfiguration == nil || s.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef == nil {
 		return nil
diff --git a/pkg/providers/vsphere/vsphere.go b/pkg/providers/vsphere/vsphere.go
index 9e4066fdfd01..05ae41d77dc4 100644
--- a/pkg/providers/vsphere/vsphere.go
+++ b/pkg/providers/vsphere/vsphere.go
@@ -340,7 +340,7 @@ func (p *vsphereProvider) SetupAndValidateCreateCluster(ctx context.Context, clu
 	if err := p.validateDatastoreUsageForCreate(ctx, vSphereClusterSpec); err != nil {
 		return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err)
 	}
-	if err := p.validateMemoryUsageForCreate(ctx, vSphereClusterSpec); err != nil {
+	if err := p.validateMemoryUsage(ctx, vSphereClusterSpec, nil); err != nil {
 		return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err)
 	}
 	if err := p.generateSSHKeysIfNotSet(clusterSpec.VSphereMachineConfigs); err != nil {
@@ -423,7 +423,7 @@ func (p *vsphereProvider) SetupAndValidateUpgradeCluster(ctx context.Context, cl
 		return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err)
 	}
 
-	if err := p.validateMemoryUsageForUpgrade(ctx, vSphereClusterSpec, cluster); err != nil {
+	if err := p.validateMemoryUsage(ctx, vSphereClusterSpec, cluster); err != nil {
 		return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err)
 	}
 
@@ -598,147 +598,98 @@ func (p *vsphereProvider) validateDatastoreUsageForCreate(ctx context.Context, v
 	return nil
 }
 
-type memoryUsage struct {
-	availableMemoryMiB int
-	needMemoryMiB      int
-}
-
-func (p *vsphereProvider) getPrevMachineConfigMemoryUsage(ctx context.Context, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, count int) (memoryMiB int, err error) {
+// getPrevMachineConfigMemoryUsage returns the memoryMiB freed up from the given machineConfig based on the count.
+func (p *vsphereProvider) getPrevMachineConfigMemoryUsage(ctx context.Context, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, machineConfigCount int) (memoryMiB int, err error) {
 	em, err := p.providerKubectlClient.GetEksaVSphereMachineConfig(ctx, mc.Name, cluster.KubeconfigFile, mc.GetNamespace())
 	if err != nil {
 		return 0, err
 	}
 	if em != nil && em.Spec.ResourcePool == mc.Spec.ResourcePool {
-		return em.Spec.MemoryMiB * em.Spec.NumCPUs * count, nil
+		return em.Spec.MemoryMiB * machineConfigCount, nil
 	}
 	return 0, nil
 }
 
-func (p *vsphereProvider) getMachineConfigMemoryRequirements(ctx context.Context, dc string, mc *v1alpha1.VSphereMachineConfig, count int) (available int, need int, err error) {
-	poolInfo, err := p.providerGovcClient.GetResourcePoolInfo(ctx, dc, mc.Spec.ResourcePool)
+// getMachineConfigMemoryAvailability accepts a machine config and returns the available memory in the config's resource pool along with the memory needed for the machine config.
+func (p *vsphereProvider) getMachineConfigMemoryAvailability(ctx context.Context, datacenter string, mc *v1alpha1.VSphereMachineConfig, machineConfigCount int) (availableMemoryMiB, needMemoryMiB int, err error) {
+	poolInfo, err := p.providerGovcClient.GetResourcePoolInfo(ctx, datacenter, mc.Spec.ResourcePool)
 	if err != nil {
 		return 0, 0, err
 	}
-	needMemoryMiB := mc.Spec.MemoryMiB * mc.Spec.NumCPUs * count
+	needMemoryMiB = mc.Spec.MemoryMiB * machineConfigCount
 	return poolInfo[MemoryAvailable], needMemoryMiB, nil
 }
 
-func (p *vsphereProvider) calculateResourcePoolMemoryUsage(ctx context.Context, dc string, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, mu map[string]*memoryUsage, prevCount, newCount int) error {
-	availableMemoryMiB, needMemoryMiB, err := p.getMachineConfigMemoryRequirements(ctx, dc, mc, newCount)
-	if err != nil {
-		return err
-	}
-
-	// the last old machine is deleted only after the desired number of new machines are rolled out, so reduce 1 count to accommodate for the last old machine which is not deleted until the end of rollout
-	prevUsage, err := p.getPrevMachineConfigMemoryUsage(ctx, mc, cluster, prevCount-1)
-	if err != nil {
-		return err
+// updateMemoryUsageMap updates the memory availability for the machine config's resource pool.
+func updateMemoryUsageMap(mc *v1alpha1.VSphereMachineConfig, needMiB, availableMiB int, mu map[string]int) {
+	if _, ok := mu[mc.Spec.ResourcePool]; !ok {
+		mu[mc.Spec.ResourcePool] = availableMiB
 	}
-	// update the available memory with previous usage only when memory limit is set
-	if availableMemoryMiB != -1 {
-		availableMemoryMiB += prevUsage
+	// needMiB can be ignored when the resource pool memory limit is unset
+	if availableMiB != -1 {
+		mu[mc.Spec.ResourcePool] -= needMiB
 	}
-	updateMemoryUsageMap(mc, needMemoryMiB, availableMemoryMiB, prevUsage, mu)
-	return nil
 }
 
-func updateMemoryUsageMap(mc *v1alpha1.VSphereMachineConfig, needMiB, availableMiB, prevUsage int, mu map[string]*memoryUsage) {
-	if _, ok := mu[mc.Spec.ResourcePool]; ok && mu[mc.Spec.ResourcePool].availableMemoryMiB != -1 {
-		mu[mc.Spec.ResourcePool].needMemoryMiB += needMiB
-		mu[mc.Spec.ResourcePool].availableMemoryMiB += prevUsage
-	} else {
-		mu[mc.Spec.ResourcePool] = &memoryUsage{
-			availableMemoryMiB: availableMiB,
-			needMemoryMiB:      needMiB,
-		}
+func addPrevMachineConfigMemoryUsage(mc *v1alpha1.VSphereMachineConfig, prevUsage int, memoryUsage map[string]int) {
+	// when the memory limit for the respective resource pool is unset, skip accounting for previous usage and validating the needed memory
+	if _, ok := memoryUsage[mc.Spec.ResourcePool]; ok && memoryUsage[mc.Spec.ResourcePool] != -1 {
+		memoryUsage[mc.Spec.ResourcePool] += prevUsage
 	}
 }
 
-func (p *vsphereProvider) validateMemoryUsageForCreate(ctx context.Context, clusterSpec *Spec) error {
-	memoryUsage := make(map[string]*memoryUsage)
+func (p *vsphereProvider) validateMemoryUsage(ctx context.Context, clusterSpec *Spec, cluster *types.Cluster) error {
+	memoryUsage := make(map[string]int)
 	datacenter := clusterSpec.VSphereDatacenter.Spec.Datacenter
-	cpMachineConfig := clusterSpec.controlPlaneMachineConfig()
-	controlPlaneAvailableMiB, controlPlaneNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, cpMachineConfig, clusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count)
-	if err != nil {
-		return fmt.Errorf("calculating memory usage for control plane: %v", err)
-	}
-	updateMemoryUsageMap(cpMachineConfig, controlPlaneNeedMiB, controlPlaneAvailableMiB, 0, memoryUsage)
-	for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations {
-		workerMachineConfig := clusterSpec.workerMachineConfig(workerNodeGroupConfiguration)
-		workerAvailableMiB, workerNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, workerMachineConfig, *workerNodeGroupConfiguration.Count)
+	for _, mc := range clusterSpec.machineConfigsWithCount() {
+		availableMemoryMiB, needMemoryMiB, err := p.getMachineConfigMemoryAvailability(ctx, datacenter, mc.VSphereMachineConfig, mc.Count)
 		if err != nil {
-			return fmt.Errorf("calculating memory usage for worker node groups: %v", err)
+			return fmt.Errorf("calculating memory usage for machine config %v: %v", mc.VSphereMachineConfig.ObjectMeta.Name, err)
 		}
-		updateMemoryUsageMap(workerMachineConfig, workerNeedMiB, workerAvailableMiB, 0, memoryUsage)
+		updateMemoryUsageMap(mc.VSphereMachineConfig, needMemoryMiB, availableMemoryMiB, memoryUsage)
 	}
-	etcdMachineConfig := clusterSpec.etcdMachineConfig()
-	if etcdMachineConfig != nil {
-		etcdAvailableMiB, etcdNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, etcdMachineConfig, clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.Count)
+	// account for previous cluster resources that are freed up during upgrade.
+	if cluster != nil {
+		err := p.updatePrevClusterMemoryUsage(ctx, clusterSpec, cluster, memoryUsage)
 		if err != nil {
 			return err
 		}
-		updateMemoryUsageMap(etcdMachineConfig, etcdNeedMiB, etcdAvailableMiB, 0, memoryUsage)
 	}
-	for resourcePool, usage := range memoryUsage {
-		if usage.availableMemoryMiB != -1 {
-			if usage.needMemoryMiB > usage.availableMemoryMiB {
-				return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool)
-			}
+	for resourcePool, remainingMiB := range memoryUsage {
+		if remainingMiB != -1 && remainingMiB < 0 {
+			return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool)
 		}
 	}
 	logger.V(5).Info("Memory availability for machine configs in requested resource pool validated")
 	return nil
 }
 
-func (p *vsphereProvider) validateMemoryUsageForUpgrade(ctx context.Context, currentClusterSpec *Spec, cluster *types.Cluster) error {
-	memoryUsage := make(map[string]*memoryUsage)
-	prevEksaCluster, err := p.providerKubectlClient.GetEksaCluster(ctx, cluster, currentClusterSpec.Cluster.GetName())
+// updatePrevClusterMemoryUsage calculates the memory freed up from the previous CP and worker nodes during upgrade and adds it back to the available memory for the respective resource pool.
+func (p *vsphereProvider) updatePrevClusterMemoryUsage(ctx context.Context, clusterSpec *Spec, cluster *types.Cluster, memoryUsage map[string]int) error {
+	prevEksaCluster, err := p.providerKubectlClient.GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName())
 	if err != nil {
 		return err
 	}
-
-	datacenter := currentClusterSpec.VSphereDatacenter.Spec.Datacenter
-	cpMachineConfig := currentClusterSpec.controlPlaneMachineConfig()
-	if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, cpMachineConfig, cluster, memoryUsage, prevEksaCluster.Spec.ControlPlaneConfiguration.Count, currentClusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count); err != nil {
-		return fmt.Errorf("calculating memory usage for control plane: %v", err)
-	}
 	prevMachineConfigRefs := machineRefSliceToMap(prevEksaCluster.MachineConfigRefs())
-	if err := p.getWorkerNodeGroupMemoryUsage(ctx, datacenter, currentClusterSpec, cluster, memoryUsage, prevMachineConfigRefs); err != nil {
-		return fmt.Errorf("calculating memory usage for worker node groups: %v", err)
-	}
-
-	etcdMachineConfig := currentClusterSpec.etcdMachineConfig()
-	if etcdMachineConfig != nil {
-		etcdAvailableMiB, etcdNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, etcdMachineConfig, currentClusterSpec.Cluster.Spec.ExternalEtcdConfiguration.Count)
+	if _, ok := prevMachineConfigRefs[clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name]; ok {
+		cpMachineConfig := clusterSpec.controlPlaneMachineConfig()
+		// The last CP machine is deleted only after the desired number of new CP machines are rolled out, so don't add its memory
+		prevCPUsage, err := p.getPrevMachineConfigMemoryUsage(ctx, cpMachineConfig, cluster, prevEksaCluster.Spec.ControlPlaneConfiguration.Count-1)
 		if err != nil {
-			return err
-		}
-		// older etcd machines are not deleted until the end of rollout so do not account for the previous usage
-		updateMemoryUsageMap(etcdMachineConfig, etcdNeedMiB, etcdAvailableMiB, 0, memoryUsage)
-	}
-
-	for resourcePool, usage := range memoryUsage {
-		if usage.availableMemoryMiB != -1 && usage.needMemoryMiB > usage.availableMemoryMiB {
-			return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool)
+			return fmt.Errorf("calculating previous memory usage for control plane: %v", err)
 		}
+		addPrevMachineConfigMemoryUsage(cpMachineConfig, prevCPUsage, memoryUsage)
 	}
-	logger.V(5).Info("Memory availability for machine configs in requested resource pool validated")
-	return nil
-}
-
-func (p *vsphereProvider) getWorkerNodeGroupMemoryUsage(ctx context.Context, datacenter string, currentClusterSpec *Spec, cluster *types.Cluster, memoryUsage map[string]*memoryUsage, prevMachineConfigRefs map[string]v1alpha1.Ref) error {
-	for _, workerNodeGroupConfiguration := range currentClusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations {
-		prevCount := 0
-		workerMachineConfig := currentClusterSpec.workerMachineConfig(workerNodeGroupConfiguration)
+	for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations {
+		workerMachineConfig := clusterSpec.workerMachineConfig(workerNodeGroupConfiguration)
 		if _, ok := prevMachineConfigRefs[workerNodeGroupConfiguration.MachineGroupRef.Name]; ok {
-			prevCount = *workerNodeGroupConfiguration.Count
-		} else {
-			// set count to 1 when no previous machine config was found for the current worker node group in the spec to avoid a negative count in calculation
-			prevCount = 1
-		}
-		if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, workerMachineConfig, cluster, memoryUsage, prevCount, *workerNodeGroupConfiguration.Count); err != nil {
-			return fmt.Errorf("calculating memory usage: %v", err)
+			prevCount := *workerNodeGroupConfiguration.Count
+			// The last worker machine is deleted only after the desired number of new worker machines are rolled out, so don't add its memory
+			prevWorkerUsage, err := p.getPrevMachineConfigMemoryUsage(ctx, workerMachineConfig, cluster, prevCount-1)
+			if err != nil {
+				return fmt.Errorf("calculating previous memory usage for worker node group %v: %v", workerMachineConfig.Name, err)
+			}
+			addPrevMachineConfigMemoryUsage(workerMachineConfig, prevWorkerUsage, memoryUsage)
 		}
 	}
 	return nil
diff --git a/pkg/providers/vsphere/vsphere_test.go b/pkg/providers/vsphere/vsphere_test.go
index 73c6cacd618b..fa2f5e2df6f4 100644
--- a/pkg/providers/vsphere/vsphere_test.go
+++ b/pkg/providers/vsphere/vsphere_test.go
@@ -3487,7 +3487,7 @@ func TestValidateMachineConfigsMemoryUsageCreateSuccess(t *testing.T) {
 		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: -1}, nil)
 	}
 	vSpec := NewSpec(tt.clusterSpec)
-	err := tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec)
+	err := tt.provider.validateMemoryUsage(tt.ctx, vSpec, nil)
 	if err != nil {
 		t.Fatalf("unexpected failure %v", err)
 	}
@@ -3498,10 +3498,10 @@ func TestValidateMachineConfigsMemoryUsageCreateError(t *testing.T) {
 	machineConfigs := tt.clusterSpec.VSphereMachineConfigs
 	datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter
 	for _, config := range machineConfigs {
-		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: 100000}, nil)
+		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: 10000}, nil)
 	}
 	vSpec := NewSpec(tt.clusterSpec)
-	err := tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec)
+	err := tt.provider.validateMemoryUsage(tt.ctx, vSpec, nil)
 	resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool
 	thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err)
 }
@@ -3522,7 +3522,7 @@ func TestSetupAndValidateCreateClusterMemoryUsageError(t *testing.T) {
 	tt.govc.EXPECT().GetWorkloadAvailableSpace(tt.ctx, cpMachineConfig.Spec.Datastore).Return(1000.0, nil).AnyTimes()
 	tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, cpMachineConfig.Spec.ResourcePool).Return(nil, fmt.Errorf("error"))
 	err := tt.provider.SetupAndValidateCreateCluster(tt.ctx, tt.clusterSpec)
-	thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for control plane: error", err)
+	thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for machine config test-cp: error", err)
 }
 
 func TestValidateMachineConfigsMemoryUsageUpgradeSuccess(t *testing.T) {
@@ -3546,7 +3546,7 @@ func TestValidateMachineConfigsMemoryUsageUpgradeSuccess(t *testing.T) {
 		tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, config.Name, cluster.KubeconfigFile, config.Namespace).Return(config, nil).AnyTimes()
 		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: -1}, nil).AnyTimes()
 	}
-	err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster)
+	err := tt.provider.validateMemoryUsage(tt.ctx, vSpec, cluster)
 	if err != nil {
 		t.Fatalf("unexpected failure %v", err)
 	}
@@ -3566,7 +3566,7 @@ func TestValidateMachineConfigsMemoryUsageUpgradeError(t *testing.T) {
 	}
 	vSpec := NewSpec(tt.clusterSpec)
 	vSpec.Cluster.Spec.ControlPlaneConfiguration.Count += 2
-	err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster)
+	err := tt.provider.validateMemoryUsage(tt.ctx, vSpec, cluster)
 	resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool
 	thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err)
 }
@@ -3580,7 +3580,7 @@ func TestSetupAndValidateUpgradeClusterMemoryUsageError(t *testing.T) {
 	tt.setExpectationForVCenterValidation()
 	tt.setExpectationsForDefaultDiskAndCloneModeGovcCalls()
 	tt.setExpectationsForMachineConfigsVCenterValidation()
-	tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil).Times(2)
+	tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil).Times(1)
 	tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, gomock.Any(), cluster.KubeconfigFile, tt.clusterSpec.Cluster.GetNamespace()).AnyTimes()
 	cpMachineConfig := tt.machineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name]
 	tt.govc.EXPECT().SearchTemplate(tt.ctx, tt.datacenterConfig.Spec.Datacenter, cpMachineConfig.Spec.Template).Return(cpMachineConfig.Spec.Template, nil).AnyTimes()
@@ -3590,7 +3590,7 @@ func TestSetupAndValidateUpgradeClusterMemoryUsageError(t *testing.T) {
 	datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter
 	tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, cpMachineConfig.Spec.ResourcePool).Return(nil, fmt.Errorf("error"))
 	err := tt.provider.SetupAndValidateUpgradeCluster(tt.ctx, cluster, tt.clusterSpec, tt.clusterSpec)
-	thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for control plane: error", err)
+	thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for machine config test-cp: error", err)
 }
 
 func TestValidateMachineConfigsNameUniquenessSuccess(t *testing.T) {
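
Note: the accounting scheme behind the reworked validateMemoryUsage can be summarized in a small standalone sketch. The names and values below are illustrative only; the real code builds MachineConfigCount entries via machineConfigsWithCount, reads pool capacities via GetResourcePoolInfo, and treats -1 as "pool has no memory limit".

package main

import "fmt"

// poolRequest is an illustrative stand-in for MachineConfigCount: a machine
// group's resource pool, per-machine memory, and machine count.
type poolRequest struct {
	resourcePool string
	memoryMiB    int
	count        int
}

// validatePoolMemory mirrors the new validateMemoryUsage: seed each pool with
// its available memory (-1 means no limit), subtract each group's need, and
// fail if any limited pool goes negative.
func validatePoolMemory(available map[string]int, requests []poolRequest) error {
	remaining := map[string]int{}
	for _, r := range requests {
		if _, ok := remaining[r.resourcePool]; !ok {
			remaining[r.resourcePool] = available[r.resourcePool]
		}
		// the need can be ignored when the pool's memory limit is unset
		if remaining[r.resourcePool] != -1 {
			remaining[r.resourcePool] -= r.memoryMiB * r.count
		}
	}
	for pool, left := range remaining {
		if left != -1 && left < 0 {
			return fmt.Errorf("not enough memory available in resource pool %v", pool)
		}
	}
	return nil
}

func main() {
	available := map[string]int{"pool-a": 16384, "pool-b": -1}
	requests := []poolRequest{
		{resourcePool: "pool-a", memoryMiB: 8192, count: 3}, // needs 24 GiB, only 16 GiB available
		{resourcePool: "pool-b", memoryMiB: 8192, count: 2}, // unlimited pool, always passes
	}
	fmt.Println(validatePoolMemory(available, requests)) // reports pool-a as over capacity
}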