diff --git a/pkg/executables/govc.go b/pkg/executables/govc.go
index 3e1ed83a9e82d..96035be71a38b 100644
--- a/pkg/executables/govc.go
+++ b/pkg/executables/govc.go
@@ -40,8 +40,7 @@ const (
 	DeployOptsFile = "deploy-opts.json"
 	disk1          = "Hard disk 1"
 	disk2          = "Hard disk 2"
-	cpuAvailable    = "CPU_Available"
-	memoryAvailable = "Memory_Available"
+	MemoryAvailable = "Memory_Available"
 )
 
 var requiredEnvs = []string{govcUsernameKey, govcPasswordKey, govcURLKey, govcInsecure, govcDatacenterKey}
@@ -1148,12 +1147,10 @@ func (g *Govc) SetGroupRoleOnObject(ctx context.Context, principal string, role
 }
 
 type resourcePoolInfo struct {
-	resourcePoolIdentifier *resourcePool
+	ResourcePoolIdentifier *resourcePool
 }
 
 type resourcePool struct {
-	CPUUsage    string
-	CPULimit    string
 	memoryUsage string
 	memoryLimit string
 }
@@ -1166,52 +1163,52 @@ func (g *Govc) GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool
 	if err != nil {
 		return nil, fmt.Errorf("getting resource pool information: %v", err)
 	}
+
+	scanner := bufio.NewScanner(strings.NewReader(response.String()))
 	var resourcePoolInfoResponse resourcePoolInfo
-	err = yaml.Unmarshal(response.Bytes(), &resourcePoolInfoResponse)
-	if err != nil {
-		return nil, fmt.Errorf("unmarshalling resource pool info: %v", err)
+	resourcePoolInfoResponse.ResourcePoolIdentifier = new(resourcePool)
+	for scanner.Scan() {
+		metaData := scanner.Text()
+		if strings.Contains(metaData, "Mem Usage") {
+			resourcePoolInfoResponse.ResourcePoolIdentifier.memoryUsage = strings.Split(metaData, ":")[1]
+		}
+		if strings.Contains(metaData, "Mem Limit") {
+			resourcePoolInfoResponse.ResourcePoolIdentifier.memoryLimit = strings.Split(metaData, ":")[1]
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return nil, fmt.Errorf("failure reading memory allocation for resource pool")
 	}
-	poolInfo, err := getPoolInfo(resourcePoolInfoResponse.resourcePoolIdentifier)
+
+	poolInfo, err := getPoolInfo(resourcePoolInfoResponse.ResourcePoolIdentifier)
 	if err != nil {
 		return nil, err
 	}
 	return poolInfo, nil
 }
 
-// helper function that parses the resource pool responce and returns CPU and memory requirements.
+// helper function that parses the resource pool response and returns memory requirements.
 func getPoolInfo(rp *resourcePool) (map[string]int, error) {
-	CPUUsed, err := getValueFromString(rp.CPUUsage)
-	if err != nil {
-		return nil, fmt.Errorf("unable to obtain CPU usage for resource pool %s: %v", rp.CPUUsage, err)
-	}
-	CPULimit, err := getValueFromString(rp.CPULimit)
-	if err != nil {
-		return nil, fmt.Errorf("unable to obtain CPU limit for resource pool %s: %v", rp.CPULimit, err)
-	}
 	memoryUsed, err := getValueFromString(rp.memoryUsage)
 	if err != nil {
-		return nil, fmt.Errorf("unable to obtain memory usage for resource pool %s: %v", rp.CPULimit, err)
+		return nil, fmt.Errorf("unable to obtain memory usage for resource pool %s: %v", rp.memoryUsage, err)
 	}
 	memoryLimit, err := getValueFromString(rp.memoryLimit)
 	if err != nil {
-		return nil, fmt.Errorf("unable to obtain memory limit for resource pool %s: %v", rp.CPULimit, err)
+		return nil, fmt.Errorf("unable to obtain memory limit for resource pool %s: %v", rp.memoryLimit, err)
 	}
 	poolInfo := make(map[string]int)
 	if memoryLimit != -1 {
-		poolInfo[memoryAvailable] = memoryLimit - memoryUsed
-	} else {
-		poolInfo[memoryAvailable] = -1
-	}
-	if CPULimit != -1 {
-		poolInfo[cpuAvailable] = CPULimit - CPUUsed
+		poolInfo[MemoryAvailable] = memoryLimit - memoryUsed
 	} else {
-		poolInfo[cpuAvailable] = -1
+		poolInfo[MemoryAvailable] = memoryLimit
 	}
 	return poolInfo, nil
 }
 
 func getValueFromString(str string) (int, error) {
-	splitResponse := strings.Split(str, " ")
+	splitResponse := strings.Split(strings.TrimSpace(str), " ")
 	nonNumericRegex := regexp.MustCompile(`[^0-9- ]+`)
 	cleanedString := nonNumericRegex.ReplaceAllString(splitResponse[0], "")
 	numValue, err := strconv.Atoi(cleanedString)
diff --git a/pkg/executables/govc_test.go b/pkg/executables/govc_test.go
index 424fdde9834c6..3661f6174b1d3 100644
--- a/pkg/executables/govc_test.go
+++ b/pkg/executables/govc_test.go
@@ -1643,3 +1643,64 @@ func TestGovcGetHardDiskSizeError(t *testing.T) {
 		})
 	}
 }
+
+func TestGovcGetResourcePoolInfo(t *testing.T) {
+	datacenter := "SDDC-Datacenter"
+	resourcePool := "*/Resources/Test-ResourcePool"
+	govcErr := errors.New("error PoolInfo()")
+	ctx := context.Background()
+	_, g, executable, env := setup(t)
+
+	tests := []struct {
+		testName    string
+		response    string
+		govcErr     error
+		wantErr     error
+		wantMemInfo map[string]int
+	}{
+		{
+			testName: "pool_info_memory_limit_set",
+			response: `Name: Test-ResourcePool
+				Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool
+				Mem Usage: 100MB (11.3%)
+				Mem Shares: normal
+				Mem Reservation: 0MB (expandable=true)
+				Mem Limit: 1000MB`,
+			govcErr:     nil,
+			wantErr:     nil,
+			wantMemInfo: map[string]int{executables.MemoryAvailable: 900},
+		},
+		{
+			testName: "pool_info_memory_limit_unset",
+			response: `Name: Test-ResourcePool
+				Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool
+				Mem Usage: 100MB (11.3%)
+				Mem Shares: normal
+				Mem Reservation: 0MB (expandable=true)
+				Mem Limit: -1MB`,
+			govcErr:     nil,
+			wantErr:     nil,
+			wantMemInfo: map[string]int{executables.MemoryAvailable: -1},
+		},
+		{
+			testName:    "pool_info_error",
+			response:    "",
+			govcErr:     govcErr,
+			wantErr:     fmt.Errorf("getting resource pool information: %v", govcErr),
+			wantMemInfo: nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.testName, func(t *testing.T) {
+			gt := NewWithT(t)
+			responseBytes := bytes.NewBuffer([]byte(tt.response))
+			executable.EXPECT().ExecuteWithEnv(ctx, env, "pool.info", "-dc", datacenter, resourcePool).Return(*responseBytes, tt.govcErr)
+			poolMemInfo, err := g.GetResourcePoolInfo(ctx, datacenter, resourcePool)
+			if tt.wantErr != nil {
+				gt.Expect(err.Error()).To(Equal(tt.wantErr.Error()))
+			}
+			gt.Expect(poolMemInfo).To(Equal(tt.wantMemInfo))
+		})
+	}
+}
diff --git a/pkg/providers/vsphere/vsphere.go b/pkg/providers/vsphere/vsphere.go
index 8e950bf83e12e..58a967d463160 100644
--- a/pkg/providers/vsphere/vsphere.go
+++ b/pkg/providers/vsphere/vsphere.go
@@ -19,7 +19,6 @@ import (
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 
 	"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
-	anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1"
 	"github.com/aws/eks-anywhere/pkg/bootstrapper"
 	"github.com/aws/eks-anywhere/pkg/clients/kubernetes"
 	"github.com/aws/eks-anywhere/pkg/cluster"
@@ -54,7 +53,7 @@ const (
 	disk1                = "Hard disk 1"
 	disk2                = "Hard disk 2"
 	cpuAvailable         = "CPU_Available"
-	memoryAvailable      = "Memory_Available"
+	MemoryAvailable      = "Memory_Available"
 	ethtoolDaemonSetName = "vsphere-disable-udp-offload"
 )
@@ -342,7 +341,7 @@ func (p *vsphereProvider) SetupAndValidateCreateCluster(ctx context.Context, clu
 	if err := p.validateDatastoreUsageForCreate(ctx, vSphereClusterSpec); err != nil {
 		return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err)
 	}
-	if err := p.validateMemoryUsageForCreate(ctx, vSphereClusterSpec.VSphereDatacenter, vSphereClusterSpec); err != nil {
+	if err := p.validateMemoryUsageForCreate(ctx, vSphereClusterSpec); err != nil {
 		return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err)
 	}
 	if err := p.generateSSHKeysIfNotSet(clusterSpec.VSphereMachineConfigs); err != nil {
@@ -425,7 +424,7 @@ func (p *vsphereProvider) SetupAndValidateUpgradeCluster(ctx context.Context, cl
 		return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err)
 	}
 
-	if err := p.validateMemoryUsageForUpgrade(ctx, vSphereClusterSpec.VSphereDatacenter, vSphereClusterSpec, cluster); err != nil {
+	if err := p.validateMemoryUsageForUpgrade(ctx, vSphereClusterSpec, cluster); err != nil {
 		return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err)
 	}
 
@@ -611,7 +610,7 @@ func (p *vsphereProvider) getPrevMachineConfigMemoryUsage(ctx context.Context, m
 		return 0, err
 	}
 	if em != nil && em.Spec.ResourcePool == mc.Spec.ResourcePool {
-		return em.Spec.MemoryMiB * count, nil
+		return em.Spec.MemoryMiB * em.Spec.NumCPUs * count, nil
 	}
 	return 0, nil
 }
@@ -621,8 +620,8 @@ func (p *vsphereProvider) getMachineConfigMemoryRequirements(ctx context.Context
 	if err != nil {
 		return 0, 0, err
 	}
-	needMemoryMiB := mc.Spec.MemoryMiB * count
-	return poolInfo[memoryAvailable], needMemoryMiB, nil
+	needMemoryMiB := mc.Spec.MemoryMiB * mc.Spec.NumCPUs * count
+	return poolInfo[MemoryAvailable], needMemoryMiB, nil
 }
 
 func (p *vsphereProvider) calculateResourcePoolMemoryUsage(ctx context.Context, dc string, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, mu map[string]*memoryUsage, prevCount, newCount int) error {
@@ -653,9 +652,9 @@ func updateMemoryUsageMap(mc *v1alpha1.VSphereMachineConfig, needMiB, availableM
 	}
 }
 
-func (p *vsphereProvider) validateMemoryUsageForCreate(ctx context.Context, dc *anywherev1.VSphereDatacenterConfig, clusterSpec *Spec) error {
+func (p *vsphereProvider) validateMemoryUsageForCreate(ctx context.Context, clusterSpec *Spec) error {
 	memoryUsage := make(map[string]*memoryUsage)
-	datacenter := dc.Spec.Datacenter
+	datacenter := clusterSpec.VSphereDatacenter.Spec.Datacenter
 	cpMachineConfig := clusterSpec.controlPlaneMachineConfig()
 	controlPlaneAvailableMiB, controlPlaneNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, cpMachineConfig, clusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count)
 	if err != nil {
@@ -685,17 +684,18 @@ func (p *vsphereProvider) validateMemoryUsageForCreate(ctx context.Context, dc *
 			}
 		}
 	}
+	logger.V(5).Info("Memory availability for machine configs in requested resource pool validated")
 	return nil
 }
 
-func (p *vsphereProvider) validateMemoryUsageForUpgrade(ctx context.Context, dc *anywherev1.VSphereDatacenterConfig, currentClusterSpec *Spec, cluster *types.Cluster) error {
+func (p *vsphereProvider) validateMemoryUsageForUpgrade(ctx context.Context, currentClusterSpec *Spec, cluster *types.Cluster) error {
 	memoryUsage := make(map[string]*memoryUsage)
 	prevEksaCluster, err := p.providerKubectlClient.GetEksaCluster(ctx, cluster, currentClusterSpec.Cluster.GetName())
 	if err != nil {
 		return err
 	}
 
-	datacenter := dc.Spec.Datacenter
+	datacenter := currentClusterSpec.VSphereDatacenter.Spec.Datacenter
 	cpMachineConfig := currentClusterSpec.controlPlaneMachineConfig()
 	if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, cpMachineConfig, cluster, memoryUsage, prevEksaCluster.Spec.ControlPlaneConfiguration.Count, currentClusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count); err != nil {
 		return fmt.Errorf("calculating memory usage for control plane: %v", err)
@@ -712,7 +712,7 @@ func (p *vsphereProvider) validateMemoryUsageForUpgrade(ctx context.Context, dc
 		if err != nil {
 			return err
 		}
-		// older etcd machines are not deleted until the end of rolling so do not account for the previous usage
+		// older etcd machines are not deleted until the end of rollout so do not account for the previous usage
 		updateMemoryUsageMap(etcdMachineConfig, etcdNeedMiB, etcdAvailableMiB, 0, memoryUsage)
 	}
 
@@ -721,6 +721,7 @@ func (p *vsphereProvider) validateMemoryUsageForUpgrade(ctx context.Context, dc
 			return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool)
 		}
 	}
+	logger.V(5).Info("Memory availability for machine configs in requested resource pool validated")
 	return nil
 }
 
@@ -731,7 +732,7 @@ func (p *vsphereProvider) getWorkerNodeGroupMemoryUsage(ctx context.Context, dat
 		if _, ok := prevMachineConfigRefs[workerNodeGroupConfiguration.MachineGroupRef.Name]; ok {
 			prevCount = *workerNodeGroupConfiguration.Count
 		} else {
-			// set count to 1 when no previous machines were found in the resource pool to avoid a negative count in calculation
+			// set count to 1 when no previous machine config was found for the current worker node group in the spec to avoid a negative count in calculation
 			prevCount = 1
 		}
 		if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, workerMachineConfig, cluster, memoryUsage, prevCount, *workerNodeGroupConfiguration.Count); err != nil {
diff --git a/pkg/providers/vsphere/vsphere_test.go b/pkg/providers/vsphere/vsphere_test.go
index d55dc57e533cc..2d5137de62f8d 100644
--- a/pkg/providers/vsphere/vsphere_test.go
+++ b/pkg/providers/vsphere/vsphere_test.go
@@ -3481,6 +3481,73 @@ func TestValidateMachineConfigsDatastoreUsageUpgradeError(t *testing.T) {
 	thenErrorExpected(t, fmt.Sprintf("not enough space in datastore %s for given diskGiB and count for respective machine groups", tt.clusterSpec.VSphereMachineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name].Spec.Datastore), err)
 }
 
+func TestValidateMachineConfigsMemoryUsageCreateSuccess(t *testing.T) {
+	tt := newProviderTest(t)
+	machineConfigs := tt.clusterSpec.VSphereMachineConfigs
+	datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter
+	machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool = "test-resourcepool"
+	for _, config := range machineConfigs {
+		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{executables.MemoryAvailable: -1}, nil)
+	}
+	vSpec := NewSpec(tt.clusterSpec)
+	err := tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec)
+	if err != nil {
+		t.Fatalf("unexpected failure %v", err)
+	}
+}
+
+func TestValidateMachineConfigsMemoryUsageCreateError(t *testing.T) {
+	tt := newProviderTest(t)
+	machineConfigs := tt.clusterSpec.VSphereMachineConfigs
+	datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter
+	for _, config := range machineConfigs {
+		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{executables.MemoryAvailable: 100000}, nil)
+	}
+	vSpec := NewSpec(tt.clusterSpec)
+	err := tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec)
+	resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool
+	thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err)
+}
+
+func TestValidateMachineConfigsMemoryUsageUpgradeSuccess(t *testing.T) {
+	tt := newProviderTest(t)
+	cluster := &types.Cluster{
+		Name: "test",
+	}
+	tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil)
+	machineConfigs := tt.clusterSpec.VSphereMachineConfigs
+	datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter
+	for _, config := range machineConfigs {
+		tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, config.Name, cluster.KubeconfigFile, config.Namespace).AnyTimes()
+		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{executables.MemoryAvailable: -1}, nil)
+	}
+	vSpec := NewSpec(tt.clusterSpec)
+	vSpec.Cluster.Spec.ControlPlaneConfiguration.Count += 2
+	err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster)
+	if err != nil {
+		t.Fatalf("unexpected failure %v", err)
+	}
+}
+
+func TestValidateMachineConfigsMemoryUsageUpgradeError(t *testing.T) {
+	tt := newProviderTest(t)
+	cluster := &types.Cluster{
+		Name: "test",
+	}
+	tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil)
+	machineConfigs := tt.clusterSpec.VSphereMachineConfigs
+	datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter
+	for _, config := range machineConfigs {
+		tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, config.Name, cluster.KubeconfigFile, config.Namespace).AnyTimes()
+		tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{executables.MemoryAvailable: 10000}, nil)
+	}
+	vSpec := NewSpec(tt.clusterSpec)
+	vSpec.Cluster.Spec.ControlPlaneConfiguration.Count += 2
+	err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster)
+	resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool
+	thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err)
+}
+
 func TestValidateMachineConfigsNameUniquenessSuccess(t *testing.T) {
 	tt := newProviderTest(t)
 	cluster := &types.Cluster{