Validate memory usage for given resource pool for vSphere provider #6680

Merged · 3 commits · Oct 5, 2023
75 changes: 75 additions & 0 deletions pkg/executables/govc.go
@@ -11,6 +11,7 @@
"net/http"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
@@ -39,6 +40,7 @@
DeployOptsFile = "deploy-opts.json"
disk1 = "Hard disk 1"
disk2 = "Hard disk 2"
MemoryAvailable = "Memory_Available"
)

var requiredEnvs = []string{govcUsernameKey, govcPasswordKey, govcURLKey, govcInsecure, govcDatacenterKey}
@@ -1143,3 +1145,76 @@

return nil
}

type resourcePoolInfo struct {
ResourcePoolIdentifier *resourcePool
}

type resourcePool struct {
memoryUsage string
memoryLimit string
}

// GetResourcePoolInfo returns the pool info for the provided resource pool.
func (g *Govc) GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool string, args ...string) (map[string]int, error) {
params := []string{"pool.info", "-dc", datacenter, resourcepool}
params = append(params, args...)
response, err := g.exec(ctx, params...)
if err != nil {
return nil, fmt.Errorf("getting resource pool information: %v", err)
}

scanner := bufio.NewScanner(strings.NewReader(response.String()))
var resourcePoolInfoResponse resourcePoolInfo
resourcePoolInfoResponse.ResourcePoolIdentifier = new(resourcePool)
for scanner.Scan() {
metaData := scanner.Text()
if strings.Contains(metaData, "Mem Usage") {
resourcePoolInfoResponse.ResourcePoolIdentifier.memoryUsage = strings.Split(metaData, ":")[1]
}
if strings.Contains(metaData, "Mem Limit") {
resourcePoolInfoResponse.ResourcePoolIdentifier.memoryLimit = strings.Split(metaData, ":")[1]
}
}

if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("failure reading memory allocation for resource pool")
}

poolInfo, err := getPoolInfo(resourcePoolInfoResponse.ResourcePoolIdentifier)
if err != nil {
return nil, err
}
return poolInfo, nil
}

// getPoolInfo parses resource pool response and returns memory requirements.
func getPoolInfo(rp *resourcePool) (map[string]int, error) {
memoryUsed, err := getValueFromString(rp.memoryUsage)
if err != nil {
return nil, fmt.Errorf("unable to obtain memory usage for resource pool %s: %v", rp.memoryUsage, err)
}
memoryLimit, err := getValueFromString(rp.memoryLimit)
if err != nil {
return nil, fmt.Errorf("unable to obtain memory limit for resource pool %s: %v", rp.memoryLimit, err)
}
poolInfo := make(map[string]int)
if memoryLimit != -1 {
poolInfo[MemoryAvailable] = memoryLimit - memoryUsed
} else {
poolInfo[MemoryAvailable] = memoryLimit
}
return poolInfo, nil
}

// getValueFromString cleans the input string and returns the extracted numerical value.
func getValueFromString(str string) (int, error) {
splitResponse := strings.Split(strings.TrimSpace(str), " ")
nonNumericRegex := regexp.MustCompile(`[^0-9- ]+`)
cleanedString := nonNumericRegex.ReplaceAllString(splitResponse[0], "")
numValue, err := strconv.Atoi(cleanedString)
if err != nil {
return 0, err
}
return numValue, nil
}
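
For reference, a minimal standalone sketch (not part of this diff; the sample inputs and the parseMemValue name are hypothetical) of how the parsing above turns govc pool.info fields such as "Mem Usage: 100MB (11.3%)" or "Mem Limit: -1MB" into integer MiB values:

package main

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

// parseMemValue mirrors getValueFromString above: take the text after the colon,
// keep the first whitespace-separated token, strip everything except digits and
// minus signs, and parse the remainder as an integer.
func parseMemValue(s string) (int, error) {
	token := strings.Split(strings.TrimSpace(s), " ")[0]
	cleaned := regexp.MustCompile(`[^0-9- ]+`).ReplaceAllString(token, "")
	return strconv.Atoi(cleaned)
}

func main() {
	for _, field := range []string{" 100MB (11.3%)", " 1000MB", " -1MB"} {
		v, err := parseMemValue(field)
		fmt.Println(v, err) // prints 100 <nil>, 1000 <nil>, -1 <nil>
	}
}

A limit of -1MB is how govc reports a pool with no memory limit; getPoolInfo passes that through unchanged as Memory_Available = -1, as exercised by the pool_info_memory_limit_unset test case below.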
79 changes: 79 additions & 0 deletions pkg/executables/govc_test.go
@@ -1643,3 +1643,82 @@ func TestGovcGetHardDiskSizeError(t *testing.T) {
})
}
}

func TestGovcGetResourcePoolInfo(t *testing.T) {
datacenter := "SDDC-Datacenter"
resourcePool := "*/Resources/Test-ResourcePool"
govcErr := errors.New("error PoolInfo()")
ctx := context.Background()
_, g, executable, env := setup(t)

tests := []struct {
testName string
response string
govcErr error
wantErr error
wantMemInfo map[string]int
}{
{
testName: "pool_info_memory_limit_set",
response: `Name: Test-ResourcePool
Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool
Mem Usage: 100MB (11.3%)
Mem Shares: normal
Mem Reservation: 0MB (expandable=true)
Mem Limit: 1000MB`,
govcErr: nil,
wantErr: nil,
wantMemInfo: map[string]int{executables.MemoryAvailable: 900},
},
{
testName: "pool_info_memory_limit_unset",
response: `Name: Test-ResourcePool
Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool
Mem Usage: 100MB (11.3%)
Mem Shares: normal
Mem Reservation: 0MB (expandable=true)
Mem Limit: -1MB`,
govcErr: nil,
wantErr: nil,
wantMemInfo: map[string]int{executables.MemoryAvailable: -1},
},
{
testName: "pool_info_memory_usage_corrupt",
response: `Name: Test-ResourcePool
Mem Usage:corrupt-val
Mem Limit:-1MB`,
govcErr: nil,
wantErr: fmt.Errorf("unable to obtain memory usage for resource pool corrupt-val: strconv.Atoi: parsing \"-\": invalid syntax"),
wantMemInfo: nil,
},
{
testName: "pool_info_memory_limit_corrupt",
response: `Name: Test-ResourcePool
Mem Usage:100
Mem Limit:corrupt-val`,
govcErr: nil,
wantErr: fmt.Errorf("unable to obtain memory limit for resource pool corrupt-val: strconv.Atoi: parsing \"-\": invalid syntax"),
wantMemInfo: nil,
},
{
testName: "pool_info_error",
response: "",
govcErr: govcErr,
wantErr: fmt.Errorf("getting resource pool information: %v", govcErr),
wantMemInfo: nil,
},
}

for _, tt := range tests {
t.Run(tt.testName, func(t *testing.T) {
gt := NewWithT(t)
responseBytes := bytes.NewBuffer([]byte(tt.response))
executable.EXPECT().ExecuteWithEnv(ctx, env, "pool.info", "-dc", datacenter, resourcePool).Return(*responseBytes, tt.govcErr)
poolMemInfo, err := g.GetResourcePoolInfo(ctx, datacenter, resourcePool)
if tt.wantErr != nil {
gt.Expect(err.Error()).To(Equal(tt.wantErr.Error()))
}
gt.Expect(poolMemInfo).To(Equal(tt.wantMemInfo))
})
}
}
20 changes: 20 additions & 0 deletions pkg/providers/vsphere/mocks/client.go

Some generated files are not rendered by default.

30 changes: 30 additions & 0 deletions pkg/providers/vsphere/spec.go
@@ -37,6 +37,36 @@ func (s *Spec) machineConfigs() []*anywherev1.VSphereMachineConfig {
return machineConfigs
}

// MachineConfigCount represents a machineConfig with its associated count.
type MachineConfigCount struct {
*anywherev1.VSphereMachineConfig
Count int
}

func (s *Spec) machineConfigsWithCount() []MachineConfigCount {
machineConfigs := make([]MachineConfigCount, 0, len(s.VSphereMachineConfigs))
cpMachineConfig := MachineConfigCount{
VSphereMachineConfig: s.controlPlaneMachineConfig(),
Count: s.Cluster.Spec.ControlPlaneConfiguration.Count,
}
machineConfigs = append(machineConfigs, cpMachineConfig)
if s.etcdMachineConfig() != nil {
etcdMachineConfig := MachineConfigCount{
VSphereMachineConfig: s.etcdMachineConfig(),
Count: s.Cluster.Spec.ExternalEtcdConfiguration.Count,
}
machineConfigs = append(machineConfigs, etcdMachineConfig)
}
for _, wc := range s.Cluster.Spec.WorkerNodeGroupConfigurations {
workerNodeGroupConfig := MachineConfigCount{
VSphereMachineConfig: s.workerMachineConfig(wc),
Count: *wc.Count,
}
machineConfigs = append(machineConfigs, workerNodeGroupConfig)
}
return machineConfigs
}

func etcdMachineConfig(s *cluster.Spec) *anywherev1.VSphereMachineConfig {
if s.Cluster.Spec.ExternalEtcdConfiguration == nil || s.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef == nil {
return nil
107 changes: 106 additions & 1 deletion pkg/providers/vsphere/vsphere.go
@@ -52,6 +52,7 @@
backOffPeriod = 5 * time.Second
disk1 = "Hard disk 1"
disk2 = "Hard disk 2"
MemoryAvailable = "Memory_Available"
ethtoolDaemonSetName = "vsphere-disable-udp-offload"
)

@@ -122,6 +123,7 @@
CreateRole(ctx context.Context, name string, privileges []string) error
SetGroupRoleOnObject(ctx context.Context, principal string, role string, object string, domain string) error
GetHardDiskSize(ctx context.Context, vm, datacenter string) (map[string]float64, error)
GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool string, args ...string) (map[string]int, error)
}

type ProviderKubectlClient interface {
@@ -338,7 +340,9 @@
if err := p.validateDatastoreUsageForCreate(ctx, vSphereClusterSpec); err != nil {
return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err)
}

if err := p.validateMemoryUsage(ctx, vSphereClusterSpec, nil); err != nil {
return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err)
}
if err := p.generateSSHKeysIfNotSet(clusterSpec.VSphereMachineConfigs); err != nil {
return fmt.Errorf("failed setup and validations: %v", err)
}
@@ -419,6 +423,10 @@
return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err)
}

if err := p.validateMemoryUsage(ctx, vSphereClusterSpec, cluster); err != nil {
return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err)
}

if !p.skippedValidations[validations.VSphereUserPriv] {
if err := p.validator.validateVsphereUserPrivs(ctx, vSphereClusterSpec); err != nil {
return fmt.Errorf("validating vsphere user privileges: %v", err)
@@ -590,6 +598,103 @@
return nil
}

// getPrevMachineConfigMemoryUsage returns the memoryMiB freed up from the given machineConfig based on the count.
func (p *vsphereProvider) getPrevMachineConfigMemoryUsage(ctx context.Context, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, machineConfigCount int) (memoryMiB int, err error) {
em, err := p.providerKubectlClient.GetEksaVSphereMachineConfig(ctx, mc.Name, cluster.KubeconfigFile, mc.GetNamespace())
if err != nil {
return 0, err
}
if em != nil && em.Spec.ResourcePool == mc.Spec.ResourcePool {
return em.Spec.MemoryMiB * machineConfigCount, nil
}
return 0, nil
}

// getMachineConfigMemoryAvailability accepts a machine config and returns available memory in the config's resource pool along with needed memory for the machine config.
func (p *vsphereProvider) getMachineConfigMemoryAvailability(ctx context.Context, datacenter string, mc *v1alpha1.VSphereMachineConfig, machineConfigCount int) (availableMemoryMiB, needMemoryMiB int, err error) {
poolInfo, err := p.providerGovcClient.GetResourcePoolInfo(ctx, datacenter, mc.Spec.ResourcePool)
if err != nil {
return 0, 0, err
}
needMemoryMiB = mc.Spec.MemoryMiB * machineConfigCount
return poolInfo[MemoryAvailable], needMemoryMiB, nil
}

// updateMemoryUsageMap updates the memory availability for the machine config's resource pool.
func updateMemoryUsageMap(mc *v1alpha1.VSphereMachineConfig, needMiB, availableMiB int, mu map[string]int) {
if _, ok := mu[mc.Spec.ResourcePool]; !ok {
mu[mc.Spec.ResourcePool] = availableMiB
}
// needMiB can be ignored when the resource pool memory limit is unset
if availableMiB != -1 {
mu[mc.Spec.ResourcePool] -= needMiB
}
}

func addPrevMachineConfigMemoryUsage(mc *v1alpha1.VSphereMachineConfig, prevUsage int, memoryUsage map[string]int) {
// when the memory limit for the respective resource pool is unset, skip accounting for previous usage and validating the needed memory
if _, ok := memoryUsage[mc.Spec.ResourcePool]; ok && memoryUsage[mc.Spec.ResourcePool] != -1 {
memoryUsage[mc.Spec.ResourcePool] += prevUsage
}
}
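
To make the per-pool bookkeeping concrete, here is a small standalone sketch (hypothetical pool names, counts, and sizes; the updateUsage name is illustrative) that mirrors updateMemoryUsageMap above, including the -1 sentinel used for pools with no memory limit:

package main

import "fmt"

// updateUsage mirrors updateMemoryUsageMap: record the pool's available memory
// the first time the pool is seen, then subtract the requested memory unless
// the pool is unlimited (-1).
func updateUsage(pool string, needMiB, availableMiB int, usage map[string]int) {
	if _, ok := usage[pool]; !ok {
		usage[pool] = availableMiB
	}
	if availableMiB != -1 {
		usage[pool] -= needMiB
	}
}

func main() {
	usage := map[string]int{}
	updateUsage("pool-a", 3*4096, 8192, usage) // control plane: 3 x 4096 MiB in pool-a (8192 MiB free)
	updateUsage("pool-a", 2*4096, 8192, usage) // workers: 2 x 4096 MiB in the same pool
	updateUsage("pool-b", 3*8192, -1, usage)   // etcd in a pool with no memory limit
	fmt.Println(usage) // map[pool-a:-12288 pool-b:-1]
}

validateMemoryUsage below then rejects the spec if any pool ends up with a negative remaining value (other than the -1 sentinel), which is the case for pool-a in this sketch.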

func (p *vsphereProvider) validateMemoryUsage(ctx context.Context, clusterSpec *Spec, cluster *types.Cluster) error {
memoryUsage := make(map[string]int)
datacenter := clusterSpec.VSphereDatacenter.Spec.Datacenter
for _, mc := range clusterSpec.machineConfigsWithCount() {
availableMemoryMiB, needMemoryMiB, err := p.getMachineConfigMemoryAvailability(ctx, datacenter, mc.VSphereMachineConfig, mc.Count)
if err != nil {
return fmt.Errorf("calculating memory usage for machine config %v: %v", mc.VSphereMachineConfig.ObjectMeta.Name, err)
}
updateMemoryUsageMap(mc.VSphereMachineConfig, needMemoryMiB, availableMemoryMiB, memoryUsage)
}
// account for previous cluster resources that are freed up during upgrade.
if cluster != nil {
err := p.updatePrevClusterMemoryUsage(ctx, clusterSpec, cluster, memoryUsage)
if err != nil {
return err
}
}
for resourcePool, remainingMiB := range memoryUsage {
if remainingMiB != -1 && remainingMiB < 0 {
return fmt.Errorf("not enough memory available in resource pool %v for the given memoryMiB and count of the respective machine groups", resourcePool)
}
}
logger.V(5).Info("Memory availability for machine configs in requested resource pool validated")
return nil
}

// updatePrevClusterMemoryUsage calculates the memory freed up from the previous cluster's control plane and worker nodes during upgrade and adds it back to the available memory for the corresponding resource pool.
func (p *vsphereProvider) updatePrevClusterMemoryUsage(ctx context.Context, clusterSpec *Spec, cluster *types.Cluster, memoryUsage map[string]int) error {
prevEksaCluster, err := p.providerKubectlClient.GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName())
if err != nil {
return err
}
prevMachineConfigRefs := machineRefSliceToMap(prevEksaCluster.MachineConfigRefs())
if _, ok := prevMachineConfigRefs[clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name]; ok {
cpMachineConfig := clusterSpec.controlPlaneMachineConfig()
// The last CP machine is deleted only after the desired number of new CP machines are rolled out, so don't add its memory
prevCPusage, err := p.getPrevMachineConfigMemoryUsage(ctx, cpMachineConfig, cluster, prevEksaCluster.Spec.ControlPlaneConfiguration.Count-1)
if err != nil {
return fmt.Errorf("calculating previous memory usage for control plane: %v", err)
}
addPrevMachineConfigMemoryUsage(cpMachineConfig, prevCPusage, memoryUsage)
}

for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations {
workerMachineConfig := clusterSpec.workerMachineConfig(workerNodeGroupConfiguration)
if _, ok := prevMachineConfigRefs[workerNodeGroupConfiguration.MachineGroupRef.Name]; ok {
prevCount := *workerNodeGroupConfiguration.Count
// The last worker machine is deleted only after the desired number of new worker machines are rolled out, so don't add its memory
prevWorkerUsage, err := p.getPrevMachineConfigMemoryUsage(ctx, workerMachineConfig, cluster, prevCount-1)
if err != nil {
return fmt.Errorf("calculating previous memory usage for worker node group - %v: %v", workerMachineConfig.Name, err)
}
addPrevMachineConfigMemoryUsage(workerMachineConfig, prevWorkerUsage, memoryUsage)
}
}
return nil
}
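
As a rough worked example of the upgrade-time accounting above (hypothetical numbers): if the previous cluster ran a 3-node control plane with 8192 MiB per machine in the same resource pool, getPrevMachineConfigMemoryUsage reports (3-1)*8192 = 16384 MiB as reclaimable, and addPrevMachineConfigMemoryUsage adds that back to the pool's remaining memory before the final negative-balance check; the last old machine is excluded because it is removed only after its replacement has rolled out.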

func (p *vsphereProvider) UpdateSecrets(ctx context.Context, cluster *types.Cluster, _ *cluster.Spec) error {
var contents bytes.Buffer
err := p.createSecret(ctx, cluster, &contents)