Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nutanix GPU support implementation #8745

Merged
merged 3 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions pkg/api/v1alpha1/nutanixmachineconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,17 @@
// NutanixIdentifierType is an enumeration of different resource identifier types.
type NutanixIdentifierType string

// NutanixGPUIdentifierType is an enumeration of different GPU identifier types.
type NutanixGPUIdentifierType string

// String returns the identifier type as its underlying string value, which
// lets NutanixIdentifierType satisfy fmt.Stringer.
func (c NutanixIdentifierType) String() string {
	identifier := string(c)
	return identifier
}

func (c NutanixGPUIdentifierType) String() string {
return string(c)

Check warning on line 21 in pkg/api/v1alpha1/nutanixmachineconfig.go

View check run for this annotation

Codecov / codecov/patch

pkg/api/v1alpha1/nutanixmachineconfig.go#L20-L21

Added lines #L20 - L21 were not covered by tests
}

const (
// NutanixMachineConfigKind is the kind for a NutanixMachineConfig.
NutanixMachineConfigKind = "NutanixMachineConfig"
Expand All @@ -23,6 +30,11 @@
// NutanixIdentifierName is a resource identifier identifying the object by Name.
NutanixIdentifierName NutanixIdentifierType = "name"

// NutanixGPUIdentifierDeviceID is a GPU identifier identifying the object by DeviceID.
NutanixGPUIdentifierDeviceID NutanixGPUIdentifierType = "deviceID"
// NutanixGPUIdentifierName is a GPU identifier identifying the object by Name.
NutanixGPUIdentifierName NutanixGPUIdentifierType = "name"

defaultNutanixOSFamily = Ubuntu
defaultNutanixSystemDiskSizeGi = "40Gi"
defaultNutanixMemorySizeGi = "4Gi"
Expand Down Expand Up @@ -62,6 +74,22 @@
Value string `json:"value,omitempty"`
}

// NutanixGPUIdentifier identifies a GPU device to add to a VM, either by
// device ID or by name.
type NutanixGPUIdentifier struct {
// deviceID is the device ID of the GPU device.
// +optional
DeviceID *int64 `json:"deviceID,omitempty"`

// name is the name of the GPU device.
// +optional
Name string `json:"name,omitempty"`

// type is the type of the GPU identifier; allowed values are "deviceID" and "name".
// +kubebuilder:validation:Required
// +kubebuilder:validation:Enum:=deviceID;name
Type NutanixGPUIdentifierType `json:"type"`
}

// NutanixMachineConfigGenerateOpt is a functional option that can be passed to NewNutanixMachineConfigGenerate to
// customize the generated machine config
//
Expand Down
4 changes: 4 additions & 0 deletions pkg/api/v1alpha1/nutanixmachineconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ type NutanixMachineConfigSpec struct {
// Categories must be created in Prism Central before they can be used.
// +kubebuilder:validation:Optional
AdditionalCategories []NutanixCategoryIdentifier `json:"additionalCategories,omitempty"`

// List of GPU devices that should be added to the VMs.
// +kubebuilder:validation:Optional
GPUs []NutanixGPUIdentifier `json:"gpus,omitempty"`
}

// SetDefaults sets defaults to NutanixMachineConfig if user has not provided.
Expand Down
27 changes: 27 additions & 0 deletions pkg/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ const (
ConfigMapKind = "ConfigMap"
ClusterResourceSetKind = "ClusterResourceSet"

NutanixMachineConfigKind = "NutanixMachineConfig"

BottlerocketDefaultUser = "ec2-user"
UbuntuDefaultUser = "capv"

Expand Down
1 change: 1 addition & 0 deletions pkg/providers/nutanix/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

type Client interface {
GetSubnet(ctx context.Context, uuid string) (*v3.SubnetIntentResponse, error)
ListAllHost(ctx context.Context) (*v3.HostListResponse, error)
ListSubnet(ctx context.Context, getEntitiesRequest *v3.DSMetadata) (*v3.SubnetListIntentResponse, error)
GetImage(ctx context.Context, uuid string) (*v3.ImageIntentResponse, error)
ListImage(ctx context.Context, getEntitiesRequest *v3.DSMetadata) (*v3.ImageListIntentResponse, error)
Expand Down
12 changes: 12 additions & 0 deletions pkg/providers/nutanix/config/md-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ spec:
value: "{{ .Value }}"
{{- end }}
{{- end }}
{{- if .GPUs }}
gpus:
{{- range .GPUs }}
{{- if (eq .Type "deviceID") }}
- type: deviceID
deviceID: {{ .DeviceID }}
{{- else if (eq .Type "name") }}
- type: name
name: "{{ .Name }}"
{{- end }}
{{- end }}
{{- end }}
---
apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
kind: KubeadmConfigTemplate
Expand Down
15 changes: 15 additions & 0 deletions pkg/providers/nutanix/mocks/client.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/providers/nutanix/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ func TestNutanixProviderSetupAndValidateCreate(t *testing.T) {
},
}
mockClient.EXPECT().ListImage(gomock.Any(), gomock.Any()).Return(images, nil).AnyTimes()
mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil).AnyTimes()
mockCertValidator := mockCrypto.NewMockTlsValidator(ctrl)
mockCertValidator.EXPECT().ValidateCert(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil)
mockCertValidator.EXPECT().ValidateCert(gomock.Any(), gomock.Any(), gomock.Any()).Return(errors.New("invalid cert"))
Expand Down
4 changes: 4 additions & 0 deletions pkg/providers/nutanix/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,10 @@ func buildTemplateMapMD(clusterSpec *cluster.Spec, workerNodeGroupMachineSpec v1
values["additionalCategories"] = workerNodeGroupMachineSpec.AdditionalCategories
}

if len(workerNodeGroupMachineSpec.GPUs) > 0 {
values["GPUs"] = workerNodeGroupMachineSpec.GPUs
}

adiantum marked this conversation as resolved.
Show resolved Hide resolved
if workerNodeGroupConfiguration.KubeletConfiguration != nil {
wnKubeletConfig := workerNodeGroupConfiguration.KubeletConfiguration.Object
if _, ok := wnKubeletConfig["tlsCipherSuites"]; !ok {
Expand Down
46 changes: 46 additions & 0 deletions pkg/providers/nutanix/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,52 @@ func TestTemplateBuilderFailureDomains(t *testing.T) {
}
}

// TestTemplateBuilderGPUs generates the control plane and worker CAPI specs
// from a cluster fixture whose NutanixMachineConfig lists GPUs, and checks each
// rendered spec against its expected-results file.
func TestTemplateBuilderGPUs(t *testing.T) {
	type gpuTemplateCase struct {
		Input    string
		Output   string
		OutputMD string
	}

	cases := []gpuTemplateCase{
		{
			Input:    "testdata/eksa-cluster-gpus.yaml",
			Output:   "testdata/expected_results_gpus.yaml",
			OutputMD: "testdata/expected_results_gpus_md.yaml",
		},
	}

	// Every lookup map below is keyed by this one machine group name.
	const groupName = "eksa-unit-test"

	for _, tc := range cases {
		spec := test.NewFullClusterSpec(t, tc.Input)

		cpRefName := spec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name
		machineConfig := spec.NutanixMachineConfig(cpRefName)
		workerSpecs := map[string]anywherev1.NutanixMachineConfigSpec{
			groupName: machineConfig.Spec,
		}

		t.Setenv(constants.EksaNutanixUsernameKey, "admin")
		t.Setenv(constants.EksaNutanixPasswordKey, "password")
		credentials := GetCredsFromEnv()

		// The same machine spec is passed for both machine-spec arguments of the builder.
		builder := NewNutanixTemplateBuilder(
			&spec.NutanixDatacenter.Spec,
			&machineConfig.Spec,
			&machineConfig.Spec,
			workerSpecs,
			credentials,
			time.Now,
		)

		controlPlane, err := builder.GenerateCAPISpecControlPlane(spec)
		assert.NoError(t, err)
		assert.NotNil(t, controlPlane)
		test.AssertContentToFile(t, string(controlPlane), tc.Output)

		templateNames := map[string]string{groupName: groupName}
		kubeadmNames := map[string]string{groupName: groupName}

		workers, err := builder.GenerateCAPISpecWorkers(spec, templateNames, kubeadmNames)
		assert.NoError(t, err)
		test.AssertContentToFile(t, string(workers), tc.OutputMD)
	}
}

func minimalNutanixConfigSpec(t *testing.T) (*anywherev1.NutanixDatacenterConfig, *anywherev1.NutanixMachineConfig, map[string]anywherev1.NutanixMachineConfigSpec) {
dcConf := &anywherev1.NutanixDatacenterConfig{}
err := yaml.Unmarshal([]byte(nutanixDatacenterConfigSpec), dcConf)
Expand Down
75 changes: 75 additions & 0 deletions pkg/providers/nutanix/testdata/eksa-cluster-gpus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
apiVersion: anywhere.eks.amazonaws.com/v1alpha1
kind: Cluster
metadata:
name: eksa-unit-test
namespace: default
spec:
kubernetesVersion: "1.19"
controlPlaneConfiguration:
name: eksa-unit-test
count: 3
endpoint:
host: test-ip
machineGroupRef:
name: eksa-unit-test
kind: NutanixMachineConfig
workerNodeGroupConfigurations:
- count: 4
name: eksa-unit-test
machineGroupRef:
name: eksa-unit-test
kind: NutanixMachineConfig
datacenterRef:
kind: NutanixDatacenterConfig
name: eksa-unit-test
clusterNetwork:
cni: "cilium"
pods:
cidrBlocks:
- 192.168.0.0/16
services:
cidrBlocks:
- 10.96.0.0/12
---
apiVersion: anywhere.eks.amazonaws.com/v1alpha1
kind: NutanixDatacenterConfig
metadata:
name: eksa-unit-test
namespace: default
spec:
endpoint: "prism.nutanix.com"
port: 9440
credentialRef:
kind: Secret
name: "nutanix-credentials"
---
apiVersion: anywhere.eks.amazonaws.com/v1alpha1
kind: NutanixMachineConfig
metadata:
name: eksa-unit-test
namespace: default
spec:
vcpusPerSocket: 1
vcpuSockets: 4
memorySize: 8Gi
image:
type: "name"
name: "prism-image"
cluster:
type: "name"
name: "prism-cluster"
subnet:
type: "name"
name: "prism-subnet"
gpus:
- type: deviceID
deviceID: 8757
- type: name
name: "Ampere 40"
systemDiskSize: 40Gi
osFamily: "ubuntu"
users:
- name: "mySshUsername"
sshAuthorizedKeys:
- "mySshAuthorizedKey"
---
Loading
Loading