-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add build scripts for building Nvidia and Neuron AMIs based on AL2023 (…
…#1924) Co-authored-by: Carter <[email protected]> Co-authored-by: Nikolay Kvetsinski <[email protected]>
- Loading branch information
1 parent
32bd8b4
commit a943086
Showing
33 changed files
with
2,337 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package containerd | ||
|
||
import ( | ||
"slices" | ||
"strings" | ||
|
||
"go.uber.org/zap" | ||
) | ||
|
||
// instanceOptions holds the containerd runtime settings selected for an
// instance type.
type instanceOptions struct {
	// RuntimeName is the name of the containerd runtime, e.g. "runc".
	RuntimeName string
	// RuntimeBinaryName is the path of the binary backing that runtime.
	RuntimeBinaryName string
}

// instanceTypeMixin pairs a set of instance families with the function that
// produces the runtime options for instances in those families.
type instanceTypeMixin struct {
	// instanceFamilies lists the family prefixes (the part of an instance
	// type before the first ".") that this mixin applies to.
	instanceFamilies []string
	// apply returns the options to use when the mixin matches.
	apply func() instanceOptions
}

// matches reports whether instanceType belongs to one of the mixin's
// instance families. The family is everything before the first "." in
// instanceType; a value without a "." is treated as the family itself.
func (m *instanceTypeMixin) matches(instanceType string) bool {
	// Cut splits once without allocating the intermediate slice that
	// Split would build just to read its first element.
	instanceFamily, _, _ := strings.Cut(instanceType, ".")
	return slices.Contains(m.instanceFamilies, instanceFamily)
}
|
||
var ( | ||
// TODO: fetch this list dynamically | ||
nvidiaInstances = []string{"p3", "p3dn", "p4d", "p4de", "p5", "g4", "g4dn", "g5", "g6", "g6e"} | ||
NvidiaInstanceTypeMixin = instanceTypeMixin{ | ||
instanceFamilies: nvidiaInstances, | ||
apply: applyNvidia, | ||
} | ||
|
||
mixins = []instanceTypeMixin{ | ||
NvidiaInstanceTypeMixin, | ||
} | ||
) | ||
|
||
// Runtime names and binary paths returned as instanceOptions.
const (
	// NVIDIA container runtime.
	nvidiaRuntimeName       = "nvidia"
	nvidiaRuntimeBinaryName = "/usr/bin/nvidia-container-runtime"

	// Default runc runtime, used when no mixin matches.
	defaultRuntimeName       = "runc"
	defaultRuntimeBinaryName = "/usr/sbin/runc"
)
|
||
// applyInstanceTypeMixins adds the needed OCI hook options to containerd config.toml | ||
// based on the instance family | ||
func applyInstanceTypeMixins(instanceType string) instanceOptions { | ||
for _, mixin := range mixins { | ||
if mixin.matches(instanceType) { | ||
return mixin.apply() | ||
} | ||
} | ||
zap.L().Info("No instance specific containerd runtime configuration needed..", zap.String("instanceType", instanceType)) | ||
return applyDefault() | ||
} | ||
|
||
// applyNvidia adds the needed NVIDIA containerd options | ||
func applyNvidia() instanceOptions { | ||
zap.L().Info("Configuring NVIDIA runtime..") | ||
return instanceOptions{RuntimeName: nvidiaRuntimeName, RuntimeBinaryName: nvidiaRuntimeBinaryName} | ||
} | ||
|
||
// applyDefault adds the default runc containerd options | ||
func applyDefault() instanceOptions { | ||
zap.L().Info("Configuring default runtime..") | ||
return instanceOptions{RuntimeName: defaultRuntimeName, RuntimeBinaryName: defaultRuntimeBinaryName} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package containerd | ||
|
||
import ( | ||
"reflect" | ||
"testing" | ||
) | ||
|
||
func TestApplyInstanceTypeMixins(t *testing.T) { | ||
|
||
var nvidiaExpectedOutput = instanceOptions{RuntimeName: "nvidia", RuntimeBinaryName: "/usr/bin/nvidia-container-runtime"} | ||
var neuronExpectedOutput = instanceOptions{RuntimeName: "runc", RuntimeBinaryName: "/usr/sbin/runc"} | ||
var nonAcceleratedExpectedOutput = instanceOptions{RuntimeName: "runc", RuntimeBinaryName: "/usr/sbin/runc"} | ||
|
||
var tests = []struct { | ||
name string | ||
instanceType string | ||
expectedOutput instanceOptions | ||
}{ | ||
{name: "nvidia_test", instanceType: "p5.xlarge", expectedOutput: nvidiaExpectedOutput}, | ||
{name: "neuron_test", instanceType: "inf2.xlarge", expectedOutput: neuronExpectedOutput}, | ||
// non accelerated instance | ||
{name: "non_accelerated_test", instanceType: "m5.xlarge", expectedOutput: nonAcceleratedExpectedOutput}, | ||
} | ||
for _, test := range tests { | ||
expected := applyInstanceTypeMixins(test.instanceType) | ||
|
||
if !reflect.DeepEqual(expected, test.expectedOutput) { | ||
t.Fatalf("unexpected output in test case %s: %s, expecting: %s", test.name, expected, test.expectedOutput) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
nodeadm/test/e2e/cases/containerd-runtime-config-neuron/config.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
--- | ||
apiVersion: node.eks.aws/v1alpha1 | ||
kind: NodeConfig | ||
spec: | ||
cluster: | ||
name: my-cluster | ||
apiServerEndpoint: https://example.com | ||
certificateAuthority: Y2VydGlmaWNhdGVBdXRob3JpdHk= | ||
cidr: 10.100.0.0/16 | ||
containerd: | ||
config: | | ||
version = 2 | ||
[grpc] | ||
address = "/run/foo/foo.sock" | ||
[plugins."io.containerd.grpc.v1.cri".containerd] | ||
discard_unpacked_layers = false |
30 changes: 30 additions & 0 deletions
30
nodeadm/test/e2e/cases/containerd-runtime-config-neuron/expected-containerd-config.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
root = '/var/lib/containerd' | ||
state = '/run/containerd' | ||
version = 2 | ||
|
||
[grpc] | ||
address = '/run/foo/foo.sock' | ||
|
||
[plugins] | ||
[plugins.'io.containerd.grpc.v1.cri'] | ||
sandbox_image = '602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/pause:3.5' | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.cni] | ||
bin_dir = '/opt/cni/bin' | ||
conf_dir = '/etc/cni/net.d' | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.containerd] | ||
default_runtime_name = 'runc' | ||
discard_unpacked_layers = false | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes] | ||
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc] | ||
base_runtime_spec = '/etc/containerd/base-runtime-spec.json' | ||
runtime_type = 'io.containerd.runc.v2' | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.runc.options] | ||
BinaryName = '/usr/sbin/runc' | ||
SystemdCgroup = true | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.registry] | ||
config_path = '/etc/containerd/certs.d:/etc/docker/certs.d' |
15 changes: 15 additions & 0 deletions
15
nodeadm/test/e2e/cases/containerd-runtime-config-neuron/run.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/usr/bin/env bash | ||
|
||
# e2e case: run nodeadm with config.yaml and compare the resulting | ||
# containerd config against expected-containerd-config.toml. | ||
|
||
# Abort on any command failure, unset variable, or failed pipeline stage. | ||
set -o errexit | ||
set -o nounset | ||
set -o pipefail | ||
|
||
# NOTE(review): presumably defines the mock::, wait:: and assert:: helpers | ||
# used below -- confirm against helpers.sh. | ||
source /helpers.sh | ||
|
||
# The config file name suggests an inf1 instance is being mocked -- TODO confirm. | ||
mock::aws /etc/aemm-inf1-config.json | ||
mock::kubelet 1.27.0 | ||
wait::dbus-ready | ||
|
||
# Invoke nodeadm init with the local config.yaml, passing `--skip run`. | ||
nodeadm init --skip run --config-source file://config.yaml | ||
|
||
# The containerd config on disk is checked against the fixture in this directory. | ||
assert::files-equal /etc/containerd/config.toml expected-containerd-config.toml |
18 changes: 18 additions & 0 deletions
18
nodeadm/test/e2e/cases/containerd-runtime-config-nvidia/config.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
--- | ||
apiVersion: node.eks.aws/v1alpha1 | ||
kind: NodeConfig | ||
spec: | ||
cluster: | ||
name: my-cluster | ||
apiServerEndpoint: https://example.com | ||
certificateAuthority: Y2VydGlmaWNhdGVBdXRob3JpdHk= | ||
cidr: 10.100.0.0/16 | ||
containerd: | ||
config: | | ||
version = 2 | ||
[grpc] | ||
address = "/run/foo/foo.sock" | ||
[plugins."io.containerd.grpc.v1.cri".containerd] | ||
discard_unpacked_layers = false |
30 changes: 30 additions & 0 deletions
30
nodeadm/test/e2e/cases/containerd-runtime-config-nvidia/expected-containerd-config.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
root = '/var/lib/containerd' | ||
state = '/run/containerd' | ||
version = 2 | ||
|
||
[grpc] | ||
address = '/run/foo/foo.sock' | ||
|
||
[plugins] | ||
[plugins.'io.containerd.grpc.v1.cri'] | ||
sandbox_image = '602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/pause:3.5' | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.cni] | ||
bin_dir = '/opt/cni/bin' | ||
conf_dir = '/etc/cni/net.d' | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.containerd] | ||
default_runtime_name = 'nvidia' | ||
discard_unpacked_layers = false | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes] | ||
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.nvidia] | ||
base_runtime_spec = '/etc/containerd/base-runtime-spec.json' | ||
runtime_type = 'io.containerd.runc.v2' | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.nvidia.options] | ||
BinaryName = '/usr/bin/nvidia-container-runtime' | ||
SystemdCgroup = true | ||
|
||
[plugins.'io.containerd.grpc.v1.cri'.registry] | ||
config_path = '/etc/containerd/certs.d:/etc/docker/certs.d' |
15 changes: 15 additions & 0 deletions
15
nodeadm/test/e2e/cases/containerd-runtime-config-nvidia/run.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/usr/bin/env bash | ||
|
||
# e2e case: run nodeadm with config.yaml and compare the resulting | ||
# containerd config against expected-containerd-config.toml. | ||
|
||
# Abort on any command failure, unset variable, or failed pipeline stage. | ||
set -o errexit | ||
set -o nounset | ||
set -o pipefail | ||
|
||
# NOTE(review): presumably defines the mock::, wait:: and assert:: helpers | ||
# used below -- confirm against helpers.sh. | ||
source /helpers.sh | ||
|
||
# The config file name suggests a g5 instance is being mocked -- TODO confirm. | ||
mock::aws /etc/aemm-g5-config.json | ||
mock::kubelet 1.27.0 | ||
wait::dbus-ready | ||
|
||
# Invoke nodeadm init with the local config.yaml, passing `--skip run`. | ||
nodeadm init --skip run --config-source file://config.yaml | ||
|
||
# The containerd config on disk is checked against the fixture in this directory. | ||
assert::files-equal /etc/containerd/config.toml expected-containerd-config.toml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.