feat: Update CBR example to use EKS managed node group (aws-ia#1972)
bryantbiggs authored Jul 5, 2024
1 parent 370e1b3 commit 630d220
Showing 9 changed files with 78 additions and 19 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/streetsidesoftware/cspell-cli
rev: v8.8.2
rev: v8.10.0
hooks:
- id: cspell
args: [--exclude, 'ADOPTERS.md', --exclude, '.pre-commit-config.yaml', --exclude, '.gitignore', --exclude, '*.drawio', --exclude, 'mkdocs.yml', --exclude, '.helmignore', --exclude, '.github/workflows/*', --exclude, 'patterns/istio-multi-cluster/*', --exclude, 'patterns/blue-green-upgrade/*']
@@ -19,7 +19,7 @@ repos:
- id: detect-aws-credentials
args: [--allow-missing-credentials]
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.91.0
rev: v1.92.0
hooks:
- id: terraform_fmt
- id: terraform_docs
11 changes: 4 additions & 7 deletions patterns/ml-capacity-block/README.md
@@ -1,13 +1,10 @@
# EKS w/ ML Capacity Block Reservation (CBR)

This pattern demonstrates how to consume/utilize ML capacity block reservations (CBR) with Amazon EKS. The solution is comprised of primarily 2 components:

1. The self-managed node group that will utilize the CBR should have the subnets provided to it restricted to the availability zone where the CBR has been allocated. For example - if the CBR is allocated to `us-west-2b`, the node group should only have subnet IDs provided to it that reside in `us-west-2b`. If the subnets that reside in other AZs are provided, its possible to encounter an error such as `InvalidParameterException: The following supplied instance types do not exist ...`. It is not guaranteed that this error will always be shown, and may appear random since the underlying autoscaling group(s) will provision nodes into different AZs at random. It will only occur when the underlying autoscaling group tries to provision instances into an AZ where capacity is not allocated and there is insufficient on-demand capacity for the desired instance type.

!!! warning
The use of self-managed node group(s) are required at this time to support capacity block reservations within EKS. This pattern will be updated to demonstrate EKS managed node groups once support has been implemented by the EKS service.
This pattern demonstrates how to consume/utilize ML capacity block reservations (CBR) with Amazon EKS. The solution is primarily comprised of the following components:

1. The node group that will utilize the CBR, whether EKS managed or self-managed, should be restricted to subnets in the availability zone where the CBR has been allocated. For example, if the CBR is allocated to `us-west-2b`, the node group should only be given subnet IDs that reside in `us-west-2b`. If subnets in other AZs are provided, it's possible to encounter an error such as `InvalidParameterException: The following supplied instance types do not exist ...`. This error is not guaranteed to appear and may seem to occur at random, since the underlying autoscaling group(s) provision nodes into AZs at random; it only occurs when the underlying autoscaling group tries to provision instances into an AZ where capacity is not allocated and there is insufficient on-demand capacity for the desired instance type.
2. The launch template should specify the `instance_market_options` and `capacity_reservation_specification` arguments. This is how the node group utilizes the CBR (i.e., it tells the autoscaling group to launch instances into the provided capacity reservation).
3. In the case of EKS managed node group(s), the `capacity_type` should be set to `"CAPACITY_BLOCK"` (see the sketch following this list).
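
The snippet below is a minimal sketch of how these settings come together on an EKS managed node group; it reuses the `capacity_reservation_id` variable and VPC module from the full `eks.tf` shown under the Code section below and omits the rest of the cluster configuration.

```terraform
# Condensed sketch only -- see eks.tf below for the full, working configuration.
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 20.17"

  # ... cluster name, version, VPC/subnet inputs, etc. elided ...

  eks_managed_node_groups = {
    cbr = {
      ami_type       = "AL2_x86_64_GPU"
      instance_types = ["p5.48xlarge"]

      # 1. Only the subnet(s) in the AZ where the CBR has been allocated
      subnet_ids = [element(module.vpc.private_subnets, 0)]

      # 2. Launch instances against the capacity block reservation
      instance_market_options = {
        market_type = "capacity-block"
      }
      capacity_reservation_specification = {
        capacity_reservation_target = {
          capacity_reservation_id = var.capacity_reservation_id
        }
      }

      # 3. Required for EKS managed node groups consuming an ML capacity block
      capacity_type = "CAPACITY_BLOCK"
    }
  }
}
```

A self-managed node group takes the same `subnet_ids`, `instance_market_options`, and `capacity_reservation_specification` arguments; only `capacity_type = "CAPACITY_BLOCK"` is specific to EKS managed node groups.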

<b>Links:</b>

@@ -16,7 +13,7 @@ This pattern demonstrates how to consume/utilize ML capacity block reservations

## Code

```terraform hl_lines="5-11 54-56 84-92"
```terraform hl_lines="5-11 80-94 106-109 138-151"
{% include "../../patterns/ml-capacity-block/eks.tf" %}
```

67 changes: 63 additions & 4 deletions patterns/ml-capacity-block/eks.tf
@@ -16,7 +16,7 @@ variable "capacity_reservation_id" {

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 20.11"
version = "~> 20.17"

cluster_name = local.name
cluster_version = "1.30"
@@ -41,6 +41,58 @@ module "eks" {
subnet_ids = module.vpc.private_subnets

eks_managed_node_groups = {
cbr = {
# The EKS AL2 GPU AMI provides all of the necessary components
# for accelerated workloads w/ EFA
ami_type = "AL2_x86_64_GPU"
instance_types = ["p5.48xlarge"]

pre_bootstrap_user_data = <<-EOT
# Mount instance store volumes in RAID-0 for kubelet and containerd
# https://github.com/awslabs/amazon-eks-ami/blob/master/doc/USER_GUIDE.md#raid-0-for-kubelet-and-containerd-raid0
/bin/setup-local-disks raid0
EOT

min_size = 2
max_size = 2
desired_size = 2

# This will:
# 1. Create a placement group to place the instances close to one another
# 2. Ignore subnets that reside in AZs that do not support the instance type
# 3. Expose all of the available EFA interfaces on the launch template
enable_efa_support = true

labels = {
"vpc.amazonaws.com/efa.present" = "true"
"nvidia.com/gpu.present" = "true"
}

taints = {
# Ensure only GPU workloads are scheduled on this node group
gpu = {
key = "nvidia.com/gpu"
value = "true"
effect = "NO_SCHEDULE"
}
}

# First subnet is in the "${local.region}a" availability zone
# where the capacity reservation is created
# TODO - Update the subnet to match the availability zone of YOUR capacity reservation
subnet_ids = [element(module.vpc.private_subnets, 0)]

# ML capacity block reservation
capacity_type = "CAPACITY_BLOCK"
instance_market_options = {
market_type = "capacity-block"
}
capacity_reservation_specification = {
capacity_reservation_target = {
capacity_reservation_id = var.capacity_reservation_id
}
}
}
# This node group is for core addons such as CoreDNS
default = {
instance_types = ["m5.large"]
@@ -51,10 +103,12 @@
}
}

# Note: ML capacity block reservations are only supported
# on self-managed node groups at this time
# Self-managed node group equivalent for ML capacity block reservation.
# This is not required since EKS managed node groups support ML CBR;
# it is included only to show usage with both node group types. Users should
# select the one that works for their use case.
self_managed_node_groups = {
cbr = {
cbr2 = {
# The EKS AL2 GPU AMI provides all of the necessary components
# for accelerated workloads w/ EFA
ami_type = "AL2_x86_64_GPU"
@@ -81,6 +135,11 @@
# 3. Expose all of the available EFA interfaces on the launch template
enable_efa_support = true

# First subnet is in the "${local.region}a" availability zone
# where the capacity reservation is created
# TODO - Update the subnet to match the availability zone of YOUR capacity reservation
subnet_ids = [element(module.vpc.private_subnets, 0)]

# ML capacity block reservation
instance_market_options = {
market_type = "capacity-block"
2 changes: 1 addition & 1 deletion patterns/ml-capacity-block/helm.tf
@@ -30,7 +30,7 @@ resource "helm_release" "aws_efa_device_plugin" {
name = "aws-efa-k8s-device-plugin"
repository = "https://aws.github.io/eks-charts"
chart = "aws-efa-k8s-device-plugin"
version = "v0.4.4"
version = "v0.5.2"
namespace = "kube-system"
wait = false

2 changes: 1 addition & 1 deletion patterns/ml-capacity-block/main.tf
@@ -4,7 +4,7 @@ terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = ">= 5.34"
version = ">= 5.57"
}
helm = {
source = "hashicorp/helm"
2 changes: 1 addition & 1 deletion patterns/nvidia-gpu-efa/eks.tf
@@ -4,7 +4,7 @@

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 20.11"
version = "~> 20.17"

cluster_name = local.name
cluster_version = "1.30"
2 changes: 1 addition & 1 deletion patterns/nvidia-gpu-efa/helm.tf
@@ -30,7 +30,7 @@ resource "helm_release" "aws_efa_device_plugin" {
name = "aws-efa-k8s-device-plugin"
repository = "https://aws.github.io/eks-charts"
chart = "aws-efa-k8s-device-plugin"
version = "v0.4.4"
version = "v0.5.2"
namespace = "kube-system"
wait = false

5 changes: 4 additions & 1 deletion patterns/targeted-odcr/eks.tf
@@ -13,7 +13,7 @@ variable "capacity_reservation_arns" {

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 20.11"
version = "~> 20.17"

cluster_name = local.name
cluster_version = "1.30"
@@ -80,7 +80,10 @@ module "eks" {

# First subnet is in the "${local.region}a" availability zone
# where the capacity reservation is created
# TODO - Update the subnet to match the availability zone of YOUR capacity reservation
subnet_ids = [element(module.vpc.private_subnets, 0)]

# Targeted on-demand capacity reservation
capacity_reservation_specification = {
capacity_reservation_target = {
capacity_reservation_resource_group_arn = aws_resourcegroups_group.odcr.arn
2 changes: 1 addition & 1 deletion patterns/targeted-odcr/helm.tf
@@ -30,7 +30,7 @@ resource "helm_release" "aws_efa_device_plugin" {
name = "aws-efa-k8s-device-plugin"
repository = "https://aws.github.io/eks-charts"
chart = "aws-efa-k8s-device-plugin"
version = "v0.4.4"
version = "v0.5.2"
namespace = "kube-system"
wait = false
