Skip to content

Commit

Permalink
made efa optional
Browse files Browse the repository at this point in the history
  • Loading branch information
vara-bonthu committed Apr 5, 2024
1 parent 30b5cc2 commit e5dfd01
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 5 deletions.
4 changes: 4 additions & 0 deletions ai-ml/jark-stack/terraform/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Docs coming soon...
| Name | Version |
|------|---------|
| <a name="provider_aws"></a> [aws](#provider\_aws) | >= 3.72 |
| <a name="provider_aws.ecr"></a> [aws.ecr](#provider\_aws.ecr) | >= 3.72 |
| <a name="provider_kubernetes"></a> [kubernetes](#provider\_kubernetes) | >= 2.10 |

## Modules
Expand All @@ -41,13 +42,16 @@ Docs coming soon...
| [kubernetes_namespace_v1.jupyterhub](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource |
| [kubernetes_secret_v1.huggingface_token](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/secret_v1) | resource |
| [kubernetes_storage_class.default_gp3](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/storage_class) | resource |
| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source |
| [aws_ecrpublic_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecrpublic_authorization_token) | data source |
| [aws_eks_cluster_auth.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source |

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_eks_cluster_version"></a> [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.29"` | no |
| <a name="input_enable_aws_efa_k8s_device_plugin"></a> [enable\_aws\_efa\_k8s\_device\_plugin](#input\_enable\_aws\_efa\_k8s\_device\_plugin) | Enable AWS EFA K8s Device Plugin | `bool` | `false` | no |
| <a name="input_huggingface_token"></a> [huggingface\_token](#input\_huggingface\_token) | Hugging Face Secret Token | `string` | `"DUMMY_TOKEN_REPLACE_ME"` | no |
| <a name="input_name"></a> [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"jark-stack"` | no |
| <a name="input_region"></a> [region](#input\_region) | region | `string` | `"us-west-2"` | no |
Expand Down
7 changes: 6 additions & 1 deletion ai-ml/jark-stack/terraform/addons.tf
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,16 @@ module "data_addons" {
#---------------------------------------
# EFA Device Plugin Add-on
#---------------------------------------
enable_aws_efa_k8s_device_plugin = true
# IMPORTANT: Enable the EFA device plugin only when it will be scheduled on nodes that have EFA devices attached.
# Otherwise, you'll encounter the "No devices found..." error. Restart the pod after attaching an EFA device, or use a node selector to prevent incompatible scheduling.
enable_aws_efa_k8s_device_plugin = var.enable_aws_efa_k8s_device_plugin
aws_efa_k8s_device_plugin_helm_config = {
values = [file("${path.module}/helm-values/aws-efa-k8s-device-plugin-values.yaml")]
}

#---------------------------------------------------------------
# Karpenter Resources Add-on
#---------------------------------------------------------------
enable_karpenter_resources = true
karpenter_resources_helm_config = {
g5-gpu-karpenter = {
Expand Down
3 changes: 0 additions & 3 deletions ai-ml/jark-stack/terraform/install.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#!/bin/bash

read -p "Enter the region: " region
export AWS_DEFAULT_REGION=$region

# List of Terraform modules to apply in sequence
targets=(
"module.vpc"
Expand Down
15 changes: 14 additions & 1 deletion ai-ml/jark-stack/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ provider "aws" {
region = local.region
}

# Aliased AWS provider pinned to `us-east-1` for Amazon ECR Public.
# ECR Public always authenticates against the `us-east-1` region, independent of
# the region the default `aws` provider is configured with.
# Docs -> https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html
provider "aws" {
  region = "us-east-1"
  alias  = "ecr"
}

provider "kubernetes" {
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
Expand All @@ -27,10 +34,16 @@ data "aws_eks_cluster_auth" "this" {
name = module.eks.cluster_name
}

# Availability Zones of the region the default `aws` provider points at.
# `local.azs` takes the first two names from this list, so the lookup is limited
# to zones that require no opt-in; without the filter, opted-in Local Zones can
# appear in `names` and end up being selected.
data "aws_availability_zones" "available" {
  filter {
    name   = "opt-in-status"
    values = ["opt-in-not-required"]
  }
}

# Authorization token for Amazon ECR Public.
# Requested through the aliased `aws.ecr` provider, which is pinned to `us-east-1`.
# NOTE(review): the consumer of this token is not visible in this hunk —
# presumably Helm releases pulling charts from public.ecr.aws; confirm in addons.tf.
data "aws_ecrpublic_authorization_token" "token" {
  provider = aws.ecr
}

locals {
name = var.name
region = var.region
azs = ["${local.region}c", "${local.region}d"]
azs = slice(data.aws_availability_zones.available.names, 0, 2)
tags = {
Blueprint = local.name
GithubRepo = "github.com/awslabs/data-on-eks"
Expand Down
6 changes: 6 additions & 0 deletions ai-ml/jark-stack/terraform/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@ variable "huggingface_token" {
default = "DUMMY_TOKEN_REPLACE_ME"
sensitive = true
}

# Toggle for the AWS EFA Kubernetes device plugin add-on; forwarded to the
# `data_addons` module in addons.tf. Off by default — turn it on only when the
# cluster has nodes with EFA devices attached.
variable "enable_aws_efa_k8s_device_plugin" {
  type        = bool
  default     = false
  description = "Enable AWS EFA K8s Device Plugin"
}

0 comments on commit e5dfd01

Please sign in to comment.