From cde1440970e826f3c5acee8208f03332e526116d Mon Sep 17 00:00:00 2001
From: Ovidiu Valeanu
Date: Sun, 3 Mar 2024 16:40:00 +0000
Subject: [PATCH] feat: Argo Workflow blueprint upgrade to latest Karpenter
 (#459)

---
 schedulers/terraform/argo-workflow/README.md  |   7 +-
 schedulers/terraform/argo-workflow/addons.tf  | 150 ++++++++++++++----
 .../spark-compute-optimized-provisioner.yaml  | 109 -------------
 schedulers/terraform/argo-workflow/main.tf    |   8 -
 .../sensor-sqs-sparkjobs.yaml                 |  40 -----
 5 files changed, 125 insertions(+), 189 deletions(-)
 delete mode 100644 schedulers/terraform/argo-workflow/karpenter-provisioners/spark-compute-optimized-provisioner.yaml

diff --git a/schedulers/terraform/argo-workflow/README.md b/schedulers/terraform/argo-workflow/README.md
index b0638ac48..b70782fff 100644
--- a/schedulers/terraform/argo-workflow/README.md
+++ b/schedulers/terraform/argo-workflow/README.md
@@ -19,7 +19,6 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 |------|---------|
 | [aws](#provider\_aws) | >= 3.72 |
 | [aws.ecr](#provider\_aws.ecr) | >= 3.72 |
-| [kubectl](#provider\_kubectl) | >= 1.14 |
 | [kubernetes](#provider\_kubernetes) | >= 2.10 |
 | [random](#provider\_random) | 3.3.2 |

@@ -30,8 +29,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 | [amp\_ingest\_irsa](#module\_amp\_ingest\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 |
 | [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.20 |
 | [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 |
-| [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | 1.9.2 |
-| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.0 |
+| [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 |
+| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.30 |
 | [irsa\_argo\_events](#module\_irsa\_argo\_events) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 |
 | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 |
 | [spark\_team\_a\_irsa](#module\_spark\_team\_a\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 |
@@ -50,7 +49,6 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 | [aws_s3_object.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource |
 | [aws_secretsmanager_secret.grafana](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret) | resource |
 | [aws_secretsmanager_secret_version.grafana](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret_version) | resource |
-| [kubectl_manifest.karpenter_provisioner](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource |
 | [kubernetes_annotations.gp2_default](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/annotations) | resource |
 | [kubernetes_cluster_role.spark_argowf_role](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/cluster_role) | resource |
 | [kubernetes_cluster_role.spark_role](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/cluster_role) | resource |
@@ -75,7 +73,6 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 | [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source |
 | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source |
 | [aws_secretsmanager_secret_version.admin_password_version](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret_version) | data source |
-| [kubectl_path_documents.karpenter_provisioners](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/data-sources/path_documents) | data source |

 ## Inputs

diff --git a/schedulers/terraform/argo-workflow/addons.tf b/schedulers/terraform/argo-workflow/addons.tf
index 32e7d307f..c07ca7bb2 100644
--- a/schedulers/terraform/argo-workflow/addons.tf
+++ b/schedulers/terraform/argo-workflow/addons.tf
@@ -59,7 +59,7 @@ module "ebs_csi_driver_irsa" {
 #---------------------------------------------------------------
 module "eks_blueprints_addons" {
   source  = "aws-ia/eks-blueprints-addons/aws"
-  version = "1.9.2"
+  version = "~> 1.2"

   cluster_name = module.eks.cluster_name

@@ -100,24 +100,30 @@ module "eks_blueprints_addons" {
     description = "Cluster Proportional Autoscaler for CoreDNS Service"
   }

-  #---------------------------------------
-  # Metrics Server
-  #---------------------------------------
-  enable_metrics_server = true
-  metrics_server = {
-    values = [templatefile("${path.module}/helm-values/metrics-server-values.yaml", {})]
-  }
-
   #---------------------------------------
   # Karpenter Autoscaler for EKS Cluster
   #---------------------------------------
   enable_karpenter                  = true
   karpenter_enable_spot_termination = true
+  karpenter_node = {
+    iam_role_additional_policies = {
+      AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+    }
+  }
   karpenter = {
+    chart_version       = "v0.34.0"
     repository_username = data.aws_ecrpublic_authorization_token.token.user_name
     repository_password = data.aws_ecrpublic_authorization_token.token.password
   }

+  #---------------------------------------
+  # Metrics Server
+  #---------------------------------------
+  enable_metrics_server = true
+  metrics_server = {
+    values = [templatefile("${path.module}/helm-values/metrics-server-values.yaml", {})]
+  }
+
   #---------------------------------------
   # AWS for FluentBit - DaemonSet
   #---------------------------------------
@@ -199,7 +205,7 @@ module "eks_blueprints_addons" {
 #---------------------------------------------------------------
 module "eks_data_addons" {
   source  = "aws-ia/eks-data-addons/aws"
-  version = "~> 1.0" # ensure to update this to the latest/desired version
+  version = "~> 1.30" # ensure to update this to the latest/desired version

   oidc_provider_arn = module.eks.oidc_provider_arn

@@ -234,26 +240,116 @@ module "eks_data_addons" {
     ]
   }

-}
-
-#---------------------------------------
-# Karpenter Provisioners
-#---------------------------------------
-data "kubectl_path_documents" "karpenter_provisioners" {
-  pattern = "${path.module}/karpenter-provisioners/spark-*.yaml"
-  vars = {
-    azs            = local.region
-    eks_cluster_id = module.eks.cluster_name
+  #---------------------------------------
+  # Karpenter Autoscaler for EKS Cluster
+  #---------------------------------------
+  enable_karpenter_resources = true
+  karpenter_resources_helm_config = {
+    spark-compute-optimized = {
+      values = [
+        <<-EOT
+      name: spark-compute-optimized
+      clusterName: ${module.eks.cluster_name}
+      ec2NodeClass:
+        karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
+        subnetSelectorTerms:
+          tags:
+            Name: "${module.eks.cluster_name}-private*"
+        securityGroupSelectorTerms:
+          tags:
+            Name: ${module.eks.cluster_name}-node
+        instanceStorePolicy: RAID0
+      nodePool:
+        labels:
+          - type: karpenter
+          - NodeGroupType: SparkComputeOptimized
+          - multiArch: Spark
+        requirements:
+          - key: "karpenter.sh/capacity-type"
+            operator: In
+            values: ["spot", "on-demand"]
+          - key: "kubernetes.io/arch"
+            operator: In
+            values: ["amd64"]
+          - key: "karpenter.k8s.aws/instance-category"
+            operator: In
+            values: ["c"]
+          - key: "karpenter.k8s.aws/instance-family"
+            operator: In
+            values: ["c5d"]
+          - key: "karpenter.k8s.aws/instance-cpu"
+            operator: In
+            values: ["4", "8", "16", "36"]
+          - key: "karpenter.k8s.aws/instance-hypervisor"
+            operator: In
+            values: ["nitro"]
+          - key: "karpenter.k8s.aws/instance-generation"
+            operator: Gt
+            values: ["2"]
+        limits:
+          cpu: 1000
+        disruption:
+          consolidationPolicy: WhenEmpty
+          consolidateAfter: 30s
+          expireAfter: 720h
+        weight: 100
+        EOT
+      ]
+    }
+    spark-graviton-compute-optimized = {
+      values = [
+        <<-EOT
+      name: spark-graviton-compute-optimized
+      clusterName: ${module.eks.cluster_name}
+      ec2NodeClass:
+        karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
+        subnetSelectorTerms:
+          tags:
+            Name: "${module.eks.cluster_name}-private*"
+        securityGroupSelectorTerms:
+          tags:
+            Name: ${module.eks.cluster_name}-node
+        instanceStorePolicy: RAID0
+      nodePool:
+        labels:
+          - type: karpenter
+          - NodeGroupType: SparkGravitonComputeOptimized
+          - multiArch: Spark
+        requirements:
+          - key: "karpenter.sh/capacity-type"
+            operator: In
+            values: ["spot", "on-demand"]
+          - key: "kubernetes.io/arch"
+            operator: In
+            values: ["arm64"]
+          - key: "karpenter.k8s.aws/instance-category"
+            operator: In
+            values: ["c"]
+          - key: "karpenter.k8s.aws/instance-family"
+            operator: In
+            values: ["c7gd"]
+          - key: "karpenter.k8s.aws/instance-cpu"
+            operator: In
+            values: ["4", "8", "16", "32"]
+          - key: "karpenter.k8s.aws/instance-hypervisor"
+            operator: In
+            values: ["nitro"]
+          - key: "karpenter.k8s.aws/instance-generation"
+            operator: Gt
+            values: ["2"]
+        limits:
+          cpu: 1000
+        disruption:
+          consolidationPolicy: WhenEmpty
+          consolidateAfter: 30s
+          expireAfter: 720h
+        weight: 50
+        EOT
+      ]
+    }
   }
 }

-resource "kubectl_manifest" "karpenter_provisioner" {
-  for_each  = toset(data.kubectl_path_documents.karpenter_provisioners.documents)
-  yaml_body = each.value
-
-  depends_on = [module.eks_blueprints_addons]
-}
-
 #tfsec:ignore:*
 module "s3_bucket" {
   source = "terraform-aws-modules/s3-bucket/aws"
diff --git a/schedulers/terraform/argo-workflow/karpenter-provisioners/spark-compute-optimized-provisioner.yaml b/schedulers/terraform/argo-workflow/karpenter-provisioners/spark-compute-optimized-provisioner.yaml
deleted file mode 100644
index b93858da8..000000000
--- a/schedulers/terraform/argo-workflow/karpenter-provisioners/spark-compute-optimized-provisioner.yaml
+++ /dev/null
@@ -1,109 +0,0 @@
-apiVersion: karpenter.sh/v1alpha5
-kind: Provisioner
-metadata:
-  name: spark-compute-optimized
-  namespace: karpenter # Same namespace as Karpenter add-on installed
-spec:
-  kubeletConfiguration:
-    containerRuntime: containerd
-    # podsPerCore: 2
-    # maxPods: 20
-  requirements:
-    - key: "topology.kubernetes.io/zone"
-      operator: In
-      values: [${azs}a] #Update the correct region and zones
-    - key: "karpenter.sh/capacity-type"
-      operator: In
-      values: ["spot", "on-demand"]
-    - key: "node.kubernetes.io/instance-type" #If not included, all instance types are considered
-      operator: In
-      values: ["c5d.xlarge","c5d.2xlarge","c5d.4xlarge","c5d.9xlarge"] # 1 NVMe disk
-    - key: "kubernetes.io/arch"
-      operator: In
-      values: ["amd64"]
-  limits:
-    resources:
-      cpu: 2000
-  providerRef:
-    name: spark-compute-optimized
-  labels:
-    type: karpenter
-    provisioner: spark-compute-optimized
-    NodeGroupType: SparkComputeOptimized
-  taints:
-    - key: spark-compute-optimized
-      value: 'true'
-      effect: NoSchedule
-  ttlSecondsAfterEmpty: 120 # optional, but never scales down if not set
-
----
-apiVersion: karpenter.k8s.aws/v1alpha1
-kind: AWSNodeTemplate
-metadata:
-  name: spark-compute-optimized
-  namespace: karpenter
-spec:
-  blockDeviceMappings:
-    - deviceName: /dev/xvda
-      ebs:
-        volumeSize: 100Gi
-        volumeType: gp3
-        encrypted: true
-        deleteOnTermination: true
-  metadataOptions:
-    httpEndpoint: enabled
-    httpProtocolIPv6: disabled
-    httpPutResponseHopLimit: 2
-    httpTokens: required
-  subnetSelector:
-    Name: "${eks_cluster_id}-private*" # Name of the Subnets to spin up the nodes
-  securityGroupSelector: # required, when not using launchTemplate
-    Name: "${eks_cluster_id}-node*" # name of the SecurityGroup to be used with Nodes
-  # instanceProfile: "" # optional, if already set in controller args
-  #RAID0 config example
-  userData: |
-    MIME-Version: 1.0
-    Content-Type: multipart/mixed; boundary="BOUNDARY"
-
-    --BOUNDARY
-    Content-Type: text/x-shellscript; charset="us-ascii"
-
-    #!/bin/bash
-    echo "Running a custom user data script"
-    set -ex
-    yum install mdadm -y
-
-    DEVICES=$(lsblk -o NAME,TYPE -dsn | awk '/disk/ {print $1}')
-
-    DISK_ARRAY=()
-
-    for DEV in $DEVICES
-    do
-      DISK_ARRAY+=("/dev/$${DEV}")
-    done
-
-    DISK_COUNT=$${#DISK_ARRAY[@]}
-
-    if [ $${DISK_COUNT} -eq 0 ]; then
-      echo "No SSD disks available. No further action needed."
-    else
-      if [ $${DISK_COUNT} -eq 1 ]; then
-        TARGET_DEV=$${DISK_ARRAY[0]}
-        mkfs.xfs $${TARGET_DEV}
-      else
-        mdadm --create --verbose /dev/md0 --level=0 --raid-devices=$${DISK_COUNT} $${DISK_ARRAY[@]}
-        mkfs.xfs /dev/md0
-        TARGET_DEV=/dev/md0
-      fi
-
-      mkdir -p /local1
-      echo $${TARGET_DEV} /local1 xfs defaults,noatime 1 2 >> /etc/fstab
-      mount -a
-      # NOTE: Update permissions on folder according to your needs and specific user group. This is just an example.
-      chmod 777 -R /local*
-    fi
-
-    --BOUNDARY--
-
-  tags:
-    InstanceType: "spark-compute-optimized" # optional, add tags for your own use
diff --git a/schedulers/terraform/argo-workflow/main.tf b/schedulers/terraform/argo-workflow/main.tf
index 254231538..a77b42bf5 100644
--- a/schedulers/terraform/argo-workflow/main.tf
+++ b/schedulers/terraform/argo-workflow/main.tf
@@ -23,14 +23,6 @@ provider "helm" {
   }
 }

-provider "kubectl" {
-  apply_retry_count      = 30
-  host                   = module.eks.cluster_endpoint
-  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
-  load_config_file       = false
-  token                  = data.aws_eks_cluster_auth.this.token
-}
-
 data "aws_eks_cluster_auth" "this" {
   name = module.eks.cluster_name
 }
diff --git a/schedulers/terraform/argo-workflow/workflow-examples/sensor-sqs-sparkjobs.yaml b/schedulers/terraform/argo-workflow/workflow-examples/sensor-sqs-sparkjobs.yaml
index c1eea5f7a..f80d4e70d 100644
--- a/schedulers/terraform/argo-workflow/workflow-examples/sensor-sqs-sparkjobs.yaml
+++ b/schedulers/terraform/argo-workflow/workflow-examples/sensor-sqs-sparkjobs.yaml
@@ -87,11 +87,6 @@ spec:
                 sparkVersion: "3.1.1"
                 restartPolicy:
                   type: Never
-                volumes:
-                  - name: "test-volume"
-                    hostPath:
-                      path: "/tmp"
-                      type: Directory
                 driver:
                   cores: 1
                   coreLimit: "1200m"
@@ -137,10 +132,6 @@ spec:
                       },
                       "tolerations": [{"key": "spark-compute-optimized", "operator": "Exists", "effect": "NoSchedule"}]
                     }]
-                  volumeMounts:
-                    - name: "test-volume"
-                      mountPath: "/tmp"
-                      readOnly: false
                 executor:
                   cores: 1
                   instances: 4
@@ -158,10 +149,6 @@ spec:
                     version: 3.3.1
                   annotations:
                     yunikorn.apache.org/task-group-name: "spark-executor"
-                  volumeMounts:
-                    - name: "test-volume"
-                      mountPath: "/tmp"
-                      readOnly: false
               - name: sparkapp-operator-taxi
                 resource:
                   action: create
@@ -222,23 +209,7 @@ spec:
                   onFailureRetryInterval: 10
                   onSubmissionFailureRetries: 5
                   onSubmissionFailureRetryInterval: 20
-                volumes: # using NVMe instance storage mounted on /mnt/k8s-disks
-                  - name: spark-local-dir-1
-                    hostPath:
-                      path: /mnt/k8s-disks
-                      type: Directory
                 driver:
-                  volumeMounts: # Points to InstanceStore 150GB NVMe SSD for shuffle spill over from memory
-                    - name: spark-local-dir-1
-                      mountPath: /data1
-                      readOnly: false
-                  initContainers:
-                    - name: volume-permissions
-                      image: public.ecr.aws/y4g4v0z7/busybox
-                      command: [ 'sh', '-c', 'chown -R 185 /mnt/k8s-disks' ]
-                      volumeMounts:
-                        - mountPath: "/mnt/k8s-disks"
-                          name: "spark-local-dir-1"
                   cores: 1
                   coreLimit: "1200m"
                   memory: "4g"
@@ -288,17 +259,6 @@ spec:
                 executor:
                   podSecurityContext:
                     fsGroup: 185
-                  volumeMounts:
-                    - name: spark-local-dir-1
-                      mountPath: /data1
-                      readOnly: false
-                  initContainers:
-                    - name: volume-permissions
-                      image: public.ecr.aws/y4g4v0z7/busybox
-                      command: [ 'sh', '-c', 'chown -R 185 /mnt/k8s-disks' ]
-                      volumeMounts:
-                        - mountPath: "/mnt/k8s-disks"
-                          name: "spark-local-dir-1"
                   cores: 1
                   coreLimit: "1200m"
                   instances: 4
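
A quick way to sanity-check the upgrade after `terraform apply` (a hedged sketch, not part of the patch): with the add-on pinned to Karpenter v0.34.0, the `karpenter_resources_helm_config` values above should render v1beta1 `NodePool`/`EC2NodeClass` objects in place of the removed v1alpha5 `Provisioner`/`AWSNodeTemplate`, named after the `name:` fields in each heredoc. Assuming `kubectl` is pointed at the upgraded cluster:

    # List the Karpenter v1beta1 resources rendered by the karpenter-resources add-on
    kubectl get nodepools.karpenter.sh,ec2nodeclasses.karpenter.k8s.aws

    # Inspect the requirements and disruption settings of one NodePool
    kubectl describe nodepools.karpenter.sh spark-compute-optimized

If both objects are listed, Karpenter should begin provisioning c5d (amd64) and c7gd (arm64) capacity for the Spark workloads as pods request it.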