From 4d348ea146027841974b33d302db111a4ae3f695 Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Sun, 27 Oct 2024 23:58:08 -0700 Subject: [PATCH 1/7] Updates to the vllm llama3.1 405b model --- .../vllm-llama3.1-405b-trn1/docker/Dockerfile | 2 +- .../vllm-llama3.1-405b-trn1/docker/run.sh | 4 +-- .../llama3-405b-vllm-lws-deployment.yaml | 36 +++++++++++++------ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile index d53b7feed..a22d55447 100644 --- a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile +++ b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/Dockerfile @@ -7,7 +7,7 @@ # 1. The line "COPY neuron_artifacts/. /neuron_artifacts/" will be replaced or removed once Neuron SDK 2.20 is released. # 2. The line "COPY vllm /vllm" will be replaced or removed once Neuron SDK 2.20 is released, and the Neuron team upstreams the changes to the vLLM project. 
# ----------------------------------------------------------------------------------- -FROM public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.0-ubuntu20.04-0 +FROM public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.0-ubuntu20.04 ENV VLLM_TARGET_DEVICE=neuron diff --git a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh index 8ed298c89..7f7f4ddb8 100644 --- a/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh +++ b/gen-ai/inference/vllm-llama3.1-405b-trn1/docker/run.sh @@ -26,8 +26,8 @@ fi # Run the Python script python neuron_multi_node_runner.py \ --model=$NEURON_MODEL_PATH \ ---max-num-seqs=$MAX_NUM_SEQ \ ---max-model-len=$MAX_MODEL_LENGTH \ +--max-num-seqs=$MAX_NUM_SEQS \ +--max-model-len=$MAX_MODEL_LEN \ --block-size=$BLOCK_SIZE \ --tensor-parallel-size=$GLOBAL_TP \ --port=$VLLM_LEADER_SERVICE_PORT \ diff --git a/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml b/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml index d59ff5ea1..4d2a2d7cf 100644 --- a/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml +++ b/gen-ai/inference/vllm-llama3.1-405b-trn1/llama3-405b-vllm-lws-deployment.yaml @@ -1,8 +1,8 @@ # Important Notice: # Before deploying this configuration, please ensure the following: -# 1. **Region and Environment Variables**: Verify that the `AWS_DEFAULT_REGION` and other environment variables are correctly set for your deployment. This YAML is currently configured for the `us-west-2` region. -# 2. **Pre-requisite**: Check the README.md file for instructions. -# 3. **Custom Images**: This configuration uses a custom image hosted on ECR (`public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:latest`). Ensure that this image is accessible and meets your deployment needs. +# 1. 
Region and Environment Variables: Verify that the `AWS_DEFAULT_REGION` and other environment variables are correctly set for your deployment. This YAML is currently configured for the `us-west-2` region. +# 2. Pre-requisite: Check the README.md file for instructions. +# 3. Custom Images: This configuration uses a custom image hosted on ECR (`public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:v8`). Ensure that this image is accessible and meets your deployment needs. --- apiVersion: v1 @@ -75,15 +75,23 @@ spec: value: us-west-2 - name: NEURON_MODEL_PATH value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/ + # - name: NEURON_CONTEXT_LENGTH_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" + # - name: NEURON_TOKEN_GEN_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" - name: NEURON_CONTEXT_LENGTH_ESTIMATE value: '[''1024'',''2048'',''4096'',''8192'',''16384'',''32768'',''65536'',''131072'']' - - name: MAX_MODEL_LENGTH + - name: MAX_MODEL_LEN value: "8192" - name: BLOCK_SIZE value: "8192" - - name: MAX_NUM_SEQ + - name: MAX_NUM_SEQS value: "2" - - name: NEURON_QUANT + - name: NEURON_CC_PIPELINE_FACTOR + value: "4" + - name: NEURON_COMPILE_CACHE_URL + value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/compiled_artifact + - name: NEURON_QUANT # not in the vllm code value: "False" - name: NEURON_SEQUENCE_PARALLEL value: "True" @@ -99,7 +107,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.annotations['leaderworkerset.sigs.k8s.io/size'] - image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:latest + image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:v8 imagePullPolicy: Always name: vllm-leader ports: @@ -186,14 +194,22 @@ spec: value: us-west-2 - name: NEURON_MODEL_PATH value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/ + # - name: NEURON_CONTEXT_LENGTH_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" + # - name: 
NEURON_TOKEN_GEN_BUCKETS # not working + # value: "1024,2048,4096,8192,16384,32768,65536,131072" - name: NEURON_CONTEXT_LENGTH_ESTIMATE value: '[''1024'',''2048'',''4096'',''8192'',''16384'',''32768'',''65536'',''131072'']' - - name: MAX_MODEL_LENGTH + - name: MAX_MODEL_LEN value: "8192" - name: BLOCK_SIZE value: "8192" - - name: MAX_NUM_SEQ + - name: MAX_NUM_SEQS value: "2" + - name: NEURON_CC_PIPELINE_FACTOR # not required + value: "4" + - name: NEURON_COMPILE_CACHE_URL + value: /mnt/k8s-disks/0/checkpoints/llama-3.1-405b-instruct/compiled_artifact - name: NEURON_QUANT value: "False" - name: NEURON_SEQUENCE_PARALLEL @@ -210,7 +226,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.annotations['leaderworkerset.sigs.k8s.io/size'] - image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:latest + image: public.ecr.aws/data-on-eks/neuron-sdk2.20-vllm0.5.0-neuron:v8 imagePullPolicy: Always name: vllm-worker ports: From cca329d9196b584a6c67f1aa7a987199c68cebcf Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Mon, 28 Oct 2024 00:06:06 -0700 Subject: [PATCH 2/7] Spark Benchmarks updates to latest Spark version --- .../terraform/spark-k8s-operator/README.md | 12 +- .../terraform/spark-k8s-operator/addons.tf | 124 +++++++++++++++--- analytics/terraform/spark-k8s-operator/eks.tf | 39 ++++++ .../benchmark/tpcds-benchmark-3t.yaml | 84 +++++++----- .../tpcds-benchmark-data-generation-3t.yaml | 111 +++++++++++----- .../examples/docker/Dockerfile-benchmark | 81 ++++++++++++ .../helm-values/spark-operator-values.yaml | 65 --------- 7 files changed, 359 insertions(+), 157 deletions(-) create mode 100644 analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark delete mode 100644 analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml diff --git a/analytics/terraform/spark-k8s-operator/README.md b/analytics/terraform/spark-k8s-operator/README.md index 305425918..171a59136 100644 --- 
a/analytics/terraform/spark-k8s-operator/README.md +++ b/analytics/terraform/spark-k8s-operator/README.md @@ -30,7 +30,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.34 | | [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 | | [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 | -| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | 1.33.0 | +| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | 1.34 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 | | [spark\_team\_irsa](#module\_spark\_team\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 | @@ -70,18 +70,18 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [aws\_auth\_roles](#input\_aws\_auth\_roles) | additional aws auth roles |
list(
object(
{
rolearn = string
username = string
groups = list(string
)
}
)
)
| `[]` | no | +| [aws\_auth\_roles](#input\_aws\_auth\_roles) | additional aws auth roles |
list(
object(
{
rolearn = string
username = string
groups = list(string
)
}
)
)
| `[]` | no | | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.30"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `true` | no | | [kms\_key\_admin\_roles](#input\_kms\_key\_admin\_roles) | list of role ARNs to add to the KMS policy | `list(string)` | `[]` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"spark-operator-doeks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs diff --git a/analytics/terraform/spark-k8s-operator/addons.tf b/analytics/terraform/spark-k8s-operator/addons.tf index 7782d8595..ae5328e25 100644 --- a/analytics/terraform/spark-k8s-operator/addons.tf +++ b/analytics/terraform/spark-k8s-operator/addons.tf @@ -41,7 +41,7 @@ resource "kubernetes_storage_class" "ebs_csi_encrypted_gp3_storage_class" { #--------------------------------------------------------------- module "eks_data_addons" { source = "aws-ia/eks-data-addons/aws" - version = "1.33.0" # ensure to update this to the latest/desired version + version = "1.34" # ensure to update this to the latest/desired version oidc_provider_arn = module.eks.oidc_provider_arn @@ -54,6 +54,9 @@ module "eks_data_addons" { name: spark-compute-optimized clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -93,9 +96,8 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 5m weight: 100 EOT ] @@ -106,6 +108,9 @@ module "eks_data_addons" { name: spark-graviton-memory-optimized clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -144,19 +149,76 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 1m weight: 50 EOT ] } + 
spark-graviton-benchmark = { + values = [ + <<-EOT + name: spark-graviton-benchmark + clusterName: ${module.eks.cluster_name} + ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 + karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} + subnetSelectorTerms: + tags: + Name: "${module.eks.cluster_name}-private*" + securityGroupSelectorTerms: + tags: + Name: ${module.eks.cluster_name}-node + instanceStorePolicy: RAID0 + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + volumeSize: 300Gi + volumeType: gp3 + encrypted: true + deleteOnTermination: true + nodePool: + labels: + - NodeGroupType: SparkGravitonBenchmark + requirements: + - key: "karpenter.sh/capacity-type" + operator: In + values: ["spot", "on-demand"] + - key: "kubernetes.io/arch" + operator: In + values: ["arm64"] + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: ["r"] + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["r6g", "r6gd", "r7g", "r7gd", "r8g"] + - key: "karpenter.k8s.aws/instance-size" + operator: In + values: ["8xlarge", "12xlarge", "16xlarge"] + - key: "karpenter.k8s.aws/instance-generation" + operator: Gt + values: ["2"] + limits: + cpu: 2000 + disruption: + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 1m + weight: 100 + EOT + ] + } spark-memory-optimized = { values = [ <<-EOT name: spark-memory-optimized clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -195,9 +257,8 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 5m weight: 100 EOT ] @@ -208,6 +269,9 @@ module "eks_data_addons" { 
name: spark-vertical-ebs-scale clusterName: ${module.eks.cluster_name} ec2NodeClass: + amiFamily: AL2023 + amiSelectorTerms: + - alias: al2023@latest # Amazon Linux 2023 karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -217,9 +281,9 @@ module "eks_data_addons" { Name: ${module.eks.cluster_name}-node userData: | MIME-Version: 1.0 - Content-Type: multipart/mixed; boundary="BOUNDARY" + Content-Type: multipart/mixed; boundary="//" - --BOUNDARY + --// Content-Type: text/x-shellscript; charset="us-ascii" #!/bin/bash @@ -303,7 +367,7 @@ module "eks_data_addons" { /usr/bin/chown -hR +999:+1000 /mnt/k8s-disks fi - --BOUNDARY-- + --//-- nodePool: labels: @@ -325,9 +389,8 @@ module "eks_data_addons" { limits: cpu: 1000 disruption: - consolidationPolicy: WhenEmpty - consolidateAfter: 30s - expireAfter: 720h + consolidationPolicy: WhenEmptyOrUnderutilized + consolidateAfter: 5m weight: 100 EOT ] @@ -336,11 +399,34 @@ module "eks_data_addons" { #--------------------------------------------------------------- # Spark Operator Add-on + # Add this to enable YuniKorn as Default Scheduler + # controller: + # batchScheduler: + # enable: true + # default: "yunikorn" #--------------------------------------------------------------- enable_spark_operator = true spark_operator_helm_config = { - version = "1.4.2" - values = [templatefile("${path.module}/helm-values/spark-operator-values.yaml", {})] + version = "2.0.2" + values = [ + <<-EOT + spark: + # -- List of namespaces where to run spark jobs. + # If empty string is included, all namespaces will be allowed. + # Make sure the namespaces have already existed. + jobNamespaces: + - default + - spark-team-a + - spark-team-b + - spark-team-c + serviceAccount: + # -- Specifies whether to create a service account for the controller. + create: false + rbac: + # -- Specifies whether to create RBAC resources for the controller. 
+ create: false + EOT + ] } #--------------------------------------------------------------- @@ -455,7 +541,7 @@ module "eks_blueprints_addons" { } } karpenter = { - chart_version = "v0.34.0" + chart_version = "1.0.6" repository_username = data.aws_ecrpublic_authorization_token.token.user_name repository_password = data.aws_ecrpublic_authorization_token.token.password } diff --git a/analytics/terraform/spark-k8s-operator/eks.tf b/analytics/terraform/spark-k8s-operator/eks.tf index 6f4ca3098..cc66a5ddf 100644 --- a/analytics/terraform/spark-k8s-operator/eks.tf +++ b/analytics/terraform/spark-k8s-operator/eks.tf @@ -174,6 +174,45 @@ module "eks" { } } + spark_graviton_bench = { + name = "spark-graviton-bench" + description = "Spark managed node group for Graviton Benchmarks" + # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned + subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : + substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0) + ] + + ami_type = "AL2023_ARM_64_STANDARD" + + min_size = 0 + max_size = 8 + desired_size = 0 + # This storage is used as a shuffle for non NVMe SSD instances. 
e.g., r8g instances + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_size = 300 + volume_type = "gp3" + iops = 3000 + encrypted = true + delete_on_termination = true + } + } + } + + instance_types = ["r8g.12xlarge"] # Change Instance type to run the benchmark with various instance types + + labels = { + NodeGroupType = "spark-graviton-benchmark-mng" + } + + tags = { + Name = "spark-graviton-benchmark-mng" + NodeGroupType = "spark-graviton-benchmark-mng" + } + } + # ec2-instance-selector --vcpus=48 --gpus 0 -a arm64 --allow-list '.*d.*' # This command will give you the list of the instances with similar vcpus for arm64 dense instances spark_spot_x86_48cpu = { diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml index 9e1d37685..fa57826cc 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml @@ -1,7 +1,7 @@ # NOTE: This example requires the following prerequisites before executing the jobs -# 1. Ensure spark-team-a name space exists -# 2. replace with your bucket name -# 3. Ensure you run "analytics/spark-k8s-operator/spark-samples/tpcds-benchmark-data-generation-1t.yaml" which generates 3 TB input data +# 1. Create an S3 bucket to store the generated data +# 2. Ensure spark-team-a name space exists +# 3. replace with your bucket name --- apiVersion: "sparkoperator.k8s.io/v1beta2" @@ -9,30 +9,29 @@ kind: SparkApplication metadata: name: tpcds-benchmark-3tb namespace: spark-team-a - labels: - app: "tpcds-benchmark" - applicationId: "tpcds-benchmark-3t" - # Assign the job to a Yunikorn Queue via label. 
- queue: root.prod + spec: + # batchScheduler: yunikorn + # batchSchedulerOptions: + # queue: root.default type: Scala mode: cluster - image: public.ecr.aws/data-on-eks/emr-on-eks-benchmark:3.1.2 + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 imagePullPolicy: IfNotPresent - sparkVersion: 3.1.2 + sparkVersion: 3.5.3 mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar arguments: # TPC-DS data location - - "s3://blogpost-sparkoneks-us-east-1/blog/BLOG_TPCDS-TEST-3T-partitioned" + - "s3a:///TPCDS-TEST-1TB" # results location - - "s3:///TPCDS-TEST-3T-RESULT" + - "s3a:///TPCDS-TEST-1T-RESULT" # Path to kit in the docker image - "/opt/tpcds-kit/tools" # Data Format - "parquet" # Scale factor (in GB) - - "3000" # changed from 3000 to 100gb for demo + - "1000" # changed from 3000 to 100gb for demo # Number of iterations - "1" # Optimize queries with hive tables @@ -49,16 +48,25 @@ spec: "spark.sql.adaptive.localShuffleReader.enabled": "true" "spark.sql.adaptive.coalescePartitions.enabled": "true" "spark.sql.adaptive.skewJoin.enabled": "true" - # "spark.sql.adaptive.logLevel": "WARN" - # IRSA for S3 connection "spark.kubernetes.executor.podNamePrefix": "benchmark-exec" - "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" + # S3 Optimizations + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 
https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + "spark.hadoop.fs.s3a.fast.upload": "true" + "spark.hadoop.fs.s3a.path.style.access": "true" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/tmp/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70" - # Keep pods in a single AZ - # "spark.kubernetes.node.selector.topology.kubernetes.io/zone": "us-west-1b" - # "spark.kubernetes.node.selector.eks.amazonaws.com/capacityType": "ON_DEMAND" + # "spark.hadoop.fs.s3a.readahead.range": "256K" + # ----------------------------------------------------- # This block is very critical when you get errors like # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred @@ -70,37 +78,51 @@ spec: spark.kubernetes.driver.requestTimeout: "120000" # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size # ----------------------------------------------------- + # S3 Optimizations + "spark.hadoop.fs.s3a.multipart.size": "67108864" # 64 MB part size for S3 uploads + "spark.hadoop.fs.s3a.threads.max": "40" # Limit S3 threads for optimized throughput + "spark.hadoop.fs.s3a.connection.maximum": "100" # Set max connections for S3 + + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" # Increase shuffle buffer for better disk I/O + "spark.reducer.maxSizeInFlight": "48m" # Increase reducer buffer size in-flight data + + # Optional: Tuning multipart upload threshold + "spark.hadoop.fs.s3a.multipart.purge": 
"true" # Automatically clear failed multipart uploads + "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload driver: cores: 4 coreLimit: "4.1" memory: "5g" - memoryOverhead: "1000" + memoryOverhead: "1g" serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod # we do not need to leverage a hostPath mount or volume to leverage that storage. # ephemeral-storage requests and limits can be used to manage the storage utilization nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" + NodeGroupType: SparkGravitonBenchmark executor: cores: 4 coreLimit: "4.3" memory: "6g" memoryOverhead: "2g" # 8 executors per node - instances: 47 # changed from 47 to 20 for demo + instances: 47 serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod # we do not need to leverage a hostPath mount or volume to leverage that storage. 
# ephemeral-storage requests and limits can be used to manage the storage utilization nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" + NodeGroupType: SparkGravitonBenchmark restartPolicy: type: Never diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml index d6d66fdba..0092b7ce4 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml @@ -1,35 +1,33 @@ # NOTE: This example requires the following prerequisites before executing the jobs # 1. Ensure spark-team-a name space exists -# 2. replace with your bucket name +# 2. replace with your bucket name --- apiVersion: "sparkoperator.k8s.io/v1beta2" kind: SparkApplication metadata: - name: tpcds-data-generation-3t + name: tpcds-data-generation-1tb namespace: spark-team-a - labels: - app: "tpcds-data-generation" - applicationId: "tpcds-data-generation-3t" - # Assign the job to a Yunikorn Queue via label. 
- queue: root.prod spec: + # batchScheduler: yunikorn + # batchSchedulerOptions: + # queue: root.default type: Scala mode: cluster - image: public.ecr.aws/data-on-eks/emr-on-eks-benchmark:3.1.2 + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 imagePullPolicy: IfNotPresent - sparkVersion: 3.1.2 + sparkVersion: 3.5.3 mainClass: com.amazonaws.eks.tpcds.DataGeneration mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar arguments: # TPC-DS data location - - "s3a:///TPCDS-TEST-3T" + - "s3a:///TPCDS-TEST-1TB" # Path to kit in the docker image - "/opt/tpcds-kit/tools" # Data Format - "parquet" - # Scale factor (in GB) - - "3000" + # Scale factor (in GB) - S3 output size shows 309.4GB for 1000GB Input + - "1000" # Generate data num partitions - "200" # Create the partitioned fact tables @@ -39,20 +37,42 @@ spec: # Logging set to WARN - "true" sparkConf: + "spark.executorEnv.JAVA_HOME": "/opt/java/openjdk" + "spark.driverEnv.JAVA_HOME": "/opt/java/openjdk" "spark.network.timeout": "2000s" "spark.executor.heartbeatInterval": "300s" "spark.kubernetes.memoryOverheadFactor": "0.3" + # AQE + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.localShuffleReader.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + "spark.sql.adaptive.skewJoin.enabled": "true" + # "spark.kubernetes.memoryOverheadFactor": "0.35" # Optional tweak for memory-intensive jobs "spark.sql.files.maxRecordsPerFile": "30000000" "spark.serializer": "org.apache.spark.serializer.KryoSerializer" - # "spark.local.dir": "/data1" - - # S3 settings - "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" + # S3 Optimizations + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": 
"com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" "spark.hadoop.fs.s3a.fast.upload": "true" "spark.hadoop.fs.s3a.path.style.access": "true" - "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/data1/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" "spark.kubernetes.executor.podNamePrefix": "oss-data-gen" + "spark.sql.shuffle.partitions": "2000" # Adjust according to your job size + + # "spark.hadoop.fs.s3a.committer.staging.conflict-mode": "append" + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" + "spark.reducer.maxSizeInFlight": "128m" + # Java options for driver and executor + # Other existing configurations "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseG1GC" "spark.driver.defaultJavaOptions": "-XX:+UseG1GC" # ----------------------------------------------------- @@ -60,13 +80,12 @@ spec: # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred # Caused by: java.net.SocketTimeoutException: timeout # spark.kubernetes.local.dirs.tmpfs: "true" + # Timeout settings for large data generation spark.kubernetes.submission.connectionTimeout: "60000000" spark.kubernetes.submission.requestTimeout: "60000000" spark.kubernetes.driver.connectionTimeout: "60000000" 
spark.kubernetes.driver.requestTimeout: "60000000" # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size - # ----------------------------------------------------- - restartPolicy: type: Never driver: @@ -74,28 +93,48 @@ spec: coreLimit: "10.1" memory: "10g" serviceAccount: spark-team-a - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. - # ephemeral-storage requests and limits can be used to manage the storage utilization + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" + NodeGroupType: SparkComputeOptimized executor: cores: 11 coreLimit: "11.1" memory: "15g" - # 3 executors per node 9 nodes instances: 26 serviceAccount: spark-team-a - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. 
- # the data generation can utilize a large amount of storage + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" nodeSelector: - provisioner: spark-compute-optimized - tolerations: - - key: "spark-compute-optimized" - operator: "Exists" - effect: "NoSchedule" + NodeGroupType: SparkComputeOptimized + volumes: + - name: spark-local-dir-1 + hostPath: + path: "/mnt/k8s-disks/0" + type: DirectoryOrCreate diff --git a/analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark b/analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark new file mode 100644 index 000000000..50b2cd47a --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/docker/Dockerfile-benchmark @@ -0,0 +1,81 @@ +# Use the official Spark base image with Java 17 and Python 3 +FROM apache/spark:3.5.3-scala2.12-java17-python3-ubuntu as tpc-toolkit + +# Arguments for version control +ARG HADOOP_VERSION=3.4.1 +ARG AWS_SDK_VERSION=2.29.0 +ARG SPARK_UID=185 + +# Set environment variables +ENV SPARK_HOME=/opt/spark + +# Set up as root to install dependencies and tools +USER root + +# Install necessary build tools and specific sbt version 0.13.18 +RUN apt-get update && \ + apt-get install -y \ + gcc \ + make \ + flex \ + bison \ + git \ + openjdk-17-jdk \ + wget \ + curl && \ + # Install sbt 0.13.18 + wget https://github.com/sbt/sbt/releases/download/v0.13.18/sbt-0.13.18.tgz && \ + tar -xzf sbt-0.13.18.tgz -C /usr/local && \ + ln -s /usr/local/sbt/bin/sbt /usr/local/bin/sbt && \ + # Cleanup + rm sbt-0.13.18.tgz && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Clone and compile TPC-DS toolkit +WORKDIR /opt +RUN git clone 
https://github.com/databricks/tpcds-kit.git && \ + cd tpcds-kit/tools && \ + make OS=LINUX && \ + chmod +x dsdgen dsqgen + +# Clone the SQL perf library and related files +RUN git clone -b delta https://github.com/aws-samples/emr-on-eks-benchmark.git /tmp/emr-on-eks-benchmark + +# Build the Databricks SQL perf library +RUN cd /tmp/emr-on-eks-benchmark/spark-sql-perf && sbt +package + +# Use the compiled Databricks SQL perf library to build benchmark utility +RUN cd /tmp/emr-on-eks-benchmark/ && \ + mkdir -p /tmp/emr-on-eks-benchmark/benchmark/libs && \ + cp /tmp/emr-on-eks-benchmark/spark-sql-perf/target/scala-2.12/*.jar /tmp/emr-on-eks-benchmark/benchmark/libs && \ + cd /tmp/emr-on-eks-benchmark/benchmark && sbt assembly + +# Remove any old Hadoop libraries +RUN rm -f ${SPARK_HOME}/jars/hadoop-client-* && \ + rm -f ${SPARK_HOME}/jars/hadoop-yarn-server-web-proxy-*.jar + +# Add Hadoop AWS connector and AWS SDK for S3A support, along with hadoop-common dependencies +# TODO: hadoop-common, hadoop-yarn-server-web-proxy might not be required. Remove these and test it. 
+RUN cd ${SPARK_HOME}/jars && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-client-api/${HADOOP_VERSION}/hadoop-client-api-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-client-runtime/${HADOOP_VERSION}/hadoop-client-runtime-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-common/${HADOOP_VERSION}/hadoop-common-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-yarn-server-web-proxy/${HADOOP_VERSION}/hadoop-yarn-server-web-proxy-${HADOOP_VERSION}.jar && \ + wget https://repo1.maven.org/maven2/software/amazon/awssdk/bundle/${AWS_SDK_VERSION}/bundle-${AWS_SDK_VERSION}.jar + +# Create directory for TPC-DS data and set permissions +RUN mkdir -p /opt/tpcds-data && \ + chown -R ${SPARK_UID}:${SPARK_UID} /opt/tpcds-data + +# Copy the built JARs to Spark's jars directory +RUN mkdir -p ${SPARK_HOME}/examples/jars/ && \ + cp /tmp/emr-on-eks-benchmark/benchmark/target/scala-2.12/*jar ${SPARK_HOME}/examples/jars/ && \ + chown -R ${SPARK_UID}:${SPARK_UID} ${SPARK_HOME}/examples + +# Set working directory +WORKDIR ${SPARK_HOME} + +# Switch to non-root user +USER ${SPARK_UID} diff --git a/analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml b/analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml deleted file mode 100644 index 3ced8f515..000000000 --- a/analytics/terraform/spark-k8s-operator/helm-values/spark-operator-values.yaml +++ /dev/null @@ -1,65 +0,0 @@ -replicaCount: 1 - -webhook: - # -- Enable webhook server - enable: true - # -- Webhook service port - port: 8080 - -serviceAccounts: - spark: - # -- Create a service account for spark apps - create: true - # -- Optional name for the spark service account - name: "" - # -- Optional annotations for the spark service 
account - annotations: {} - sparkoperator: - # -- Create a service account for the operator - create: true - # -- Optional name for the operator service account - name: "" - # -- Optional annotations for the operator service account - annotations: {} - -# Enable this to monitor only one namespace with this Spark Operator. -# By default, this operator monitors all namespaces for submitting Spark jobs. -# Currently, it does not support selecting multiple namespaces to be monitored by each Spark Operator. You can select either one or all namespaces. -# sparkJobNamespaces: -# - "spark-team-a" - - -# -- Operator concurrency, higher values might increase memory usage -controllerThreads: 10 - -# resources -- Pod resource requests and limits -# Note, that each job submission will spawn a JVM within the Spark Operator Pod using "/usr/local/openjdk-11/bin/java -Xmx128m". -# Kubernetes may kill these Java processes at will to enforce resource limits. When that happens, you will see the following error: -# 'failed to run spark-submit for SparkApplication [...]: signal: killed' - when this happens, you may want to increase memory limits. -resources: - limits: - cpu: 200m - memory: 1Gi - requests: - cpu: 100m - memory: 512Mi - -batchScheduler: - # -- Enable batch scheduler for spark jobs scheduling. If enabled, users can specify batch scheduler name in spark application - enable: true - - -#------------------------------------ -# THIS WILL CREATE SERVICE AND INGRESS OBJECT FOR EACH SPARK APPLICATION -#------------------------------------ -uiService: - # -- Enable UI service creation for Spark application - enable: true - - # -- Ingress URL format. - # Requires the UI service to be enabled by setting `uiService.enable` to true. - # 1/ Enable ingressUrlFormat to create an Ingress object for each Spark Job submitted using Spark Operator - # 2/ This setup also requires ingres-nginx to be deployed with NLB as LB with IP based routing. - # 3. 
Enter the NLB DNS name or enter Custom Domain name from route53 below which points to the NLB - - # ingressUrlFormat: '/{{$appName}}' From 2a4337602bd8a6d32e96758a5edd5a82aaa65bc1 Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Mon, 28 Oct 2024 00:06:56 -0700 Subject: [PATCH 3/7] pre-commit updates --- ai-ml/emr-spark-rapids/README.md | 2 +- ai-ml/nvidia-triton-server/README.md | 4 ++-- analytics/terraform/datahub-on-eks/README.md | 4 ++-- analytics/terraform/emr-eks-ack/README.md | 4 ++-- analytics/terraform/emr-eks-fargate/README.md | 4 ++-- analytics/terraform/emr-eks-karpenter/README.md | 2 +- analytics/terraform/spark-eks-ipv6/spark-team.tf | 2 +- schedulers/terraform/argo-workflow/README.md | 8 ++++---- schedulers/terraform/aws-batch-eks/README.md | 6 +++--- schedulers/terraform/self-managed-airflow/README.md | 10 +++++----- streaming/flink/README.md | 4 ++-- streaming/kafka/README.md | 8 ++++---- streaming/nifi/README.md | 4 ++-- streaming/spark-streaming/terraform/README.md | 8 ++++---- 14 files changed, 35 insertions(+), 35 deletions(-) diff --git a/ai-ml/emr-spark-rapids/README.md b/ai-ml/emr-spark-rapids/README.md index e693a626d..bb91a9799 100644 --- a/ai-ml/emr-spark-rapids/README.md +++ b/ai-ml/emr-spark-rapids/README.md @@ -61,7 +61,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [enable\_nvidia\_gpu\_operator](#input\_enable\_nvidia\_gpu\_operator) | Enable NVIDIA GPU Operator | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-spark-rapids"` | no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no | diff --git a/ai-ml/nvidia-triton-server/README.md b/ai-ml/nvidia-triton-server/README.md index b86c32977..66b726b02 100644 --- a/ai-ml/nvidia-triton-server/README.md +++ b/ai-ml/nvidia-triton-server/README.md @@ -79,9 +79,9 @@ | [huggingface\_token](#input\_huggingface\_token) | Hugging Face Secret Token | `string` | `"DUMMY_TOKEN_REPLACE_ME"` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"nvidia-triton-server"` | no | | [ngc\_api\_key](#input\_ngc\_api\_key) | NGC API Key | `string` | `"DUMMY_NGC_API_KEY_REPLACE_ME"` | no | -| [nim\_models](#input\_nim\_models) | NVIDIA NIM Models |
list(object({
name = string
id = string
enable = bool
num_gpu = string
}))
|
[
{
"enable": false,
"id": "nvcr.io/nim/meta/llama-3.1-8b-instruct",
"name": "llama-3-1-8b-instruct",
"num_gpu": "4"
},
{
"enable": true,
"id": "nvcr.io/nim/meta/llama3-8b-instruct",
"name": "llama3-8b-instruct",
"num_gpu": "1"
}
]
| no | +| [nim\_models](#input\_nim\_models) | NVIDIA NIM Models |
list(object({
name = string
id = string
enable = bool
num_gpu = string
}))
|
[
{
"enable": false,
"id": "nvcr.io/nim/meta/llama-3.1-8b-instruct",
"name": "llama-3-1-8b-instruct",
"num_gpu": "4"
},
{
"enable": true,
"id": "nvcr.io/nim/meta/llama3-8b-instruct",
"name": "llama3-8b-instruct",
"num_gpu": "1"
}
]
| no | | [region](#input\_region) | region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no | ## Outputs diff --git a/analytics/terraform/datahub-on-eks/README.md b/analytics/terraform/datahub-on-eks/README.md index f749fbb28..fd1d95814 100644 --- a/analytics/terraform/datahub-on-eks/README.md +++ b/analytics/terraform/datahub-on-eks/README.md @@ -46,8 +46,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"datahub-on-eks"` | no | | [private\_subnet\_ids](#input\_private\_subnet\_ids) | Ids for existing private subnets - needed when create\_vpc set to false | `list(string)` | `[]` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR - must change to match the cidr of the existing VPC if create\_vpc set to false | `string` | `"10.1.0.0/16"` | no | diff --git a/analytics/terraform/emr-eks-ack/README.md b/analytics/terraform/emr-eks-ack/README.md index e5308048a..05b4872ed 100644 --- a/analytics/terraform/emr-eks-ack/README.md +++ b/analytics/terraform/emr-eks-ack/README.md @@ -54,8 +54,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.27"` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-ack"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/analytics/terraform/emr-eks-fargate/README.md b/analytics/terraform/emr-eks-fargate/README.md index 2d6a8aa25..dae1b4eae 100644 --- a/analytics/terraform/emr-eks-fargate/README.md +++ b/analytics/terraform/emr-eks-fargate/README.md @@ -49,8 +49,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.27"` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-fargate"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/analytics/terraform/emr-eks-karpenter/README.md b/analytics/terraform/emr-eks-karpenter/README.md index ded52b607..b8e7166fd 100644 --- a/analytics/terraform/emr-eks-karpenter/README.md +++ b/analytics/terraform/emr-eks-karpenter/README.md @@ -89,7 +89,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-karpenter"` | no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no | diff --git a/analytics/terraform/spark-eks-ipv6/spark-team.tf b/analytics/terraform/spark-eks-ipv6/spark-team.tf index beeddc9ba..b4c0bab3c 100644 --- a/analytics/terraform/spark-eks-ipv6/spark-team.tf +++ b/analytics/terraform/spark-eks-ipv6/spark-team.tf @@ -66,7 +66,7 @@ module "spark_team_a_irsa" { #--------------------------------------------------------------- resource "aws_iam_policy" "spark" { description = "IAM role policy for Spark Job execution" - name_prefix = "${local.name}-spark-irsa" + name_prefix = "${local.name}-spark-irsa" policy = data.aws_iam_policy_document.spark_operator.json } diff --git a/schedulers/terraform/argo-workflow/README.md b/schedulers/terraform/argo-workflow/README.md index ebb39c7c2..6119fccdd 100644 --- a/schedulers/terraform/argo-workflow/README.md +++ b/schedulers/terraform/argo-workflow/README.md @@ -78,15 +78,15 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.29"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"doeks-spark-argo"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs diff --git a/schedulers/terraform/aws-batch-eks/README.md b/schedulers/terraform/aws-batch-eks/README.md index 3a066e501..49fedd1cf 100644 --- a/schedulers/terraform/aws-batch-eks/README.md +++ b/schedulers/terraform/aws-batch-eks/README.md @@ -58,7 +58,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [aws\_batch\_doeks\_jd\_name](#input\_aws\_batch\_doeks\_jd\_name) | The AWS Batch example job definition name | `string` | `"doeks-hello-world"` | no | | [aws\_batch\_doeks\_jq\_name](#input\_aws\_batch\_doeks\_jq\_name) | The AWS Batch job queue name | `string` | `"doeks-JQ1"` | no | | [aws\_batch\_doeks\_namespace](#input\_aws\_batch\_doeks\_namespace) | The AWS Batch EKS namespace | `string` | `"doeks-aws-batch"` | no | -| [aws\_batch\_instance\_types](#input\_aws\_batch\_instance\_types) | The set of instance types to launch for AWS Batch jobs. | `list(string)` |
[
"optimal"
]
| no | +| [aws\_batch\_instance\_types](#input\_aws\_batch\_instance\_types) | The set of instance types to launch for AWS Batch jobs. | `list(string)` |
[
"optimal"
]
| no | | [aws\_batch\_max\_vcpus](#input\_aws\_batch\_max\_vcpus) | The maximum aggregate vCPU for AWS Batch compute environment | `number` | `256` | no | | [aws\_batch\_min\_vcpus](#input\_aws\_batch\_min\_vcpus) | The minimum aggregate vCPU for AWS Batch compute environment | `number` | `0` | no | | [aws\_region](#input\_aws\_region) | AWS Region | `string` | `"us-east-1"` | no | @@ -67,8 +67,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [eks\_private\_cluster\_endpoint](#input\_eks\_private\_cluster\_endpoint) | Whether to have a private cluster endpoint for the EKS cluster. | `bool` | `true` | no | | [eks\_public\_cluster\_endpoint](#input\_eks\_public\_cluster\_endpoint) | Whether to have a public cluster endpoint for the EKS cluster. #WARNING: Avoid a public endpoint in preprod or prod accounts. This feature is designed for sandbox accounts, simplifying cluster deployment and testing. | `bool` | `true` | no | | [num\_azs](#input\_num\_azs) | The number of Availability Zones to deploy subnets to. Must be 2 or more | `number` | `2` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/schedulers/terraform/self-managed-airflow/README.md b/schedulers/terraform/self-managed-airflow/README.md index 5d7260551..c0f229deb 100644 --- a/schedulers/terraform/self-managed-airflow/README.md +++ b/schedulers/terraform/self-managed-airflow/README.md @@ -99,17 +99,17 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [db\_private\_subnets](#input\_db\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Airflow DB. | `list(string)` |
[
"10.0.20.0/26",
"10.0.21.0/26"
]
| no | +| [db\_private\_subnets](#input\_db\_private\_subnets) | Private Subnets CIDRs. 62 IPs per Subnet/AZ for Airflow DB. | `list(string)` |
[
"10.0.20.0/26",
"10.0.21.0/26"
]
| no | | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.29"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_airflow](#input\_enable\_airflow) | Enable Apache Airflow | `bool` | `true` | no | | [enable\_airflow\_spark\_example](#input\_enable\_airflow\_spark\_example) | Enable Apache Airflow and Spark Operator example | `bool` | `false` | no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"self-managed-airflow"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.0.1.0/24",
"10.0.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.0.0.0/26",
"10.0.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.0.1.0/24",
"10.0.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.0.0.0/26",
"10.0.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.0.0.0/16"` | no | ## Outputs diff --git a/streaming/flink/README.md b/streaming/flink/README.md index fb2673697..876d564fb 100755 --- a/streaming/flink/README.md +++ b/streaming/flink/README.md @@ -73,8 +73,8 @@ | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"flink-operator-doeks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` |
[
"10.1.0.0/17",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` |
[
"10.1.255.128/26",
"10.1.255.192/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/streaming/kafka/README.md b/streaming/kafka/README.md index f1deaafb1..56fc5cf55 100644 --- a/streaming/kafka/README.md +++ b/streaming/kafka/README.md @@ -59,13 +59,13 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.31"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"kafka-on-eks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs diff --git a/streaming/nifi/README.md b/streaming/nifi/README.md index c1894ab32..25ce367da 100644 --- a/streaming/nifi/README.md +++ b/streaming/nifi/README.md @@ -82,8 +82,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"nifi-on-eks"` | no | | [nifi\_sub\_domain](#input\_nifi\_sub\_domain) | Subdomain for NiFi cluster. | `string` | `"mynifi"` | no | | [nifi\_username](#input\_nifi\_username) | NiFi login username | `string` | `"admin"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 16382 IPs per Subnet | `list(string)` |
[
"10.1.0.0/18",
"10.1.64.0/18",
"10.1.128.0/18"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 4094 IPs per Subnet | `list(string)` |
[
"10.1.192.0/20",
"10.1.208.0/20",
"10.1.224.0/20"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 16382 IPs per Subnet | `list(string)` |
[
"10.1.0.0/18",
"10.1.64.0/18",
"10.1.128.0/18"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 4094 IPs per Subnet | `list(string)` |
[
"10.1.192.0/20",
"10.1.208.0/20",
"10.1.224.0/20"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no | diff --git a/streaming/spark-streaming/terraform/README.md b/streaming/spark-streaming/terraform/README.md index 9dd02dfbf..d60b6dd78 100644 --- a/streaming/spark-streaming/terraform/README.md +++ b/streaming/spark-streaming/terraform/README.md @@ -70,15 +70,15 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.29"` | no | -| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | +| [eks\_data\_plane\_subnet\_secondary\_cidr](#input\_eks\_data\_plane\_subnet\_secondary\_cidr) | Secondary CIDR blocks. 32766 IPs per Subnet per Subnet/AZ for EKS Node and Pods | `list(string)` |
[
"100.64.0.0/17",
"100.64.128.0/17"
]
| no | | [enable\_amazon\_prometheus](#input\_enable\_amazon\_prometheus) | Enable AWS Managed Prometheus service | `bool` | `true` | no | | [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no | | [enable\_yunikorn](#input\_enable\_yunikorn) | Enable Apache YuniKorn Scheduler | `bool` | `false` | no | | [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"spark-streaming-doeks"` | no | -| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | -| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | +| [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 254 IPs per Subnet/AZ for Private NAT + NLB + Airflow + EC2 Jumphost etc. | `list(string)` |
[
"10.1.1.0/24",
"10.1.2.0/24"
]
| no | +| [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet/AZ | `list(string)` |
[
"10.1.0.0/26",
"10.1.0.64/26"
]
| no | | [region](#input\_region) | Region | `string` | `"us-west-2"` | no | -| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | +| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no | | [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/16"` | no | ## Outputs From f1f840884f5f280bbdb4a639dddd22a3cd609d25 Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Mon, 28 Oct 2024 16:27:01 -0700 Subject: [PATCH 4/7] Updates to the blueprint with Apache YuniKorn --- .../terraform/spark-k8s-operator/addons.tf | 18 ++- analytics/terraform/spark-k8s-operator/eks.tf | 88 ++++++++--- .../examples/benchmark/README.md | 37 +++++ .../benchmark/tpcds-benchmark-1t-c7gd.yaml | 149 ++++++++++++++++++ ...rk-3t.yaml => tpcds-benchmark-1t-r6g.yaml} | 32 ++-- .../benchmark/tpcds-benchmark-1t-r8g.yaml | 125 +++++++++++++++ ...> tpcds-benchmark-data-generation-1t.yaml} | 26 +-- .../helm-values/yunikorn-values.yaml | 104 +----------- 8 files changed, 420 insertions(+), 159 deletions(-) create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/README.md create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml rename analytics/terraform/spark-k8s-operator/examples/benchmark/{tpcds-benchmark-3t.yaml => tpcds-benchmark-1t-r6g.yaml} (82%) create mode 100644 analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml rename analytics/terraform/spark-k8s-operator/examples/benchmark/{tpcds-benchmark-data-generation-3t.yaml => tpcds-benchmark-data-generation-1t.yaml} (86%) diff --git a/analytics/terraform/spark-k8s-operator/addons.tf b/analytics/terraform/spark-k8s-operator/addons.tf index ae5328e25..90e4bb4e6 100644 --- a/analytics/terraform/spark-k8s-operator/addons.tf +++ b/analytics/terraform/spark-k8s-operator/addons.tf @@ -84,9 +84,9 @@ module "eks_data_addons" { - key: "karpenter.k8s.aws/instance-family" operator: In values: ["c5d"] - - key: "karpenter.k8s.aws/instance-cpu" + - key: "karpenter.k8s.aws/instance-size" operator: In - values: ["4", "8", "16", "36"] + values: ["4xlarge", "9xlarge", "12xlarge", 
"18xlarge", "24xlarge"] - key: "karpenter.k8s.aws/instance-hypervisor" operator: In values: ["nitro"] @@ -97,7 +97,7 @@ module "eks_data_addons" { cpu: 1000 disruption: consolidationPolicy: WhenEmptyOrUnderutilized - consolidateAfter: 5m + consolidateAfter: 1m weight: 100 EOT ] @@ -185,7 +185,7 @@ module "eks_data_addons" { requirements: - key: "karpenter.sh/capacity-type" operator: In - values: ["spot", "on-demand"] + values: ["on-demand"] - key: "kubernetes.io/arch" operator: In values: ["arm64"] @@ -258,7 +258,7 @@ module "eks_data_addons" { cpu: 1000 disruption: consolidationPolicy: WhenEmptyOrUnderutilized - consolidateAfter: 5m + consolidateAfter: 1m weight: 100 EOT ] @@ -390,7 +390,7 @@ module "eks_data_addons" { cpu: 1000 disruption: consolidationPolicy: WhenEmptyOrUnderutilized - consolidateAfter: 5m + consolidateAfter: 1m weight: 100 EOT ] @@ -410,6 +410,10 @@ module "eks_data_addons" { version = "2.0.2" values = [ <<-EOT + controller: + batchScheduler: + enable: true + default: "yunikorn" spark: # -- List of namespaces where to run spark jobs. # If empty string is included, all namespaces will be allowed. 
@@ -435,7 +439,7 @@ module "eks_data_addons" { enable_yunikorn = var.enable_yunikorn yunikorn_helm_config = { values = [templatefile("${path.module}/helm-values/yunikorn-values.yaml", { - image_version = "1.2.0" + image_version = "1.6.0" })] } diff --git a/analytics/terraform/spark-k8s-operator/eks.tf b/analytics/terraform/spark-k8s-operator/eks.tf index cc66a5ddf..65fd31252 100644 --- a/analytics/terraform/spark-k8s-operator/eks.tf +++ b/analytics/terraform/spark-k8s-operator/eks.tf @@ -174,8 +174,43 @@ module "eks" { } } - spark_graviton_bench = { - name = "spark-graviton-bench" + # ec2-instance-selector --vcpus=48 --gpus 0 -a arm64 --allow-list '.*d.*' + # This command will give you the list of the instances with similar vcpus for arm64 dense instances + spark_spot_x86_48cpu = { + name = "spark-spot-48cpu" + description = "Spark Spot node group for executor workloads" + # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned + subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : + substr(cidr_block, 0, 4) == "100." ? 
subnet_id : null]), 0) + ] + + min_size = 0 + max_size = 12 + desired_size = 0 + + instance_types = ["r5d.12xlarge", "r6id.12xlarge", "c5ad.12xlarge", "c5d.12xlarge", "c6id.12xlarge", "m5ad.12xlarge", "m5d.12xlarge", "m6id.12xlarge"] # 48cpu - 2 x 1425 NVMe SSD + + labels = { + WorkerType = "SPOT" + NodeGroupType = "spark-spot-ca" + } + + taints = [{ + key = "spark-spot-ca" + value = true + effect = "NO_SCHEDULE" + }] + + tags = { + Name = "spark-node-grp" + WorkerType = "SPOT" + NodeGroupType = "spark" + } + } + + # The following Node groups are a placeholder to create Node groups for running Spark TPC-DS benchmarks + spark_graviton_r8g = { + name = "spark-graviton-r8g" description = "Spark managed node group for Graviton Benchmarks" # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : @@ -184,7 +219,7 @@ module "eks" { ami_type = "AL2023_ARM_64_STANDARD" - min_size = 0 + min_size = 0 # Change min and desired to 6 for running benchmarks max_size = 8 desired_size = 0 # This storage is used as a shuffle for non NVMe SSD instances. 
e.g., r8g instances @@ -204,46 +239,51 @@ module "eks" { instance_types = ["r8g.12xlarge"] # Change Instance type to run the benchmark with various instance types labels = { - NodeGroupType = "spark-graviton-benchmark-mng" + NodeGroupType = "spark-graviton-benchmark-mng-r8g" } tags = { - Name = "spark-graviton-benchmark-mng" - NodeGroupType = "spark-graviton-benchmark-mng" + Name = "spark-graviton-benchmark-mng-r8g" + NodeGroupType = "spark-graviton-benchmark-mng-r8g" } } - # ec2-instance-selector --vcpus=48 --gpus 0 -a arm64 --allow-list '.*d.*' - # This command will give you the list of the instances with similar vcpus for arm64 dense instances - spark_spot_x86_48cpu = { - name = "spark-spot-48cpu" - description = "Spark Spot node group for executor workloads" + spark_graviton_r6g = { + name = "spark-graviton-r6g" + description = "Spark managed node group for Graviton Benchmarks" # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0) ] + ami_type = "AL2023_ARM_64_STANDARD" + min_size = 0 - max_size = 12 + max_size = 8 desired_size = 0 + # This storage is used as a shuffle for non NVMe SSD instances. 
e.g., r8g instances + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_size = 300 + volume_type = "gp3" + iops = 3000 + encrypted = true + delete_on_termination = true + } + } + } - instance_types = ["r5d.12xlarge", "r6id.12xlarge", "c5ad.12xlarge", "c5d.12xlarge", "c6id.12xlarge", "m5ad.12xlarge", "m5d.12xlarge", "m6id.12xlarge"] # 48cpu - 2 x 1425 NVMe SSD + instance_types = ["r6g.12xlarge"] # Change Instance type to run the benchmark with various instance types labels = { - WorkerType = "SPOT" - NodeGroupType = "spark-spot-ca" + NodeGroupType = "spark-graviton-benchmark-mng-r6g" } - taints = [{ - key = "spark-spot-ca" - value = true - effect = "NO_SCHEDULE" - }] - tags = { - Name = "spark-node-grp" - WorkerType = "SPOT" - NodeGroupType = "spark" + Name = "spark-graviton-benchmark-mng-r6g" + NodeGroupType = "spark-graviton-benchmark-mng-r6g" } } } diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/README.md b/analytics/terraform/spark-k8s-operator/examples/benchmark/README.md new file mode 100644 index 000000000..8506dce99 --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/README.md @@ -0,0 +1,37 @@ +# Spark Benchmarking on Graviton Instances + +This document provides a step-by-step guide to execute Spark benchmarks on Graviton instances, specifically for comparing Graviton2 (`r6g`) and Graviton4 (`r8g`). + + - r6g Benchmark Job: analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml + - r8g Benchmark Job: analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml + +## Steps to Execute the Job + +### Step 1: Deploy the Spark Operator Blueprint +Deploy the Spark Operator Blueprint with YuniKorn scheduler. Follow the instructions provided in the [Data on EKS - Spark Operator with YuniKorn documentation](https://awslabs.github.io/data-on-eks/docs/blueprints/data-analytics/spark-operator-yunikorn). 
+ +### Step 2: Create an S3 Bucket for TPC-DS Data +Set up a dedicated S3 bucket to store the TPC-DS data output. + +### Step 3: Configure the S3 Bucket in the YAML File +Replace `<S3_BUCKET>` with the name of your S3 bucket in the provided YAML configuration file. + + - **r6g Benchmark Job**: analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml + - **r8g Benchmark Job**: analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml + +### Step 4: Ensure an EKS Managed Node Group with `r6g` Instances +Make sure an EKS managed node group with `r6g` instances is available. Check the `eks.tf` file under [EKS Terraform Config](https://github.com/awslabs/data-on-eks/blob/main/analytics/terraform/spark-k8s-operator/eks.tf) for configuration details. + +### Step 5: Apply the Configuration +Run the following command to apply the YAML configuration file: + +```bash +kubectl apply -f <path-to-benchmark-yaml> +``` + +### Step 6: Verify Benchmark Results in S3 +After the job completes, navigate to the output path in your S3 bucket as specified in the YAML config file. You should see: + +One JSON file with the benchmark results +One CSV file with the benchmark results +The output files contain detailed benchmarking data for analysis. \ No newline at end of file diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml new file mode 100644 index 000000000..b9cf37319 --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml @@ -0,0 +1,149 @@ +# > **Note:** Please complete the following prerequisites before executing these jobs: + +# 1. Create an S3 bucket to store the generated data. +# 2. Set up a managed node group with `c7gd` instances in the `eks.tf` file, configuring it with 6 instances. Use the existing node group as a reference. +# 3. 
Confirm that the `"spark-team-a"` namespace is present. +# 4. Replace `` with your actual S3 bucket name in the configuration. +# 5. Run `kubectl apply -f ` to deploy. + +--- +apiVersion: "sparkoperator.k8s.io/v1beta2" +kind: SparkApplication +metadata: + name: tpcds-benchmark-1tb-c7gd + namespace: spark-team-a +spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default + type: Scala + mode: cluster + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 + imagePullPolicy: IfNotPresent + sparkVersion: 3.5.3 + mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL + mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar + arguments: + # TPC-DS data location + - "s3a:///TPCDS-TEST-1TB" + # results location + - "s3a:///TPCDS-TEST-1T-RESULT" + # Path to kit in the docker image + - "/opt/tpcds-kit/tools" + # Data Format + - "parquet" + # Scale factor (in GB) + - "1000" # 1000 GB = 1 TB dataset + # Number of iterations + - "1" + # Optimize queries with hive tables + - "false" + # Filter queries, will run all if empty - "q98-v2.4,q99-v2.4,ss_max-v2.4,q95-v2.4" + - "" + # Logging set to WARN + - "true" + sparkConf: + "spark.network.timeout": "2000s" + "spark.executor.heartbeatInterval": "300s" + # AQE + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.localShuffleReader.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + "spark.sql.adaptive.skewJoin.enabled": "true" + "spark.kubernetes.executor.podNamePrefix": "benchmark-exec-c7gd-" + # S3 Optimizations + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + 
"spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + "spark.hadoop.fs.s3a.fast.upload": "true" + "spark.hadoop.fs.s3a.path.style.access": "true" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/data1/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" + + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" + "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70" + # "spark.hadoop.fs.s3a.readahead.range": "256K" + + # ----------------------------------------------------- + # This block is very critical when you get errors like + # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred + # Caused by: java.net.SocketTimeoutException: timeout + # spark.kubernetes.local.dirs.tmpfs: "true" # More details here https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-ram-for-local-storage + spark.kubernetes.submission.connectionTimeout: "120000" # milliseconds + spark.kubernetes.submission.requestTimeout: "120000" + spark.kubernetes.driver.connectionTimeout: "120000" + spark.kubernetes.driver.requestTimeout: "120000" + # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size + # ----------------------------------------------------- + # S3 Optimizations + "spark.hadoop.fs.s3a.multipart.size": "67108864" # 64 MB part size for S3 uploads + "spark.hadoop.fs.s3a.threads.max": "40" # Limit S3 threads for optimized throughput + 
"spark.hadoop.fs.s3a.connection.maximum": "100" # Set max connections for S3 + + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" # Increase shuffle buffer for better disk I/O + "spark.reducer.maxSizeInFlight": "48m" # Increase reducer buffer size in-flight data + + # Optional: Tuning multipart upload threshold + "spark.hadoop.fs.s3a.multipart.purge": "true" # Automatically clear failed multipart uploads + "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload + driver: + cores: 4 + coreLimit: "4.1" + memory: "5g" + memoryOverhead: "1g" + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-c7gd # Create a node group with this label in eks.tf + executor: + cores: 4 + coreLimit: "4.3" + memory: "6g" + memoryOverhead: "2g" + # 8 executors per node + instances: 47 + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + initContainers: + - name: volume-permission + image: public.ecr.aws/docker/library/busybox + command: ['sh', '-c', 'mkdir -p /data1; chown -R 185:185 /data1'] + volumeMounts: + - name: spark-local-dir-1 + mountPath: /data1 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-c7gd # Create a node group with this label in eks.tf + restartPolicy: + type: Never + volumes: + - name: spark-local-dir-1 + hostPath: + path: "/mnt/k8s-disks/0" + type: DirectoryOrCreate \ No newline at end of file diff --git 
a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml similarity index 82% rename from analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml rename to analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml index fa57826cc..837c4a008 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-3t.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml @@ -1,19 +1,21 @@ -# NOTE: This example requires the following prerequisites before executing the jobs -# 1. Create an S3 bucket to store the generated data -# 2. Ensure spark-team-a name space exists -# 3. replace with your bucket name +# > **Note:** Before running these jobs, make sure the following prerequisites are completed: + +# 1. Set up an S3 bucket to store the generated data. +# 2. Update the `eks.tf` file to modify `min_size=6` and `desired_size=6` for the managed node group `"spark_graviton_r6g"`, then run `terraform apply`. +# 3. Confirm that the `"spark-team-a"` namespace exists. +# 4. Replace `` in the configuration with your actual bucket name. +# 5. Run `kubectl apply -f ` to apply the configuration. 
--- apiVersion: "sparkoperator.k8s.io/v1beta2" kind: SparkApplication metadata: - name: tpcds-benchmark-3tb + name: tpcds-benchmark-1tb-r6g namespace: spark-team-a - spec: - # batchScheduler: yunikorn - # batchSchedulerOptions: - # queue: root.default + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default type: Scala mode: cluster image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 @@ -48,7 +50,7 @@ spec: "spark.sql.adaptive.localShuffleReader.enabled": "true" "spark.sql.adaptive.coalescePartitions.enabled": "true" "spark.sql.adaptive.skewJoin.enabled": "true" - "spark.kubernetes.executor.podNamePrefix": "benchmark-exec" + "spark.kubernetes.executor.podNamePrefix": "benchmark-exec-r6g-" # S3 Optimizations # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" @@ -101,11 +103,8 @@ spec: env: - name: JAVA_HOME value: "/opt/java/openjdk" - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. - # ephemeral-storage requests and limits can be used to manage the storage utilization nodeSelector: - NodeGroupType: SparkGravitonBenchmark + NodeGroupType: spark-graviton-benchmark-mng-r6g executor: cores: 4 coreLimit: "4.3" @@ -119,10 +118,7 @@ spec: env: - name: JAVA_HOME value: "/opt/java/openjdk" - # the c5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod - # we do not need to leverage a hostPath mount or volume to leverage that storage. 
- # ephemeral-storage requests and limits can be used to manage the storage utilization nodeSelector: - NodeGroupType: SparkGravitonBenchmark + NodeGroupType: spark-graviton-benchmark-mng-r6g restartPolicy: type: Never diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml new file mode 100644 index 000000000..a7d0b2ee4 --- /dev/null +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml @@ -0,0 +1,125 @@ +# > **Note:** Before running these jobs, make sure the following prerequisites are completed: + +# 1. Set up an S3 bucket to store the generated data. +# 2. Update the `eks.tf` file to modify `min_size=6` and `desired_size=6` for the managed node group `"spark_graviton_r8g"`, then run `terraform apply`. +# 3. Confirm that the `"spark-team-a"` namespace exists. +# 4. Replace `` in the configuration with your actual bucket name. +# 5. Run `kubectl apply -f ` to apply the configuration. 
+ +--- +apiVersion: "sparkoperator.k8s.io/v1beta2" +kind: SparkApplication +metadata: + name: tpcds-benchmark-1tb-r8g + namespace: spark-team-a + +spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default + type: Scala + mode: cluster + image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 + imagePullPolicy: IfNotPresent + sparkVersion: 3.5.3 + mainClass: com.amazonaws.eks.tpcds.BenchmarkSQL + mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar + arguments: + # TPC-DS data location + - "s3a:///TPCDS-TEST-1TB" + # results location + - "s3a:///TPCDS-TEST-1T-RESULT" + # Path to kit in the docker image + - "/opt/tpcds-kit/tools" + # Data Format + - "parquet" + # Scale factor (in GB) + - "1000" # 1000 GB = 1 TB dataset + # Number of iterations + - "1" + # Optimize queries with hive tables + - "false" + # Filter queries, will run all if empty - "q98-v2.4,q99-v2.4,ss_max-v2.4,q95-v2.4" + - "" + # Logging set to WARN + - "true" + sparkConf: + "spark.network.timeout": "2000s" + "spark.executor.heartbeatInterval": "300s" + # AQE + "spark.sql.adaptive.enabled": "true" + "spark.sql.adaptive.localShuffleReader.enabled": "true" + "spark.sql.adaptive.coalescePartitions.enabled": "true" + "spark.sql.adaptive.skewJoin.enabled": "true" + "spark.kubernetes.executor.podNamePrefix": "benchmark-exec-r8g-" + # S3 Optimizations + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 
https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + "spark.hadoop.fs.s3a.fast.upload": "true" + "spark.hadoop.fs.s3a.path.style.access": "true" + "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" + "spark.hadoop.fs.s3a.buffer.dir": "/tmp/s3a" + "spark.hadoop.fs.s3a.multipart.size": "128M" # Good for large files + "spark.hadoop.fs.s3a.multipart.threshold": "256M" + "spark.hadoop.fs.s3a.threads.max": "50" + "spark.hadoop.fs.s3a.connection.maximum": "200" + + "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" + "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseParallelGC -XX:InitiatingHeapOccupancyPercent=70" + # "spark.hadoop.fs.s3a.readahead.range": "256K" + + # ----------------------------------------------------- + # This block is very critical when you get errors like + # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred + # Caused by: java.net.SocketTimeoutException: timeout + # spark.kubernetes.local.dirs.tmpfs: "true" # More details here https://spark.apache.org/docs/latest/running-on-kubernetes.html#using-ram-for-local-storage + spark.kubernetes.submission.connectionTimeout: "120000" # milliseconds + spark.kubernetes.submission.requestTimeout: "120000" + spark.kubernetes.driver.connectionTimeout: "120000" + spark.kubernetes.driver.requestTimeout: "120000" + # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size + # ----------------------------------------------------- + # S3 Optimizations + "spark.hadoop.fs.s3a.multipart.size": "67108864" # 64 MB part size for S3 uploads + "spark.hadoop.fs.s3a.threads.max": "40" # Limit S3 threads for optimized throughput + "spark.hadoop.fs.s3a.connection.maximum": "100" # Set max connections for S3 + + # Data writing and shuffle tuning + "spark.shuffle.file.buffer": "1m" # Increase shuffle 
buffer for better disk I/O + "spark.reducer.maxSizeInFlight": "48m" # Increase reducer buffer size in-flight data + + # Optional: Tuning multipart upload threshold + "spark.hadoop.fs.s3a.multipart.purge": "true" # Automatically clear failed multipart uploads + "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload + driver: + cores: 4 + coreLimit: "4.1" + memory: "5g" + memoryOverhead: "1g" + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-r8g + executor: + cores: 4 + coreLimit: "4.3" + memory: "6g" + memoryOverhead: "2g" + # 8 executors per node + instances: 47 + serviceAccount: spark-team-a + securityContext: + runAsUser: 185 + env: + - name: JAVA_HOME + value: "/opt/java/openjdk" + nodeSelector: + NodeGroupType: spark-graviton-benchmark-mng-r8g + restartPolicy: + type: Never diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml similarity index 86% rename from analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml rename to analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml index 0092b7ce4..212460ed4 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-3t.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml @@ -9,9 +9,9 @@ metadata: name: tpcds-data-generation-1tb namespace: spark-team-a spec: - # batchScheduler: yunikorn - # batchSchedulerOptions: - # queue: root.default + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default type: Scala mode: cluster image: 
public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 @@ -21,7 +21,7 @@ spec: mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar arguments: # TPC-DS data location - - "s3a:///TPCDS-TEST-1TB" + - "s3a:///TPCDS-TEST-1TB-YUNIKORN" # Path to kit in the docker image - "/opt/tpcds-kit/tools" # Data Format @@ -89,9 +89,12 @@ spec: restartPolicy: type: Never driver: - cores: 10 - coreLimit: "10.1" - memory: "10g" + cores: 11 + coreLimit: "11" + # The maximum memory size of the container to the running executor is determined by the sum of + # spark.executor.memoryoverHead, spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory + # memory: "15g" converts to 16896Mi MemoryOverhead added by the YuniKorn + memory: "15g" serviceAccount: spark-team-a securityContext: runAsUser: 185 @@ -112,9 +115,12 @@ spec: NodeGroupType: SparkComputeOptimized executor: cores: 11 - coreLimit: "11.1" - memory: "15g" - instances: 26 + coreLimit: "11" + # The maximum memory size of the container to the running executor is determined by the sum of + # spark.executor.memoryoverHead, spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory + # memory: "15g" converts to 16896Mi MemoryOverhead added by the YuniKorn + memory: "15g" + instances: 4 serviceAccount: spark-team-a securityContext: runAsUser: 185 diff --git a/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml b/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml index 079653a85..2f8174d61 100644 --- a/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml +++ b/analytics/terraform/spark-k8s-operator/helm-values/yunikorn-values.yaml @@ -1,99 +1,3 @@ - - -imagePullSecrets: -serviceAccount: yunikorn-admin - -image: - repository: apache/yunikorn - tag: scheduler-${image_version} - pullPolicy: Always - -pluginImage: - repository: apache/yunikorn - tag: 
scheduler-plugin-${image_version} - pullPolicy: Always - -nodeSelector: {} -tolerations: [] -affinity: {} - -configuration: null # deprecated; use queues.yaml in yunikornDefaults -operatorPlugins: null # deprecated; use service.operatorPlugins in yunikornDefaults -placeHolderImage: null # deprecated; use service.placeholderImage in yunikornDefaults - -admissionController: - image: - repository: apache/yunikorn - tag: admission-${image_version} - pullPolicy: Always - replicaCount: 1 - serviceAccount: yunikorn-admission-controller - hostNetwork: true - resources: - requests: - cpu: 200m - memory: 500Mi - limits: - cpu: 500m - memory: 500Mi - nodeSelector: {} - tolerations: [] - affinity: {} - service: - type: ClusterIP - processNamespaces: null # deprecated; use admissionController.processNamespaces in yunikornDefaults - bypassNamespaces: null # deprecated; use admissionController.bypassNamespaces in yunikornDefaults - labelNamespaces: null # deprecated; use admissionController.labelNamespaces in yunikornDefaults - noLabelNamespaces: null # deprecated; use admissionController.noLabelNamespaces in yunikornDefaults - -web: - image: - repository: apache/yunikorn - tag: web-${image_version} - pullPolicy: Always - resources: - requests: - memory: 500Mi - cpu: 500m - limits: - memory: 500Mi - cpu: 500m - -service: - type: ClusterIP - port: 9080 - portWeb: 9889 - -ingress: - enabled: false - ingressClassName: "" - annotations: {} - hosts: - - host: chart-example.local - paths: [] - pathType: Prefix - tls: [] - -resources: - requests: - cpu: 400m - memory: 2Gi - limits: - cpu: 4 - memory: 2Gi - - - -# When this flag is true, the admission controller will be installed along with the scheduler. -# When this flag is false, the admission controller will not be installed. -# Once the admission controller is installed, all traffic will be routing to yunikorn. -embedAdmissionController: true - -# When this flag is true, the scheduler will be deployed as Kubernetes scheduler plugin. 
-# When this flag is false, the scheduler will be deployed as a standalone scheduler. -enableSchedulerPlugin: false - - # Bootstrap configuration for YuniKorn - will be rendered into yunikorn-defaults ConfigMap. # Any valid options for YuniKorn may be specified here. # Use this link for more values -> https://yunikorn.apache.org/docs/user_guide/service_config/#yunikorn-configuration @@ -117,11 +21,11 @@ yunikornDefaults: - name: default resources: guaranteed: - memory: 100G - vcore: 10 + memory: 1000G + vcore: 1000 max: - memory: 100G - vcore: 10 + memory: 1000G + vcore: 1000 - name: prod resources: guaranteed: From 9d23413861b5d6175fcfc7110ae0df294180400e Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Mon, 28 Oct 2024 20:20:54 -0700 Subject: [PATCH 5/7] YuniKorn gang scheduling now by default with Spark Operator so removed the config --- .../terraform/spark-k8s-operator/addons.tf | 5 +-- .../examples/benchmark/README.md | 2 +- .../benchmark/tpcds-benchmark-1t-c7gd.yaml | 4 +- .../benchmark/tpcds-benchmark-1t-r6g.yaml | 2 +- .../benchmark/tpcds-benchmark-1t-r8g.yaml | 2 +- .../tpcds-benchmark-data-generation-1t.yaml | 27 +++++++------ ...nvme-storage-yunikorn-gang-scheduling.yaml | 38 ++----------------- ...nvme-storage-yunikorn-gang-scheduling.yaml | 38 ++----------------- 8 files changed, 26 insertions(+), 92 deletions(-) diff --git a/analytics/terraform/spark-k8s-operator/addons.tf b/analytics/terraform/spark-k8s-operator/addons.tf index 90e4bb4e6..8e66c2da2 100644 --- a/analytics/terraform/spark-k8s-operator/addons.tf +++ b/analytics/terraform/spark-k8s-operator/addons.tf @@ -438,9 +438,8 @@ module "eks_data_addons" { #--------------------------------------------------------------- enable_yunikorn = var.enable_yunikorn yunikorn_helm_config = { - values = [templatefile("${path.module}/helm-values/yunikorn-values.yaml", { - image_version = "1.6.0" - })] + version = "1.6.0" + values = [templatefile("${path.module}/helm-values/yunikorn-values.yaml", {})] } 
#--------------------------------------------------------------- diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/README.md b/analytics/terraform/spark-k8s-operator/examples/benchmark/README.md index 8506dce99..b1f956e2d 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/README.md +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/README.md @@ -34,4 +34,4 @@ After the job completes, navigate to the output path in your S3 bucket as specif One JSON file with the benchmark results One CSV file with the benchmark results -The output files contain detailed benchmarking data for analysis. \ No newline at end of file +The output files contain detailed benchmarking data for analysis. diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml index b9cf37319..434b19931 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-c7gd.yaml @@ -94,7 +94,7 @@ spec: "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload driver: cores: 4 - coreLimit: "4.1" + coreLimit: "4.3" memory: "5g" memoryOverhead: "1g" serviceAccount: spark-team-a @@ -146,4 +146,4 @@ spec: - name: spark-local-dir-1 hostPath: path: "/mnt/k8s-disks/0" - type: DirectoryOrCreate \ No newline at end of file + type: DirectoryOrCreate diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml index 837c4a008..de48774b0 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r6g.yaml @@ -94,7 +94,7 @@ spec: 
"spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload driver: cores: 4 - coreLimit: "4.1" + coreLimit: "4.3" memory: "5g" memoryOverhead: "1g" serviceAccount: spark-team-a diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml index a7d0b2ee4..c339c385b 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-1t-r8g.yaml @@ -95,7 +95,7 @@ spec: "spark.hadoop.fs.s3a.multipart.threshold": "134217728" # 128 MB threshold to start multi-part upload driver: cores: 4 - coreLimit: "4.1" + coreLimit: "4.3" memory: "5g" memoryOverhead: "1g" serviceAccount: spark-team-a diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml index 212460ed4..52ec0810a 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml @@ -41,20 +41,23 @@ spec: "spark.driverEnv.JAVA_HOME": "/opt/java/openjdk" "spark.network.timeout": "2000s" "spark.executor.heartbeatInterval": "300s" - "spark.kubernetes.memoryOverheadFactor": "0.3" + # Bug: memoryOverheadFactor is not calculated correctly with Spark Operator when used with YuniKorn Gang Scheduling. Just use memoryOverhead for driver and executor instead. 
+ # "spark.kubernetes.memoryOverheadFactor": "0.3" # AQE "spark.sql.adaptive.enabled": "true" "spark.sql.adaptive.localShuffleReader.enabled": "true" "spark.sql.adaptive.coalescePartitions.enabled": "true" "spark.sql.adaptive.skewJoin.enabled": "true" - # "spark.kubernetes.memoryOverheadFactor": "0.35" # Optional tweak for memory-intensive jobs "spark.sql.files.maxRecordsPerFile": "30000000" "spark.serializer": "org.apache.spark.serializer.KryoSerializer" + # S3 Optimizations - # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" # This is using AWS SDK V1 in maintenance mode - "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" - "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem" + # AWS SDK V1 is in maintenance mode hence commented and enabled AWS SDK V2 Class with S3 CRT support + # "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider" + # AWS SDK V2 https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html + "spark.hadoop.fs.s3a.aws.credentials.provider.mapping": "com.amazonaws.auth.WebIdentityTokenCredentialsProvider=software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" + "spark.hadoop.fs.s3a.aws.credentials.provider": "software.amazon.awssdk.auth.credentials.WebIdentityTokenFileCredentialsProvider" "spark.hadoop.fs.s3a.fast.upload": "true" "spark.hadoop.fs.s3a.path.style.access": "true" "spark.hadoop.fs.s3a.fast.upload.buffer": "disk" @@ -66,26 +69,22 @@ spec: 
"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2" "spark.kubernetes.executor.podNamePrefix": "oss-data-gen" "spark.sql.shuffle.partitions": "2000" # Adjust according to your job size - # "spark.hadoop.fs.s3a.committer.staging.conflict-mode": "append" # Data writing and shuffle tuning "spark.shuffle.file.buffer": "1m" "spark.reducer.maxSizeInFlight": "128m" # Java options for driver and executor - # Other existing configurations "spark.executor.defaultJavaOptions": "-verbose:gc -XX:+UseG1GC" "spark.driver.defaultJavaOptions": "-XX:+UseG1GC" # ----------------------------------------------------- # This block is very critical when you get errors like # Exception in thread \"main\" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred # Caused by: java.net.SocketTimeoutException: timeout - # spark.kubernetes.local.dirs.tmpfs: "true" # Timeout settings for large data generation spark.kubernetes.submission.connectionTimeout: "60000000" spark.kubernetes.submission.requestTimeout: "60000000" spark.kubernetes.driver.connectionTimeout: "60000000" spark.kubernetes.driver.requestTimeout: "60000000" - # spark.kubernetes.allocation.batch.size: "20" # default 5 but adjust according to your cluster size restartPolicy: type: Never driver: @@ -93,8 +92,8 @@ spec: coreLimit: "11" # The maximum memory size of the container to the running executor is determined by the sum of # spark.executor.memoryoverHead, spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory - # memory: "15g" converts to 16896Mi MemoryOverhead added by the YuniKorn - memory: "15g" + memory: "15g" + memoryOverhead: "4g" serviceAccount: spark-team-a securityContext: runAsUser: 185 @@ -118,9 +117,9 @@ spec: coreLimit: "11" # The maximum memory size of the container to the running executor is determined by the sum of # spark.executor.memoryoverHead, spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory - # memory: "15g" 
converts to 16896Mi MemoryOverhead added by the YuniKorn - memory: "15g" - instances: 4 + memory: "15g" + memoryOverhead: "4g" + instances: 26 serviceAccount: spark-team-a securityContext: runAsUser: 185 diff --git a/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml b/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml index 1ac2012a4..d6e173a8b 100644 --- a/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/cluster-autoscaler/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml @@ -9,12 +9,10 @@ kind: SparkApplication metadata: name: "taxi-trip" namespace: spark-team-a - labels: - app: "taxi-trip" - applicationId: "taxi-trip-yunikorn" - # Assign the job to a Yunikorn Queue via label. - queue: root.test spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default # To create Ingress object for Spark driver. 
# Ensure Spark Operator Helm Chart deployed with Ingress enabled to use this feature # sparkUIOptions: @@ -88,36 +86,6 @@ spec: serviceAccount: spark-team-a labels: version: 3.2.1 - annotations: - yunikorn.apache.org/schedulingPolicyParameters: "placeholderTimeoutSeconds=30 gangSchedulingStyle=Hard" - yunikorn.apache.org/task-group-name: "spark-driver" - # minMember should match with driver and executor instances - # minResource cpu and memory should match with driver and executor cpu and memory - yunikorn.apache.org/task-groups: |- - [{ - "name": "spark-driver", - "minMember": 1, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "spark-on-demand-ca" - }, - "tolerations": [{"key": "spark-on-demand-ca", "operator": "Exists", "effect": "NoSchedule"}] - }, - { - "name": "spark-executor", - "minMember": 4, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "spark-spot-ca" - }, - "tolerations": [{"key": "spark-spot-ca", "operator": "Exists", "effect": "NoSchedule"}] - }] # the r5d instances that Karpenter will launch will have the NVMe storage preformatted and available to the pod # we do not need to leverage a hostPath mount or volume to leverage that storage. 
# ephemeral-storage requests and limits can be used to manage the storage utilization diff --git a/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml b/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml index 20243db25..268a2665d 100644 --- a/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/karpenter/nvme-yunikorn-gang-scheduling/nvme-storage-yunikorn-gang-scheduling.yaml @@ -9,12 +9,10 @@ kind: SparkApplication metadata: name: "taxi-trip" namespace: spark-team-a - labels: - app: "taxi-trip" - applicationId: "taxi-trip-yunikorn" - # Assign the job to a Yunikorn Queue via label. - queue: root.test spec: + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default # To create Ingress object for Spark driver. 
# Ensure Spark Operator Helm Chart deployed with Ingress enabled to use this feature # sparkUIOptions: @@ -98,36 +96,6 @@ spec: effect: "NoSchedule" labels: version: 3.2.1 - annotations: - yunikorn.apache.org/schedulingPolicyParameters: "placeholderTimeoutSeconds=30 gangSchedulingStyle=Hard" - yunikorn.apache.org/task-group-name: "spark-driver" - # minMember should match with driver and executor instances - # minResource cpu and memory should match with driver and executor cpu and memory - yunikorn.apache.org/task-groups: |- - [{ - "name": "spark-driver", - "minMember": 1, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "SparkComputeOptimized" - }, - "tolerations": [{"key": "spark-compute-optimized", "operator": "Exists", "effect": "NoSchedule"}] - }, - { - "name": "spark-executor", - "minMember": 4, - "minResource": { - "cpu": "1200m", - "memory": "14Gi" - }, - "nodeSelector": { - "NodeGroupType": "SparkComputeOptimized" - }, - "tolerations": [{"key": "spark-compute-optimized", "operator": "Exists", "effect": "NoSchedule"}] - }] executor: cores: 1 coreLimit: "1200m" From 5d0765be4be4798afd92464022ad7dcf928effa5 Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Mon, 28 Oct 2024 20:22:38 -0700 Subject: [PATCH 6/7] INput table fixed --- .../benchmark/tpcds-benchmark-data-generation-1t.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml index 52ec0810a..e12a0bad9 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml @@ -9,9 +9,9 @@ metadata: name: tpcds-data-generation-1tb namespace: spark-team-a spec: - batchScheduler: yunikorn - 
batchSchedulerOptions: - queue: root.default + # batchScheduler: yunikorn + # batchSchedulerOptions: + # queue: root.default type: Scala mode: cluster image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 @@ -21,7 +21,7 @@ spec: mainApplicationFile: local:///opt/spark/examples/jars/eks-spark-benchmark-assembly-1.0.jar arguments: # TPC-DS data location - - "s3a:///TPCDS-TEST-1TB-YUNIKORN" + - "s3a:///TPCDS-TEST-1TB" # Path to kit in the docker image - "/opt/tpcds-kit/tools" # Data Format From 422dbb433dc6c6948efa4f2b59f224f3e9893251 Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Mon, 28 Oct 2024 22:16:09 -0700 Subject: [PATCH 7/7] minor fixes to the manifest --- .../benchmark/tpcds-benchmark-data-generation-1t.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml index e12a0bad9..7b855ccab 100644 --- a/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml +++ b/analytics/terraform/spark-k8s-operator/examples/benchmark/tpcds-benchmark-data-generation-1t.yaml @@ -9,9 +9,9 @@ metadata: name: tpcds-data-generation-1tb namespace: spark-team-a spec: - # batchScheduler: yunikorn - # batchSchedulerOptions: - # queue: root.default + batchScheduler: yunikorn + batchSchedulerOptions: + queue: root.default type: Scala mode: cluster image: public.ecr.aws/data-on-eks/spark3.5.3-scala2.12-java17-python3-ubuntu-tpcds:v2 @@ -89,7 +89,6 @@ spec: type: Never driver: cores: 11 - coreLimit: "11" # The maximum memory size of the container to the running executor is determined by the sum of # spark.executor.memoryoverHead, spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory memory: "15g" @@ -114,7 +113,6 @@ spec: NodeGroupType: SparkComputeOptimized executor: cores: 
11 - coreLimit: "11" # The maximum memory size of the container to the running executor is determined by the sum of # spark.executor.memoryoverHead, spark.executor.memory, spark.memory.offHeap.size, spark.executor.pyspark.memory memory: "15g"