Skip to content

Commit

Permalink
chore: Argo Workflow blueprint example updated (#364)
Browse files Browse the repository at this point in the history
* chore: Refactor blueprint, updated website

* chore: Refactor blueprint, updated website

* chore: Refactor blueprint, updated website

* chore: Add newline end of the file

* chore: Update the aws region

* chore: Update typos on website

* chore: Update example config

* chore: Update example config
  • Loading branch information
ovaleanu authored Nov 14, 2023
1 parent 0ecfd45 commit 37df993
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 18 deletions.
2 changes: 1 addition & 1 deletion schedulers/terraform/argo-workflow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
| <a name="module_amp_ingest_irsa"></a> [amp\_ingest\_irsa](#module\_amp\_ingest\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 |
| <a name="module_ebs_csi_driver_irsa"></a> [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.20 |
| <a name="module_eks"></a> [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 |
- | <a name="module_eks_blueprints_addons"></a> [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 |
+ | <a name="module_eks_blueprints_addons"></a> [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | 1.9.2 |
| <a name="module_eks_data_addons"></a> [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.0 |
| <a name="module_irsa_argo_events"></a> [irsa\_argo\_events](#module\_irsa\_argo\_events) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 |
| <a name="module_s3_bucket"></a> [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 |
Expand Down
3 changes: 2 additions & 1 deletion schedulers/terraform/argo-workflow/addons.tf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ module "ebs_csi_driver_irsa" {
#---------------------------------------------------------------
module "eks_blueprints_addons" {
source = "aws-ia/eks-blueprints-addons/aws"
- version = "~> 1.2"
+ version = "1.9.2"
+

cluster_name = module.eks.cluster_name
cluster_endpoint = module.eks.cluster_endpoint
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
template:
serviceAccountName: event-sa
sqs:
- example:
+ sqs-spark-workflow:
# jsonBody specifies that all event body payload coming from this
# source will be JSON
jsonBody: true
Expand Down
6 changes: 4 additions & 2 deletions schedulers/terraform/argo-workflow/eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,13 @@ module "eks" {

labels = {
WorkerType = "ON_DEMAND"
- NodeGroupType = "core"
+ NodeGroupType = "core-nodes"
}

tags = {
- Name = "core-node-grp"
+ Name = "core-node-group"
+ WorkerType = "ON_DEMAND"
+ NodeGroupType = "core-nodes"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ spec:
version: 3.1.1
# Using Karpenter provisioner nodeSelectors and tolerations
nodeSelector:
- provisioner: spark-compute-optimized
+ NodeGroupType: SparkComputeOptimized
+ karpenter.sh/capacity-type: "spot"
tolerations:
- key: "spark-compute-optimized"
operator: "Exists"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
dependencies:
- name: test-dep
eventSourceName: aws-sqs
- eventName: example
+ eventName: sqs-spark-workflow
triggers:
- template:
name: sqs-spark-workflow
Expand Down Expand Up @@ -58,7 +58,7 @@ spec:
template: whalesay
arguments:
parameters: [{name: message, value: "spark-done!"}]
- - name: spark-operator-taxi-job
+ - - name: spark-operator-taxi-job
template: sparkapp-operator-taxi
- name: whalesay
inputs:
Expand Down Expand Up @@ -132,7 +132,8 @@ spec:
"memory": "14Gi"
},
"nodeSelector": {
- "NodeGroupType": "SparkComputeOptimized"
+ "NodeGroupType": "SparkComputeOptimized",
+ "karpenter.sh/capacity-type": "spot"
},
"tolerations": [{"key": "spark-compute-optimized", "operator": "Exists", "effect": "NoSchedule"}]
}]
Expand All @@ -148,6 +149,7 @@ spec:
serviceAccount: spark-team-a
nodeSelector:
NodeGroupType: "SparkComputeOptimized"
+ karpenter.sh/capacity-type: "spot"
tolerations:
- key: "spark-compute-optimized"
operator: "Exists"
Expand All @@ -169,10 +171,6 @@ spec:
metadata:
generateName: event-wf-sparkapp-taxi-yunikorn-
namespace: spark-team-a
- labels:
- app: "taxi-trip-yunikorn"
- applicationId: "taxi-trip-yunikorn"
- queue: root.test
spec:
type: Python
sparkVersion: "3.2.1"
Expand All @@ -189,9 +187,6 @@ spec:
"fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem"
"mapreduce.fileoutputcommitter.algorithm.version": "2"
sparkConf:
- "spark.app.name": "taxi-trip"
- "spark.kubernetes.driver.pod.name": "taxi-trip"
- "spark.kubernetes.executor.podNamePrefix": "taxi-trip"
"spark.local.dir": "/data1"
"spark.speculation": "false"
"spark.network.timeout": "2400"
Expand Down Expand Up @@ -285,7 +280,8 @@ spec:
"memory": "14Gi"
},
"nodeSelector": {
- "NodeGroupType": "SparkComputeOptimized"
+ "NodeGroupType": "SparkComputeOptimized",
+ "karpenter.sh/capacity-type": "spot"
},
"tolerations": [{"key": "spark-compute-optimized", "operator": "Exists", "effect": "NoSchedule"}]
}]
Expand Down Expand Up @@ -315,6 +311,7 @@ spec:
yunikorn.apache.org/task-group-name: "spark-executor"
nodeSelector:
NodeGroupType: "SparkComputeOptimized"
+ karpenter.sh/capacity-type: "spot"
tolerations:
- key: "spark-compute-optimized"
operator: "Exists"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ aws s3 cp pyspark-taxi-trip.py s3://${s3_bucket}/taxi-trip/scripts/ --region ${r
wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2022-01.parquet -O "input/yellow_tripdata_2022-0.parquet"

# Making duplicate copies to increase the size of the data.
- max=200
+ max=100
for (( i=1; i <= $max; ++i ))
do
cp -rf "input/yellow_tripdata_2022-0.parquet" "input/yellow_tripdata_2022-${i}.parquet"
Expand Down

0 comments on commit 37df993

Please sign in to comment.