Move repetitive paragraphs to partial includes
raykrueger committed Apr 2, 2024
1 parent 71d7c01 commit 2519999
Showing 4 changed files with 184 additions and 198 deletions.
141 changes: 141 additions & 0 deletions website/docs/blueprints/data-analytics/_compute_optimized_nodepool.md
@@ -0,0 +1,141 @@
```yaml
# spark-compute-optimized
name: spark-compute-optimized
clusterName: ${module.eks.cluster_name}
ec2NodeClass:
  karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
  subnetSelectorTerms:
    tags:
      Name: "${module.eks.cluster_name}-private*"
  securityGroupSelectorTerms:
    tags:
      Name: ${module.eks.cluster_name}-node
  userData: |
    MIME-Version: 1.0
    Content-Type: multipart/mixed; boundary="BOUNDARY"

    --BOUNDARY
    Content-Type: text/x-shellscript; charset="us-ascii"

    cat <<-EOF > /etc/profile.d/bootstrap.sh
      #!/bin/sh
      # Configure the NVMe volumes in RAID0 configuration in the bootstrap.sh call.
      # https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh#L35
      # This will create a RAID volume and mount it at /mnt/k8s-disks/0
      # then mount that volume to /var/lib/kubelet, /var/lib/containerd, and /var/log/pods
      # this allows the container daemons and pods to write to the RAID0 by default without needing PersistentVolumes
      export LOCAL_DISKS='raid0'
    EOF

    # Source extra environment variables in bootstrap script
    sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh

    --BOUNDARY--
nodePool:
  labels:
    - type: karpenter
    - NodeGroupType: SparkComputeOptimized
    - multiArch: Spark
  requirements:
    - key: "karpenter.sh/capacity-type"
      operator: In
      values: ["spot", "on-demand"]
    - key: "kubernetes.io/arch"
      operator: In
      values: ["amd64"]
    - key: "karpenter.k8s.aws/instance-category"
      operator: In
      values: ["c"]
    - key: "karpenter.k8s.aws/instance-family"
      operator: In
      values: ["c5d"]
    - key: "karpenter.k8s.aws/instance-cpu"
      operator: In
      values: ["4", "8", "16", "36"]
    - key: "karpenter.k8s.aws/instance-hypervisor"
      operator: In
      values: ["nitro"]
    - key: "karpenter.k8s.aws/instance-generation"
      operator: Gt
      values: ["2"]
  limits:
    cpu: 20 # Change this to 1000 or more for production according to your needs
  disruption:
    consolidationPolicy: WhenEmpty
    consolidateAfter: 30s
    expireAfter: 720h
  weight: 100

# spark-graviton-memory-optimized Nodepool

name: spark-graviton-memory-optimized
clusterName: ${module.eks.cluster_name}
ec2NodeClass:
  karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
  subnetSelectorTerms:
    tags:
      Name: "${module.eks.cluster_name}-private*"
  securityGroupSelectorTerms:
    tags:
      Name: ${module.eks.cluster_name}-node
  userData: |
    MIME-Version: 1.0
    Content-Type: multipart/mixed; boundary="BOUNDARY"

    --BOUNDARY
    Content-Type: text/x-shellscript; charset="us-ascii"

    cat <<-EOF > /etc/profile.d/bootstrap.sh
      #!/bin/sh
      # Configure the NVMe volumes in RAID0 configuration in the bootstrap.sh call.
      # https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh#L35
      # This will create a RAID volume and mount it at /mnt/k8s-disks/0
      # then mount that volume to /var/lib/kubelet, /var/lib/containerd, and /var/log/pods
      # this allows the container daemons and pods to write to the RAID0 by default without needing PersistentVolumes
      export LOCAL_DISKS='raid0'
    EOF

    # Source extra environment variables in bootstrap script
    sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh

    --BOUNDARY--
nodePool:
  labels:
    - type: karpenter
    - NodeGroupType: SparkGravitonMemoryOptimized
    - multiArch: Spark
  requirements:
    - key: "karpenter.sh/capacity-type"
      operator: In
      values: ["spot", "on-demand"]
    - key: "kubernetes.io/arch"
      operator: In
      values: ["arm64"]
    - key: "karpenter.k8s.aws/instance-category"
      operator: In
      values: ["r"]
    - key: "karpenter.k8s.aws/instance-family"
      operator: In
      values: ["r6gd"]
    - key: "karpenter.k8s.aws/instance-cpu"
      operator: In
      values: ["4", "8", "16", "32"]
    - key: "karpenter.k8s.aws/instance-hypervisor"
      operator: In
      values: ["nitro"]
    - key: "karpenter.k8s.aws/instance-generation"
      operator: Gt
      values: ["2"]
  limits:
    cpu: 1000
  disruption:
    consolidationPolicy: WhenEmpty
    consolidateAfter: 30s
    expireAfter: 720h
  weight: 50
```
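As a quick sanity check after the blueprint is applied, the Karpenter resources defined above can be listed with kubectl. This is a minimal sketch, assuming Karpenter with the NodePool/EC2NodeClass APIs (v0.32 or later) is installed in the cluster; the resource names come from the `name` fields above.

```bash
# List the Karpenter NodePools and EC2NodeClasses registered in the cluster
kubectl get nodepools
kubectl get ec2nodeclasses

# Inspect the requirements, limits, and disruption settings of one pool
kubectl describe nodepool spark-compute-optimized
```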
@@ -0,0 +1,6 @@
Once our sample data is uploaded, you can run the Spark job. You will need to
replace the *\<S3_BUCKET\>* placeholders in this file with the name of the bucket
created earlier. You can get that value by running `echo $S3_BUCKET`.

To do this automatically, run the following command, which creates a .old
backup file and performs the replacement for you.
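A minimal sketch of that substitution with sed is shown below; `your-spark-job.yaml` is a hypothetical placeholder for the manifest referenced by the page that includes this partial, and the `-i.old` flag writes the backup file before editing in place.

```bash
# Replace every <S3_BUCKET> placeholder with the bucket name created earlier.
# your-spark-job.yaml is a hypothetical filename -- use the manifest from this page.
sed -i.old "s/<S3_BUCKET>/${S3_BUCKET}/g" your-spark-job.yaml
```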
9 changes: 9 additions & 0 deletions website/docs/blueprints/data-analytics/_taxi_trip_exec.md
@@ -0,0 +1,9 @@
Run the *taxi-trip-execute.sh* script with the following input. You will use the *S3_BUCKET* variable created earlier. Additionally, you must replace YOUR_REGION_HERE with the region of your choice, *us-west-2* for example.

This script will download some example taxi trip data and create duplicates of
it in order to increase the dataset size. This will take some time and will
require a relatively fast internet connection.

```bash
./taxi-trip-execute.sh ${S3_BUCKET} YOUR_REGION_HERE
```