feat: Flink blueprint upgrade to latest Karpenter #439

Merged
merged 1 commit on Feb 20, 2024
12 changes: 6 additions & 6 deletions streaming/flink/addons.tf
@@ -20,7 +20,7 @@ module "ebs_csi_driver_irsa" {
#---------------------------------------------------------------
module "eks_blueprints_addons" {
source = "aws-ia/eks-blueprints-addons/aws"
version = "~> 1.2"
version = "~> 1.2" # change this to version = 1.2.2 for oldder version of Karpenter deployment

cluster_name = module.eks.cluster_name
cluster_endpoint = module.eks.cluster_endpoint
@@ -75,14 +75,14 @@ module "eks_blueprints_addons" {
enable_karpenter = true
karpenter_enable_spot_termination = true
karpenter_node = {
create_iam_role = true
iam_role_use_name_prefix = false
# We are defining the role name so that we can add it to aws-auth during EKS cluster creation
iam_role_name = local.karpenter_iam_role_name
iam_role_name = "${local.name}-karpenter-node"
iam_role_additional_policies = {
AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}
}

karpenter = {
timeout = "300"
chart_version = "v0.33.1"
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password
}
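Once these Terraform changes are applied, the Karpenter add-on should be running the v0.33.1 chart with the renamed node IAM role. A minimal verification sketch, assuming the stack is applied directly (rather than through the blueprint's install script) and the add-on uses the default `karpenter` namespace and deployment name:

```sh
# Re-apply the blueprint so the upgraded add-on versions take effect
terraform init -upgrade
terraform apply -auto-approve

# Confirm the Karpenter controller now runs the v0.33.x chart
helm list -n karpenter
kubectl get deploy karpenter -n karpenter \
  -o jsonpath='{.spec.template.spec.containers[0].image}'
```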
21 changes: 4 additions & 17 deletions streaming/flink/examples/karpenter/flink-sample-job.yaml
@@ -5,8 +5,8 @@ metadata:
name: basic-example
namespace: flink-team-a-ns
spec:
image: flink:1.16
flinkVersion: v1_16
image: flink:1.17
flinkVersion: v1_17
flinkConfiguration:
taskmanager.numberOfTaskSlots: "2"
state.savepoints.dir: file:///flink/data/checkpoint/savepoints
@@ -30,18 +30,7 @@ spec:
name: pod-template
spec:
nodeSelector:
NodeGroupType: "flink-compute-optimized"
tolerations:
- key: "flink-compute-optimized"
operator: "Exists"
effect: "NoSchedule"
initContainers:
- name: flink-ssd-volume-permissions
image: public.ecr.aws/y4g4v0z7/busybox
command: [ 'sh', '-c', 'chown -R 9999 /local1' ]
volumeMounts:
- mountPath: /local1
name: flink-ssd-volume
NodeGroupType: "FlinkComputeOptimized"
containers:
# Do not change the main container name
- name: flink-main-container
@@ -54,9 +43,7 @@ spec:
name: flink-ssd-volume
volumes:
- name: flink-ssd-volume
hostPath:
path: /local1
type: Directory

jobManager:
resource:
memory: "2048m"
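The sample job now targets nodes only through the `NodeGroupType: FlinkComputeOptimized` label; the toleration, hostPath volume, and busybox init container are no longer needed because local NVMe storage is prepared on the node by the user data in the EC2NodeClass below. A short usage sketch, assuming the manifest path and the `flink-team-a-ns` namespace from this repository:

```sh
# Submit the updated sample job
kubectl apply -f streaming/flink/examples/karpenter/flink-sample-job.yaml

# Karpenter should provision a c5d node carrying the new label,
# and the JobManager/TaskManager pods should schedule onto it
kubectl get nodes -l NodeGroupType=FlinkComputeOptimized -w
kubectl get pods -n flink-team-a-ns -o wide
```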
@@ -1,108 +1,111 @@
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
---
apiVersion: karpenter.sh/v1beta1
kind: NodePool # Previously kind: Provisioner
metadata:
name: flink-compute-optimized
namespace: karpenter # Same namespace as Karpenter add-on installed
spec:
kubeletConfiguration:
containerRuntime: containerd
# podsPerCore: 2
# maxPods: 20
requirements:
- key: "topology.kubernetes.io/zone"
operator: In
values: [${azs}a] # Update the correct region and zones
- key: "karpenter.sh/capacity-type"
operator: In
values: ["spot", "on-demand"]
- key: "node.kubernetes.io/instance-type" #If not included, all instance types are considered
operator: In
values: ["c5d.large","c5d.xlarge","c5d.2xlarge","c5d.4xlarge","c5d.9xlarge"] # 1 NVMe disk
- key: "kubernetes.io/arch"
operator: In
values: ["amd64"]
template:
metadata:
labels:
type: karpenter
provisioner: flink-compute-optimized
NodeGroupType: FlinkComputeOptimized
spec:
nodeClassRef:
name: flink-compute-optimized
requirements:
- key: "topology.kubernetes.io/zone"
operator: In
values: [${azs}a] #Update the correct region and zones
- key: "karpenter.sh/capacity-type"
operator: In
values: ["spot", "on-demand"]
- key: "kubernetes.io/arch"
operator: In
values: ["amd64"]
- key: "karpenter.k8s.aws/instance-category"
operator: In
values: ["c"]
- key: "karpenter.k8s.aws/instance-family"
operator: In
values: ["c5d"]
- key: "karpenter.k8s.aws/instance-cpu"
operator: In
values: ["4", "8", "16", "36"]
- key: "karpenter.k8s.aws/instance-hypervisor"
operator: In
values: ["nitro"]
- key: "karpenter.k8s.aws/instance-generation"
operator: Gt
values: ["2"]
limits:
resources:
cpu: 1000
providerRef:
name: flink-compute-optimized
labels:
type: karpenter
provisioner: flink-compute-optimized
NodeGroupType: flink-compute-optimized
taints:
- key: flink-compute-optimized
value: 'true'
effect: NoSchedule
ttlSecondsAfterEmpty: 120 # optional, but never scales down if not set
cpu: 1000
disruption:
# Describes which types of Nodes Karpenter should consider for consolidation
# If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost
# If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods
consolidationPolicy: WhenEmpty
# The amount of time Karpenter should wait after discovering a consolidation decision
# This value can currently only be set when the consolidationPolicy is 'WhenEmpty'
# You can choose to disable consolidation entirely by setting the string value 'Never' here
consolidateAfter: 30s
# The amount of time a Node can live on the cluster before being removed
# Avoiding long-running Nodes helps to reduce security vulnerabilities as well as to reduce the chance of issues that can plague Nodes with long uptimes such as file fragmentation or memory leaks from system processes
# You can choose to disable expiration entirely by setting the string value 'Never' here
expireAfter: 720h

# Priority given to the NodePool when the scheduler considers which NodePool
# to select. Higher weights indicate higher priority when comparing NodePools.
# Specifying no weight is equivalent to specifying a weight of 0.
weight: 10



# NOTE: Multiple NodePools may point to the same EC2NodeClass.
---
apiVersion: karpenter.k8s.aws/v1alpha1
kind: AWSNodeTemplate
apiVersion: karpenter.k8s.aws/v1beta1
kind: EC2NodeClass # Previously kind: AWSNodeTemplate
metadata:
name: flink-compute-optimized
namespace: karpenter
spec:
amiFamily: AL2
blockDeviceMappings:
- deviceName: /dev/xvda
ebs:
volumeSize: 100Gi
volumeSize: 50Gi
volumeType: gp3
encrypted: true
deleteOnTermination: true
metadataOptions:
httpEndpoint: enabled
httpProtocolIPv6: disabled
httpPutResponseHopLimit: 2
httpTokens: required
subnetSelector:
Name: "${eks_cluster_id}-private*" # Name of the Subnets to spin up the nodes
securityGroupSelector: # required, when not using launchTemplate
Name: "${eks_cluster_id}-node*" # name of the SecurityGroup to be used with Nodes
# instanceProfile: "" # optional, if already set in controller args
#RAID0 config example
role: "${eks_cluster_id}-karpenter-node"
subnetSelectorTerms:
- tags: # Update the correct region and zones
Name: "${eks_cluster_id}-private*"
securityGroupSelectorTerms:
- name: "${eks_cluster_id}-node*"
userData: |
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="BOUNDARY"

--BOUNDARY
Content-Type: text/x-shellscript; charset="us-ascii"

#!/bin/bash
echo "Running a custom user data script"
set -ex
yum install mdadm -y
cat <<-EOF > /etc/profile.d/bootstrap.sh
#!/bin/sh

DEVICES=$(lsblk -o NAME,TYPE -dsn | awk '/disk/ {print $1}')

DISK_ARRAY=()
# Configure the NVMe volumes in RAID0 configuration in the bootstrap.sh call.
# https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh#L35
# This will create a RAID volume and mount it at /mnt/k8s-disks/0
# then mount that volume to /var/lib/kubelet, /var/lib/containerd, and /var/log/pods
# this allows the container daemons and pods to write to the RAID0 by default without needing PersistentVolumes
export LOCAL_DISKS='raid0'
EOF

for DEV in $DEVICES
do
DISK_ARRAY+=("/dev/$${DEV}")
done

DISK_COUNT=$${#DISK_ARRAY[@]}

if [ $${DISK_COUNT} -eq 0 ]; then
echo "No SSD disks available. No further action needed."
else
if [ $${DISK_COUNT} -eq 1 ]; then
TARGET_DEV=$${DISK_ARRAY[0]}
mkfs.xfs $${TARGET_DEV}
else
mdadm --create --verbose /dev/md0 --level=0 --raid-devices=$${DISK_COUNT} $${DISK_ARRAY[@]}
mkfs.xfs /dev/md0
TARGET_DEV=/dev/md0
fi

mkdir -p /local1
echo $${TARGET_DEV} /local1 xfs defaults,noatime 1 2 >> /etc/fstab
mount -a
/usr/bin/chown -hR +999:+1000 /local1
fi
# Source extra environment variables in bootstrap script
sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh

--BOUNDARY--

tags:
InstanceType: "flink-compute-optimized" # optional, add tags for your own use
InstanceType: "flink-compute-optimized" # optional, add tags for your own use