From 461b27a194b465bcfecb096be2946d347e92a5ae Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Fri, 15 Mar 2024 13:01:50 -0700 Subject: [PATCH 01/25] EMR EKS Flnk blue rpint --- streaming/emr-eks-flink/addons.tf | 174 +++++++++++ .../basic-example-app-cluster.yaml | 52 ++++ streaming/emr-eks-flink/data.tf | 81 ++++++ streaming/emr-eks-flink/flink-team.tf | 136 +++++++++ .../aws-cloudwatch-metrics-values.yaml | 11 + .../helm-values/aws-for-fluentbit-values.yaml | 102 +++++++ .../cluster-autoscaler-values.yaml | 25 ++ .../helm-values/metrics-server-values.yaml | 52 ++++ .../emr-eks-flink/helm-values/stock.json | 5 + .../flink-compute-optimized-provisioner.yaml | 111 +++++++ streaming/emr-eks-flink/locals.tf | 14 + streaming/emr-eks-flink/main.tf | 273 ++++++++++++++++++ streaming/emr-eks-flink/outputs.tf | 9 + streaming/emr-eks-flink/providers.tf | 18 ++ streaming/emr-eks-flink/readme.md | 17 ++ streaming/emr-eks-flink/variables.tf | 10 + streaming/emr-eks-flink/versions.tf | 30 ++ 17 files changed, 1120 insertions(+) create mode 100644 streaming/emr-eks-flink/addons.tf create mode 100644 streaming/emr-eks-flink/basic-example-app-cluster.yaml create mode 100644 streaming/emr-eks-flink/data.tf create mode 100644 streaming/emr-eks-flink/flink-team.tf create mode 100644 streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml create mode 100644 streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml create mode 100644 streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml create mode 100644 streaming/emr-eks-flink/helm-values/metrics-server-values.yaml create mode 100644 streaming/emr-eks-flink/helm-values/stock.json create mode 100644 streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml create mode 100644 streaming/emr-eks-flink/locals.tf create mode 100644 streaming/emr-eks-flink/main.tf create mode 100644 streaming/emr-eks-flink/outputs.tf create mode 100644 streaming/emr-eks-flink/providers.tf 
create mode 100644 streaming/emr-eks-flink/readme.md create mode 100644 streaming/emr-eks-flink/variables.tf create mode 100644 streaming/emr-eks-flink/versions.tf diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf new file mode 100644 index 000000000..60bcdc873 --- /dev/null +++ b/streaming/emr-eks-flink/addons.tf @@ -0,0 +1,174 @@ +module "s3_bucket" { + source = "terraform-aws-modules/s3-bucket/aws" + version = "~> 3.0" + + bucket_prefix = "${local.name}-flink-logs-" + + # For example only - please evaluate for your environment + force_destroy = true + + attach_deny_insecure_transport_policy = true + attach_require_latest_tls_policy = true + + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true + + server_side_encryption_configuration = { + rule = { + apply_server_side_encryption_by_default = { + sse_algorithm = "AES256" + } + } + } + + tags = local.tags +} +#--------------------------------------------------------------- +# IRSA for EBS CSI Driver +#--------------------------------------------------------------- +module "ebs_csi_driver_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "~> 5.20" + role_name_prefix = format("%s-%s", local.name, "ebs-csi-driver") + attach_ebs_csi_policy = true + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"] + } + } + tags = local.tags +} +#--------------------------------------------------------------- +# EKS Blueprints Kubernetes Addons +#--------------------------------------------------------------- +module "eks_blueprints_addons" { + source = "aws-ia/eks-blueprints-addons/aws" + version = "~> 1.2" + + cluster_name = module.eks.cluster_name + cluster_endpoint = module.eks.cluster_endpoint + cluster_version = module.eks.cluster_version + oidc_provider_arn = 
module.eks.oidc_provider_arn + + #--------------------------------------- + # Amazon EKS Managed Add-ons + #--------------------------------------- + eks_addons = { + aws-ebs-csi-driver = { + service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn + } + coredns = { + preserve = true + } + vpc-cni = { + preserve = true + } + kube-proxy = { + preserve = true + } + } + # install cert-manager + enable_cert_manager = true + cert_manager = { + set_values = [ + { + name = "extraArgs[0]" + value = "--enable-certificate-owner-ref=false" + }, + ] + } + #--------------------------------------- + # Metrics Server + #--------------------------------------- + enable_metrics_server = true + metrics_server = { + values = [templatefile("${path.module}/helm-values/metrics-server-values.yaml", {})] + } + + #--------------------------------------- + # Cluster Autoscaler + #--------------------------------------- + enable_cluster_autoscaler = true + cluster_autoscaler = { + create_role = true + values = [templatefile("${path.module}/helm-values/cluster-autoscaler-values.yaml", { + aws_region = var.region, + eks_cluster_id = module.eks.cluster_name + })] + } + #--------------------------------------- + # Karpenter Autoscaler for EKS Cluster + #--------------------------------------- + enable_karpenter = true + karpenter_enable_spot_termination = true + karpenter = { + repository_username = data.aws_ecrpublic_authorization_token.token.user_name + repository_password = data.aws_ecrpublic_authorization_token.token.password + } + karpenter_node = { + iam_role_additional_policies = { + AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" + } + } + + #--------------------------------------- + # CloudWatch metrics for EKS + #--------------------------------------- + enable_aws_cloudwatch_metrics = true + aws_cloudwatch_metrics = { + values = [templatefile("${path.module}/helm-values/aws-cloudwatch-metrics-values.yaml", {})] + } + 
#--------------------------------------- + # Adding AWS Load Balancer Controller + #--------------------------------------- + enable_aws_load_balancer_controller = true + + #--------------------------------------- + # AWS for FluentBit - DaemonSet + #--------------------------------------- + # Fluentbit is required to stream the logs to S3 when EMR Spark Operator is enabled + enable_aws_for_fluentbit = true + aws_for_fluentbit_cw_log_group = { + use_name_prefix = false + name = "/${local.name}/aws-fluentbit-logs" # Add-on creates this log group + retention_in_days = 30 + } + aws_for_fluentbit = { + s3_bucket_arns = [ + module.s3_bucket.s3_bucket_arn, + "${module.s3_bucket.s3_bucket_arn}/*}" + ] + values = [templatefile("${path.module}/helm-values/aws-for-fluentbit-values.yaml", { + region = var.region, + cloudwatch_log_group = "/${local.name}/aws-fluentbit-logs" + s3_bucket_name = module.s3_bucket.s3_bucket_id + cluster_name = module.eks.cluster_name + })] + } + + +} +#--------------------------------------------------------------- +# Data on EKS Kubernetes Addons +#--------------------------------------------------------------- +module "eks_data_addons" { + source = "aws-ia/eks-data-addons/aws" + version = "~> 1.0" # ensure to update this to the latest/desired version + + oidc_provider_arn = module.eks.oidc_provider_arn + + +} +#--------------------------------------- +# Karpenter Provisioners +#--------------------------------------- +data "kubectl_path_documents" "karpenter_provisioners" { + pattern = "${path.module}/karpenter-provisioners/flink-*.yaml" + vars = { + azs = local.region + eks_cluster_id = module.eks.cluster_name + } +} diff --git a/streaming/emr-eks-flink/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/basic-example-app-cluster.yaml new file mode 100644 index 000000000..fe10f7761 --- /dev/null +++ b/streaming/emr-eks-flink/basic-example-app-cluster.yaml @@ -0,0 +1,52 @@ +apiVersion: flink.apache.org/v1beta1 +kind: FlinkDeployment 
+metadata: + name: basic-example-app-cluster-flink +spec: + flinkConfiguration: + taskmanager.numberOfTaskSlots: "2" + # Autotuning parameters + kubernetes.operator.job.autoscaler.autotune.enable: "true" + kubernetes.operator.job.autoscaler.autotune.metrics.history.max.count: "2" + kubernetes.operator.job.autoscaler.autotune.metrics.restart.count: "1" + metrics.job.status.enable: TOTAL_TIME + + # Autoscaler parameters + kubernetes.operator.job.autoscaler.enabled: "true" + kubernetes.operator.job.autoscaler.scaling.enabled: "true" + kubernetes.operator.job.autoscaler.stabilization.interval: "5s" + kubernetes.operator.job.autoscaler.metrics.window: "1m" + + jobmanager.scheduler: adaptive + # Replace with s3 bucket in your own account + state.checkpoints.dir: s3://emr-flink-data/checkpoints + state.savepoints.dir: s3://emr-flink-data/savepoints + + + + flinkVersion: v1_17 + # add service account + serviceAccount: flink-team-a-sa + + # executionRoleArn: arn:aws:iam::681921237057:role/flink-job-execution-role + emrReleaseLabel: "emr-7.0.0-flink-latest" + jobManager: + # Replace with s3 bucket in your own account + storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager + resource: + memory: "2048m" + cpu: 1 + taskManager: + resource: + memory: "2048m" + cpu: 1 + job: + # if you have your job jar in S3 bucket you can use that path as well + jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar + parallelism: 2 + upgradeMode: savepoint + savepointTriggerNonce: 0 + monitoringConfiguration: + cloudWatchMonitoringConfiguration: + logGroupName: LOG_GROUP_NAME + \ No newline at end of file diff --git a/streaming/emr-eks-flink/data.tf b/streaming/emr-eks-flink/data.tf new file mode 100644 index 000000000..c2b808e12 --- /dev/null +++ b/streaming/emr-eks-flink/data.tf @@ -0,0 +1,81 @@ +data "aws_eks_cluster_auth" "this" { + name = module.eks.cluster_name +} + +data "aws_ecrpublic_authorization_token" "token" { + provider = aws.ecr +} + +data 
"aws_availability_zones" "available" {} + +data "aws_region" "current" {} + +data "aws_caller_identity" "current" {} + +data "aws_partition" "current" {} + +# This data source can be used to get the latest AMI for Managed Node Groups +data "aws_ami" "x86" { + owners = ["amazon"] + most_recent = true + + filter { + name = "name" + values = ["amazon-eks-node-${module.eks.cluster_version}-*"] # Update this for ARM ["amazon-eks-arm64-node-${module.eks.cluster_version}-*"] + } +} + +#--------------------------------------------------------------- +# IAM policy for FluentBit +#--------------------------------------------------------------- +data "aws_iam_policy_document" "fluent_bit" { + statement { + sid = "" + effect = "Allow" + resources = ["*"] + + actions = [ + "s3:ListBucket", + "s3:PutObject", + "s3:PutObjectAcl", + "s3:GetObject", + "s3:GetObjectAcl", + "s3:DeleteObject", + "s3:DeleteObjectVersion" + ] + } +} + +#--------------------------------------------------------------- +# Example IAM policy for Flink job execution +#--------------------------------------------------------------- +data "aws_iam_policy_document" "flink_operator" { + statement { + sid = "" + effect = "Allow" + resources = ["*"] + + actions = [ + "s3:ListBucket", + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:GetBucketLocation", + "s3:GetObjectVersion" + ] + } + + statement { + sid = "" + effect = "Allow" + resources = ["*"] + + actions = [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:DescribeLogGroups", + "logs:DescribeLogStreams", + "logs:PutLogEvents", + ] + } +} diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf new file mode 100644 index 000000000..290d15051 --- /dev/null +++ b/streaming/emr-eks-flink/flink-team.tf @@ -0,0 +1,136 @@ + +resource "kubernetes_namespace_v1" "flink_team_a" { + + metadata { + name = "${local.flink_team}-ns" + } + timeouts { + delete = "15m" + } +} + +resource "kubernetes_service_account_v1" 
"flink_team_a" { + metadata { + name = "${local.flink_team}-sa" + namespace = "${local.flink_team}-ns" + annotations = { "eks.amazonaws.com/role-arn" : module.flink_irsa.iam_role_arn } + } + + automount_service_account_token = true +} +#--------------------------------------------------------------- +# Creates IAM policy for IRSA. Provides IAM permissions for flink pods +#--------------------------------------------------------------- +resource "aws_iam_policy" "flink" { + description = "IAM role policy for flink Job execution" + name = "${local.name}-flink-irsa" + policy = data.aws_iam_policy_document.flink_operator.json +} + +#--------------------------------------------------------------- +# IRSA for flink pods for "flink-team-a" +#--------------------------------------------------------------- +module "flink_irsa" { + source = "aws-ia/eks-blueprints-addon/aws" + version = "~> 1.0" + + # Disable helm release + create_release = false + + # IAM role for service account (IRSA) + create_role = true + role_name = "${local.name}-${local.flink_team}" + create_policy = false + role_policies = { + flink_team_a_policy = aws_iam_policy.flink.arn + } + + oidc_providers = { + this = { + provider_arn = module.eks.oidc_provider_arn + namespace = "${local.flink_team}-ns" + service_account = "${local.flink_team}-sa" + } + } +} +#--------------------------------------------------------------- +# Flink Role +#--------------------------------------------------------------- +resource "kubernetes_role" "flink" { + metadata { + name = "${local.flink_team}-role" + namespace = "${local.flink_team}-ns" + + labels = { + "app.kubernetes.io/name" = "flink-kubernetes-operator" + "app.kubernetes.io/version" = "1.4.0" + } + } + + rule { + verbs = ["*"] + api_groups = [""] + resources = ["pods", "pods/log", "configmaps", "endpoints", "persistentvolumes", "persistentvolumeclaims"] + } + + rule { + verbs = ["create", "patch", "delete", "watch"] + api_groups = [""] + resources = ["secrets"] + } + 
+ rule { + verbs = ["*"] + api_groups = ["apps"] + resources = ["deployments", "statefulsets"] + } + + rule { + verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] + api_groups = ["extensions", "networking.k8s.io"] + resources = ["ingresses"] + } + + rule { + verbs = ["list", "watch"] + api_groups = ["storage.k8s.io"] + resources = ["storageclasses"] + } + + rule { + verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] + api_groups = ["batch"] + resources = ["jobs"] + } + + depends_on = [module.flink_irsa] +} + +#--------------------------------------------------------------- +# Flink Rolebinding +#--------------------------------------------------------------- +resource "kubernetes_role_binding" "flink" { + metadata { + name = "${local.flink_team}-role-binding" + namespace = "${local.flink_team}-ns" + + labels = { + "app.kubernetes.io/name" = "emr-flink-kubernetes-operator" + "app.kubernetes.io/version" = "7.0.0" + } + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "Role" + name = "${local.flink_team}-role" + } + + subject { + kind = "ServiceAccount" + name = "${local.flink_team}-sa" + namespace = "${local.flink_team}-ns" + } + + depends_on = [module.flink_irsa] +} \ No newline at end of file diff --git a/streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml b/streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml new file mode 100644 index 000000000..3b19a5d18 --- /dev/null +++ b/streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml @@ -0,0 +1,11 @@ +resources: + limits: + cpu: 500m + memory: 2Gi + requests: + cpu: 200m + memory: 1Gi + +# This toleration allows Daemonset pod to be scheduled on any node, regardless of their Taints. 
+tolerations: + - operator: Exists \ No newline at end of file diff --git a/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml b/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml new file mode 100644 index 000000000..a70bdfb81 --- /dev/null +++ b/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml @@ -0,0 +1,102 @@ +global: + +#hostNetwork and dnsPolicy are critical for enabling large clusters to avoid making calls to API server +# see this link https://docs.fluentbit.io/manual/pipeline/filters/kubernetes#optional-feature-using-kubelet-to-get-metadata +hostNetwork: true +dnsPolicy: ClusterFirstWithHostNet + +service: + parsersFiles: + - /fluent-bit/parsers/parsers.conf + extraParsers: | + [PARSER] + Name kubernetes + Format regex + Regex ^(?[^_]+)\.(?.+)\.(?[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)\.(?[a-z0-9]{64})-$ + +input: + name: "tail" + enabled: true + tag: "systempods....-" + path: "/var/log/containers/*.log" + db: "/var/log/flb_kube.db" + memBufLimit: 5MB + skipLongLines: "On" + refreshInterval: 10 + extraInputs: | + multiline.parser docker, cri + Tag_Regex (?[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?[^_]+)_(?.+)-(?[a-z0-9]{64})\.log$ + + +# NOTE: extraFilters config for using Kubelet to get the Metadata instead of talking to API server for large clusters +filter: + name: "kubernetes" + match: "systempods.*" + kubeURL: "https://kubernetes.default.svc.cluster.local:443" + mergeLog: "On" + mergeLogKey: "log_processed" + keepLog: "On" + k8sLoggingParser: "On" + k8sLoggingExclude: "Off" + bufferSize: "0" + extraFilters: | + Kube_Tag_Prefix systempods. + Regex_Parser kubernetes + Labels On + Annotations Off + Use_Kubelet true + Kubelet_Port 10250 + Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token + +# CATION: Do not use `cloudwatch` plugin. 
This Golang Plugin is not recommended by AWS anymore instead use C plugin(`cloudWatchLogs`) for better performance. +# cloudWatch: +# enabled: false + +# This is a new high performance C Plugin for CloudWatchLogs. See docs here https://docs.fluentbit.io/manual/pipeline/outputs/cloudwatch +cloudWatchLogs: + enabled: true + match: "systempods.*" + region: ${region} + logGroupName: ${cloudwatch_log_group} + autoCreateGroup: false + extraOutputs: | + log_key log + +#----------------------------------------------------------# +# OUTPUT logs to S3 +#----------------------------------------------------------# + +# This is an example for writing logs to S3 bucket. +# This example writes system pod logs and spark logs into dedicated prefix. +# This second output is using the rewrite_tag filter commented above + +additionalOutputs: | + [OUTPUT] + Name s3 + Match systempods.* + region ${region} + bucket ${s3_bucket_name} + total_file_size 100M + s3_key_format /${cluster_name}/system-pod-logs/$TAG[1]/$TAG[2]/$TAG[3]/$TAG[3]_%H%M%S_$UUID.log + s3_key_format_tag_delimiters .. + store_dir /home/ec2-user/buffer + upload_timeout 10m + log_key log + + +# Resource config for large clusters +resources: + limits: + cpu: 1000m + memory: 1500Mi + requests: + cpu: 500m + memory: 500Mi + +## Assign a PriorityClassName to pods if set +priorityClassName: system-node-critical + +# This toleration allows Daemonset pod to be scheduled on any node, regardless of their Taints. 
+tolerations: + - operator: Exists \ No newline at end of file diff --git a/streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml b/streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml new file mode 100644 index 000000000..bac1129f1 --- /dev/null +++ b/streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml @@ -0,0 +1,25 @@ +autoDiscovery: + clusterName: ${eks_cluster_id} + +awsRegion: ${aws_region} + +cloudProvider: aws + +extraArgs: + aws-use-static-instance-list: true + +# Best practice to update the resource requests and limits for each add-on +resources: + limits: + cpu: 1000m + memory: 1G + requests: + cpu: 200m + memory: 512Mi + +# Best practice to updateStrategy for each add-on +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 \ No newline at end of file diff --git a/streaming/emr-eks-flink/helm-values/metrics-server-values.yaml b/streaming/emr-eks-flink/helm-values/metrics-server-values.yaml new file mode 100644 index 000000000..6e2b4df58 --- /dev/null +++ b/streaming/emr-eks-flink/helm-values/metrics-server-values.yaml @@ -0,0 +1,52 @@ +# HA config for metrics-server +image: + repository: registry.k8s.io/metrics-server/metrics-server + pullPolicy: IfNotPresent + +serviceAccount: + create: true + name: metrics-server + +rbac: + create: true + pspEnabled: false + +apiService: + create: true + +podLabels: + k8s-app: metrics-server + +# HA enabled by enabling replicas to 2, updateStrategy and podDisruptionBudget to true +replicas: 2 + +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + +podDisruptionBudget: + enabled: true + minAvailable: 1 + +defaultArgs: + - --cert-dir=/tmp + - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname + - --kubelet-use-node-status-port + - --metric-resolution=15s + +resources: + requests: + cpu: 200m + memory: 512Mi + +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - 
labelSelector: + matchLabels: + k8s-app: metrics-server + namespaces: + - kube-system + topologyKey: kubernetes.io/hostname \ No newline at end of file diff --git a/streaming/emr-eks-flink/helm-values/stock.json b/streaming/emr-eks-flink/helm-values/stock.json new file mode 100644 index 000000000..5f83d9ce7 --- /dev/null +++ b/streaming/emr-eks-flink/helm-values/stock.json @@ -0,0 +1,5 @@ +{ + 'event_time': datetime.datetime.now().isoformat(), + 'ticker': 'MSFT', + 'price': round(random.random() * 100, 2) +} \ No newline at end of file diff --git a/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml b/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml new file mode 100644 index 000000000..aa845cfaf --- /dev/null +++ b/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml @@ -0,0 +1,111 @@ +--- +apiVersion: karpenter.sh/v1beta1 +kind: NodePool # Previously kind: Provisioner +metadata: + name: flink-compute-optimized + namespace: karpenter # Same namespace as Karpenter add-on installed +spec: + template: + metadata: + labels: + type: karpenter + provisioner: flink-compute-optimized + NodeGroupType: flinkComputeOptimized + spec: + nodeClassRef: + name: flink-compute-optimized + requirements: + - key: "topology.kubernetes.io/zone" + operator: In + values: [${azs}a] #Update the correct region and zones + - key: "karpenter.sh/capacity-type" + operator: In + values: ["spot", "on-demand"] + - key: "kubernetes.io/arch" + operator: In + values: ["amd64"] + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: ["c"] + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["c5d"] + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: ["4", "8", "16", "36"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + - key: "karpenter.k8s.aws/instance-generation" + operator: Gt + values: ["2"] + limits: + 
cpu: 1000 + disruption: + # Describes which types of Nodes Karpenter should consider for consolidation + # If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost + # If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods + consolidationPolicy: WhenEmpty + # The amount of time Karpenter should wait after discovering a consolidation decision + # This value can currently only be set when the consolidationPolicy is 'WhenEmpty' + # You can choose to disable consolidation entirely by setting the string value 'Never' here + consolidateAfter: 30s + # The amount of time a Node can live on the cluster before being removed + # Avoiding long-running Nodes helps to reduce security vulnerabilities as well as to reduce the chance of issues that can plague Nodes with long uptimes such as file fragmentation or memory leaks from system processes + # You can choose to disable expiration entirely by setting the string value 'Never' here + expireAfter: 720h + + # Priority given to the NodePool when the scheduler considers which NodePool + # to select. Higher weights indicate higher priority when comparing NodePools. + # Specifying no weight is equivalent to specifying a weight of 0. + weight: 10 + + + +# NOTE: Multiple NodePools may point to the same EC2NodeClass. 
+--- +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass # Previously kind: AWSNodeTemplate +metadata: + name: flink-compute-optimized + namespace: karpenter +spec: + amiFamily: AL2 + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + volumeSize: 50Gi + volumeType: gp3 + encrypted: true + deleteOnTermination: true + role: "${eks_cluster_id}-karpenter-node" + subnetSelectorTerms: + - tags: # Update the correct region and zones + Name: "${eks_cluster_id}-private*" + securityGroupSelectorTerms: + - name: "${eks_cluster_id}-node*" + userData: | + MIME-Version: 1.0 + Content-Type: multipart/mixed; boundary="BOUNDARY" + + --BOUNDARY + Content-Type: text/x-shellscript; charset="us-ascii" + + cat <<-EOF > /etc/profile.d/bootstrap.sh + #!/bin/sh + + + # Configure the NVMe volumes in RAID0 configuration in the bootstrap.sh call. + # https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh#L35 + # This will create a RAID volume and mount it at /mnt/k8s-disks/0 + # then mount that volume to /var/lib/kubelet, /var/lib/containerd, and /var/log/pods + # this allows the container daemons and pods to write to the RAID0 by default without needing PersistentVolumes + export LOCAL_DISKS='raid0' + EOF + + # Source extra environment variables in bootstrap script + sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh + + --BOUNDARY-- + tags: + InstanceType: "flink-compute-optimized" # optional, add tags for your own use \ No newline at end of file diff --git a/streaming/emr-eks-flink/locals.tf b/streaming/emr-eks-flink/locals.tf new file mode 100644 index 000000000..f6f486d1b --- /dev/null +++ b/streaming/emr-eks-flink/locals.tf @@ -0,0 +1,14 @@ +#create local +locals { + name = "emr-eks-flink-starter" + service_account_namespace = "kube-system" + service_account_name = "aws-load-balancer-controller" + + karpenter_iam_role_name = format("%s-%s", "karpenter", local.name) + tags = { + Blueprint = local.name + GithubRepo = 
"github.com/awslabs/data-on-eks" + } + flink_team = "flink-team-a" + region = "us-west-2" +} \ No newline at end of file diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf new file mode 100644 index 000000000..bd9b64341 --- /dev/null +++ b/streaming/emr-eks-flink/main.tf @@ -0,0 +1,273 @@ +# create eks cluster +module "eks" { + source = "terraform-aws-modules/eks/aws" + version = "~> 19.15" + + cluster_name = local.name + cluster_version = var.eks_cluster_version + + cluster_endpoint_private_access = true # if true, Kubernetes API requests within your cluster's VPC (such as node to control plane communication) use the private VPC endpoint + cluster_endpoint_public_access = true # if true, Your cluster API server is accessible from the internet. You can, optionally, limit the CIDR blocks that can access the public endpoint. + + vpc_id = module.vpc.vpc_id + subnet_ids = module.vpc.private_subnets + + manage_aws_auth_configmap = true + aws_auth_roles = [ + { + # Required for EMR on EKS virtual cluster + rolearn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/AWSServiceRoleForAmazonEMRContainers" + username = "emr-containers" + }, + ] + + + #--------------------------------------- + # Note: This can further restricted to specific required for each Add-on and your application + #--------------------------------------- + # Extend cluster security group rules + cluster_security_group_additional_rules = { + ingress_nodes_ephemeral_ports_tcp = { + description = "Nodes on ephemeral ports" + protocol = "tcp" + from_port = 1025 + to_port = 65535 + type = "ingress" + source_node_security_group = true + } + } + + # Extend node-to-node security group rules + node_security_group_additional_rules = { + ingress_self_all = { + description = "Node to node all ports/protocols" + protocol = "-1" + from_port = 0 + to_port = 0 + type = "ingress" + self = true + } + egress_all = { + description = "Node all egress" + protocol = "-1" + 
from_port = 0 + to_port = 0 + type = "egress" + cidr_blocks = ["0.0.0.0/0"] + ipv6_cidr_blocks = ["::/0"] + } + # Allows Control Plane Nodes to talk to Worker nodes on all ports. Added this to simplify the example and further avoid issues with Add-ons communication with Control plane. + # This can be restricted further to specific port based on the requirement for each Add-on e.g., metrics-server 4443, spark-operator 8080, karpenter 8443 etc. + # Change this according to your security requirements if needed + ingress_cluster_to_node_all_traffic = { + description = "Cluster API to Nodegroup all traffic" + protocol = "-1" + from_port = 0 + to_port = 0 + type = "ingress" + source_cluster_security_group = true + } + } + + eks_managed_node_group_defaults = { + iam_role_additional_policies = { + # Not required, but used in the example to access the nodes to inspect mounted volumes + AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" + } + } + eks_managed_node_groups = { + # We recommend to have a MNG to place your critical workloads and add-ons + # Then rely on Karpenter to scale your workloads + # You can also make uses on nodeSelector and Taints/tolerations to spread workloads on MNG or Karpenter provisioners + core_node_group = { + name = "core-node-group" + description = "EKS managed node group example launch template" + + ami_id = data.aws_ami.x86.image_id + # This will ensure the bootstrap user data is used to join the node + # By default, EKS managed node groups will not append bootstrap script; + # this adds it back in using the default template provided by the module + # Note: this assumes the AMI provided is an EKS optimized AMI derivative + enable_bootstrap_user_data = true + + # Optional - This is to show how you can pass pre bootstrap data + pre_bootstrap_user_data = <<-EOT + echo "Node bootstrap process started by Data on EKS" + EOT + + # Optional - Post bootstrap data to verify anything + post_bootstrap_user_data = <<-EOT 
+ echo "Bootstrap complete.Ready to Go!" + EOT + + subnet_ids = module.vpc.private_subnets + + min_size = 3 + max_size = 9 + desired_size = 3 + + force_update_version = true + instance_types = ["m5.xlarge"] + + ebs_optimized = true + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_size = 100 + volume_type = "gp3" + } + } + } + + update_config = { + max_unavailable_percentage = 50 + } + + labels = { + WorkerType = "ON_DEMAND" + NodeGroupType = "core" + } + + tags = { + Name = "core-node-grp", + "karpenter.sh/discovery" = local.name + } + } + + + } + +} + +#import module vpc +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "5.5.1" + + name = "vpc-emr-flink-eks" + + cidr = "10.0.0.0/16" + azs = slice(data.aws_availability_zones.available.names, 0, 3) + + private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] + public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] + + enable_nat_gateway = true + single_nat_gateway = true + enable_dns_hostnames = true + + public_subnet_tags = { + "kubernetes.io/cluster/${local.name}" = "shared" + "kubernetes.io/role/elb" = 1 + } + + private_subnet_tags = { + "kubernetes.io/cluster/${local.name}" = "shared" + "kubernetes.io/role/internal-elb" = 1 + } +} + + + +# create a virtual cluster in the eks cluster +resource "aws_emrcontainers_virtual_cluster" "emr_eks_flink_cluster" { + + container_provider { + id = module.eks.cluster_name + type = "EKS" + + info { + eks_info { + namespace = "${local.flink_team}-ns" + } + } + } + + + name = "emr-eks-flink-cluster" +} + + + +# deploy a helm chart for flink-kubernetes-operator +resource "helm_release" "flink_kubernetes_operator" { + depends_on = [module.flink_irsa] + name = "flink-kubernetes-operator" + repository = "oci://public.ecr.aws/emr-on-eks" + chart = "flink-kubernetes-operator" + namespace = "${local.flink_team}-ns" + + + + set { + name = "watchNamespace" + value = "${local.flink_team}-ns" + } + + set { + name = 
"serviceAccount.name" + value = "${local.flink_team}-sa" + } + + set { + name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" + value = module.flink_irsa.iam_role_arn + } + + set { + name = "env.AWS_REGION" + value = var.region + } + + set { + name = "env.EMR_VIRTUAL_CLUSTER_ID" + value = aws_emrcontainers_virtual_cluster.emr_eks_flink_cluster.id + } + + set { + name = "env.JOB_MANAGER_IAM_ROLE" + value = module.flink_irsa.iam_role_arn + } + + set { + name = "env.TASK_MANAGER_IAM_ROLE" + value = module.flink_irsa.iam_role_arn + } + # set the version + set { + name = "image.tag" + value = "1.12.0" + } + # set the emr release version + set { + name = "env.VERSION" + value = "7.0.0" + } + + + # set prometheus metrics + set { + name = "prometheus.enabled" + value = "true" + } + + #set prometheus metrics + set { + name = "prometheus.metrics.port" + value = "8081" + } + +} + + + + + + + + + + + + diff --git a/streaming/emr-eks-flink/outputs.tf b/streaming/emr-eks-flink/outputs.tf new file mode 100644 index 000000000..c088a73ae --- /dev/null +++ b/streaming/emr-eks-flink/outputs.tf @@ -0,0 +1,9 @@ +# create output for flink operator role arn +output "flink_operator_role_arn" { + value = module.flink_irsa.iam_role_arn +} + + + + + diff --git a/streaming/emr-eks-flink/providers.tf b/streaming/emr-eks-flink/providers.tf new file mode 100644 index 000000000..244d7eec2 --- /dev/null +++ b/streaming/emr-eks-flink/providers.tf @@ -0,0 +1,18 @@ +#import aws provider +provider "aws" { + region = "us-west-2" +} +provider "aws" { + alias = "ecr" + region = "us-east-1" +} +provider "kubernetes" { + config_path = "~/.kube/config" + +} + +provider "helm" { + kubernetes { + config_path = "~/.kube/config" + } +} diff --git a/streaming/emr-eks-flink/readme.md b/streaming/emr-eks-flink/readme.md new file mode 100644 index 000000000..2c879b8e2 --- /dev/null +++ b/streaming/emr-eks-flink/readme.md @@ -0,0 +1,17 @@ +# Steps to deploy the blupeprint + 1. 
Run the terraform script + + ``` + terraform init + + terraform apply + ``` + 1. Edit the basic-example-app-cluster.yaml to use your **own** S3 bucket url + 2. Deploy a sample job to run a Flink job + ``` + kubectl apply -f basic-example-app-cluster.yaml + ``` + + + + \ No newline at end of file diff --git a/streaming/emr-eks-flink/variables.tf b/streaming/emr-eks-flink/variables.tf new file mode 100644 index 000000000..59255c72b --- /dev/null +++ b/streaming/emr-eks-flink/variables.tf @@ -0,0 +1,10 @@ +#create a variable +variable "eks_cluster_version" { + type = string + default = "1.28" +} +variable "region" { + type = string + default = "us-west-2" +} + diff --git a/streaming/emr-eks-flink/versions.tf b/streaming/emr-eks-flink/versions.tf new file mode 100644 index 000000000..c541aa763 --- /dev/null +++ b/streaming/emr-eks-flink/versions.tf @@ -0,0 +1,30 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + + kubernetes = { + source = "hashicorp/kubernetes" + version = ">= 2.10" + } + helm = { + source = "hashicorp/helm" + version = ">= 2.4" + } + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.14" + } + random = { + source = "hashicorp/random" + version = ">= 3.3" + } + } + + # ## Used for end-to-end testing on project; update to suit your needs + # backend "s3" { + # bucket = "doeks-github-actions-e2e-test-state" + # region = "us-west-2" + # key = "e2e/emr-eks-karpenter/terraform.tfstate" + # } +} \ No newline at end of file From 4681e36816e9eb3f5babfe2ea439e8ee47f94314 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Tue, 26 Mar 2024 17:36:23 -0700 Subject: [PATCH 02/25] fixes based on pre-commit --- streaming/emr-eks-flink/addons.tf | 10 ----- streaming/emr-eks-flink/cleanup.sh | 45 +++++++++++++++++++ streaming/emr-eks-flink/data.tf | 25 ++--------- .../basic-example-app-cluster.yaml | 15 +++---- streaming/emr-eks-flink/flink-team.tf | 2 +- .../aws-cloudwatch-metrics-values.yaml | 2 +- 
.../helm-values/aws-for-fluentbit-values.yaml | 2 +- .../cluster-autoscaler-values.yaml | 2 +- .../helm-values/metrics-server-values.yaml | 2 +- .../emr-eks-flink/helm-values/stock.json | 5 --- streaming/emr-eks-flink/install.sh | 34 ++++++++++++++ .../flink-compute-optimized-provisioner.yaml | 2 +- streaming/emr-eks-flink/locals.tf | 13 +++--- streaming/emr-eks-flink/main.tf | 45 ++----------------- streaming/emr-eks-flink/outputs.tf | 8 +--- streaming/emr-eks-flink/providers.tf | 24 +++++++--- streaming/emr-eks-flink/readme.md | 17 ------- streaming/emr-eks-flink/variables.tf | 11 ++--- streaming/emr-eks-flink/versions.tf | 15 ++++--- 19 files changed, 141 insertions(+), 138 deletions(-) create mode 100644 streaming/emr-eks-flink/cleanup.sh rename streaming/emr-eks-flink/{ => examples}/basic-example-app-cluster.yaml (86%) delete mode 100644 streaming/emr-eks-flink/helm-values/stock.json create mode 100644 streaming/emr-eks-flink/install.sh delete mode 100644 streaming/emr-eks-flink/readme.md diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index 60bcdc873..90b80a575 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -162,13 +162,3 @@ module "eks_data_addons" { } -#--------------------------------------- -# Karpenter Provisioners -#--------------------------------------- -data "kubectl_path_documents" "karpenter_provisioners" { - pattern = "${path.module}/karpenter-provisioners/flink-*.yaml" - vars = { - azs = local.region - eks_cluster_id = module.eks.cluster_name - } -} diff --git a/streaming/emr-eks-flink/cleanup.sh b/streaming/emr-eks-flink/cleanup.sh new file mode 100644 index 000000000..da1fb7c16 --- /dev/null +++ b/streaming/emr-eks-flink/cleanup.sh @@ -0,0 +1,45 @@ +#!/bin/bash +set -o errexit +set -o pipefail + +targets=( + "module.eks" + "module.vpc" +) + +#------------------------------------------- +# Helpful to delete the stuck in "Terminating" namespaces +# Rerun the cleanup.sh 
script to detect and delete the stuck resources +#------------------------------------------- +terminating_namespaces=$(kubectl get namespaces --field-selector status.phase=Terminating -o json | jq -r '.items[].metadata.name') + +# If there are no terminating namespaces, exit the script +if [[ -z $terminating_namespaces ]]; then + echo "No terminating namespaces found" +fi + +for ns in $terminating_namespaces; do + echo "Terminating namespace: $ns" + kubectl get namespace $ns -o json | sed 's/"kubernetes"//' | kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f - +done + +for target in "${targets[@]}" +do + terraform destroy -target="$target" -auto-approve + destroy_output=$(terraform destroy -target="$target" -auto-approve 2>&1) + if [[ $? -eq 0 && $destroy_output == *"Destroy complete!"* ]]; then + echo "SUCCESS: Terraform destroy of $target completed successfully" + else + echo "FAILED: Terraform destroy of $target failed" + exit 1 + fi +done + +terraform destroy -auto-approve +destroy_output=$(terraform destroy -auto-approve 2>&1) +if [[ $? 
-eq 0 && $destroy_output == *"Destroy complete!"* ]]; then + echo "SUCCESS: Terraform destroy of all targets completed successfully" +else + echo "FAILED: Terraform destroy of all targets failed" + exit 1 +fi diff --git a/streaming/emr-eks-flink/data.tf b/streaming/emr-eks-flink/data.tf index c2b808e12..8e7ddffe5 100644 --- a/streaming/emr-eks-flink/data.tf +++ b/streaming/emr-eks-flink/data.tf @@ -1,3 +1,4 @@ + data "aws_eks_cluster_auth" "this" { name = module.eks.cluster_name } @@ -8,11 +9,11 @@ data "aws_ecrpublic_authorization_token" "token" { data "aws_availability_zones" "available" {} -data "aws_region" "current" {} + data "aws_caller_identity" "current" {} -data "aws_partition" "current" {} + # This data source can be used to get the latest AMI for Managed Node Groups data "aws_ami" "x86" { @@ -25,26 +26,6 @@ data "aws_ami" "x86" { } } -#--------------------------------------------------------------- -# IAM policy for FluentBit -#--------------------------------------------------------------- -data "aws_iam_policy_document" "fluent_bit" { - statement { - sid = "" - effect = "Allow" - resources = ["*"] - - actions = [ - "s3:ListBucket", - "s3:PutObject", - "s3:PutObjectAcl", - "s3:GetObject", - "s3:GetObjectAcl", - "s3:DeleteObject", - "s3:DeleteObjectVersion" - ] - } -} #--------------------------------------------------------------- # Example IAM policy for Flink job execution diff --git a/streaming/emr-eks-flink/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml similarity index 86% rename from streaming/emr-eks-flink/basic-example-app-cluster.yaml rename to streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml index fe10f7761..68752a1c1 100644 --- a/streaming/emr-eks-flink/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml @@ -20,16 +20,16 @@ spec: jobmanager.scheduler: adaptive # Replace with s3 bucket in your own account state.checkpoints.dir: 
s3://emr-flink-data/checkpoints - state.savepoints.dir: s3://emr-flink-data/savepoints - - - + state.savepoints.dir: s3://emr-flink-data/savepoints + + + flinkVersion: v1_17 # add service account serviceAccount: flink-team-a-sa - # executionRoleArn: arn:aws:iam::681921237057:role/flink-job-execution-role - emrReleaseLabel: "emr-7.0.0-flink-latest" + #executionRoleArn: arn:aws:iam::681921237057:role/flink-job-execution-role + emrReleaseLabel: "emr-7.0.0-flink-latest" jobManager: # Replace with s3 bucket in your own account storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager @@ -46,7 +46,6 @@ spec: parallelism: 2 upgradeMode: savepoint savepointTriggerNonce: 0 - monitoringConfiguration: + monitoringConfiguration: cloudWatchMonitoringConfiguration: logGroupName: LOG_GROUP_NAME - \ No newline at end of file diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf index 290d15051..f3a3c63e9 100644 --- a/streaming/emr-eks-flink/flink-team.tf +++ b/streaming/emr-eks-flink/flink-team.tf @@ -133,4 +133,4 @@ resource "kubernetes_role_binding" "flink" { } depends_on = [module.flink_irsa] -} \ No newline at end of file +} diff --git a/streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml b/streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml index 3b19a5d18..ae3c41d44 100644 --- a/streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml +++ b/streaming/emr-eks-flink/helm-values/aws-cloudwatch-metrics-values.yaml @@ -8,4 +8,4 @@ resources: # This toleration allows Daemonset pod to be scheduled on any node, regardless of their Taints. 
tolerations: - - operator: Exists \ No newline at end of file + - operator: Exists diff --git a/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml b/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml index a70bdfb81..0bea5188d 100644 --- a/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml +++ b/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml @@ -99,4 +99,4 @@ priorityClassName: system-node-critical # This toleration allows Daemonset pod to be scheduled on any node, regardless of their Taints. tolerations: - - operator: Exists \ No newline at end of file + - operator: Exists diff --git a/streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml b/streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml index bac1129f1..5a42794f2 100644 --- a/streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml +++ b/streaming/emr-eks-flink/helm-values/cluster-autoscaler-values.yaml @@ -22,4 +22,4 @@ updateStrategy: type: RollingUpdate rollingUpdate: maxSurge: 0 - maxUnavailable: 1 \ No newline at end of file + maxUnavailable: 1 diff --git a/streaming/emr-eks-flink/helm-values/metrics-server-values.yaml b/streaming/emr-eks-flink/helm-values/metrics-server-values.yaml index 6e2b4df58..026d97a6a 100644 --- a/streaming/emr-eks-flink/helm-values/metrics-server-values.yaml +++ b/streaming/emr-eks-flink/helm-values/metrics-server-values.yaml @@ -49,4 +49,4 @@ affinity: k8s-app: metrics-server namespaces: - kube-system - topologyKey: kubernetes.io/hostname \ No newline at end of file + topologyKey: kubernetes.io/hostname diff --git a/streaming/emr-eks-flink/helm-values/stock.json b/streaming/emr-eks-flink/helm-values/stock.json deleted file mode 100644 index 5f83d9ce7..000000000 --- a/streaming/emr-eks-flink/helm-values/stock.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - 'event_time': datetime.datetime.now().isoformat(), - 'ticker': 'MSFT', - 'price': round(random.random() * 100, 2) -} \ No newline at 
end of file diff --git a/streaming/emr-eks-flink/install.sh b/streaming/emr-eks-flink/install.sh new file mode 100644 index 000000000..8430565fc --- /dev/null +++ b/streaming/emr-eks-flink/install.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# List of Terraform modules to apply in sequence +targets=( + "module.vpc" + "module.eks" +) + +# Initialize Terraform +echo "Initializing ..." +terraform init --upgrade || echo "\"terraform init\" failed" + +# Apply modules in sequence +for target in "${targets[@]}" +do + echo "Applying module $target..." + apply_output=$(terraform apply -target="$target" -auto-approve 2>&1 | tee /dev/tty) + if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then + echo "SUCCESS: Terraform apply of $target completed successfully" + else + echo "FAILED: Terraform apply of $target failed" + exit 1 + fi +done + +# Final apply to catch any remaining resources +echo "Applying remaining resources..." +apply_output=$(terraform apply -auto-approve 2>&1 | tee /dev/tty) +if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then + echo "SUCCESS: Terraform apply of all modules completed successfully" +else + echo "FAILED: Terraform apply of all modules failed" + exit 1 +fi diff --git a/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml b/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml index aa845cfaf..aa1934178 100644 --- a/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml +++ b/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml @@ -108,4 +108,4 @@ spec: --BOUNDARY-- tags: - InstanceType: "flink-compute-optimized" # optional, add tags for your own use \ No newline at end of file + InstanceType: "flink-compute-optimized" # optional, add tags for your own use diff --git a/streaming/emr-eks-flink/locals.tf b/streaming/emr-eks-flink/locals.tf index f6f486d1b..db951c2d2 100644 --- 
a/streaming/emr-eks-flink/locals.tf +++ b/streaming/emr-eks-flink/locals.tf @@ -1,14 +1,13 @@ -#create local +#create local locals { - name = "emr-eks-flink-starter" - service_account_namespace = "kube-system" - service_account_name = "aws-load-balancer-controller" + name = "emr-eks-flink-starter" + + region = var.region - karpenter_iam_role_name = format("%s-%s", "karpenter", local.name) tags = { Blueprint = local.name GithubRepo = "github.com/awslabs/data-on-eks" } flink_team = "flink-team-a" - region = "us-west-2" -} \ No newline at end of file + +} diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf index bd9b64341..65ff820bd 100644 --- a/streaming/emr-eks-flink/main.tf +++ b/streaming/emr-eks-flink/main.tf @@ -184,7 +184,6 @@ resource "aws_emrcontainers_virtual_cluster" "emr_eks_flink_cluster" { } } - name = "emr-eks-flink-cluster" } @@ -198,53 +197,29 @@ resource "helm_release" "flink_kubernetes_operator" { chart = "flink-kubernetes-operator" namespace = "${local.flink_team}-ns" - - - set { - name = "watchNamespace" - value = "${local.flink_team}-ns" - } - + # set service account set { name = "serviceAccount.name" value = "${local.flink_team}-sa" } set { - name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = module.flink_irsa.iam_role_arn + name = "watchNamespace" + value = "${local.flink_team}-ns" } + set { name = "env.AWS_REGION" value = var.region } - set { - name = "env.EMR_VIRTUAL_CLUSTER_ID" - value = aws_emrcontainers_virtual_cluster.emr_eks_flink_cluster.id - } - - set { - name = "env.JOB_MANAGER_IAM_ROLE" - value = module.flink_irsa.iam_role_arn - } - set { - name = "env.TASK_MANAGER_IAM_ROLE" - value = module.flink_irsa.iam_role_arn - } # set the version set { name = "image.tag" - value = "1.12.0" - } - # set the emr release version - set { - name = "env.VERSION" value = "7.0.0" } - # set prometheus metrics set { @@ -259,15 +234,3 @@ resource "helm_release" "flink_kubernetes_operator" { } } - - - - 
- - - - - - - - diff --git a/streaming/emr-eks-flink/outputs.tf b/streaming/emr-eks-flink/outputs.tf index c088a73ae..5cb6efad5 100644 --- a/streaming/emr-eks-flink/outputs.tf +++ b/streaming/emr-eks-flink/outputs.tf @@ -1,9 +1,5 @@ # create output for flink operator role arn output "flink_operator_role_arn" { - value = module.flink_irsa.iam_role_arn + value = module.flink_irsa.iam_role_arn + description = "IAM linked role for the flink operator" } - - - - - diff --git a/streaming/emr-eks-flink/providers.tf b/streaming/emr-eks-flink/providers.tf index 244d7eec2..2dab41db8 100644 --- a/streaming/emr-eks-flink/providers.tf +++ b/streaming/emr-eks-flink/providers.tf @@ -1,18 +1,32 @@ -#import aws provider provider "aws" { - region = "us-west-2" + region = local.region } + +# ECR always authenticates with `us-east-1` region +# Docs -> https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html provider "aws" { alias = "ecr" region = "us-east-1" } -provider "kubernetes" { - config_path = "~/.kube/config" +provider "kubernetes" { + host = module.eks.cluster_endpoint + cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) + token = data.aws_eks_cluster_auth.this.token } provider "helm" { kubernetes { - config_path = "~/.kube/config" + host = module.eks.cluster_endpoint + cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) + token = data.aws_eks_cluster_auth.this.token } } + +provider "kubectl" { + apply_retry_count = 30 + host = module.eks.cluster_endpoint + cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) + load_config_file = false + token = data.aws_eks_cluster_auth.this.token +} diff --git a/streaming/emr-eks-flink/readme.md b/streaming/emr-eks-flink/readme.md deleted file mode 100644 index 2c879b8e2..000000000 --- a/streaming/emr-eks-flink/readme.md +++ /dev/null @@ -1,17 +0,0 @@ -# Steps to deploy the blupeprint - 1. 
Run the terraform script - - ``` - terraform init - - terraform apply - ``` - 1. Edit the basic-example-app-cluster.yaml to use your **own** S3 bucket url - 2. Deploy a sample job to run a Flink job - ``` - kubectl apply -f basic-example-app-cluster.yaml - ``` - - - - \ No newline at end of file diff --git a/streaming/emr-eks-flink/variables.tf b/streaming/emr-eks-flink/variables.tf index 59255c72b..22ab2852e 100644 --- a/streaming/emr-eks-flink/variables.tf +++ b/streaming/emr-eks-flink/variables.tf @@ -1,10 +1,11 @@ #create a variable variable "eks_cluster_version" { - type = string - default = "1.28" + type = string + default = "1.28" + description = "EKS version for the cluster" } variable "region" { - type = string - default = "us-west-2" + type = string + default = "us-west-2" + description = "Region for deployment" } - diff --git a/streaming/emr-eks-flink/versions.tf b/streaming/emr-eks-flink/versions.tf index c541aa763..9599a6e77 100644 --- a/streaming/emr-eks-flink/versions.tf +++ b/streaming/emr-eks-flink/versions.tf @@ -1,15 +1,18 @@ terraform { - required_version = ">= 1.0" + required_version = ">= 1.0.0" required_providers { - + aws = { + source = "hashicorp/aws" + version = ">= 3.72" + } kubernetes = { source = "hashicorp/kubernetes" version = ">= 2.10" } helm = { source = "hashicorp/helm" - version = ">= 2.4" + version = ">= 2.4.1" } kubectl = { source = "gavinbunney/kubectl" @@ -17,7 +20,7 @@ terraform { } random = { source = "hashicorp/random" - version = ">= 3.3" + version = "3.3.2" } } @@ -25,6 +28,6 @@ terraform { # backend "s3" { # bucket = "doeks-github-actions-e2e-test-state" # region = "us-west-2" - # key = "e2e/emr-eks-karpenter/terraform.tfstate" + # key = "e2e/flink/terraform.tfstate" # } -} \ No newline at end of file +} From 51eb6bac2d1a6dff935701c443ac93637acbc962 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Tue, 26 Mar 2024 17:37:18 -0700 Subject: [PATCH 03/25] fixes based on pre-commit --- README.md | 26 
++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index e4b06c80b..eca93be2a 100755 --- a/README.md +++ b/README.md @@ -75,3 +75,29 @@ This library is licensed under the Apache 2.0 License. We welcome all individuals who are enthusiastic about data on Kubernetes to become a part of this open source community. Your contributions and participation are invaluable to the success of this project. Built with ❤️ at AWS. + + +## Requirements + +No requirements. + +## Providers + +No providers. + +## Modules + +No modules. + +## Resources + +No resources. + +## Inputs + +No inputs. + +## Outputs + +No outputs. + \ No newline at end of file From 883ef3dbdf18537330732fc7b4ba3144e0afd1e3 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Tue, 26 Mar 2024 17:39:46 -0700 Subject: [PATCH 04/25] added readme --- streaming/emr-eks-flink/README.md | 64 +++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 streaming/emr-eks-flink/README.md diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md new file mode 100644 index 000000000..a5543d42a --- /dev/null +++ b/streaming/emr-eks-flink/README.md @@ -0,0 +1,64 @@ + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.0.0 | +| [aws](#requirement\_aws) | >= 3.72 | +| [helm](#requirement\_helm) | >= 2.4.1 | +| [kubectl](#requirement\_kubectl) | >= 1.14 | +| [kubernetes](#requirement\_kubernetes) | >= 2.10 | +| [random](#requirement\_random) | 3.3.2 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | 5.42.0 | +| [aws.ecr](#provider\_aws.ecr) | 5.42.0 | +| [helm](#provider\_helm) | 2.12.1 | +| [kubernetes](#provider\_kubernetes) | 2.27.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 
5.20 | +| [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 | +| [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 | +| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.0 | +| [flink\_irsa](#module\_flink\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | +| [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 | +| [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | 5.5.1 | + +## Resources + +| Name | Type | +|------|------| +| [aws_emrcontainers_virtual_cluster.emr_eks_flink_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emrcontainers_virtual_cluster) | resource | +| [aws_iam_policy.flink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [helm_release.flink_kubernetes_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [kubernetes_namespace_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource | +| [kubernetes_role.flink](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/role) | resource | +| [kubernetes_role_binding.flink](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/role_binding) | resource | +| [kubernetes_service_account_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/service_account_v1) | resource | +| [aws_ami.x86](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ami) | data source | +| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source | +| 
[aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_ecrpublic_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecrpublic_authorization_token) | data source | +| [aws_eks_cluster_auth.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source | +| [aws_iam_policy_document.flink_operator](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS version for the cluster | `string` | `"1.28"` | no | +| [region](#input\_region) | Region for deployment | `string` | `"us-west-2"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [flink\_operator\_role\_arn](#output\_flink\_operator\_role\_arn) | IAM linked role for the flink operator | + \ No newline at end of file From b6987da4a3496a0333af1b06551e29e9c1eef7b9 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Tue, 26 Mar 2024 17:41:39 -0700 Subject: [PATCH 05/25] fixed readme alignment --- streaming/emr-eks-flink/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index a5543d42a..5dbefe9ba 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -61,4 +61,4 @@ | Name | Description | |------|-------------| | [flink\_operator\_role\_arn](#output\_flink\_operator\_role\_arn) | IAM linked role for the flink operator | - \ No newline at end of file + From 0fc6f27ffe5fa7876b56b62299a06efd748780f1 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 27 Mar 2024 16:10:08 -0700 Subject: [PATCH 06/25] applied changes to have auto 
sa creation --- streaming/emr-eks-flink/README.md | 3 - .../examples/basic-example-app-cluster.yaml | 4 +- streaming/emr-eks-flink/flink-team.tf | 98 +------------------ streaming/emr-eks-flink/main.tf | 7 -- 4 files changed, 6 insertions(+), 106 deletions(-) diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index 5dbefe9ba..90daf1df5 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -39,9 +39,6 @@ | [aws_iam_policy.flink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [helm_release.flink_kubernetes_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [kubernetes_namespace_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource | -| [kubernetes_role.flink](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/role) | resource | -| [kubernetes_role_binding.flink](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/role_binding) | resource | -| [kubernetes_service_account_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/service_account_v1) | resource | | [aws_ami.x86](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ami) | data source | | [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | diff --git a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml index 68752a1c1..c8095002b 100644 --- a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml +++ 
b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml @@ -26,9 +26,9 @@ spec: flinkVersion: v1_17 # add service account - serviceAccount: flink-team-a-sa + # serviceAccount: flink-team-a-sa - #executionRoleArn: arn:aws:iam::681921237057:role/flink-job-execution-role + executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-starter-flink-team-a-2024013119010949270000000d emrReleaseLabel: "emr-7.0.0-flink-latest" jobManager: # Replace with s3 bucket in your own account diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf index f3a3c63e9..0b3d2cda2 100644 --- a/streaming/emr-eks-flink/flink-team.tf +++ b/streaming/emr-eks-flink/flink-team.tf @@ -1,4 +1,3 @@ - resource "kubernetes_namespace_v1" "flink_team_a" { metadata { @@ -9,15 +8,6 @@ resource "kubernetes_namespace_v1" "flink_team_a" { } } -resource "kubernetes_service_account_v1" "flink_team_a" { - metadata { - name = "${local.flink_team}-sa" - namespace = "${local.flink_team}-ns" - annotations = { "eks.amazonaws.com/role-arn" : module.flink_irsa.iam_role_arn } - } - - automount_service_account_token = true -} #--------------------------------------------------------------- # Creates IAM policy for IRSA. 
Provides IAM permissions for flink pods #--------------------------------------------------------------- @@ -44,93 +34,13 @@ module "flink_irsa" { role_policies = { flink_team_a_policy = aws_iam_policy.flink.arn } - + assume_role_condition_test = "StringLike" oidc_providers = { this = { - provider_arn = module.eks.oidc_provider_arn - namespace = "${local.flink_team}-ns" - service_account = "${local.flink_team}-sa" - } - } -} -#--------------------------------------------------------------- -# Flink Role -#--------------------------------------------------------------- -resource "kubernetes_role" "flink" { - metadata { - name = "${local.flink_team}-role" - namespace = "${local.flink_team}-ns" - - labels = { - "app.kubernetes.io/name" = "flink-kubernetes-operator" - "app.kubernetes.io/version" = "1.4.0" - } - } - - rule { - verbs = ["*"] - api_groups = [""] - resources = ["pods", "pods/log", "configmaps", "endpoints", "persistentvolumes", "persistentvolumeclaims"] - } - - rule { - verbs = ["create", "patch", "delete", "watch"] - api_groups = [""] - resources = ["secrets"] - } - - rule { - verbs = ["*"] - api_groups = ["apps"] - resources = ["deployments", "statefulsets"] - } - - rule { - verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] - api_groups = ["extensions", "networking.k8s.io"] - resources = ["ingresses"] - } - - rule { - verbs = ["list", "watch"] - api_groups = ["storage.k8s.io"] - resources = ["storageclasses"] - } - - rule { - verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"] - api_groups = ["batch"] - resources = ["jobs"] - } - - depends_on = [module.flink_irsa] -} - -#--------------------------------------------------------------- -# Flink Rolebinding -#--------------------------------------------------------------- -resource "kubernetes_role_binding" "flink" { - metadata { - name = "${local.flink_team}-role-binding" - namespace = 
"${local.flink_team}-ns" - - labels = { - "app.kubernetes.io/name" = "emr-flink-kubernetes-operator" - "app.kubernetes.io/version" = "7.0.0" + provider_arn = module.eks.oidc_provider_arn + namespace = "${local.flink_team}-ns" + service_account = "emr-containers-sa-*-*-${data.aws_caller_identity.current.account_id}-*" } } - role_ref { - api_group = "rbac.authorization.k8s.io" - kind = "Role" - name = "${local.flink_team}-role" - } - - subject { - kind = "ServiceAccount" - name = "${local.flink_team}-sa" - namespace = "${local.flink_team}-ns" - } - - depends_on = [module.flink_irsa] } diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf index 65ff820bd..6300f4462 100644 --- a/streaming/emr-eks-flink/main.tf +++ b/streaming/emr-eks-flink/main.tf @@ -196,13 +196,6 @@ resource "helm_release" "flink_kubernetes_operator" { repository = "oci://public.ecr.aws/emr-on-eks" chart = "flink-kubernetes-operator" namespace = "${local.flink_team}-ns" - - # set service account - set { - name = "serviceAccount.name" - value = "${local.flink_team}-sa" - } - set { name = "watchNamespace" value = "${local.flink_team}-ns" From 3096d80395ad2ff78c6e94de0e3a9a36900faa12 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Mon, 1 Apr 2024 16:36:29 -0700 Subject: [PATCH 07/25] using irsa for operator role --- streaming/emr-eks-flink/README.md | 7 ++-- .../examples/basic-example-app-cluster.yaml | 2 +- streaming/emr-eks-flink/flink-team.tf | 35 ++++++++++++++++- streaming/emr-eks-flink/install.sh | 11 ++++-- streaming/emr-eks-flink/locals.tf | 6 +-- streaming/emr-eks-flink/main.tf | 39 +++++++------------ streaming/emr-eks-flink/variables.tf | 6 +++ 7 files changed, 67 insertions(+), 39 deletions(-) mode change 100644 => 100755 streaming/emr-eks-flink/install.sh diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index 90daf1df5..9604d2ef8 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -14,8 
+14,8 @@ | Name | Version | |------|---------| -| [aws](#provider\_aws) | 5.42.0 | -| [aws.ecr](#provider\_aws.ecr) | 5.42.0 | +| [aws](#provider\_aws) | 5.43.0 | +| [aws.ecr](#provider\_aws.ecr) | 5.43.0 | | [helm](#provider\_helm) | 2.12.1 | | [kubernetes](#provider\_kubernetes) | 2.27.0 | @@ -28,6 +28,7 @@ | [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 | | [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.0 | | [flink\_irsa](#module\_flink\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | +| [flink\_irsa\_operator](#module\_flink\_irsa\_operator) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | 5.5.1 | @@ -35,7 +36,6 @@ | Name | Type | |------|------| -| [aws_emrcontainers_virtual_cluster.emr_eks_flink_cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/emrcontainers_virtual_cluster) | resource | | [aws_iam_policy.flink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [helm_release.flink_kubernetes_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [kubernetes_namespace_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource | @@ -51,6 +51,7 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS version for the cluster | `string` | `"1.28"` | no | +| [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-flink"` | no | | [region](#input\_region) | Region for deployment | `string` | `"us-west-2"` | no | ## Outputs diff --git a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml 
b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml index c8095002b..03bd0c499 100644 --- a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml @@ -28,7 +28,7 @@ spec: # add service account # serviceAccount: flink-team-a-sa - executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-starter-flink-team-a-2024013119010949270000000d + executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240401212643803900000001 emrReleaseLabel: "emr-7.0.0-flink-latest" jobManager: # Replace with s3 bucket in your own account diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf index 0b3d2cda2..dada17cc9 100644 --- a/streaming/emr-eks-flink/flink-team.tf +++ b/streaming/emr-eks-flink/flink-team.tf @@ -24,6 +24,7 @@ module "flink_irsa" { source = "aws-ia/eks-blueprints-addon/aws" version = "~> 1.0" + # Disable helm release create_release = false @@ -37,10 +38,40 @@ module "flink_irsa" { assume_role_condition_test = "StringLike" oidc_providers = { this = { - provider_arn = module.eks.oidc_provider_arn - namespace = "${local.flink_team}-ns" + provider_arn = module.eks.oidc_provider_arn + namespace = "${local.flink_team}-ns" service_account = "emr-containers-sa-*-*-${data.aws_caller_identity.current.account_id}-*" } } } + + +#--------------------------------------------------------------- +# IRSA for flink pods for "flink-operator" +#--------------------------------------------------------------- +module "flink_irsa_operator" { + source = "aws-ia/eks-blueprints-addon/aws" + version = "~> 1.0" + + + # Disable helm release + create_release = false + + # IAM role for service account (IRSA) + create_role = true + role_name = "${local.name}-operator" + create_policy = false + role_policies = { + flink_team_a_policy = aws_iam_policy.flink.arn + } + assume_role_condition_test = "StringLike" + oidc_providers = { + this = { + 
provider_arn = module.eks.oidc_provider_arn + namespace = "${local.flink_operator}-ns" + service_account = "emr-containers-sa-flink-operator" + } + } + +} diff --git a/streaming/emr-eks-flink/install.sh b/streaming/emr-eks-flink/install.sh old mode 100644 new mode 100755 index 8430565fc..9d43e080b --- a/streaming/emr-eks-flink/install.sh +++ b/streaming/emr-eks-flink/install.sh @@ -1,20 +1,23 @@ #!/bin/bash +read -p "Enter the region: " region +export AWS_DEFAULT_REGION=$region + # List of Terraform modules to apply in sequence targets=( "module.vpc" "module.eks" + ) # Initialize Terraform -echo "Initializing ..." -terraform init --upgrade || echo "\"terraform init\" failed" +terraform init --upgrade # Apply modules in sequence for target in "${targets[@]}" do echo "Applying module $target..." - apply_output=$(terraform apply -target="$target" -auto-approve 2>&1 | tee /dev/tty) + apply_output=$(terraform apply -target="$target" -var="region=$region" -auto-approve 2>&1 | tee /dev/tty) if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then echo "SUCCESS: Terraform apply of $target completed successfully" else @@ -25,7 +28,7 @@ done # Final apply to catch any remaining resources echo "Applying remaining resources..." 
-apply_output=$(terraform apply -auto-approve 2>&1 | tee /dev/tty) +apply_output=$(terraform apply -var="region=$region" -auto-approve 2>&1 | tee /dev/tty) if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then echo "SUCCESS: Terraform apply of all modules completed successfully" else diff --git a/streaming/emr-eks-flink/locals.tf b/streaming/emr-eks-flink/locals.tf index db951c2d2..ecfb50c4b 100644 --- a/streaming/emr-eks-flink/locals.tf +++ b/streaming/emr-eks-flink/locals.tf @@ -1,6 +1,6 @@ #create local locals { - name = "emr-eks-flink-starter" + name = var.name region = var.region @@ -8,6 +8,6 @@ locals { Blueprint = local.name GithubRepo = "github.com/awslabs/data-on-eks" } - flink_team = "flink-team-a" - + flink_team = "flink-team-a" + flink_operator = "flink-kubernetes-operator" } diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf index 6300f4462..f8f1767b2 100644 --- a/streaming/emr-eks-flink/main.tf +++ b/streaming/emr-eks-flink/main.tf @@ -145,7 +145,7 @@ module "vpc" { source = "terraform-aws-modules/vpc/aws" version = "5.5.1" - name = "vpc-emr-flink-eks" + name = local.name cidr = "10.0.0.0/16" azs = slice(data.aws_availability_zones.available.names, 0, 3) @@ -170,44 +170,31 @@ module "vpc" { -# create a virtual cluster in the eks cluster -resource "aws_emrcontainers_virtual_cluster" "emr_eks_flink_cluster" { - - container_provider { - id = module.eks.cluster_name - type = "EKS" - - info { - eks_info { - namespace = "${local.flink_team}-ns" - } - } - } - - name = "emr-eks-flink-cluster" -} - - # deploy a helm chart for flink-kubernetes-operator resource "helm_release" "flink_kubernetes_operator" { - depends_on = [module.flink_irsa] - name = "flink-kubernetes-operator" - repository = "oci://public.ecr.aws/emr-on-eks" - chart = "flink-kubernetes-operator" - namespace = "${local.flink_team}-ns" + + depends_on = [module.flink_irsa, module.flink_irsa_operator] + name = "flink-kubernetes-operator" + 
repository = "oci://public.ecr.aws/emr-on-eks" + chart = "flink-kubernetes-operator" + create_namespace = true + namespace = "${local.flink_operator}-ns" + set { name = "watchNamespace" value = "${local.flink_team}-ns" } - + set { + name = "emrContainers.operatorExecutionRoleArn" + value = module.flink_irsa_operator.iam_role_arn + } set { name = "env.AWS_REGION" value = var.region } - # set the version set { name = "image.tag" diff --git a/streaming/emr-eks-flink/variables.tf b/streaming/emr-eks-flink/variables.tf index 22ab2852e..d9204d6ad 100644 --- a/streaming/emr-eks-flink/variables.tf +++ b/streaming/emr-eks-flink/variables.tf @@ -9,3 +9,9 @@ variable "region" { default = "us-west-2" description = "Region for deployment" } + +variable "name" { + description = "Name of the VPC and EKS Cluster" + default = "emr-eks-flink" + type = string +} From 1ca710bb75213f479d62c20160ac676623b3b757 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 3 Apr 2024 23:01:48 -0700 Subject: [PATCH 08/25] karpenter related changes --- streaming/emr-eks-flink/README.md | 3 +- streaming/emr-eks-flink/addons.tf | 71 ++++++++++- streaming/emr-eks-flink/data.tf | 1 + .../examples/basic-example-app-cluster.yaml | 7 +- .../examples/basic-flink-client.yaml | 20 ++++ .../cluster-autoscaler/flink-sample-job.yaml | 72 ++++++++++++ .../examples/karpenter/flink-sample-job.yaml | 63 ++++++++++ streaming/emr-eks-flink/flink-team.tf | 2 +- .../flink-compute-optimized-provisioner.yaml | 111 ------------------ streaming/emr-eks-flink/main.tf | 2 +- streaming/emr-eks-flink/outputs.tf | 6 +- streaming/emr-eks-flink/providers.tf | 4 + 12 files changed, 241 insertions(+), 121 deletions(-) create mode 100644 streaming/emr-eks-flink/examples/basic-flink-client.yaml create mode 100644 streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml create mode 100644 streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml delete mode 100644 
streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index 9604d2ef8..c9ee833e5 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -27,7 +27,7 @@ | [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 | | [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 | | [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.0 | -| [flink\_irsa](#module\_flink\_irsa) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | +| [flink\_irsa\_jobs](#module\_flink\_irsa\_jobs) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | | [flink\_irsa\_operator](#module\_flink\_irsa\_operator) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 | | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | 5.5.1 | @@ -58,5 +58,6 @@ | Name | Description | |------|-------------| +| [flink\_jobs\_role\_arn](#output\_flink\_jobs\_role\_arn) | IAM linked role for the flink job | | [flink\_operator\_role\_arn](#output\_flink\_operator\_role\_arn) | IAM linked role for the flink operator | diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index 90b80a575..6298ff8c8 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -105,15 +105,21 @@ module "eks_blueprints_addons" { enable_karpenter = true karpenter_enable_spot_termination = true karpenter = { + chart_version = "v0.33.1" repository_username = data.aws_ecrpublic_authorization_token.token.user_name repository_password = data.aws_ecrpublic_authorization_token.token.password } karpenter_node = { + iam_role_use_name_prefix = false + iam_role_name = "${local.name}-karpenter-node" + iam_role_additional_policies = { AmazonSSMManagedInstanceCore = 
"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" } } + + #--------------------------------------- # CloudWatch metrics for EKS #--------------------------------------- @@ -129,7 +135,7 @@ module "eks_blueprints_addons" { #--------------------------------------- # AWS for FluentBit - DaemonSet #--------------------------------------- - # Fluentbit is required to stream the logs to S3 when EMR Spark Operator is enabled + # Fluentbit is required to stream the logs to S3 when EMR Flink Operator is enabled enable_aws_for_fluentbit = true aws_for_fluentbit_cw_log_group = { use_name_prefix = false @@ -156,9 +162,66 @@ module "eks_blueprints_addons" { #--------------------------------------------------------------- module "eks_data_addons" { source = "aws-ia/eks-data-addons/aws" - version = "~> 1.0" # ensure to update this to the latest/desired version + version = "~> 1.30" # ensure to update this to the latest/desired version + + oidc_provider_arn = module.eks.oidc_provider_arn + enable_karpenter_resources = true + karpenter_resources_helm_config = { + flink-compute-optimized = { + values = [ + <<-EOT + name: flink-compute-optimized + clusterName: ${module.eks.cluster_name} + ec2NodeClass: + karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} + subnetSelectorTerms: + tags: + Name: "${module.eks.cluster_name}-private*" + securityGroupSelectorTerms: + tags: + Name: ${module.eks.cluster_name}-node + instanceStorePolicy: RAID0 + + nodePool: + labels: + - type: karpenter + - NodeGroupType: FlinkComputeOptimized + - multiArch: Flink + requirements: + - key: "karpenter.sh/capacity-type" + operator: In + values: ["spot", "on-demand"] + - key: "kubernetes.io/arch" + operator: In + values: ["amd64"] + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: ["c"] + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["c5d"] + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: ["4", "8", "16", 
"36"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + - key: "karpenter.k8s.aws/instance-generation" + operator: Gt + values: ["2"] + + limits: + cpu: 1000 + disruption: + consolidationPolicy: WhenEmpty + consolidateAfter: 30s + expireAfter: 720h + weight: 100 + EOT + ] + } + } - oidc_provider_arn = module.eks.oidc_provider_arn +} -} diff --git a/streaming/emr-eks-flink/data.tf b/streaming/emr-eks-flink/data.tf index 8e7ddffe5..ff9ce09aa 100644 --- a/streaming/emr-eks-flink/data.tf +++ b/streaming/emr-eks-flink/data.tf @@ -3,6 +3,7 @@ data "aws_eks_cluster_auth" "this" { name = module.eks.cluster_name } + data "aws_ecrpublic_authorization_token" "token" { provider = aws.ecr } diff --git a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml index 03bd0c499..7f189f6d5 100644 --- a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml @@ -28,7 +28,7 @@ spec: # add service account # serviceAccount: flink-team-a-sa - executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240401212643803900000001 + executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001 emrReleaseLabel: "emr-7.0.0-flink-latest" jobManager: # Replace with s3 bucket in your own account @@ -36,14 +36,17 @@ spec: resource: memory: "2048m" cpu: 1 + taskManager: + replicas: 2 resource: memory: "2048m" cpu: 1 + job: # if you have your job jar in S3 bucket you can use that path as well jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar - parallelism: 2 + parallelism: 4 upgradeMode: savepoint savepointTriggerNonce: 0 monitoringConfiguration: diff --git a/streaming/emr-eks-flink/examples/basic-flink-client.yaml b/streaming/emr-eks-flink/examples/basic-flink-client.yaml new file mode 100644 index 000000000..ba5e470ad --- /dev/null +++ 
b/streaming/emr-eks-flink/examples/basic-flink-client.yaml @@ -0,0 +1,20 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ssh-pod +spec: + replicas: 1 + selector: + matchLabels: + app: ssh-pod + template: + metadata: + labels: + app: ssh-pod + spec: + containers: + - name: ssh-server + image: linuxserver/openssh-server + ports: + - containerPort: 22 # SSH server runs on port 22 + # Add additional configurations as needed, such as volume mounts, environment variables, etc. diff --git a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml new file mode 100644 index 000000000..789eaa2f3 --- /dev/null +++ b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml @@ -0,0 +1,72 @@ +--- +apiVersion: flink.apache.org/v1beta1 +kind: FlinkDeployment +metadata: + name: basic-example + namespace: flink-team-a-ns +spec: + image: flink:1.16 + flinkVersion: v1_16 + flinkConfiguration: + taskmanager.numberOfTaskSlots: "2" + state.savepoints.dir: file:///flink/data/checkpoint/savepoints + state.checkpoints.dir: file:///flink/data/checkpoint/checkpoints + high-availability: org.apache.flink.kubernetes.highavailability.KubernetesHaServicesFactory + high-availability.storageDir: file:///flink/data/checkpoint/ha + job-result-store.storage-path: file:///flink/data/checkpoint/job-result-store + # restartNonce: 123 # Application restarts without spec change + kubernetes.operator.periodic.savepoint.interval: 1h # periodic savepoint triggering + kubernetes.operator.deployment.rollback.enabled: "true" + env.log.dir: "/flink/data/log" + env.taskmanager.tmp.dirs: "/flink/data/tmp" + serviceAccount: flink-team-a-sa + # Pod templates permit customization of the Flink job and task manager pods, + # for example to specify volume mounts, ephemeral storage, sidecar containers etc. 
+ #  A common pod template may hold the settings that apply to both job and task manager, like volumeMounts + podTemplate: + apiVersion: v1 + kind: Pod + metadata: + name: pod-template + spec: + nodeSelector: + NodeGroupType: "flink-on-demand-ca" + tolerations: + - key: "flink-on-demand-ca" + operator: "Exists" + effect: "NoSchedule" + initContainers: + - name: flink-ssd-volume-permissions + image: public.ecr.aws/y4g4v0z7/busybox + command: [ 'sh', '-c', 'chown -R 9999 /local1' ] + volumeMounts: + - mountPath: /local1 + name: flink-ssd-volume + containers: + # Do not change the main container name + - name: flink-main-container + volumeMounts: + - mountPath: /flink/data/checkpoint + name: flink-ssd-volume + - mountPath: /flink/data/tmp + name: flink-ssd-volume + - mountPath: /flink/data/log + name: flink-ssd-volume + volumes: + - name: flink-ssd-volume + hostPath: + path: /local1 + type: Directory + jobManager: + resource: + memory: "2048m" + cpu: 1 + taskManager: + replicas: 2 + resource: + memory: "2048m" + cpu: 1 + job: + jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar + parallelism: 2 + upgradeMode: stateless # last-state diff --git a/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml new file mode 100644 index 000000000..dba5795e4 --- /dev/null +++ b/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml @@ -0,0 +1,63 @@ +--- +apiVersion: flink.apache.org/v1beta1 +kind: FlinkDeployment +metadata: + name: sample-job + namespace: flink-team-a-ns +spec: + + flinkVersion: v1_17 + executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001 + emrReleaseLabel: "emr-7.0.0-flink-latest" + flinkConfiguration: + taskmanager.numberOfTaskSlots: "2" + # Autotuning parameters + kubernetes.operator.job.autoscaler.autotune.enable: "true" + kubernetes.operator.job.autoscaler.autotune.metrics.history.max.count: "2" + 
kubernetes.operator.job.autoscaler.autotune.metrics.restart.count: "1" + metrics.job.status.enable: TOTAL_TIME + + # Autoscaler parameters + kubernetes.operator.job.autoscaler.enabled: "true" + kubernetes.operator.job.autoscaler.scaling.enabled: "true" + kubernetes.operator.job.autoscaler.stabilization.interval: "5s" + kubernetes.operator.job.autoscaler.metrics.window: "1m" + + jobmanager.scheduler: adaptive + # Replace with s3 bucket in your own account + state.checkpoints.dir: s3://emr-flink-data/checkpoints + state.savepoints.dir: s3://emr-flink-data/savepoints + + + # Pod templates permit customization of the Flink job and task manager pods, + # for example to specify volume mounts, ephemeral storage, sidecar containers etc. + #  A common pod template may hold the settings that apply to both job and task manager, like volumeMounts + podTemplate: + apiVersion: v1 + kind: Pod + metadata: + name: pod-template + spec: + nodeSelector: + NodeGroupType: "FlinkComputeOptimized" + + + jobManager: + storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager + resource: + memory: "2048m" + cpu: 1 + taskManager: + replicas: 2 + resource: + memory: "2048m" + cpu: 1 + job: + jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar + parallelism: 2 + upgradeMode: stateless # last-state + + + monitoringConfiguration: + cloudWatchMonitoringConfiguration: + logGroupName: LOG_GROUP_NAME diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf index dada17cc9..4a34c9420 100644 --- a/streaming/emr-eks-flink/flink-team.tf +++ b/streaming/emr-eks-flink/flink-team.tf @@ -20,7 +20,7 @@ resource "aws_iam_policy" "flink" { #--------------------------------------------------------------- # IRSA for flink pods for "flink-team-a" #--------------------------------------------------------------- -module "flink_irsa" { +module "flink_irsa_jobs" { source = "aws-ia/eks-blueprints-addon/aws" version = "~> 1.0" diff --git 
a/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml b/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml deleted file mode 100644 index aa1934178..000000000 --- a/streaming/emr-eks-flink/karpenter-provisioners/flink-compute-optimized-provisioner.yaml +++ /dev/null @@ -1,111 +0,0 @@ ---- -apiVersion: karpenter.sh/v1beta1 -kind: NodePool # Previously kind: Provisioner -metadata: - name: flink-compute-optimized - namespace: karpenter # Same namespace as Karpenter add-on installed -spec: - template: - metadata: - labels: - type: karpenter - provisioner: flink-compute-optimized - NodeGroupType: flinkComputeOptimized - spec: - nodeClassRef: - name: flink-compute-optimized - requirements: - - key: "topology.kubernetes.io/zone" - operator: In - values: [${azs}a] #Update the correct region and zones - - key: "karpenter.sh/capacity-type" - operator: In - values: ["spot", "on-demand"] - - key: "kubernetes.io/arch" - operator: In - values: ["amd64"] - - key: "karpenter.k8s.aws/instance-category" - operator: In - values: ["c"] - - key: "karpenter.k8s.aws/instance-family" - operator: In - values: ["c5d"] - - key: "karpenter.k8s.aws/instance-cpu" - operator: In - values: ["4", "8", "16", "36"] - - key: "karpenter.k8s.aws/instance-hypervisor" - operator: In - values: ["nitro"] - - key: "karpenter.k8s.aws/instance-generation" - operator: Gt - values: ["2"] - limits: - cpu: 1000 - disruption: - # Describes which types of Nodes Karpenter should consider for consolidation - # If using 'WhenUnderutilized', Karpenter will consider all nodes for consolidation and attempt to remove or replace Nodes when it discovers that the Node is underutilized and could be changed to reduce cost - # If using `WhenEmpty`, Karpenter will only consider nodes for consolidation that contain no workload pods - consolidationPolicy: WhenEmpty - # The amount of time Karpenter should wait after discovering a consolidation decision - # 
This value can currently only be set when the consolidationPolicy is 'WhenEmpty' - # You can choose to disable consolidation entirely by setting the string value 'Never' here - consolidateAfter: 30s - # The amount of time a Node can live on the cluster before being removed - # Avoiding long-running Nodes helps to reduce security vulnerabilities as well as to reduce the chance of issues that can plague Nodes with long uptimes such as file fragmentation or memory leaks from system processes - # You can choose to disable expiration entirely by setting the string value 'Never' here - expireAfter: 720h - - # Priority given to the NodePool when the scheduler considers which NodePool - # to select. Higher weights indicate higher priority when comparing NodePools. - # Specifying no weight is equivalent to specifying a weight of 0. - weight: 10 - - - -# NOTE: Multiple NodePools may point to the same EC2NodeClass. ---- -apiVersion: karpenter.k8s.aws/v1beta1 -kind: EC2NodeClass # Previously kind: AWSNodeTemplate -metadata: - name: flink-compute-optimized - namespace: karpenter -spec: - amiFamily: AL2 - blockDeviceMappings: - - deviceName: /dev/xvda - ebs: - volumeSize: 50Gi - volumeType: gp3 - encrypted: true - deleteOnTermination: true - role: "${eks_cluster_id}-karpenter-node" - subnetSelectorTerms: - - tags: # Update the correct region and zones - Name: "${eks_cluster_id}-private*" - securityGroupSelectorTerms: - - name: "${eks_cluster_id}-node*" - userData: | - MIME-Version: 1.0 - Content-Type: multipart/mixed; boundary="BOUNDARY" - - --BOUNDARY - Content-Type: text/x-shellscript; charset="us-ascii" - - cat <<-EOF > /etc/profile.d/bootstrap.sh - #!/bin/sh - - - # Configure the NVMe volumes in RAID0 configuration in the bootstrap.sh call. 
- # https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh#L35 - # This will create a RAID volume and mount it at /mnt/k8s-disks/0 - # then mount that volume to /var/lib/kubelet, /var/lib/containerd, and /var/log/pods - # this allows the container daemons and pods to write to the RAID0 by default without needing PersistentVolumes - export LOCAL_DISKS='raid0' - EOF - - # Source extra environment variables in bootstrap script - sed -i '/^set -o errexit/a\\nsource /etc/profile.d/bootstrap.sh' /etc/eks/bootstrap.sh - - --BOUNDARY-- - tags: - InstanceType: "flink-compute-optimized" # optional, add tags for your own use diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf index f8f1767b2..0ba024260 100644 --- a/streaming/emr-eks-flink/main.tf +++ b/streaming/emr-eks-flink/main.tf @@ -174,7 +174,7 @@ module "vpc" { # deploy a helm chart for flink-kubernetes-operator resource "helm_release" "flink_kubernetes_operator" { - depends_on = [module.flink_irsa, module.flink_irsa_operator] + depends_on = [module.flink_irsa_jobs, module.flink_irsa_operator] name = "flink-kubernetes-operator" repository = "oci://public.ecr.aws/emr-on-eks" chart = "flink-kubernetes-operator" diff --git a/streaming/emr-eks-flink/outputs.tf b/streaming/emr-eks-flink/outputs.tf index 5cb6efad5..a65b6a2f5 100644 --- a/streaming/emr-eks-flink/outputs.tf +++ b/streaming/emr-eks-flink/outputs.tf @@ -1,5 +1,9 @@ # create output for flink operator role arn +output "flink_jobs_role_arn" { + value = trimspace(module.flink_irsa_jobs.iam_role_arn) + description = "IAM linked role for the flink job" +} output "flink_operator_role_arn" { - value = module.flink_irsa.iam_role_arn + value = module.flink_irsa_operator.iam_role_arn description = "IAM linked role for the flink operator" } diff --git a/streaming/emr-eks-flink/providers.tf b/streaming/emr-eks-flink/providers.tf index 2dab41db8..d7b000730 100644 --- a/streaming/emr-eks-flink/providers.tf +++ 
b/streaming/emr-eks-flink/providers.tf @@ -1,7 +1,11 @@ + provider "aws" { region = local.region + } + + # ECR always authenticates with `us-east-1` region # Docs -> https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html provider "aws" { From 6a1cf6cd770e2c4395a0572118916d80b303ee6c Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 3 Apr 2024 23:02:54 -0700 Subject: [PATCH 09/25] doc added --- .../streaming-platforms/emr-eks-flink.md | 338 ++++++++++++++++++ 1 file changed, 338 insertions(+) create mode 100644 website/docs/blueprints/streaming-platforms/emr-eks-flink.md diff --git a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md new file mode 100644 index 000000000..8a205f62c --- /dev/null +++ b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md @@ -0,0 +1,338 @@ +--- +sidebar_position: 6 +title: EMR on EKS with Flink Streaming +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CollapsibleContent from '../../../src/components/CollapsibleContent'; + +:::info +Please note that we are working on adding more features to this blueprint such as Flink examples with multiple connectors, Ingress for WebUI, Grafana dashboards etc. +::: + +## Introduction to Apache Flink +[Apache Flink](https://flink.apache.org/) is an open-source, unified stream processing and batch processing framework that was designed to process large amounts of data. It provides fast, reliable, and scalable data processing with fault tolerance and exactly-once semantics. +Some of the key features of Flink are: +- **Distributed Processing**: Flink is designed to process large volumes of data in a distributed fashion, making it horizontally scalable and fault-tolerant. +- **Stream Processing and Batch Processing**: Flink provides APIs for both stream processing and batch processing. 
This means you can process data in real-time, as it's being generated, or process data in batches. +- **Fault Tolerance**: Flink has built-in mechanisms for handling node failures, network partitions, and other types of failures. +- **Exactly-once Semantics**: Flink supports exactly-once processing, which ensures that each record is processed exactly once, even in the presence of failures. +- **Low Latency**: Flink's streaming engine is optimized for low-latency processing, making it suitable for use cases that require real-time processing of data. +- **Extensibility**: Flink provides a rich set of APIs and libraries, making it easy to extend and customize to fit your specific use case. + +## Architecture + +High-level design of the Flink architecture on Amazon EKS. + +![Flink Design UI](img/flink-design.png) + +## EMR on EKS Flink Kubernetes Operator +Amazon EMR releases 6.13.0 and higher support Amazon EMR on EKS with Apache Flink, or the [EMR Flink Kubernetes operator](https://gallery.ecr.aws/emr-on-eks/flink-kubernetes-operator), as a job submission model for Amazon EMR on EKS. With Amazon EMR on EKS with Apache Flink, you can deploy and manage Flink applications with the Amazon EMR release runtime on your own Amazon EKS clusters. Once you deploy the Flink Kubernetes operator in your Amazon EKS cluster, you can directly submit Flink applications with the operator. The operator manages the lifecycle of Flink applications: +1. Running, suspending and deleting applications +2. Stateful and stateless application upgrades +3. Triggering and managing savepoints +4. Handling errors, rolling-back broken upgrades + +In addition to the above features, EMR Flink Kubernetes operator provides the following additional capabilities: +1. Launching Flink applications using jars in Amazon S3 +2. Monitoring integration with Amazon S3 and Amazon CloudWatch and container log rotation. +3. Automatically tunes Autoscaler configurations based on historical trends of observed metrics. +4. 
Faster Flink Job Restart during scaling or Failure Recovery +5. IRSA (IAM Roles for Service Accounts) Native Integration +6. Apache Airflow Integration +7. PyFlink support + + +The Flink Operator defines two types of Custom Resources (CRs), which are extensions of the Kubernetes API. + + + + +**FlinkDeployment** +- FlinkDeployment CR defines **Flink Application** and **Session Cluster** deployments. +- Application deployments manage a single job deployment on a dedicated Flink cluster in Application mode. +- Session clusters allow you to run multiple Flink Jobs on an existing Session cluster. +
+ FlinkDeployment in Application modes, Click to toggle content! + + ```yaml + apiVersion: flink.apache.org/v1beta1 + kind: FlinkDeployment + metadata: + namespace: default + name: basic-example + spec: + image: flink:1.16 + flinkVersion: v1_16 + flinkConfiguration: + taskmanager.numberOfTaskSlots: "2" + serviceAccount: flink + jobManager: + resource: + memory: "2048m" + cpu: 1 + taskManager: + resource: + memory: "2048m" + cpu: 1 + job: + jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar + parallelism: 2 + upgradeMode: stateless + state: running + ``` +
+ +
+ + + +**FlinkSessionJob** +- The `FlinkSessionJob` CR defines the session job on the **Session cluster** and each Session cluster can run multiple `FlinkSessionJob`. +- Session deployments manage Flink Session clusters without providing any job management for it + +
+ FlinkSessionJob using an existing "basic-session-cluster" session cluster deployment + + ```yaml + apiVersion: flink.apache.org/v1beta1 + kind: FlinkSessionJob + metadata: + name: basic-session-job-example + spec: + deploymentName: basic-session-cluster + job: + jarURI: https://repo1.maven.org/maven2/org/apache/flink/flink-examples-streaming_2.12/1.15.3/flink-examples-streaming_2.12-1.15.3-TopSpeedWindowing.jar + parallelism: 4 + upgradeMode: stateless + ``` + +
+ +
+
+ +:::info +Session clusters use a similar spec to Application clusters with the only difference that `job` is not defined in the yaml spec. +::: + +:::info +According to the Flink documentation, it is recommended to use FlinkDeployment in Application mode for production environments. +::: + +On top of the deployment types the Flink Kubernetes Operator also supports two modes of deployments: `Native` and `Standalone`. + + + + +**Native** + +- Native cluster deployment is the default deployment mode and uses Flink’s built in integration with Kubernetes when deploying the cluster. +- Flink cluster communicates directly with Kubernetes and allows it to manage Kubernetes resources, e.g. dynamically allocate and de-allocate TaskManager pods. +- Flink Native can be useful for advanced users who want to build their own cluster management system or integrate with existing management systems. +- Flink Native allows for more flexibility in terms of job scheduling and execution. +- For standard Operator use, running your own Flink Jobs in Native mode is recommended. + +```yaml +apiVersion: flink.apache.org/v1beta1 +kind: FlinkDeployment +... +spec: +... +mode: native +``` + + + + +**Standalone** + +- Standalone cluster deployment simply uses Kubernetes as an orchestration platform that the Flink cluster is running on. +- Flink is unaware that it is running on Kubernetes and therefore all Kubernetes resources need to be managed externally, by the Kubernetes Operator. + + ```yaml + apiVersion: flink.apache.org/v1beta1 + kind: FlinkDeployment + ... + spec: + ... + mode: standalone + ``` + + + + +## Best Practices for Running Flink Jobs on Kubernetes +To get the most out of Flink on Kubernetes, here are some best practices to follow: + +- **Use the Kubernetes Operator**: Install and use the Flink Kubernetes Operator to automate the deployment and management of Flink clusters on Kubernetes. 
+- **Deploy in dedicated namespaces**: Create a separate namespace for the Flink Kubernetes Operator and another one for Flink jobs/workloads. This ensures that the Flink jobs are isolated and have their own resources. +- **Use high-quality storage**: Store Flink checkpoints and savepoints in high-quality storage such as Amazon S3 or another durable external storage. These storage options are reliable, scalable, and offer durability for large volumes of data. +- **Optimize resource allocation**: Allocate sufficient resources to Flink jobs to ensure optimal performance. This can be done by setting resource requests and limits for Flink containers. +- **Proper network isolation**: Use Kubernetes Network Policies to isolate Flink jobs from other workloads running on the same Kubernetes cluster. This ensures that Flink jobs have the required network access without being impacted by other workloads. +- **Configure Flink optimally**: Tune Flink settings according to your use case. For example, adjust Flink's parallelism settings to ensure that Flink jobs are scaled appropriately based on the size of the input data. +- **Use checkpoints and savepoints**: Use checkpoints for periodic snapshots of Flink application state and savepoints for more advanced use cases such as upgrading or downgrading the application. +- **Store checkpoints and savepoints in the right places**: Store checkpoints in distributed file systems or key-value stores like Amazon S3 or another durable external storage. Store savepoints in a durable external storage like Amazon S3. + +## Flink Upgrade +Flink Operator provides three upgrade modes for Flink jobs. Checkout the [Flink upgrade docs](https://nightlies.apache.org/flink/flink-kubernetes-operator-docs-main/docs/custom-resource/job-management/#stateful-and-stateless-application-upgrades) for up-to-date information. + +1. **stateless**: Stateless application upgrades from empty state +2. 
**last-state**: Quick upgrades in any application state (even for failing jobs), does not require a healthy job as it always uses the latest checkpoint information. Manual recovery may be necessary if HA metadata is lost. +3. **savepoint**: Use savepoint for upgrade, providing maximal safety and possibility to serve as backup/fork point. The savepoint will be created during the upgrade process. Note that the Flink job needs to be running to allow the savepoint to get created. If the job is in an unhealthy state, the last checkpoint will be used (unless kubernetes.operator.job.upgrade.last-state-fallback.enabled is set to false). If the last checkpoint is not available, the job upgrade will fail. + +:::info +`last-state` or `savepoint` are recommended modes for production +::: + + +Deploying the Solution}> + +In this [example](https://github.com/awslabs/data-on-eks/tree/main/streaming/flink), you will provision the following resources required to run Flink Jobs with Flink Operator and Apache YuniKorn. + +This example deploys an EKS Cluster running the Flink Operator into a new VPC. + +- Creates a new sample VPC, 2 Private Subnets and 2 Public Subnets +- Creates Internet gateway for Public Subnets and NAT Gateway for Private Subnets +- Creates EKS Cluster Control plane with public endpoint (for demo reasons only) with core managed node group, on-demand node group and Spot node group for Flink workloads +- Deploys Metrics server, Cluster Autoscaler, Apache YuniKorn, Karpenter, Grafana, AMP and Prometheus server +- Deploys Cert Manager and EMR Flink Operator. Flink Operator has dependency on Cert Manager +- Creates a new Flink Data team resources that includes namespace, IRSA, Role and Role binding +- Deploys three Karpenter provisioners for different compute types + +### Prerequisites + +Ensure that you have installed the following tools on your machine. + +1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) +2. 
[kubectl](https://Kubernetes.io/docs/tasks/tools/) +3. [terraform](https://learn.hashicorp.com/tutorials/terraform/install-cli) + +### Deploy + +Clone the repository. + +```bash +git clone https://github.com/awslabs/data-on-eks.git +``` + +Navigate into Flink's Terraform template directory and run `install.sh` script. + +```bash +cd data-on-eks/streaming/emr-flink-eks +chmod +x install.sh +./install.sh +``` +Verify the cluster status + +```bash + ➜ kubectl get nodes -A + NAME STATUS ROLES AGE VERSION + ip-10-1-160-150.us-west-2.compute.internal Ready 24h v1.24.11-eks-a59e1f0 + ip-10-1-169-249.us-west-2.compute.internal Ready 6d v1.24.11-eks-a59e1f0 + ip-10-1-69-244.us-west-2.compute.internal Ready 6d v1.24.11-eks-a59e1f0 + + ➜ ~ kubectl get pods -n flink-kubernetes-operator-ns + NAME READY STATUS RESTARTS AGE + flink-kubernetes-operator-555776785f-pzx8p 2/2 Running 0 4h21m + flink-kubernetes-operator-555776785f-z5jpt 2/2 Running 0 4h18m + + ➜ ~ kubectl get pods -n cert-manager + NAME READY STATUS RESTARTS AGE + cert-manager-77fc7548dc-dzdms 1/1 Running 0 24h + cert-manager-cainjector-8869b7ff7-4w754 1/1 Running 0 24h + cert-manager-webhook-586ddf8589-g6s87 1/1 Running 0 24h +``` + +To list all the resources created for Flink team to run Flink jobs using this namespace + +```bash + ➜ ~ kubectl get all,role,rolebinding,serviceaccount --namespace flink-team-a-ns + NAME CREATED AT + role.rbac.authorization.k8s.io/flink-team-a-role 2023-04-06T13:17:05Z + + NAME ROLE AGE + rolebinding.rbac.authorization.k8s.io/flink-team-a-role-binding Role/flink-team-a-role 22h + + NAME SECRETS AGE + serviceaccount/default 0 22h + serviceaccount/flink-team-a-sa 0 22h +``` + + + + +Execute Sample Flink job with Karpenter}> + +Navigate to example directory and submit the Flink job. + +```bash +cd data-on-eks/streaming/emr-eks-flink/examples +``` +Get the role arn linked to the job execution service account. 
+```bash +terraform output flink_jobs_role_arn + +"arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001" +``` + +Open the basic-example-app-cluster.yaml in any editor and replace the place holder for **REPLACE_WITH_JOB_EXECUTION_ROLE_ARN** with the role ARN that you got from the terraform output command and save the file. + +Deploy the job by running the kubectl deply command. + +```bash +kubectl apply -f basic-example-app-cluster.yaml +``` + +Monitor the job status using the below command. +You should see the new nodes triggered by the karpenter and the YuniKorn will schedule one Job manager pod and one Taskmanager pods on this node. + +```bash +kubectl get deployments -n flink-team-a-ns +NAME READY UP-TO-DATE AVAILABLE AGE +basic-example-app-cluster-flink 2/2 2 2 3h6m + +kubectl get pods -n flink-team-a-ns +NAME READY STATUS RESTARTS AGE +basic-example-app-cluster-flink-7c7d9c6fd9-cdfmd 2/2 Running 0 3h7m +basic-example-app-cluster-flink-7c7d9c6fd9-pjxj2 2/2 Running 0 3h7m +basic-example-app-cluster-flink-taskmanager-1-1 2/2 Running 0 3h6m + +kubectl get services -n flink-team-a-ns +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +basic-example-app-cluster-flink-rest ClusterIP 172.20.17.152 8081/TCP 3h7m +``` + +To access the Flink WebUI for the job run this command locally. + +```bash +kubectl port-forward svc/basic-example-app-cluster-flink-rest 8081 -n flink-team-a-ns +``` + +![Flink Job UI](img/flink1.png) +![Flink Job UI](img/flink2.png) +![Flink Job UI](img/flink3.png) +![Flink Job UI](img/flink4.png) +![Flink Job UI](img/flink5.png) + + + + +Cleanup}> + +This script will cleanup the environment using `-target` option to ensure all the resources are deleted in correct order. + +```bash +cd .. 
&& chmod +x cleanup.sh +./cleanup.sh +``` + + + +:::caution +To avoid unwanted charges to your AWS account, delete all the AWS resources created during this deployment +::: + From 44a100b4828b6a6e5c70f4ab9080c6bed8cf31ab Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 3 Apr 2024 23:05:08 -0700 Subject: [PATCH 10/25] ran pre-commit --- streaming/emr-eks-flink/README.md | 2 +- streaming/emr-eks-flink/addons.tf | 4 +--- .../emr-eks-flink/examples/karpenter/flink-sample-job.yaml | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index c9ee833e5..384970dcd 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -26,7 +26,7 @@ | [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.20 | | [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 | | [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 | -| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.0 | +| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.30 | | [flink\_irsa\_jobs](#module\_flink\_irsa\_jobs) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | | [flink\_irsa\_operator](#module\_flink\_irsa\_operator) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | | [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 | diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index 6298ff8c8..32b2b6208 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -209,7 +209,7 @@ module "eks_data_addons" { - key: "karpenter.k8s.aws/instance-generation" operator: Gt values: ["2"] - + limits: cpu: 1000 disruption: @@ -223,5 +223,3 @@ module "eks_data_addons" { } } - - diff --git 
a/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml index dba5795e4..80c2fac89 100644 --- a/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml @@ -5,7 +5,7 @@ metadata: name: sample-job namespace: flink-team-a-ns spec: - + flinkVersion: v1_17 executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001 emrReleaseLabel: "emr-7.0.0-flink-latest" @@ -40,7 +40,7 @@ spec: spec: nodeSelector: NodeGroupType: "FlinkComputeOptimized" - + jobManager: storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager From ddf17e2eb5469b74d58f608b39e7b53bd8c12314 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 3 Apr 2024 23:05:50 -0700 Subject: [PATCH 11/25] pre-commit changes --- website/docs/blueprints/streaming-platforms/emr-eks-flink.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md index 8a205f62c..7afab6b29 100644 --- a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md +++ b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md @@ -42,7 +42,7 @@ In addition to the above features, EMR Flink Kubernetes operator provides the fo 5. IRSA (IAM Roles for Service Accounts) Native Integration 6. Apache Airflow Integration 7. Pyflink support - + Flink Operator defines two types of Custom Resources(CR) which are the extensions of the Kubernetes API. @@ -335,4 +335,3 @@ cd .. 
&& chmod +x cleanup.sh :::caution To avoid unwanted charges to your AWS account, delete all the AWS resources created during this deployment ::: - From 3cc5bad1703b1bf7271d331416422286b121da6a Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Fri, 5 Apr 2024 19:09:36 -0700 Subject: [PATCH 12/25] karpenter config fixes --- streaming/emr-eks-flink/addons.tf | 12 ++++++++---- streaming/emr-eks-flink/cleanup.sh | 0 .../examples/basic-example-app-cluster.yaml | 12 +++++++++++- streaming/emr-eks-flink/main.tf | 14 ++++++++++---- 4 files changed, 29 insertions(+), 9 deletions(-) mode change 100644 => 100755 streaming/emr-eks-flink/cleanup.sh diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index 32b2b6208..df4ba7e46 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -105,14 +105,13 @@ module "eks_blueprints_addons" { enable_karpenter = true karpenter_enable_spot_termination = true karpenter = { - chart_version = "v0.33.1" + chart_version = "v0.34.0" repository_username = data.aws_ecrpublic_authorization_token.token.user_name repository_password = data.aws_ecrpublic_authorization_token.token.password } karpenter_node = { - iam_role_use_name_prefix = false iam_role_name = "${local.name}-karpenter-node" - + iam_role_use_name_prefix = false iam_role_additional_policies = { AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" } @@ -171,8 +170,10 @@ module "eks_data_addons" { values = [ <<-EOT name: flink-compute-optimized + clusterName: ${module.eks.cluster_name} ec2NodeClass: + karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: tags: @@ -183,14 +184,17 @@ module "eks_data_addons" { instanceStorePolicy: RAID0 nodePool: + labels: - type: karpenter - NodeGroupType: FlinkComputeOptimized - multiArch: Flink + nodeClassRef: + name: flink-compute-optimized requirements: - key: "karpenter.sh/capacity-type" operator: In 
- values: ["spot", "on-demand"] + values: ["on-demand"] - key: "kubernetes.io/arch" operator: In values: ["amd64"] diff --git a/streaming/emr-eks-flink/cleanup.sh b/streaming/emr-eks-flink/cleanup.sh old mode 100644 new mode 100755 diff --git a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml index 7f189f6d5..18a15253f 100644 --- a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml @@ -28,8 +28,18 @@ spec: # add service account # serviceAccount: flink-team-a-sa - executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001 + executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240406012025932700000008 emrReleaseLabel: "emr-7.0.0-flink-latest" + podTemplate: + apiVersion: v1 + kind: Pod + metadata: + name: pod-template + spec: + nodeSelector: + NodeGroupType: "FlinkComputeOptimized" + + jobManager: # Replace with s3 bucket in your own account storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf index 0ba024260..e2c7bd18d 100644 --- a/streaming/emr-eks-flink/main.tf +++ b/streaming/emr-eks-flink/main.tf @@ -14,14 +14,20 @@ module "eks" { manage_aws_auth_configmap = true aws_auth_roles = [ + + # We need to add in the Karpenter node IAM role for nodes launched by Karpenter { - # Required for EMR on EKS virtual cluster - rolearn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/AWSServiceRoleForAmazonEMRContainers" - username = "emr-containers" - }, + rolearn = module.eks_blueprints_addons.karpenter.node_iam_role_arn + username = "system:node:{{EC2PrivateDNSName}}" + groups = [ + "system:bootstrappers", + "system:nodes", + ] + } ] + #--------------------------------------- # Note: This can further restricted to specific 
required for each Add-on and your application #--------------------------------------- From 880c18254bd1b1762f33b71b04ec77a86c26bdfd Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Sat, 6 Apr 2024 11:14:52 -0700 Subject: [PATCH 13/25] added spot instace --- streaming/emr-eks-flink/addons.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index df4ba7e46..54c159dbc 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -194,7 +194,7 @@ module "eks_data_addons" { requirements: - key: "karpenter.sh/capacity-type" operator: In - values: ["on-demand"] + values: ["spot","on-demand"] - key: "kubernetes.io/arch" operator: In values: ["amd64"] From a1ee99425679b4b047a4c565a9e526c5d9c89c0f Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Sat, 6 Apr 2024 11:23:16 -0700 Subject: [PATCH 14/25] updated README --- streaming/emr-eks-flink/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index 384970dcd..39338ea87 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -14,9 +14,9 @@ | Name | Version | |------|---------| -| [aws](#provider\_aws) | 5.43.0 | -| [aws.ecr](#provider\_aws.ecr) | 5.43.0 | -| [helm](#provider\_helm) | 2.12.1 | +| [aws](#provider\_aws) | 5.44.0 | +| [aws.ecr](#provider\_aws.ecr) | 5.44.0 | +| [helm](#provider\_helm) | 2.13.0 | | [kubernetes](#provider\_kubernetes) | 2.27.0 | ## Modules From 978659ad79374b4a1d35325651a30297998b0a5d Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Sun, 14 Apr 2024 13:45:18 -0700 Subject: [PATCH 15/25] PR comments --- streaming/emr-eks-flink/README.md | 7 +- streaming/emr-eks-flink/addons.tf | 21 +--- streaming/emr-eks-flink/data.tf | 1 + .../examples/basic-flink-client.yaml | 20 ---- .../cluster-autoscaler/flink-sample-job.yaml | 97 ++++++++--------- 
.../basic-example-app-cluster.yaml | 24 +++-- .../examples/karpenter/flink-sample-job.yaml | 63 ----------- streaming/emr-eks-flink/flink-team.tf | 6 ++ .../helm-values/aws-for-fluentbit-values.yaml | 102 ------------------ 9 files changed, 72 insertions(+), 269 deletions(-) delete mode 100644 streaming/emr-eks-flink/examples/basic-flink-client.yaml rename streaming/emr-eks-flink/examples/{ => karpenter}/basic-example-app-cluster.yaml (76%) delete mode 100644 streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml delete mode 100644 streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index 39338ea87..fe4c818a7 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -14,10 +14,10 @@ | Name | Version | |------|---------| -| [aws](#provider\_aws) | 5.44.0 | -| [aws.ecr](#provider\_aws.ecr) | 5.44.0 | +| [aws](#provider\_aws) | 5.45.0 | +| [aws.ecr](#provider\_aws.ecr) | 5.45.0 | | [helm](#provider\_helm) | 2.13.0 | -| [kubernetes](#provider\_kubernetes) | 2.27.0 | +| [kubernetes](#provider\_kubernetes) | 2.29.0 | ## Modules @@ -36,6 +36,7 @@ | Name | Type | |------|------| +| [aws_cloudwatch_log_group.flink_team_a](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | | [aws_iam_policy.flink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | | [helm_release.flink_kubernetes_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | | [kubernetes_namespace_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource | diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index 54c159dbc..b88438b06 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ 
-134,25 +134,8 @@ module "eks_blueprints_addons" { #--------------------------------------- # AWS for FluentBit - DaemonSet #--------------------------------------- - # Fluentbit is required to stream the logs to S3 when EMR Flink Operator is enabled - enable_aws_for_fluentbit = true - aws_for_fluentbit_cw_log_group = { - use_name_prefix = false - name = "/${local.name}/aws-fluentbit-logs" # Add-on creates this log group - retention_in_days = 30 - } - aws_for_fluentbit = { - s3_bucket_arns = [ - module.s3_bucket.s3_bucket_arn, - "${module.s3_bucket.s3_bucket_arn}/*}" - ] - values = [templatefile("${path.module}/helm-values/aws-for-fluentbit-values.yaml", { - region = var.region, - cloudwatch_log_group = "/${local.name}/aws-fluentbit-logs" - s3_bucket_name = module.s3_bucket.s3_bucket_id - cluster_name = module.eks.cluster_name - })] - } + # With EMR Flink on EKS, set up monitoring configurations to archive application logs to S3/CW. + enable_aws_for_fluentbit = false } diff --git a/streaming/emr-eks-flink/data.tf b/streaming/emr-eks-flink/data.tf index ff9ce09aa..260ac6dab 100644 --- a/streaming/emr-eks-flink/data.tf +++ b/streaming/emr-eks-flink/data.tf @@ -60,4 +60,5 @@ data "aws_iam_policy_document" "flink_operator" { "logs:PutLogEvents", ] } + } diff --git a/streaming/emr-eks-flink/examples/basic-flink-client.yaml b/streaming/emr-eks-flink/examples/basic-flink-client.yaml deleted file mode 100644 index ba5e470ad..000000000 --- a/streaming/emr-eks-flink/examples/basic-flink-client.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ssh-pod -spec: - replicas: 1 - selector: - matchLabels: - app: ssh-pod - template: - metadata: - labels: - app: ssh-pod - spec: - containers: - - name: ssh-server - image: linuxserver/openssh-server - ports: - - containerPort: 22 # SSH server runs on port 22 - # Add additional configurations as needed, such as volume mounts, environment variables, etc. 
diff --git a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml index 789eaa2f3..9f1976fd0 100644 --- a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml +++ b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml @@ -1,72 +1,61 @@ ---- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment metadata: - name: basic-example + name: basic-example-cluster-auto-flink namespace: flink-team-a-ns spec: - image: flink:1.16 - flinkVersion: v1_16 + imagePullPolicy: Always + emrReleaseLabel: "emr-7.0.0-flink-latest" + flinkVersion: v1_18 flinkConfiguration: taskmanager.numberOfTaskSlots: "2" - state.savepoints.dir: file:///flink/data/checkpoint/savepoints - state.checkpoints.dir: file:///flink/data/checkpoint/checkpoints - high-availability: org.apache.flink.kubernetes.highavailability.KubernetesHaServicesFactory - high-availability.storageDir: file:///flink/data/checkpoint/ha - job-result-store.storage-path: file:///flink/data/checkpoint/job-result-store - # restartNonce: 123 # Application restarts without spec change - kubernetes.operator.periodic.savepoint.interval: 1h # periodic savepoint triggering - kubernetes.operator.deployment.rollback.enabled: "true" - env.log.dir: "/flink/data/log" - env.taskmanager.tmp.dirs: "/flink/data/tmp" - serviceAccount: flink-team-a-sa - # Pod templates permit customization of the Flink job and task manager pods, - # for example to specify volume mounts, ephemeral storage, sidecar containers etc. 
- #  A common pod template may hold the settings that apply to both job and task manager, like volumeMounts - podTemplate: - apiVersion: v1 - kind: Pod - metadata: - name: pod-template - spec: - nodeSelector: - NodeGroupType: "flink-on-demand-ca" - tolerations: - - key: "flink-on-demand-ca" - operator: "Exists" - effect: "NoSchedule" - initContainers: - - name: flink-ssd-volume-permissions - image: public.ecr.aws/y4g4v0z7/busybox - command: [ 'sh', '-c', 'chown -R 9999 /local1' ] - volumeMounts: - - mountPath: /local1 - name: flink-ssd-volume - containers: - # Do not change the main container name - - name: flink-main-container - volumeMounts: - - mountPath: /flink/data/checkpoint - name: flink-ssd-volume - - mountPath: /flink/data/tmp - name: flink-ssd-volume - - mountPath: /flink/data/log - name: flink-ssd-volume - volumes: - - name: flink-ssd-volume - hostPath: - path: /local1 - type: Directory + # Autotuning parameters + kubernetes.operator.job.autoscaler.autotune.enable: "true" + kubernetes.operator.job.autoscaler.autotune.metrics.history.max.count: "2" + kubernetes.operator.job.autoscaler.autotune.metrics.restart.count: "1" + metrics.job.status.enable: TOTAL_TIME + + # Autoscaler parameters + kubernetes.operator.job.autoscaler.enabled: "true" + kubernetes.operator.job.autoscaler.scaling.enabled: "true" + kubernetes.operator.job.autoscaler.stabilization.interval: "5s" + kubernetes.operator.job.autoscaler.metrics.window: "1m" + + jobmanager.scheduler: adaptive + # Replace with s3 bucket in your own account + state.checkpoints.dir: s3://emr-flink-data/checkpoints + state.savepoints.dir: s3://emr-flink-data/savepoints + + + executionRoleArn: arn:aws:iam::xxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 jobManager: + # Replace with s3 bucket in your own account + storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager resource: memory: "2048m" cpu: 1 + taskManager: replicas: 2 resource: memory: "2048m" cpu: 1 + job: + # if you 
have your job jar in S3 bucket you can use that path as well jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar - parallelism: 2 - upgradeMode: stateless # last-state + parallelism: 4 + upgradeMode: savepoint + savepointTriggerNonce: 0 + monitoringConfiguration: + s3MonitoringConfiguration: + logUri: emr-flink-data/logs + cloudWatchMonitoringConfiguration: + logGroupName: /aws/emr-flink/flink-team-a + sideCarResources: + limits: + cpuLimit: 500m + memoryLimit: 250Mi + containerLogRotationConfiguration: + rotationSize: 2GB + maxFilesToKeep: "10" diff --git a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml similarity index 76% rename from streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml rename to streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml index 18a15253f..c46d0c581 100644 --- a/streaming/emr-eks-flink/examples/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml @@ -1,8 +1,12 @@ apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment metadata: - name: basic-example-app-cluster-flink + name: basic-example-karpenter-flink + namespace: flink-team-a-ns spec: + imagePullPolicy: Always + emrReleaseLabel: "emr-7.0.0-flink-latest" + flinkVersion: v1_18 flinkConfiguration: taskmanager.numberOfTaskSlots: "2" # Autotuning parameters @@ -23,13 +27,8 @@ spec: state.savepoints.dir: s3://emr-flink-data/savepoints + executionRoleArn: arn:aws:iam::xxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 - flinkVersion: v1_17 - # add service account - # serviceAccount: flink-team-a-sa - - executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240406012025932700000008 - emrReleaseLabel: "emr-7.0.0-flink-latest" podTemplate: apiVersion: v1 kind: Pod @@ -60,5 +59,14 @@ spec: upgradeMode: savepoint savepointTriggerNonce: 0 
monitoringConfiguration: + s3MonitoringConfiguration: + logUri: emr-flink-data/logs cloudWatchMonitoringConfiguration: - logGroupName: LOG_GROUP_NAME + logGroupName: /aws/emr-flink/flink-team-a + sideCarResources: + limits: + cpuLimit: 500m + memoryLimit: 250Mi + containerLogRotationConfiguration: + rotationSize: 2GB + maxFilesToKeep: "10" diff --git a/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml deleted file mode 100644 index 80c2fac89..000000000 --- a/streaming/emr-eks-flink/examples/karpenter/flink-sample-job.yaml +++ /dev/null @@ -1,63 +0,0 @@ ---- -apiVersion: flink.apache.org/v1beta1 -kind: FlinkDeployment -metadata: - name: sample-job - namespace: flink-team-a-ns -spec: - - flinkVersion: v1_17 - executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001 - emrReleaseLabel: "emr-7.0.0-flink-latest" - flinkConfiguration: - taskmanager.numberOfTaskSlots: "2" - # Autotuning parameters - kubernetes.operator.job.autoscaler.autotune.enable: "true" - kubernetes.operator.job.autoscaler.autotune.metrics.history.max.count: "2" - kubernetes.operator.job.autoscaler.autotune.metrics.restart.count: "1" - metrics.job.status.enable: TOTAL_TIME - - # Autoscaler parameters - kubernetes.operator.job.autoscaler.enabled: "true" - kubernetes.operator.job.autoscaler.scaling.enabled: "true" - kubernetes.operator.job.autoscaler.stabilization.interval: "5s" - kubernetes.operator.job.autoscaler.metrics.window: "1m" - - jobmanager.scheduler: adaptive - # Replace with s3 bucket in your own account - state.checkpoints.dir: s3://emr-flink-data/checkpoints - state.savepoints.dir: s3://emr-flink-data/savepoints - - - # Pod templates permit customization of the Flink job and task manager pods, - # for example to specify volume mounts, ephemeral storage, sidecar containers etc. 
- #  A common pod template may hold the settings that apply to both job and task manager, like volumeMounts - podTemplate: - apiVersion: v1 - kind: Pod - metadata: - name: pod-template - spec: - nodeSelector: - NodeGroupType: "FlinkComputeOptimized" - - - jobManager: - storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager - resource: - memory: "2048m" - cpu: 1 - taskManager: - replicas: 2 - resource: - memory: "2048m" - cpu: 1 - job: - jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar - parallelism: 2 - upgradeMode: stateless # last-state - - - monitoringConfiguration: - cloudWatchMonitoringConfiguration: - logGroupName: LOG_GROUP_NAME diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf index 4a34c9420..61b06c946 100644 --- a/streaming/emr-eks-flink/flink-team.tf +++ b/streaming/emr-eks-flink/flink-team.tf @@ -75,3 +75,9 @@ module "flink_irsa_operator" { } } + +#create a log group +resource "aws_cloudwatch_log_group" "flink_team_a" { + name = "/aws/emr-flink/flink-team-a" + retention_in_days = 30 +} diff --git a/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml b/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml deleted file mode 100644 index 0bea5188d..000000000 --- a/streaming/emr-eks-flink/helm-values/aws-for-fluentbit-values.yaml +++ /dev/null @@ -1,102 +0,0 @@ -global: - -#hostNetwork and dnsPolicy are critical for enabling large clusters to avoid making calls to API server -# see this link https://docs.fluentbit.io/manual/pipeline/filters/kubernetes#optional-feature-using-kubelet-to-get-metadata -hostNetwork: true -dnsPolicy: ClusterFirstWithHostNet - -service: - parsersFiles: - - /fluent-bit/parsers/parsers.conf - extraParsers: | - [PARSER] - Name kubernetes - Format regex - Regex ^(?[^_]+)\.(?.+)\.(?[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)\.(?[a-z0-9]{64})-$ - -input: - name: "tail" - enabled: true - tag: 
"systempods....-" - path: "/var/log/containers/*.log" - db: "/var/log/flb_kube.db" - memBufLimit: 5MB - skipLongLines: "On" - refreshInterval: 10 - extraInputs: | - multiline.parser docker, cri - Tag_Regex (?[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?[^_]+)_(?.+)-(?[a-z0-9]{64})\.log$ - - -# NOTE: extraFilters config for using Kubelet to get the Metadata instead of talking to API server for large clusters -filter: - name: "kubernetes" - match: "systempods.*" - kubeURL: "https://kubernetes.default.svc.cluster.local:443" - mergeLog: "On" - mergeLogKey: "log_processed" - keepLog: "On" - k8sLoggingParser: "On" - k8sLoggingExclude: "Off" - bufferSize: "0" - extraFilters: | - Kube_Tag_Prefix systempods. - Regex_Parser kubernetes - Labels On - Annotations Off - Use_Kubelet true - Kubelet_Port 10250 - Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token - -# CATION: Do not use `cloudwatch` plugin. This Golang Plugin is not recommended by AWS anymore instead use C plugin(`cloudWatchLogs`) for better performance. -# cloudWatch: -# enabled: false - -# This is a new high performance C Plugin for CloudWatchLogs. See docs here https://docs.fluentbit.io/manual/pipeline/outputs/cloudwatch -cloudWatchLogs: - enabled: true - match: "systempods.*" - region: ${region} - logGroupName: ${cloudwatch_log_group} - autoCreateGroup: false - extraOutputs: | - log_key log - -#----------------------------------------------------------# -# OUTPUT logs to S3 -#----------------------------------------------------------# - -# This is an example for writing logs to S3 bucket. -# This example writes system pod logs and spark logs into dedicated prefix. 
-# This second output is using the rewrite_tag filter commented above - -additionalOutputs: | - [OUTPUT] - Name s3 - Match systempods.* - region ${region} - bucket ${s3_bucket_name} - total_file_size 100M - s3_key_format /${cluster_name}/system-pod-logs/$TAG[1]/$TAG[2]/$TAG[3]/$TAG[3]_%H%M%S_$UUID.log - s3_key_format_tag_delimiters .. - store_dir /home/ec2-user/buffer - upload_timeout 10m - log_key log - - -# Resource config for large clusters -resources: - limits: - cpu: 1000m - memory: 1500Mi - requests: - cpu: 500m - memory: 500Mi - -## Assign a PriorityClassName to pods if set -priorityClassName: system-node-critical - -# This toleration allows Daemonset pod to be scheduled on any node, regardless of their Taints. -tolerations: - - operator: Exists From 3879966db9faeb868e928fc08671a53a7f675ee9 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Mon, 22 Apr 2024 13:56:53 -0700 Subject: [PATCH 16/25] addressed PR review comments --- streaming/emr-eks-flink/README.md | 92 ++++++++++++++- streaming/emr-eks-flink/addons.tf | 95 ++++++++++++++- streaming/emr-eks-flink/data.tf | 64 ----------- .../cluster-autoscaler/flink-sample-job.yaml | 4 +- .../karpenter/basic-example-app-cluster.yaml | 6 +- .../graviton-example-app-cluster.yaml | 72 ++++++++++++ streaming/emr-eks-flink/flink-team.tf | 37 +++++- streaming/emr-eks-flink/locals.tf | 13 --- streaming/emr-eks-flink/main.tf | 108 +++++------------- streaming/emr-eks-flink/outputs.tf | 18 +++ streaming/emr-eks-flink/versions.tf | 2 +- streaming/emr-eks-flink/vpc.tf | 27 +++++ 12 files changed, 360 insertions(+), 178 deletions(-) delete mode 100644 streaming/emr-eks-flink/data.tf create mode 100644 streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml delete mode 100644 streaming/emr-eks-flink/locals.tf create mode 100644 streaming/emr-eks-flink/vpc.tf diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index fe4c818a7..1c05db8b4 100644 --- 
a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -1,3 +1,77 @@ +# EMR-EKS-Flink Blueprint + +Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/blueprints/streaming/emr-eks-flink) to deploy this pattern and run sample tests. + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.0.0 | +| [aws](#requirement\_aws) | >= 3.72 | +| [helm](#requirement\_helm) | >= 2.13.0 | +| [kubectl](#requirement\_kubectl) | >= 1.14 | +| [kubernetes](#requirement\_kubernetes) | >= 2.10 | +| [random](#requirement\_random) | 3.3.2 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | >= 3.72 | +| [aws.ecr](#provider\_aws.ecr) | >= 3.72 | +| [kubernetes](#provider\_kubernetes) | >= 2.10 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.20 | +| [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 | +| [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 | +| [eks\_data\_addons](#module\_eks\_data\_addons) | aws-ia/eks-data-addons/aws | ~> 1.30 | +| [flink\_irsa\_jobs](#module\_flink\_irsa\_jobs) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | +| [flink\_irsa\_operator](#module\_flink\_irsa\_operator) | aws-ia/eks-blueprints-addon/aws | ~> 1.0 | +| [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 | +| [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | 5.5.1 | + +## Resources + +| Name | Type | +|------|------| +| [aws_cloudwatch_log_group.flink_team_a](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_iam_policy.flink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| 
[aws_s3_object.checkpoints](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.jobmanager](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.savepoints](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [kubernetes_namespace_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource | +| [aws_ami.x86](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ami) | data source | +| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_ecrpublic_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecrpublic_authorization_token) | data source | +| [aws_eks_cluster_auth.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source | +| [aws_iam_policy_document.flink_sample_job](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS version for the cluster | `string` | `"1.28"` | no | +| [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"emr-eks-flink"` | no | +| [region](#input\_region) | Region for deployment | `string` | `"us-west-2"` | no | + +## Outputs + +| Name | 
Description | +|------|-------------| +| [flink\_checkpoint\_path](#output\_flink\_checkpoint\_path) | S3 path for checkpoint data | +| [flink\_jobmanager\_path](#output\_flink\_jobmanager\_path) | S3 path for jobmanager data | +| [flink\_jobs\_role\_arn](#output\_flink\_jobs\_role\_arn) | IAM linked role for the flink job | +| [flink\_logs\_path](#output\_flink\_logs\_path) | S3 path for logs | +| [flink\_operator\_role\_arn](#output\_flink\_operator\_role\_arn) | IAM linked role for the flink operator | +| [flink\_savepoint\_path](#output\_flink\_savepoint\_path) | S3 path for savepoint data | + ## Requirements @@ -5,7 +79,7 @@ |------|---------| | [terraform](#requirement\_terraform) | >= 1.0.0 | | [aws](#requirement\_aws) | >= 3.72 | -| [helm](#requirement\_helm) | >= 2.4.1 | +| [helm](#requirement\_helm) | >= 2.13.0 | | [kubectl](#requirement\_kubectl) | >= 1.14 | | [kubernetes](#requirement\_kubernetes) | >= 2.10 | | [random](#requirement\_random) | 3.3.2 | @@ -14,9 +88,8 @@ | Name | Version | |------|---------| -| [aws](#provider\_aws) | 5.45.0 | -| [aws.ecr](#provider\_aws.ecr) | 5.45.0 | -| [helm](#provider\_helm) | 2.13.0 | +| [aws](#provider\_aws) | 5.46.0 | +| [aws.ecr](#provider\_aws.ecr) | 5.46.0 | | [kubernetes](#provider\_kubernetes) | 2.29.0 | ## Modules @@ -38,14 +111,17 @@ |------|------| | [aws_cloudwatch_log_group.flink_team_a](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | | [aws_iam_policy.flink](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | -| [helm_release.flink_kubernetes_operator](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource | +| [aws_s3_object.checkpoints](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.jobmanager](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) 
| resource | +| [aws_s3_object.logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.savepoints](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | | [kubernetes_namespace_v1.flink_team_a](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace_v1) | resource | | [aws_ami.x86](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ami) | data source | | [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | [aws_ecrpublic_authorization_token.token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ecrpublic_authorization_token) | data source | | [aws_eks_cluster_auth.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source | -| [aws_iam_policy_document.flink_operator](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.flink_sample_job](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | ## Inputs @@ -59,6 +135,10 @@ | Name | Description | |------|-------------| +| [flink\_checkpoint\_path](#output\_flink\_checkpoint\_path) | S3 path for checkpoint data | +| [flink\_jobmanager\_path](#output\_flink\_jobmanager\_path) | S3 path for jobmanager data | | [flink\_jobs\_role\_arn](#output\_flink\_jobs\_role\_arn) | IAM linked role for the flink job | +| [flink\_logs\_path](#output\_flink\_logs\_path) | S3 path for logs | | [flink\_operator\_role\_arn](#output\_flink\_operator\_role\_arn) | IAM linked role for the 
flink operator | +| [flink\_savepoint\_path](#output\_flink\_savepoint\_path) | S3 path for savepoint data | diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index b88438b06..db9031420 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -2,13 +2,9 @@ module "s3_bucket" { source = "terraform-aws-modules/s3-bucket/aws" version = "~> 3.0" - bucket_prefix = "${local.name}-flink-logs-" + bucket_prefix = "${local.name}-" - # For example only - please evaluate for your environment - force_destroy = true - - attach_deny_insecure_transport_policy = true - attach_require_latest_tls_policy = true + attach_require_latest_tls_policy = true block_public_acls = true block_public_policy = true @@ -143,11 +139,27 @@ module "eks_blueprints_addons" { # Data on EKS Kubernetes Addons #--------------------------------------------------------------- module "eks_data_addons" { + + depends_on = [module.flink_irsa_jobs, module.flink_irsa_operator] + #source = "git@github.com:mithun008/terraform-aws-eks-data-addons/" source = "aws-ia/eks-data-addons/aws" version = "~> 1.30" # ensure to update this to the latest/desired version oidc_provider_arn = module.eks.oidc_provider_arn enable_karpenter_resources = true + enable_emr_flink_operator = true + emr_flink_operator_helm_config = { + namespace = "emr-flink-operator" + create_namespace = true + namespace = "${local.flink_operator}-ns" + name = "flink-kubernetes-operator" + repository = "oci://public.ecr.aws/emr-on-eks" + chart = "flink-kubernetes-operator" + operatorExecutionRoleArn = module.flink_irsa_operator.iam_role_arn + + + } + karpenter_resources_helm_config = { flink-compute-optimized = { values = [ @@ -207,6 +219,77 @@ module "eks_data_addons" { EOT ] } + flink-graviton-memory-optimized = { + values = [ + <<-EOT + name: flink-graviton-memory-optimized + clusterName: ${module.eks.cluster_name} + ec2NodeClass: + karpenterRole: ${split("/", 
module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} + subnetSelectorTerms: + tags: + Name: "${module.eks.cluster_name}-private*" + securityGroupSelectorTerms: + tags: + Name: ${module.eks.cluster_name}-node + instanceStorePolicy: RAID0 + nodePool: + labels: + - type: karpenter + - NodeGroupType: FlinkGravitonMemoryOptimized + - multiArch: Flink + requirements: + - key: "karpenter.sh/capacity-type" + operator: In + values: ["spot", "on-demand"] + - key: "kubernetes.io/arch" + operator: In + values: ["arm64"] + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: ["r"] + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["r6gd"] + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: ["4", "8", "16", "32"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + - key: "karpenter.k8s.aws/instance-generation" + operator: Gt + values: ["2"] + limits: + cpu: 1000 + disruption: + consolidationPolicy: WhenEmpty + consolidateAfter: 30s + expireAfter: 720h + weight: 50 + EOT + ] + } } +} +resource "aws_s3_object" "checkpoints" { + bucket = module.s3_bucket.s3_bucket_id + key = "checkpoints/" + content_type = "application/x-directory" +} +resource "aws_s3_object" "savepoints" { + bucket = module.s3_bucket.s3_bucket_id + key = "savepoints/" + content_type = "application/x-directory" +} +resource "aws_s3_object" "jobmanager" { + bucket = module.s3_bucket.s3_bucket_id + key = "jobmanager/" + content_type = "application/x-directory" +} +resource "aws_s3_object" "logs" { + bucket = module.s3_bucket.s3_bucket_id + key = "logs/" + content_type = "application/x-directory" } diff --git a/streaming/emr-eks-flink/data.tf b/streaming/emr-eks-flink/data.tf deleted file mode 100644 index 260ac6dab..000000000 --- a/streaming/emr-eks-flink/data.tf +++ /dev/null @@ -1,64 +0,0 @@ - -data "aws_eks_cluster_auth" "this" { - name = module.eks.cluster_name -} - - -data "aws_ecrpublic_authorization_token" "token" { 
- provider = aws.ecr -} - -data "aws_availability_zones" "available" {} - - - -data "aws_caller_identity" "current" {} - - - -# This data source can be used to get the latest AMI for Managed Node Groups -data "aws_ami" "x86" { - owners = ["amazon"] - most_recent = true - - filter { - name = "name" - values = ["amazon-eks-node-${module.eks.cluster_version}-*"] # Update this for ARM ["amazon-eks-arm64-node-${module.eks.cluster_version}-*"] - } -} - - -#--------------------------------------------------------------- -# Example IAM policy for Flink job execution -#--------------------------------------------------------------- -data "aws_iam_policy_document" "flink_operator" { - statement { - sid = "" - effect = "Allow" - resources = ["*"] - - actions = [ - "s3:ListBucket", - "s3:GetObject", - "s3:PutObject", - "s3:DeleteObject", - "s3:GetBucketLocation", - "s3:GetObjectVersion" - ] - } - - statement { - sid = "" - effect = "Allow" - resources = ["*"] - - actions = [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:DescribeLogGroups", - "logs:DescribeLogStreams", - "logs:PutLogEvents", - ] - } - -} diff --git a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml index 9f1976fd0..8c191a0da 100644 --- a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml +++ b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml @@ -5,7 +5,7 @@ metadata: namespace: flink-team-a-ns spec: imagePullPolicy: Always - emrReleaseLabel: "emr-7.0.0-flink-latest" + emrReleaseLabel: "emr-7.1.0-flink-latest" flinkVersion: v1_18 flinkConfiguration: taskmanager.numberOfTaskSlots: "2" @@ -26,7 +26,7 @@ spec: state.checkpoints.dir: s3://emr-flink-data/checkpoints state.savepoints.dir: s3://emr-flink-data/savepoints - + # Replace this execution role ARN with your own executionRoleArn: 
arn:aws:iam::xxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 jobManager: # Replace with s3 bucket in your own account diff --git a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml index c46d0c581..429a6d3cb 100644 --- a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml @@ -5,7 +5,7 @@ metadata: namespace: flink-team-a-ns spec: imagePullPolicy: Always - emrReleaseLabel: "emr-7.0.0-flink-latest" + emrReleaseLabel: "emr-7.1.0-flink-latest" flinkVersion: v1_18 flinkConfiguration: taskmanager.numberOfTaskSlots: "2" @@ -26,8 +26,8 @@ spec: state.checkpoints.dir: s3://emr-flink-data/checkpoints state.savepoints.dir: s3://emr-flink-data/savepoints - - executionRoleArn: arn:aws:iam::xxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 + # Replace this execution role ARN with your own + executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240406012025932700000008 podTemplate: apiVersion: v1 diff --git a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml new file mode 100644 index 000000000..19b650676 --- /dev/null +++ b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml @@ -0,0 +1,72 @@ +apiVersion: flink.apache.org/v1beta1 +kind: FlinkDeployment +metadata: + name: graviton-example-karpenter-flink + namespace: flink-team-a-ns +spec: + imagePullPolicy: Always + emrReleaseLabel: "emr-7.1.0-flink-latest" + flinkVersion: v1_18 + flinkConfiguration: + taskmanager.numberOfTaskSlots: "2" + # Autotuning parameters + kubernetes.operator.job.autoscaler.autotune.enable: "true" + kubernetes.operator.job.autoscaler.autotune.metrics.history.max.count: "2" + 
kubernetes.operator.job.autoscaler.autotune.metrics.restart.count: "1" + metrics.job.status.enable: TOTAL_TIME + + # Autoscaler parameters + kubernetes.operator.job.autoscaler.enabled: "true" + kubernetes.operator.job.autoscaler.scaling.enabled: "true" + kubernetes.operator.job.autoscaler.stabilization.interval: "5s" + kubernetes.operator.job.autoscaler.metrics.window: "1m" + + jobmanager.scheduler: adaptive + # Replace with s3 bucket in your own account + state.checkpoints.dir: s3://emr-flink-data/checkpoints + state.savepoints.dir: s3://emr-flink-data/savepoints + + # Replace this execution role ARN with your own + executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240406012025932700000008 + + podTemplate: + apiVersion: v1 + kind: Pod + metadata: + name: pod-template + spec: + nodeSelector: + NodeGroupType: "FlinkGravitonMemoryOptimized" + + + jobManager: + # Replace with s3 bucket in your own account + storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager + resource: + memory: "2048m" + cpu: 1 + + taskManager: + replicas: 2 + resource: + memory: "2048m" + cpu: 1 + + job: + # if you have your job jar in S3 bucket you can use that path as well + jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar + parallelism: 4 + upgradeMode: savepoint + savepointTriggerNonce: 0 + monitoringConfiguration: + s3MonitoringConfiguration: + logUri: emr-flink-data/logs + cloudWatchMonitoringConfiguration: + logGroupName: /aws/emr-flink/flink-team-a + sideCarResources: + limits: + cpuLimit: 500m + memoryLimit: 250Mi + containerLogRotationConfiguration: + rotationSize: 2GB + maxFilesToKeep: "10" diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf index 61b06c946..baf11836c 100644 --- a/streaming/emr-eks-flink/flink-team.tf +++ b/streaming/emr-eks-flink/flink-team.tf @@ -14,7 +14,7 @@ resource "kubernetes_namespace_v1" "flink_team_a" { resource "aws_iam_policy" "flink" { description = 
"IAM role policy for flink Job execution" name = "${local.name}-flink-irsa" - policy = data.aws_iam_policy_document.flink_operator.json + policy = data.aws_iam_policy_document.flink_sample_job.json } #--------------------------------------------------------------- @@ -46,7 +46,6 @@ module "flink_irsa_jobs" { } - #--------------------------------------------------------------- # IRSA for flink pods for "flink-operator" #--------------------------------------------------------------- @@ -79,5 +78,37 @@ module "flink_irsa_operator" { #create a log group resource "aws_cloudwatch_log_group" "flink_team_a" { name = "/aws/emr-flink/flink-team-a" - retention_in_days = 30 + retention_in_days = 7 +} + +#--------------------------------------------------------------- +# Example IAM policy for Flink job execution +#--------------------------------------------------------------- +data "aws_iam_policy_document" "flink_sample_job" { + statement { + sid = "" + effect = "Allow" + resources = ["*"] + actions = [ + "s3:ListBucket", + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:GetBucketLocation", + "s3:GetObjectVersion" + ] + } + statement { + sid = "" + effect = "Allow" + resources = ["*"] + + actions = [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:DescribeLogGroups", + "logs:DescribeLogStreams", + "logs:PutLogEvents", + ] + } } diff --git a/streaming/emr-eks-flink/locals.tf b/streaming/emr-eks-flink/locals.tf deleted file mode 100644 index ecfb50c4b..000000000 --- a/streaming/emr-eks-flink/locals.tf +++ /dev/null @@ -1,13 +0,0 @@ -#create local -locals { - name = var.name - - region = var.region - - tags = { - Blueprint = local.name - GithubRepo = "github.com/awslabs/data-on-eks" - } - flink_team = "flink-team-a" - flink_operator = "flink-kubernetes-operator" -} diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf index e2c7bd18d..805d61320 100644 --- a/streaming/emr-eks-flink/main.tf +++ b/streaming/emr-eks-flink/main.tf @@ 
-1,3 +1,31 @@ +#create local +locals { + name = var.name + region = var.region + tags = { + Blueprint = local.name + GithubRepo = "github.com/awslabs/data-on-eks" + } + flink_team = "flink-team-a" + flink_operator = "flink-kubernetes-operator" +} +data "aws_eks_cluster_auth" "this" { + name = module.eks.cluster_name +} +data "aws_ecrpublic_authorization_token" "token" { + provider = aws.ecr +} +data "aws_availability_zones" "available" {} +data "aws_caller_identity" "current" {} +# This data source can be used to get the latest AMI for Managed Node Groups +data "aws_ami" "x86" { + owners = ["amazon"] + most_recent = true + filter { + name = "name" + values = ["amazon-eks-node-${module.eks.cluster_version}-*"] # Update this for ARM ["amazon-eks-arm64-node-${module.eks.cluster_version}-*"] + } +} # create eks cluster module "eks" { source = "terraform-aws-modules/eks/aws" @@ -26,8 +54,6 @@ module "eks" { } ] - - #--------------------------------------- # Note: This can further restricted to specific required for each Add-on and your application #--------------------------------------- @@ -140,83 +166,5 @@ module "eks" { "karpenter.sh/discovery" = local.name } } - - } - -} - -#import module vpc -module "vpc" { - source = "terraform-aws-modules/vpc/aws" - version = "5.5.1" - - name = local.name - - cidr = "10.0.0.0/16" - azs = slice(data.aws_availability_zones.available.names, 0, 3) - - private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] - public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] - - enable_nat_gateway = true - single_nat_gateway = true - enable_dns_hostnames = true - - public_subnet_tags = { - "kubernetes.io/cluster/${local.name}" = "shared" - "kubernetes.io/role/elb" = 1 - } - - private_subnet_tags = { - "kubernetes.io/cluster/${local.name}" = "shared" - "kubernetes.io/role/internal-elb" = 1 - } -} - - - - -# deploy a helm chart for flink-kubernetes-operator -resource "helm_release" "flink_kubernetes_operator" { - - depends_on = 
[module.flink_irsa_jobs, module.flink_irsa_operator] - name = "flink-kubernetes-operator" - repository = "oci://public.ecr.aws/emr-on-eks" - chart = "flink-kubernetes-operator" - create_namespace = true - namespace = "${local.flink_operator}-ns" - - set { - name = "watchNamespace" - value = "${local.flink_team}-ns" - } - set { - name = "emrContainers.operatorExecutionRoleArn" - value = module.flink_irsa_operator.iam_role_arn - } - - set { - name = "env.AWS_REGION" - value = var.region - } - - # set the version - set { - name = "image.tag" - value = "7.0.0" - } - - # set prometheus metrics - set { - name = "prometheus.enabled" - value = "true" - } - - #set prometheus metrics - set { - name = "prometheus.metrics.port" - value = "8081" - } - } diff --git a/streaming/emr-eks-flink/outputs.tf b/streaming/emr-eks-flink/outputs.tf index a65b6a2f5..1efc9632b 100644 --- a/streaming/emr-eks-flink/outputs.tf +++ b/streaming/emr-eks-flink/outputs.tf @@ -7,3 +7,21 @@ output "flink_operator_role_arn" { value = module.flink_irsa_operator.iam_role_arn description = "IAM linked role for the flink operator" } + +output "flink_checkpoint_path" { + value = "s3://${module.s3_bucket.s3_bucket_id}/checkpoints" + description = "S3 path for checkpoint data" +} +output "flink_savepoint_path" { + value = "s3://${module.s3_bucket.s3_bucket_id}/savepoints" + description = "S3 path for savepoint data" +} +output "flink_jobmanager_path" { + value = "s3://${module.s3_bucket.s3_bucket_id}/jobmanager" + description = "S3 path for jobmanager data" +} + +output "flink_logs_path" { + value = "s3://${module.s3_bucket.s3_bucket_id}/logs" + description = "S3 path for logs" +} diff --git a/streaming/emr-eks-flink/versions.tf b/streaming/emr-eks-flink/versions.tf index 9599a6e77..6eeabc436 100644 --- a/streaming/emr-eks-flink/versions.tf +++ b/streaming/emr-eks-flink/versions.tf @@ -12,7 +12,7 @@ terraform { } helm = { source = "hashicorp/helm" - version = ">= 2.4.1" + version = ">= 2.13.0" } kubectl = { 
source = "gavinbunney/kubectl" diff --git a/streaming/emr-eks-flink/vpc.tf b/streaming/emr-eks-flink/vpc.tf new file mode 100644 index 000000000..cddc5a13c --- /dev/null +++ b/streaming/emr-eks-flink/vpc.tf @@ -0,0 +1,27 @@ +#import module vpc +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "5.5.1" + + name = local.name + + cidr = "10.0.0.0/16" + azs = slice(data.aws_availability_zones.available.names, 0, 3) + + private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] + public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] + + enable_nat_gateway = true + single_nat_gateway = true + enable_dns_hostnames = true + + public_subnet_tags = { + "kubernetes.io/cluster/${local.name}" = "shared" + "kubernetes.io/role/elb" = 1 + } + + private_subnet_tags = { + "kubernetes.io/cluster/${local.name}" = "shared" + "kubernetes.io/role/internal-elb" = 1 + } +} From 527668bcb375a151e771bcc3f58bd6dff558db94 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Mon, 22 Apr 2024 14:09:20 -0700 Subject: [PATCH 17/25] removed invalid source line --- streaming/emr-eks-flink/addons.tf | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index db9031420..96656644c 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -139,12 +139,9 @@ module "eks_blueprints_addons" { # Data on EKS Kubernetes Addons #--------------------------------------------------------------- module "eks_data_addons" { - - depends_on = [module.flink_irsa_jobs, module.flink_irsa_operator] - #source = "git@github.com:mithun008/terraform-aws-eks-data-addons/" - source = "aws-ia/eks-data-addons/aws" - version = "~> 1.30" # ensure to update this to the latest/desired version - + depends_on = [module.flink_irsa_jobs, module.flink_irsa_operator] + source = "aws-ia/eks-data-addons/aws" + version = "~> 1.30" # ensure to update this to the latest/desired version 
oidc_provider_arn = module.eks.oidc_provider_arn enable_karpenter_resources = true enable_emr_flink_operator = true From 6e91fea059f838b7118ba811a3246539dc1ac15c Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Mon, 22 Apr 2024 14:15:46 -0700 Subject: [PATCH 18/25] removed account id --- .../examples/karpenter/basic-example-app-cluster.yaml | 2 +- .../examples/karpenter/graviton-example-app-cluster.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml index 429a6d3cb..c0d3c4147 100644 --- a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml @@ -27,7 +27,7 @@ spec: state.savepoints.dir: s3://emr-flink-data/savepoints # Replace this execution role ARN with your own - executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240406012025932700000008 + executionRoleArn: arn:aws:iam::xxxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 podTemplate: apiVersion: v1 diff --git a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml index 19b650676..48ca2596b 100644 --- a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml @@ -27,7 +27,7 @@ spec: state.savepoints.dir: s3://emr-flink-data/savepoints # Replace this execution role ARN with your own - executionRoleArn: arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240406012025932700000008 + executionRoleArn: arn:aws:iam::xxxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 podTemplate: apiVersion: v1 From 7560e0c3307eb802e5582ad990a8431fb90a0272 Mon Sep 17 00:00:00 2001 From: Mithun 
Mallick Date: Mon, 22 Apr 2024 14:20:25 -0700 Subject: [PATCH 19/25] updated web site doc --- .../streaming-platforms/emr-eks-flink.md | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md index 7afab6b29..3e56e976a 100644 --- a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md +++ b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md @@ -40,8 +40,7 @@ In addition to the above features, EMR Flink Kubernetes operator provides the fo 3. Automatically tunes Autoscaler configurations based on historical trends of observed metrics. 4. Faster Flink Job Restart during scaling or Failure Recovery 5. IRSA (IAM Roles for Service Accounts) Native Integration -6. Apache Airflow Integration -7. Pyflink support +6. Pyflink support Flink Operator defines two types of Custom Resources(CR) which are the extensions of the Kubernetes API. @@ -201,7 +200,7 @@ This example deploys an EKS Cluster running the Flink Operator into a new VPC. - Deploys Metrics server, Cluster Autoscaler, Apache YuniKorn, Karpenter, Grafana, AMP and Prometheus server - Deploys Cert Manager and EMR Flink Operator. Flink Operator has dependency on Cert Manager - Creates a new Flink Data team resources that includes namespace, IRSA, Role and Role binding -- Deploys three Karpenter provisioners for different compute types +- Deploys Karpenter provisioner for flink-compute-optimized types ### Prerequisites @@ -270,16 +269,31 @@ To list all the resources created for Flink team to run Flink jobs using this na Navigate to example directory and submit the Flink job. ```bash -cd data-on-eks/streaming/emr-eks-flink/examples +cd data-on-eks/streaming/emr-eks-flink/examples/karpenter ``` Get the role arn linked to the job execution service account. 
```bash terraform output flink_jobs_role_arn "arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001" +``` +Get the S3 bucket paths for checkpoint,savepoint,logs and job storage data. +```bash +terraform output flink_checkpoint_path +"s3://emr-eks-flink-20240417234319144300000001/checkpoints" + +terraform output flink_savepoint_path +"s3://emr-eks-flink-20240417234319144300000001/savepoints" + +terraform output flink_jobmanager_path +"s3://emr-eks-flink-20240417234319144300000001/jobmanager" + +terraform output flink_logs_path +"s3://emr-eks-flink-20240417234319144300000001/logs" + ``` -Open the basic-example-app-cluster.yaml in any editor and replace the place holder for **REPLACE_WITH_JOB_EXECUTION_ROLE_ARN** with the role ARN that you got from the terraform output command and save the file. +Open the basic-example-app-cluster.yaml in any editor and replace the place holder for **REPLACE_WITH_JOB_EXECUTION_ROLE_ARN** with the role ARN that you got from the terraform output command. Replace the s3 object paths with the s3 object paths that you created in previous step.The monitoring jobUri only takes the object name. Deploy the job by running the kubectl deply command. From 5d0d834e6133404f228a9ea641c9620ab454debf Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Tue, 23 Apr 2024 16:54:01 -0700 Subject: [PATCH 20/25] removed lines --- README.md | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) mode change 100755 => 100644 README.md diff --git a/README.md b/README.md old mode 100755 new mode 100644 index eca93be2a..3c5449bc3 --- a/README.md +++ b/README.md @@ -74,30 +74,4 @@ This library is licensed under the Apache 2.0 License. ## 🙌 Community We welcome all individuals who are enthusiastic about data on Kubernetes to become a part of this open source community. Your contributions and participation are invaluable to the success of this project. -Built with ❤️ at AWS. 
- - -## Requirements - -No requirements. - -## Providers - -No providers. - -## Modules - -No modules. - -## Resources - -No resources. - -## Inputs - -No inputs. - -## Outputs - -No outputs. - \ No newline at end of file +Built with ❤️ at AWS. \ No newline at end of file From cd389059cbe0c1f1a7f5151d1c68d5f626db341a Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 24 Apr 2024 11:02:37 -0700 Subject: [PATCH 21/25] changes based on PR review --- streaming/emr-eks-flink/README.md | 16 ++-- streaming/emr-eks-flink/addons.tf | 87 +++++-------------- .../cluster-autoscaler/flink-sample-job.yaml | 13 +-- .../karpenter/basic-example-app-cluster.yaml | 14 +-- .../graviton-example-app-cluster.yaml | 13 +-- streaming/emr-eks-flink/flink-team.tf | 58 +++++++++++-- streaming/emr-eks-flink/main.tf | 50 +++++------ streaming/emr-eks-flink/outputs.tf | 32 ++++--- streaming/emr-eks-flink/providers.tf | 3 - streaming/emr-eks-flink/variables.tf | 12 +-- streaming/emr-eks-flink/versions.tf | 2 +- streaming/emr-eks-flink/vpc.tf | 21 +++-- 12 files changed, 156 insertions(+), 165 deletions(-) diff --git a/streaming/emr-eks-flink/README.md b/streaming/emr-eks-flink/README.md index 1c05db8b4..52710bdc0 100644 --- a/streaming/emr-eks-flink/README.md +++ b/streaming/emr-eks-flink/README.md @@ -65,12 +65,10 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | |------|-------------| -| [flink\_checkpoint\_path](#output\_flink\_checkpoint\_path) | S3 path for checkpoint data | -| [flink\_jobmanager\_path](#output\_flink\_jobmanager\_path) | S3 path for jobmanager data | -| [flink\_jobs\_role\_arn](#output\_flink\_jobs\_role\_arn) | IAM linked role for the flink job | -| [flink\_logs\_path](#output\_flink\_logs\_path) | S3 path for logs | +| [configure\_kubectl](#output\_configure\_kubectl) | Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig | +| 
[flink\_job\_execution\_role\_arn](#output\_flink\_job\_execution\_role\_arn) | IAM linked role for the flink job | +| [flink\_operator\_bucket](#output\_flink\_operator\_bucket) | S3 bucket name for Flink operator data,logs,checkpoint and savepoint | | [flink\_operator\_role\_arn](#output\_flink\_operator\_role\_arn) | IAM linked role for the flink operator | -| [flink\_savepoint\_path](#output\_flink\_savepoint\_path) | S3 path for savepoint data | ## Requirements @@ -135,10 +133,8 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/ | Name | Description | |------|-------------| -| [flink\_checkpoint\_path](#output\_flink\_checkpoint\_path) | S3 path for checkpoint data | -| [flink\_jobmanager\_path](#output\_flink\_jobmanager\_path) | S3 path for jobmanager data | -| [flink\_jobs\_role\_arn](#output\_flink\_jobs\_role\_arn) | IAM linked role for the flink job | -| [flink\_logs\_path](#output\_flink\_logs\_path) | S3 path for logs | +| [configure\_kubectl](#output\_configure\_kubectl) | Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig | +| [flink\_job\_execution\_role\_arn](#output\_flink\_job\_execution\_role\_arn) | IAM linked role for the flink job | +| [flink\_operator\_bucket](#output\_flink\_operator\_bucket) | S3 bucket name for Flink operator data,logs,checkpoint and savepoint | | [flink\_operator\_role\_arn](#output\_flink\_operator\_role\_arn) | IAM linked role for the flink operator | -| [flink\_savepoint\_path](#output\_flink\_savepoint\_path) | S3 path for savepoint data | diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index 96656644c..ce15fc8c0 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -1,26 +1,3 @@ -module "s3_bucket" { - source = "terraform-aws-modules/s3-bucket/aws" - version = "~> 3.0" - - bucket_prefix = "${local.name}-" - - 
attach_require_latest_tls_policy = true - - block_public_acls = true - block_public_policy = true - ignore_public_acls = true - restrict_public_buckets = true - - server_side_encryption_configuration = { - rule = { - apply_server_side_encryption_by_default = { - sse_algorithm = "AES256" - } - } - } - - tags = local.tags -} #--------------------------------------------------------------- # IRSA for EBS CSI Driver #--------------------------------------------------------------- @@ -66,7 +43,10 @@ module "eks_blueprints_addons" { preserve = true } } - # install cert-manager + + #--------------------------------------- + # Install cert-manager + #--------------------------------------- enable_cert_manager = true cert_manager = { set_values = [ @@ -76,6 +56,7 @@ module "eks_blueprints_addons" { }, ] } + #--------------------------------------- # Metrics Server #--------------------------------------- @@ -95,6 +76,7 @@ module "eks_blueprints_addons" { eks_cluster_id = module.eks.cluster_name })] } + #--------------------------------------- # Karpenter Autoscaler for EKS Cluster #--------------------------------------- @@ -113,8 +95,6 @@ module "eks_blueprints_addons" { } } - - #--------------------------------------- # CloudWatch metrics for EKS #--------------------------------------- @@ -122,29 +102,23 @@ module "eks_blueprints_addons" { aws_cloudwatch_metrics = { values = [templatefile("${path.module}/helm-values/aws-cloudwatch-metrics-values.yaml", {})] } - #--------------------------------------- - # Adding AWS Load Balancer Controller - #--------------------------------------- - enable_aws_load_balancer_controller = true - - #--------------------------------------- - # AWS for FluentBit - DaemonSet - #--------------------------------------- - # With EMR Flink on EKS, set up monitoring configurations to archive application logs to S3/CW. 
- enable_aws_for_fluentbit = false - } + #--------------------------------------------------------------- # Data on EKS Kubernetes Addons #--------------------------------------------------------------- module "eks_data_addons" { - depends_on = [module.flink_irsa_jobs, module.flink_irsa_operator] - source = "aws-ia/eks-data-addons/aws" - version = "~> 1.30" # ensure to update this to the latest/desired version - oidc_provider_arn = module.eks.oidc_provider_arn - enable_karpenter_resources = true - enable_emr_flink_operator = true + depends_on = [module.flink_irsa_jobs, module.flink_irsa_operator] + + source = "aws-ia/eks-data-addons/aws" + version = "~> 1.30" # ensure to update this to the latest/desired version + oidc_provider_arn = module.eks.oidc_provider_arn + + #--------------------------------------------------------------- + # EMR Flink operator + #--------------------------------------------------------------- + enable_emr_flink_operator = true emr_flink_operator_helm_config = { namespace = "emr-flink-operator" create_namespace = true @@ -153,10 +127,12 @@ module "eks_data_addons" { repository = "oci://public.ecr.aws/emr-on-eks" chart = "flink-kubernetes-operator" operatorExecutionRoleArn = module.flink_irsa_operator.iam_role_arn - - } + #--------------------------------------------------------------- + # Karpenter nodepools + #--------------------------------------------------------------- + enable_karpenter_resources = true karpenter_resources_helm_config = { flink-compute-optimized = { values = [ @@ -269,24 +245,3 @@ module "eks_data_addons" { } } } - -resource "aws_s3_object" "checkpoints" { - bucket = module.s3_bucket.s3_bucket_id - key = "checkpoints/" - content_type = "application/x-directory" -} -resource "aws_s3_object" "savepoints" { - bucket = module.s3_bucket.s3_bucket_id - key = "savepoints/" - content_type = "application/x-directory" -} -resource "aws_s3_object" "jobmanager" { - bucket = module.s3_bucket.s3_bucket_id - key = "jobmanager/" - 
content_type = "application/x-directory" -} -resource "aws_s3_object" "logs" { - bucket = module.s3_bucket.s3_bucket_id - key = "logs/" - content_type = "application/x-directory" -} diff --git a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml index 8c191a0da..8bb36510f 100644 --- a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml +++ b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml @@ -1,3 +1,6 @@ +# NOTE: Make sure you you replce with your flink_operator_bucket output before running this job +# Replace the with the flink_job_execution_role_arn output +--- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment metadata: @@ -23,14 +26,14 @@ spec: jobmanager.scheduler: adaptive # Replace with s3 bucket in your own account - state.checkpoints.dir: s3://emr-flink-data/checkpoints - state.savepoints.dir: s3://emr-flink-data/savepoints + state.checkpoints.dir: s3:///checkpoints + state.savepoints.dir: s3:///savepoints # Replace this execution role ARN with your own - executionRoleArn: arn:aws:iam::xxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 + executionRoleArn: jobManager: # Replace with s3 bucket in your own account - storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager + storageDir: s3:///data/basic-example-app-cluster/jobmanager resource: memory: "2048m" cpu: 1 @@ -49,7 +52,7 @@ spec: savepointTriggerNonce: 0 monitoringConfiguration: s3MonitoringConfiguration: - logUri: emr-flink-data/logs + logUri: /logs cloudWatchMonitoringConfiguration: logGroupName: /aws/emr-flink/flink-team-a sideCarResources: diff --git a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml index c0d3c4147..9b9e63848 100644 --- 
a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml @@ -1,3 +1,6 @@ +# NOTE: Make sure you you replce and with your S3 Bucket before running this job. +# Replace with flink_job_execution_role_arn +--- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment metadata: @@ -23,11 +26,11 @@ spec: jobmanager.scheduler: adaptive # Replace with s3 bucket in your own account - state.checkpoints.dir: s3://emr-flink-data/checkpoints - state.savepoints.dir: s3://emr-flink-data/savepoints + state.checkpoints.dir: s3:///checkpoints + state.savepoints.dir: s3:///savepoints # Replace this execution role ARN with your own - executionRoleArn: arn:aws:iam::xxxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 + executionRoleArn: <> podTemplate: apiVersion: v1 @@ -38,10 +41,9 @@ spec: nodeSelector: NodeGroupType: "FlinkComputeOptimized" - jobManager: # Replace with s3 bucket in your own account - storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager + storageDir: s3:///data/basic-example-app-cluster/jobmanager resource: memory: "2048m" cpu: 1 @@ -60,7 +62,7 @@ spec: savepointTriggerNonce: 0 monitoringConfiguration: s3MonitoringConfiguration: - logUri: emr-flink-data/logs + logUri: /logs cloudWatchMonitoringConfiguration: logGroupName: /aws/emr-flink/flink-team-a sideCarResources: diff --git a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml index 48ca2596b..798423084 100644 --- a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml @@ -1,3 +1,6 @@ +# NOTE: Make sure you you replce with your flink_operator_bucket output before running this job +# Replace the with the flink_job_execution_role_arn output +--- apiVersion: 
flink.apache.org/v1beta1 kind: FlinkDeployment metadata: @@ -23,11 +26,11 @@ spec: jobmanager.scheduler: adaptive # Replace with s3 bucket in your own account - state.checkpoints.dir: s3://emr-flink-data/checkpoints - state.savepoints.dir: s3://emr-flink-data/savepoints + state.checkpoints.dir: s3:///checkpoints + state.savepoints.dir: s3:///savepoints # Replace this execution role ARN with your own - executionRoleArn: arn:aws:iam::xxxxxxxxxx:role/emr-eks-flink-flink-team-a-20240406012025932700000008 + executionRoleArn: podTemplate: apiVersion: v1 @@ -41,7 +44,7 @@ spec: jobManager: # Replace with s3 bucket in your own account - storageDir: s3://emr-flink-data/data/basic-example-app-cluster/jobmanager + storageDir: s3:///data/basic-example-app-cluster/jobmanager resource: memory: "2048m" cpu: 1 @@ -60,7 +63,7 @@ spec: savepointTriggerNonce: 0 monitoringConfiguration: s3MonitoringConfiguration: - logUri: emr-flink-data/logs + logUri: /logs cloudWatchMonitoringConfiguration: logGroupName: /aws/emr-flink/flink-team-a sideCarResources: diff --git a/streaming/emr-eks-flink/flink-team.tf b/streaming/emr-eks-flink/flink-team.tf index baf11836c..80d157f38 100644 --- a/streaming/emr-eks-flink/flink-team.tf +++ b/streaming/emr-eks-flink/flink-team.tf @@ -1,5 +1,4 @@ resource "kubernetes_namespace_v1" "flink_team_a" { - metadata { name = "${local.flink_team}-ns" } @@ -24,10 +23,8 @@ module "flink_irsa_jobs" { source = "aws-ia/eks-blueprints-addon/aws" version = "~> 1.0" - # Disable helm release create_release = false - # IAM role for service account (IRSA) create_role = true role_name = "${local.name}-${local.flink_team}" @@ -43,7 +40,6 @@ module "flink_irsa_jobs" { service_account = "emr-containers-sa-*-*-${data.aws_caller_identity.current.account_id}-*" } } - } #--------------------------------------------------------------- @@ -53,10 +49,8 @@ module "flink_irsa_operator" { source = "aws-ia/eks-blueprints-addon/aws" version = "~> 1.0" - # Disable helm release create_release 
= false - # IAM role for service account (IRSA) create_role = true role_name = "${local.name}-operator" @@ -72,10 +66,11 @@ module "flink_irsa_operator" { service_account = "emr-containers-sa-flink-operator" } } - } -#create a log group +#--------------------------------------------------------------- +# Creates a log group +#--------------------------------------------------------------- resource "aws_cloudwatch_log_group" "flink_team_a" { name = "/aws/emr-flink/flink-team-a" retention_in_days = 7 @@ -112,3 +107,50 @@ data "aws_iam_policy_document" "flink_sample_job" { ] } } + +#--------------------------------------------------------------- +# S3 bucket for Flink related data,logs and checkpoint +#--------------------------------------------------------------- +module "s3_bucket" { + source = "terraform-aws-modules/s3-bucket/aws" + version = "~> 3.0" + bucket_prefix = "${local.name}-" + attach_require_latest_tls_policy = true + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true + + server_side_encryption_configuration = { + rule = { + apply_server_side_encryption_by_default = { + sse_algorithm = "AES256" + } + } + } + tags = local.tags +} + +resource "aws_s3_object" "checkpoints" { + bucket = module.s3_bucket.s3_bucket_id + key = "checkpoints/" + content_type = "application/x-directory" +} + +resource "aws_s3_object" "savepoints" { + bucket = module.s3_bucket.s3_bucket_id + key = "savepoints/" + content_type = "application/x-directory" +} + +resource "aws_s3_object" "jobmanager" { + bucket = module.s3_bucket.s3_bucket_id + key = "jobmanager/" + content_type = "application/x-directory" +} + +resource "aws_s3_object" "logs" { + bucket = module.s3_bucket.s3_bucket_id + key = "logs/" + content_type = "application/x-directory" +} diff --git a/streaming/emr-eks-flink/main.tf b/streaming/emr-eks-flink/main.tf index 805d61320..b34e44c99 100644 --- a/streaming/emr-eks-flink/main.tf +++ 
b/streaming/emr-eks-flink/main.tf @@ -9,14 +9,19 @@ locals { flink_team = "flink-team-a" flink_operator = "flink-kubernetes-operator" } + data "aws_eks_cluster_auth" "this" { name = module.eks.cluster_name } + data "aws_ecrpublic_authorization_token" "token" { provider = aws.ecr } + data "aws_availability_zones" "available" {} + data "aws_caller_identity" "current" {} + # This data source can be used to get the latest AMI for Managed Node Groups data "aws_ami" "x86" { owners = ["amazon"] @@ -26,23 +31,23 @@ data "aws_ami" "x86" { values = ["amazon-eks-node-${module.eks.cluster_version}-*"] # Update this for ARM ["amazon-eks-arm64-node-${module.eks.cluster_version}-*"] } } -# create eks cluster + +#--------------------------------------------------------------- +# EKS Cluster +#--------------------------------------------------------------- module "eks" { source = "terraform-aws-modules/eks/aws" version = "~> 19.15" - cluster_name = local.name - cluster_version = var.eks_cluster_version - + cluster_name = local.name + cluster_version = var.eks_cluster_version cluster_endpoint_private_access = true # if true, Kubernetes API requests within your cluster's VPC (such as node to control plane communication) use the private VPC endpoint cluster_endpoint_public_access = true # if true, Your cluster API server is accessible from the internet. You can, optionally, limit the CIDR blocks that can access the public endpoint. 
+ vpc_id = module.vpc.vpc_id + subnet_ids = module.vpc.private_subnets + manage_aws_auth_configmap = true - vpc_id = module.vpc.vpc_id - subnet_ids = module.vpc.private_subnets - - manage_aws_auth_configmap = true aws_auth_roles = [ - # We need to add in the Karpenter node IAM role for nodes launched by Karpenter { rolearn = module.eks_blueprints_addons.karpenter.node_iam_role_arn @@ -107,6 +112,7 @@ module "eks" { AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" } } + eks_managed_node_groups = { # We recommend to have a MNG to place your critical workloads and add-ons # Then rely on Karpenter to scale your workloads @@ -114,34 +120,27 @@ module "eks" { core_node_group = { name = "core-node-group" description = "EKS managed node group example launch template" - - ami_id = data.aws_ami.x86.image_id + ami_id = data.aws_ami.x86.image_id # This will ensure the bootstrap user data is used to join the node # By default, EKS managed node groups will not append bootstrap script; # this adds it back in using the default template provided by the module # Note: this assumes the AMI provided is an EKS optimized AMI derivative enable_bootstrap_user_data = true - # Optional - This is to show how you can pass pre bootstrap data pre_bootstrap_user_data = <<-EOT echo "Node bootstrap process started by Data on EKS" EOT - # Optional - Post bootstrap data to verify anything post_bootstrap_user_data = <<-EOT echo "Bootstrap complete.Ready to Go!" 
EOT - - subnet_ids = module.vpc.private_subnets - - min_size = 3 - max_size = 9 - desired_size = 3 - - force_update_version = true - instance_types = ["m5.xlarge"] - - ebs_optimized = true + subnet_ids = module.vpc.private_subnets + min_size = 3 + max_size = 9 + desired_size = 3 + force_update_version = true + instance_types = ["m5.xlarge"] + ebs_optimized = true block_device_mappings = { xvda = { device_name = "/dev/xvda" @@ -151,16 +150,13 @@ module "eks" { } } } - update_config = { max_unavailable_percentage = 50 } - labels = { WorkerType = "ON_DEMAND" NodeGroupType = "core" } - tags = { Name = "core-node-grp", "karpenter.sh/discovery" = local.name diff --git a/streaming/emr-eks-flink/outputs.tf b/streaming/emr-eks-flink/outputs.tf index 1efc9632b..c09749ec1 100644 --- a/streaming/emr-eks-flink/outputs.tf +++ b/streaming/emr-eks-flink/outputs.tf @@ -1,27 +1,25 @@ -# create output for flink operator role arn -output "flink_jobs_role_arn" { - value = trimspace(module.flink_irsa_jobs.iam_role_arn) +################################################################################ +# EMR Flink operator +################################################################################ +output "flink_job_execution_role_arn" { + value = module.flink_irsa_jobs.iam_role_arn description = "IAM linked role for the flink job" } + output "flink_operator_role_arn" { value = module.flink_irsa_operator.iam_role_arn description = "IAM linked role for the flink operator" } -output "flink_checkpoint_path" { - value = "s3://${module.s3_bucket.s3_bucket_id}/checkpoints" - description = "S3 path for checkpoint data" -} -output "flink_savepoint_path" { - value = "s3://${module.s3_bucket.s3_bucket_id}/savepoints" - description = "S3 path for savepoint data" -} -output "flink_jobmanager_path" { - value = "s3://${module.s3_bucket.s3_bucket_id}/jobmanager" - description = "S3 path for jobmanager data" +output "flink_operator_bucket" { + value = module.s3_bucket.s3_bucket_id + description = 
"S3 bucket name for Flink operator data,logs,checkpoint and savepoint" } -output "flink_logs_path" { - value = "s3://${module.s3_bucket.s3_bucket_id}/logs" - description = "S3 path for logs" +################################################################################ +# EKS Managed Node Group +################################################################################ +output "configure_kubectl" { + description = "Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig" + value = "aws eks --region ${local.region} update-kubeconfig --name ${module.eks.cluster_name}" } diff --git a/streaming/emr-eks-flink/providers.tf b/streaming/emr-eks-flink/providers.tf index d7b000730..09aa297b0 100644 --- a/streaming/emr-eks-flink/providers.tf +++ b/streaming/emr-eks-flink/providers.tf @@ -1,11 +1,8 @@ provider "aws" { region = local.region - } - - # ECR always authenticates with `us-east-1` region # Docs -> https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html provider "aws" { diff --git a/streaming/emr-eks-flink/variables.tf b/streaming/emr-eks-flink/variables.tf index d9204d6ad..e19d5ad69 100644 --- a/streaming/emr-eks-flink/variables.tf +++ b/streaming/emr-eks-flink/variables.tf @@ -1,9 +1,3 @@ -#create a variable -variable "eks_cluster_version" { - type = string - default = "1.28" - description = "EKS version for the cluster" -} variable "region" { type = string default = "us-west-2" @@ -15,3 +9,9 @@ variable "name" { default = "emr-eks-flink" type = string } + +variable "eks_cluster_version" { + type = string + default = "1.28" + description = "EKS version for the cluster" +} diff --git a/streaming/emr-eks-flink/versions.tf b/streaming/emr-eks-flink/versions.tf index 6eeabc436..a18160120 100644 --- a/streaming/emr-eks-flink/versions.tf +++ b/streaming/emr-eks-flink/versions.tf @@ -28,6 +28,6 @@ terraform { # backend "s3" { # bucket = 
"doeks-github-actions-e2e-test-state" # region = "us-west-2" - # key = "e2e/flink/terraform.tfstate" + # key = "e2e/emr-eks-flink/terraform.tfstate" # } } diff --git a/streaming/emr-eks-flink/vpc.tf b/streaming/emr-eks-flink/vpc.tf index cddc5a13c..38f9ce7b2 100644 --- a/streaming/emr-eks-flink/vpc.tf +++ b/streaming/emr-eks-flink/vpc.tf @@ -1,25 +1,24 @@ -#import module vpc +#--------------------------------------------------------------- +# Supporting Network Resources +#--------------------------------------------------------------- +# WARNING: This VPC module includes the creation of an Internet Gateway and NAT Gateway, which simplifies cluster deployment and testing, primarily intended for sandbox accounts. +# IMPORTANT: For preprod and prod use cases, it is crucial to consult with your security team and AWS architects to design a private infrastructure solution that aligns with your security requirements module "vpc" { source = "terraform-aws-modules/vpc/aws" version = "5.5.1" - name = local.name - - cidr = "10.0.0.0/16" - azs = slice(data.aws_availability_zones.available.names, 0, 3) - - private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] - public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] - + name = local.name + cidr = "10.0.0.0/16" + azs = slice(data.aws_availability_zones.available.names, 0, 3) + private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] + public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] enable_nat_gateway = true single_nat_gateway = true enable_dns_hostnames = true - public_subnet_tags = { "kubernetes.io/cluster/${local.name}" = "shared" "kubernetes.io/role/elb" = 1 } - private_subnet_tags = { "kubernetes.io/cluster/${local.name}" = "shared" "kubernetes.io/role/internal-elb" = 1 From bfb8b3743052ad2a1fb1d9e98e4b7778a1ddaa83 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 24 Apr 2024 11:13:02 -0700 Subject: [PATCH 22/25] updated website docs --- .../streaming-platforms/emr-eks-flink.md 
| 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md index 3e56e976a..8754a1743 100644 --- a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md +++ b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md @@ -1,5 +1,5 @@ --- -sidebar_position: 6 +sidebar_position: 3 title: EMR on EKS with Flink Streaming --- @@ -273,27 +273,14 @@ cd data-on-eks/streaming/emr-eks-flink/examples/karpenter ``` Get the role arn linked to the job execution service account. ```bash -terraform output flink_jobs_role_arn - -"arn:aws:iam::681921237057:role/emr-eks-flink-flink-team-a-20240402170622025900000001" +terraform output flink_job_execution_role_arn ``` -Get the S3 bucket paths for checkpoint,savepoint,logs and job storage data. +Get the S3 bucket name for checkpoint,savepoint,logs and job storage data. ```bash -terraform output flink_checkpoint_path -"s3://emr-eks-flink-20240417234319144300000001/checkpoints" - -terraform output flink_savepoint_path -"s3://emr-eks-flink-20240417234319144300000001/savepoints" - -terraform output flink_jobmanager_path -"s3://emr-eks-flink-20240417234319144300000001/jobmanager" - -terraform output flink_logs_path -"s3://emr-eks-flink-20240417234319144300000001/logs" - +terraform output flink_operator_bucket ``` -Open the basic-example-app-cluster.yaml in any editor and replace the place holder for **REPLACE_WITH_JOB_EXECUTION_ROLE_ARN** with the role ARN that you got from the terraform output command. Replace the s3 object paths with the s3 object paths that you created in previous step.The monitoring jobUri only takes the object name. +Open the basic-example-app-cluster.yaml in any editor and replace the place holder for **JOB_EXECUTION_ROLE_ARN** with the flink_job_execution_role_arn terraform output command. 
Replace the **ENTER_S3_BUCKET** placeholder with the flink_operator_bucket output. Deploy the job by running the kubectl deply command. From b3538e137729f9195241329628946d996f75ae08 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 24 Apr 2024 12:44:29 -0700 Subject: [PATCH 23/25] fixed typos and helm config --- streaming/emr-eks-flink/addons.tf | 5 ----- .../examples/cluster-autoscaler/flink-sample-job.yaml | 4 ++-- .../examples/karpenter/basic-example-app-cluster.yaml | 4 ++-- .../examples/karpenter/graviton-example-app-cluster.yaml | 4 ++-- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/streaming/emr-eks-flink/addons.tf b/streaming/emr-eks-flink/addons.tf index ce15fc8c0..0c884dfba 100644 --- a/streaming/emr-eks-flink/addons.tf +++ b/streaming/emr-eks-flink/addons.tf @@ -120,12 +120,7 @@ module "eks_data_addons" { #--------------------------------------------------------------- enable_emr_flink_operator = true emr_flink_operator_helm_config = { - namespace = "emr-flink-operator" - create_namespace = true - namespace = "${local.flink_operator}-ns" - name = "flink-kubernetes-operator" repository = "oci://public.ecr.aws/emr-on-eks" - chart = "flink-kubernetes-operator" operatorExecutionRoleArn = module.flink_irsa_operator.iam_role_arn } diff --git a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml index 8bb36510f..a4d109cf1 100644 --- a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml +++ b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml @@ -1,5 +1,5 @@ -# NOTE: Make sure you you replce with your flink_operator_bucket output before running this job -# Replace the with the flink_job_execution_role_arn output +# NOTE: Make sure you replace with your S3 Bucket before running this job. +# Replace the with the Flink_job_execution_role_arn output. 
--- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment diff --git a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml index 9b9e63848..4f2574f7a 100644 --- a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml @@ -1,5 +1,5 @@ -# NOTE: Make sure you you replce and with your S3 Bucket before running this job. -# Replace with flink_job_execution_role_arn +# NOTE: Make sure you replace with your S3 Bucket before running this job. +# Replace the with the Flink_job_execution_role_arn output. --- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment diff --git a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml index 798423084..da3123614 100644 --- a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml @@ -1,5 +1,5 @@ -# NOTE: Make sure you you replce with your flink_operator_bucket output before running this job -# Replace the with the flink_job_execution_role_arn output +# NOTE: Make sure you replace with your S3 Bucket before running this job. +# Replace the with the Flink_job_execution_role_arn output. 
--- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment From dd43a6a749e056d88b9bd195b1ab8b7b50b2ffd1 Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Wed, 24 Apr 2024 12:48:39 -0700 Subject: [PATCH 24/25] fixed case of the output --- .../examples/cluster-autoscaler/flink-sample-job.yaml | 2 +- .../examples/karpenter/basic-example-app-cluster.yaml | 2 +- .../examples/karpenter/graviton-example-app-cluster.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml index a4d109cf1..875200393 100644 --- a/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml +++ b/streaming/emr-eks-flink/examples/cluster-autoscaler/flink-sample-job.yaml @@ -1,5 +1,5 @@ # NOTE: Make sure you replace with your S3 Bucket before running this job. -# Replace the with the Flink_job_execution_role_arn output. +# Replace the with the flink_job_execution_role_arn output. --- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment diff --git a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml index 4f2574f7a..51992b810 100644 --- a/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/basic-example-app-cluster.yaml @@ -1,5 +1,5 @@ # NOTE: Make sure you replace with your S3 Bucket before running this job. -# Replace the with the Flink_job_execution_role_arn output. +# Replace the with the flink_job_execution_role_arn output. 
--- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment diff --git a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml index da3123614..720ffdc6f 100644 --- a/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml +++ b/streaming/emr-eks-flink/examples/karpenter/graviton-example-app-cluster.yaml @@ -1,5 +1,5 @@ # NOTE: Make sure you replace with your S3 Bucket before running this job. -# Replace the with the Flink_job_execution_role_arn output. +# Replace the with the flink_job_execution_role_arn output. --- apiVersion: flink.apache.org/v1beta1 kind: FlinkDeployment From 24585f27790961828969be0369f8f3cdd395e4bc Mon Sep 17 00:00:00 2001 From: Mithun Mallick Date: Thu, 25 Apr 2024 09:17:10 -0700 Subject: [PATCH 25/25] fixed typo --- website/docs/blueprints/streaming-platforms/emr-eks-flink.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md index 8754a1743..b417bf151 100644 --- a/website/docs/blueprints/streaming-platforms/emr-eks-flink.md +++ b/website/docs/blueprints/streaming-platforms/emr-eks-flink.md @@ -282,7 +282,7 @@ terraform output flink_operator_bucket Open the basic-example-app-cluster.yaml in any editor and replace the place holder for **JOB_EXECUTION_ROLE_ARN** with the flink_job_execution_role_arn terraform output command. Replace the **ENTER_S3_BUCKET** placeholder with the flink_operator_bucket output. -Deploy the job by running the kubectl deply command. +Deploy the job by running the kubectl deploy command. ```bash kubectl apply -f basic-example-app-cluster.yaml