diff --git a/schedulers/terraform/managed-airflow-mwaa/README.md b/schedulers/terraform/managed-airflow-mwaa/README.md
index b900d1413..f952b4bff 100644
--- a/schedulers/terraform/managed-airflow-mwaa/README.md
+++ b/schedulers/terraform/managed-airflow-mwaa/README.md
@@ -1,5 +1,5 @@
# Amazon Managed Workflows for Apache Airflow (MWAA)
-Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/job-schedulers/aws-managed-airflow) to deploy this pattern and run sample tests.
+Check out the [documentation website](https://awslabs.github.io/data-on-eks/docs/blueprints/job-schedulers/aws-managed-airflow) to deploy this pattern and run sample tests.
## Requirements
@@ -24,8 +24,10 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
| Name | Source | Version |
|------|--------|---------|
-| [eks\_blueprints](#module\_eks\_blueprints) | github.com/aws-ia/terraform-aws-eks-blueprints | v4.32.1 |
-| [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons | v4.32.1 |
+| [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.20 |
+| [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 |
+| [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 |
+| [emr\_containers](#module\_emr\_containers) | terraform-aws-modules/emr/aws//modules/virtual-cluster | ~> 1.0 |
| [mwaa](#module\_mwaa) | aws-ia/mwaa/aws | 0.0.4 |
| [s3\_bucket](#module\_s3\_bucket) | terraform-aws-modules/s3-bucket/aws | ~> 3.0 |
| [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
diff --git a/schedulers/terraform/managed-airflow-mwaa/addons.tf b/schedulers/terraform/managed-airflow-mwaa/addons.tf
new file mode 100644
index 000000000..7563c7409
--- /dev/null
+++ b/schedulers/terraform/managed-airflow-mwaa/addons.tf
@@ -0,0 +1,54 @@
+#------------------------------------------------------------------------
+# EKS Addons
+#------------------------------------------------------------------------
+module "eks_blueprints_addons" {
+ # Users should pin the version to the latest available release
+ # tflint-ignore: terraform_module_pinned_source
+ source = "aws-ia/eks-blueprints-addons/aws"
+ version = "~> 1.2"
+
+ cluster_name = module.eks.cluster_name
+ cluster_endpoint = module.eks.cluster_endpoint
+ cluster_version = module.eks.cluster_version
+ oidc_provider_arn = module.eks.oidc_provider_arn
+
+ #---------------------------------------
+ # Amazon EKS Managed Add-ons
+ #---------------------------------------
+ eks_addons = {
+ aws-ebs-csi-driver = {
+ service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn
+ }
+ coredns = {
+ preserve = true
+ }
+ vpc-cni = {
+ preserve = true
+ }
+ kube-proxy = {
+ preserve = true
+ }
+ }
+
+ enable_metrics_server = true
+ enable_cluster_autoscaler = true
+
+ tags = local.tags
+}
+
+#---------------------------------------------------------------
+# IRSA for EBS CSI Driver
+#---------------------------------------------------------------
+module "ebs_csi_driver_irsa" {
+ source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
+ version = "~> 5.20"
+ role_name = format("%s-%s", local.name, "ebs-csi-driver")
+ attach_ebs_csi_policy = true
+ oidc_providers = {
+ main = {
+ provider_arn = module.eks.oidc_provider_arn
+ namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"]
+ }
+ }
+ tags = local.tags
+}
diff --git a/schedulers/terraform/managed-airflow-mwaa/cleanup.sh b/schedulers/terraform/managed-airflow-mwaa/cleanup.sh
new file mode 100644
index 000000000..3e6223b61
--- /dev/null
+++ b/schedulers/terraform/managed-airflow-mwaa/cleanup.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+set -o errexit
+set -o pipefail
+
+targets=(
+ "module.emr_containers"
+ "module.eks_blueprints_addons"
+ "module.eks"
+ "module.mwaa"
+)
+
+#-------------------------------------------
+# Helps delete namespaces stuck in the "Terminating" state.
+# Rerun the cleanup.sh script to detect and delete any stuck resources.
+#-------------------------------------------
+terminating_namespaces=$(kubectl get namespaces --field-selector status.phase=Terminating -o json | jq -r '.items[].metadata.name')
+
+# If there are no terminating namespaces, skip the namespace cleanup
+if [[ -z $terminating_namespaces ]]; then
+ echo "No terminating namespaces found"
+fi
+
+for ns in $terminating_namespaces; do
+ echo "Terminating namespace: $ns"
+  kubectl get namespace "$ns" -o json | sed 's/"kubernetes"//' | kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f -
+done
+
+#-------------------------------------------
+# Terraform destroy per module target
+#-------------------------------------------
+for target in "${targets[@]}"
+do
+  echo "Destroying module $target..."
+  destroy_output=$(terraform destroy -target="$target" -auto-approve 2>&1 | tee /dev/tty)
+  if [[ ${PIPESTATUS[0]} -eq 0 && $destroy_output == *"Destroy complete!"* ]]; then
+    echo "SUCCESS: Terraform destroy of $target completed successfully"
+  else
+    echo "FAILED: Terraform destroy of $target failed"
+    exit 1
+  fi
+done
+
+#-------------------------------------------
+# Terraform destroy full
+#-------------------------------------------
+destroy_output=$(terraform destroy -auto-approve 2>&1 | tee /dev/tty)
+if [[ ${PIPESTATUS[0]} -eq 0 && $destroy_output == *"Destroy complete!"* ]]; then
+ echo "SUCCESS: Terraform destroy of all targets completed successfully"
+else
+ echo "FAILED: Terraform destroy of all targets failed"
+ exit 1
+fi
diff --git a/schedulers/terraform/managed-airflow-mwaa/data.tf b/schedulers/terraform/managed-airflow-mwaa/data.tf
index 7ddff9a0a..2cf8955b7 100644
--- a/schedulers/terraform/managed-airflow-mwaa/data.tf
+++ b/schedulers/terraform/managed-airflow-mwaa/data.tf
@@ -1,5 +1,5 @@
data "aws_eks_cluster_auth" "this" {
- name = module.eks_blueprints.eks_cluster_id
+ name = module.eks.cluster_name
}
data "aws_availability_zones" "available" {}
@@ -10,36 +10,6 @@ data "aws_region" "current" {}
data "aws_partition" "current" {}
-data "aws_iam_policy_document" "emr_on_eks" {
- statement {
- sid = ""
- effect = "Allow"
- resources = ["arn:${data.aws_partition.current.partition}:s3:::*"]
-
- actions = [
- "s3:DeleteObject",
- "s3:DeleteObjectVersion",
- "s3:GetObject",
- "s3:ListBucket",
- "s3:PutObject",
- ]
- }
-
- statement {
- sid = ""
- effect = "Allow"
- resources = ["arn:${data.aws_partition.current.partition}:logs:${data.aws_region.current.id}:${data.aws_caller_identity.current.account_id}:log-group:*"]
-
- actions = [
- "logs:CreateLogGroup",
- "logs:CreateLogStream",
- "logs:DescribeLogGroups",
- "logs:DescribeLogStreams",
- "logs:PutLogEvents",
- ]
- }
-}
-
data "aws_iam_policy_document" "mwaa_emrjob" {
statement {
actions = [
diff --git a/schedulers/terraform/managed-airflow-mwaa/eks.tf b/schedulers/terraform/managed-airflow-mwaa/eks.tf
index be4304e1c..4a332bb34 100644
--- a/schedulers/terraform/managed-airflow-mwaa/eks.tf
+++ b/schedulers/terraform/managed-airflow-mwaa/eks.tf
@@ -1,60 +1,104 @@
-#---------------------------------------------------------------
-# EKS Blueprints
-#---------------------------------------------------------------
-module "eks_blueprints" {
- source = "github.com/aws-ia/terraform-aws-eks-blueprints?ref=v4.32.1"
+module "eks" {
+ source = "terraform-aws-modules/eks/aws"
+ version = "~> 19.15"
cluster_name = local.name
cluster_version = var.eks_cluster_version
- vpc_id = module.vpc.vpc_id
- private_subnet_ids = module.vpc.private_subnets
+  cluster_endpoint_public_access = true # if true, your cluster API server is accessible from the internet; you can optionally limit the CIDR blocks that can access the public endpoint.
- cluster_kms_key_additional_admin_arns = [data.aws_caller_identity.current.arn]
+ vpc_id = module.vpc.vpc_id
+ subnet_ids = module.vpc.private_subnets
- # Add MWAA IAM Role to aws-auth configmap
- map_roles = [
+ manage_aws_auth_configmap = true
+ aws_auth_roles = [
{
rolearn = module.mwaa.mwaa_role_arn
username = "mwaa-service"
groups = ["system:masters"]
+ },
+ {
+ # Required for EMR on EKS virtual cluster
+ rolearn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/AWSServiceRoleForAmazonEMRContainers"
+ username = "emr-containers"
+ groups = []
}
]
- managed_node_groups = {
- # EKS MANAGED NODE GROUPS
- # We recommend to have a MNG to place your critical workloads and add-ons
- # Then rely on Karpenter to scale your workloads
- # You can also make uses on nodeSelector and Taints/tolerations to spread workloads on MNG or Karpenter provisioners
- mng1 = {
- node_group_name = "core-node-grp"
- subnet_ids = module.vpc.private_subnets
+ #---------------------------------------
+  # Note: These rules can be further restricted to the specific ports required by each add-on and your application
+ #---------------------------------------
+ # Extend cluster security group rules
+ cluster_security_group_additional_rules = {
+ ingress_nodes_ephemeral_ports_tcp = {
+ description = "Nodes on ephemeral ports"
+ protocol = "tcp"
+ from_port = 1025
+ to_port = 65535
+ type = "ingress"
+ source_node_security_group = true
+ }
- instance_types = ["m5.xlarge"]
- ami_type = "AL2_x86_64"
- capacity_type = "ON_DEMAND"
+ allow_access_from_mwaa = {
+      description = "Allow access from MWAA on ephemeral ports"
+ protocol = "tcp"
+ from_port = 1025
+ to_port = 65535
+ type = "ingress"
+ cidr_blocks = [local.vpc_cidr]
+ }
+ }
- disk_size = 100
- disk_type = "gp3"
+ # Extend node-to-node security group rules
+ node_security_group_additional_rules = {
+    # Recommended and required for several add-ons
+ ingress_self_all = {
+ description = "Node to node all ports/protocols"
+ protocol = "-1"
+ from_port = 0
+ to_port = 0
+ type = "ingress"
+ self = true
+ }
- max_size = 9
- min_size = 3
- desired_size = 3
- create_launch_template = true
- launch_template_os = "amazonlinux2eks"
+ }
- update_config = [{
- max_unavailable_percentage = 50
- }]
+ eks_managed_node_groups = {
+    # We recommend having a managed node group (MNG) for critical workloads and add-ons,
+    # then relying on Karpenter to scale your workloads.
+    # You can also use nodeSelector and taints/tolerations to spread workloads across MNG or Karpenter provisioners.
+ core_node_group = {
+ name = "core-node-group"
+ subnet_ids = module.vpc.private_subnets
+
+ max_size = 9
+ min_size = 3
+ desired_size = 3
+
+ force_update_version = true
+ instance_types = ["m5.xlarge"]
+ ami_type = "AL2_x86_64"
+ capacity_type = "ON_DEMAND"
+
+ ebs_optimized = true
+ block_device_mappings = {
+ xvda = {
+ device_name = "/dev/xvda"
+ ebs = {
+ volume_size = 100
+ volume_type = "gp3"
+ }
+ }
+ }
- k8s_labels = {
- Environment = "preprod"
- Zone = "test"
+ labels = {
WorkerType = "ON_DEMAND"
NodeGroupType = "core"
+ Environment = "preprod"
+ Zone = "test"
}
- additional_tags = {
+ tags = {
Name = "core-node-grp"
subnet_type = "private"
"k8s.io/cluster-autoscaler/node-template/label/arch" = "x86"
@@ -64,138 +108,8 @@ module "eks_blueprints" {
"k8s.io/cluster-autoscaler/experiments" = "owned"
"k8s.io/cluster-autoscaler/enabled" = "true"
}
- },
- }
-
- #---------------------------------------
- # ENABLE EMR ON EKS
- # 1. Creates namespace
- # 2. k8s role and role binding(emr-containers user) for the above namespace
- # 3. IAM role for the team execution role
- # 4. Update AWS_AUTH config map with emr-containers user and AWSServiceRoleForAmazonEMRContainers role
- # 5. Create a trust relationship between the job execution role and the identity of the EMR managed service account
- #---------------------------------------
- enable_emr_on_eks = true
- emr_on_eks_teams = {
- emr-mwaa-team = {
- namespace = "emr-mwaa"
- job_execution_role = "emr-eks-mwaa-team"
- additional_iam_policies = [aws_iam_policy.emr_on_eks.arn]
}
}
tags = local.tags
}
-
-#------------------------------------------------------------------------
-# Kubernetes Add-on Module
-#------------------------------------------------------------------------
-module "eks_blueprints_addons" {
- source = "github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons?ref=v4.32.1"
-
- eks_cluster_id = module.eks_blueprints.eks_cluster_id
- eks_cluster_endpoint = module.eks_blueprints.eks_cluster_endpoint
- eks_oidc_provider = module.eks_blueprints.oidc_provider
- eks_cluster_version = module.eks_blueprints.eks_cluster_version
-
- # EKS Managed Add-ons
- enable_amazon_eks_vpc_cni = true
- enable_amazon_eks_coredns = true
- enable_amazon_eks_kube_proxy = true
- enable_amazon_eks_aws_ebs_csi_driver = true
-
- enable_metrics_server = true
- enable_cluster_autoscaler = true
-
- tags = local.tags
-}
-#---------------------------------------------------------------
-# Example IAM policies for EMR job execution
-#---------------------------------------------------------------
-resource "aws_iam_policy" "emr_on_eks" {
- name = format("%s-%s", local.name, "emr-job-iam-policies")
- description = "IAM policy for EMR on EKS Job execution"
- path = "/"
- policy = data.aws_iam_policy_document.emr_on_eks.json
-}
-
-#---------------------------------------------------------------
-# Create EMR on EKS Virtual Cluster
-#---------------------------------------------------------------
-resource "aws_emrcontainers_virtual_cluster" "this" {
- name = format("%s-%s", module.eks_blueprints.eks_cluster_id, "emr-mwaa-team")
-
- container_provider {
- id = module.eks_blueprints.eks_cluster_id
- type = "EKS"
-
- info {
- eks_info {
- namespace = "emr-mwaa"
- }
- }
- }
-}
-#------------------------------------------------------------------------
-# Create K8s Namespace and Role for mwaa access directly
-#------------------------------------------------------------------------
-
-resource "kubernetes_namespace_v1" "mwaa" {
- metadata {
- name = "mwaa"
- }
-}
-
-resource "kubernetes_role_v1" "mwaa" {
- metadata {
- name = "mwaa-role"
- namespace = kubernetes_namespace_v1.mwaa.metadata[0].name
- }
-
- rule {
- api_groups = [
- "",
- "apps",
- "batch",
- "extensions",
- ]
- resources = [
- "jobs",
- "pods",
- "pods/attach",
- "pods/exec",
- "pods/log",
- "pods/portforward",
- "secrets",
- "services",
- ]
- verbs = [
- "create",
- "delete",
- "describe",
- "get",
- "list",
- "patch",
- "update",
- ]
- }
-}
-
-resource "kubernetes_role_binding_v1" "mwaa" {
- metadata {
- name = "mwaa-role-binding"
- namespace = kubernetes_namespace_v1.mwaa.metadata[0].name
- }
-
- role_ref {
- api_group = "rbac.authorization.k8s.io"
- kind = "Role"
- name = kubernetes_namespace_v1.mwaa.metadata[0].name
- }
-
- subject {
- kind = "User"
- name = "mwaa-service"
- api_group = "rbac.authorization.k8s.io"
- }
-}
diff --git a/schedulers/terraform/managed-airflow-mwaa/emr-eks.tf b/schedulers/terraform/managed-airflow-mwaa/emr-eks.tf
new file mode 100644
index 000000000..35a9bcfef
--- /dev/null
+++ b/schedulers/terraform/managed-airflow-mwaa/emr-eks.tf
@@ -0,0 +1,17 @@
+#----------------------------------------------------------------------------
+# EMR on EKS
+#----------------------------------------------------------------------------
+module "emr_containers" {
+ source = "terraform-aws-modules/emr/aws//modules/virtual-cluster"
+ version = "~> 1.0"
+
+ eks_cluster_id = module.eks.cluster_name
+ oidc_provider_arn = module.eks.oidc_provider_arn
+
+ name = format("%s-%s", module.eks.cluster_name, "emr-mwaa-team")
+ namespace = "emr-mwaa"
+ iam_role_description = "EMR execution role emr-eks-mwaa-team"
+ iam_role_additional_policies = ["arn:aws:iam::aws:policy/AmazonS3FullAccess"]
+
+ tags = merge(local.tags, { Name = "emr-mwaa" })
+}
diff --git a/schedulers/terraform/managed-airflow-mwaa/install.sh b/schedulers/terraform/managed-airflow-mwaa/install.sh
new file mode 100644
index 000000000..64b00cd73
--- /dev/null
+++ b/schedulers/terraform/managed-airflow-mwaa/install.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+read -p "Enter the region: " region
+export AWS_DEFAULT_REGION=$region
+
+echo "Initializing ..."
+terraform init || { echo "\"terraform init\" failed"; exit 1; }
+
+# List of Terraform modules to apply in sequence
+targets=(
+ "module.vpc"
+ "module.mwaa"
+ "module.eks"
+ "module.eks_blueprints_addons"
+ "module.emr_containers"
+)
+
+# Apply modules in sequence
+for target in "${targets[@]}"
+do
+ echo "Applying module $target..."
+ apply_output=$(terraform apply -target="$target" -auto-approve 2>&1 | tee /dev/tty)
+ if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then
+ echo "SUCCESS: Terraform apply of $target completed successfully"
+ else
+ echo "FAILED: Terraform apply of $target failed"
+ exit 1
+ fi
+done
+
+# Final apply to catch any remaining resources
+echo "Applying remaining resources..."
+apply_output=$(terraform apply -auto-approve 2>&1 | tee /dev/tty)
+if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then
+ echo "SUCCESS: Terraform apply of all modules completed successfully"
+else
+ echo "FAILED: Terraform apply of all modules failed"
+ exit 1
+fi
diff --git a/schedulers/terraform/managed-airflow-mwaa/main.tf b/schedulers/terraform/managed-airflow-mwaa/main.tf
index f33ca7dc8..2de30958c 100644
--- a/schedulers/terraform/managed-airflow-mwaa/main.tf
+++ b/schedulers/terraform/managed-airflow-mwaa/main.tf
@@ -1,173 +1,17 @@
-#------------------------------------------------------------------------
-# AWS MWAA Module
-#------------------------------------------------------------------------
-
-module "mwaa" {
- source = "aws-ia/mwaa/aws"
- version = "0.0.4"
-
- name = local.name
- airflow_version = "2.2.2"
- environment_class = "mw1.medium" # mw1.small / mw1.medium / mw1.large
- webserver_access_mode = "PUBLIC_ONLY" # Default PRIVATE_ONLY for production environments
-
- create_s3_bucket = false
- source_bucket_arn = module.s3_bucket.s3_bucket_arn
-
- dag_s3_path = local.dag_s3_path
- requirements_s3_path = "${local.dag_s3_path}/requirements.txt"
-
- min_workers = 1
- max_workers = 25
-
- vpc_id = module.vpc.vpc_id
- private_subnet_ids = slice(module.vpc.private_subnets, 0, 2) # Required 2 subnets only
- source_cidr = [module.vpc.vpc_cidr_block] # Add your IP here to access Airflow UI
-
- airflow_configuration_options = {
- "core.load_default_connections" = "false"
- "core.load_examples" = "false"
- "webserver.dag_default_view" = "tree"
- "webserver.dag_orientation" = "TB"
- "logging.logging_level" = "INFO"
- }
-
- logging_configuration = {
- dag_processing_logs = {
- enabled = true
- log_level = "INFO"
- }
-
- scheduler_logs = {
- enabled = true
- log_level = "INFO"
- }
-
- task_logs = {
- enabled = true
- log_level = "INFO"
- }
-
- webserver_logs = {
- enabled = true
- log_level = "INFO"
- }
-
- worker_logs = {
- enabled = true
- log_level = "INFO"
- }
- }
-
- tags = local.tags
-}
-
-#------------------------------------------------------------------------
-# Additional IAM policies for MWAA execution role to run emr on eks job
-#------------------------------------------------------------------------
-resource "aws_iam_policy" "this" {
- name = format("%s-%s", local.name, "mwaa-emr-job")
- description = "IAM policy for MWAA RUN EMR on EKS Job execution"
- path = "/"
- policy = data.aws_iam_policy_document.mwaa_emrjob.json
+provider "aws" {
+ region = local.region
}
-resource "aws_iam_role_policy_attachment" "this" {
- role = module.mwaa.mwaa_role_name
- policy_arn = aws_iam_policy.this.arn
+provider "kubernetes" {
+ host = module.eks.cluster_endpoint
+ cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
+ token = data.aws_eks_cluster_auth.this.token
}
-#------------------------------------------------------------------------
-# Dags and Requirements
-#------------------------------------------------------------------------
-
-#tfsec:ignore:*
-module "s3_bucket" {
- source = "terraform-aws-modules/s3-bucket/aws"
- version = "~> 3.0"
-
- bucket = "mwaa-${random_id.this.hex}"
- acl = "private"
-
- # For example only - please evaluate for your environment
- force_destroy = true
-
- attach_deny_insecure_transport_policy = true
- attach_require_latest_tls_policy = true
-
- block_public_acls = true
- block_public_policy = true
- ignore_public_acls = true
- restrict_public_buckets = true
-
- server_side_encryption_configuration = {
- rule = {
- apply_server_side_encryption_by_default = {
- sse_algorithm = "AES256"
- }
- }
+provider "helm" {
+ kubernetes {
+ host = module.eks.cluster_endpoint
+ cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
+ token = data.aws_eks_cluster_auth.this.token
}
-
- tags = local.tags
-}
-
-# Kubeconfig is required for KubernetesPodOperator
-# https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/stable/operators.html
-locals {
- kubeconfig = yamlencode({
- apiVersion = "v1"
- kind = "Config"
- current-context = "mwaa"
- clusters = [{
- name = module.eks_blueprints.eks_cluster_arn
- cluster = {
- certificate-authority-data = module.eks_blueprints.eks_cluster_certificate_authority_data
- server = module.eks_blueprints.eks_cluster_endpoint
- }
- }]
- contexts = [{
- name = "mwaa" # must match KubernetesPodOperator context
- context = {
- cluster = module.eks_blueprints.eks_cluster_arn
- user = "mwaa"
- }
- }]
- users = [{
- name = "mwaa"
- user = {
- exec = {
- apiVersion = "client.authentication.k8s.io/v1beta1"
- command = "aws"
- args = [
- "--region",
- local.region,
- "eks",
- "get-token",
- "--cluster-name",
- local.name
- ]
- }
- }
- }]
- })
-}
-
-resource "aws_s3_bucket_object" "kube_config" {
- bucket = module.s3_bucket.s3_bucket_id
- key = "${local.dag_s3_path}/kube_config.yaml"
- content = local.kubeconfig
- etag = md5(local.kubeconfig)
-}
-
-resource "aws_s3_bucket_object" "uploads" {
- for_each = fileset("${local.dag_s3_path}/", "*")
-
- bucket = module.s3_bucket.s3_bucket_id
- key = "${local.dag_s3_path}/${each.value}"
- source = "${local.dag_s3_path}/${each.value}"
- etag = filemd5("${local.dag_s3_path}/${each.value}")
-}
-
-resource "random_id" "this" {
- byte_length = "2"
}
diff --git a/schedulers/terraform/managed-airflow-mwaa/mwaa.tf b/schedulers/terraform/managed-airflow-mwaa/mwaa.tf
new file mode 100644
index 000000000..46bb08c71
--- /dev/null
+++ b/schedulers/terraform/managed-airflow-mwaa/mwaa.tf
@@ -0,0 +1,241 @@
+#------------------------------------------------------------------------
+# AWS MWAA Module
+#------------------------------------------------------------------------
+
+module "mwaa" {
+ source = "aws-ia/mwaa/aws"
+ version = "0.0.4"
+
+ depends_on = [aws_s3_object.uploads, module.vpc_endpoints]
+
+ name = local.name
+ airflow_version = "2.5.1"
+ environment_class = "mw1.medium" # mw1.small / mw1.medium / mw1.large
+ webserver_access_mode = "PUBLIC_ONLY" # Default PRIVATE_ONLY for production environments
+
+ create_s3_bucket = false
+ source_bucket_arn = module.s3_bucket.s3_bucket_arn
+
+ dag_s3_path = local.dag_s3_path
+ requirements_s3_path = "${local.dag_s3_path}/requirements.txt"
+
+ min_workers = 1
+ max_workers = 25
+
+ vpc_id = module.vpc.vpc_id
+ private_subnet_ids = slice(module.vpc.private_subnets, 0, 2) # Required 2 subnets only
+ source_cidr = [module.vpc.vpc_cidr_block] # Add your IP here to access Airflow UI
+
+ airflow_configuration_options = {
+ "core.load_default_connections" = "false"
+ "core.load_examples" = "false"
+ "webserver.dag_default_view" = "tree"
+ "webserver.dag_orientation" = "TB"
+ "logging.logging_level" = "INFO"
+ }
+
+ logging_configuration = {
+ dag_processing_logs = {
+ enabled = true
+ log_level = "INFO"
+ }
+
+ scheduler_logs = {
+ enabled = true
+ log_level = "INFO"
+ }
+
+ task_logs = {
+ enabled = true
+ log_level = "INFO"
+ }
+
+ webserver_logs = {
+ enabled = true
+ log_level = "INFO"
+ }
+
+ worker_logs = {
+ enabled = true
+ log_level = "INFO"
+ }
+ }
+
+ tags = local.tags
+}
+
+#------------------------------------------------------------------------
+# Additional IAM policies for MWAA execution role to run emr on eks job
+#------------------------------------------------------------------------
+resource "aws_iam_policy" "this" {
+ name = format("%s-%s", local.name, "mwaa-emr-job")
+  description = "IAM policy for MWAA to run EMR on EKS jobs"
+ path = "/"
+ policy = data.aws_iam_policy_document.mwaa_emrjob.json
+}
+
+resource "aws_iam_role_policy_attachment" "this" {
+ role = module.mwaa.mwaa_role_name
+ policy_arn = aws_iam_policy.this.arn
+}
+
+#------------------------------------------------------------------------
+# Dags and Requirements
+#------------------------------------------------------------------------
+
+#tfsec:ignore:*
+module "s3_bucket" {
+ source = "terraform-aws-modules/s3-bucket/aws"
+ version = "~> 3.14"
+
+ bucket = "mwaa-${random_id.this.hex}"
+
+ # For example only - please evaluate for your environment
+ force_destroy = true
+
+ attach_deny_insecure_transport_policy = true
+ attach_require_latest_tls_policy = true
+
+ block_public_acls = true
+ block_public_policy = true
+ ignore_public_acls = true
+ restrict_public_buckets = true
+
+ server_side_encryption_configuration = {
+ rule = {
+ apply_server_side_encryption_by_default = {
+ sse_algorithm = "AES256"
+ }
+ }
+ }
+
+ tags = local.tags
+}
+
+# Kubeconfig is required for KubernetesPodOperator
+# https://airflow.apache.org/docs/apache-airflow-providers-cncf-kubernetes/stable/operators.html
+locals {
+ kubeconfig = yamlencode({
+ apiVersion = "v1"
+ kind = "Config"
+ current-context = "mwaa"
+ clusters = [{
+ name = module.eks.cluster_arn
+ cluster = {
+ certificate-authority-data = module.eks.cluster_certificate_authority_data
+ server = module.eks.cluster_endpoint
+ }
+ }]
+ contexts = [{
+ name = "mwaa" # must match KubernetesPodOperator context
+ context = {
+ cluster = module.eks.cluster_arn
+ user = "mwaa"
+ }
+ }]
+ users = [{
+ name = "mwaa"
+ user = {
+ exec = {
+ apiVersion = "client.authentication.k8s.io/v1beta1"
+ command = "aws"
+ args = [
+ "--region",
+ local.region,
+ "eks",
+ "get-token",
+ "--cluster-name",
+ local.name
+ ]
+ }
+ }
+ }]
+ })
+}
+
+resource "aws_s3_object" "kube_config" {
+ bucket = module.s3_bucket.s3_bucket_id
+ key = "${local.dag_s3_path}/kube_config.yaml"
+ content = local.kubeconfig
+ etag = md5(local.kubeconfig)
+}
+
+resource "aws_s3_object" "uploads" {
+ for_each = fileset("${local.dag_s3_path}/", "*")
+
+ bucket = module.s3_bucket.s3_bucket_id
+ key = "${local.dag_s3_path}/${each.value}"
+ source = "${local.dag_s3_path}/${each.value}"
+ etag = filemd5("${local.dag_s3_path}/${each.value}")
+}
+
+resource "random_id" "this" {
+ byte_length = "2"
+}
+
+#------------------------------------------------------------------------
+# Create K8s Namespace and Role for mwaa access directly
+#------------------------------------------------------------------------
+resource "kubernetes_namespace_v1" "mwaa" {
+ metadata {
+ annotations = {
+ name = "mwaa"
+ }
+
+ name = "mwaa"
+ }
+}
+
+resource "kubernetes_role_v1" "mwaa" {
+ metadata {
+ name = "mwaa-role"
+ namespace = kubernetes_namespace_v1.mwaa.metadata[0].name
+ }
+
+ rule {
+ api_groups = [
+ "",
+ "apps",
+ "batch",
+ "extensions",
+ ]
+ resources = [
+ "jobs",
+ "pods",
+ "pods/attach",
+ "pods/exec",
+ "pods/log",
+ "pods/portforward",
+ "secrets",
+ "services",
+ ]
+ verbs = [
+ "create",
+ "delete",
+ "describe",
+ "get",
+ "list",
+ "patch",
+ "update",
+ ]
+ }
+}
+
+resource "kubernetes_role_binding_v1" "mwaa" {
+ metadata {
+ name = "mwaa-role-binding"
+ namespace = kubernetes_namespace_v1.mwaa.metadata[0].name
+ }
+
+ role_ref {
+ api_group = "rbac.authorization.k8s.io"
+ kind = "Role"
+    name      = kubernetes_role_v1.mwaa.metadata[0].name
+ }
+
+ subject {
+ kind = "User"
+ name = "mwaa-service"
+ api_group = "rbac.authorization.k8s.io"
+ }
+}
diff --git a/schedulers/terraform/managed-airflow-mwaa/outputs.tf b/schedulers/terraform/managed-airflow-mwaa/outputs.tf
index 0c33b0124..7670c2a2d 100644
--- a/schedulers/terraform/managed-airflow-mwaa/outputs.tf
+++ b/schedulers/terraform/managed-airflow-mwaa/outputs.tf
@@ -1,19 +1,14 @@
output "configure_kubectl" {
description = "Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig"
- value = module.eks_blueprints.configure_kubectl
+ value = "aws eks --region ${local.region} update-kubeconfig --name ${module.eks.cluster_name}"
}
-output "emrcontainers_virtual_cluster_id" {
- description = "EMR Containers Virtual cluster ID"
- value = aws_emrcontainers_virtual_cluster.this.id
+output "emr_on_eks" {
+ description = "EMR on EKS"
+ value = module.emr_containers
}
-output "emr_on_eks_role_id" {
- description = "IAM execution role ID for EMR on EKS"
- value = module.eks_blueprints.emr_on_eks_role_id
-}
-
-output "emr_on_eks_role_arn" {
- description = "IAM execution role arn for EMR on EKS"
- value = module.eks_blueprints.emr_on_eks_role_arn
+output "mwaa_webserver_url" {
+  description = "MWAA Webserver URL"
+ value = module.mwaa.mwaa_webserver_url
}
diff --git a/schedulers/terraform/managed-airflow-mwaa/providers.tf b/schedulers/terraform/managed-airflow-mwaa/providers.tf
deleted file mode 100644
index 7ec193458..000000000
--- a/schedulers/terraform/managed-airflow-mwaa/providers.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-provider "aws" {
- region = local.region
-}
-
-provider "kubernetes" {
- host = module.eks_blueprints.eks_cluster_endpoint
- cluster_ca_certificate = base64decode(module.eks_blueprints.eks_cluster_certificate_authority_data)
- token = data.aws_eks_cluster_auth.this.token
-}
-
-provider "helm" {
- kubernetes {
- host = module.eks_blueprints.eks_cluster_endpoint
- cluster_ca_certificate = base64decode(module.eks_blueprints.eks_cluster_certificate_authority_data)
- token = data.aws_eks_cluster_auth.this.token
- }
-}
diff --git a/schedulers/terraform/managed-airflow-mwaa/variables.tf b/schedulers/terraform/managed-airflow-mwaa/variables.tf
index cdd7e1673..814fab673 100644
--- a/schedulers/terraform/managed-airflow-mwaa/variables.tf
+++ b/schedulers/terraform/managed-airflow-mwaa/variables.tf
@@ -13,7 +13,7 @@ variable "region" {
variable "eks_cluster_version" {
description = "EKS Cluster version"
type = string
- default = "1.23"
+ default = "1.26"
}
variable "vpc_cidr" {
diff --git a/schedulers/terraform/managed-airflow-mwaa/vpc.tf b/schedulers/terraform/managed-airflow-mwaa/vpc.tf
index fe3f4fa22..37fa3f671 100644
--- a/schedulers/terraform/managed-airflow-mwaa/vpc.tf
+++ b/schedulers/terraform/managed-airflow-mwaa/vpc.tf
@@ -7,8 +7,8 @@ module "vpc" {
name = local.name
cidr = local.vpc_cidr
+ azs = local.azs
- azs = local.azs
private_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 4, k)]
public_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 48)]
@@ -36,29 +36,25 @@ module "vpc" {
tags = local.tags
}
-module "vpc_endpoints_sg" {
- source = "terraform-aws-modules/security-group/aws"
- version = "~> 5.0"
-
+resource "aws_security_group" "vpc_endpoints_sg" {
name = "${local.name}-vpc-endpoints"
description = "Security group for VPC endpoint access"
vpc_id = module.vpc.vpc_id
- ingress_with_cidr_blocks = [
- {
- rule = "https-443-tcp"
- description = "VPC CIDR HTTPS"
- cidr_blocks = join(",", module.vpc.private_subnets_cidr_blocks)
- },
- ]
+ ingress {
+ description = "VPC CIDR HTTPS"
+ cidr_blocks = [module.vpc.vpc_cidr_block]
+ from_port = 443
+ to_port = 443
+ protocol = "tcp"
+ }
- egress_with_cidr_blocks = [
- {
- rule = "https-443-tcp"
- description = "All egress HTTPS"
- cidr_blocks = "0.0.0.0/0"
- },
- ]
+ egress {
+ from_port = 0
+ to_port = 0
+ protocol = "-1"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
tags = local.tags
}
@@ -68,7 +64,7 @@ module "vpc_endpoints" {
version = "~> 5.0"
vpc_id = module.vpc.vpc_id
- security_group_ids = [module.vpc_endpoints_sg.security_group_id]
+ security_group_ids = [aws_security_group.vpc_endpoints_sg.id]
endpoints = merge({
s3 = {
diff --git a/website/docs/blueprints/job-schedulers/aws-managed-airflow.md b/website/docs/blueprints/job-schedulers/aws-managed-airflow.md
index 15ca21c21..b84f0b322 100644
--- a/website/docs/blueprints/job-schedulers/aws-managed-airflow.md
+++ b/website/docs/blueprints/job-schedulers/aws-managed-airflow.md
@@ -32,11 +32,11 @@ To provision this example:
```bash
git clone https://github.com/awslabs/data-on-eks.git
cd data-on-eks/schedulers/terraform/managed-airflow-mwaa
-terraform init
-terraform apply -var region=us-west-2 # Change according to your region
+chmod +x install.sh
+./install.sh
```
-Enter `yes` at command prompt to apply
+Enter the region at the command prompt to continue.
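+
+`install.sh` applies the Terraform modules with `-target` in a fixed order before a final full apply. If you prefer to run the steps yourself, a rough manual equivalent (assuming `AWS_DEFAULT_REGION` is set to your chosen region) is:
+
+```bash
+# Roughly what install.sh automates: targeted applies, then a full apply
+terraform init
+terraform apply -target="module.vpc" -auto-approve
+terraform apply -target="module.mwaa" -auto-approve
+terraform apply -target="module.eks" -auto-approve
+terraform apply -target="module.eks_blueprints_addons" -auto-approve
+terraform apply -target="module.emr_containers" -auto-approve
+terraform apply -auto-approve
+```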
Once done, you will see terraform output like below.
@@ -58,6 +58,7 @@ The following command will update the `kubeconfig` on your local machine and all
### Run `update-kubeconfig` command
+Run the command below. You can also copy it from the Terraform output `configure_kubectl`.
```bash
aws eks --region us-west-2 update-kubeconfig --name managed-airflow-mwaa
```
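+
+Alternatively, since this blueprint exposes the full command as the Terraform output `configure_kubectl`, you can print it from the blueprint directory:
+
+```bash
+# Print the update-kubeconfig command for your deployment
+terraform output -raw configure_kubectl
+```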
@@ -69,9 +70,9 @@ kubectl get nodes
# Output should look like below
NAME STATUS ROLES AGE VERSION
-ip-10-0-0-42.ec2.internal Ready 5h15m v1.23.9-eks-ba74326
-ip-10-0-22-71.ec2.internal Ready 5h15m v1.23.9-eks-ba74326
-ip-10-0-44-63.ec2.internal Ready 5h15m v1.23.9-eks-ba74326
+ip-10-0-0-42.ec2.internal Ready 5h15m v1.26.4-eks-0a21954
+ip-10-0-22-71.ec2.internal Ready 5h15m v1.26.4-eks-0a21954
+ip-10-0-44-63.ec2.internal Ready 5h15m v1.26.4-eks-0a21954
```
### List the namespaces in EKS cluster
@@ -171,9 +172,10 @@ mwaa-pod-test.4bed823d645844bc8e6899fd858f119d 0/1 Completed 0
## Destroy
-To teardown and remove the resources created in this example:
+To clean up your environment, run the `cleanup.sh` script.
```bash
-terraform destroy -auto-approve
+chmod +x cleanup.sh
+./cleanup.sh
```
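+
+Under the hood, the script first clears any namespaces stuck in `Terminating`, then destroys the modules one target at a time before a final full destroy, roughly equivalent to:
+
+```bash
+# Rough manual equivalent of cleanup.sh
+terraform destroy -target="module.emr_containers" -auto-approve
+terraform destroy -target="module.eks_blueprints_addons" -auto-approve
+terraform destroy -target="module.eks" -auto-approve
+terraform destroy -target="module.mwaa" -auto-approve
+terraform destroy -auto-approve
+```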
---