From d9ed7735000451f73fdf2475750f924fa767b018 Mon Sep 17 00:00:00 2001 From: Vara Bonthu Date: Fri, 5 Apr 2024 13:45:12 -0700 Subject: [PATCH] Apache Superset updates --- analytics/terraform/superset-on-eks/addons.tf | 162 +++++++++++------- .../superset-on-eks/examples/create_image.sh | 34 ---- .../helm-values/superset-values.yaml | 42 +++++ .../terraform/superset-on-eks/helm/main.tf | 1 - .../terraform/superset-on-eks/helm/outputs.tf | 1 - .../helm/storageclass/Chart.yaml | 5 - .../templates/storage-class-gp3.yaml | 11 -- .../superset-on-eks/helm/variables.tf | 1 - .../terraform/superset-on-eks/ingres.yaml | 20 --- analytics/terraform/superset-on-eks/main.tf | 96 ++++------- .../superset-on-eks/superset-yamls/pv.yaml | 14 -- .../superset-on-eks/{helm => }/versions.tf | 16 +- 12 files changed, 181 insertions(+), 222 deletions(-) delete mode 100755 analytics/terraform/superset-on-eks/examples/create_image.sh create mode 100644 analytics/terraform/superset-on-eks/helm-values/superset-values.yaml delete mode 100644 analytics/terraform/superset-on-eks/helm/main.tf delete mode 100644 analytics/terraform/superset-on-eks/helm/outputs.tf delete mode 100644 analytics/terraform/superset-on-eks/helm/storageclass/Chart.yaml delete mode 100644 analytics/terraform/superset-on-eks/helm/storageclass/templates/storage-class-gp3.yaml delete mode 100644 analytics/terraform/superset-on-eks/helm/variables.tf delete mode 100644 analytics/terraform/superset-on-eks/ingres.yaml delete mode 100644 analytics/terraform/superset-on-eks/superset-yamls/pv.yaml rename analytics/terraform/superset-on-eks/{helm => }/versions.tf (55%) mode change 100755 => 100644 diff --git a/analytics/terraform/superset-on-eks/addons.tf b/analytics/terraform/superset-on-eks/addons.tf index b081c7c58..1aea094c3 100755 --- a/analytics/terraform/superset-on-eks/addons.tf +++ b/analytics/terraform/superset-on-eks/addons.tf @@ -1,3 +1,40 @@ +#--------------------------------------------------------------- +# GP3 
Encrypted Storage Class +#--------------------------------------------------------------- +resource "kubernetes_annotations" "disable_gp2" { + annotations = { + "storageclass.kubernetes.io/is-default-class" : "false" + } + api_version = "storage.k8s.io/v1" + kind = "StorageClass" + metadata { + name = "gp2" + } + force = true + + depends_on = [module.eks.eks_cluster_id] +} + +resource "kubernetes_storage_class" "default_gp3" { + metadata { + name = "gp3" + annotations = { + "storageclass.kubernetes.io/is-default-class" : "true" + } + } + + storage_provisioner = "ebs.csi.aws.com" + reclaim_policy = "Delete" + allow_volume_expansion = true + volume_binding_mode = "WaitForFirstConsumer" + parameters = { + fsType = "ext4" + encrypted = true + type = "gp3" + } + + depends_on = [kubernetes_annotations.disable_gp2] +} #--------------------------------------------------------------- # IRSA for EBS CSI Driver @@ -16,48 +53,79 @@ module "ebs_csi_driver_irsa" { tags = local.tags } -resource "kubernetes_ingress_class_v1" "aws_alb" { - metadata { - name = "aws-alb" +module "eks_blueprints_addons" { + source = "aws-ia/eks-blueprints-addons/aws" + version = "~> 1.2" + + cluster_name = module.eks.cluster_name + cluster_endpoint = module.eks.cluster_endpoint + cluster_version = module.eks.cluster_version + oidc_provider_arn = module.eks.oidc_provider_arn + + #--------------------------------------- + # Amazon EKS Managed Add-ons + #--------------------------------------- + eks_addons = { + aws-ebs-csi-driver = { + service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn + } + coredns = { + preserve = true + } + vpc-cni = { + preserve = true + } + kube-proxy = { + preserve = true + } } - spec { - controller = "ingress.k8s.aws/alb" + #--------------------------------------- + # AWS Load Balancer Controller Add-on + #--------------------------------------- + enable_aws_load_balancer_controller = true + # turn off the mutating webhook for services because we are using + # 
service.beta.kubernetes.io/aws-load-balancer-type: external + aws_load_balancer_controller = { + set = [{ + name = "enableServiceMutatorWebhook" + value = "false" + }] } + + tags = local.tags } -module "lb_role" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.37.1" - role_name = format("%s-%s", local.name, "lb-controller-role") - attach_load_balancer_controller_policy = true +module "eks_data_addons" { + source = "aws-ia/eks-data-addons/aws" + version = "~> 1.31.5" # ensure to update this to the latest/desired version - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:aws-load-balancer-controller"] - } + oidc_provider_arn = module.eks.oidc_provider_arn + + #--------------------------------------- + # AWS Apache Superset Add-on + #--------------------------------------- + enable_superset = true + superset_helm_config = { + values = [templatefile("${path.module}/helm-values/superset-values.yaml", {})] } -} +} -resource "kubernetes_service_account" "service_account" { +#------------------------------------------------------------ +# Create AWS Application Load Balancer with Ingress +#------------------------------------------------------------ +resource "kubernetes_ingress_class_v1" "aws_alb" { metadata { - name = "aws-load-balancer-controller" - namespace = "kube-system" - labels = { - "app.kubernetes.io/name" = "aws-load-balancer-controller" - "app.kubernetes.io/component" = "controller" - } - annotations = { - "eks.amazonaws.com/role-arn" = module.lb_role.iam_role_arn - "eks.amazonaws.com/sts-regional-endpoints" = "true" - } + name = "aws-alb" } - depends_on = [module.eks] -} + spec { + controller = "ingress.k8s.aws/alb" + } + depends_on = [module.eks.cluster_id] +} resource "kubernetes_ingress_v1" "superset" { metadata { @@ -86,40 +154,6 @@ resource "kubernetes_ingress_v1" "superset" { } } } - depends_on = [helm_release.superset] -} - - 
-module "eks_blueprints_addons" { - source = "aws-ia/eks-blueprints-addons/aws" - version = "~> 1.2" - - cluster_name = module.eks.cluster_name - cluster_endpoint = module.eks.cluster_endpoint - cluster_version = module.eks.cluster_version - oidc_provider_arn = module.eks.oidc_provider_arn - #--------------------------------------- - # Amazon EKS Managed Add-ons - #--------------------------------------- - eks_addons = { - # aws-ebs-csi-driver = { - # most_recent = true - # service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn - # } - coredns = { - preserve = true - } - vpc-cni = { - preserve = true - } - kube-proxy = { - preserve = true - } - } - enable_aws_load_balancer_controller = true - aws_load_balancer_controller = { - chart_version = "1.5.4" - } - tags = local.tags + depends_on = [module.eks_blueprints_addons] } diff --git a/analytics/terraform/superset-on-eks/examples/create_image.sh b/analytics/terraform/superset-on-eks/examples/create_image.sh deleted file mode 100755 index 1fc47c446..000000000 --- a/analytics/terraform/superset-on-eks/examples/create_image.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Set the AWS region and the name of the ECR repository - -REGION=us-west-2 -ECR_REPO_NAME=superset -DOCKER_FILE=docker/superset.Dockerfile - -# Check if the ECR repository exists -if aws ecr describe-repositories --repository-names "$ECR_REPO_NAME" --region "$REGION" >/dev/null 2>&1; then - echo "ECR repository '$ECR_REPO_NAME' already exists." 
- - # Get the ECR_REPO_URI for the existing repository - ECR_REPO_URI=$(aws ecr describe-repositories --repository-name "$ECR_REPO_NAME" --query 'repositories[0].repositoryUri' --region "$REGION" --output text) - echo "Repository URL: $ECR_REPO_URI" -else - # Create a new ECR repository with the specified name and region - aws ecr create-repository --repository-name "$ECR_REPO_NAME" --region "$REGION" - - # Retrieve the URL of the newly created ECR repository - ECR_REPO_URI=$(aws ecr describe-repositories --repository-name "$ECR_REPO_NAME" --query 'repositories[0].repositoryUri' --region "$REGION" --output text) - echo "Repository URL: $ECR_REPO_URI" -fi - -# Log in to Amazon ECR using docker -echo -e "Logging in to Amazon ECR..." -aws ecr get-login-password --region "$REGION" | docker login --username AWS --password-stdin "$ECR_REPO_URI" - -echo -e "Building, tagging and pushing docker image... $ECR_REPO_URI:latest" -docker buildx build --push --tag "$ECR_REPO_URI:latest" -o type=image --platform=linux/amd64 -f $DOCKER_FILE . - -# Wait for 5 seconds -sleep 5 -echo -e "Sleeping for 5 seconds..." 
diff --git a/analytics/terraform/superset-on-eks/helm-values/superset-values.yaml b/analytics/terraform/superset-on-eks/helm-values/superset-values.yaml new file mode 100644 index 000000000..334698db6 --- /dev/null +++ b/analytics/terraform/superset-on-eks/helm-values/superset-values.yaml @@ -0,0 +1,42 @@ +# Superset node configuration +supersetNode: + replicaCount: 1 + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 200m + memory: 256Mi + +# Superset Celery worker configuration +supersetWorker: + replicaCount: 1 + autoscaling: + enabled: true + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 200m + memory: 256Mi + +postgresql: + ## Set to false if bringing your own PostgreSQL. + enabled: true + primary: + persistence: + ## Enable PostgreSQL persistence using Persistent Volume Claims. 
+ enabled: true + storageClass: gp3 diff --git a/analytics/terraform/superset-on-eks/helm/main.tf b/analytics/terraform/superset-on-eks/helm/main.tf deleted file mode 100644 index 4908a64a0..000000000 --- a/analytics/terraform/superset-on-eks/helm/main.tf +++ /dev/null @@ -1 +0,0 @@ -#main.tf diff --git a/analytics/terraform/superset-on-eks/helm/outputs.tf b/analytics/terraform/superset-on-eks/helm/outputs.tf deleted file mode 100644 index ff1efe22e..000000000 --- a/analytics/terraform/superset-on-eks/helm/outputs.tf +++ /dev/null @@ -1 +0,0 @@ -#outputs.tf diff --git a/analytics/terraform/superset-on-eks/helm/storageclass/Chart.yaml b/analytics/terraform/superset-on-eks/helm/storageclass/Chart.yaml deleted file mode 100644 index 74c9ff0e4..000000000 --- a/analytics/terraform/superset-on-eks/helm/storageclass/Chart.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: v2 -name: storage -description: Helm chart for storage options on the cluster -version: 0.0.1 -appVersion: 0.0.1 diff --git a/analytics/terraform/superset-on-eks/helm/storageclass/templates/storage-class-gp3.yaml b/analytics/terraform/superset-on-eks/helm/storageclass/templates/storage-class-gp3.yaml deleted file mode 100644 index 2269839e1..000000000 --- a/analytics/terraform/superset-on-eks/helm/storageclass/templates/storage-class-gp3.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: gp3 - annotations: - storageclass.kubernetes.io/is-default-class: "true" -provisioner: ebs.csi.aws.com -parameters: - type: gp3 - csi.storage.k8s.io/fstype: ext4 - encrypted: "true" diff --git a/analytics/terraform/superset-on-eks/helm/variables.tf b/analytics/terraform/superset-on-eks/helm/variables.tf deleted file mode 100644 index d347e6999..000000000 --- a/analytics/terraform/superset-on-eks/helm/variables.tf +++ /dev/null @@ -1 +0,0 @@ -#variables.tf diff --git a/analytics/terraform/superset-on-eks/ingres.yaml b/analytics/terraform/superset-on-eks/ingres.yaml 
deleted file mode 100644 index 68dd5e347..000000000 --- a/analytics/terraform/superset-on-eks/ingres.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - namespace: superset - name: ingress - annotations: - alb.ingress.kubernetes.io/scheme: internal-facing - alb.ingress.kubernetes.io/target-type: ip -spec: - ingressClassName: alb - rules: - - http: - paths: - - path: "/*" - pathType: ImplementationSpecific - backend: - service: - name: superset - port: - number: 8088 diff --git a/analytics/terraform/superset-on-eks/main.tf b/analytics/terraform/superset-on-eks/main.tf index 071bb0c1f..afb11c53f 100755 --- a/analytics/terraform/superset-on-eks/main.tf +++ b/analytics/terraform/superset-on-eks/main.tf @@ -10,28 +10,6 @@ locals { } } -terraform { - required_version = "~> 1.3.0" - - required_providers { - null = { - source = "hashicorp/null" - version = "3.2.2" - } - helm = { - source = "hashicorp/helm" - version = "2.12.1" - } - aws = { - source = "hashicorp/aws" - version = " 5.43.0" - } - kubernetes = { - source = "hashicorp/kubernetes" - version = " 2.27.0" - } - } -} #--------------------------------------------------------------- # EKS Cluster #--------------------------------------------------------------- @@ -109,14 +87,14 @@ module "eks" { # It's recommended to have a Managed Node group for hosting critical add-ons # You can leverage nodeSelector and Taints/tolerations to distribute workloads across Managed Node group or Karpenter nodes. core_node_group = { - name = "superset-node-group" + name = "core-node-group" description = "EKS Core node group for hosting critical add-ons" # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned subnet_ids = compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? 
subnet_id : null]) - min_size = 4 - max_size = 8 - desired_size = 4 + min_size = 2 + max_size = 6 + desired_size = 2 instance_types = ["m5.xlarge"] @@ -138,48 +116,38 @@ module "eks" { tags = merge(local.tags, { Name = "core-node-grp" }) - } - } -} - -resource "kubernetes_namespace" "superset" { - metadata { - name = "superset" - } -} - + }, -# Add Helm repo -resource "null_resource" "add_superset_repo" { - provisioner "local-exec" { - command = "helm repo add superset https://apache.github.io/superset" - } -} + superset_node_group = { + name = "superset-node-group" + description = "Apache Superset node group" + # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned + subnet_ids = compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null]) -# Update repos -resource "null_resource" "helm_update_repos" { - provisioner "local-exec" { - command = "helm repo update" - } + min_size = 4 + max_size = 8 + desired_size = 4 - # Depends on add repo - depends_on = [ - null_resource.add_superset_repo - ] -} + instance_types = ["m5.xlarge"] -resource "helm_release" "superset" { - name = "superset" - repository = "superset" - chart = "superset" - namespace = "superset" + ebs_optimized = true + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_size = 100 + volume_type = "gp3" + } + } + } - values = [ - file("${path.module}/values.yaml") - ] - depends_on = [ - kubernetes_namespace.superset, - null_resource.add_superset_repo + labels = { + WorkerType = "ON_DEMAND" + NodeGroupType = "superset" + } - ] + tags = merge(local.tags, { + Name = "superset-node-grp" }) + } + } } diff --git a/analytics/terraform/superset-on-eks/superset-yamls/pv.yaml b/analytics/terraform/superset-on-eks/superset-yamls/pv.yaml deleted file mode 100644 index ebd2ddaf1..000000000 --- 
a/analytics/terraform/superset-on-eks/superset-yamls/pv.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -kind: PersistentVolume -metadata: - name: gp2-pv -spec: - capacity: - storage: 8Gi - accessModes: - - ReadWriteOnce - persistentVolumeReclaimPolicy: Delete - storageClassName: gp2 - csi: - driver: ebs.csi.aws.com - volumeHandle: auto diff --git a/analytics/terraform/superset-on-eks/helm/versions.tf b/analytics/terraform/superset-on-eks/versions.tf old mode 100755 new mode 100644 similarity index 55% rename from analytics/terraform/superset-on-eks/helm/versions.tf rename to analytics/terraform/superset-on-eks/versions.tf index 9b6678a5f..591d6754b --- a/analytics/terraform/superset-on-eks/helm/versions.tf +++ b/analytics/terraform/superset-on-eks/versions.tf @@ -12,16 +12,18 @@ terraform { } helm = { source = "hashicorp/helm" - version = ">= 2.12.1" + version = ">= 2.4.1" } random = { source = "hashicorp/random" - version = "3.1.0" # Replace with the appropriate version of the random provider - } - - archive = { - source = "hashicorp/archive" - version = "2.4.0" + version = "3.3.2" } } + + # ## Used for end-to-end testing on project; update to suit your needs + # backend "s3" { + # bucket = "doeks-github-actions-e2e-test-state" + # region = "us-west-2" + # key = "e2e/superset/terraform.tfstate" + # } }