diff --git a/analytics/terraform/superset-on-eks/README.md b/analytics/terraform/superset-on-eks/README.md
new file mode 100644
index 000000000..89efc0bcd
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/README.md
@@ -0,0 +1,52 @@
+## Requirements
+
+For security reasons, the ALB is deployed as an internal one; it can be changed to internet-facing during the deployment if needed.
+## Providers
+
+| Name | Version |
+|------|---------|
+| [aws](#provider\_aws) | 5.36.0 |
+| [helm](#provider\_helm) | 2.12.1 |
+| [kubernetes](#provider\_kubernetes) | 2.25.2 |
+| [null](#provider\_null) | 3.2.2 |
+
+## Modules
+
+| Name | Source | Version |
+|------|--------|---------|
+| [ebs\_csi\_driver\_irsa](#module\_ebs\_csi\_driver\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 5.20 |
+| [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 |
+| [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 |
+| [lb\_role](#module\_lb\_role) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | 5.37.1 |
+| [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [helm_release.alb_controller](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
+| [helm_release.superset](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
+| [kubernetes_ingress_class_v1.aws_alb](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/ingress_class_v1) | resource |
+| [kubernetes_ingress_v1.superset](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/ingress_v1) | resource |
+| [kubernetes_namespace.superset](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource |
+| [kubernetes_service_account.service_account](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/service_account) | resource |
+| [null_resource.add_superset_repo](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
+| [null_resource.helm_update_repos](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
+| [aws_availability_zones.available](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/availability_zones) | data source |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.28"` | no |
+| [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"superset-on-eks"` | no |
+| [region](#input\_region) | Region | `string` | `"us-east-1"` | no |
+| [secondary\_cidr\_blocks](#input\_secondary\_cidr\_blocks) | Secondary CIDR blocks to be attached to VPC | `list(string)` |
[
"100.64.0.0/16"
]
| no |
+| [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR. This should be a valid private (RFC 1918) CIDR range | `string` | `"10.1.0.0/21"` | no |
+
+## Outputs
+
+| Name | Description |
+|------|-------------|
+| [configure\_kubectl](#output\_configure\_kubectl) | Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig |
+| [superset\_url](#output\_superset\_url) | Configure kubectl: Once the kubeconfig is configured as above, use the below command to get the Superset URL |
diff --git a/analytics/terraform/superset-on-eks/addons.tf b/analytics/terraform/superset-on-eks/addons.tf
new file mode 100755
index 000000000..c6aecd758
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/addons.tf
@@ -0,0 +1,160 @@
+#---------------------------------------------------------------
+# GP3 Encrypted Storage Class
+#---------------------------------------------------------------
+resource "kubernetes_annotations" "disable_gp2" {
+ annotations = {
+ "storageclass.kubernetes.io/is-default-class" : "false"
+ }
+ api_version = "storage.k8s.io/v1"
+ kind = "StorageClass"
+ metadata {
+ name = "gp2"
+ }
+ force = true
+
+ depends_on = [module.eks.eks_cluster_id]
+}
+
+resource "kubernetes_storage_class" "default_gp3" {
+ metadata {
+ name = "gp3"
+ annotations = {
+ "storageclass.kubernetes.io/is-default-class" : "true"
+ }
+ }
+
+ storage_provisioner = "ebs.csi.aws.com"
+ reclaim_policy = "Delete"
+ allow_volume_expansion = true
+ volume_binding_mode = "WaitForFirstConsumer"
+ parameters = {
+ fsType = "ext4"
+ encrypted = true
+ type = "gp3"
+ }
+
+ depends_on = [kubernetes_annotations.disable_gp2]
+}
+
+#---------------------------------------------------------------
+# IRSA for EBS CSI Driver
+#---------------------------------------------------------------
+module "ebs_csi_driver_irsa" {
+ source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
+ version = "~> 5.20"
+ role_name_prefix = format("%s-%s", local.name, "ebs-csi-driver-")
+ attach_ebs_csi_policy = true
+ oidc_providers = {
+ main = {
+ provider_arn = module.eks.oidc_provider_arn
+ namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"]
+ }
+ }
+ tags = local.tags
+}
+
+module "eks_blueprints_addons" {
+ source = "aws-ia/eks-blueprints-addons/aws"
+ version = "~> 1.2"
+
+ cluster_name = module.eks.cluster_name
+ cluster_endpoint = module.eks.cluster_endpoint
+ cluster_version = module.eks.cluster_version
+ oidc_provider_arn = module.eks.oidc_provider_arn
+
+ #---------------------------------------
+ # Amazon EKS Managed Add-ons
+ #---------------------------------------
+ eks_addons = {
+ aws-ebs-csi-driver = {
+ service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn
+ }
+ coredns = {
+ preserve = true
+ }
+ vpc-cni = {
+ preserve = true
+ }
+ kube-proxy = {
+ preserve = true
+ }
+ }
+
+ #---------------------------------------
+ # AWS Load Balancer Controller Add-on
+ #---------------------------------------
+ enable_aws_load_balancer_controller = true
+ # turn off the mutating webhook for services because we are using
+ # service.beta.kubernetes.io/aws-load-balancer-type: external
+ aws_load_balancer_controller = {
+ set = [{
+ name = "enableServiceMutatorWebhook"
+ value = "false"
+ }]
+ }
+
+ tags = local.tags
+}
+
+module "eks_data_addons" {
+ source = "aws-ia/eks-data-addons/aws"
+ version = "~> 1.31.5" # ensure to update this to the latest/desired version
+
+ oidc_provider_arn = module.eks.oidc_provider_arn
+
+ #---------------------------------------
+ # AWS Apache Superset Add-on
+ #---------------------------------------
+ enable_superset = true
+ superset_helm_config = {
+ values = [templatefile("${path.module}/helm-values/superset-values.yaml", {})]
+ }
+ depends_on = [module.eks_blueprints_addons]
+
+}
+
+#------------------------------------------------------------
+# Create AWS Application Load Balancer with Ingress
+#------------------------------------------------------------
+resource "kubernetes_ingress_class_v1" "aws_alb" {
+  metadata {
+    name = "aws-alb"
+  }
+
+  spec {
+    controller = "ingress.k8s.aws/alb"
+  }
+
+  depends_on = [module.eks.cluster_id]
+}
+
+resource "kubernetes_ingress_v1" "superset" {
+  metadata {
+    name      = "superset-ingress3"
+    namespace = "superset"
+    annotations = {
+      "alb.ingress.kubernetes.io/scheme"      = "internal" # internal per README; set to "internet-facing" to expose the ALB publicly
+      "alb.ingress.kubernetes.io/target-type" = "ip"
+    }
+  }
+  spec {
+    ingress_class_name = "aws-alb"
+    rule {
+      http {
+        path {
+          path = "/*"
+          backend {
+            service {
+              name = "superset"
+              port {
+                number = 8088
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  depends_on = [module.eks_blueprints_addons, module.eks_data_addons]
+}
diff --git a/analytics/terraform/superset-on-eks/cleanup.sh b/analytics/terraform/superset-on-eks/cleanup.sh
new file mode 100755
index 000000000..b020546f3
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/cleanup.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+set -o errexit
+set -o pipefail
+
+targets=(
+  "module.eks_blueprints_addons"
+  "module.eks"
+  "module.vpc"
+)
+
+#-------------------------------------------
+# Helpful to delete namespaces stuck in "Terminating" state
+# Rerun the cleanup.sh script to detect and delete the stuck resources
+#-------------------------------------------
+terminating_namespaces=$(kubectl get namespaces --field-selector status.phase=Terminating -o json | jq -r '.items[].metadata.name')
+
+# If there are no terminating namespaces, report it and continue with the destroy
+if [[ -z $terminating_namespaces ]]; then
+  echo "No terminating namespaces found"
+fi
+
+for ns in $terminating_namespaces; do
+  echo "Terminating namespace: $ns"
+  kubectl get namespace $ns -o json | sed 's/"kubernetes"//' | kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f -
+done
+
+#-------------------------------------------
+# Terraform destroy per module target
+#-------------------------------------------
+for target in "${targets[@]}"
+do
+  destroy_output=$(terraform destroy -target="$target" -auto-approve | tee /dev/tty) || true # "|| true" keeps errexit from skipping the failure branch below
+  if [[ ${PIPESTATUS[0]} -eq 0 && $destroy_output == *"Destroy complete!"* ]]; then
+    echo "SUCCESS: Terraform destroy of $target completed successfully"
+  else
+    echo "FAILED: Terraform destroy of $target failed"
+    exit 1
+  fi
+done
+
+#-------------------------------------------
+# Terraform destroy full
+#-------------------------------------------
+destroy_output=$(terraform destroy -auto-approve | tee /dev/tty) || true # full destroy: no -target here, otherwise only the last loop target gets destroyed
+if [[ ${PIPESTATUS[0]} -eq 0 && $destroy_output == *"Destroy complete!"* ]]; then
+  echo "SUCCESS: Terraform destroy of all targets completed successfully"
+else
+  echo "FAILED: Terraform destroy of all targets failed"
+  exit 1
+fi
diff --git a/analytics/terraform/superset-on-eks/helm-values/superset-values.yaml b/analytics/terraform/superset-on-eks/helm-values/superset-values.yaml
new file mode 100644
index 000000000..40a5d4955
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/helm-values/superset-values.yaml
@@ -0,0 +1,95 @@
+# Superset node configuration
+supersetNode:
+ replicaCount: 1
+ autoscaling:
+ enabled: true
+ minReplicas: 1
+ maxReplicas: 100
+ targetCPUUtilizationPercentage: 80
+
+ resources:
+ limits:
+ cpu: 200m
+ memory: 256Mi
+ requests:
+ cpu: 200m
+ memory: 256Mi
+
+# Superset Celery worker configuration
+supersetWorker:
+ replicaCount: 1
+ autoscaling:
+ enabled: true
+ minReplicas: 1
+ maxReplicas: 100
+ targetCPUUtilizationPercentage: 80
+ command:
+ - "/bin/sh"
+ - "-c"
+ - ". {{ .Values.configMountPath }}/superset_bootstrap.sh; celery --app=superset.tasks.celery_app:app worker"
+ # -- If true, forces deployment to reload on each upgrade
+ forceReload: false
+ # -- Init container
+ # @default -- a container waiting for postgres and redis
+ initContainers:
+ - name: wait-for-postgres-redis
+ image: "{{ .Values.initImage.repository }}:{{ .Values.initImage.tag }}"
+ imagePullPolicy: "{{ .Values.initImage.pullPolicy }}"
+ envFrom:
+ - secretRef:
+ name: "{{ tpl .Values.envFromSecret . }}"
+ command:
+ - /bin/sh
+ - -c
+ - dockerize -wait "tcp://$DB_HOST:$DB_PORT" -wait "tcp://$REDIS_HOST:$REDIS_PORT" -timeout 120s
+
+ resources:
+ limits:
+ cpu: 200m
+ memory: 512Mi
+ requests:
+ cpu: 200m
+ memory: 400Mi
+
+ persistence:
+ enabled: true
+
+
+postgresql:
+ ## Set to false if bringing your own PostgreSQL.
+ enabled: true
+ loadExamples: true
+ primary:
+ persistence:
+ ## Enable PostgreSQL persistence using Persistent Volume Claims.
+ enabled: true
+ storageClass: gp3
+
+configOverrides:
+  secret: |
+    SECRET_KEY = '5WPuGEgPfGTrk9MCVLFkzNk0fO4hyfsykSrM03fHn1m8d3yQQd4yjyvf'  # WARNING: hard-coded example secret; generate your own (e.g. `openssl rand -base64 42`) before any real deployment
+
+
+
+redis:
+
+ master:
+ ##
+ ## Image configuration
+ # image:
+ ##
+ ## docker registry secret names (list)
+ # pullSecrets: nil
+ ##
+ persistence:
+ ##
+ ## Use a PVC to persist data.
+ enabled: true
+ ##
+ ## Persistent class
+ # storageClass: classname
+ ##
+ ## Access mode:
+ accessModes:
+ - ReadWriteOnce
+runAsUser: 1000
\ No newline at end of file
diff --git a/analytics/terraform/superset-on-eks/install.sh b/analytics/terraform/superset-on-eks/install.sh
new file mode 100755
index 000000000..0e7b7166d
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/install.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+echo "Initializing ..."
+terraform init || { echo "FAILED: \"terraform init\" failed"; exit 1; } # abort early: applies against an uninitialized workspace would all fail
+
+# List of Terraform modules to apply in sequence
+targets=(
+  "module.vpc"
+  "module.eks"
+  "module.eks_blueprints_addons"
+)
+
+# Apply modules in sequence
+for target in "${targets[@]}"
+do
+  echo "Applying module $target..."
+  apply_output=$(terraform apply -target="$target" -auto-approve 2>&1 | tee /dev/tty)
+  if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then
+    echo "SUCCESS: Terraform apply of $target completed successfully"
+  else
+    echo "FAILED: Terraform apply of $target failed"
+    exit 1
+  fi
+done
+
+# Final apply to catch any remaining resources
+echo "Applying remaining resources..."
+apply_output=$(terraform apply -auto-approve 2>&1 | tee /dev/tty)
+if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then
+  echo "SUCCESS: Terraform apply of all modules completed successfully"
+else
+  echo "FAILED: Terraform apply of all modules failed"
+  exit 1
+fi
diff --git a/analytics/terraform/superset-on-eks/main.tf b/analytics/terraform/superset-on-eks/main.tf
new file mode 100755
index 000000000..fa15b3eb5
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/main.tf
@@ -0,0 +1,153 @@
+data "aws_availability_zones" "available" {}
+
+locals {
+ name = var.name
+ region = var.region
+ azs = slice(data.aws_availability_zones.available.names, 0, 2)
+ tags = {
+ Blueprint = local.name
+ GithubRepo = "github.com/awslabs/data-on-eks"
+ }
+}
+
+#---------------------------------------------------------------
+# EKS Cluster
+#---------------------------------------------------------------
+module "eks" {
+ source = "terraform-aws-modules/eks/aws"
+ version = "~> 19.15"
+
+ cluster_name = local.name
+ cluster_version = var.eks_cluster_version
+
+ #WARNING: Avoid using this option (cluster_endpoint_public_access = true) in preprod or prod accounts. This feature is designed for sandbox accounts, simplifying cluster deployment and testing.
+ cluster_endpoint_public_access = true # if true, Your cluster API server is accessible from the internet. You can, optionally, limit the CIDR blocks that can access the public endpoint.
+
+ vpc_id = module.vpc.vpc_id
+ # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the EKS Control Plane ENIs will be created
+ subnet_ids = compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null])
+
+
+ #manage_aws_auth_configmap = true
+
+ #---------------------------------------
+  # Note: This can be further restricted to the specific ports required for each add-on and your application
+ #---------------------------------------
+ # Extend cluster security group rules
+ cluster_security_group_additional_rules = {
+ ingress_nodes_ephemeral_ports_tcp = {
+ description = "Nodes on ephemeral ports"
+ protocol = "tcp"
+ from_port = 1025
+ to_port = 65535
+ type = "ingress"
+ source_node_security_group = true
+ }
+ }
+
+ # Extend node-to-node security group rules
+ node_security_group_additional_rules = {
+ ingress_self_all = {
+ description = "Node to node all ports/protocols"
+ protocol = "-1"
+ from_port = 0
+ to_port = 0
+ type = "ingress"
+ self = true
+ }
+
+ # Allows Control Plane Nodes to talk to Worker nodes on all ports. Added this to simplify the example and further avoid issues with Add-ons communication with Control plane.
+ # This can be restricted further to specific port based on the requirement for each Add-on e.g., coreDNS 53, metrics-server 4443, spark-operator 8080, karpenter 8443 etc.
+ # Update this according to your security requirements if needed
+ ingress_cluster_to_node_all_traffic = {
+ description = "Cluster API to Nodegroup all traffic"
+ protocol = "-1"
+ from_port = 0
+ to_port = 0
+ type = "ingress"
+ source_cluster_security_group = true
+ }
+ }
+ # cluster_addons = {
+ # aws-ebs-csi-driver = {
+ # service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn
+ # most_recent = true
+ # resolve_conflicts_on_update = "PRESERVE"
+ # }
+ # }
+
+ eks_managed_node_group_defaults = {
+ iam_role_additional_policies = {
+ # Not required, but used in the example to access the nodes to inspect mounted volumes
+ AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+ }
+ }
+
+ eks_managed_node_groups = {
+ # It's recommended to have a Managed Node group for hosting critical add-ons
+ # You can leverage nodeSelector and Taints/tolerations to distribute workloads across Managed Node group or Karpenter nodes.
+ core_node_group = {
+ name = "core-node-group"
+ description = "EKS Core node group for hosting critical add-ons"
+ # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned
+ subnet_ids = compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null])
+
+ min_size = 2
+ max_size = 6
+ desired_size = 2
+
+ instance_types = ["m5.xlarge"]
+
+ ebs_optimized = true
+ block_device_mappings = {
+ xvda = {
+ device_name = "/dev/xvda"
+ ebs = {
+ volume_size = 100
+ volume_type = "gp3"
+ }
+ }
+ }
+
+ labels = {
+ WorkerType = "ON_DEMAND"
+ NodeGroupType = "core"
+ }
+
+ tags = merge(local.tags, {
+ Name = "core-node-grp" })
+ },
+
+ superset_node_group = {
+ name = "superset-node-group"
+ description = "Apache Superset node group"
+ # Filtering only Secondary CIDR private subnets starting with "100.". Subnet IDs where the nodes/node groups will be provisioned
+ subnet_ids = compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null])
+
+ min_size = 4
+ max_size = 8
+ desired_size = 4
+
+ instance_types = ["m5.xlarge"]
+
+ ebs_optimized = true
+ block_device_mappings = {
+ xvda = {
+ device_name = "/dev/xvda"
+ ebs = {
+ volume_size = 100
+ volume_type = "gp3"
+ }
+ }
+ }
+
+ labels = {
+ WorkerType = "ON_DEMAND"
+ NodeGroupType = "superset"
+ }
+
+ tags = merge(local.tags, {
+ Name = "superset-node-grp" })
+ }
+ }
+}
diff --git a/analytics/terraform/superset-on-eks/outputs.tf b/analytics/terraform/superset-on-eks/outputs.tf
new file mode 100755
index 000000000..c3a5d2a38
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/outputs.tf
@@ -0,0 +1,11 @@
+output "configure_kubectl" {
+ description = "Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig"
+ value = "aws eks --region ${var.region} update-kubeconfig --name ${var.name}"
+}
+
+output "superset_url" {
+ description = "Configure kubectl: Once the kubeconfig is configured as above, use the below command to get the Superset URL"
+ value = < https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html
+provider "aws" {
+ alias = "ecr"
+ region = "us-east-1"
+}
diff --git a/analytics/terraform/superset-on-eks/values.yaml b/analytics/terraform/superset-on-eks/values.yaml
new file mode 100644
index 000000000..f3017bbcc
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/values.yaml
@@ -0,0 +1,831 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default values for superset.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# A README is automatically generated from this file to document it, using helm-docs (see https://github.com/norwoodj/helm-docs)
+# To update it, install helm-docs and run helm-docs from the root of this chart
+
+# -- Provide a name to override the name of the chart
+nameOverride: ~
+# -- Provide a name to override the full names of resources
+fullnameOverride: ~
+
+# -- User ID directive. This user must have enough permissions to run the bootstrap script
+# Running containers as root is not recommended in production. Change this to another UID - e.g. 1000 to be more secure
+runAsUser: 0
+
+# -- Specify service account name to be used
+serviceAccountName: ~
+serviceAccount:
+ # -- Create custom service account for Superset. If create: true and serviceAccountName is not provided, `superset.fullname` will be used.
+ create: false
+ annotations: {}
+
+# -- Install additional packages and do any other bootstrap configuration in this script
+# For production clusters it's recommended to build own image with this step done in CI
+# @default -- see `values.yaml`
+bootstrapScript: |
+ #!/bin/bash
+ if [ ! -f ~/bootstrap ]; then echo "Running Superset with uid {{ .Values.runAsUser }}" > ~/bootstrap; fi
+
+# -- The name of the secret which we will use to generate a superset_config.py file
+# Note: this secret must have the key superset_config.py in it and can include other files as well
+configFromSecret: '{{ template "superset.fullname" . }}-config'
+
+# -- The name of the secret which we will use to populate env vars in deployed pods
+# This can be useful for secret keys, etc.
+envFromSecret: '{{ template "superset.fullname" . }}-env'
+# -- This can be a list of templated strings
+envFromSecrets: []
+
+# -- Extra environment variables that will be passed into pods
+extraEnv: {}
+ # Different gunicorn settings, refer to the gunicorn documentation
+ # https://docs.gunicorn.org/en/stable/settings.html#
+ # These variables are used as Flags at the gunicorn startup
+ # https://github.com/apache/superset/blob/master/docker/run-server.sh#L22
+ # Extend timeout to allow long running queries.
+ # GUNICORN_TIMEOUT: 300
+ # Increase the gunicorn worker amount, can improve performance drastically
+ # See: https://docs.gunicorn.org/en/stable/design.html#how-many-workers
+ # SERVER_WORKER_AMOUNT: 4
+ # WORKER_MAX_REQUESTS: 0
+ # WORKER_MAX_REQUESTS_JITTER: 0
+ # SERVER_THREADS_AMOUNT: 20
+ # GUNICORN_KEEPALIVE: 2
+ # SERVER_LIMIT_REQUEST_LINE: 0
+ # SERVER_LIMIT_REQUEST_FIELD_SIZE: 0
+
+ # OAUTH_HOME_DOMAIN: ..
+ # # If a whitelist is not set, any address that can use your OAuth2 endpoint will be able to login.
+ # # this includes any random Gmail address if your OAuth2 Web App is set to External.
+ # OAUTH_WHITELIST_REGEX: ...
+
+# -- Extra environment variables in RAW format that will be passed into pods
+extraEnvRaw: []
+ # Load DB password from other secret (e.g. for zalando operator)
+ # - name: DB_PASS
+ # valueFrom:
+ # secretKeyRef:
+ # name: superset.superset-postgres.credentials.postgresql.acid.zalan.do
+ # key: password
+
+# -- Extra environment variables to pass as secrets
+extraSecretEnv: {}
+ # MAPBOX_API_KEY: ...
+ # # Google API Keys: https://console.cloud.google.com/apis/credentials
+ # GOOGLE_KEY: ...
+ # GOOGLE_SECRET: ...
+ # # Generate your own secret key for encryption. Use openssl rand -base64 42 to generate a good key
+ # SUPERSET_SECRET_KEY: 'CHANGE_ME_TO_A_COMPLEX_RANDOM_SECRET'
+
+# -- Extra files to mount on `/app/pythonpath`
+extraConfigs: {}
+ # import_datasources.yaml: |
+ # databases:
+ # - allow_file_upload: true
+ # allow_ctas: true
+ # allow_cvas: true
+ # database_name: example-db
+ # extra: "{\r\n \"metadata_params\": {},\r\n \"engine_params\": {},\r\n \"\
+ # metadata_cache_timeout\": {},\r\n \"schemas_allowed_for_file_upload\": []\r\n\
+ # }"
+ # sqlalchemy_uri: example://example-db.local
+ # tables: []
+
+# -- Extra files to mount on `/app/pythonpath` as secrets
+extraSecrets: {}
+
+extraVolumes: []
+ # - name: customConfig
+ # configMap:
+ # name: '{{ template "superset.fullname" . }}-custom-config'
+ # - name: additionalSecret
+ # secret:
+ # secretName: my-secret
+ # defaultMode: 0600
+
+extraVolumeMounts: []
+ # - name: customConfig
+ # mountPath: /mnt/config
+ # readOnly: true
+ # - name: additionalSecret:
+ # mountPath: /mnt/secret
+
+# -- A dictionary of overrides to append at the end of superset_config.py - the name does not matter
+# WARNING: the order is not guaranteed
+# Files can be passed as helm --set-file configOverrides.my-override=my-file.py
+configOverrides:
+ secret: |
+ SECRET_KEY = '5WPuGEgPfGTrk9MCVLFkzNk0fO4hyfsykSrM03fHn1m8d3yQQd4yjyvf'
+
+ # extend_timeout: |
+ # # Extend timeout to allow long running queries.
+ # SUPERSET_WEBSERVER_TIMEOUT = ...
+ # enable_oauth: |
+ # from flask_appbuilder.security.manager import (AUTH_DB, AUTH_OAUTH)
+ # AUTH_TYPE = AUTH_OAUTH
+ # OAUTH_PROVIDERS = [
+ # {
+ # "name": "google",
+ # "whitelist": [ os.getenv("OAUTH_WHITELIST_REGEX", "") ],
+ # "icon": "fa-google",
+ # "token_key": "access_token",
+ # "remote_app": {
+ # "client_id": os.environ.get("GOOGLE_KEY"),
+ # "client_secret": os.environ.get("GOOGLE_SECRET"),
+ # "api_base_url": "https://www.googleapis.com/oauth2/v2/",
+ # "client_kwargs": {"scope": "email profile"},
+ # "request_token_url": None,
+ # "access_token_url": "https://accounts.google.com/o/oauth2/token",
+ # "authorize_url": "https://accounts.google.com/o/oauth2/auth",
+ # "authorize_params": {"hd": os.getenv("OAUTH_HOME_DOMAIN", "")}
+ # }
+ # }
+ # ]
+ # # Map Authlib roles to superset roles
+ # AUTH_ROLE_ADMIN = 'Admin'
+ # AUTH_ROLE_PUBLIC = 'Public'
+ # # Will allow user self registration, allowing to create Flask users from Authorized User
+ # AUTH_USER_REGISTRATION = True
+ # # The default user self registration role
+ # AUTH_USER_REGISTRATION_ROLE = "Admin"
+ #secret: |
+ # SECRET_KEY = '5WPuGEgPfGTrk9MCVLFkzNk0fO4hyfsykSrM03fHn1m8d3yQQd4yjyvf'
+
+# -- Same as above but the values are files
+configOverridesFiles: {}
+ # extend_timeout: extend_timeout.py
+ # enable_oauth: enable_oauth.py
+
+configMountPath: "/app/pythonpath"
+
+extraConfigMountPath: "/app/configs"
+
+image:
+ repository: apachesuperset.docker.scarf.sh/apache/superset
+ tag: ~
+ pullPolicy: IfNotPresent
+
+imagePullSecrets: []
+
+initImage:
+ repository: apache/superset
+ tag: dockerize
+ pullPolicy: IfNotPresent
+
+service:
+ type: ClusterIP
+ port: 8088
+ annotations: {}
+ # cloud.google.com/load-balancer-type: "Internal"
+ loadBalancerIP: ~
+ nodePort:
+ # -- (int)
+ http: nil
+
+ingress:
+ enabled: false
+ ingressClassName: ~
+ annotations: {}
+ # kubernetes.io/tls-acme: "true"
+ ## Extend timeout to allow long running queries.
+ # nginx.ingress.kubernetes.io/proxy-connect-timeout: "300"
+ # nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
+ # nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
+ path: /
+ pathType: ImplementationSpecific
+ hosts:
+ - chart-example.local
+ tls: []
+ extraHostsRaw: []
+ # - secretName: chart-example-tls
+ # hosts:
+ # - chart-example.local
+
+resources: {}
+ # We usually recommend not to specify default resources and to leave this as a conscious
+ # choice for the user. This also increases chances charts run on environments with little
+ # resources, such as Minikube. If you do want to specify resources, uncomment the following
+ # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # The limits below will apply to all Superset components. To set individual resource limitations refer to the pod specific values below.
+ # The pod specific values will overwrite anything that is set here.
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+
+# -- Custom hostAliases for all superset pods
+## https://kubernetes.io/docs/tasks/network/customize-hosts-file-for-pods/
+hostAliases: []
+# - hostnames:
+# - nodns.my.lan
+# ip: 18.27.36.45
+
+# Superset node configuration
+supersetNode:
+ replicaCount: 1
+ autoscaling:
+ enabled: false
+ minReplicas: 1
+ maxReplicas: 100
+ targetCPUUtilizationPercentage: 80
+ # targetMemoryUtilizationPercentage: 80
+
+ # -- Startup command
+ # @default -- See `values.yaml`
+ command:
+ - "/bin/sh"
+ - "-c"
+ - ". {{ .Values.configMountPath }}/superset_bootstrap.sh; /usr/bin/run-server.sh"
+ connections:
+ # -- Change in case of bringing your own redis and then also set redis.enabled:false
+ redis_host: '{{ .Release.Name }}-redis-headless'
+ redis_port: "6379"
+ redis_user: ""
+ # redis_password: superset
+ redis_cache_db: "1"
+ redis_celery_db: "0"
+ # Or SSL port is usually 6380
+ # Update following for using Redis with SSL
+ redis_ssl:
+ enabled: false
+ ssl_cert_reqs: CERT_NONE
+ db_host: '{{ .Release.Name }}-postgresql'
+ db_port: "5432"
+ db_user: superset
+ db_pass: superset
+ db_name: superset
+ env: {}
+ # -- If true, forces deployment to reload on each upgrade
+ forceReload: false
+ # -- Init containers
+ # @default -- a container waiting for postgres
+ initContainers:
+ - name: wait-for-postgres
+ image: "{{ .Values.initImage.repository }}:{{ .Values.initImage.tag }}"
+ imagePullPolicy: "{{ .Values.initImage.pullPolicy }}"
+ envFrom:
+ - secretRef:
+ name: "{{ tpl .Values.envFromSecret . }}"
+ command:
+ - /bin/sh
+ - -c
+ - dockerize -wait "tcp://$DB_HOST:$DB_PORT" -timeout 120s
+
+ # -- Launch additional containers into supersetNode pod
+ extraContainers: []
+ # -- Annotations to be added to supersetNode deployment
+ deploymentAnnotations: {}
+ # -- Labels to be added to supersetNode deployment
+ deploymentLabels: {}
+ # -- Affinity to be added to supersetNode deployment
+ affinity: {}
+ # -- TopologySpreadConstrains to be added to supersetNode deployments
+ topologySpreadConstraints: []
+ # -- Annotations to be added to supersetNode pods
+ podAnnotations: {}
+ # -- Labels to be added to supersetNode pods
+ podLabels: {}
+ startupProbe:
+ httpGet:
+ path: /health
+ port: http
+ initialDelaySeconds: 15
+ timeoutSeconds: 1
+ failureThreshold: 60
+ periodSeconds: 5
+ successThreshold: 1
+ livenessProbe:
+ httpGet:
+ path: /health
+ port: http
+ initialDelaySeconds: 15
+ timeoutSeconds: 1
+ failureThreshold: 3
+ periodSeconds: 15
+ successThreshold: 1
+ readinessProbe:
+ httpGet:
+ path: /health
+ port: http
+ initialDelaySeconds: 15
+ timeoutSeconds: 1
+ failureThreshold: 3
+ periodSeconds: 15
+ successThreshold: 1
+ # -- Resource settings for the supersetNode pods - these settings overwrite might existing values from the global resources object defined above.
+ resources: {}
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+ podSecurityContext: {}
+ containerSecurityContext: {}
+ strategy: {}
+ # type: RollingUpdate
+ # rollingUpdate:
+ # maxSurge: 25%
+ # maxUnavailable: 25%
+
+# Superset Celery worker configuration
+supersetWorker:
+ replicaCount: 1
+ autoscaling:
+ enabled: false
+ minReplicas: 1
+ maxReplicas: 100
+ targetCPUUtilizationPercentage: 80
+ # targetMemoryUtilizationPercentage: 80
+
+ # -- Worker startup command
+ # @default -- a `celery worker` command
+ command:
+ - "/bin/sh"
+ - "-c"
+ - ". {{ .Values.configMountPath }}/superset_bootstrap.sh; celery --app=superset.tasks.celery_app:app worker"
+ # -- If true, forces deployment to reload on each upgrade
+ forceReload: false
+ # -- Init container
+ # @default -- a container waiting for postgres and redis
+ initContainers:
+ - name: wait-for-postgres-redis
+ image: "{{ .Values.initImage.repository }}:{{ .Values.initImage.tag }}"
+ imagePullPolicy: "{{ .Values.initImage.pullPolicy }}"
+ envFrom:
+ - secretRef:
+ name: "{{ tpl .Values.envFromSecret . }}"
+ command:
+ - /bin/sh
+ - -c
+ - dockerize -wait "tcp://$DB_HOST:$DB_PORT" -wait "tcp://$REDIS_HOST:$REDIS_PORT" -timeout 120s
+ # -- Launch additional containers into supersetWorker pod
+ extraContainers: []
+ # -- Annotations to be added to supersetWorker deployment
+ deploymentAnnotations: {}
+ # -- Labels to be added to supersetWorker deployment
+ deploymentLabels: {}
+ # -- Affinity to be added to supersetWorker deployment
+ affinity: {}
+  # -- TopologySpreadConstraints to be added to supersetWorker deployments
+ topologySpreadConstraints: []
+ # -- Annotations to be added to supersetWorker pods
+ podAnnotations: {}
+ # -- Labels to be added to supersetWorker pods
+ podLabels: {}
+  # -- Resource settings for the supersetWorker pods - these settings might overwrite existing values from the global resources object defined above.
+ resources: {}
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+ podSecurityContext: {}
+ containerSecurityContext: {}
+ strategy: {}
+ # type: RollingUpdate
+ # rollingUpdate:
+ # maxSurge: 25%
+ # maxUnavailable: 25%
+ livenessProbe:
+ exec:
+ # -- Liveness probe command
+ # @default -- a `celery inspect ping` command
+ command:
+ - sh
+ - -c
+ - celery -A superset.tasks.celery_app:app inspect ping -d celery@$HOSTNAME
+ initialDelaySeconds: 120
+ timeoutSeconds: 60
+ failureThreshold: 3
+ periodSeconds: 60
+ successThreshold: 1
+ # -- No startup/readiness probes by default since we don't really care about its startup time (it doesn't serve traffic)
+ startupProbe: {}
+ # -- No startup/readiness probes by default since we don't really care about its startup time (it doesn't serve traffic)
+ readinessProbe: {}
+
+# Superset beat configuration (to trigger scheduled jobs like reports)
+supersetCeleryBeat:
+ # -- This is only required if you intend to use alerts and reports
+ enabled: false
+ # -- Command
+ # @default -- a `celery beat` command
+ command:
+ - "/bin/sh"
+ - "-c"
+ - ". {{ .Values.configMountPath }}/superset_bootstrap.sh; celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid --schedule /tmp/celerybeat-schedule"
+ # -- If true, forces deployment to reload on each upgrade
+ forceReload: false
+ # -- List of init containers
+ # @default -- a container waiting for postgres
+ initContainers:
+ - name: wait-for-postgres-redis
+ image: "{{ .Values.initImage.repository }}:{{ .Values.initImage.tag }}"
+ imagePullPolicy: "{{ .Values.initImage.pullPolicy }}"
+ envFrom:
+ - secretRef:
+ name: "{{ tpl .Values.envFromSecret . }}"
+ command:
+ - /bin/sh
+ - -c
+ - dockerize -wait "tcp://$DB_HOST:$DB_PORT" -wait "tcp://$REDIS_HOST:$REDIS_PORT" -timeout 120s
+ # -- Launch additional containers into supersetCeleryBeat pods
+ extraContainers: []
+ # -- Annotations to be added to supersetCeleryBeat deployment
+ deploymentAnnotations: {}
+ # -- Affinity to be added to supersetCeleryBeat deployment
+ affinity: {}
+  # -- TopologySpreadConstraints to be added to supersetCeleryBeat deployments
+ topologySpreadConstraints: []
+ # -- Annotations to be added to supersetCeleryBeat pods
+ podAnnotations: {}
+ # -- Labels to be added to supersetCeleryBeat pods
+ podLabels: {}
+  # -- Resource settings for the CeleryBeat pods - these settings might overwrite existing values from the global resources object defined above.
+ resources: {}
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+ podSecurityContext: {}
+ containerSecurityContext: {}
+
+supersetCeleryFlower:
+ # -- Enables a Celery flower deployment (management UI to monitor celery jobs)
+ # WARNING: on superset 1.x, this requires a Superset image that has `flower<1.0.0` installed (which is NOT the case of the default images)
+ # flower>=1.0.0 requires Celery 5+ which Superset 1.5 does not support
+ enabled: false
+ replicaCount: 1
+ # -- Command
+ # @default -- a `celery flower` command
+ command:
+ - "/bin/sh"
+ - "-c"
+ - "celery --app=superset.tasks.celery_app:app flower"
+ service:
+ type: ClusterIP
+ annotations: {}
+ loadBalancerIP: ~
+ port: 5555
+ nodePort:
+ # -- (int)
+ http: nil
+ startupProbe:
+ httpGet:
+ path: /api/workers
+ port: flower
+ initialDelaySeconds: 5
+ timeoutSeconds: 1
+ failureThreshold: 60
+ periodSeconds: 5
+ successThreshold: 1
+ livenessProbe:
+ httpGet:
+ path: /api/workers
+ port: flower
+ initialDelaySeconds: 5
+ timeoutSeconds: 1
+ failureThreshold: 3
+ periodSeconds: 5
+ successThreshold: 1
+ readinessProbe:
+ httpGet:
+ path: /api/workers
+ port: flower
+ initialDelaySeconds: 5
+ timeoutSeconds: 1
+ failureThreshold: 3
+ periodSeconds: 5
+ successThreshold: 1
+ # -- List of init containers
+ # @default -- a container waiting for postgres and redis
+ initContainers:
+ - name: wait-for-postgres-redis
+ image: "{{ .Values.initImage.repository }}:{{ .Values.initImage.tag }}"
+ imagePullPolicy: "{{ .Values.initImage.pullPolicy }}"
+ envFrom:
+ - secretRef:
+ name: "{{ tpl .Values.envFromSecret . }}"
+ command:
+ - /bin/sh
+ - -c
+ - dockerize -wait "tcp://$DB_HOST:$DB_PORT" -wait "tcp://$REDIS_HOST:$REDIS_PORT" -timeout 120s
+ # -- Launch additional containers into supersetCeleryFlower pods
+ extraContainers: []
+ # -- Annotations to be added to supersetCeleryFlower deployment
+ deploymentAnnotations: {}
+ # -- Affinity to be added to supersetCeleryFlower deployment
+ affinity: {}
+  # -- TopologySpreadConstraints to be added to supersetCeleryFlower deployments
+ topologySpreadConstraints: []
+ # -- Annotations to be added to supersetCeleryFlower pods
+ podAnnotations: {}
+ # -- Labels to be added to supersetCeleryFlower pods
+ podLabels: {}
+  # -- Resource settings for the CeleryFlower pods - these settings might overwrite existing values from the global resources object defined above.
+ resources: {}
+ # limits:
+ # cpu: 100m
+ # memory: 128Mi
+ # requests:
+ # cpu: 100m
+ # memory: 128Mi
+ podSecurityContext: {}
+ containerSecurityContext: {}
+
+supersetWebsockets:
+ # -- This is only required if you intend to use `GLOBAL_ASYNC_QUERIES` in `ws` mode
+ # see https://github.com/apache/superset/blob/master/CONTRIBUTING.md#async-chart-queries
+ enabled: false
+ replicaCount: 1
+ ingress:
+ path: /ws
+ pathType: Prefix
+ image:
+ # -- There is no official image (yet), this one is community-supported
+ repository: oneacrefund/superset-websocket
+ tag: latest
+ pullPolicy: IfNotPresent
+ # -- The config.json to pass to the server, see https://github.com/apache/superset/tree/master/superset-websocket
+ # Note that the configuration can also read from environment variables (which will have priority), see https://github.com/apache/superset/blob/master/superset-websocket/src/config.ts for a list of supported variables
+ # @default -- see `values.yaml`
+ config:
+ {
+ "port": 8080,
+ "logLevel": "debug",
+ "logToFile": false,
+ "logFilename": "app.log",
+ "statsd": { "host": "127.0.0.1", "port": 8125, "globalTags": [] },
+ "redis":
+ {
+ "port": 6379,
+ "host": "127.0.0.1",
+ "password": "",
+ "db": 0,
+ "ssl": false,
+ },
+ "redisStreamPrefix": "async-events-",
+ "jwtSecret": "CHANGE-ME",
+ "jwtCookieName": "async-token",
+ }
+ service:
+ type: ClusterIP
+ annotations: {}
+ loadBalancerIP: ~
+ port: 8080
+ nodePort:
+ # -- (int)
+ http: nil
+ command: []
+ resources: {}
+ # -- Launch additional containers into supersetWebsockets pods
+ extraContainers: []
+ deploymentAnnotations: {}
+ # -- Affinity to be added to supersetWebsockets deployment
+ affinity: {}
+  # -- TopologySpreadConstraints to be added to supersetWebsockets deployments
+ topologySpreadConstraints: []
+ podAnnotations: {}
+ podLabels: {}
+ strategy: {}
+ podSecurityContext: {}
+ containerSecurityContext: {}
+ startupProbe:
+ httpGet:
+ path: /health
+ port: ws
+ initialDelaySeconds: 5
+ timeoutSeconds: 1
+ failureThreshold: 60
+ periodSeconds: 5
+ successThreshold: 1
+ livenessProbe:
+ httpGet:
+ path: /health
+ port: ws
+ initialDelaySeconds: 5
+ timeoutSeconds: 1
+ failureThreshold: 3
+ periodSeconds: 5
+ successThreshold: 1
+ readinessProbe:
+ httpGet:
+ path: /health
+ port: ws
+ initialDelaySeconds: 5
+ timeoutSeconds: 1
+ failureThreshold: 3
+ periodSeconds: 5
+ successThreshold: 1
+
+init:
+ # Configure resources
+ # Warning: fab command consumes a lot of ram and can
+ # cause the process to be killed due to OOM if it exceeds limit
+  # Make sure you set a strong password for the admin user at creation time (otherwise, change it right after setup).
+ # Also change the admin email to your own custom email.
+ resources: {}
+ # limits:
+ # cpu:
+ # memory:
+ # requests:
+ # cpu:
+ # memory:
+ # -- Command
+ # @default -- a `superset_init.sh` command
+ command:
+ - "/bin/sh"
+ - "-c"
+ - ". {{ .Values.configMountPath }}/superset_bootstrap.sh; . {{ .Values.configMountPath }}/superset_init.sh"
+ enabled: true
+ jobAnnotations:
+ "helm.sh/hook": post-install,post-upgrade
+ "helm.sh/hook-delete-policy": "before-hook-creation"
+ loadExamples: false
+ createAdmin: true
+ adminUser:
+ username: admin
+ firstname: Superset
+ lastname: Admin
+ email: admin@superset.com
+ password: admin
+ # -- List of initContainers
+ # @default -- a container waiting for postgres
+ initContainers:
+ - name: wait-for-postgres
+ image: "{{ .Values.initImage.repository }}:{{ .Values.initImage.tag }}"
+ imagePullPolicy: "{{ .Values.initImage.pullPolicy }}"
+ envFrom:
+ - secretRef:
+ name: "{{ tpl .Values.envFromSecret . }}"
+ command:
+ - /bin/sh
+ - -c
+ - dockerize -wait "tcp://$DB_HOST:$DB_PORT" -timeout 120s
+ # -- A Superset init script
+ # @default -- a script to create admin user and initialize roles
+ initscript: |-
+ #!/bin/sh
+ set -eu
+ echo "Upgrading DB schema..."
+ superset db upgrade
+ echo "Initializing roles..."
+ superset init
+ {{ if .Values.init.createAdmin }}
+ echo "Creating admin user..."
+ superset fab create-admin \
+ --username {{ .Values.init.adminUser.username }} \
+ --firstname {{ .Values.init.adminUser.firstname }} \
+ --lastname {{ .Values.init.adminUser.lastname }} \
+ --email {{ .Values.init.adminUser.email }} \
+ --password {{ .Values.init.adminUser.password }} \
+ || true
+ {{- end }}
+ {{ if .Values.init.loadExamples }}
+ echo "Loading examples..."
+ superset load_examples
+ {{- end }}
+ if [ -f "{{ .Values.extraConfigMountPath }}/import_datasources.yaml" ]; then
+ echo "Importing database connections.... "
+ superset import_datasources -p {{ .Values.extraConfigMountPath }}/import_datasources.yaml
+ fi
+ # -- Launch additional containers into init job pod
+ extraContainers: []
+ ## Annotations to be added to init job pods
+ podAnnotations: {}
+ podSecurityContext: {}
+ containerSecurityContext: {}
+ ## Tolerations to be added to init job pods
+ tolerations: []
+ ## Affinity to be added to init job pods
+ affinity: {}
+  # -- TopologySpreadConstraints to be added to init job
+ topologySpreadConstraints: []
+
+# -- Configuration values for the postgresql dependency.
+# ref: https://github.com/bitnami/charts/tree/main/bitnami/postgresql
+# @default -- see `values.yaml`
+postgresql:
+ ##
+ ## Use the PostgreSQL chart dependency.
+ ## Set to false if bringing your own PostgreSQL.
+ enabled: true
+
+ ## Authentication parameters
+ auth:
+ ## The name of an existing secret that contains the postgres password.
+ existingSecret:
+ ## PostgreSQL name for a custom user to create
+ username: superset
+ ## PostgreSQL password for the custom user to create. Ignored if `auth.existingSecret` with key `password` is provided
+ password: superset
+ ## PostgreSQL name for a custom database to create
+ database: superset
+
+ image:
+ tag: "14.6.0-debian-11-r13"
+
+ ## PostgreSQL Primary parameters
+ primary:
+ ##
+ ## Persistent Volume Storage configuration.
+ ## ref: https://kubernetes.io/docs/user-guide/persistent-volumes
+ persistence:
+ ##
+ ## Enable PostgreSQL persistence using Persistent Volume Claims.
+ enabled: true
+ ##
+ ## Persistent class
+ # storageClass: classname
+ ##
+ ## Access modes:
+ accessModes:
+ - ReadWriteOnce
+ ## PostgreSQL port
+ service:
+ ports:
+ postgresql: "5432"
+
+# -- Configuration values for the Redis dependency.
+# ref: https://github.com/bitnami/charts/blob/master/bitnami/redis
+# More documentation can be found here: https://artifacthub.io/packages/helm/bitnami/redis
+# @default -- see `values.yaml`
+redis:
+ ##
+ ## Use the redis chart dependency.
+ ##
+ ## If you are bringing your own redis, you can set the host in supersetNode.connections.redis_host
+ ##
+ ## Set to false if bringing your own redis.
+ enabled: true
+ ##
+ ## Set architecture to standalone/replication
+ architecture: standalone
+ ##
+ ## Auth configuration:
+ ##
+ auth:
+ ## Enable password authentication
+ enabled: false
+ ## The name of an existing secret that contains the redis password.
+ existingSecret: ""
+ ## Name of the key containing the secret.
+ existingSecretKey: ""
+ ## Redis password
+ password: superset
+ ##
+ ## Master configuration
+ ##
+ master:
+ ##
+ ## Image configuration
+ # image:
+ ##
+ ## docker registry secret names (list)
+ # pullSecrets: nil
+ ##
+ persistence:
+ ##
+ ## Use a PVC to persist data.
+ enabled: false
+ ##
+ ## Persistent class
+ # storageClass: classname
+ ##
+ ## Access mode:
+ accessModes:
+ - ReadWriteOnce
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+# -- TopologySpreadConstraints to be added to all deployments
+topologySpreadConstraints: []
diff --git a/analytics/terraform/superset-on-eks/variables.tf b/analytics/terraform/superset-on-eks/variables.tf
new file mode 100755
index 000000000..49366ba68
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/variables.tf
@@ -0,0 +1,32 @@
+variable "name" {
+ description = "Name of the VPC and EKS Cluster"
+ default = "superset-on-eks"
+ type = string
+}
+
+variable "region" {
+ description = "Region"
+ type = string
+ default = "us-east-1"
+}
+
+variable "eks_cluster_version" {
+ description = "EKS Cluster version"
+ default = "1.29"
+ type = string
+}
+
+# VPC with 2048 IPs (10.1.0.0/21) and 2 AZs
+variable "vpc_cidr" {
+ description = "VPC CIDR. This should be a valid private (RFC 1918) CIDR range"
+ default = "10.1.0.0/21"
+ type = string
+}
+
+# RFC6598 range 100.64.0.0/10
+# Note you can only add a /16 range to the VPC. You can add multiples of /16 if required
+variable "secondary_cidr_blocks" {
+ description = "Secondary CIDR blocks to be attached to VPC"
+ default = ["100.64.0.0/16"]
+ type = list(string)
+}
diff --git a/analytics/terraform/superset-on-eks/versions.tf b/analytics/terraform/superset-on-eks/versions.tf
new file mode 100644
index 000000000..591d6754b
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/versions.tf
@@ -0,0 +1,29 @@
+terraform {
+ required_version = ">= 1.0.0"
+
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ version = ">= 3.72"
+ }
+ kubernetes = {
+ source = "hashicorp/kubernetes"
+ version = ">= 2.10"
+ }
+ helm = {
+ source = "hashicorp/helm"
+ version = ">= 2.4.1"
+ }
+ random = {
+ source = "hashicorp/random"
+ version = "3.3.2"
+ }
+ }
+
+ # ## Used for end-to-end testing on project; update to suit your needs
+ # backend "s3" {
+ # bucket = "doeks-github-actions-e2e-test-state"
+ # region = "us-west-2"
+ # key = "e2e/superset/terraform.tfstate"
+ # }
+}
diff --git a/analytics/terraform/superset-on-eks/vpc.tf b/analytics/terraform/superset-on-eks/vpc.tf
new file mode 100755
index 000000000..e9d80ee29
--- /dev/null
+++ b/analytics/terraform/superset-on-eks/vpc.tf
@@ -0,0 +1,51 @@
+locals {
+ # Routable Private subnets only for Private NAT Gateway -> Transit Gateway -> Second VPC for overlapping CIDRs
+ # e.g., var.vpc_cidr = "10.1.0.0/21" => output: ["10.1.0.0/24", "10.1.1.0/24"] => 256-2 = 254 usable IPs per subnet/AZ
+ private_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 3, k)]
+ # Routable Public subnets with NAT Gateway and Internet Gateway
+ # e.g., var.vpc_cidr = "10.1.0.0/21" => output: ["10.1.2.0/26", "10.1.2.64/26"] => 64-2 = 62 usable IPs per subnet/AZ
+ public_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 5, k + 8)]
+ # RFC6598 range 100.64.0.0/16 for EKS Data Plane for two subnets(32768 IPs per Subnet) across two AZs for EKS Control Plane ENI + Nodes + Pods
+ # e.g., var.secondary_cidr_blocks = "100.64.0.0/16" => output: ["100.64.0.0/17", "100.64.128.0/17"] => 32768-2 = 32766 usable IPs per subnet/AZ
+ secondary_ip_range_private_subnets = [for k, v in local.azs : cidrsubnet(element(var.secondary_cidr_blocks, 0), 1, k)]
+}
+
+#---------------------------------------------------------------
+# VPC
+#---------------------------------------------------------------
+# WARNING: This VPC module includes the creation of an Internet Gateway and NAT Gateway, which simplifies cluster deployment and testing, primarily intended for sandbox accounts.
+# IMPORTANT: For preprod and prod use cases, it is crucial to consult with your security team and AWS architects to design a private infrastructure solution that aligns with your security requirements
+
+module "vpc" {
+ source = "terraform-aws-modules/vpc/aws"
+ version = "~> 5.0"
+
+ name = local.name
+ cidr = var.vpc_cidr
+ azs = local.azs
+
+ # Secondary CIDR block attached to VPC for EKS Control Plane ENI + Nodes + Pods
+ secondary_cidr_blocks = var.secondary_cidr_blocks
+
+ # 1/ EKS Data Plane secondary CIDR blocks for two subnets across two AZs for EKS Control Plane ENI + Nodes + Pods
+ # 2/ Two private Subnets with RFC1918 private IPv4 address range for Private NAT + NLB + Airflow + EC2 Jumphost etc.
+ private_subnets = concat(local.private_subnets, local.secondary_ip_range_private_subnets)
+
+ # ------------------------------
+ # Optional Public Subnets for NAT and IGW for PoC/Dev/Test environments
+ # Public Subnets can be disabled while deploying to Production and use Private NAT + TGW
+ public_subnets = local.public_subnets
+ enable_nat_gateway = true
+ single_nat_gateway = true
+ #-------------------------------
+
+ public_subnet_tags = {
+ "kubernetes.io/role/elb" = 1
+ }
+
+ private_subnet_tags = {
+ "kubernetes.io/role/internal-elb" = 1
+ }
+
+ tags = local.tags
+}
diff --git a/website/docs/blueprints/data-analytics/img/superset1.png b/website/docs/blueprints/data-analytics/img/superset1.png
new file mode 100644
index 000000000..dcfd7e76a
Binary files /dev/null and b/website/docs/blueprints/data-analytics/img/superset1.png differ
diff --git a/website/docs/blueprints/data-analytics/img/superset2.png b/website/docs/blueprints/data-analytics/img/superset2.png
new file mode 100644
index 000000000..ac9f22ca0
Binary files /dev/null and b/website/docs/blueprints/data-analytics/img/superset2.png differ
diff --git a/website/docs/blueprints/data-analytics/superset-on-eks.md b/website/docs/blueprints/data-analytics/superset-on-eks.md
new file mode 100644
index 000000000..880badd4e
--- /dev/null
+++ b/website/docs/blueprints/data-analytics/superset-on-eks.md
@@ -0,0 +1,102 @@
+---
+sidebar_position: 5
+sidebar_label: Superset on EKS
+---
+# Superset on EKS
+
+## Introduction
+[Apache Superset](https://superset.apache.org/) is a popular open source data exploration and visualization platform. Superset provides a rich set of data visualizations and easy ad-hoc query and analysis functionalities for data scientists, analysts, and business users.
+
+This [blueprint](https://github.com/awslabs/data-on-eks/tree/main/analytics/terraform/superset-on-eks) deploys Superset on an EKS cluster using Postgres as the backend database and Amazon Elastic Block Store (Amazon EBS) for persistent storage.
+
+## Superset on AWS
+
+On AWS, Superset can run on an EKS cluster. By using EKS, you can leverage Kubernetes for deployment, scaling, and management of Superset services. Other AWS services like VPC, IAM, and EBS provide the networking, security, and storage capabilities.
+
+Key AWS services used:
+
+- Amazon EKS as the managed Kubernetes cluster to run Superset pods and services.
+- Amazon EBS to provide a scalable block store for Superset persistent storage.
+- Amazon ECR to store Docker container images for Superset and dependencies
+
+## Deploying the Solution
+
+The blueprint performs the following to deploy Superset on EKS:
+
+- Create a new VPC with public and private subnets
+- Provision an EKS cluster control plane and managed worker nodes
+- Provision Amazon EBS-backed persistent storage for Superset via the EBS CSI driver
+- Build Docker images and push to Amazon ECR
+- Install Superset and services on EKS via Helm chart
+- Expose Superset UI through a load balancer
+
+ Ingress is enabled and AWS LoadBalancer Controller will provision an ALB to expose the Superset frontend UI.
+
+:::info
+You may customize the blueprint by changing values in `variables.tf`, to deploy to a different region (defaults to `us-east-1`), use a different cluster name, number of subnets / AZs, or disable addons like fluentbit
+:::
+
+
+### Prerequisites
+
+Ensure that you have installed the following tools on your machine.
+
+1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html)
+2. [kubectl](https://Kubernetes.io/docs/tasks/tools/)
+3. [terraform](https://learn.hashicorp.com/tutorials/terraform/install-cli)
+4. [Helm](https://helm.sh)
+
+### Deploy
+
+Clone the repository
+
+```bash
+git clone https://github.com/awslabs/data-on-eks.git
+```
+
+Navigate into one of the example directories and run the `install.sh` script
+
+```bash
+cd data-on-eks/analytics/terraform/superset-on-eks
+chmod +x install.sh
+./install.sh
+```
+or simply
+```bash
+terraform init
+terraform apply --auto-approve
+```
+
+
+### Verify Deployment
+
+After the deployment completes, we can access the Superset UI. For demo purposes, this blueprint creates the Ingress object for the Superset frontend UI with a public LoadBalancer.
+
+
+You may find the URL to the Superset frontend from the output `superset_url`, or by running the kubectl command below:
+
+```sh
+kubectl get ingress -n superset
+
+# OUTPUT should look like below
+NAME CLASS HOSTS ADDRESS PORTS AGE
+superset-ingress aws-alb * k8s-superset-***.***.elb.amazonaws.com 80 125m
+```
+
+Copy the ADDRESS field from the output, then open a browser and enter the URL as `http://<ADDRESS>/`. Enter `admin` as both the user name and password when prompted. We can view the Superset UI like below.
+
+![img.png](img/superset1.png)
+![img.png](img/superset2.png)
+
+## Cleanup
+
+To clean up your environment, run the `cleanup.sh` script.
+
+```bash
+chmod +x cleanup.sh
+./cleanup.sh
+```
+or simply
+```bash
+terraform destroy --auto-approve
+```