Skip to content

Commit

Permalink
fix: Datahub upgrade to version 0.12.0 (#452)
Browse files Browse the repository at this point in the history
  • Loading branch information
bbgu1 authored Mar 5, 2024
1 parent cde1440 commit a38b7ba
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 85 deletions.
1 change: 0 additions & 1 deletion analytics/terraform/datahub-on-eks/datahub-addon/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ module "prereq" {
vpc_cidr = var.vpc_cidr
vpc_private_subnets = var.vpc_private_subnets

create_iam_service_linked_role_es = var.create_iam_service_linked_role_es
}

resource "kubernetes_namespace" "datahub" {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
locals {
cidr_blocks = [coalesce(var.vpc_cidr, "10.1.0.0/16")]
}

#---------------------------------------------------------------
# OpenSearch For DataHub metadata
#---------------------------------------------------------------
Expand All @@ -13,15 +9,10 @@ resource "aws_security_group" "es" {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = local.cidr_blocks
cidr_blocks = [var.vpc_cidr]
}
}

resource "aws_iam_service_linked_role" "es" {
aws_service_name = "opensearchservice.amazonaws.com"
count = var.create_iam_service_linked_role_es ? 1 : 0
}

resource "random_password" "master_password" {
length = 16
special = true
Expand All @@ -32,7 +23,6 @@ resource "random_password" "master_password" {
}

resource "aws_opensearch_domain" "es" {
depends_on = [aws_iam_service_linked_role.es]
domain_name = "${var.prefix}-es-domain"
engine_version = "OpenSearch_1.1"
cluster_config {
Expand Down Expand Up @@ -78,6 +68,7 @@ resource "aws_opensearch_domain" "es" {

# Creating the AWS OpenSearch domain policy
resource "aws_opensearch_domain_policy" "main" {
depends_on = [aws_msk_cluster.msk]
domain_name = aws_opensearch_domain.es.domain_name
access_policies = <<POLICIES
{
Expand Down Expand Up @@ -105,7 +96,7 @@ resource "aws_security_group" "msk" {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = local.cidr_blocks
cidr_blocks = [var.vpc_cidr]
}
}

Expand Down Expand Up @@ -184,7 +175,7 @@ resource "aws_security_group" "rds" {
from_port = 0
to_port = 3306
protocol = "tcp"
cidr_blocks = local.cidr_blocks
cidr_blocks = [var.vpc_cidr]
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,3 @@ variable "vpc_cidr" {
type = string
default = ""
}

variable "create_iam_service_linked_role_es" {
type = bool
default = true
description = "Whether to create `AWSServiceRoleForAmazonOpensearchService` service-linked role. Set it to `false` if the role already exists"
}
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ global:
# secretKey: postgres-password

datahub:
version: v0.10.4
version: v0.12.0

systemUpdate:
enabled: true
Expand All @@ -277,7 +277,7 @@ global:

managed_ingestion:
enabled: true
defaultCliVersion: "0.10.4"
defaultCliVersion: "0.12.0"

metadata_service_authentication:
enabled: true
Expand Down
6 changes: 0 additions & 6 deletions analytics/terraform/datahub-on-eks/datahub-addon/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,3 @@ variable "vpc_cidr" {
type = string
default = ""
}

variable "create_iam_service_linked_role_es" {
type = bool
default = true
description = "Whether to create `AWSServiceRoleForAmazonOpensearchService` service-linked role. Set it to `false` if the role already exists"
}
5 changes: 2 additions & 3 deletions analytics/terraform/datahub-on-eks/datahub.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ module "datahub" {
depends_on = [module.eks, module.eks_blueprints_addons]
source = "./datahub-addon"
prefix = local.name
vpc_id = module.vpc.vpc_id
vpc_id = local.vpc_id
vpc_cidr = local.vpc_cidr
vpc_private_subnets = module.vpc.private_subnets
vpc_private_subnets = local.private_subnets

create_iam_service_linked_role_es = var.create_iam_service_linked_role_es
}
5 changes: 2 additions & 3 deletions analytics/terraform/datahub-on-eks/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ terraform init || echo "\"terraform init\" failed"
# List of Terraform modules to apply in sequence
targets=(
"module.vpc"
"module.vpc_endpoints_sg"
"module.vpc_endpoints"
"module.eks"
"module.ebs_csi_driver_irsa"
Expand All @@ -22,7 +21,7 @@ targets=(
for target in "${targets[@]}"
do
echo "Applying module $target..."
apply_output=$(terraform apply -target="$target" -auto-approve 2>&1 | tee /dev/tty)
apply_output=$(terraform apply -target="$target" -var="region=$region" -auto-approve 2>&1 | tee /dev/tty)
if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then
echo "SUCCESS: Terraform apply of $target completed successfully"
else
Expand All @@ -33,7 +32,7 @@ done

# Final apply to catch any remaining resources
echo "Applying remaining resources..."
apply_output=$(terraform apply -auto-approve 2>&1 | tee /dev/tty)
apply_output=$(terraform apply -var="region=$region" -auto-approve 2>&1 | tee /dev/tty)
if [[ ${PIPESTATUS[0]} -eq 0 && $apply_output == *"Apply complete"* ]]; then
echo "SUCCESS: Terraform apply of all modules completed successfully"
else
Expand Down
3 changes: 3 additions & 0 deletions analytics/terraform/datahub-on-eks/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ locals {
Blueprint = local.name
GithubRepo = "github.com/awslabs/data-on-eks"
})

vpc_id = var.create_vpc ? module.vpc.vpc_id : var.vpc_id
private_subnets = var.create_vpc ? module.vpc.private_subnets : var.private_subnet_ids
}
6 changes: 3 additions & 3 deletions analytics/terraform/datahub-on-eks/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ module "eks" {
cluster_endpoint_private_access = true # if true, Kubernetes API requests within your cluster's VPC (such as node to control plane communication) use the private VPC endpoint
cluster_endpoint_public_access = true # if true, Your cluster API server is accessible from the internet. You can, optionally, limit the CIDR blocks that can access the public endpoint.

vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
vpc_id = local.vpc_id
subnet_ids = local.private_subnets


eks_managed_node_group_defaults = {
Expand All @@ -26,7 +26,7 @@ module "eks" {
core_node_group = {
name = "core-node-group"
description = "EKS managed node group example launch template"
subnet_ids = module.vpc.private_subnets
subnet_ids = local.private_subnets

min_size = 1
max_size = 9
Expand Down
7 changes: 0 additions & 7 deletions analytics/terraform/datahub-on-eks/providers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,3 @@ provider "helm" {
}
}

provider "kubectl" {
apply_retry_count = 30
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
load_config_file = false
token = data.aws_eks_cluster_auth.this.token
}
26 changes: 19 additions & 7 deletions analytics/terraform/datahub-on-eks/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,26 @@ variable "tags" {
type = map(string)
}

variable "create_vpc" {
description = "Create VPC"
default = true
type = bool
}

variable "vpc_id" {
description = "VPC Id for the existing vpc - needed when create_vpc set to false"
default = ""
type = string
}

variable "private_subnet_ids" {
description = "Ids for existing private subnets - needed when create_vpc set to false"
default = []
type = list(string)
}

variable "vpc_cidr" {
description = "VPC CIDR"
description = "VPC CIDR - must change to match the cidr of the existing VPC if create_vpc set to false"
default = "10.1.0.0/16"
type = string
}
Expand All @@ -46,9 +64,3 @@ variable "private_subnets" {
default = ["10.1.0.0/17", "10.1.128.0/18"]
type = list(string)
}

variable "create_iam_service_linked_role_es" {
type = bool
default = true
description = "Whether to create `AWSServiceRoleForAmazonOpensearchService` service-linked role. Set it to `false` if the role already exists"
}
47 changes: 15 additions & 32 deletions analytics/terraform/datahub-on-eks/vpc.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"

create_vpc = var.create_vpc

name = local.name
cidr = local.vpc_cidr
azs = local.azs
Expand Down Expand Up @@ -36,43 +38,24 @@ module "vpc" {
tags = local.tags
}

module "vpc_endpoints_sg" {
source = "terraform-aws-modules/security-group/aws"
version = "~> 5.0"

create = var.enable_vpc_endpoints

name = "${local.name}-vpc-endpoints"
description = "Security group for VPC endpoint access"
vpc_id = module.vpc.vpc_id

ingress_with_cidr_blocks = [
{
rule = "https-443-tcp"
description = "VPC CIDR HTTPS"
cidr_blocks = join(",", module.vpc.private_subnets_cidr_blocks)
},
]

egress_with_cidr_blocks = [
{
rule = "https-443-tcp"
description = "All egress HTTPS"
cidr_blocks = "0.0.0.0/0"
},
]

tags = local.tags
}

module "vpc_endpoints" {
source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints"
version = "~> 5.0"
version = "~> 5.1"

create = var.enable_vpc_endpoints

vpc_id = module.vpc.vpc_id
security_group_ids = [module.vpc_endpoints_sg.security_group_id]
vpc_id = module.vpc.vpc_id

# Security group
create_security_group = true
security_group_name_prefix = "${local.name}-vpc-endpoints-"
security_group_description = "VPC endpoint security group"
security_group_rules = {
ingress_https = {
description = "HTTPS from VPC"
cidr_blocks = [module.vpc.vpc_cidr_block]
}
}

endpoints = merge({
s3 = {
Expand Down
15 changes: 13 additions & 2 deletions website/docs/blueprints/data-analytics/datahub-on-eks.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,19 @@ DataHub also depends on many underlying infrastructure and services to function,

## Deploying the Solution

This blueprint deploys an EKS Cluster into a new VPC.
This blueprint deploys an EKS Cluster into a new VPC by default:

- Creates a new sample VPC, 2 Private Subnets and 2 Public Subnets
- Creates Internet gateway for Public Subnets and NAT Gateway for Private Subnets

You may also deploy to an existing VPC by setting the `create_vpc` variable to `false` and specifying the `vpc_id`, `private_subnet_ids`, and `vpc_cidr` values of that VPC. In either case, the blueprint then:

- Creates EKS Cluster Control plane with public endpoint (for demo reasons only) with core managed node group, on-demand node group and Spot node group for Spark workloads.
- Deploys Metrics server, Cluster Autoscaler, Prometheus server and AMP workspace, and AWS LoadBalancer Controller.

It then provisions the storage services for DataHub.

- Creates service-linked role, security group, and an OpenSearch domain with one data node in each of the private subnets / AZs that EKS cluster is deployed on.
- Creates a security group and an OpenSearch domain with one data node in each of the private subnets / AZs that the EKS cluster is deployed in.
- Creates security group, kms key, and configuration for MSK. Creates the MSK cluster with one broker in each of the private subnets.
- Creates an RDS MySQL db instance with multi-AZ enabled.

Expand All @@ -55,6 +58,14 @@ Ensure that you have installed the following tools on your machine.
2. [kubectl](https://Kubernetes.io/docs/tasks/tools/)
3. [terraform](https://learn.hashicorp.com/tutorials/terraform/install-cli)

You also need the OpenSearch service-linked role to exist in your AWS account. To create the role if it does not already exist, run:
```
aws iam create-service-linked-role --aws-service-name opensearchservice.amazonaws.com || true
```

If the role has already been created, the command prints the following error, which can be safely ignored:

```
An error occurred (InvalidInput) when calling the CreateServiceLinkedRole operation: Service role name AWSServiceRoleForOpenSearch has been taken in this account, please try a different suffix.
```

### Deploy

Clone the repository
Expand Down

0 comments on commit a38b7ba

Please sign in to comment.