dydxprotocol · ttl33 · Oct 16, 2024 · Aug 7, 2024 · Aug 8, 2024 · Aug 8, 2024
@@ -1,5 +1,6 @@
 module "backup_full_node_ap_northeast_1" {
   source = "../modules/validator"
+  count  = var.create_backup_full_node ? 1 : 0
 
   environment = var.environment
 
@@ -37,7 +38,16 @@ module "backup_full_node_ap_northeast_1" {
 
   use_persistent_docker_volume = var.full_node_use_persistent_docker_volume
 
+  root_block_device_size                  = var.full_node_root_block_device_size
+  root_block_device_delete_on_termination = true
+  ecs_task_cpu_architecture               = var.fullnode_ecs_task_cpu_architecture
+
   providers = {
     aws = aws.ap_northeast_1
   }
 }
+
+# The module above now uses `count`, so its single instance is addressed with index [0].
+# This block maps the previous un-indexed address to the new indexed one so that existing
+# state is carried over to the new address.
+moved {
+  from = module.backup_full_node_ap_northeast_1
+  to   = module.backup_full_node_ap_northeast_1[0]
+}
@@ -50,7 +50,7 @@ resource "aws_ecs_service" "main" {
       aws_subnet.private_subnets[subnet_name].id
     ] : [for subnet in aws_subnet.private_subnets : subnet.id]
     security_groups  = [aws_security_group.services[each.key].id]
-    assign_public_ip = true
+    assign_public_ip = false
   }
 
   dynamic "load_balancer" {
@@ -162,6 +162,7 @@ resource "aws_ecs_task_definition" "main" {
 
   runtime_platform {
     operating_system_family = "LINUX"
+    cpu_architecture        = var.indexer_ecs_task_cpu_architecture
   }
 
   tags = {

@@ -36,6 +36,10 @@ module "full_node_ap_northeast_1" {
 
   use_persistent_docker_volume = var.full_node_use_persistent_docker_volume
 
+  root_block_device_size                  = var.full_node_root_block_device_size
+  root_block_device_delete_on_termination = true
+  ecs_task_cpu_architecture               = var.fullnode_ecs_task_cpu_architecture
+
   providers = {
     aws = aws.ap_northeast_1
   }

@@ -12,8 +12,8 @@ resource "aws_lambda_function" "main" {
   package_type  = "Image"
   function_name = "${each.key}_lambda_function"
   role          = aws_iam_role.lambda_services[each.key].arn
-  architectures = ["x86_64"]
-  timeout       = 120
+  architectures = [lower(var.lambda_cpu_architecture)]
+  timeout       = 300
 
   environment {
     variables = merge(

@@ -38,6 +38,10 @@ locals {
       should_deploy_in_rds_subnet : true,
       ecs_environment_variables : flatten(
         [
+          {
+            name : "AWS_REGION",
+            value : var.region,
+          },
           {
             name : "PG_POOL_MAX",
             value : "30"
@@ -51,7 +55,7 @@ locals {
       ),
     },
     "${local.service_names["comlink"]}" : {
-      ecs_desired_count : 5,
+      ecs_desired_count : var.comlink_ecs_desired_count,
       task_definition_memory : 4096,
       task_definition_cpu : 2048,
       is_public_facing : true,
@@ -97,8 +101,8 @@ locals {
       ),
     },
     "${local.service_names["socks"]}" : {
-      ecs_desired_count : 5,
-      task_definition_memory : 20480,
+      ecs_desired_count : var.socks_ecs_desired_count,
+      task_definition_memory : 8192,
       task_definition_cpu : 4096,
       is_public_facing : true,
       ports : [8080, 8000],
@@ -109,6 +113,10 @@ locals {
       should_deploy_in_rds_subnet : false,
       ecs_environment_variables : flatten(
         [
+          {
+            name : "AWS_REGION",
+            value : var.region,
+          },
           {
             name : "COMLINK_URL",
             value : aws_lb.public.dns_name,
@@ -195,7 +203,7 @@ locals {
       ),
     },
     "${local.service_names["vulcan"]}" : {
-      ecs_desired_count : 5,
+      ecs_desired_count : var.vulcan_ecs_desired_count,
       task_definition_memory : 8192,
       task_definition_cpu : 4096,
       is_public_facing : false,
@@ -207,6 +215,10 @@ locals {
       should_deploy_in_rds_subnet : false,
       ecs_environment_variables : flatten(
         [
+          {
+            name : "AWS_REGION",
+            value : var.region,
+          },
           {
             name : "PG_POOL_MAX",
             value : "2"

@@ -19,6 +19,8 @@ resource "aws_msk_configuration" "main" {
   message.max.bytes=4194304
   unclean.leader.election.enable=true
   zookeeper.session.timeout.ms=6000
+  replica.selector.class = org.apache.kafka.common.replica.RackAwareReplicaSelector
+  log.retention.hours = 120
   PROPERTIES
 
   lifecycle {
@@ -36,7 +38,7 @@ resource "aws_msk_cluster" "main" {
     instance_type = var.msk_instance_type
     storage_info {
       ebs_storage_info {
-        volume_size = var.environment == "mainnet" ? 2000 : 1000 # in GB
+        volume_size = var.msk_storage_size
       }
     }
     client_subnets = [
@@ -56,4 +58,4 @@ resource "aws_msk_cluster" "main" {
     arn      = aws_msk_configuration.main.arn
     revision = aws_msk_configuration.main.latest_revision
   }
-}
+}
@@ -215,7 +215,7 @@ resource "aws_db_instance" "main" {
   performance_insights_enabled          = true
   performance_insights_retention_period = 31
   auto_minor_version_upgrade            = false
-  multi_az                              = true
+  multi_az                              = var.enable_rds_main_multiaz
 
   tags = {
     Name        = local.aws_db_instance_main_name
@@ -231,6 +231,7 @@ resource "aws_db_instance" "read_replica" {
   # be specified for a replica, and will match the properties on the source db.
   vpc_security_group_ids = [aws_security_group.rds.id]
   parameter_group_name   = aws_db_parameter_group.main.name
+  allocated_storage      = var.rds_read_replica_db_allocated_storage_gb
   publicly_accessible    = false
   # Set to true if any planned changes need to be applied before the next maintenance window.
   apply_immediately                     = false
@@ -250,12 +251,14 @@ resource "aws_db_instance" "read_replica" {
 
 # Read replica 2
 resource "aws_db_instance" "read_replica_2" {
+  count          = var.create_read_replica_2 ? 1 : 0
   identifier     = "${local.aws_db_instance_main_name}-read-replica-2"
   instance_class = var.rds_db_instance_class
   # engine, engine_version, name, username, db_subnet_group_name, allocated_storage do not have to
   # be specified for a replica, and will match the properties on the source db.
   vpc_security_group_ids = [aws_security_group.rds.id]
   parameter_group_name   = aws_db_parameter_group.main.name
+  allocated_storage      = var.rds_read_replica_db_allocated_storage_gb
   publicly_accessible    = false
   # Set to true if any planned changes need to be applied before the next maintenance window.
   apply_immediately                     = false

@@ -19,11 +19,12 @@ resource "aws_route53_record" "read_replica_1" {
 }
 
 resource "aws_route53_record" "read_replica_2" {
+  count   = var.create_read_replica_2 ? 1 : 0
   zone_id = aws_route53_zone.main.zone_id
   name    = "postgres-main-rr.dydx-indexer.private"
   type    = "CNAME"
   ttl     = "30"
-  records = ["${aws_db_instance.read_replica_2.address}"]
+  records = ["${aws_db_instance.read_replica_2[count.index].address}"]
   weighted_routing_policy {
     weight = 1
   }

@@ -68,9 +68,10 @@ resource "aws_route" "full_node_route_to_indexer" {
 # NOTE: This is not an individual AWS resource, but rather an attachment to the route table, and so
 # no tags are added.
 resource "aws_route" "backup_full_node_route_to_indexer" {
-  route_table_id            = module.backup_full_node_ap_northeast_1.route_table_id
+  # Created only when var.create_backup_full_node is true. The backup full node module is
+  # referenced with [0] because it is instantiated with `count`.
+  count                     = var.create_backup_full_node ? 1 : 0
+  route_table_id            = module.backup_full_node_ap_northeast_1[0].route_table_id
   destination_cidr_block    = var.indexers[var.region].vpc_cidr_block
-  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer.id
+  # NOTE(review): assumes aws_vpc_peering_connection.backup_full_node_peer is count-gated on
+  # the same flag (its definition is not visible here) — confirm.
+  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer[0].id
 }
 
 # Route from the Indexer's private subnets to the full node's VPC. Needed so that the full node can
@@ -88,9 +89,9 @@ resource "aws_route" "indexer_route_to_full_node" {
 }
 
 resource "aws_route" "indexer_route_to_backup_full_node" {
-  for_each = aws_route_table.private
+  # One route per entry in aws_route_table.private when var.create_backup_full_node is true;
+  # an empty collection (no routes at all) otherwise.
+  for_each = var.create_backup_full_node ? aws_route_table.private : {}
 
   route_table_id            = each.value.id
   destination_cidr_block    = var.backup_full_node_cidr_vpc
-  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer.id
+  # NOTE(review): assumes aws_vpc_peering_connection.backup_full_node_peer is count-gated on
+  # the same flag (its definition is not visible here) — confirm.
+  vpc_peering_connection_id = aws_vpc_peering_connection.backup_full_node_peer[0].id
 }
@@ -9,6 +9,20 @@ resource "aws_s3_bucket" "load_balancer" {
   }
 }
 
+# Lifecycle policy for the load balancer S3 bucket: expires objects after
+# var.s3_load_balancer_logs_expiration_days days.
+# Only created when var.enable_s3_load_balancer_logs_lifecycle is true.
+resource "aws_s3_bucket_lifecycle_configuration" "load_balancer" {
+  count  = var.enable_s3_load_balancer_logs_lifecycle ? 1 : 0
+  bucket = aws_s3_bucket.load_balancer.id
+
+  rule {
+    id     = "expire-old-logs"
+    status = "Enabled"
+
+    # An empty filter applies the rule to every object in the bucket. It is declared explicitly
+    # because the AWS provider expects one of `filter` or `prefix` on each rule.
+    filter {}
+
+    expiration {
+      days = var.s3_load_balancer_logs_expiration_days
+    }
+  }
+}
+
 # TODO: refactor snapshotting full node into a separate module
 # AWS S3 bucket to store all Indexer full node snapshots
 resource "aws_s3_bucket" "indexer_full_node_snapshots" {
@@ -22,6 +36,21 @@ resource "aws_s3_bucket" "indexer_full_node_snapshots" {
   }
 }
 
+# Lifecycle policy for the Indexer full node snapshots S3 bucket: expires objects after
+# var.s3_snapshot_expiration_days days.
+# Only created when var.enable_s3_snapshot_lifecycle is true.
+resource "aws_s3_bucket_lifecycle_configuration" "indexer_full_node_snapshots" {
+  count  = var.enable_s3_snapshot_lifecycle ? 1 : 0
+  bucket = aws_s3_bucket.indexer_full_node_snapshots.id
+
+  rule {
+    id     = "expire-old-snapshots"
+    status = "Enabled"
+
+    # An empty filter applies the rule to every object in the bucket. It is declared explicitly
+    # because the AWS provider expects one of `filter` or `prefix` on each rule.
+    filter {}
+
+    expiration {
+      days = var.s3_snapshot_expiration_days
+    }
+  }
+}
+
+
 # Enable S3 bucket metrics to be sent to Datadog for monitoring
 resource "aws_s3_bucket_metric" "indexer_full_node_snapshots" {
   bucket = aws_s3_bucket.indexer_full_node_snapshots.id
@@ -64,3 +93,17 @@ resource "aws_s3_bucket" "athena_rds_snapshots" {
     Environment = var.environment
   }
 }
+
+# Lifecycle policy for the Athena RDS snapshots S3 bucket: expires objects after
+# var.s3_rds_snapshot_expiration_days days.
+# Only created when var.enable_s3_rds_snapshot_lifecycle is true.
+resource "aws_s3_bucket_lifecycle_configuration" "athena_rds_snapshots" {
+  count  = var.enable_s3_rds_snapshot_lifecycle ? 1 : 0
+  bucket = aws_s3_bucket.athena_rds_snapshots.id
+
+  rule {
+    id     = "expire-old-snapshots"
+    status = "Enabled"
+
+    # An empty filter applies the rule to every object in the bucket. It is declared explicitly
+    # because the AWS provider expects one of `filter` or `prefix` on each rule.
+    filter {}
+
+    expiration {
+      days = var.s3_rds_snapshot_expiration_days
+    }
+  }
+}
@@ -62,7 +62,7 @@ resource "aws_security_group" "msk" {
     security_groups = flatten([
       aws_security_group.devbox.id,
       module.full_node_ap_northeast_1.aws_security_group_id,
-      module.backup_full_node_ap_northeast_1.aws_security_group_id,
+      var.create_backup_full_node ? [module.backup_full_node_ap_northeast_1[0].aws_security_group_id] : [],
       # Lambda Services
       [
         for service in keys(local.lambda_services) :

@@ -42,7 +42,9 @@ module "full_node_snapshot_ap_northeast_1" {
 
   datadog_env = "snapshot-${var.environment}"
 
-  root_block_device_size = var.full_node_snapshot_ebs_volume_size
+  root_block_device_size                  = var.full_node_snapshot_ebs_volume_size
+  root_block_device_delete_on_termination = true
+  ecs_task_cpu_architecture               = var.fullnode_ecs_task_cpu_architecture
 
   entry_point = [
     "sh",