Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
131879: drtprod: add drt-scale yaml for 150 node testing r=nameisbhaskar a=vidit-bhat

This PR adds a YAML config for creating a 150-node, single-region cluster for scale testing, along with its corresponding 9-node workload cluster. It also enables WAL failover for `drt-large` and `drt-chaos`, and increases the store count in `drt-chaos` to 4.

Epic: none
Release note: None

Co-authored-by: Vidit Bhat <[email protected]>
  • Loading branch information
craig[bot] and vidit-bhat committed Oct 5, 2024
2 parents f842c3b + 3b8758d commit b5ca162
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/cmd/drtprod/configs/drt_chaos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ targets:
- "./cockroach"
flags:
enable-fluent-sink: true
store-count: 4
args: --wal-failover=among-stores
restart: false
sql-port: 26257
on_rollback:
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/drtprod/configs/drt_large.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ targets:
flags:
enable-fluent-sink: true
store-count: 4
args: --wal-failover=among-stores
restart: false
sql-port: 26257
on_rollback:
Expand Down
108 changes: 108 additions & 0 deletions pkg/cmd/drtprod/configs/drt_scale.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Config for creating and configuring the drt-scale cluster (150 nodes) and its
# workload cluster (9 nodes). It also runs the setup_datadog_* scripts.
environment:
  # NOTE(review): this value looks like an e-mail obfuscation placeholder, not
  # a real service account - restore the actual account before using this file.
  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
  ROACHPROD_DNS: drt.crdb.io
  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
  ROACHPROD_GCE_DNS_ZONE: drt
  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
  # Referenced below as $CLUSTER and $WORKLOAD_CLUSTER.
  CLUSTER: drt-scale
  WORKLOAD_CLUSTER: workload-scale

targets:
  # crdb cluster specs
  - target_name: $CLUSTER
    steps:
      # Create the 150-node cluster in a single GCE zone.
      - command: create
        args:
          - $CLUSTER
        flags:
          clouds: gce
          gce-managed: true
          gce-enable-multiple-stores: true
          gce-zones: "us-central1-a"
          nodes: 150
          gce-machine-type: n2-standard-16
          local-ssd: true
          gce-local-ssd-count: 4
          os-volume-size: 100
          username: drt
          lifetime: 8760h  # 365 days
          gce-image: "ubuntu-2204-jammy-v20240319"
        on_rollback:
          - command: destroy
            args:
              - $CLUSTER
      - command: sync
        flags:
          clouds: gce
      - command: stage
        args:
          - $CLUSTER
          - cockroach
      - script: "pkg/cmd/drtprod/configs/setup_datadog_cluster"
      # Start cockroach from the staged binary with WAL failover among stores.
      - command: start
        args:
          - $CLUSTER
          - "--binary"
          - "./cockroach"
        flags:
          # add flag to set provisioned throughput on each store according to their cloud provider limits
          enable-fluent-sink: true
          # Same count as gce-local-ssd-count in the create step
          # (presumably one store per local SSD - confirm).
          store-count: 4
          args: --wal-failover=among-stores
          restart: false
          sql-port: 26257
        on_rollback:
          - command: stop
            args:
              - $CLUSTER
      # Unmask and enable cron, then install an @reboot crontab entry that runs
      # ~/cockroach.sh after a 100 second sleep.
      # NOTE(review): in the generated t.sh only the echo is piped into
      # `crontab -`, so entries printed by `crontab -l` are not carried over -
      # confirm that is intended.
      - command: run
        args:
          - $CLUSTER
          - --
          - "sudo systemctl unmask cron.service ; sudo systemctl enable cron.service ; echo \"crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -\" > t.sh ; sh t.sh ; rm t.sh"
      # workload cluster specs
      # NOTE(review): these steps are part of the $CLUSTER target (no separate
      # target_name entry appears here) - confirm that is intended.
      - command: create
        args:
          - $WORKLOAD_CLUSTER
        flags:
          clouds: gce
          gce-zones: "us-central1-a"
          nodes: 9
          gce-machine-type: n2-standard-8
          os-volume-size: 100
          username: workload
          lifetime: 8760h  # 365 days
        on_rollback:
          - command: destroy
            args:
              - $WORKLOAD_CLUSTER
      - command: sync
        flags:
          clouds: gce
      - command: stage
        args:
          - $WORKLOAD_CLUSTER
          - cockroach
      - command: stage
        args:
          - $WORKLOAD_CLUSTER
          - workload
      - script: "pkg/cmd/drtprod/configs/setup_datadog_workload"
      # Fetch the certs directory from node 1 of the crdb cluster, push it to
      # the workload cluster, then tighten the permissions on the copies.
      - command: get
        args:
          - $CLUSTER:1
          - certs
          - certs-$CLUSTER
      - command: put
        args:
          - $WORKLOAD_CLUSTER
          - certs-$CLUSTER
          - certs
      - command: ssh
        args:
          - $WORKLOAD_CLUSTER
          - --
          - chmod
          # Quoted so the file mode is read as a string, not an integer.
          - "600"
          - ./certs/*
21 changes: 21 additions & 0 deletions pkg/cmd/drtprod/configs/drt_scale_destroy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Config for destroying the drt-scale cluster and its workload cluster.
environment:
  # NOTE(review): this value looks like an e-mail obfuscation placeholder, not
  # a real service account - restore the actual account before using this file.
  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
  ROACHPROD_DNS: drt.crdb.io
  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
  ROACHPROD_GCE_DNS_ZONE: drt
  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
  # Referenced below as $CLUSTER and $WORKLOAD_CLUSTER.
  CLUSTER: drt-scale
  WORKLOAD_CLUSTER: workload-scale

targets:
  # One target per cluster; each has a single destroy step.
  - target_name: $CLUSTER
    steps:
      - command: destroy
        args:
          - $CLUSTER
  - target_name: $WORKLOAD_CLUSTER
    steps:
      - command: destroy
        args:
          - $WORKLOAD_CLUSTER

0 comments on commit b5ca162

Please sign in to comment.