From 3b8758ddc23bf3e506fcf814c3532ef96a5fd774 Mon Sep 17 00:00:00 2001
From: Vidit Bhat
Date: Fri, 4 Oct 2024 10:29:16 +0530
Subject: [PATCH] drtprod: add drt-scale yaml for 150 node testing

This PR adds a YAML to support creating a 150 node single
region cluster for scale testing and its corresponding
9 node workload cluster.
It also enables WAL Failover for `drt-large` and `drt-chaos`
and increases stores in `drt-chaos` to 4.

Epic: none
Release note: None
---
 pkg/cmd/drtprod/configs/drt_chaos.yaml        |   2 +
 pkg/cmd/drtprod/configs/drt_large.yaml        |   1 +
 pkg/cmd/drtprod/configs/drt_scale.yaml        | 108 ++++++++++++++++++
 .../drtprod/configs/drt_scale_destroy.yaml    |  21 ++++
 4 files changed, 132 insertions(+)
 create mode 100644 pkg/cmd/drtprod/configs/drt_scale.yaml
 create mode 100644 pkg/cmd/drtprod/configs/drt_scale_destroy.yaml

diff --git a/pkg/cmd/drtprod/configs/drt_chaos.yaml b/pkg/cmd/drtprod/configs/drt_chaos.yaml
index 04d412a6c30a..7258d8f5bbb9 100644
--- a/pkg/cmd/drtprod/configs/drt_chaos.yaml
+++ b/pkg/cmd/drtprod/configs/drt_chaos.yaml
@@ -45,6 +45,8 @@ targets:
           - "./cockroach"
         flags:
           enable-fluent-sink: true
+          store-count: 4
+          args: --wal-failover=among-stores
           restart: false
           sql-port: 26257
         on_rollback:
diff --git a/pkg/cmd/drtprod/configs/drt_large.yaml b/pkg/cmd/drtprod/configs/drt_large.yaml
index b6a4ea9c749c..4bf22da62b26 100644
--- a/pkg/cmd/drtprod/configs/drt_large.yaml
+++ b/pkg/cmd/drtprod/configs/drt_large.yaml
@@ -46,6 +46,7 @@ targets:
         flags:
           enable-fluent-sink: true
           store-count: 4
+          args: --wal-failover=among-stores
           restart: false
           sql-port: 26257
         on_rollback:
diff --git a/pkg/cmd/drtprod/configs/drt_scale.yaml b/pkg/cmd/drtprod/configs/drt_scale.yaml
new file mode 100644
index 000000000000..d6a2b3d3725a
--- /dev/null
+++ b/pkg/cmd/drtprod/configs/drt_scale.yaml
@@ -0,0 +1,108 @@
+# Yaml for creating and configuring the drt-scale cluster. This also configures the datadog.
+environment:
+  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: 622274581499-compute@developer.gserviceaccount.com
+  ROACHPROD_DNS: drt.crdb.io
+  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
+  ROACHPROD_GCE_DNS_ZONE: drt
+  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
+  CLUSTER: drt-scale
+  WORKLOAD_CLUSTER: workload-scale
+
+targets:
+  # crdb cluster specs
+  - target_name: $CLUSTER
+    steps:
+      - command: create
+        args:
+          - $CLUSTER
+        flags:
+          clouds: gce
+          gce-managed: true
+          gce-enable-multiple-stores: true
+          gce-zones: "us-central1-a"
+          nodes: 150
+          gce-machine-type: n2-standard-16
+          local-ssd: true
+          gce-local-ssd-count: 4
+          os-volume-size: 100
+          username: drt
+          lifetime: 8760h
+          gce-image: "ubuntu-2204-jammy-v20240319"
+        on_rollback:
+          - command: destroy
+            args:
+              - $CLUSTER
+      - command: sync
+        flags:
+          clouds: gce
+      - command: stage
+        args:
+          - $CLUSTER
+          - cockroach
+      - script: "pkg/cmd/drtprod/configs/setup_datadog_cluster"
+      - command: start
+        args:
+          - $CLUSTER
+          - "--binary"
+          - "./cockroach"
+        flags:
+          # add flag to set provisioned throughput on each store according to their cloud provider limits
+          enable-fluent-sink: true
+          store-count: 4
+          args: --wal-failover=among-stores
+          restart: false
+          sql-port: 26257
+        on_rollback:
+          - command: stop
+            args:
+              - $CLUSTER
+      - command: run
+        args:
+          - $CLUSTER
+          - --
+          - "sudo systemctl unmask cron.service ; sudo systemctl enable cron.service ; echo \"crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -\" > t.sh ; sh t.sh ; rm t.sh"
+      # workload cluster specs
+      - command: create
+        args:
+          - $WORKLOAD_CLUSTER
+        flags:
+          clouds: gce
+          gce-zones: "us-central1-a"
+          nodes: 9
+          gce-machine-type: n2-standard-8
+          os-volume-size: 100
+          username: workload
+          lifetime: 8760h
+        on_rollback:
+          - command: destroy
+            args:
+              - $WORKLOAD_CLUSTER
+      - command: sync
+        flags:
+          clouds: gce
+      - command: stage
+        args:
+          - $WORKLOAD_CLUSTER
+          - cockroach
+      - command: stage
+        args:
+          - $WORKLOAD_CLUSTER
+          - workload
+      - script: "pkg/cmd/drtprod/configs/setup_datadog_workload"
+      - command: get
+        args:
+          - $CLUSTER:1
+          - certs
+          - certs-$CLUSTER
+      - command: put
+        args:
+          - $WORKLOAD_CLUSTER
+          - certs-$CLUSTER
+          - certs
+      - command: ssh
+        args:
+          - $WORKLOAD_CLUSTER
+          - --
+          - chmod
+          - 600
+          - ./certs/*
diff --git a/pkg/cmd/drtprod/configs/drt_scale_destroy.yaml b/pkg/cmd/drtprod/configs/drt_scale_destroy.yaml
new file mode 100644
index 000000000000..8103ed89ec06
--- /dev/null
+++ b/pkg/cmd/drtprod/configs/drt_scale_destroy.yaml
@@ -0,0 +1,21 @@
+# Yaml for destroying the drt-scale cluster.
+environment:
+  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: 622274581499-compute@developer.gserviceaccount.com
+  ROACHPROD_DNS: drt.crdb.io
+  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
+  ROACHPROD_GCE_DNS_ZONE: drt
+  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
+  CLUSTER: drt-scale
+  WORKLOAD_CLUSTER: workload-scale
+
+targets:
+  - target_name: $CLUSTER
+    steps:
+      - command: destroy
+        args:
+          - $CLUSTER
+  - target_name: $WORKLOAD_CLUSTER
+    steps:
+      - command: destroy
+        args:
+          - $WORKLOAD_CLUSTER