-
Notifications
You must be signed in to change notification settings - Fork 811
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Replication/failover simulation skeleton (#6627)
- Loading branch information
1 parent
8b5e2a5
commit 6f0a746
Showing
8 changed files
with
525 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Dynamic config override for the "default" replication simulation scenario,
# which is configured via host/testdata/replication_simulation_default.yaml.

# One entry with an empty constraints map; the batch size is set to 25.
history.replicatorTaskBatchSize:
  - value: 25
    constraints: {}
205 changes: 205 additions & 0 deletions
205
docker/buildkite/docker-compose-local-replication-simulation.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
# Local replication/failover simulation environment.
#
# Brings up one Cassandra node, Prometheus and Grafana, two Cadence clusters
# (cadence-cluster0 and cadence-cluster1) with a web UI each, and a
# replication-simulator container that runs the Go simulation tests.
# ${SCENARIO} must be set in the environment: it selects the dynamic config
# file passed to both clusters.
version: "3.5"

services:
  # Single Cassandra node used by both clusters (each uses its own keyspaces).
  cassandra:
    image: cassandra:4.1.1
    environment:
      - "MAX_HEAP_SIZE=256M"
      - "HEAP_NEWSIZE=128M"
    expose:
      - "9042"
    networks:
      services-network:
        aliases:
          - cassandra
    # The clusters wait for this check to pass (condition: service_healthy).
    healthcheck:
      test: ["CMD", "cqlsh", "-u cassandra", "-p cassandra", "-e describe keyspaces"]
      interval: 15s
      timeout: 30s
      retries: 10

  # Prometheus, configured from the ./prometheus directory mounted below.
  prometheus:
    image: prom/prometheus:v3.0.1
    volumes:
      - ./prometheus:/etc/prometheus
    command:
      - "--config.file=/etc/prometheus/replication_simulation_prometheus.yml"
    ports:
      - "9090:9090"
    networks:
      services-network:
        aliases:
          - prometheus

  # Grafana, configured from the ./grafana directory mounted below.
  grafana:
    image: grafana/grafana:11.4.0
    volumes:
      - ./grafana:/etc/grafana
    user: "1000"
    depends_on:
      - prometheus
    ports:
      - "3000:3000"
    networks:
      services-network:
        aliases:
          - grafana

  # First Cadence cluster; container ports are published on the same host ports.
  cadence-cluster0:
    build:
      context: ../../
      dockerfile: ./Dockerfile
      args:
        TARGET: auto-setup
    command:
      - /start.sh
    ports:
      - "7933:7933"  # frontend thrift
      - "7833:7833"  # frontend grpc
      - "7934:7934"  # history thrift
      - "7834:7834"  # history grpc
      - "7935:7935"  # matching thrift
      - "7835:7835"  # matching grpc
      - "7939:7939"  # worker thrift
      - "7000:7000"  # frontend prometheus
      - "7001:7001"  # matching prometheus
      - "7002:7002"  # history prometheus
      - "7003:7003"  # worker prometheus
    environment:
      - "BIND_ON_IP=0.0.0.0"
      - "PRIMARY_FRONTEND_SERVICE=cadence-cluster0"
      - "SECONDARY_FRONTEND_SERVICE=cadence-cluster1"
      - "CASSANDRA_SEEDS=cassandra"
      - "DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/replication_simulation_${SCENARIO}.yml"
      - "ENABLE_GLOBAL_DOMAIN=true"
      - "KEYSPACE=cadence_primary"
      - "VISIBILITY_KEYSPACE=cadence_visibility_primary"
      - "PROMETHEUS_ENDPOINT_0=0.0.0.0:7000"  # frontend scrape endpoint
      - "PROMETHEUS_ENDPOINT_1=0.0.0.0:7001"  # matching scrape endpoint
      - "PROMETHEUS_ENDPOINT_2=0.0.0.0:7002"  # history scrape endpoint
      - "PROMETHEUS_ENDPOINT_3=0.0.0.0:7003"  # worker scrape endpoint
    depends_on:
      cassandra:
        condition: service_healthy
      prometheus:
        condition: service_started
    networks:
      services-network:
        aliases:
          - cadence-cluster0

  # Second Cadence cluster (IS_NOT_PRIMARY=true) with its own keyspaces.
  cadence-cluster1:
    build:
      context: ../../
      dockerfile: ./Dockerfile
      args:
        TARGET: auto-setup
    command:
      - /start.sh
    # cluster1 uses 8xxx host ports to avoid conflicts with cluster0.
    # Its prometheus endpoints listen on 8000-8003 inside the container too.
    ports:
      - "8933:7933"  # frontend thrift
      - "8833:7833"  # frontend grpc
      - "8934:7934"  # history thrift
      - "8834:7834"  # history grpc
      - "8935:7935"  # matching thrift
      - "8835:7835"  # matching grpc
      - "8939:7939"  # worker thrift
      - "8000:8000"  # frontend prometheus
      - "8001:8001"  # matching prometheus
      - "8002:8002"  # history prometheus
      - "8003:8003"  # worker prometheus
    environment:
      - "BIND_ON_IP=0.0.0.0"
      - "PRIMARY_FRONTEND_SERVICE=cadence-cluster0"
      - "SECONDARY_FRONTEND_SERVICE=cadence-cluster1"
      - "CASSANDRA_SEEDS=cassandra"
      - "DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/replication_simulation_${SCENARIO}.yml"
      - "IS_NOT_PRIMARY=true"
      - "ENABLE_GLOBAL_DOMAIN=true"
      - "KEYSPACE=cadence_secondary"
      - "VISIBILITY_KEYSPACE=cadence_visibility_secondary"
      - "PROMETHEUS_ENDPOINT_0=0.0.0.0:8000"  # frontend scrape endpoint
      - "PROMETHEUS_ENDPOINT_1=0.0.0.0:8001"  # matching scrape endpoint
      - "PROMETHEUS_ENDPOINT_2=0.0.0.0:8002"  # history scrape endpoint
      - "PROMETHEUS_ENDPOINT_3=0.0.0.0:8003"  # worker scrape endpoint
    depends_on:
      cassandra:
        condition: service_healthy
      prometheus:
        condition: service_started
    networks:
      services-network:
        aliases:
          - cadence-cluster1

  # Web UI pointed at cluster0's frontend; published on host port 8088.
  cadence-web-cluster0:
    image: ubercadence/web:latest
    environment:
      - "CADENCE_TCHANNEL_PEERS=cadence-cluster0:7933"
    ports:
      - "8088:8088"
    depends_on:
      - cadence-cluster0
    networks:
      services-network:
        aliases:
          - cadence-web-cluster0

  # Web UI pointed at cluster1's frontend; published on host port 8089.
  cadence-web-cluster1:
    image: ubercadence/web:latest
    environment:
      - "CADENCE_TCHANNEL_PEERS=cadence-cluster1:7933"
    ports:
      - "8089:8088"
    depends_on:
      - cadence-cluster1
    networks:
      services-network:
        aliases:
          # Must match this service's own name: reusing cadence-web-cluster0
          # here would make that name resolve to both web containers.
          - cadence-web-cluster1

  # Runs the replication simulation tests once the rest of the stack is started.
  replication-simulator:
    build:
      context: ../../
      dockerfile: ./docker/buildkite/Dockerfile
    # NOTE(review): the pipeline's exit status is tee's, so a failing `go test`
    # may not fail the container; confirm the caller inspects test.log.
    command:
      - /bin/sh
      - "-e"
      - "-c"
      - >
        go test -timeout 180s
        -run ^TestReplicationSimulation.*$
        -count 1
        -v
        -tags replicationsim
        github.com/uber/cadence/host
        | tee test.log
    depends_on:
      cadence-cluster0:
        condition: service_started
      cadence-cluster1:
        condition: service_started
      cadence-web-cluster0:
        condition: service_started
      cadence-web-cluster1:
        condition: service_started
      grafana:
        condition: service_started
    # Expose prometheus ports so they can be scraped.
    ports:
      - "8306:8306"
      - "8307:8307"
      - "8308:8308"
      - "8309:8309"
    volumes:
      - ../../:/cadence
      - /cadence/.build/  # ensure we don't mount the build directory
      - /cadence/.bin/  # ensure we don't mount the bin directory
    networks:
      services-network:
        aliases:
          - replication-simulator

# Single bridge network joined by every service above.
networks:
  services-network:
    name: services-network
    driver: bridge
23 changes: 23 additions & 0 deletions
23
docker/buildkite/prometheus/replication_simulation_prometheus.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Prometheus configuration for the replication simulation.
# Scrapes four endpoints per Cadence cluster every 5 seconds and tags each
# group of targets with a `cluster` label.
global:
  scrape_interval: 5s
  external_labels:
    monitor: "cadence-monitor"
  # Query and scrape-failure logs are written under /etc/prometheus.
  query_log_file: /etc/prometheus/query.log
  scrape_failure_log_file: /etc/prometheus/scrape.log

scrape_configs:
  - job_name: "prometheus"
    static_configs:
      # Addresses to scrape from cluster0.
      - targets:
          - "cadence-cluster0:7000"  # frontend
          - "cadence-cluster0:7001"  # matching
          - "cadence-cluster0:7002"  # history
          - "cadence-cluster0:7003"  # worker
        labels:
          cluster: "cluster0"
      # Addresses to scrape from cluster1.
      - targets:
          - "cadence-cluster1:8000"  # frontend
          - "cadence-cluster1:8001"  # matching
          - "cadence-cluster1:8002"  # history
          - "cadence-cluster1:8003"  # worker
        labels:
          cluster: "cluster1"
Oops, something went wrong.