From 5f2e190d2f6fe1d8c843af8aba2b8f4980821da5 Mon Sep 17 00:00:00 2001 From: Bilal Akhtar Date: Fri, 6 Oct 2023 22:30:03 -0400 Subject: [PATCH] roachtest: wait before calling WaitForRebalance in disagg-rebalance This change addresses a relatively rare flake seen in #111817 where waitForRebalance returned right away after being called after a node was added to the cluster, as it sees that the last replica movement on the cluster was before the node's addition and was sufficiently far in the past to consider the node rebalanced. This change waits for the new node to get at least one replica before calling waitForRebalance, avoiding this race. Fixes #111817. Epic: none Release note: None --- pkg/cmd/roachtest/tests/disagg_rebalance.go | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pkg/cmd/roachtest/tests/disagg_rebalance.go b/pkg/cmd/roachtest/tests/disagg_rebalance.go index 3058c3a5f845..83c7fb51b3f5 100644 --- a/pkg/cmd/roachtest/tests/disagg_rebalance.go +++ b/pkg/cmd/roachtest/tests/disagg_rebalance.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/errors" "github.com/dustin/go-humanize" ) @@ -86,6 +87,22 @@ func registerDisaggRebalance(r registry.Registry) { t.Status("verify rebalance") db := c.Conn(ctx, t.L(), 4) + + // Wait for the new node to get at least one replica before calling + // waitForRebalance. 
+ testutils.SucceedsSoon(t, func() error { + var count int + if err := db.QueryRow( + "SELECT count(*) FROM crdb_internal.ranges WHERE array_position(replicas, $1) IS NOT NULL", + 4, + ).Scan(&count); err != nil { + t.Fatal(err) + } + if count <= 0 { + return errors.New("newly added node n4 has zero replicas") + } + return nil + }) defer func() { _ = db.Close() }() @@ -96,7 +113,7 @@ func registerDisaggRebalance(r registry.Registry) { var count int if err := db.QueryRow( - // Check if the down node has any replicas. + // Check if the new node has any replicas. "SELECT count(*) FROM crdb_internal.ranges WHERE array_position(replicas, $1) IS NOT NULL", 4, ).Scan(&count); err != nil {