Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
111975: roachtest: wait before calling WaitForRebalance in disagg-rebalance r=RaduBerinde a=itsbilal

This change addresses a relatively rare flake seen in cockroachdb#111817, where `waitForRebalance` returned right away when it was called just after a node was added to the cluster: it saw that the last replica movement on the cluster happened before the node's addition and was sufficiently far in the past to consider the node rebalanced. This change waits for the new node to get at least one replica before calling `waitForRebalance`, to avoid that race.

Fixes cockroachdb#111817.

Epic: none

Release note: None

Co-authored-by: Bilal Akhtar <[email protected]>
  • Loading branch information
craig[bot] and itsbilal committed Oct 10, 2023
2 parents d0c9907 + 5f2e190 commit c456346
Showing 1 changed file with 18 additions and 1 deletion.
19 changes: 18 additions & 1 deletion pkg/cmd/roachtest/tests/disagg_rebalance.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/errors"
"github.com/dustin/go-humanize"
)

Expand Down Expand Up @@ -86,6 +87,22 @@ func registerDisaggRebalance(r registry.Registry) {
t.Status("verify rebalance")

db := c.Conn(ctx, t.L(), 4)

// Wait for the new node to get at least one replica before calling
// waitForRebalance.
testutils.SucceedsSoon(t, func() error {
var count int
if err := db.QueryRow(
"SELECT count(*) FROM crdb_internal.ranges WHERE array_position(replicas, $1) IS NOT NULL",
4,
).Scan(&count); err != nil {
t.Fatal(err)
}
if count <= 0 {
return errors.New("newly added node n4 has zero replicas")
}
return nil
})
defer func() {
_ = db.Close()
}()
Expand All @@ -96,7 +113,7 @@ func registerDisaggRebalance(r registry.Registry) {

var count int
if err := db.QueryRow(
// Check if the down node has any replicas.
// Check if the new node has any replicas.
"SELECT count(*) FROM crdb_internal.ranges WHERE array_position(replicas, $1) IS NOT NULL",
4,
).Scan(&count); err != nil {
Expand Down

0 comments on commit c456346

Please sign in to comment.