Skip to content

Commit

Permalink
Add missing test file
Browse files Browse the repository at this point in the history
Signed-off-by: Ping Xie <[email protected]>
  • Loading branch information
PingXie committed May 7, 2024
1 parent 091303a commit fa0285c
Showing 1 changed file with 357 additions and 0 deletions.
357 changes: 357 additions & 0 deletions tests/unit/cluster/slot-migration.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,357 @@
# Return the open (migrating/importing) slot entries that node $srv_idx
# reports for itself, or an empty value when it reports none.
#
# Open entries are the part of the "slots" field that starts at the first
# opening square bracket, e.g. "[609->-<node id>]" or "[609-<-<node id>]".
proc get_open_slots {srv_idx} {
    set myself [cluster_get_myself $srv_idx]
    set slot_field [dict get $myself slots]

    # No square bracket anywhere in the field: no slot is open.
    if {![regexp {\[.*} $slot_field open_part]} {
        return {}
    }

    # Strip curly braces from the matched text (presumably list quoting
    # added around the bracketed entries) so callers can compare against
    # plain strings.
    return [regsub -all {[{}]} $open_part ""]
}

# Return the second element of the flags that node $srv_idx reports for
# itself. Callers in this file compare it against "master" or "slave".
proc get_cluster_role {srv_idx} {
    return [lindex [dict get [cluster_get_myself $srv_idx] flags] 1]
}

# Block until node $srv_idx reports $role both through the ROLE command and
# through its own cluster flags, then wait for the cluster view to propagate.
# Fails the test if either check does not converge in time.
proc wait_for_role {srv_idx role} {
    # Give gossip one cluster-node-timeout (as configured on R 0) to spread
    # state changes throughout the cluster before polling.
    set gossip_delay [lindex [R 0 CONFIG GET cluster-node-timeout] 1]
    after $gossip_delay

    # Replication view: first word of the ROLE reply.
    wait_for_condition 100 100 {
        [lindex [split [R $srv_idx ROLE] " "] 0] eq $role
    } else {
        fail "R $srv_idx didn't assume the replication $role in time"
    }

    # Cluster view: the role flag the node reports about itself.
    wait_for_condition 100 100 {
        [get_cluster_role $srv_idx] eq $role
    } else {
        fail "R $srv_idx didn't assume the cluster $role in time"
    }

    wait_for_cluster_propagation
}

# Poll until the open-slot string of node $srv_idx is exactly $pattern.
# Despite the name, $pattern is compared with "eq", not matched as a glob or
# regular expression. Fails the test, reporting the last observed state, if
# the node never gets there.
proc wait_for_slot_state {srv_idx pattern} {
    wait_for_condition 100 100 {
        $pattern eq [get_open_slots $srv_idx]
    } else {
        fail "incorrect slot state on R $srv_idx: expected $pattern; got [get_open_slots $srv_idx]"
    }
}

# Probe node $server_id with PING.
# Returns 1 when the command succeeds and the reply is exactly "PONG",
# 0 otherwise (including when the command raises an error).
proc check_server_response {server_id} {
    # Any error while talking to the node counts as "not responding".
    if {[catch {R $server_id PING} reply]} {
        return 0
    }
    return [expr {$reply eq "PONG"}]
}

# Restart node $server_id and wait for it to come back online.
#
# The node is restarted via DEBUG RESTART with a delay argument of three
# times the cluster-node-timeout configured on R 0; callers rely on this
# restart to trigger an automatic failover in the node's shard.
# Fails the test on an unexpected restart error or if the node does not
# answer PING again in time.
proc restart_server_and_wait {server_id} {
    set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1]
    # Braced expression: evaluated once, without a second round of
    # substitution on the value read from the server.
    set restart_delay [expr {3 * $node_timeout}]
    set result [catch {R $server_id DEBUG RESTART $restart_delay} err]

    # The connection dropping while the reply is read is the expected way
    # for this command to "fail"; anything else is a real error.
    if {$result != 0 && $err ne "I/O error reading reply"} {
        fail "Unexpected error restarting server $server_id: $err"
    }

    wait_for_condition 100 100 {
        [check_server_response $server_id] eq 1
    } else {
        fail "Server $server_id didn't come back online in time"
    }
}

# Slot migration state handling in a six-node cluster (R0..R5). The checks
# below treat R3 as the replica of R0 and R4 as the replica of R1. An open
# slot shows up as "[slot->-target]" on the migrating side and as
# "[slot-<-source]" on the importing side.
start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } {

    set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1]
    set R0_id [R 0 CLUSTER MYID]
    set R1_id [R 1 CLUSTER MYID]
    set R2_id [R 2 CLUSTER MYID]
    set R3_id [R 3 CLUSTER MYID]
    set R4_id [R 4 CLUSTER MYID]
    set R5_id [R 5 CLUSTER MYID]

    test "Slot migration states are replicated" {
        # Validate initial states: slot 609 is not open anywhere yet
        assert_not_equal [get_open_slots 0] "\[609->-$R1_id\]"
        assert_not_equal [get_open_slots 1] "\[609-<-$R0_id\]"
        assert_not_equal [get_open_slots 3] "\[609->-$R1_id\]"
        assert_not_equal [get_open_slots 4] "\[609-<-$R0_id\]"
        # Kick off the migration of slot 609 from R0 to R1
        assert_equal {OK} [R 0 CLUSTER SETSLOT 609 MIGRATING $R1_id]
        assert_equal {OK} [R 1 CLUSTER SETSLOT 609 IMPORTING $R0_id]
        # Validate that R0 is migrating slot 609 to R1
        assert_equal [get_open_slots 0] "\[609->-$R1_id\]"
        # Validate that R1 is importing slot 609 from R0
        assert_equal [get_open_slots 1] "\[609-<-$R0_id\]"
        # Validate final states, including the replicas R3 and R4
        wait_for_slot_state 0 "\[609->-$R1_id\]"
        wait_for_slot_state 1 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R1_id\]"
        wait_for_slot_state 4 "\[609-<-$R0_id\]"
    }

    test "Migration target is auto-updated after failover in target shard" {
        # Restart R1 to trigger an auto-failover to R4
        restart_server_and_wait 1
        # Wait for R1 to become a replica
        wait_for_role 1 slave
        # Validate final states: the source shard now targets R4
        wait_for_slot_state 0 "\[609->-$R4_id\]"
        wait_for_slot_state 1 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R4_id\]"
        wait_for_slot_state 4 "\[609-<-$R0_id\]"
        # Restore R1's primaryship
        assert_equal {OK} [R 1 cluster failover]
        wait_for_role 1 master
        # Validate that the original states are restored
        wait_for_slot_state 0 "\[609->-$R1_id\]"
        wait_for_slot_state 1 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R1_id\]"
        wait_for_slot_state 4 "\[609-<-$R0_id\]"
    }

    test "Migration source is auto-updated after failover in source shard" {
        # Restart R0 to trigger an auto-failover to R3
        restart_server_and_wait 0
        # Wait for R0 to become a replica
        wait_for_role 0 slave
        # Validate final states: the target shard now imports from R3
        wait_for_slot_state 0 "\[609->-$R1_id\]"
        wait_for_slot_state 1 "\[609-<-$R3_id\]"
        wait_for_slot_state 3 "\[609->-$R1_id\]"
        wait_for_slot_state 4 "\[609-<-$R3_id\]"
        # Restore R0's primaryship
        assert_equal {OK} [R 0 cluster failover]
        wait_for_role 0 master
        # Validate that the original states are restored
        wait_for_slot_state 0 "\[609->-$R1_id\]"
        wait_for_slot_state 1 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R1_id\]"
        wait_for_slot_state 4 "\[609-<-$R0_id\]"
    }

    test "Replica redirects key access in migrating slots" {
        # Validate initial states
        assert_equal [get_open_slots 0] "\[609->-$R1_id\]"
        assert_equal [get_open_slots 1] "\[609-<-$R0_id\]"
        assert_equal [get_open_slots 3] "\[609->-$R1_id\]"
        assert_equal [get_open_slots 4] "\[609-<-$R0_id\]"
        # Reading a key of slot 609 on replica R3 must raise a MOVED error.
        # The command runs directly inside catch: with extra brackets around
        # it, a non-error reply would be executed as a second command and
        # hide the real reply behind an "invalid command name" error.
        catch {R 3 get aga} e
        assert_equal {MOVED} [lindex [split $e] 0]
        assert_equal {609} [lindex [split $e] 1]
    }

    test "New replica inherits migrating slot" {
        # Reset R3 to turn it into an empty node
        assert_equal [get_open_slots 3] "\[609->-$R1_id\]"
        assert_equal {OK} [R 3 CLUSTER RESET]
        assert_not_equal [get_open_slots 3] "\[609->-$R1_id\]"
        # Add R3 back as a replica of R0
        assert_equal {OK} [R 3 CLUSTER MEET [srv 0 "host"] [srv 0 "port"]]
        wait_for_role 0 master
        assert_equal {OK} [R 3 CLUSTER REPLICATE $R0_id]
        wait_for_role 3 slave
        # Validate that R3 now sees slot 609 open
        assert_equal [get_open_slots 3] "\[609->-$R1_id\]"
    }

    test "New replica inherits importing slot" {
        # Reset R4 to turn it into an empty node
        assert_equal [get_open_slots 4] "\[609-<-$R0_id\]"
        assert_equal {OK} [R 4 CLUSTER RESET]
        assert_not_equal [get_open_slots 4] "\[609-<-$R0_id\]"
        # Add R4 back as a replica of R1
        assert_equal {OK} [R 4 CLUSTER MEET [srv -1 "host"] [srv -1 "port"]]
        wait_for_role 1 master
        assert_equal {OK} [R 4 CLUSTER REPLICATE $R1_id]
        wait_for_role 4 slave
        # Validate that R4 now sees slot 609 open
        assert_equal [get_open_slots 4] "\[609-<-$R0_id\]"
    }
}

# Rebuild nodes $p and $r as a fresh shard: both are reset, re-introduced to
# the cluster through the node at srv 0, and $r is then made a replica of $p.
# Returns once $p reports the master role and $r the slave role.
proc create_empty_shard {p r} {
    # Reset both nodes so they rejoin as empty nodes.
    assert_equal {OK} [R $p CLUSTER RESET]
    assert_equal {OK} [R $r CLUSTER RESET]
    assert_equal {OK} [R $p CLUSTER MEET [srv 0 "host"] [srv 0 "port"]]
    assert_equal {OK} [R $r CLUSTER MEET [srv 0 "host"] [srv 0 "port"]]
    wait_for_role $p master
    assert_equal {OK} [R $r CLUSTER REPLICATE [R $p CLUSTER MYID]]
    wait_for_role $r slave
    # Re-check the primary after the replica has attached.
    wait_for_role $p master
}

# Slot migration towards a shard that owns no slots. The cluster has eight
# nodes (R0..R7); R6 and R7 are rebuilt as an empty primary/replica pair
# before the tests run. R3 is treated as the replica of R0.
start_cluster 3 5 {tags {external:skip cluster} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } {

    set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1]
    set R0_id [R 0 CLUSTER MYID]
    set R1_id [R 1 CLUSTER MYID]
    set R2_id [R 2 CLUSTER MYID]
    set R3_id [R 3 CLUSTER MYID]
    set R4_id [R 4 CLUSTER MYID]
    set R5_id [R 5 CLUSTER MYID]

    # The node IDs of R6 and R7 are read only after the reset performed by
    # create_empty_shard.
    create_empty_shard 6 7
    set R6_id [R 6 CLUSTER MYID]
    set R7_id [R 7 CLUSTER MYID]

    test "Empty-shard migration replicates slot importing states" {
        # Validate initial states: slot 609 is not open anywhere yet
        assert_not_equal [get_open_slots 0] "\[609->-$R6_id\]"
        assert_not_equal [get_open_slots 6] "\[609-<-$R0_id\]"
        assert_not_equal [get_open_slots 3] "\[609->-$R6_id\]"
        assert_not_equal [get_open_slots 7] "\[609-<-$R0_id\]"
        # Kick off the migration of slot 609 from R0 to R6
        assert_equal {OK} [R 0 CLUSTER SETSLOT 609 MIGRATING $R6_id]
        assert_equal {OK} [R 6 CLUSTER SETSLOT 609 IMPORTING $R0_id]
        # Validate that R0 is migrating slot 609 to R6
        assert_equal [get_open_slots 0] "\[609->-$R6_id\]"
        # Validate that R6 is importing slot 609 from R0
        assert_equal [get_open_slots 6] "\[609-<-$R0_id\]"
        # Validate final states, including the replicas R3 and R7
        wait_for_slot_state 0 "\[609->-$R6_id\]"
        wait_for_slot_state 6 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R6_id\]"
        wait_for_slot_state 7 "\[609-<-$R0_id\]"
    }

    test "Empty-shard migration target is auto-updated after failover in target shard" {
        wait_for_role 6 master
        # Restart R6 to trigger an auto-failover to R7
        restart_server_and_wait 6
        # Wait for R6 to become a replica
        wait_for_role 6 slave
        # Validate final states: the source shard now targets R7
        wait_for_slot_state 0 "\[609->-$R7_id\]"
        wait_for_slot_state 6 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R7_id\]"
        wait_for_slot_state 7 "\[609-<-$R0_id\]"
        # Restore R6's primaryship
        assert_equal {OK} [R 6 cluster failover]
        wait_for_role 6 master
        # Validate that the original states are restored
        wait_for_slot_state 0 "\[609->-$R6_id\]"
        wait_for_slot_state 6 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R6_id\]"
        wait_for_slot_state 7 "\[609-<-$R0_id\]"
    }

    test "Empty-shard migration source is auto-updated after failover in source shard" {
        wait_for_role 0 master
        # Restart R0 to trigger an auto-failover to R3
        restart_server_and_wait 0
        # Wait for R0 to become a replica
        wait_for_role 0 slave
        # Validate final states: the target shard now imports from R3
        wait_for_slot_state 0 "\[609->-$R6_id\]"
        wait_for_slot_state 6 "\[609-<-$R3_id\]"
        wait_for_slot_state 3 "\[609->-$R6_id\]"
        wait_for_slot_state 7 "\[609-<-$R3_id\]"
        # Restore R0's primaryship
        assert_equal {OK} [R 0 cluster failover]
        wait_for_role 0 master
        # Validate that the original states are restored
        wait_for_slot_state 0 "\[609->-$R6_id\]"
        wait_for_slot_state 6 "\[609-<-$R0_id\]"
        wait_for_slot_state 3 "\[609->-$R6_id\]"
        wait_for_slot_state 7 "\[609-<-$R0_id\]"
    }
}

# Open $slot for migration from node index $from to node index $to:
# the source is marked MIGRATING first, then the target is marked IMPORTING.
# Both commands must answer OK.
proc migrate_slot {from to slot} {
    set source_id [R $from CLUSTER MYID]
    set target_id [R $to CLUSTER MYID]
    assert_equal {OK} [R $from CLUSTER SETSLOT $slot MIGRATING $target_id]
    assert_equal {OK} [R $to CLUSTER SETSLOT $slot IMPORTING $source_id]
}

# Several slots open at the same time between R0 (source) and R1 (target),
# with R3 and R4 checked as their respective replicas.
start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } {

    set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1]
    set R0_id [R 0 CLUSTER MYID]
    set R1_id [R 1 CLUSTER MYID]
    set R2_id [R 2 CLUSTER MYID]
    set R3_id [R 3 CLUSTER MYID]
    set R4_id [R 4 CLUSTER MYID]
    set R5_id [R 5 CLUSTER MYID]

    test "Multiple slot migration states are replicated" {
        # Slots are opened out of numeric order; the expected states below
        # list them in ascending order.
        foreach slot_no {13 7 17} {
            migrate_slot 0 1 $slot_no
        }
        set migrating_state "\[7->-$R1_id\] \[13->-$R1_id\] \[17->-$R1_id\]"
        set importing_state "\[7-<-$R0_id\] \[13-<-$R0_id\] \[17-<-$R0_id\]"
        # Validate final states on both primaries and both replicas
        wait_for_slot_state 0 $migrating_state
        wait_for_slot_state 1 $importing_state
        wait_for_slot_state 3 $migrating_state
        wait_for_slot_state 4 $importing_state
    }

    test "New replica inherits multiple migrating slots" {
        # Turn R3 into an empty node
        assert_equal {OK} [R 3 CLUSTER RESET]
        # Bring R3 back as a replica of R0
        assert_equal {OK} [R 3 CLUSTER MEET [srv 0 "host"] [srv 0 "port"]]
        wait_for_role 0 master
        assert_equal {OK} [R 3 CLUSTER REPLICATE $R0_id]
        wait_for_role 3 slave
        # R3 must end up with all three migrating slots again
        wait_for_slot_state 3 "\[7->-$R1_id\] \[13->-$R1_id\] \[17->-$R1_id\]"
    }

    test "Slot finalization succeeds on both primary and replicas" {
        # Finalize one slot at a time on R1; after each step the remaining
        # importing slots must match on R1 and then on R4.
        assert_equal {OK} [R 1 CLUSTER SETSLOT 7 NODE $R1_id]
        foreach node_idx {1 4} {
            wait_for_slot_state $node_idx "\[13-<-$R0_id\] \[17-<-$R0_id\]"
        }
        assert_equal {OK} [R 1 CLUSTER SETSLOT 13 NODE $R1_id]
        foreach node_idx {1 4} {
            wait_for_slot_state $node_idx "\[17-<-$R0_id\]"
        }
        assert_equal {OK} [R 1 CLUSTER SETSLOT 17 NODE $R1_id]
        foreach node_idx {1 4} {
            wait_for_slot_state $node_idx ""
        }
    }

}

# The importing node takes over a slot by itself once the source has given
# up ownership of it.
start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } {

    set node_timeout [lindex [R 0 CONFIG GET cluster-node-timeout] 1]
    set R0_id [R 0 CLUSTER MYID]
    set R1_id [R 1 CLUSTER MYID]

    test "Slot is auto-claimed by target after source relinquishes ownership" {
        migrate_slot 0 1 609
        # Validate that R1 doesn't own slot 609: the read must raise MOVED.
        # The command runs directly inside catch; with extra brackets around
        # it, a non-error reply would be executed as a second command and
        # hide the real reply behind an "invalid command name" error.
        catch {R 1 get aga} e
        assert_equal {MOVED} [lindex [split $e] 0]
        # Finalize the slot on the source first
        assert_equal {OK} [R 0 CLUSTER SETSLOT 609 NODE $R1_id]
        # Allow one node timeout for the change to reach R1
        after $node_timeout
        # R1 should claim slot 609 since it is still importing slot 609
        # from R0 but R0 no longer owns this slot
        assert_equal {OK} [R 1 set aga foo]
    }
}

# CLUSTER SETSLOT called with an explicit TIMEOUT while a replica is down.
start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } {
    set R1_id [R 1 CLUSTER MYID]

    test "CLUSTER SETSLOT with an explicit timeout" {
        # Crash replica R3; the resulting connection error is ignored
        catch {R 3 DEBUG SEGFAULT} crash_err

        # Issue SETSLOT with an explicit 3000 ms timeout and time the call
        set began_ms [clock milliseconds]
        catch {R 0 CLUSTER SETSLOT 609 MIGRATING $R1_id TIMEOUT 3000} reply
        set elapsed_ms [expr {[clock milliseconds] - $began_ms}]

        # The call must have blocked for longer than the 2s default timeout,
        # which shows the explicit value was honoured
        assert {$elapsed_ms > 2000}

        # Once the timeout expires, SETSLOT must fail for lack of replicas
        assert_equal {NOREPLICAS Not enough good replicas to write.} $reply
    }
}

0 comments on commit fa0285c

Please sign in to comment.