Skip to content

Commit

Permalink
integration: ensure leader is up in waitLeader() and clusterMustProgr…
Browse files Browse the repository at this point in the history
…ess()

The issue is caused by losing the leader after waitLeader() has returned.
This can happen if the test machine is flaky and triggers a leader loss, or
if the killed node was the leader: in TestRestartMember(), waitLeader() only
scans the followers, and they can all still report the same, older leader.
In those cases, clusterMustProgress() proceeds with no leader, which triggers
the no leader error.

To get around that, use a linearizable get in waitLeader() to ensure the leader
is up, and retry kapi.Create() in clusterMustProgress() to ensure it proceeds
with a leader.

FIX etcd-io#7258
  • Loading branch information
fanminshi committed Feb 28, 2017
1 parent 86c9bf5 commit 324d238
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 4 deletions.
18 changes: 17 additions & 1 deletion integration/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,12 @@ func (c *cluster) URL(i int) string {

// URLs returns a list of all active client URLs in the cluster.
// It delegates to getMembersURLs, passing every member in c.Members.
func (c *cluster) URLs() []string {
return getMembersURLs(c.Members)
}

func getMembersURLs(members []*member) []string {
urls := make([]string, 0)
for _, m := range c.Members {
for _, m := range members {
select {
case <-m.s.StopNotify():
continue
Expand Down Expand Up @@ -343,6 +347,18 @@ func (c *cluster) waitLeader(t *testing.T, membs []*member) int {
for _, m := range membs {
possibleLead[uint64(m.s.ID())] = true
}
cc := MustNewHTTPClient(t, getMembersURLs(membs), nil)
kapi := client.NewKeysAPI(cc)

// ensure leader is up via linearizable get
for {
ctx, cancel := context.WithTimeout(context.Background(), 10*tickDuration)
_, err := kapi.Get(ctx, "0", &client.GetOptions{Quorum: true})
cancel()
if err == nil || strings.Contains(err.Error(), "Key not found") {
break
}
}

for lead == 0 || !possibleLead[lead] {
lead = 0
Expand Down
17 changes: 14 additions & 3 deletions integration/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,13 +447,24 @@ func TestRejectUnhealthyRemove(t *testing.T) {
func clusterMustProgress(t *testing.T, membs []*member) {
cc := MustNewHTTPClient(t, []string{membs[0].URL()}, nil)
kapi := client.NewKeysAPI(cc)
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
key := fmt.Sprintf("foo%d", rand.Int())
resp, err := kapi.Create(ctx, "/"+key, "bar")
var (
err error
resp *client.Response
)
// retry in case of leader loss induced by slow CI
for i := 0; i < 3; i++ {
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
resp, err = kapi.Create(ctx, "/"+key, "bar")
cancel()
if err == nil {
break
}
t.Logf("failed to create key on %q (%v)", membs[0].URL(), err)
}
if err != nil {
t.Fatalf("create on %s error: %v", membs[0].URL(), err)
}
cancel()

for i, m := range membs {
u := m.URL()
Expand Down

0 comments on commit 324d238

Please sign in to comment.