Skip to content

Commit

Permalink
Merge pull request #277 from 3scale-ops/avoid-failover-delays
Browse files Browse the repository at this point in the history
Avoid failover delays in twemproxy reconfig when using master targets
  • Loading branch information
3scale-robot authored Oct 17, 2023
2 parents aad968e + 79e9e89 commit e081080
Show file tree
Hide file tree
Showing 14 changed files with 341 additions and 92 deletions.
3 changes: 2 additions & 1 deletion controllers/sentinel_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ func (r *SentinelReconciler) reconcileStatus(ctx context.Context, instance *saas
// and rely on reconciles triggered by sentinel events to correct the situation.
masterError := &sharded.DiscoveryError_Master_SingleServerFailure{}
slaveError := &sharded.DiscoveryError_Slave_SingleServerFailure{}
if errors.As(merr, masterError) || errors.As(merr, slaveError) {
failoverInProgressError := &sharded.DiscoveryError_Slave_FailoverInProgress{}
if errors.As(merr, masterError) || errors.As(merr, slaveError) || errors.As(merr, failoverInProgressError) {
log.Error(merr, "DiscoveryError")
}

Expand Down
6 changes: 3 additions & 3 deletions examples/backend/redis-v6/redis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ metadata:
name: shard01
spec:
image:
tag: 6.2.7-alpine
tag: 6.2.13-alpine
---
apiVersion: saas.3scale.net/v1alpha1
kind: RedisShard
metadata:
name: shard02
spec:
image:
tag: 6.2.7-alpine
tag: 6.2.13-alpine

---
apiVersion: saas.3scale.net/v1alpha1
Expand All @@ -22,4 +22,4 @@ metadata:
spec:
slaveCount: 0
image:
tag: 6.2.7-alpine
tag: 6.2.13-alpine
2 changes: 1 addition & 1 deletion examples/backend/redis-v6/sentinel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
spec:
replicas: 3
image:
tag: 6.2.7-debian-11-r17
tag: 6.2.13-debian-11-r76
config:
monitoredShards:
# DNS should not be used in production. DNS is
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/3scale/saas-operator
go 1.20

require (
github.com/3scale-ops/basereconciler v0.3.4
github.com/3scale-ops/basereconciler v0.3.5
github.com/3scale-ops/marin3r v0.12.2
github.com/MakeNowJust/heredoc v1.0.0
github.com/aws/aws-sdk-go-v2 v1.21.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ contrib.go.opencensus.io/exporter/ocagent v0.7.1-0.20200907061046-05415f1de66d/g
contrib.go.opencensus.io/exporter/prometheus v0.4.0 h1:0QfIkj9z/iVZgK31D9H9ohjjIDApI2GOPScCKwxedbs=
contrib.go.opencensus.io/exporter/prometheus v0.4.0/go.mod h1:o7cosnyfuPVK0tB8q0QmaQNhGnptITnPQB+z1+qeFB0=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/3scale-ops/basereconciler v0.3.4 h1:KRA535+TNR0HCdGpxp/MwI9H2hPkTsIf0F+bo9ZqOFI=
github.com/3scale-ops/basereconciler v0.3.4/go.mod h1:tPJ50gfzbDC52reWkDDaEHO1leuRx9JzqggwIRA+cL0=
github.com/3scale-ops/basereconciler v0.3.5 h1:If2KDs6aOQ467Ja88bGFe2zkZy87AjXvXEWJDGwHbZs=
github.com/3scale-ops/basereconciler v0.3.5/go.mod h1:xKM3TsE4qr4fiYCpKeodlcoJUruOpYsJ8M8Iz8FlEpg=
github.com/3scale-ops/marin3r v0.12.2 h1:sU4N7RZ5AQzfejrfP0KgpagmL/XD8a5NdSIv1qvaAnU=
github.com/3scale-ops/marin3r v0.12.2/go.mod h1:BOU7EFv58PgPMgKgJFDd4ingfBhH6KC2AGJoD7i9SaU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
Expand Down
56 changes: 56 additions & 0 deletions pkg/generators/twemproxyconfig/generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,27 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelMaster (shard1)
InjectResponse: func() interface{} {
return &redis_client.SentinelMasterCmdResult{Name: "shard1", IP: "127.0.0.1", Port: 5000, Flags: "master"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
),
),
log: logr.Discard(),
Expand Down Expand Up @@ -258,6 +272,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard0)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -285,6 +306,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard1)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -428,6 +456,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard0)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -455,6 +490,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard1)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -593,13 +635,27 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelMaster (shard1)
InjectResponse: func() interface{} {
return &redis_client.SentinelMasterCmdResult{Name: "shard1", IP: "127.0.0.1", Port: 5000, Flags: "master"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard1)
InjectResponse: func() interface{} {
Expand Down
5 changes: 5 additions & 0 deletions pkg/redis/client/fake_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ func (fc *FakeClient) SentinelMaster(ctx context.Context, shard string) (*Sentin
return rsp.InjectResponse().(*SentinelMasterCmdResult), rsp.InjectError()
}

func (fc *FakeClient) SentinelGetMasterAddrByName(ctx context.Context, shard string) ([]string, error) {
rsp := fc.pop()
return rsp.InjectResponse().([]string), rsp.InjectError()
}

func (fc *FakeClient) SentinelMasters(ctx context.Context) ([]interface{}, error) {
rsp := fc.pop()
return rsp.InjectResponse().([]interface{}), rsp.InjectError()
Expand Down
6 changes: 6 additions & 0 deletions pkg/redis/client/goredis_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ func (c *GoRedisClient) SentinelMaster(ctx context.Context, shard string) (*Sent
return result, err
}

func (c *GoRedisClient) SentinelGetMasterAddrByName(ctx context.Context, shard string) ([]string, error) {

values, err := c.sentinel.GetMasterAddrByName(ctx, shard).Result()
return values, err
}

func (c *GoRedisClient) SentinelMasters(ctx context.Context) ([]interface{}, error) {

values, err := c.sentinel.Masters(ctx).Result()
Expand Down
1 change: 1 addition & 0 deletions pkg/redis/client/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
type TestableInterface interface {
SentinelMaster(context.Context, string) (*SentinelMasterCmdResult, error)
SentinelMasters(context.Context) ([]interface{}, error)
SentinelGetMasterAddrByName(ctx context.Context, shard string) ([]string, error)
SentinelSlaves(context.Context, string) ([]interface{}, error)
SentinelMonitor(context.Context, string, string, string, int) error
SentinelSet(context.Context, string, string, string) error
Expand Down
14 changes: 14 additions & 0 deletions pkg/redis/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"context"
"net"
"strconv"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -109,6 +110,19 @@ func (srv *Server) SentinelMaster(ctx context.Context, shard string) (*client.Se
return result, nil
}

func (srv *Server) SentinelGetMasterAddrByName(ctx context.Context, shard string) (string, int, error) {

values, err := srv.client.SentinelGetMasterAddrByName(ctx, shard)
if err != nil {
return "", 0, err
}
port, err := strconv.Atoi(values[1])
if err != nil {
return "", 0, err
}
return values[0], port, nil
}

func (srv *Server) SentinelMasters(ctx context.Context) ([]client.SentinelMasterCmdResult, error) {

values, err := srv.client.SentinelMasters(ctx)
Expand Down
1 change: 1 addition & 0 deletions pkg/redis/sharded/discover.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,6 @@ func (srv *RedisServer) Discover(ctx context.Context, opts ...DiscoveryOption) e
// Discovery errors
type DiscoveryError_Sentinel_Failure struct{ error }
type DiscoveryError_Master_SingleServerFailure struct{ error }
type DiscoveryError_Slave_FailoverInProgress struct{ error }
type DiscoveryError_Slave_SingleServerFailure struct{ error }
type DiscoveryError_UnknownRole_SingleServerFailure struct{ error }
23 changes: 23 additions & 0 deletions pkg/redis/sharded/redis_shard.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package sharded
import (
"context"
"fmt"
"net"
"sort"
"strconv"
"strings"

"github.com/3scale/saas-operator/pkg/redis/client"
Expand Down Expand Up @@ -79,6 +81,22 @@ func (shard *Shard) Discover(ctx context.Context, sentinel *SentinelServer, opti
return append(merr, DiscoveryError_Sentinel_Failure{err})
}

// If a failover is currently in progress, only the GetMasterAddrByName command returns the
// updated address of the master. The other commands ('sentinel masters' and 'sentinel master <shard>)
// wait until all the replicas are also reconfigured to start announcing the new IP, which can cause delays
// to reconfigure the clients.
failoverInProgress := false
masterIP, masterPort, err := sentinel.SentinelGetMasterAddrByName(ctx, shard.Name)
if err != nil {
return append(merr, DiscoveryError_Sentinel_Failure{err})
}
if masterIP != sentinelMasterResult.IP || masterPort != sentinelMasterResult.Port {
failoverInProgress = true
logger.Info("failover in progress", "oldMaster", net.JoinHostPort(sentinelMasterResult.IP, strconv.Itoa(sentinelMasterResult.Port)), "newMaster", net.JoinHostPort(masterIP, strconv.Itoa(masterPort)))
sentinelMasterResult.IP = masterIP
sentinelMasterResult.Port = masterPort
}

// Get the corresponding server or add a new one if not found
srv, err := shard.GetServerByID(fmt.Sprintf("%s:%d", sentinelMasterResult.IP, sentinelMasterResult.Port))
if err != nil {
Expand Down Expand Up @@ -108,6 +126,11 @@ func (shard *Shard) Discover(ctx context.Context, sentinel *SentinelServer, opti
return merr.ErrorOrNil()
}

// don't discover slaves if a failover is in progress
if failoverInProgress {
return append(merr, DiscoveryError_Slave_FailoverInProgress{fmt.Errorf("refusing to discover slaves while a failover is in progress")})
}

// discover slaves
sentinelSlavesResult, err := sentinel.SentinelSlaves(ctx, shard.Name)
if err != nil {
Expand Down
Loading

0 comments on commit e081080

Please sign in to comment.