Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid failover delays in twemproxy reconfig when using master targets #277

Merged
merged 1 commit into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion controllers/sentinel_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ func (r *SentinelReconciler) reconcileStatus(ctx context.Context, instance *saas
// and rely on reconciles triggered by sentinel events to correct the situation.
masterError := &sharded.DiscoveryError_Master_SingleServerFailure{}
slaveError := &sharded.DiscoveryError_Slave_SingleServerFailure{}
if errors.As(merr, masterError) || errors.As(merr, slaveError) {
failoverInProgressError := &sharded.DiscoveryError_Slave_FailoverInProgress{}
if errors.As(merr, masterError) || errors.As(merr, slaveError) || errors.As(merr, failoverInProgressError) {
log.Error(merr, "DiscoveryError")
}

Expand Down
6 changes: 3 additions & 3 deletions examples/backend/redis-v6/redis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ metadata:
name: shard01
spec:
image:
tag: 6.2.7-alpine
tag: 6.2.13-alpine
---
apiVersion: saas.3scale.net/v1alpha1
kind: RedisShard
metadata:
name: shard02
spec:
image:
tag: 6.2.7-alpine
tag: 6.2.13-alpine

---
apiVersion: saas.3scale.net/v1alpha1
Expand All @@ -22,4 +22,4 @@ metadata:
spec:
slaveCount: 0
image:
tag: 6.2.7-alpine
tag: 6.2.13-alpine
2 changes: 1 addition & 1 deletion examples/backend/redis-v6/sentinel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
spec:
replicas: 3
image:
tag: 6.2.7-debian-11-r17
tag: 6.2.13-debian-11-r76
config:
monitoredShards:
# DNS should not be used in production. DNS is
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/3scale/saas-operator
go 1.20

require (
github.com/3scale-ops/basereconciler v0.3.4
github.com/3scale-ops/basereconciler v0.3.5
github.com/3scale-ops/marin3r v0.12.2
github.com/MakeNowJust/heredoc v1.0.0
github.com/aws/aws-sdk-go-v2 v1.21.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ contrib.go.opencensus.io/exporter/ocagent v0.7.1-0.20200907061046-05415f1de66d/g
contrib.go.opencensus.io/exporter/prometheus v0.4.0 h1:0QfIkj9z/iVZgK31D9H9ohjjIDApI2GOPScCKwxedbs=
contrib.go.opencensus.io/exporter/prometheus v0.4.0/go.mod h1:o7cosnyfuPVK0tB8q0QmaQNhGnptITnPQB+z1+qeFB0=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/3scale-ops/basereconciler v0.3.4 h1:KRA535+TNR0HCdGpxp/MwI9H2hPkTsIf0F+bo9ZqOFI=
github.com/3scale-ops/basereconciler v0.3.4/go.mod h1:tPJ50gfzbDC52reWkDDaEHO1leuRx9JzqggwIRA+cL0=
github.com/3scale-ops/basereconciler v0.3.5 h1:If2KDs6aOQ467Ja88bGFe2zkZy87AjXvXEWJDGwHbZs=
github.com/3scale-ops/basereconciler v0.3.5/go.mod h1:xKM3TsE4qr4fiYCpKeodlcoJUruOpYsJ8M8Iz8FlEpg=
github.com/3scale-ops/marin3r v0.12.2 h1:sU4N7RZ5AQzfejrfP0KgpagmL/XD8a5NdSIv1qvaAnU=
github.com/3scale-ops/marin3r v0.12.2/go.mod h1:BOU7EFv58PgPMgKgJFDd4ingfBhH6KC2AGJoD7i9SaU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
Expand Down
56 changes: 56 additions & 0 deletions pkg/generators/twemproxyconfig/generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,27 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelMaster (shard1)
InjectResponse: func() interface{} {
return &redis_client.SentinelMasterCmdResult{Name: "shard1", IP: "127.0.0.1", Port: 5000, Flags: "master"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
),
),
log: logr.Discard(),
Expand Down Expand Up @@ -258,6 +272,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard0)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -285,6 +306,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard1)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -428,6 +456,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard0)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -455,6 +490,13 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard1)
InjectResponse: func() interface{} {
Expand Down Expand Up @@ -593,13 +635,27 @@ func TestNewGenerator(t *testing.T) {
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard0)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "1000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelMaster (shard1)
InjectResponse: func() interface{} {
return &redis_client.SentinelMasterCmdResult{Name: "shard1", IP: "127.0.0.1", Port: 5000, Flags: "master"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelGetMasterAddrByName (shard1)
InjectResponse: func() interface{} {
return []string{"127.0.0.1", "5000"}
},
InjectError: func() error { return nil },
},
redis_client.FakeResponse{
// cmd: SentinelSlaves (shard1)
InjectResponse: func() interface{} {
Expand Down
5 changes: 5 additions & 0 deletions pkg/redis/client/fake_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ func (fc *FakeClient) SentinelMaster(ctx context.Context, shard string) (*Sentin
return rsp.InjectResponse().(*SentinelMasterCmdResult), rsp.InjectError()
}

func (fc *FakeClient) SentinelGetMasterAddrByName(ctx context.Context, shard string) ([]string, error) {
rsp := fc.pop()
return rsp.InjectResponse().([]string), rsp.InjectError()
}

func (fc *FakeClient) SentinelMasters(ctx context.Context) ([]interface{}, error) {
rsp := fc.pop()
return rsp.InjectResponse().([]interface{}), rsp.InjectError()
Expand Down
6 changes: 6 additions & 0 deletions pkg/redis/client/goredis_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ func (c *GoRedisClient) SentinelMaster(ctx context.Context, shard string) (*Sent
return result, err
}

func (c *GoRedisClient) SentinelGetMasterAddrByName(ctx context.Context, shard string) ([]string, error) {

values, err := c.sentinel.GetMasterAddrByName(ctx, shard).Result()
return values, err
}

func (c *GoRedisClient) SentinelMasters(ctx context.Context) ([]interface{}, error) {

values, err := c.sentinel.Masters(ctx).Result()
Expand Down
1 change: 1 addition & 0 deletions pkg/redis/client/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
type TestableInterface interface {
SentinelMaster(context.Context, string) (*SentinelMasterCmdResult, error)
SentinelMasters(context.Context) ([]interface{}, error)
SentinelGetMasterAddrByName(ctx context.Context, shard string) ([]string, error)
SentinelSlaves(context.Context, string) ([]interface{}, error)
SentinelMonitor(context.Context, string, string, string, int) error
SentinelSet(context.Context, string, string, string) error
Expand Down
14 changes: 14 additions & 0 deletions pkg/redis/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"context"
"net"
"strconv"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -109,6 +110,19 @@ func (srv *Server) SentinelMaster(ctx context.Context, shard string) (*client.Se
return result, nil
}

func (srv *Server) SentinelGetMasterAddrByName(ctx context.Context, shard string) (string, int, error) {

values, err := srv.client.SentinelGetMasterAddrByName(ctx, shard)
if err != nil {
return "", 0, err
}
port, err := strconv.Atoi(values[1])
if err != nil {
return "", 0, err
}
return values[0], port, nil
}

func (srv *Server) SentinelMasters(ctx context.Context) ([]client.SentinelMasterCmdResult, error) {

values, err := srv.client.SentinelMasters(ctx)
Expand Down
1 change: 1 addition & 0 deletions pkg/redis/sharded/discover.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,6 @@ func (srv *RedisServer) Discover(ctx context.Context, opts ...DiscoveryOption) e
// Discovery errors
type DiscoveryError_Sentinel_Failure struct{ error }
type DiscoveryError_Master_SingleServerFailure struct{ error }
type DiscoveryError_Slave_FailoverInProgress struct{ error }
type DiscoveryError_Slave_SingleServerFailure struct{ error }
type DiscoveryError_UnknownRole_SingleServerFailure struct{ error }
23 changes: 23 additions & 0 deletions pkg/redis/sharded/redis_shard.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package sharded
import (
"context"
"fmt"
"net"
"sort"
"strconv"
"strings"

"github.com/3scale/saas-operator/pkg/redis/client"
Expand Down Expand Up @@ -79,6 +81,22 @@ func (shard *Shard) Discover(ctx context.Context, sentinel *SentinelServer, opti
return append(merr, DiscoveryError_Sentinel_Failure{err})
}

// If a failover is currently in progress, only the GetMasterAddrByName command returns the
// updated address of the master. The other commands ('sentinel masters' and 'sentinel master <shard>)
// wait until all the replicas are also reconfigured to start announcing the new IP, which can cause delays
// to reconfigure the clients.
failoverInProgress := false
masterIP, masterPort, err := sentinel.SentinelGetMasterAddrByName(ctx, shard.Name)
if err != nil {
return append(merr, DiscoveryError_Sentinel_Failure{err})
}
if masterIP != sentinelMasterResult.IP || masterPort != sentinelMasterResult.Port {
failoverInProgress = true
logger.Info("failover in progress", "oldMaster", net.JoinHostPort(sentinelMasterResult.IP, strconv.Itoa(sentinelMasterResult.Port)), "newMaster", net.JoinHostPort(masterIP, strconv.Itoa(masterPort)))
sentinelMasterResult.IP = masterIP
sentinelMasterResult.Port = masterPort
}

// Get the corresponding server or add a new one if not found
srv, err := shard.GetServerByID(fmt.Sprintf("%s:%d", sentinelMasterResult.IP, sentinelMasterResult.Port))
if err != nil {
Expand Down Expand Up @@ -108,6 +126,11 @@ func (shard *Shard) Discover(ctx context.Context, sentinel *SentinelServer, opti
return merr.ErrorOrNil()
}

// don't discover slaves if a failover is in progress
if failoverInProgress {
return append(merr, DiscoveryError_Slave_FailoverInProgress{fmt.Errorf("refusing to discover slaves while a failover is in progress")})
}

// discover slaves
sentinelSlavesResult, err := sentinel.SentinelSlaves(ctx, shard.Name)
if err != nil {
Expand Down
Loading