Commit
Merge pull request #517 from sgotti/sentinel_use_only_one_sync_standby_pg9.5

sentinel: use only one sync standby on postgres <= 9.5
sgotti committed Jun 17, 2018
2 parents 6c47055 + 946ffd1 commit e81e5d7
Showing 3 changed files with 77 additions and 42 deletions.
91 changes: 53 additions & 38 deletions cmd/sentinel/cmd/sentinel.go
@@ -365,15 +365,6 @@ func (s *Sentinel) activeProxiesInfos(proxiesInfo cluster.ProxiesInfo) cluster.P
return activeProxiesInfo
}

func (s *Sentinel) getDBForKeeper(cd *cluster.ClusterData, keeperUID string) *cluster.DB {
for _, db := range cd.DBs {
if db.Spec.KeeperUID == keeperUID {
return db
}
}
return nil
}

func (s *Sentinel) findInitialKeeper(cd *cluster.ClusterData) (*cluster.Keeper, error) {
if len(cd.Keepers) < 1 {
return nil, fmt.Errorf("no keepers registered")
@@ -953,7 +944,7 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
dbsToRemove := []*cluster.DB{}
for _, k := range newcd.Keepers {
// get db associated to the keeper
db := s.getDBForKeeper(cd, k.UID)
db := cd.FindDB(k)
if db != nil {
// skip keepers with an assigned db
continue
@@ -1088,6 +1079,7 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf

if curMasterDBUID == wantedMasterDBUID {
masterDB := newcd.DBs[curMasterDBUID]
masterDBKeeper := newcd.Keepers[masterDB.Spec.KeeperUID]

if newcd.Proxy.Spec.MasterDBUID == "" {
// if the Proxy.Spec.MasterDBUID is empty we have to wait for all
@@ -1189,6 +1181,27 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf

// Setup synchronous standbys
if s.syncRepl(clusterSpec) {
minSynchronousStandbys := int(*clusterSpec.MinSynchronousStandbys)
maxSynchronousStandbys := int(*clusterSpec.MaxSynchronousStandbys)
merge := true
// PostgreSQL <= 9.5 only supports one sync standby at a
// time (defining multiple sync standbys is like doing "1
// (standby1, standby2)" on postgres >= 9.6), so we won't
// be able to know which is the real in-sync standby.
//
// So we always have to define 1 standby in
// masterDB.Spec.SynchronousStandbys, with the downside that
// there can be a time window where we cannot elect the
// synchronous standby as a new primary if it's not yet in
// sync
if masterDBKeeper.Status.PostgresBinaryVersion.Maj != 0 {
if masterDBKeeper.Status.PostgresBinaryVersion.Maj == 9 && masterDBKeeper.Status.PostgresBinaryVersion.Min <= 5 {
minSynchronousStandbys = 1
maxSynchronousStandbys = 1
merge = false
}
}

// update synchronousStandbys only if the reported
// SynchronousStandbys are the same as the required ones. In
// this way, when we have to choose a new master we are sure
@@ -1234,8 +1247,8 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
}

// Remove synchronous standbys in excess
if uint16(len(synchronousStandbys)) > *clusterSpec.MaxSynchronousStandbys {
rc := len(synchronousStandbys) - int(*clusterSpec.MaxSynchronousStandbys)
if len(synchronousStandbys) > maxSynchronousStandbys {
rc := len(synchronousStandbys) - maxSynchronousStandbys
removedCount := 0
toRemove = map[string]struct{}{}
for dbUID, _ := range synchronousStandbys {
Expand All @@ -1254,7 +1267,7 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
// try to add missing standbys up to MaxSynchronousStandbys
bestStandbys := s.findBestStandbys(newcd, curMasterDB)

ac := int(*clusterSpec.MaxSynchronousStandbys) - len(synchronousStandbys)
ac := maxSynchronousStandbys - len(synchronousStandbys)
addedCount := 0
for _, bestStandby := range bestStandbys {
if addedCount >= ac {
@@ -1273,7 +1286,7 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
// also if not in a good state. In this way we have more
// possibilities to choose a sync standby to replace a
// failed master if they become healthy again
ac = int(*clusterSpec.MinSynchronousStandbys) - len(synchronousStandbys)
ac = minSynchronousStandbys - len(synchronousStandbys)
addedCount = 0
for _, db := range newcd.DBs {
if addedCount >= ac {
@@ -1289,29 +1302,31 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
}
}

// if some of the new synchronousStandbys are not inside
// the prevSynchronousStandbys then also add all
// the prevSynchronousStandbys. In this way when there's
// a synchronousStandbys change we'll have, in a first
// step, both the old and the new standbys, then in the
// second step the old will be removed (since the new
// standbys are all inside prevSynchronousStandbys), so
// we'll always be able to choose a sync standby that we
// know was defined in the primary and in sync if the
// primary fails.
allInPrev := true
for k, _ := range synchronousStandbys {
if _, ok := prevSynchronousStandbys[k]; !ok {
allInPrev = false
if merge {
// if some of the new synchronousStandbys are not inside
// the prevSynchronousStandbys then also add all
// the prevSynchronousStandbys. In this way when there's
// a synchronousStandbys change we'll have, in a first
// step, both the old and the new standbys, then in the
// second step the old will be removed (since the new
// standbys are all inside prevSynchronousStandbys), so
// we'll always be able to choose a sync standby that we
// know was defined in the primary and in sync if the
// primary fails.
allInPrev := true
for k, _ := range synchronousStandbys {
if _, ok := prevSynchronousStandbys[k]; !ok {
allInPrev = false
}
}
}
if !allInPrev {
log.Infow("merging current and previous synchronous standbys", "masterDB", masterDB.UID, "prevSynchronousStandbys", prevSynchronousStandbys, "synchronousStandbys", synchronousStandbys)
// use only existing dbs
for _, db := range newcd.DBs {
if _, ok := prevSynchronousStandbys[db.UID]; ok {
log.Infow("adding previous synchronous standby", "masterDB", masterDB.UID, "synchronousStandbyDB", db.UID, "keeper", db.Spec.KeeperUID)
synchronousStandbys[db.UID] = struct{}{}
if !allInPrev {
log.Infow("merging current and previous synchronous standbys", "masterDB", masterDB.UID, "prevSynchronousStandbys", prevSynchronousStandbys, "synchronousStandbys", synchronousStandbys)
// use only existing dbs
for _, db := range newcd.DBs {
if _, ok := prevSynchronousStandbys[db.UID]; ok {
log.Infow("adding previous synchronous standby", "masterDB", masterDB.UID, "synchronousStandbyDB", db.UID, "keeper", db.Spec.KeeperUID)
synchronousStandbys[db.UID] = struct{}{}
}
}
}
}
@@ -1323,8 +1338,8 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
}

// If there aren't enough real synchronous standbys, add a fake synchronous standby because we have to be strict and make the master block transactions until MinSynchronousStandbys real standbys are available
if len(synchronousStandbys)+len(externalSynchronousStandbys) < int(*clusterSpec.MinSynchronousStandbys) {
log.Infow("using a fake synchronous standby since there are not enough real standbys available", "masterDB", masterDB.UID, "required", int(*clusterSpec.MinSynchronousStandbys))
if len(synchronousStandbys)+len(externalSynchronousStandbys) < minSynchronousStandbys {
log.Infow("using a fake synchronous standby since there are not enough real standbys available", "masterDB", masterDB.UID, "required", minSynchronousStandbys)
addFakeStandby = true
}

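In short, the sentinel.go change above clamps the synchronous-standby bounds and disables the old/new standby merge when the master's keeper reports PostgreSQL 9.5 or older. The standalone sketch below distills that behavior; the `postgresVersion` type, its field names, and the `clampSyncStandbys` helper are simplified stand-ins for illustration, not the actual stolon types.

```go
package main

import "fmt"

// postgresVersion is a simplified stand-in for the keeper's reported
// PostgresBinaryVersion (Maj/Min) used in the diff above.
type postgresVersion struct {
	Maj, Min int
}

// clampSyncStandbys distills the logic added in this commit: PostgreSQL <= 9.5
// supports only one synchronous standby, so both bounds are forced to 1 and the
// "merge previous and current synchronous standbys" step is skipped.
func clampSyncStandbys(v postgresVersion, minSync, maxSync int) (int, int, bool) {
	merge := true
	if v.Maj != 0 { // a zero major version means the keeper hasn't reported it yet
		if v.Maj == 9 && v.Min <= 5 {
			minSync, maxSync = 1, 1
			merge = false
		}
	}
	return minSync, maxSync, merge
}

func main() {
	for _, v := range []postgresVersion{{9, 5}, {9, 6}, {10, 0}} {
		minSync, maxSync, merge := clampSyncStandbys(v, 2, 3)
		fmt.Printf("postgres %d.%d -> min=%d max=%d merge=%v\n", v.Maj, v.Min, minSync, maxSync, merge)
	}
}
```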
4 changes: 2 additions & 2 deletions doc/syncrepl.md
@@ -7,7 +7,7 @@ You can enable/disable synchronous replication at any time and the keepers will

### Min and Max number of synchronous replication standbys

In the cluster spec you can set the `MinSynchronousStandbys` and `MaxSynchronousStandbys` values (they both default to 1). Having multiple synchronous standbys is a feature provided starting from [PostgreSQL 9.6](https://www.postgresql.org/docs/9.6/static/warm-standby.html#SYNCHRONOUS-REPLICATION). Increasing these values above 1 for postgres versions below 9.6 will lead to errors starting the instance.
In the cluster spec you can set the `MinSynchronousStandbys` and `MaxSynchronousStandbys` values (they both default to 1). Having multiple synchronous standbys is a feature provided starting from [PostgreSQL 9.6](https://www.postgresql.org/docs/9.6/static/warm-standby.html#SYNCHRONOUS-REPLICATION). Values other than 1 for postgres versions below 9.6 will be ignored.

## Enable synchronous replication.

@@ -24,7 +24,7 @@ stolonctl --cluster-name=mycluster --store-backend=etcd update --patch '{ "synch

## Set min and max number of synchronous replication standbys

Set MinSynchronousStandbys/MaxSynchronousStandbys to a value different than 1 only when using PostgreSQL >= 9.6
Set MinSynchronousStandbys/MaxSynchronousStandbys to a value greater than 1 (only when using PostgreSQL >= 9.6)

```
stolonctl --cluster-name=mycluster --store-backend=etcd update --patch '{ "synchronousReplication" : true, "minSynchronousStandbys": 2, "maxSynchronousStandbys": 3 }'
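For background on why 9.6 is the cutoff (general PostgreSQL behavior, not something specific to this repository): up to 9.5, `synchronous_standby_names` is a plain list where only the first connected standby is synchronous and the rest are fallback candidates, while 9.6 introduced the `N (name, ...)` syntax that keeps N standbys synchronous at once. A hypothetical Go helper, not part of stolon, sketches the two formats:

```go
package main

import (
	"fmt"
	"strings"
)

// formatSyncStandbyNames builds a synchronous_standby_names value for the given
// postgres version. Illustrative only: on >= 9.6 the "N (name, ...)" syntax keeps
// N standbys synchronous; on <= 9.5 only the first connected standby in a plain
// list is synchronous, the others are just fallback candidates.
func formatSyncStandbyNames(pgMaj, pgMin, numSync int, names []string) string {
	if pgMaj > 9 || (pgMaj == 9 && pgMin >= 6) {
		return fmt.Sprintf("%d (%s)", numSync, strings.Join(names, ", "))
	}
	return strings.Join(names, ", ")
}

func main() {
	names := []string{"standby1", "standby2"}
	fmt.Println(formatSyncStandbyNames(9, 6, 2, names)) // 2 (standby1, standby2)
	fmt.Println(formatSyncStandbyNames(9, 5, 2, names)) // standby1, standby2
}
```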
24 changes: 22 additions & 2 deletions tests/integration/ha_test.go
@@ -1512,9 +1512,29 @@ func testKeeperRemovalStolonCtl(t *testing.T, syncRepl bool) {

master, standbys := waitMasterStandbysReady(t, sm, tks)

maj, min, err := master.PGDataVersion()
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
// on postgresql <= 9.5 we can have only 1 synchronous standby
if syncRepl {
if err := WaitClusterDataSynchronousStandbys([]string{standbys[0].uid, standbys[1].uid}, sm, 30*time.Second); err != nil {
t.Fatalf("expected synchronous standbys")
if maj == 9 && min <= 5 {
ok := false
if err := WaitClusterDataSynchronousStandbys([]string{standbys[0].uid}, sm, 30*time.Second); err == nil {
ok = true
}
if !ok {
if err := WaitClusterDataSynchronousStandbys([]string{standbys[1].uid}, sm, 30*time.Second); err == nil {
ok = true
}
}
if !ok {
t.Fatalf("expected synchronous standbys")
}
} else {
if err := WaitClusterDataSynchronousStandbys([]string{standbys[0].uid, standbys[1].uid}, sm, 30*time.Second); err != nil {
t.Fatalf("expected synchronous standbys")
}
}
}

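Since on PostgreSQL <= 9.5 the sentinel keeps a single synchronous standby and either keeper may be chosen, the test above accepts whichever standby ends up in sync. A hypothetical helper (not part of the test suite) could wrap that either-or wait; it takes the existing check as a closure so it stays independent of the store manager type:

```go
// waitAnySingleSynchronousStandby succeeds as soon as any one of the given
// standby UIDs is reported as the single synchronous standby. The wait closure
// is expected to wrap the check the test already uses, e.g.
// func(u []string) error { return WaitClusterDataSynchronousStandbys(u, sm, 30*time.Second) }.
func waitAnySingleSynchronousStandby(uids []string, wait func(uids []string) error) error {
	var lastErr error
	for _, uid := range uids {
		err := wait([]string{uid})
		if err == nil {
			return nil
		}
		lastErr = err
	}
	return lastErr
}
```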
