Skip to content

Commit

Permalink
Support keepers that cannot become master/sync
Browse files Browse the repository at this point in the history
Keeper new flags:

--can-be-master (default true): set to false to prevent the keeper from being elected as master
--can-be-synchronous-replica (default true): set to false to prevent the keeper from being chosen as
synchronous replica

Updates sentinel to support keepers with new flags:

- findBestNewMasters: ignoring keepers that cannot become master

- updateKeepersStatus: update `KeeperStatus` to have `CanBeMaster` and
`CanBeSynchronousReplica` properties

- updateCluster: ignore standbys that cannot be synchronous standbys
  • Loading branch information
rnaveiras committed Apr 28, 2020
1 parent 5374939 commit 05b1b0f
Show file tree
Hide file tree
Showing 6 changed files with 452 additions and 4 deletions.
15 changes: 15 additions & 0 deletions cmd/keeper/cmd/keeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ type config struct {
pgSUUsername string
pgSUPassword string
pgSUPasswordFile string

canBeMaster bool
canBeSynchronousReplica bool
}

var cfg config
Expand All @@ -137,6 +140,9 @@ func init() {
CmdKeeper.PersistentFlags().StringVar(&cfg.pgSUPasswordFile, "pg-su-passwordfile", "", "postgres superuser password file. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers)")
CmdKeeper.PersistentFlags().BoolVar(&cfg.debug, "debug", false, "enable debug logging")

CmdKeeper.PersistentFlags().BoolVar(&cfg.canBeMaster, "can-be-master", true, "prevent keeper from being elected as master")
CmdKeeper.PersistentFlags().BoolVar(&cfg.canBeSynchronousReplica, "can-be-synchronous-replica", true, "prevent keeper from being chosen as synchronous replica")

if err := CmdKeeper.PersistentFlags().MarkDeprecated("id", "please use --uid"); err != nil {
log.Fatal(err)
}
Expand Down Expand Up @@ -461,6 +467,9 @@ type PostgresKeeper struct {
lastPGState *cluster.PostgresState

waitSyncStandbysSynced bool

canBeMaster *bool
canBeSynchronousReplica *bool
}

func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) {
Expand Down Expand Up @@ -500,6 +509,9 @@ func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) {
keeperLocalState: &KeeperLocalState{},
dbLocalState: &DBLocalState{},

canBeMaster: &cfg.canBeMaster,
canBeSynchronousReplica: &cfg.canBeSynchronousReplica,

e: e,
end: end,
}
Expand Down Expand Up @@ -567,6 +579,9 @@ func (p *PostgresKeeper) updateKeeperInfo() error {
Min: min,
},
PostgresState: p.getLastPGState(),

CanBeMaster: p.canBeMaster,
CanBeSynchronousReplica: p.canBeSynchronousReplica,
}

// The time to live is just to automatically remove old entries, it's
Expand Down
44 changes: 40 additions & 4 deletions cmd/sentinel/cmd/sentinel.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,16 @@ func (s *Sentinel) updateKeepersStatus(cd *cluster.ClusterData, keepersInfo clus
}
}

// Keepers support several command line arguments that should be populated in the
// KeeperStatus by the sentinel. This allows us to make decisions about how to arrange
// the cluster that take into consideration the configuration of each keeper.
for keeperUID, k := range cd.Keepers {
if ki, ok := keepersInfo[keeperUID]; ok {
k.Status.CanBeMaster = ki.CanBeMaster
k.Status.CanBeSynchronousReplica = ki.CanBeSynchronousReplica
}
}

// Mark keepers without a keeperInfo (cleaned up above from not updated
// ones) as in error
for keeperUID, k := range cd.Keepers {
Expand Down Expand Up @@ -720,20 +730,35 @@ func (s *Sentinel) findBestStandbys(cd *cluster.ClusterData, masterDB *cluster.D
return bestDBs
}

// findBestNewMasters identifies the DBs that are eligible to become a new master. We do
// this by selecting from valid standbys (those keepers that follow the same timeline as
// our master, and have an acceptable replication lag) and also selecting from those nodes
// that are valid to become master by their status.
func (s *Sentinel) findBestNewMasters(cd *cluster.ClusterData, masterDB *cluster.DB) []*cluster.DB {
bestNewMasters := s.findBestStandbys(cd, masterDB)
bestNewMasters := []*cluster.DB{}
for _, db := range s.findBestStandbys(cd, masterDB) {
if k, ok := cd.Keepers[db.Spec.KeeperUID]; ok && (k.Status.CanBeMaster != nil && !*k.Status.CanBeMaster) {
log.Infow("ignoring keeper since it cannot be master (--can-be-master=false)", "db", db.UID, "keeper", db.Spec.KeeperUID)
continue
}

bestNewMasters = append(bestNewMasters, db)
}

// Add the previous masters to the best standbys (if valid and in good state)
goodMasters, _, _ := s.validMastersByStatus(cd)
log.Debugf("goodMasters: %s", spew.Sdump(goodMasters))
for _, db := range goodMasters {
validMastersByStatus, _, _ := s.validMastersByStatus(cd)
log.Debugf("validMastersByStatus: %s", spew.Sdump(validMastersByStatus))
for _, db := range validMastersByStatus {
if db.UID == masterDB.UID {
log.Debugw("ignoring db since it's the current master", "db", db.UID, "keeper", db.Spec.KeeperUID)
continue
}

if db.Status.TimelineID != masterDB.Status.TimelineID {
log.Debugw("ignoring keeper since its pg timeline is different than master timeline", "db", db.UID, "dbTimeline", db.Status.TimelineID, "masterTimeline", masterDB.Status.TimelineID)
continue
}

// do this only when not using synchronous replication since in sync repl we
// have to ignore the last reported xlogpos or valid sync standby will be
// skipped
Expand All @@ -743,8 +768,10 @@ func (s *Sentinel) findBestNewMasters(cd *cluster.ClusterData, masterDB *cluster
continue
}
}

bestNewMasters = append(bestNewMasters, db)
}

// Sort by XLogPos
sort.Sort(dbSlice(bestNewMasters))
log.Debugf("bestNewMasters: %s", spew.Sdump(bestNewMasters))
Expand Down Expand Up @@ -1302,6 +1329,15 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
if _, ok := synchronousStandbys[bestStandby.UID]; ok {
continue
}

// ignore standbys that cannot be synchronous standbys
if db, ok := newcd.DBs[bestStandby.UID]; ok {
if keeper, ok := newcd.Keepers[db.Spec.KeeperUID]; ok && (keeper.Status.CanBeSynchronousReplica != nil && !*keeper.Status.CanBeSynchronousReplica) {
log.Infow("cannot choose standby as synchronous (--can-be-synchronous-replica=false)", "db", db.UID, "keeper", keeper.UID)
continue
}
}

log.Infow("adding new synchronous standby in good state trying to reach MaxSynchronousStandbys", "masterDB", masterDB.UID, "synchronousStandbyDB", bestStandby.UID, "keeper", bestStandby.Spec.KeeperUID)
synchronousStandbys[bestStandby.UID] = struct{}{}
addedCount++
Expand Down
Loading

0 comments on commit 05b1b0f

Please sign in to comment.