From 279433bb0e1fc2ab38e137e888cb92736c44bbf9 Mon Sep 17 00:00:00 2001 From: Simone Gotti Date: Mon, 21 Aug 2017 13:02:17 +0200 Subject: [PATCH] *: add ability to define custom pg_hba.conf entries Add a new cluster spec option called `pgHBA` where users can define a custom list of pg_hba.conf entries. These entries will be added to the pg_hba.conf after all the stolon managed entries so we'll guarantee local connections from the keeper and replication connections between pg instances. These entries aren't validated by stolon so if any of them is wrong the postgres instance will fail to start or return a warning on reload. If no custom pg_hba.conf entries are provided then we'll use the current behavior of accepting all hosts for all dbs and users with md5 authentication: ``` host all all 0.0.0.0/0 md5 host all all ::0/0 md5 ``` --- cmd/keeper/keeper.go | 32 +++++++++++++++----- cmd/sentinel/sentinel.go | 1 + cmd/stolonctl/update.go | 1 - doc/README.md | 1 + doc/cluster_spec.md | 45 ++++++++++++++-------------- doc/custom_pg_hba_entries.md | 16 ++++++++++ pkg/cluster/cluster.go | 18 +++++++++-- pkg/postgresql/postgresql.go | 58 +++++++++++++++++++++++++++++++----- 8 files changed, 130 insertions(+), 42 deletions(-) create mode 100644 doc/custom_pg_hba_entries.md diff --git a/cmd/keeper/keeper.go b/cmd/keeper/keeper.go index 1fdd89833..d67de577b 100644 --- a/cmd/keeper/keeper.go +++ b/cmd/keeper/keeper.go @@ -21,6 +21,7 @@ import ( "os" "os/signal" "path/filepath" + "reflect" "strconv" "strings" "sync" @@ -658,8 +659,7 @@ func (p *PostgresKeeper) Start() { // TODO(sgotti) reconfigure the various configurations options // (RequestTimeout) after a changed cluster config - pgParameters := make(common.Parameters) - pgm := postgresql.NewManager(p.pgBinPath, p.dataDir, pgParameters, p.getLocalConnParams(), p.getLocalReplConnParams(), p.pgSUUsername, p.pgSUPassword, p.pgReplUsername, p.pgReplPassword, p.requestTimeout) + pgm := postgresql.NewManager(p.pgBinPath, p.dataDir, 
p.getLocalConnParams(), p.getLocalReplConnParams(), p.pgSUUsername, p.pgSUPassword, p.pgReplUsername, p.pgReplPassword, p.requestTimeout) p.pgm = pgm p.pgm.Stop(true) @@ -885,7 +885,8 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { followersUIDs := db.Spec.Followers - prevPGParameters := pgm.GetParameters() + pgm.SetHba(db.Spec.PGHBA) + var pgParameters common.Parameters dbls := p.dbLocalState @@ -1465,7 +1466,7 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { pgParameters = p.createPGParameters(db) // Log synchronous replication changes - prevSyncStandbyNames := prevPGParameters["synchronous_standby_names"] + prevSyncStandbyNames := pgm.CurParameters()["synchronous_standby_names"] syncStandbyNames := pgParameters["synchronous_standby_names"] if db.Spec.SynchronousReplication { if prevSyncStandbyNames != syncStandbyNames { @@ -1477,17 +1478,32 @@ func (p *PostgresKeeper) postgresKeeperSM(pctx context.Context) { } } - if !pgParameters.Equals(prevPGParameters) { + needsReload := false + + if !pgParameters.Equals(pgm.CurParameters()) { log.Infow("postgres parameters changed, reloading postgres instance") pgm.SetParameters(pgParameters) - if err := pgm.Reload(); err != nil { - log.Errorw("failed to reload postgres instance", err) - } + needsReload = true } else { // for tests log.Infow("postgres parameters not changed") } + if !reflect.DeepEqual(db.Spec.PGHBA, pgm.CurHba()) { + log.Infow("postgres hba entries changed, reloading postgres instance") + pgm.SetHba(db.Spec.PGHBA) + needsReload = true + } else { + // for tests + log.Infow("postgres hba entries not changed") + } + + if needsReload { + if err := pgm.Reload(); err != nil { + log.Errorw("failed to reload postgres instance", err) + } + } + // If we are here, then all went well and we can update the db generation and save it locally p.localStateMutex.Lock() dbls.Generation = db.Generation diff --git a/cmd/sentinel/sentinel.go b/cmd/sentinel/sentinel.go index 
8dd67ea22..677133a47 100644 --- a/cmd/sentinel/sentinel.go +++ b/cmd/sentinel/sentinel.go @@ -368,6 +368,7 @@ func (s *Sentinel) setDBSpecFromClusterSpec(cd *cluster.ClusterData) { db.Spec.SynchronousReplication = s.syncRepl(clusterSpec) db.Spec.UsePgrewind = *clusterSpec.UsePgrewind db.Spec.PGParameters = clusterSpec.PGParameters + db.Spec.PGHBA = clusterSpec.PGHBA if db.Spec.FollowConfig != nil && db.Spec.FollowConfig.Type == cluster.FollowTypeExternal { db.Spec.FollowConfig.StandbySettings = clusterSpec.StandbySettings } diff --git a/cmd/stolonctl/update.go b/cmd/stolonctl/update.go index fe1423849..066be40d4 100644 --- a/cmd/stolonctl/update.go +++ b/cmd/stolonctl/update.go @@ -117,7 +117,6 @@ func update(cmd *cobra.Command, args []string) { if err != nil { die("failed to patch cluster spec: %v", err) } - } else { if err := json.Unmarshal(data, &newcs); err != nil { die("failed to unmarshal cluster spec: %v", err) diff --git a/doc/README.md b/doc/README.md index 9aa1b55c0..0687dc7ea 100644 --- a/doc/README.md +++ b/doc/README.md @@ -7,6 +7,7 @@ We suggest that you first read the [Stolon Architecture and Requirements](archit * [Cluster Specification](cluster_spec.md) * [Cluster Initialization](initialization.md) * [Setting instance parameters](postgres_parameters.md) +* [Custom pg_hba.conf entries](custom_pg_hba_entries.md) * [Stolon Client](stolonctl.md) * Backup/Restore * [Point In Time Recovery](pitr.md) diff --git a/doc/cluster_spec.md b/doc/cluster_spec.md index 2be60439f..f9727a334 100644 --- a/doc/cluster_spec.md +++ b/doc/cluster_spec.md @@ -12,28 +12,29 @@ Some options in a running cluster specification can be changed to update the des ### Cluster Specification Format. 
-| Name | Description | Required | Type | Default | -|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------|-------------------|---------| -| sleepInterval | interval to wait before next check (for every component: keeper, sentinel, proxy). | no | string (duration) | 5s | -| requestTimeout | time after which any request (keepers checks from sentinel etc...) will fail. | no | string (duration) | 10s | -| failInterval | interval after the first fail to declare a keeper as not healthy. | no | string (duration) | 20s | -| deadKeeperRemovalInterval | interval after which a dead keeper will be removed from the cluster data | no | string (duration) | 48h | -| maxStandbys | max number of standbys. This needs to be greater enough to cover both standby managed by stolon and additional standbys configured by the user. Its value affect different postgres parameters like max_replication_slots and max_wal_senders. Setting this to a number lower than the sum of stolon managed standbys and user managed standbys will have unpredicatable effects due to problems creating replication slots or replication problems due to exhausted wal senders. | no | uint16 | 20 | -| maxStandbysPerSender | max number of standbys for every sender. A sender can be a master or another standby (with cascading replication). | no | uint16 | 3 | -| maxStandbyLag | maximum lag (from the last reported master state, in bytes) that an asynchronous standby can have to be elected in place of a failed master. 
| no | uint32 | 1MiB | -| synchronousReplication | use synchronous replication between the master and its standbys | no | bool | false | -| minSynchronousStandbys | minimum number of required synchronous standbys when synchronous replication is enabled (only set this to a value > 1 when using PostgreSQL >= 9.6) | no | uint16 | 1 | -| maxSynchronousStandbys | maximum number of required synchronous standbys when synchronous replication is enabled (only set this to a value > 1 when using PostgreSQL >= 9.6) | no | uint16 | 1 | -| additionalWalSenders | number of additional wal_senders in addition to the ones internally defined by stolon, useful to provide enough wal senders for external standbys (changing this value requires an instance restart) | no | uint16 | 5 | -| usePgrewind | try to use pg_rewind for faster instance resyncronization. | no | bool | false | -| initMode | The cluster initialization mode. Can be *new* or *existing*. *new* means that a new db cluster will be created on a random keeper and the other keepers will sync with it. *existing* means that a keeper (that needs to have an already created db cluster) will be choosed as the initial master and the other keepers will sync with it. In this case the `existingConfig` object needs to be populated. | yes | string | | -| existingConfig | configuration for initMode of type "existing" | if initMode is "existing" | ExistingConfig | | -| mergePgParameters | merge pgParameters of the initialized db cluster, useful the retain initdb generated parameters when InitMode is new, retain current parameters when initMode is existing or pitr. 
| no | bool | true | -| role | cluster role (master or standby) | no | bool | master | -| newConfig | configuration for initMode of type "new" | if initMode is "new" | NewConfig | | -| pitrConfig | configuration for initMode of type "pitr" | if initMode is "pitr" | PITRConfig | | -| standbySettings | standby settings when the cluster is a standby cluster | if role is "standby" | StandbySettings | | -| pgParameters | a map containing the postgres server parameters and their values. The parameters value don't have to be quoted and single quotes don't have to be doubled since this is already done by the keeper when writing the postgresql.conf file | no | map[string]string | | +| Name | Description | Required | Type | Default | +|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------|-------------------|-------------------------------------------------------------------------------------------------------------------------------------| +| sleepInterval | interval to wait before next check (for every component: keeper, sentinel, proxy). | no | string (duration) | 5s | +| requestTimeout | time after which any request (keepers checks from sentinel etc...) will fail. | no | string (duration) | 10s | +| failInterval | interval after the first fail to declare a keeper as not healthy. | no | string (duration) | 20s | +| deadKeeperRemovalInterval | interval after which a dead keeper will be removed from the cluster data | no | string (duration) | 48h | +| maxStandbys | max number of standbys. 
This needs to be greater enough to cover both standby managed by stolon and additional standbys configured by the user. Its value affect different postgres parameters like max_replication_slots and max_wal_senders. Setting this to a number lower than the sum of stolon managed standbys and user managed standbys will have unpredicatable effects due to problems creating replication slots or replication problems due to exhausted wal senders. | no | uint16 | 20 | +| maxStandbysPerSender | max number of standbys for every sender. A sender can be a master or another standby (with cascading replication). | no | uint16 | 3 | +| maxStandbyLag | maximum lag (from the last reported master state, in bytes) that an asynchronous standby can have to be elected in place of a failed master. | no | uint32 | 1MiB | +| synchronousReplication | use synchronous replication between the master and its standbys | no | bool | false | +| minSynchronousStandbys | minimum number of required synchronous standbys when synchronous replication is enabled (only set this to a value > 1 when using PostgreSQL >= 9.6) | no | uint16 | 1 | +| maxSynchronousStandbys | maximum number of required synchronous standbys when synchronous replication is enabled (only set this to a value > 1 when using PostgreSQL >= 9.6) | no | uint16 | 1 | +| additionalWalSenders | number of additional wal_senders in addition to the ones internally defined by stolon, useful to provide enough wal senders for external standbys (changing this value requires an instance restart) | no | uint16 | 5 | +| usePgrewind | try to use pg_rewind for faster instance resyncronization. | no | bool | false | +| initMode | The cluster initialization mode. Can be *new* or *existing*. *new* means that a new db cluster will be created on a random keeper and the other keepers will sync with it. *existing* means that a keeper (that needs to have an already created db cluster) will be choosed as the initial master and the other keepers will sync with it. 
In this case the `existingConfig` object needs to be populated. | yes | string | | +| existingConfig | configuration for initMode of type "existing" | if initMode is "existing" | ExistingConfig | | +| mergePgParameters | merge pgParameters of the initialized db cluster, useful the retain initdb generated parameters when InitMode is new, retain current parameters when initMode is existing or pitr. | no | bool | true | +| role | cluster role (master or standby) | no | bool | master | +| newConfig | configuration for initMode of type "new" | if initMode is "new" | NewConfig | | +| pitrConfig | configuration for initMode of type "pitr" | if initMode is "pitr" | PITRConfig | | +| standbySettings | standby settings when the cluster is a standby cluster | if role is "standby" | StandbySettings | | +| pgParameters | a map containing the postgres server parameters and their values. The parameters value don't have to be quoted and single quotes don't have to be doubled since this is already done by the keeper when writing the postgresql.conf file | no | map[string]string | | +| pgHBA | a list containing additional pg_hba.conf entries. They will be added to the pg_hba.conf generated by stolon. **NOTE**: these lines aren't validated so if some of them are wrong postgres will refuse to start or, on reload, will log a warning and ignore the updated pg_hba.conf file | no | []string | null. Will use the default behiavior of accepting connections from all hosts for all dbs and users with md5 password authentication | #### ExistingConfig diff --git a/doc/custom_pg_hba_entries.md b/doc/custom_pg_hba_entries.md new file mode 100644 index 000000000..0d19ffb3c --- /dev/null +++ b/doc/custom_pg_hba_entries.md @@ -0,0 +1,16 @@ +## Setting custom pg_hba.conf entries + +Stolon manages the pg_hba.conf file entries. 
The first rules are generated by stolon to permit local keeper connections and remote replication connections since these are needed to ensure the correct operation of the cluster. + +Users can specify custom pg_hba.conf entries setting the [cluster_specification](cluster_spec.md) `pgHBA` option. It must be a list of strings containing additional pg_hba.conf entries. They will be added to the pg_hba.conf generated by stolon. + +Since client connections will pass through the stolon-proxy the host part of the entries should match at least the stolon-proxies source addresses. For the same reason it's not possible to directly filter by client. If you have clients that require different access you should use different sets of stolon proxies for every kind of access. + +**NOTE**: these lines aren't validated so if some of them are wrong postgres will refuse to start or, on reload, will log a warning and ignore the updated pg_hba.conf file. Stolon will just check that the string doesn't contain newline characters. 
+ +By default, if no custom pg_hba entries are defined (clusterspec pgHBA option is null, not an empty list), to keep backward compatibility, stolon will add two rules to permit tcp (both ipv4 and ipv6) connections from every host to all dbs and usernames with md5 password authentication: + +``` +host all all 0.0.0.0/0 md5 +host all all ::0/0 md5 +``` diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 97545a8cb..b0d6ad542 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -16,15 +16,14 @@ package cluster import ( "encoding/json" + "fmt" "reflect" + "sort" "strings" "time" "github.com/sorintlab/stolon/common" - "fmt" - "sort" - "github.com/mitchellh/copystructure" ) @@ -243,6 +242,9 @@ type ClusterSpec struct { StandbySettings *StandbySettings `json:"standbySettings,omitempty"` // Map of postgres parameters PGParameters PGParameters `json:"pgParameters,omitempty"` + // Additional pg_hba.conf entries + // we don't set omitempty since we want to distinguish between null or empty slice + PGHBA []string `json:"pgHBA"` } type ClusterStatus struct { @@ -392,6 +394,13 @@ func (os *ClusterSpec) Validate() error { if s.InitMode == nil { return fmt.Errorf("initMode undefined") } + // The unique validation we're doing on pgHBA entries is that they don't contain a newline character + for _, e := range s.PGHBA { + if strings.Contains(e, "\n") { + return fmt.Errorf("pgHBA entries cannot contain newline characters") + } + } + switch *s.InitMode { case ClusterInitModeNew: if *s.Role == ClusterRoleStandby { @@ -526,6 +535,9 @@ type DBSpec struct { PITRConfig *PITRConfig `json:"pitrConfig,omitempty"` // Map of postgres parameters PGParameters PGParameters `json:"pgParameters,omitempty"` + // Additional pg_hba.conf entries + // We don't set omitempty since we want to distinguish between null or empty slice + PGHBA []string `json:"pgHBA"` // DB Role (master or standby) Role common.Role `json:"role,omitempty"` // FollowConfig when Role is "standby" diff 
--git a/pkg/postgresql/postgresql.go b/pkg/postgresql/postgresql.go index 97ba8b3b8..7fe2f36ba 100644 --- a/pkg/postgresql/postgresql.go +++ b/pkg/postgresql/postgresql.go @@ -31,6 +31,7 @@ import ( slog "github.com/sorintlab/stolon/pkg/log" _ "github.com/lib/pq" + "github.com/mitchellh/copystructure" "golang.org/x/net/context" ) @@ -45,6 +46,9 @@ type Manager struct { pgBinPath string dataDir string parameters common.Parameters + hba []string + curParameters common.Parameters + curHba []string localConnParams ConnParams replConnParams ConnParams suUsername string @@ -72,11 +76,12 @@ type InitConfig struct { DataChecksums bool } -func NewManager(pgBinPath string, dataDir string, parameters common.Parameters, localConnParams, replConnParams ConnParams, suUsername, suPassword, replUsername, replPassword string, requestTimeout time.Duration) *Manager { +func NewManager(pgBinPath string, dataDir string, localConnParams, replConnParams ConnParams, suUsername, suPassword, replUsername, replPassword string, requestTimeout time.Duration) *Manager { return &Manager{ pgBinPath: pgBinPath, dataDir: filepath.Join(dataDir, "postgres"), - parameters: parameters, + parameters: make(common.Parameters), + curParameters: make(common.Parameters), replConnParams: replConnParams, localConnParams: localConnParams, suUsername: suUsername, @@ -91,8 +96,32 @@ func (p *Manager) SetParameters(parameters common.Parameters) { p.parameters = parameters } -func (p *Manager) GetParameters() common.Parameters { - return p.parameters +func (p *Manager) CurParameters() common.Parameters { + return p.curParameters +} + +func (p *Manager) SetHba(hba []string) { + p.hba = hba +} + +func (p *Manager) CurHba() []string { + return p.curHba +} + +func (p *Manager) UpdateCurParameters() { + n, err := copystructure.Copy(p.parameters) + if err != nil { + panic(err) + } + p.curParameters = n.(common.Parameters) +} + +func (p *Manager) UpdateCurHba() { + n, err := copystructure.Copy(p.hba) + if err != nil { + 
panic(err) + } + p.curHba = n.([]string) } func (p *Manager) Init(initConfig *InitConfig) error { @@ -221,6 +250,9 @@ func (p *Manager) start(args ...string) error { return fmt.Errorf("error: %v", err) } + p.UpdateCurParameters() + p.UpdateCurHba() + // pg_ctl with -w will exit after the timeout and return 0 also if the // instance isn't accepting connection because already in recovery (usually // waiting for wals during a pitr or a pg_rewind) @@ -280,6 +312,10 @@ func (p *Manager) Reload() error { if err := cmd.Run(); err != nil { return fmt.Errorf("error: %v", err) } + + p.UpdateCurParameters() + p.UpdateCurHba() + return nil } @@ -608,10 +644,16 @@ func (p *Manager) writePgHba() error { f.WriteString(fmt.Sprintf("host replication %s %s md5\n", p.replUsername, "0.0.0.0/0")) f.WriteString(fmt.Sprintf("host replication %s %s md5\n", p.replUsername, "::0/0")) - // By default accept connections for all databases and users with md5 auth - // TODO(sgotti) Do not set this but let the user provide its pg_hba.conf file/entries - f.WriteString("host all all 0.0.0.0/0 md5\n") - f.WriteString("host all all ::0/0 md5\n") + if p.hba != nil { + for _, e := range p.hba { + f.WriteString(e + "\n") + } + } else { + // By default, if no custom pg_hba entries are provided, accept + // connections for all databases and users with md5 auth + f.WriteString("host all all 0.0.0.0/0 md5\n") + f.WriteString("host all all ::0/0 md5\n") + } if err = os.Rename(f.Name(), filepath.Join(p.dataDir, "pg_hba.conf")); err != nil { os.Remove(f.Name())