From ef76f94220af9e5c96e0cd8207d0ad3676af2eaf Mon Sep 17 00:00:00 2001 From: Gabor Retvari Date: Tue, 6 Feb 2024 23:48:27 +0100 Subject: [PATCH] fix: Make sure CDS clients get an update on config deletion So far we have suppressed CDS updates on config deletion to avoid interference with graceful shutdown. This commit makes sure the update is indeed sent (actually we send a zero-config after a configurable delay, 5 sec by default). Since stunnerd graceful shutdown now disables the config watcher, the deleted configs will not cause a premature termination of active listeners. --- pkg/config/cds_test.go | 11 ++++++++--- pkg/config/server/config.go | 25 +++++++++++++++++++------ pkg/config/server/server.go | 28 ++++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/pkg/config/cds_test.go b/pkg/config/cds_test.go index 001884ba..fdfb6a08 100644 --- a/pkg/config/cds_test.go +++ b/pkg/config/cds_test.go @@ -939,19 +939,24 @@ func TestServerAPI(t *testing.T) { s2 = watchConfig(ch1, 500*time.Millisecond) assert.NotNil(t, s2) s3 = watchConfig(ch1, 500*time.Millisecond) - assert.Nil(t, s3) - lst = []*stnrv1.StunnerConfig{s1, s2} + assert.NotNil(t, s3) + lst = []*stnrv1.StunnerConfig{s1, s2, s3} assert.NotNil(t, findConfById(lst, "ns1/gw1")) assert.True(t, findConfById(lst, "ns1/gw1").DeepEqual(sc1), "deepeq") assert.NotNil(t, findConfById(lst, "ns3/gw1")) assert.True(t, findConfById(lst, "ns3/gw1").DeepEqual(sc4), "deepeq") + assert.NotNil(t, findConfById(lst, "ns1/gw2")) + gw2zero := client.ZeroConfig("ns1/gw2") // deleted! + assert.NoError(t, gw2zero.Validate()) + assert.True(t, findConfById(lst, "ns1/gw2").DeepEqual(gw2zero), "deepeq") // 1 config from client2 watch (removed config never updated) s1 = watchConfig(ch2, 50*time.Millisecond) assert.NotNil(t, s1) s2 = watchConfig(ch2, 50*time.Millisecond) - assert.Nil(t, s2) + assert.NotNil(t, s2) assert.True(t, s1.DeepEqual(sc1), "deepeq") + assert.True(t, s2.DeepEqual(gw2zero), "deepeq") // deleted! // no config from client3 watch s = watchConfig(ch3, 50*time.Millisecond) diff --git a/pkg/config/server/config.go b/pkg/config/server/config.go index 1571ed8a..fe0b6c4a 100644 --- a/pkg/config/server/config.go +++ b/pkg/config/server/config.go @@ -5,6 +5,7 @@ import ( "sync" stnrv1 "github.com/l7mp/stunner/pkg/apis/v1" + "github.com/l7mp/stunner/pkg/config/client" "github.com/l7mp/stunner/pkg/config/client/api" ) @@ -27,14 +28,26 @@ func (s *Server) UpsertConfig(id string, c *stnrv1.StunnerConfig) { s.configCh <- Config{Id: id, Config: cpy} } -// DeleteConfig should remove a config from the client. Theoretically, this would be done by -// sending the client a zero-config. However, in order to avoid that a client being removed and -// entering the graceful shutdown cycle receive a zeroconfig and abruprly kill all listeners with -// all active connections allocated to it, currently we suppress the config update. +// DeleteConfig removes a config from the client. Theoretically, this should send the client a +// zero-config immediately. However, in order to avoid that a client being removed and entering the +// graceful shutdown cycle receive a zeroconfig and abruprly kill all listeners with all active +// connections allocated to them, we actually delay sending the zeroconfig with a configurable time +// (default is 5 sec, but a zero delay will suppress sending the zero-config all together). This +// should allow the client comfortable time to enter the grafeul shutdown cycle. Note that clients +// should stop actively reconciling config updates once they initiated graceful shutdown for this +// to work. func (s *Server) DeleteConfig(id string) { s.configs.Delete(id) - s.log.Info("suppressing config update for terminating client", "client", id) - // s.configCh <- Config{Id: id, Config: client.ZeroConfig(id)} + if ConfigDeletionUpdateDelay == 0 { + s.log.Info("suppressing config update for deleted config", + "client", id) + return + } + + s.log.Info("delaying config update for deleted config", + "client", id, "delay", ConfigDeletionUpdateDelay) + + s.deleteCh <- Config{Id: id, Config: client.ZeroConfig(id)} } // UpdateConfig receives a set of ids and newConfigs that represent the state-of-the-world at a diff --git a/pkg/config/server/server.go b/pkg/config/server/server.go index ab1a4193..bc25c732 100644 --- a/pkg/config/server/server.go +++ b/pkg/config/server/server.go @@ -7,6 +7,7 @@ import ( "errors" "net" "net/http" + "time" "github.com/go-logr/logr" "github.com/gorilla/mux" @@ -16,6 +17,13 @@ import ( stnrv1 "github.com/l7mp/stunner/pkg/apis/v1" ) +var ( + // ConfigDeletionUpdateDelay is the delay between deleting a config from the server and + // sending the corresponing zero-config to the client. Set this to zero to suppress sending + // the zero-config all together. + ConfigDeletionUpdateDelay = 5 * time.Second +) + // Server is a generic config discovery server implementation. type Server struct { *http.Server @@ -24,6 +32,7 @@ type Server struct { conns *ConnTrack configs *ConfigStore configCh chan Config + deleteCh chan Config patch ConfigPatcher log logr.Logger } @@ -39,6 +48,7 @@ func New(addr string, patch ConfigPatcher, logger logr.Logger) *Server { conns: NewConnTrack(), configs: NewConfigStore(), configCh: make(chan Config, 8), + deleteCh: make(chan Config, 8), addr: addr, patch: patch, log: logger, @@ -67,12 +77,26 @@ func (s *Server) Start(ctx context.Context) error { go func() { defer close(s.configCh) + defer close(s.deleteCh) defer s.Close() for { select { - case e := <-s.configCh: - s.broadcastConfig(e) + case c := <-s.configCh: + s.broadcastConfig(c) + + case c := <-s.deleteCh: + // delayed config deletion + go func() { + select { + case <-ctx.Done(): + return + case <-time.After(ConfigDeletionUpdateDelay): + s.configCh <- Config{Id: c.Id, Config: c.Config} + return + } + }() + case <-ctx.Done(): return }