From 5a8abf584edd605ac7bfc6f72b31142c857128c4 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Thu, 14 Mar 2024 19:38:13 +0300 Subject: [PATCH] fix: get rid of the issue with `MachineSets` stuck in `Reconfiguring` `ClusterMachineConfigPatches` resources of old clusters are missing label `omni.sidero.dev/machine-set` and the controller always thinks that the machines are not configured yet. Signed-off-by: Artem Chernyshev --- .../backend/runtime/omni/migration/manager.go | 4 ++ .../runtime/omni/migration/migration_test.go | 44 +++++++++++++++++++ .../runtime/omni/migration/migrations.go | 25 +++++++++++ 3 files changed, 73 insertions(+) diff --git a/internal/backend/runtime/omni/migration/manager.go b/internal/backend/runtime/omni/migration/manager.go index f4fe08664..5fc39de73 100644 --- a/internal/backend/runtime/omni/migration/manager.go +++ b/internal/backend/runtime/omni/migration/manager.go @@ -132,6 +132,10 @@ func NewManager(state state.State, logger *zap.Logger) *Manager { callback: fixClusterTalosVersionOwnership, name: "fixClusterTalosVersionOwnership", }, + { + callback: updateClusterMachineConfigPatchesLabels, + name: "updateClusterMachineConfigPatchesLabels", + }, }, } } diff --git a/internal/backend/runtime/omni/migration/migration_test.go b/internal/backend/runtime/omni/migration/migration_test.go index 947a58fc7..75ecd7442 100644 --- a/internal/backend/runtime/omni/migration/migration_test.go +++ b/internal/backend/runtime/omni/migration/migration_test.go @@ -1183,6 +1183,50 @@ func (suite *MigrationSuite) TestFixClusterConfigVersionOwnership() { suite.Require().Equal(expectedName, c2.Metadata().Owner()) } +func (suite *MigrationSuite) TestUpdateClusterMachineConfigPatchesLabels() { + ctx := context.Background() + + version := system.NewDBVersion(resources.DefaultNamespace, system.DBVersionID) + version.TypedSpec().Value.Version = 23 + suite.Require().NoError(suite.state.Create(ctx, version)) + + cp1 := omni.NewClusterMachineConfigPatches(resources.DefaultNamespace, "1") + cp2 := omni.NewClusterMachineConfigPatches(resources.DefaultNamespace, "2") + + suite.Require().NoError(suite.state.Create(ctx, cp1, state.WithCreateOwner("MachineSetStatusController"))) + suite.Require().NoError(suite.state.Create(ctx, cp2, state.WithCreateOwner("MachineSetStatusController"))) + + cm := omni.NewClusterMachine(resources.DefaultNamespace, "2") + + cm.Metadata().Labels().Set(omni.LabelMachineSet, "some") + cm.Metadata().Labels().Set(omni.LabelCluster, "c1") + + suite.Require().NoError(suite.state.Create(ctx, cm, state.WithCreateOwner("MachineSetStatusController"))) + + suite.Require().NoError(suite.manager.Run(ctx)) + + var err error + + cp1, err = safe.StateGetByID[*omni.ClusterMachineConfigPatches](ctx, suite.state, "1") + suite.Require().NoError(err) + + cp2, err = safe.StateGetByID[*omni.ClusterMachineConfigPatches](ctx, suite.state, "2") + suite.Require().NoError(err) + + suite.Assert().True(cp1.Metadata().Labels().Empty()) + suite.Assert().False(cp2.Metadata().Labels().Empty()) + + val, ok := cp2.Metadata().Labels().Get(omni.LabelMachineSet) + suite.Require().True(ok) + + suite.Assert().Equal("some", val) + + val, ok = cp2.Metadata().Labels().Get(omni.LabelCluster) + suite.Require().True(ok) + + suite.Assert().Equal("c1", val) +} + func TestMigrationSuite(t *testing.T) { suite.Run(t, new(MigrationSuite)) } diff --git a/internal/backend/runtime/omni/migration/migrations.go b/internal/backend/runtime/omni/migration/migrations.go index 45f9a7294..9cc97421b 100644 --- a/internal/backend/runtime/omni/migration/migrations.go +++ b/internal/backend/runtime/omni/migration/migrations.go @@ -1025,3 +1025,28 @@ func fixClusterTalosVersionOwnership(ctx context.Context, s state.State, _ *zap. return s.Update(ctx, updated, state.WithUpdateOwner(owner)) }) } + +// add machine-set label to all cluster machine config patches resources. +func updateClusterMachineConfigPatchesLabels(ctx context.Context, s state.State, _ *zap.Logger) error { + list, err := safe.StateListAll[*omni.ClusterMachineConfigPatches](ctx, s) + if err != nil { + return err + } + + return list.ForEachErr(func(res *omni.ClusterMachineConfigPatches) error { + clusterMachine, err := safe.ReaderGetByID[*omni.ClusterMachine](ctx, s, res.Metadata().ID()) + if err != nil { + if state.IsNotFoundError(err) { + return nil + } + } + + _, err = safe.StateUpdateWithConflicts(ctx, s, res.Metadata(), func(r *omni.ClusterMachineConfigPatches) error { + helpers.CopyAllLabels(clusterMachine, r) + + return nil + }, state.WithUpdateOwner(res.Metadata().Owner()), state.WithExpectedPhaseAny()) + + return err + }) +}