From 3b4af282893f4b51cd9d57f2ad68aa09b7380fa2 Mon Sep 17 00:00:00 2001
From: YZ775
Date: Fri, 20 Oct 2023 04:41:44 +0000
Subject: [PATCH] add reboot queue backoff reset command

Signed-off-by: YZ775
---
 docs/ckecli.md                               |  6 +++
 mtest/reboot_test.go                         |  9 ++++
 pkg/ckecli/cmd/reboot_queue_reset_backoff.go | 43 ++++++++++++++++++++
 3 files changed, 58 insertions(+)
 create mode 100644 pkg/ckecli/cmd/reboot_queue_reset_backoff.go

diff --git a/docs/ckecli.md b/docs/ckecli.md
index 656660cdf..2b6595855 100644
--- a/docs/ckecli.md
+++ b/docs/ckecli.md
@@ -47,6 +47,7 @@ $ ckecli [--config FILE] args...
   - [`ckecli reboot-queue list`](#ckecli-reboot-queue-list)
   - [`ckecli reboot-queue cancel INDEX`](#ckecli-reboot-queue-cancel-index)
   - [`ckecli reboot-queue cancel-all`](#ckecli-reboot-queue-cancel-all)
+  - [`ckecli reboot-queue reset-backoff`](#ckecli-reboot-queue-reset-backoff)
 - [`ckecli sabakan`](#ckecli-sabakan)
   - [`ckecli sabakan enable|disable`](#ckecli-sabakan-enabledisable)
   - [`ckecli sabakan is-enabled`](#ckecli-sabakan-is-enabled)
@@ -299,6 +300,11 @@ Cancel the specified reboot queue entry.
 
 Cancel all the reboot queue entries.
 
+### `ckecli reboot-queue reset-backoff`
+
+Reset `drain_backoff_count` and `drain_backoff_expire` of the entries in the reboot queue.
+Resetting these values makes CKE try to reboot nodes again immediately.
+
 ## `ckecli sabakan`
 
 Control [sabakan integration feature](sabakan-integration.md).
diff --git a/mtest/reboot_test.go b/mtest/reboot_test.go
index 30488232e..d403b5328 100644
--- a/mtest/reboot_test.go
+++ b/mtest/reboot_test.go
@@ -406,6 +406,15 @@ func testRebootOperations() {
 		Expect(re).Should(HaveLen(1))
 		Expect(re[0].Status).Should(Equal(cke.RebootStatusQueued))
 		Expect(re[0].DrainBackOffExpire).ShouldNot(Equal(time.Time{}))
+		Expect(re[0].DrainBackOffCount).Should(Not(BeZero()))
+
+		By("Checking reset-backoff command resets drain backoff")
+		ckecliSafe("reboot-queue", "reset-backoff")
+		re, err = getRebootEntries()
+		Expect(err).ShouldNot(HaveOccurred())
+		Expect(re).Should(HaveLen(1))
+		Expect(re[0].DrainBackOffExpire).Should(Equal(time.Time{}))
+		Expect(re[0].DrainBackOffCount).Should(BeZero())
 
 		By("Waiting for reboot completion")
 		waitRebootCompletion(cluster)
diff --git a/pkg/ckecli/cmd/reboot_queue_reset_backoff.go b/pkg/ckecli/cmd/reboot_queue_reset_backoff.go
new file mode 100644
index 000000000..53d1fedb5
--- /dev/null
+++ b/pkg/ckecli/cmd/reboot_queue_reset_backoff.go
@@ -0,0 +1,43 @@
+package cmd
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	"github.com/cybozu-go/cke"
+	"github.com/cybozu-go/well"
+	"github.com/spf13/cobra"
+)
+
+// rebootQueueResetBackoffCmd implements `ckecli reboot-queue reset-backoff`: it zeroes
+// DrainBackOffCount and DrainBackOffExpire on every listed queue entry and writes it back.
+var rebootQueueResetBackoffCmd = &cobra.Command{
+	Use:   "reset-backoff",
+	Short: "Reset drain backoff of the entries in reboot queue",
+	Long:  `Reset drain_backoff_count and drain_backoff_expire of the entries in reboot queue`,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		well.Go(func(ctx context.Context) error {
+			entries, err := storage.GetRebootsEntries(ctx)
+			if err != nil {
+				return err
+			}
+			for _, entry := range entries {
+				entry.DrainBackOffCount = 0
+				entry.DrainBackOffExpire = time.Time{}
+				// ErrNotFound (matched with errors.Is so a wrapped sentinel also counts) means the entry has just finished: skip it.
+				if err := storage.UpdateRebootsEntry(ctx, entry); err != nil && !errors.Is(err, cke.ErrNotFound) {
+					return err
+				}
+			}
+			return nil
+		})
+		well.Stop()
+		return well.Wait()
+	},
+}
+
+// init registers reset-backoff as a subcommand of rebootQueueCmd.
+func init() {
+	rebootQueueCmd.AddCommand(rebootQueueResetBackoffCmd)
+}