From 0d05a7f308f2b29ed85bfd4f1fd9a2aa360fe960 Mon Sep 17 00:00:00 2001 From: Chi Wai Chan Date: Fri, 20 Dec 2024 12:23:52 +0800 Subject: [PATCH] Apply suggestion. Signed-off-by: Chi Wai Chan --- microceph/ceph/maintenance.go | 297 ------------------ microceph/ceph/operations.go | 262 +++++++++++++++ ...maintenance_test.go => operations_test.go} | 100 +++--- microceph/ceph/osd.go | 2 +- microceph/ceph/osd_test.go | 14 +- .../microceph/cluster_maintenance_enter.go | 35 ++- .../cmd/microceph/cluster_maintenance_exit.go | 19 +- 7 files changed, 368 insertions(+), 361 deletions(-) delete mode 100644 microceph/ceph/maintenance.go create mode 100644 microceph/ceph/operations.go rename microceph/ceph/{maintenance_test.go => operations_test.go} (75%) diff --git a/microceph/ceph/maintenance.go b/microceph/ceph/maintenance.go deleted file mode 100644 index b7df0e75..00000000 --- a/microceph/ceph/maintenance.go +++ /dev/null @@ -1,297 +0,0 @@ -package ceph - -import ( - "fmt" - - "github.com/canonical/lxd/shared/logger" - - microCli "github.com/canonical/microcluster/v2/client" - - "github.com/canonical/microceph/microceph/client" -) - -// EnterMaintenance put a given node into maintanence mode. -func EnterMaintenance(clusterClient *microCli.Client, cephClient client.ClientInterface, name string, force, dryRun, setNoout, stopOsds bool) error { - ops := []operation{} - - // pre-flight checks - if !force { - ops = append(ops, []operation{ - &checkNodeInClusterOps{cephClient, clusterClient}, - &checkOsdOkToStopOps{cephClient, clusterClient}, - &checkNonOsdSvcEnoughOps{cephClient, clusterClient, 3, 1, 1}, - }...) - } - - // optionally set noout - if setNoout { - ops = append(ops, []operation{ - &setNooutOps{}, - &assertNooutFlagSetOps{}, - }...) - } - - // optionally stop osd service - if stopOsds { - ops = append(ops, []operation{ - &stopOsdOps{cephClient, clusterClient}, - }...) - } - - m := maintenance{name} - err := m.Run(ops, dryRun) - if err != nil { - return fmt.Errorf("Failed to enter maintenance mode: %v", err) - } - return nil -} - -// ExitMaintenance recover a given node from maintanence mode. -func ExitMaintenance(clusterClient *microCli.Client, cephClient client.ClientInterface, name string, dryRun bool) error { - ops := []operation{} - - // preflight checks - ops = append(ops, []operation{ - &checkNodeInClusterOps{cephClient, clusterClient}, - }...) - - // idempotently unset noout and start osd service - ops = append(ops, []operation{ - &unsetNooutOps{}, - &assertNooutFlagUnsetOps{}, - &startOsdOps{cephClient, clusterClient}, - }...) - - m := maintenance{name} - err := m.Run(ops, dryRun) - if err != nil { - return fmt.Errorf("Failed to exit maintenance mode: %v", err) - } - return nil -} - -type maintenance struct { - nodeName string -} - -func (m *maintenance) Run(operations []operation, dryRun bool) error { - for _, ops := range operations { - if dryRun { - fmt.Println(ops.DryRun(m.nodeName)) - } else { - err := ops.Run(m.nodeName) - if err != nil { - return err - } - } - } - return nil -} - -// -// operations -// - -type operation interface { - Run(string) error - DryRun(string) string -} - -type checkNodeInClusterOps struct { - cephClient client.ClientInterface - clusterClient *microCli.Client -} - -func (o *checkNodeInClusterOps) Run(name string) error { - clusterMembers, err := o.cephClient.GetClusterMembers(o.clusterClient) - if err != nil { - return fmt.Errorf("Error getting cluster members: %v", err) - } - - for _, member := range clusterMembers { - if member == name { - logger.Infof("Node '%s' is in the cluster.", name) - return nil - } - } - - return fmt.Errorf("Node '%s' not found", name) -} - -func (o *checkNodeInClusterOps) DryRun(name string) string { - return fmt.Sprintf("Check if node '%s' is in the cluster.", name) -} - -type checkOsdOkToStopOps struct { - cephClient client.ClientInterface - clusterClient *microCli.Client -} - -func (o *checkOsdOkToStopOps) Run(name string) error { - disks, err := o.cephClient.GetDisks(o.clusterClient) - if err != nil { - return fmt.Errorf("Error getting disks: %v", err) - } - - OsdsToCheck := []int64{} - for _, disk := range disks { - if disk.Location == name { - OsdsToCheck = append(OsdsToCheck, disk.OSD) - } - } - - if !testSafeStop(OsdsToCheck) { - return fmt.Errorf("osd.%v cannot be safely stopped", OsdsToCheck) - } - - logger.Infof("osd.%v can be safely stopped.", OsdsToCheck) - return nil -} - -func (o *checkOsdOkToStopOps) DryRun(name string) string { - return fmt.Sprintf("Check if osds in node '%s' are ok-to-stop.", name) -} - -type checkNonOsdSvcEnoughOps struct { - cephClient client.ClientInterface - clusterClient *microCli.Client - - minMon int - minMds int - minMgr int -} - -func (o *checkNonOsdSvcEnoughOps) Run(name string) error { - services, err := o.cephClient.GetServices(o.clusterClient) - if err != nil { - return fmt.Errorf("Error getting services: %v", err) - } - - remains := map[string]int{ - "mon": 0, - "mgr": 0, - "mds": 0, - } - for _, service := range services { - // do not count the service on this node - if service.Location != name { - remains[service.Service]++ - } - } - - // the remaining services must be sufficient to make the cluster healthy after the node enters - // maintanence mode. - if remains["mon"] < o.minMon || remains["mds"] < o.minMds || remains["mgr"] < o.minMgr { - return fmt.Errorf("Need at least %d mon, %d mds, and %d mgr services in the cluster besides those in node '%s'", o.minMon, o.minMds, o.minMgr, name) - } - logger.Infof("Remaining mon (%d), mds (%d), and mgr (%d) services in the cluster are enough after '%s' enters maintenance mode", remains["mon"], remains["mds"], remains["mgr"], name) - - return nil -} - -func (o *checkNonOsdSvcEnoughOps) DryRun(name string) string { - return fmt.Sprintf("Check if there are at least %d mon, %d mds, and %d mgr services in the cluster besides those in node '%s'", o.minMon, o.minMds, o.minMgr, name) -} - -type setNooutOps struct{} - -func (o *setNooutOps) Run(name string) error { - err := osdNooutFlag(true) - if err != nil { - return err - } - return nil -} - -func (o *setNooutOps) DryRun(name string) string { - return fmt.Sprint("Run `ceph osd set noout`.") -} - -type assertNooutFlagSetOps struct{} - -func (o *assertNooutFlagSetOps) Run(name string) error { - set, err := isOsdNooutSet() - if err != nil { - return err - } - if !set { - return fmt.Errorf("OSD has 'noout' flag unset.") - } - logger.Info("OSD has 'noout' flag set.") - return nil -} - -func (o *assertNooutFlagSetOps) DryRun(name string) string { - return fmt.Sprint("Assert OSD has 'noout' flag set.") -} - -type assertNooutFlagUnsetOps struct{} - -func (o *assertNooutFlagUnsetOps) Run(name string) error { - set, err := isOsdNooutSet() - if err != nil { - return err - } - if set { - return fmt.Errorf("OSD has 'noout' flag set.") - } - logger.Info("OSD has 'noout' flag unset.") - return nil -} - -func (o *assertNooutFlagUnsetOps) DryRun(name string) string { - return fmt.Sprint("Assert OSD has 'noout' flag unset.") -} - -type stopOsdOps struct { - cephClient client.ClientInterface - clusterClient *microCli.Client -} - -func (o *stopOsdOps) Run(name string) error { - err := o.cephClient.PutOsds(o.clusterClient, false, name) - if err != nil { - logger.Errorf("Unable to stop OSD service in node '%s'.", name) - return err - } - logger.Infof("Stopped OSD service in node '%s'.", name) - return nil -} - -func (o *stopOsdOps) DryRun(name string) string { - return fmt.Sprintf("Stop OSD service in node '%s'.", name) -} - -type startOsdOps struct { - cephClient client.ClientInterface - clusterClient *microCli.Client -} - -func (o *startOsdOps) Run(name string) error { - err := o.cephClient.PutOsds(o.clusterClient, true, name) - if err != nil { - logger.Errorf("Unable to start OSD service in node '%s'.", name) - return err - } - logger.Infof("Started OSD service in node '%s'", name) - return nil -} - -func (o *startOsdOps) DryRun(name string) string { - return fmt.Sprintf("Start osd services in node '%s'.", name) -} - -type unsetNooutOps struct{} - -func (o *unsetNooutOps) Run(name string) error { - err := osdNooutFlag(false) - if err != nil { - return err - } - logger.Info("Unset osd noout.") - return nil -} - -func (o *unsetNooutOps) DryRun(name string) string { - return fmt.Sprint("Run `ceph osd unset noout`.") -} diff --git a/microceph/ceph/operations.go b/microceph/ceph/operations.go new file mode 100644 index 00000000..b809e72a --- /dev/null +++ b/microceph/ceph/operations.go @@ -0,0 +1,262 @@ +package ceph + +import ( + "fmt" + + "github.com/canonical/lxd/shared/logger" + + microCli "github.com/canonical/microcluster/v2/client" + + "github.com/canonical/microceph/microceph/client" +) + +// RunOperations runs the provided operations or prints out the action plan. +func RunOperations(name string, operations []Operation, dryRun bool) error { + for _, ops := range operations { + if dryRun { + fmt.Println(ops.DryRun(name)) + } else { + err := ops.Run(name) + if err != nil { + return err + } + } + } + return nil +} + +// Operation is a interface for ceph and microceph operations. +// +type Operation interface { + // Run executes the operation and return the error if any. + Run(string) error + + // DryRun returns the string representation of the operation. + DryRun(string) string +} + +// CheckNodeInClusterOps is an operation to check if a node is in the microceph cluster. +type CheckNodeInClusterOps struct { + CephClient client.ClientInterface + ClusterClient *microCli.Client +} + +// Run checks if a node is in the microceph cluster. +func (o *CheckNodeInClusterOps) Run(name string) error { + clusterMembers, err := o.CephClient.GetClusterMembers(o.ClusterClient) + if err != nil { + return fmt.Errorf("Error getting cluster members: %v", err) + } + + for _, member := range clusterMembers { + if member == name { + logger.Infof("Node '%s' is in the cluster.", name) + return nil + } + } + + return fmt.Errorf("Node '%s' not found", name) +} + +// DryRun prints out the action plan. +func (o *CheckNodeInClusterOps) DryRun(name string) string { + return fmt.Sprintf("Check if node '%s' is in the cluster.", name) +} + +// CheckOsdOkToStopOps is an operation to check if osds in a node are ok-to-stop. +type CheckOsdOkToStopOps struct { + CephClient client.ClientInterface + ClusterClient *microCli.Client +} + +// Run checks osds in a node are ok-to-stop. +func (o *CheckOsdOkToStopOps) Run(name string) error { + disks, err := o.CephClient.GetDisks(o.ClusterClient) + if err != nil { + return fmt.Errorf("Error getting disks: %v", err) + } + + OsdsToCheck := []int64{} + for _, disk := range disks { + if disk.Location == name { + OsdsToCheck = append(OsdsToCheck, disk.OSD) + } + } + + if !testSafeStop(OsdsToCheck) { + return fmt.Errorf("osd.%v cannot be safely stopped", OsdsToCheck) + } + + logger.Infof("osd.%v can be safely stopped.", OsdsToCheck) + return nil +} + +// DryRun prints out the action plan. +func (o *CheckOsdOkToStopOps) DryRun(name string) string { + return fmt.Sprintf("Check if osds in node '%s' are ok-to-stop.", name) +} + +// CheckNonOsdSvcEnoughOps is an operation to check if non-osd service in a node are enough. +type CheckNonOsdSvcEnoughOps struct { + CephClient client.ClientInterface + ClusterClient *microCli.Client + + MinMon int + MinMds int + MinMgr int +} + +// Run checks if non-osds service in a node are enough. +func (o *CheckNonOsdSvcEnoughOps) Run(name string) error { + services, err := o.CephClient.GetServices(o.ClusterClient) + if err != nil { + return fmt.Errorf("Error getting services: %v", err) + } + + remains := map[string]int{ + "mon": 0, + "mgr": 0, + "mds": 0, + } + for _, service := range services { + // do not count the service on this node + if service.Location != name { + remains[service.Service]++ + } + } + + // the remaining services must be sufficient to make the cluster healthy after the node enters + // maintanence mode. + if remains["mon"] < o.MinMon || remains["mds"] < o.MinMds || remains["mgr"] < o.MinMgr { + return fmt.Errorf("Need at least %d mon, %d mds, and %d mgr services in the cluster besides those in node '%s'", o.MinMon, o.MinMds, o.MinMgr, name) + } + logger.Infof("Remaining mon (%d), mds (%d), and mgr (%d) services in the cluster are enough after '%s' enters maintenance mode", remains["mon"], remains["mds"], remains["mgr"], name) + + return nil +} + +// DryRun prints out the action plan. +func (o *CheckNonOsdSvcEnoughOps) DryRun(name string) string { + return fmt.Sprintf("Check if there are at least %d mon, %d mds, and %d mgr services in the cluster besides those in node '%s'", o.MinMon, o.MinMds, o.MinMgr, name) +} + +// SetNooutOps is an operation to set noout for the ceph cluster. +type SetNooutOps struct{} + +// Run `ceph osd set noout` for the ceph cluster. +func (o *SetNooutOps) Run(name string) error { + err := setOsdNooutFlag(true) + if err != nil { + return err + } + return nil +} + +// DryRun prints out the action plan. +func (o *SetNooutOps) DryRun(name string) string { + return fmt.Sprint("Run `ceph osd set noout`.") +} + +// AssertNooutFlagSetOps is an operation to assert noout has been set for the ceph cluster. +type AssertNooutFlagSetOps struct{} + +// Run asserts noout has been set for the ceph cluster. +func (o *AssertNooutFlagSetOps) Run(name string) error { + set, err := isOsdNooutSet() + if err != nil { + return err + } + if !set { + return fmt.Errorf("OSD has 'noout' flag unset.") + } + logger.Info("OSD has 'noout' flag set.") + return nil +} + +// DryRun prints out the action plan. +func (o *AssertNooutFlagSetOps) DryRun(name string) string { + return fmt.Sprint("Assert OSD has 'noout' flag set.") +} + +// AssertNooutFlagUnsetOps is an operation to assert noout has been unset for the ceph cluster. +type AssertNooutFlagUnsetOps struct{} + +// Run asserts noout has been unset for the ceph cluster. +func (o *AssertNooutFlagUnsetOps) Run(name string) error { + set, err := isOsdNooutSet() + if err != nil { + return err + } + if set { + return fmt.Errorf("OSD has 'noout' flag set.") + } + logger.Info("OSD has 'noout' flag unset.") + return nil +} + +// DryRun prints out the action plan. +func (o *AssertNooutFlagUnsetOps) DryRun(name string) string { + return fmt.Sprint("Assert OSD has 'noout' flag unset.") +} + +// StopOsdOps is an operation to stop osd service for a node. +type StopOsdOps struct { + CephClient client.ClientInterface + ClusterClient *microCli.Client +} + +// Run stops the osd service for a node. +func (o *StopOsdOps) Run(name string) error { + err := o.CephClient.PutOsds(o.ClusterClient, false, name) + if err != nil { + logger.Errorf("Unable to stop OSD service in node '%s'.", name) + return err + } + logger.Infof("Stopped OSD service in node '%s'.", name) + return nil +} + +// DryRun prints out the action plan. +func (o *StopOsdOps) DryRun(name string) string { + return fmt.Sprintf("Stop OSD service in node '%s'.", name) +} + +// StartOsdOps is an operation to start osd service for a node. +type StartOsdOps struct { + CephClient client.ClientInterface + ClusterClient *microCli.Client +} + +// Run starts the osd service for a node. +func (o *StartOsdOps) Run(name string) error { + err := o.CephClient.PutOsds(o.ClusterClient, true, name) + if err != nil { + logger.Errorf("Unable to start OSD service in node '%s'.", name) + return err + } + logger.Infof("Started OSD service in node '%s'", name) + return nil +} + +// DryRun prints out the action plan. +func (o *StartOsdOps) DryRun(name string) string { + return fmt.Sprintf("Start osd services in node '%s'.", name) +} + +// UnsetNooutOps is an operation to unset noout for the ceph cluster. +type UnsetNooutOps struct{} + +// Run `ceph osd unset noout` for the ceph cluster. +func (o *UnsetNooutOps) Run(name string) error { + err := setOsdNooutFlag(false) + if err != nil { + return err + } + logger.Info("Unset osd noout.") + return nil +} + +// DryRun prints out the action plan. +func (o *UnsetNooutOps) DryRun(name string) string { + return fmt.Sprint("Run `ceph osd unset noout`.") +} diff --git a/microceph/ceph/maintenance_test.go b/microceph/ceph/operations_test.go similarity index 75% rename from microceph/ceph/maintenance_test.go rename to microceph/ceph/operations_test.go index 6fba5eb7..ea2d907e 100644 --- a/microceph/ceph/maintenance_test.go +++ b/microceph/ceph/operations_test.go @@ -14,17 +14,17 @@ import ( "github.com/stretchr/testify/suite" ) -func TestMaintenance(t *testing.T) { - suite.Run(t, new(maintenanceSuite)) +func TestOperations(t *testing.T) { + suite.Run(t, new(operationsSuite)) } -// maintenanceSuite is the test suite for maintenance mode. -type maintenanceSuite struct { +// operationsSuite is the test suite for maintenance mode. +type operationsSuite struct { tests.BaseSuite TestStateInterface *mocks.StateInterface } -func (s *maintenanceSuite) TestCheckNodeInClusterOpsTrue() { +func (s *operationsSuite) TestCheckNodeInClusterOpsTrue() { m := mocks.NewClientInterface(s.T()) m.On("GetClusterMembers", mock.Anything).Return([]string{"microceph-0", "microceph-1"}, nil).Once() @@ -32,12 +32,12 @@ func (s *maintenanceSuite) TestCheckNodeInClusterOpsTrue() { client.MClient = m // node microceph-0 is in the cluster - ops := checkNodeInClusterOps{client.MClient, nil} + ops := CheckNodeInClusterOps{client.MClient, nil} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestCheckNodeInClusterOpsFalse() { +func (s *operationsSuite) TestCheckNodeInClusterOpsFalse() { m := mocks.NewClientInterface(s.T()) m.On("GetClusterMembers", mock.Anything).Return([]string{"microceph-0", "microceph-1"}, nil).Once() @@ -45,12 +45,12 @@ func (s *maintenanceSuite) TestCheckNodeInClusterOpsFalse() { client.MClient = m // node microceph-2 is not in the cluster - ops := checkNodeInClusterOps{client.MClient, nil} + ops := CheckNodeInClusterOps{client.MClient, nil} err := ops.Run("microceph-2") assert.ErrorContains(s.T(), err, "not found") } -func (s *maintenanceSuite) TestCheckNodeInClusterOpsError() { +func (s *operationsSuite) TestCheckNodeInClusterOpsError() { m := mocks.NewClientInterface(s.T()) m.On("GetClusterMembers", mock.Anything).Return([]string{}, fmt.Errorf("some reasons")).Once() @@ -58,12 +58,12 @@ func (s *maintenanceSuite) TestCheckNodeInClusterOpsError() { client.MClient = m // cannot get cluster member - ops := checkNodeInClusterOps{client.MClient, nil} + ops := CheckNodeInClusterOps{client.MClient, nil} err := ops.Run("some-node-name") assert.ErrorContains(s.T(), err, "Error getting cluster members") } -func (s *maintenanceSuite) TestCheckOsdOkToStopOpsTrue() { +func (s *operationsSuite) TestCheckOsdOkToStopOpsTrue() { m := mocks.NewClientInterface(s.T()) m.On("GetDisks", mock.Anything).Return( types.Disks{ @@ -87,12 +87,12 @@ func (s *maintenanceSuite) TestCheckOsdOkToStopOpsTrue() { processExec = r // osd.1 in microceph-0 is okay to stop - ops := checkOsdOkToStopOps{client.MClient, nil} + ops := CheckOsdOkToStopOps{client.MClient, nil} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestCheckOsdOkToStopOpsFalse() { +func (s *operationsSuite) TestCheckOsdOkToStopOpsFalse() { m := mocks.NewClientInterface(s.T()) m.On("GetDisks", mock.Anything).Return( types.Disks{ @@ -116,12 +116,12 @@ func (s *maintenanceSuite) TestCheckOsdOkToStopOpsFalse() { processExec = r // osd.1 in microceph-0 is not okay to stop - ops := checkOsdOkToStopOps{client.MClient, nil} + ops := CheckOsdOkToStopOps{client.MClient, nil} err := ops.Run("microceph-0") assert.ErrorContains(s.T(), err, "cannot be safely stopped") } -func (s *maintenanceSuite) TestCheckOsdOkToStopOpsError() { +func (s *operationsSuite) TestCheckOsdOkToStopOpsError() { m := mocks.NewClientInterface(s.T()) m.On("GetDisks", mock.Anything).Return(types.Disks{}, fmt.Errorf("some reasons")).Once() @@ -129,12 +129,12 @@ func (s *maintenanceSuite) TestCheckOsdOkToStopOpsError() { client.MClient = m // cannot get disks - ops := checkOsdOkToStopOps{client.MClient, nil} + ops := CheckOsdOkToStopOps{client.MClient, nil} err := ops.Run("some-node-name") assert.ErrorContains(s.T(), err, "Error getting disks") } -func (s *maintenanceSuite) TestCheckNonOsdSvcEnoughOpsTrue() { +func (s *operationsSuite) TestCheckNonOsdSvcEnoughOpsTrue() { m := mocks.NewClientInterface(s.T()) // 4 mons, 1 mds, 1 mgr m.On("GetServices", mock.Anything).Return( @@ -169,12 +169,12 @@ func (s *maintenanceSuite) TestCheckNonOsdSvcEnoughOpsTrue() { client.MClient = m // microceph-3 go to maintenance mode -> 3 mons, 1 mds, 1 mgr -> ok - ops := checkNonOsdSvcEnoughOps{client.MClient, nil, 3, 1, 1} + ops := CheckNonOsdSvcEnoughOps{client.MClient, nil, 3, 1, 1} err := ops.Run("microceph-3") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestCheckNonOsdSvcEnoughOpsFalse() { +func (s *operationsSuite) TestCheckNonOsdSvcEnoughOpsFalse() { m := mocks.NewClientInterface(s.T()) // 4 mons, 1 mds, 1 mgr m.On("GetServices", mock.Anything).Return( @@ -209,12 +209,12 @@ func (s *maintenanceSuite) TestCheckNonOsdSvcEnoughOpsFalse() { client.MClient = m // microceph-0 go to maintenance mode -> 3 mons, 0 mds, 0 mgr -> no ok - ops := checkNonOsdSvcEnoughOps{client.MClient, nil, 3, 1, 1} + ops := CheckNonOsdSvcEnoughOps{client.MClient, nil, 3, 1, 1} err := ops.Run("microceph-0") assert.Error(s.T(), err) } -func (s *maintenanceSuite) TestCheckNonOsdSvcEnoughOpsError() { +func (s *operationsSuite) TestCheckNonOsdSvcEnoughOpsError() { m := mocks.NewClientInterface(s.T()) m.On("GetServices", mock.Anything).Return(types.Services{}, fmt.Errorf("some reasons")).Once() @@ -222,175 +222,175 @@ func (s *maintenanceSuite) TestCheckNonOsdSvcEnoughOpsError() { client.MClient = m // cannot get services - ops := checkNonOsdSvcEnoughOps{client.MClient, nil, 3, 1, 1} + ops := CheckNonOsdSvcEnoughOps{client.MClient, nil, 3, 1, 1} err := ops.Run("some-node-name") assert.ErrorContains(s.T(), err, "Error getting services") } -func (s *maintenanceSuite) TestSetNooutOpsOkay() { +func (s *operationsSuite) TestSetNooutOpsOkay() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "set", "noout").Return("ok", nil).Once() // patch processExec processExec = r - ops := setNooutOps{} + ops := SetNooutOps{} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestSetNooutOpsFail() { +func (s *operationsSuite) TestSetNooutOpsFail() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "set", "noout").Return("fail", fmt.Errorf("some reasons")).Once() // patch processExec processExec = r - ops := setNooutOps{} + ops := SetNooutOps{} err := ops.Run("microceph-0") assert.Error(s.T(), err) } -func (s *maintenanceSuite) TestAssertNooutFlagSetOpsTrue() { +func (s *operationsSuite) TestAssertNooutFlagSetOpsTrue() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "dump").Return("flags noout", nil).Once() // patch processExec processExec = r - ops := assertNooutFlagSetOps{} + ops := AssertNooutFlagSetOps{} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestAssertNooutFlagSetOpsFalse() { +func (s *operationsSuite) TestAssertNooutFlagSetOpsFalse() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "dump").Return("flags", nil).Once() // patch processExec processExec = r - ops := assertNooutFlagSetOps{} + ops := AssertNooutFlagSetOps{} err := ops.Run("microceph-0") assert.ErrorContains(s.T(), err, "unset") } -func (s *maintenanceSuite) TestAssertNooutFlagSetOpsError() { +func (s *operationsSuite) TestAssertNooutFlagSetOpsError() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "dump").Return("fail", fmt.Errorf("some reasons")).Once() // patch processExec processExec = r - ops := assertNooutFlagSetOps{} + ops := AssertNooutFlagSetOps{} err := ops.Run("microceph-0") assert.Error(s.T(), err) } -func (s *maintenanceSuite) TestAssertNooutFlagUnsetOpsTrue() { +func (s *operationsSuite) TestAssertNooutFlagUnsetOpsTrue() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "dump").Return("flags", nil).Once() // patch processExec processExec = r - ops := assertNooutFlagUnsetOps{} + ops := AssertNooutFlagUnsetOps{} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestAssertNooutFlagUnsetOpsFalse() { +func (s *operationsSuite) TestAssertNooutFlagUnsetOpsFalse() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "dump").Return("flags noout", nil).Once() // patch processExec processExec = r - ops := assertNooutFlagUnsetOps{} + ops := AssertNooutFlagUnsetOps{} err := ops.Run("microceph-0") assert.ErrorContains(s.T(), err, "set") } -func (s *maintenanceSuite) TestAssertNooutFlagUnsetOpsError() { +func (s *operationsSuite) TestAssertNooutFlagUnsetOpsError() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "dump").Return("fail", fmt.Errorf("some reasons")).Once() // patch processExec processExec = r - ops := assertNooutFlagUnsetOps{} + ops := AssertNooutFlagUnsetOps{} err := ops.Run("microceph-0") assert.Error(s.T(), err) } -func (s *maintenanceSuite) TestStopOsdOpsOkay() { +func (s *operationsSuite) TestStopOsdOpsOkay() { m := mocks.NewClientInterface(s.T()) m.On("PutOsds", mock.Anything, false, mock.Anything).Return(nil) // patch ceph client client.MClient = m - ops := stopOsdOps{client.MClient, nil} + ops := StopOsdOps{client.MClient, nil} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestStopOsdOpsFail() { +func (s *operationsSuite) TestStopOsdOpsFail() { m := mocks.NewClientInterface(s.T()) m.On("PutOsds", mock.Anything, false, mock.Anything).Return(fmt.Errorf("some reasons")) // patch ceph client client.MClient = m - ops := stopOsdOps{client.MClient, nil} + ops := StopOsdOps{client.MClient, nil} err := ops.Run("microceph-0") assert.Error(s.T(), err, "Unable to stop OSD service in node") } -func (s *maintenanceSuite) TestStartOsdOpsOkay() { +func (s *operationsSuite) TestStartOsdOpsOkay() { m := mocks.NewClientInterface(s.T()) m.On("PutOsds", mock.Anything, true, mock.Anything).Return(nil) // patch ceph client client.MClient = m - ops := startOsdOps{client.MClient, nil} + ops := StartOsdOps{client.MClient, nil} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestStartOsdOpsFail() { +func (s *operationsSuite) TestStartOsdOpsFail() { m := mocks.NewClientInterface(s.T()) m.On("PutOsds", mock.Anything, true, mock.Anything).Return(fmt.Errorf("some reasons")) // patch ceph client client.MClient = m - ops := startOsdOps{client.MClient, nil} + ops := StartOsdOps{client.MClient, nil} err := ops.Run("microceph-0") assert.Error(s.T(), err, "Unable to start OSD service in node") } -func (s *maintenanceSuite) TestUnsetNooutOpsOkay() { +func (s *operationsSuite) TestUnSetNooutOpsOkay() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "unset", "noout").Return("ok", nil).Once() // patch processExec processExec = r - ops := unsetNooutOps{} + ops := UnsetNooutOps{} err := ops.Run("microceph-0") assert.NoError(s.T(), err) } -func (s *maintenanceSuite) TestUnsetNooutOpsFail() { +func (s *operationsSuite) TestUnSetNooutOpsFail() { r := mocks.NewRunner(s.T()) r.On("RunCommand", "ceph", "osd", "unset", "noout").Return("fail", fmt.Errorf("some reasons")).Once() // patch processExec processExec = r - ops := unsetNooutOps{} + ops := UnsetNooutOps{} err := ops.Run("microceph-0") assert.Error(s.T(), err) } diff --git a/microceph/ceph/osd.go b/microceph/ceph/osd.go index c3922711..1585a7a6 100644 --- a/microceph/ceph/osd.go +++ b/microceph/ceph/osd.go @@ -954,7 +954,7 @@ func outDownOSD(osd int64) error { return nil } -func osdNooutFlag(set bool) error { +func setOsdNooutFlag(set bool) error { var command string switch set { diff --git a/microceph/ceph/osd_test.go b/microceph/ceph/osd_test.go index a8e4d7f5..c0bd78ea 100644 --- a/microceph/ceph/osd_test.go +++ b/microceph/ceph/osd_test.go @@ -258,8 +258,8 @@ func (s *osdSuite) TestSetOsdStateFail() { assert.Error(s.T(), err) } -// TestOsdNooutFlagOkay tests the osdNooutFlag function when no error occurs -func (s *osdSuite) TestOsdNooutFlagOkay() { +// TestSetOsdNooutFlagOkay tests the setOsdNooutFlag function when no error occurs +func (s *osdSuite) TestSetOsdNooutFlagOkay() { r := mocks.NewRunner(s.T()) addOsdtNooutFlagTrueExpectations(r) addOsdtNooutFlagFalseExpectations(r) @@ -267,22 +267,22 @@ func (s *osdSuite) TestOsdNooutFlagOkay() { // patch processExec processExec = r - err := osdNooutFlag(true) + err := setOsdNooutFlag(true) assert.NoError(s.T(), err) - err = osdNooutFlag(false) + err = setOsdNooutFlag(false) assert.NoError(s.T(), err) } -// TestOsdNooutFlagFail tests the osdNooutFlag function when error occurs -func (s *osdSuite) TestOsdNooutFlagFail() { +// TestSetOsdNooutFlagFail tests the setOsdNooutFlag function when error occurs +func (s *osdSuite) TestSetOsdNooutFlagFail() { r := mocks.NewRunner(s.T()) addOsdtNooutFlagFailedExpectations(r) // patch processExec processExec = r - err := osdNooutFlag(true) + err := setOsdNooutFlag(true) assert.Error(s.T(), err) } diff --git a/microceph/cmd/microceph/cluster_maintenance_enter.go b/microceph/cmd/microceph/cluster_maintenance_enter.go index 1e9e3e5a..27014be8 100644 --- a/microceph/cmd/microceph/cluster_maintenance_enter.go +++ b/microceph/cmd/microceph/cluster_maintenance_enter.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "github.com/canonical/microcluster/v2/microcluster" "github.com/spf13/cobra" @@ -41,14 +42,42 @@ func (c *cmdClusterMaintenanceEnter) Run(cmd *cobra.Command, args []string) erro return err } - clusterClient, err := m.LocalClient() + cli, err := m.LocalClient() if err != nil { return err } - err = ceph.EnterMaintenance(clusterClient, client.MClient, args[0], c.flagForce, c.flagDryRun, c.flagSetNoout, c.flagStopOsds) + name := args[0] + operations := []ceph.Operation{ + &ceph.CheckNodeInClusterOps{client.MClient, cli}, + } + + // pre-flight checks + if !c.flagForce { + operations = append(operations, []ceph.Operation{ + &ceph.CheckOsdOkToStopOps{client.MClient, cli}, + &ceph.CheckNonOsdSvcEnoughOps{client.MClient, cli, 3, 1, 1}, + }...) + } + + // optionally set noout + if c.flagSetNoout { + operations = append(operations, []ceph.Operation{ + &ceph.SetNooutOps{}, + &ceph.AssertNooutFlagSetOps{}, + }...) + } + + // optionally stop osd service + if c.flagStopOsds { + operations = append(operations, []ceph.Operation{ + &ceph.StopOsdOps{client.MClient, cli}, + }...) + } + + err = ceph.RunOperations(name, operations, c.flagDryRun) if err != nil { - return err + return fmt.Errorf("Failed to enter maintenance mode: %v", err) } return nil diff --git a/microceph/cmd/microceph/cluster_maintenance_exit.go b/microceph/cmd/microceph/cluster_maintenance_exit.go index db13aef5..d8e69b5a 100644 --- a/microceph/cmd/microceph/cluster_maintenance_exit.go +++ b/microceph/cmd/microceph/cluster_maintenance_exit.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "github.com/canonical/microcluster/v2/microcluster" "github.com/spf13/cobra" @@ -36,14 +37,26 @@ func (c *cmdClusterMaintenanceExit) Run(cmd *cobra.Command, args []string) error return err } - clusterClient, err := m.LocalClient() + cli, err := m.LocalClient() if err != nil { return err } - err = ceph.ExitMaintenance(clusterClient, client.MClient, args[0], c.flagDryRun) + name := args[0] + operations := []ceph.Operation{ + &ceph.CheckNodeInClusterOps{client.MClient, cli}, + } + + // idempotently unset noout and start osd service + operations = append(operations, []ceph.Operation{ + &ceph.UnsetNooutOps{}, + &ceph.AssertNooutFlagUnsetOps{}, + &ceph.StartOsdOps{client.MClient, cli}, + }...) + + err = ceph.RunOperations(name, operations, c.flagDryRun) if err != nil { - return err + return fmt.Errorf("Failed to exit maintenance mode: %v", err) } return nil