From e36a3689b889452437764e3bc49043eac7dbc457 Mon Sep 17 00:00:00 2001
From: Utkarsh Bhatt
Date: Mon, 7 Oct 2024 18:55:03 +0530
Subject: [PATCH] Added support to perform cluster promotion/demotion

Signed-off-by: Utkarsh Bhatt
---
 .github/workflows/tests.yml                   |   3 +
 docs/how-to/index.rst                         |   1 +
 docs/how-to/perform-site-failover.rst         |  81 ++++++++++
 .../commands/remote-replication-rbd.rst       |  29 +++++
 microceph/api/ops_replication.go              |   9 ++
 microceph/api/types/replication.go            |  10 +-
 microceph/ceph/rbd_mirror.go                  | 102 ++++++++++++++-
 microceph/ceph/rbd_mirror_test.go             |  40 ++++++
 microceph/ceph/replication.go                 |  25 +++-
 microceph/ceph/replication_rbd.go             | 121 ++++++++++++++++--
 .../rbd_mirror_promote_secondary_failure.txt  |  10 ++
 microceph/client/remote_replication.go        |   6 +-
 .../cmd/microceph/remote_replication_rbd.go   |   8 ++
 .../remote_replication_rbd_demote.go          |  69 ++++++++++
 .../remote_replication_rbd_promote.go         |  69 ++++++++++
 microceph/constants/constants.go              |   6 +
 tests/scripts/actionutils.sh                  |  71 +++++++++-
 17 files changed, 631 insertions(+), 29 deletions(-)
 create mode 100644 docs/how-to/perform-site-failover.rst
 create mode 100644 microceph/ceph/test_assets/rbd_mirror_promote_secondary_failure.txt
 create mode 100644 microceph/cmd/microceph/remote_replication_rbd_demote.go
 create mode 100644 microceph/cmd/microceph/remote_replication_rbd_promote.go

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 80beea31..8a90eedc 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -718,6 +718,9 @@ jobs:
       - name: Verify RBD mirror
         run : ~/actionutils.sh remote_verify_rbd_mirroring
 
+      - name: Failover site A to Site B
+        run : ~/actionutils.sh remote_failover_to_siteb
+
       - name: Disable RBD mirror
         run : ~/actionutils.sh remote_disable_rbd_mirroring
 
diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst
index ae7c5cee..bdbb10b0 100644
--- a/docs/how-to/index.rst
+++ b/docs/how-to/index.rst
@@ -55,6 +55,7 @@ RBD pools and images.
 
     import-remote-cluster
     configure-rbd-mirroring
+    perform-site-failover
 
 Upgrading your cluster
 ----------------------
diff --git a/docs/how-to/perform-site-failover.rst b/docs/how-to/perform-site-failover.rst
new file mode 100644
index 00000000..d34a0097
--- /dev/null
+++ b/docs/how-to/perform-site-failover.rst
@@ -0,0 +1,81 @@
+=============================================
+Perform failover for replicated RBD resources
+=============================================
+
+In case of a disaster, all replicated RBD pools can be failed over to a non-primary remote.
+
+An operator can perform promotion on a non-primary cluster; this in turn promotes all replicated RBD
+images in all RBD pools and makes them primary. This enables them to be consumed by VMs and other workloads.
+
+Prerequisites
+--------------
+1. A primary and a secondary MicroCeph cluster, for example named "primary_cluster" and "secondary_cluster"
+2. primary_cluster has imported configurations from secondary_cluster and vice versa; refer to :doc:`import remote <./import-remote-cluster>`. A quick check is sketched below.
+3. RBD remote replication is configured for at least one RBD image; refer to :doc:`configure rbd replication <./configure-rbd-mirroring>`
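+
+As a quick check (a sketch; it assumes the ``remote list`` subcommand provided by the remote import
+feature, and the example cluster names used on this page), each cluster should list the other as an
+imported remote:
+
+.. code-block:: none
+
+   sudo microceph remote list
+
+The output on secondary_cluster should include an entry for primary_cluster, and vice versa.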
+
+Failover to a non-primary remote cluster
+-----------------------------------------
+List all the resources on 'secondary_cluster' to check primary status.
+
+.. code-block:: none
+
+   sudo microceph remote replication rbd list
+   +-----------+------------+------------+---------------------+
+   | POOL NAME | IMAGE NAME | IS PRIMARY |  LAST LOCAL UPDATE  |
+   +-----------+------------+------------+---------------------+
+   | pool_one  | image_one  | false      | 2024-10-14 09:03:17 |
+   | pool_one  | image_two  | false      | 2024-10-14 09:03:17 |
+   +-----------+------------+------------+---------------------+
+
+An operator can perform cluster-wide promotion as follows:
+
+.. code-block:: none
+
+   sudo microceph remote replication rbd promote --remote primary_cluster --yes-i-really-mean-it
+
+Here, the --remote parameter helps MicroCeph filter the resources to promote.
+Since promotion of secondary_cluster may cause a split-brain condition in the future,
+it is necessary to pass the --yes-i-really-mean-it flag.
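+
+If the flag is omitted, the promotion is rejected with an error similar to the following (a sketch;
+wording abridged from the CLI force prompt):
+
+.. code-block:: none
+
+   sudo microceph remote replication rbd promote --remote primary_cluster
+   failed to process promote_replication request for rbd: If you understand the *RISK* and you're
+   *ABSOLUTELY CERTAIN* that is what you want, pass --yes-i-really-mean-it.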
+
+Verify RBD remote replication primary status
+---------------------------------------------
+
+List all the resources on 'secondary_cluster' again to check primary status.
+
+.. code-block:: none
+
+   sudo microceph remote replication rbd list
+   +-----------+------------+------------+---------------------+
+   | POOL NAME | IMAGE NAME | IS PRIMARY |  LAST LOCAL UPDATE  |
+   +-----------+------------+------------+---------------------+
+   | pool_one  | image_one  | true       | 2024-10-14 09:06:12 |
+   | pool_one  | image_two  | true       | 2024-10-14 09:06:12 |
+   +-----------+------------+------------+---------------------+
+
+The output shows that there are two replicated images and both of them are now primary.
+
+Failback to old primary
+------------------------
+
+Once the disaster-struck cluster (primary_cluster) is back online, the RBD resources
+can be failed back to it. By this time, however, the RBD images at the current primary (secondary_cluster)
+will have diverged from primary_cluster. Thus, to have a clean sync, the operator must decide
+which cluster will be demoted to non-primary status. This cluster will then receive the
+RBD mirror updates from the standing primary.
+
+Note: Demotion can cause data loss and hence can only be performed with the 'yes-i-really-mean-it' flag.
+
+At primary_cluster (the primary before the disaster), attempt demotion.
+
+.. code-block:: none
+
+   sudo microceph remote replication rbd demote --remote secondary_cluster
+   failed to process demote_replication request for rbd: demotion may cause data loss on this cluster. If you
+   understand the *RISK* and you're *ABSOLUTELY CERTAIN* that is what you want, pass --yes-i-really-mean-it.
+
+Now, again at 'primary_cluster', perform demotion with the --yes-i-really-mean-it flag.
+
+.. code-block:: none
+
+   sudo microceph remote replication rbd demote --remote secondary_cluster --yes-i-really-mean-it
+
+Note: MicroCeph will demote the primary pools and will issue a resync for all the mirroring images; hence it may
+cause data loss at the old primary cluster.
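+
+The resync triggered by demotion can be watched per image with the native rbd CLI (a sketch; it
+assumes the rbd client shipped with the snap, e.g. the microceph.rbd app, and the example image
+names used on this page):
+
+.. code-block:: none
+
+   sudo microceph.rbd mirror image status pool_one/image_one
+
+The state and description fields in the output report the replay/resync progress of the image.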
diff --git a/docs/reference/commands/remote-replication-rbd.rst b/docs/reference/commands/remote-replication-rbd.rst
index 7d3db611..2915d913 100644
--- a/docs/reference/commands/remote-replication-rbd.rst
+++ b/docs/reference/commands/remote-replication-rbd.rst
@@ -96,3 +96,32 @@ Usage:
 
    --force forcefully disable replication for rbd resource
 
+``promote``
+------------
+
+Promote local cluster to primary
+
+Usage:
+
+.. code-block:: none
+
+   microceph remote replication rbd promote [flags]
+
+.. code-block:: none
+
+   --remote                 remote MicroCeph cluster name
+   --yes-i-really-mean-it   forcefully promote site to primary
+
+``demote``
+------------
+
+Demote local cluster to secondary
+
+Usage:
+
+.. code-block:: none
+
+   microceph remote replication rbd demote [flags]
+
+.. code-block:: none
+
+   --remote                 remote MicroCeph cluster name
+   --yes-i-really-mean-it   demote cluster irrespective of data loss
+
diff --git a/microceph/api/ops_replication.go b/microceph/api/ops_replication.go
index 406ff148..21edfa29 100644
--- a/microceph/api/ops_replication.go
+++ b/microceph/api/ops_replication.go
@@ -31,6 +31,7 @@ var opsReplicationCmd = rest.Endpoint{
 var opsReplicationWorkloadCmd = rest.Endpoint{
 	Path: "ops/replication/{wl}",
 	Get:  rest.EndpointAction{Handler: getOpsReplicationWorkload, ProxyTarget: false},
+	Put:  rest.EndpointAction{Handler: putOpsReplicationWorkload, ProxyTarget: false},
 }
 
 // CRUD Replication
@@ -47,6 +48,12 @@ func getOpsReplicationWorkload(s state.State, r *http.Request) response.Response
 	return cmdOpsReplication(s, r, types.ListReplicationRequest)
 }
 
+// putOpsReplicationWorkload handles site level (promote/demote) operation
+func putOpsReplicationWorkload(s state.State, r *http.Request) response.Response {
+	// either promote or demote (already encoded in request)
+	return cmdOpsReplication(s, r, "")
+}
+
 // getOpsReplicationResource handles status operation for a certain resource.
 func getOpsReplicationResource(s state.State, r *http.Request) response.Response {
 	return cmdOpsReplication(s, r, types.StatusReplicationRequest)
@@ -105,6 +112,8 @@ func cmdOpsReplication(s state.State, r *http.Request, patchRequest types.Replic
 		return response.SmartError(fmt.Errorf("unknown workload %s, resource %s", wl, resource))
 	}
 
+	logger.Debugf("REPOPS: %s received for %s: %s", req.GetWorkloadRequestType(), wl, resource)
+
 	return handleReplicationRequest(s, r.Context(), req)
 }
 
diff --git a/microceph/api/types/replication.go b/microceph/api/types/replication.go
index fb512c17..7fea4682 100644
--- a/microceph/api/types/replication.go
+++ b/microceph/api/types/replication.go
@@ -12,9 +12,13 @@ type ReplicationRequestType string
 const (
 	EnableReplicationRequest    ReplicationRequestType = "POST-" + constants.EventEnableReplication
 	ConfigureReplicationRequest ReplicationRequestType = "PUT-" + constants.EventConfigureReplication
-	DisableReplicationRequest   ReplicationRequestType = "DELETE-" + constants.EventDisableReplication
-	StatusReplicationRequest    ReplicationRequestType = "GET-" + constants.EventStatusReplication
-	ListReplicationRequest      ReplicationRequestType = "GET-" + constants.EventListReplication
+	PromoteReplicationRequest   ReplicationRequestType = "PUT-" + constants.EventPromoteReplication
+	DemoteReplicationRequest    ReplicationRequestType = "PUT-" + constants.EventDemoteReplication
+	// Delete Requests
+	DisableReplicationRequest ReplicationRequestType = "DELETE-" + constants.EventDisableReplication
+	// Get Requests
+	StatusReplicationRequest ReplicationRequestType = "GET-" + constants.EventStatusReplication
+	ListReplicationRequest   ReplicationRequestType = "GET-" + constants.EventListReplication
 )
 
 type CephWorkloadType string
diff --git a/microceph/ceph/rbd_mirror.go b/microceph/ceph/rbd_mirror.go
index 6fc2b85f..6b37ebbd 100644
--- a/microceph/ceph/rbd_mirror.go
+++ b/microceph/ceph/rbd_mirror.go
@@ -214,8 +214,8 @@ func DisablePoolMirroring(pool string, peer RbdReplicationPeer, localName string
 	return nil
 }
 
-// DisableMirroringAllImagesInPool disables mirroring for all images for a pool enabled in pool mirroring mode.
-func DisableMirroringAllImagesInPool(poolName string) error {
+// DisableAllMirroringImagesInPool disables mirroring for all images for a pool enabled in pool mirroring mode.
+func DisableAllMirroringImagesInPool(poolName string) error {
 	poolStatus, err := GetRbdMirrorVerbosePoolStatus(poolName, "", "")
 	if err != nil {
 		err := fmt.Errorf("failed to fetch status for %s pool: %v", poolName, err)
 		logger.Error(err.Error())
 		return err
 	}
@@ -236,6 +236,28 @@
 	return nil
 }
 
+// ResyncAllMirroringImagesInPool triggers a resync for all mirroring images inside a mirroring pool.
+func ResyncAllMirroringImagesInPool(poolName string) error {
+	poolStatus, err := GetRbdMirrorVerbosePoolStatus(poolName, "", "")
+	if err != nil {
+		err := fmt.Errorf("failed to fetch status for %s pool: %v", poolName, err)
+		logger.Error(err.Error())
+		return err
+	}
+
+	flaggedImages := []string{}
+	for _, image := range poolStatus.Images {
+		err := flagImageForResync(poolName, image.Name)
+		if err != nil {
+			return fmt.Errorf("failed to resync %s/%s", poolName, image.Name)
+		}
+		flaggedImages = append(flaggedImages, image.Name)
+	}
+
+	logger.Debugf("REPRBD: Flagged %v images in %s pool for resync.", flaggedImages, poolName)
+	return nil
+}
+
 // getPeerUUID returns the peer ID for the requested peer name.
 func getPeerUUID(pool string, peerName string, client string, cluster string) (string, error) {
 	poolInfo, err := GetRbdMirrorPoolInfo(pool, cluster, client)
@@ -304,6 +326,7 @@ func BootstrapPeer(pool string, localName string, remoteName string) error {
 }
 
 // ############################# Ceph Commands #############################
+// configurePoolMirroring enables/disables mirroring for a pool.
 func configurePoolMirroring(pool string, mode types.RbdResourceType, localName string, remoteName string) error {
 	var args []string
 	if mode == types.RbdResourceDisabled {
@@ -361,6 +384,7 @@ func configureImageMirroring(req types.RbdReplicationRequest) error {
 	return nil
 }
 
+// getSnapshotSchedule fetches the schedule of the snapshots.
 func getSnapshotSchedule(pool string, image string) (imageSnapshotSchedule, error) {
 	if len(pool) == 0 || len(image) == 0 {
 		return imageSnapshotSchedule{}, fmt.Errorf("ImageName(%s/%s) not complete", pool, image)
 	}
@@ -484,6 +508,42 @@ func configureImageFeatures(pool string, image string, op string, feature string
 	return nil
 }
 
+// enableRbdImageFeatures enables the list of rbd features on the requested resource.
+func enableRbdImageFeatures(poolName string, imageName string, features []string) error {
+	for _, feature := range features {
+		err := configureImageFeatures(poolName, imageName, "enable", feature)
+		if err != nil && !strings.Contains(err.Error(), "one or more requested features are already enabled") {
+			return err
+		}
+	}
+	return nil
+}
+
+// disableRbdImageFeatures disables the list of rbd features on the requested resource.
+func disableRbdImageFeatures(poolName string, imageName string, features []string) error {
+	for _, feature := range features {
+		err := configureImageFeatures(poolName, imageName, "disable", feature)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// flagImageForResync flags requested mirroring image in the given pool for resync.
+func flagImageForResync(poolName string, imageName string) error {
+	args := []string{
+		"mirror", "image", "resync", fmt.Sprintf("%s/%s", poolName, imageName),
+	}
+
+	_, err := processExec.RunCommand("rbd", args...)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
 // peerBootstrapCreate generates peer bootstrap token on remote ceph cluster.
 func peerBootstrapCreate(pool string, client string, cluster string) (string, error) {
 	args := []string{
@@ -548,6 +608,44 @@ func peerRemove(pool string, peerId string, localName string, remoteName string)
 	return nil
 }
 
+func promotePool(poolName string, isForce bool, remoteName string, localName string) error {
+	args := []string{
+		"mirror", "pool", "promote", poolName,
+	}
+
+	if isForce {
+		args = append(args, "--force")
+	}
+
+	// add --cluster and --id args
+	args = appendRemoteClusterArgs(args, remoteName, localName)
+
+	output, err := processExec.RunCommand("rbd", args...)
+	if err != nil {
+		return fmt.Errorf("failed to promote pool(%s): %v", poolName, err)
+	}
+
+	logger.Debugf("REPRBD: Promotion Output: %s", output)
+	return nil
+}
+
+func demotePool(poolName string, remoteName string, localName string) error {
+	args := []string{
+		"mirror", "pool", "demote", poolName,
+	}
+
+	// add --cluster and --id args
+	args = appendRemoteClusterArgs(args, remoteName, localName)
+
+	output, err := processExec.RunCommand("rbd", args...)
+	if err != nil {
+		return fmt.Errorf("failed to demote pool(%s): %v", poolName, err)
+	}
+
+	logger.Debugf("REPRBD: Demotion Output: %s", output)
+	return nil
+}
+
 // ########################### HELPERS ###########################
 
 func IsRemoteConfiguredForRbdMirror(remoteName string) bool {
diff --git a/microceph/ceph/rbd_mirror_test.go b/microceph/ceph/rbd_mirror_test.go
index b609bf9c..27d02813 100644
--- a/microceph/ceph/rbd_mirror_test.go
+++ b/microceph/ceph/rbd_mirror_test.go
@@ -1,6 +1,7 @@
 package ceph
 
 import (
+	"fmt"
 	"os"
 	"testing"
 
@@ -93,3 +94,42 @@ func (ks *RbdMirrorSuite) TestPoolInfo() {
 	assert.Equal(ks.T(), resp.LocalSiteName, "magical")
 	assert.Equal(ks.T(), resp.Peers[0].RemoteName, "simple")
 }
+func (ks *RbdMirrorSuite) TestPromotePoolOnSecondary() {
+	r := mocks.NewRunner(ks.T())
+	output, _ := os.ReadFile("./test_assets/rbd_mirror_promote_secondary_failure.txt")
+
+	// mocks and expectations
+	r.On("RunCommand", []interface{}{
+		"rbd", "mirror", "pool", "promote", "pool"}...).Return("", fmt.Errorf("%s", string(output))).Once()
+	r.On("RunCommand", []interface{}{
+		"rbd", "mirror", "pool", "promote", "pool", "--force"}...).Return("ok", nil).Once()
+	processExec = r
+
+	// Test standard promotion.
+	err := handlePoolPromotion("pool", false)
+	assert.ErrorContains(ks.T(), err, "If you understand the *RISK* and you're *ABSOLUTELY CERTAIN*")
+
+	err = handlePoolPromotion("pool", true)
+	assert.NoError(ks.T(), err)
+}
+
+func (ks *RbdMirrorSuite) TestDemotePoolOnSecondary() {
+	r := mocks.NewRunner(ks.T())
+
+	output, _ := os.ReadFile("./test_assets/rbd_mirror_verbose_pool_status.json")
+
+	// mocks and expectations
+	r.On("RunCommand", []interface{}{
+		"rbd", "mirror", "pool", "demote", "pool"}...).Return("ok", nil).Once()
+	r.On("RunCommand", []interface{}{
+		"rbd", "mirror", "pool", "status", "pool", "--verbose", "--format", "json"}...).Return(string(output), nil).Once()
+	r.On("RunCommand", []interface{}{
+		"rbd", "mirror", "image", "resync", "pool/image_one"}...).Return("ok", nil).Once()
+	r.On("RunCommand", []interface{}{
+		"rbd", "mirror", "image", "resync", "pool/image_two"}...).Return("ok", nil).Once()
+	processExec = r
+
+	// Test standard demotion.
+	err := handlePoolDemotion("pool")
+	assert.NoError(ks.T(), err)
+}
diff --git a/microceph/ceph/replication.go b/microceph/ceph/replication.go
index 829e7eff..2db7ead4 100644
--- a/microceph/ceph/replication.go
+++ b/microceph/ceph/replication.go
@@ -32,8 +32,11 @@ type ReplicationHandlerInterface interface {
 	EnableHandler(ctx context.Context, args ...any) error
 	DisableHandler(ctx context.Context, args ...any) error
 	ConfigureHandler(ctx context.Context, args ...any) error
-	ListHandler(ctx context.Context, args ...any) error
 	StatusHandler(ctx context.Context, args ...any) error
+	// Cluster wide Operations (don't require any pool/image info.)
+	ListHandler(ctx context.Context, args ...any) error
+	PromoteHandler(ctx context.Context, args ...any) error
+	DemoteHandler(ctx context.Context, args ...any) error
 }
 
 func GetReplicationHandler(name string) ReplicationHandlerInterface {
@@ -57,6 +60,8 @@ func getAllEvents() []stateless.Trigger {
 		constants.EventConfigureReplication,
 		constants.EventListReplication,
 		constants.EventStatusReplication,
+		constants.EventPromoteReplication,
+		constants.EventDemoteReplication,
 	}
 }
 
@@ -67,7 +72,9 @@ func GetReplicationStateMachine(initialState ReplicationState) *stateless.StateM
 		Permit(constants.EventEnableReplication, StateEnabledReplication).
 		OnEntryFrom(constants.EventDisableReplication, disableHandler).
 		InternalTransition(constants.EventListReplication, listHandler).
-		InternalTransition(constants.EventDisableReplication, disableHandler)
+		InternalTransition(constants.EventDisableReplication, disableHandler).
+		InternalTransition(constants.EventPromoteReplication, promoteHandler).
+		InternalTransition(constants.EventDemoteReplication, demoteHandler)
 
 	// Configure transitions for enabled state.
 	newFsm.Configure(StateEnabledReplication).
 		OnEntryFrom(constants.EventEnableReplication, enableHandler).
 		InternalTransition(constants.EventConfigureReplication, configureHandler).
 		InternalTransition(constants.EventListReplication, listHandler).
-		InternalTransition(constants.EventStatusReplication, statusHandler)
+		InternalTransition(constants.EventStatusReplication, statusHandler).
+		InternalTransition(constants.EventPromoteReplication, promoteHandler).
+		InternalTransition(constants.EventDemoteReplication, demoteHandler)
 
 	// Check Event params type.
 	var outputType *string
@@ -123,3 +132,13 @@ func statusHandler(ctx context.Context, args ...any) error {
 	rh := args[repArgHandler].(ReplicationHandlerInterface)
 	return rh.StatusHandler(ctx, args...)
 }
+func promoteHandler(ctx context.Context, args ...any) error {
+	rh := args[repArgHandler].(ReplicationHandlerInterface)
+	logger.Infof("REPFSM: Entered Promote Handler")
+	return rh.PromoteHandler(ctx, args...)
+}
+func demoteHandler(ctx context.Context, args ...any) error {
+	rh := args[repArgHandler].(ReplicationHandlerInterface)
+	logger.Infof("REPFSM: Entered Demote Handler")
+	return rh.DemoteHandler(ctx, args...)
+}
diff --git a/microceph/ceph/replication_rbd.go b/microceph/ceph/replication_rbd.go
index 70fba9b1..f2cc3c10 100644
--- a/microceph/ceph/replication_rbd.go
+++ b/microceph/ceph/replication_rbd.go
@@ -248,7 +248,6 @@ func (rh *RbdReplicationHandler) StatusHandler(ctx context.Context, args ...any)
 	}
 
 	// Also add image info
-
 	resp = types.RbdPoolStatus{
 		Name: rh.Request.SourcePool,
 		Type: string(rh.PoolInfo.Mode),
@@ -302,6 +301,19 @@ func (rh *RbdReplicationHandler) StatusHandler(ctx context.Context, args ...any)
 	return nil
 }
 
+// PromoteHandler sequentially promotes all secondary cluster pools to primary.
+func (rh *RbdReplicationHandler) PromoteHandler(ctx context.Context, args ...any) error {
+	return handleSiteOp(rh)
+}
+
+// DemoteHandler sequentially demotes all primary cluster pools to non-primary.
+func (rh *RbdReplicationHandler) DemoteHandler(ctx context.Context, args ...any) error {
+	if !rh.Request.IsForceOp {
+		return fmt.Errorf("demotion may cause data loss on this cluster. %s", constants.CliForcePrompt)
+	}
+
+	return handleSiteOp(rh)
+}
+
 // ################### Helper Functions ###################
 // Enable handler for pool resource.
 func handlePoolEnablement(rh *RbdReplicationHandler, localSite string, remoteSite string) error {
@@ -380,7 +392,7 @@ func handlePoolDisablement(rh *RbdReplicationHandler, localSite string, remoteSi
 
 	// If pool in pool mirroring mode, disable all images.
 	if rh.PoolInfo.Mode == types.RbdResourcePool {
-		err := DisableMirroringAllImagesInPool(rh.Request.SourcePool)
+		err := DisableAllMirroringImagesInPool(rh.Request.SourcePool)
 		if err != nil {
 			return err
 		}
@@ -410,24 +422,107 @@ func handleImageDisablement(rh *RbdReplicationHandler) error {
 	return configureImageMirroring(rh.Request)
 }
 
-// enableImageFeatures enables the list of rbd features on the requested resource.
-func enableRbdImageFeatures(poolName string, imageName string, features []string) error {
-	for _, feature := range features {
-		err := configureImageFeatures(poolName, imageName, "enable", feature)
-		if err != nil && !strings.Contains(err.Error(), "one or more requested features are already enabled") {
-			return err
+func isPeerRegisteredForMirroring(peers []RbdReplicationPeer, peerName string) bool {
+	for _, peer := range peers {
+		if peer.RemoteName == peerName {
+			return true
 		}
 	}
+	return false
+}
+
+// getMirrorPoolMetadata fetches pool status and info if mirroring is enabled on pool.
+func getMirrorPoolMetadata(poolName string) (RbdReplicationPoolStatus, RbdReplicationPoolInfo, error) {
+	poolStatus, err := GetRbdMirrorPoolStatus(poolName, "", "")
+	if err != nil {
+		logger.Warnf("REPRBD: failed to fetch status for %s pool: %v", poolName, err)
+		return RbdReplicationPoolStatus{}, RbdReplicationPoolInfo{}, err
+	}
+
+	poolInfo, err := GetRbdMirrorPoolInfo(poolName, "", "")
+	if err != nil {
+		logger.Warnf("REPRBD: failed to fetch info for %s pool: %v", poolName, err)
+		return RbdReplicationPoolStatus{}, RbdReplicationPoolInfo{}, err
+	}
+
+	return poolStatus, poolInfo, nil
+}
+
+// Promote local pool to primary.
+func handlePoolPromotion(poolName string, isforce bool) error {
+	err := promotePool(poolName, isforce, "", "")
+	if err != nil {
+		logger.Errorf("failed to promote pool (%s): %v", poolName, err)
+
+		if strings.Contains(err.Error(), constants.RbdMirrorNonPrimaryPromoteErr) {
+			return fmt.Errorf(constants.CliForcePrompt)
+		}
+
+		return err
+	}
+	return nil
+}
+
+// Demote local pool to secondary.
+func handlePoolDemotion(poolName string) error {
+	err := demotePool(poolName, "", "")
+	if err != nil {
+		logger.Errorf("failed to demote pool (%s): %v", poolName, err)
+		return err
+	}
+
+	err = ResyncAllMirroringImagesInPool(poolName)
+	if err != nil {
+		logger.Warnf("failed to trigger resync for pool %s: %v", poolName, err)
+		return err
+	}
 	return nil
 }
 
-// disableRbdImageFeatures disables the list of rbd features on the requested resource.
-func disableRbdImageFeatures(poolName string, imageName string, features []string) error {
-	for _, feature := range features {
-		err := configureImageFeatures(poolName, imageName, "disable", feature)
+func handleSiteOp(rh *RbdReplicationHandler) error {
+	// fetch all rbd pools.
+	pools := ListPools("rbd")
+
+	logger.Debugf("REPRBD: Scan active pools %v", pools)
+
+	// perform requested op per pool
+	for _, pool := range pools {
+		poolStatus, poolInfo, err := getMirrorPoolMetadata(pool.Name)
 		if err != nil {
-			return err
+			ne := fmt.Errorf("failed to fetch pool (%s) metadata: %v", pool.Name, err)
+			logger.Errorf(ne.Error())
+			return ne
+		}
+
+		if poolStatus.State != StateEnabledReplication {
+			// mirroring not enabled on rbd pool.
+			logger.Infof("REPRBD: pool(%s) is not an rbd mirror pool.", pool.Name)
+			continue
+		}
+
+		if !isPeerRegisteredForMirroring(poolInfo.Peers, rh.Request.RemoteName) {
+			logger.Infof("REPRBD: pool(%s) has no peer(%s), skipping", pool.Name, rh.Request.RemoteName)
+			continue
+		}
+
+		if rh.Request.RequestType == types.PromoteReplicationRequest {
+			err := handlePoolPromotion(pool.Name, rh.Request.IsForceOp)
+			if err != nil {
+				return err
+			}
+			// continue to next pool
+			continue
+		}
+
+		if rh.Request.RequestType == types.DemoteReplicationRequest {
+			err := handlePoolDemotion(pool.Name)
+			if err != nil {
+				return err
+			}
+			// continue to next pool
+			continue
 		}
 	}
+
 	return nil
 }
diff --git a/microceph/ceph/test_assets/rbd_mirror_promote_secondary_failure.txt b/microceph/ceph/test_assets/rbd_mirror_promote_secondary_failure.txt
new file mode 100644
index 00000000..a8d4ea0b
--- /dev/null
+++ b/microceph/ceph/test_assets/rbd_mirror_promote_secondary_failure.txt
@@ -0,0 +1,10 @@
+rbd: failed to 2024-10-09T11:00:10.804+0000 7f65a4bce6c0 -1 librbd::mirror::PromoteRequest: 0x7f6588018e20 handle_get_info: image is primary within a remote cluster or demotion is not propagated yet
+promote image image_one: (16) Device or resource busy
+2024-10-09T11:00:10.804+0000 7f65a4bce6c0 -1 librbd::io::AioCompletion: 0x7f65980061c0 fail: (16) Device or resource busy
+2024-10-09T11:00:10.808+0000 7f65a4bce6c0 -1 librbd::mirror::PromoteRequest: 0x7f658c008c50 handle_get_info: image is primary within a remote cluster or demotion is not propagated yet
+2024-10-09T11:00:10.808+0000 7f65a4bce6c0 -1 librbd::io::AioCompletion: 0x7f65980061c0 fail: (16) Device or resource busy
+rbd: failed to promote image image_two: (16) Device or resource busy
+2024-10-09T11:00:10.812+0000 7f65a53cf6c0 -1 librbd::mirror::PromoteRequest: 0x7f6588018e20 handle_get_info: image is primary within a remote cluster or demotion is not propagated yet
+2024-10-09T11:00:10.812+0000 7f65a53cf6c0 -1 librbd::io::AioCompletion: 0x7f658c0069e0 fail: (16) Device or resource busy
+rbd: failed to promote image image_three: (16) Device or resource busy
+Promoted 0 mirrored images
\ No newline at end of file
diff --git a/microceph/client/remote_replication.go b/microceph/client/remote_replication.go
index cd15f807..80929849 100644
--- a/microceph/client/remote_replication.go
+++ b/microceph/client/remote_replication.go
@@ -7,7 +7,6 @@ import (
 
 	"github.com/canonical/lxd/shared/api"
 	"github.com/canonical/microceph/microceph/api/types"
-	"github.com/canonical/microceph/microceph/constants"
 	microCli "github.com/canonical/microcluster/v2/client"
 )
 
@@ -18,8 +17,9 @@ func SendRemoteReplicationRequest(ctx context.Context, c *microCli.Client, data
 	queryCtx, cancel := context.WithTimeout(ctx, time.Second*120)
 	defer cancel()
 
-	if data.GetWorkloadRequestType() == constants.EventListReplication {
-		// list request uses replication/$workload endpoint
+	// If no API object provided, create API request to the root endpoint.
+	if len(data.GetAPIObjectId()) == 0 {
+		// uses replication/$workload endpoint
 		err = c.Query(
 			queryCtx, data.GetAPIRequestType(), types.ExtendedPathPrefix,
 			api.NewURL().Path("ops", "replication", string(data.GetWorkloadType())),
diff --git a/microceph/cmd/microceph/remote_replication_rbd.go b/microceph/cmd/microceph/remote_replication_rbd.go
index 4a595c0a..cadfe3c4 100644
--- a/microceph/cmd/microceph/remote_replication_rbd.go
+++ b/microceph/cmd/microceph/remote_replication_rbd.go
@@ -34,5 +34,13 @@ func (c *cmdRemoteReplicationRbd) Command() *cobra.Command {
 	remoteReplicationRbdConfigureCmd := cmdRemoteReplicationConfigureRbd{common: c.common}
 	cmd.AddCommand(remoteReplicationRbdConfigureCmd.Command())
 
+	// Replication promote command
+	remoteReplicationRbdPromoteCmd := cmdRemoteReplicationPromoteRbd{common: c.common}
+	cmd.AddCommand(remoteReplicationRbdPromoteCmd.Command())
+
+	// Replication demote command
+	remoteReplicationRbdDemoteCmd := cmdRemoteReplicationDemoteRbd{common: c.common}
+	cmd.AddCommand(remoteReplicationRbdDemoteCmd.Command())
+
 	return cmd
 }
diff --git a/microceph/cmd/microceph/remote_replication_rbd_demote.go b/microceph/cmd/microceph/remote_replication_rbd_demote.go
new file mode 100644
index 00000000..8be123b5
--- /dev/null
+++ b/microceph/cmd/microceph/remote_replication_rbd_demote.go
@@ -0,0 +1,69 @@
+package main
+
+import (
+	"context"
+
+	"github.com/canonical/microceph/microceph/api/types"
+	"github.com/canonical/microceph/microceph/client"
+	"github.com/canonical/microcluster/v2/microcluster"
+	"github.com/spf13/cobra"
+)
+
+type cmdRemoteReplicationDemoteRbd struct {
+	common     *CmdControl
+	remoteName string
+	isForce    bool
+}
+
+func (c *cmdRemoteReplicationDemoteRbd) Command() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "demote",
+		Short: "Demote a primary cluster to non-primary status",
+		RunE:  c.Run,
+	}
+
+	cmd.Flags().StringVar(&c.remoteName, "remote", "", "remote MicroCeph cluster name")
+	cmd.Flags().BoolVar(&c.isForce, "yes-i-really-mean-it", false, "demote cluster irrespective of data loss")
+	cmd.MarkFlagRequired("remote")
+	return cmd
+}
+
+func (c *cmdRemoteReplicationDemoteRbd) Run(cmd *cobra.Command, args []string) error {
+	if len(args) != 0 {
+		return cmd.Help()
+	}
+
+	m, err := microcluster.App(microcluster.Args{StateDir: c.common.FlagStateDir})
+	if err != nil {
+		return err
+	}
+
+	cli, err := m.LocalClient()
+	if err != nil {
+		return err
+	}
+
+	payload, err := c.prepareRbdPayload(types.DemoteReplicationRequest)
+	if err != nil {
+		return err
+	}
+
+	_, err = client.SendRemoteReplicationRequest(context.Background(), cli, payload)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (c *cmdRemoteReplicationDemoteRbd) prepareRbdPayload(requestType types.ReplicationRequestType) (types.RbdReplicationRequest, error) {
+	retReq := types.RbdReplicationRequest{
+		RemoteName:   c.remoteName,
+		RequestType:  requestType,
+		ResourceType: types.RbdResourcePool,
+		SourcePool:   "",
+		IsForceOp:    c.isForce,
+	}
+
+	return retReq, nil
+}
diff --git a/microceph/cmd/microceph/remote_replication_rbd_promote.go b/microceph/cmd/microceph/remote_replication_rbd_promote.go
new file mode 100644
index 00000000..1603fd7e
--- /dev/null
+++ b/microceph/cmd/microceph/remote_replication_rbd_promote.go
@@ -0,0 +1,69 @@
+package main
+
+import (
+	"context"
+
+	"github.com/canonical/microceph/microceph/api/types"
+	"github.com/canonical/microceph/microceph/client"
+	"github.com/canonical/microcluster/v2/microcluster"
+	"github.com/spf13/cobra"
+)
+
+type cmdRemoteReplicationPromoteRbd struct {
+	common     *CmdControl
+	remoteName string
+	isForce    bool
+}
+
+func (c *cmdRemoteReplicationPromoteRbd) Command() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "promote",
+		Short: "Promote a non-primary cluster to primary status",
+		RunE:  c.Run,
+	}
+
+	cmd.Flags().StringVar(&c.remoteName, "remote", "", "remote MicroCeph cluster name")
+	cmd.Flags().BoolVar(&c.isForce, "yes-i-really-mean-it", false, "forcefully promote site to primary")
+	cmd.MarkFlagRequired("remote")
+	return cmd
+}
+
+func (c *cmdRemoteReplicationPromoteRbd) Run(cmd *cobra.Command, args []string) error {
+	if len(args) != 0 {
+		return cmd.Help()
+	}
+
+	m, err := microcluster.App(microcluster.Args{StateDir: c.common.FlagStateDir})
+	if err != nil {
+		return err
+	}
+
+	cli, err := m.LocalClient()
+	if err != nil {
+		return err
+	}
+
+	payload, err := c.prepareRbdPayload(types.PromoteReplicationRequest)
+	if err != nil {
+		return err
+	}
+
+	_, err = client.SendRemoteReplicationRequest(context.Background(), cli, payload)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (c *cmdRemoteReplicationPromoteRbd) prepareRbdPayload(requestType types.ReplicationRequestType) (types.RbdReplicationRequest, error) {
+	retReq := types.RbdReplicationRequest{
+		RemoteName:   c.remoteName,
+		RequestType:  requestType,
+		IsForceOp:    c.isForce,
+		ResourceType: types.RbdResourcePool,
+		SourcePool:   "",
+	}
+
+	return retReq, nil
+}
diff --git a/microceph/constants/constants.go b/microceph/constants/constants.go
index 4fe2b4a8..662c8819 100644
--- a/microceph/constants/constants.go
+++ b/microceph/constants/constants.go
@@ -29,6 +29,9 @@ const CephConfFileName = "ceph.conf"
 const AdminKeyringFieldName = "keyring.client.admin"
 const AdminKeyringTemplate = "keyring.client.%s"
 
+// Ceph Error Substrings
+const RbdMirrorNonPrimaryPromoteErr = "image is primary within a remote cluster or demotion is not propagated yet"
+
 type PathConst struct {
 	ConfPath string
 	RunPath  string
@@ -79,3 +82,6 @@ const EventConfigureReplication = "configure_replication"
 
 // Rbd features
 var RbdJournalingEnableFeatureSet = [...]string{"exclusive-lock", "journaling"}
+
+const EventPromoteReplication = "promote_replication"
+const EventDemoteReplication = "demote_replication"
diff --git a/tests/scripts/actionutils.sh b/tests/scripts/actionutils.sh
index e495c1a2..a97fc324 100755
--- a/tests/scripts/actionutils.sh
+++ b/tests/scripts/actionutils.sh
@@ -292,17 +292,78 @@ function remote_verify_rbd_mirroring() {
     lxc exec node-wrk3 -- sh -c "sudo microceph remote replication rbd list" | grep "pool_two.*image_two"
 }
 
+function remote_failover_to_siteb() {
+    set -eux
+
+    # check images are secondary on siteb
+    img_count=$(lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd list --json" | grep -c "\"is_primary\":false")
+    if [[ $img_count -lt 1 ]]; then
+        echo "Site B has $img_count
secondary images" + exit -1 + fi + + # promote site b to primary + lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd promote --remote sitea --yes-i-really-mean-it" + + # wait for the site images to show as primary + is_primary_count=0 + for index in {1..100}; do + echo "Check run #$index" + list_output=$(lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd list --json") + echo $list_output + images=$(echo $list_output | jq .[].Images) + echo $images + is_primary_count=$(echo $images | grep -c "\"is_primary\": true" || true) + echo $is_primary_count + if [[ $is_primary_count -gt 0 ]] ; then + break + fi + + echo "#################" + sleep 30 + done + if [[ $is_primary_count -eq 0 ]] ; then + echo "No images promoted after 100 rounds." + exit 1 + fi + + # resolve the split brain situation by demoting the old primary. + lxc exec node-wrk0 -- sh -c "sudo microceph remote replication rbd demote --remote siteb --yes-i-really-mean-it" + + # wait for the site images to show as non-primary + is_primary_count=0 + for index in {1..100}; do + echo "Check run #$index" + list_output=$(lxc exec node-wrk0 -- sh -c "sudo microceph remote replication rbd list --json") + echo $list_output + images=$(echo $list_output | jq .[].Images) + echo $images + is_primary_count=$(echo $images | grep -c "\"is_primary\": false" || true) + echo $is_primary_count + if [[ $is_primary_count -gt 0 ]] ; then + break + fi + + echo "#################" + sleep 30 + done + if [[ $is_primary_count -eq 0 ]] ; then + echo "No images demoted after 100 rounds." + exit 1 + fi +} + function remote_disable_rbd_mirroring() { set -eux # check disables fail for image mirroring pools with images currently being mirrored - lxc exec node-wrk0 -- sh -c "sudo microceph remote replication rbd disable pool_two 2>&1 || true" | grep "in Image mirroring mode" + lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd disable pool_two 2>&1 || true" | grep "in Image mirroring mode" # disable both images in pool_two and then disable pool_two - lxc exec node-wrk0 -- sh -c "sudo microceph remote replication rbd disable pool_two/image_one" - lxc exec node-wrk0 -- sh -c "sudo microceph remote replication rbd disable pool_two/image_two" - lxc exec node-wrk0 -- sh -c "sudo microceph remote replication rbd disable pool_two" + lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd disable pool_two/image_one" + lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd disable pool_two/image_two" + lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd disable pool_two" # disable pool one - lxc exec node-wrk0 -- sh -c "sudo microceph remote replication rbd disable pool_one" + lxc exec node-wrk2 -- sh -c "sudo microceph remote replication rbd disable pool_one" } function remote_remove_and_verify() {