Skip to content

Commit

Permalink
Merge pull request #241 from sabaini/feature/remove-node
Browse files Browse the repository at this point in the history
Implement node removal
  • Loading branch information
sabaini authored Oct 17, 2023
2 parents 23deb40 + 79b2339 commit fa596e0
Show file tree
Hide file tree
Showing 7 changed files with 491 additions and 10 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,20 @@ jobs:
lxc exec node-head -- sh -c "microceph status" | grep -F -A 1 node-wrk1 | grep -E "^ Services: osd$"
lxc exec node-head -- sh -c "microceph status" | grep -F -A 1 node-wrk3 | grep -E "^ Services: mds, mgr, mon$"
- name: Enable services on wrk1
run: ~/actionutils.sh headexec enable_services node-wrk1

- name: Test remove node wrk3
run: |
set -uex
~/actionutils.sh headexec remove_node node-wrk3
if lxc exec node-head -- sh -c "microceph status" | grep -q "^- node-wrk3 " ; then
echo "Failed: node-wrk3 still present"
exit 1
fi
lxc exec node-head -- sh -c "microceph.ceph -s" | fgrep "mon: 3 daemons"
upgrade-quincy-tests:
name: Test quincy upgrades
runs-on: ubuntu-22.04
Expand Down
16 changes: 8 additions & 8 deletions microceph/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ import (
"time"

"github.com/canonical/lxd/shared/api"
"github.com/canonical/microcluster/client"
microCli "github.com/canonical/microcluster/client"

"github.com/canonical/microceph/microceph/api/types"
)

func SetConfig(ctx context.Context, c *client.Client, data *types.Config) error {
func SetConfig(ctx context.Context, c *microCli.Client, data *types.Config) error {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*200)
defer cancel()

Expand All @@ -26,7 +26,7 @@ func SetConfig(ctx context.Context, c *client.Client, data *types.Config) error
return nil
}

func ClearConfig(ctx context.Context, c *client.Client, data *types.Config) error {
func ClearConfig(ctx context.Context, c *microCli.Client, data *types.Config) error {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*200)
defer cancel()

Expand All @@ -38,7 +38,7 @@ func ClearConfig(ctx context.Context, c *client.Client, data *types.Config) erro
return nil
}

func GetConfig(ctx context.Context, c *client.Client, data *types.Config) (types.Configs, error) {
func GetConfig(ctx context.Context, c *microCli.Client, data *types.Config) (types.Configs, error) {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*5)
defer cancel()

Expand All @@ -53,7 +53,7 @@ func GetConfig(ctx context.Context, c *client.Client, data *types.Config) (types
}

// AddDisk requests Ceph sets up a new OSD.
func AddDisk(ctx context.Context, c *client.Client, data *types.DisksPost) error {
func AddDisk(ctx context.Context, c *microCli.Client, data *types.DisksPost) error {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*120)
defer cancel()

Expand All @@ -66,7 +66,7 @@ func AddDisk(ctx context.Context, c *client.Client, data *types.DisksPost) error
}

// GetDisks returns the list of configured disks.
func GetDisks(ctx context.Context, c *client.Client) (types.Disks, error) {
func GetDisks(ctx context.Context, c *microCli.Client) (types.Disks, error) {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*5)
defer cancel()

Expand All @@ -81,7 +81,7 @@ func GetDisks(ctx context.Context, c *client.Client) (types.Disks, error) {
}

// GetResources returns the list of storage devices on the system.
func GetResources(ctx context.Context, c *client.Client) (*api.ResourcesStorage, error) {
func GetResources(ctx context.Context, c *microCli.Client) (*api.ResourcesStorage, error) {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*5)
defer cancel()

Expand All @@ -96,7 +96,7 @@ func GetResources(ctx context.Context, c *client.Client) (*api.ResourcesStorage,
}

// RemoveDisk requests Ceph removes an OSD.
func RemoveDisk(ctx context.Context, c *client.Client, data *types.DisksDelete) error {
func RemoveDisk(ctx context.Context, c *microCli.Client, data *types.DisksDelete) error {
timeout := time.Second * time.Duration(data.Timeout+5) // wait a bit longer than the operation timeout
queryCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
Expand Down
59 changes: 59 additions & 0 deletions microceph/client/wrap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package client

import (
"context"
"github.com/canonical/microceph/microceph/api/types"

microCli "github.com/canonical/microcluster/client"
)

// ClientInterface is the set of client operations used by the cluster
// commands. Routing calls through an interface lets unit tests substitute a
// mock implementation for the real client.
type ClientInterface interface {
	// GetClusterMembers returns the names of the cluster members.
	GetClusterMembers(cli *microCli.Client) ([]string, error)
	// GetDisks returns the configured disks.
	GetDisks(cli *microCli.Client) (types.Disks, error)
	// GetServices returns the configured services.
	GetServices(cli *microCli.Client) (types.Services, error)
	// DeleteService deletes the given service on the given target.
	DeleteService(cli *microCli.Client, target string, service string) error
	// DeleteClusterMember removes the named member, optionally forcing removal.
	DeleteClusterMember(cli *microCli.Client, name string, force bool) error
}

// ClientImpl is the stateless, default implementation of ClientInterface.
type ClientImpl struct{}

// GetClusterMembers returns the names of all cluster members, one entry per
// member, in the order reported by the underlying client.
//
// Only names are returned because the Member type is internal to the
// underlying cluster client. On failure the error from the client is returned
// unchanged together with a nil slice.
func (c ClientImpl) GetClusterMembers(cli *microCli.Client) ([]string, error) {
	members, err := cli.GetClusterMembers(context.Background())
	if err != nil {
		return nil, err
	}

	// Allocate with length 0 and only reserve capacity: a non-zero length
	// would leave empty-string entries in front of the appended names.
	memberNames := make([]string, 0, len(members))
	for _, member := range members {
		memberNames = append(memberNames, member.Name)
	}

	return memberNames, nil
}

// GetDisks calls the package-level GetDisks function with a background
// context and returns its result unchanged.
func (c ClientImpl) GetDisks(cli *microCli.Client) (types.Disks, error) {
return GetDisks(context.Background(), cli)
}

// GetServices calls the package-level GetServices function with a background
// context and returns its result unchanged.
func (c ClientImpl) GetServices(cli *microCli.Client) (types.Services, error) {
return GetServices(context.Background(), cli)
}

// DeleteService calls the package-level DeleteService function with a
// background context, passing target and service through unchanged.
func (c ClientImpl) DeleteService(cli *microCli.Client, target string, service string) error {
return DeleteService(context.Background(), cli, target, service)
}

// DeleteClusterMember calls cli.DeleteClusterMember with a background
// context, passing name and force through unchanged.
func (c ClientImpl) DeleteClusterMember(cli *microCli.Client, name string, force bool) error {
return cli.DeleteClusterMember(context.Background(), name, force)
}

// MClient is the package-level ClientInterface instance that callers go
// through. It defaults to ClientImpl and is the mocking point for unit tests,
// which can replace it with a mock implementation.
var MClient ClientInterface = ClientImpl{}
114 changes: 112 additions & 2 deletions microceph/cmd/microceph/cluster_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@ package main

import (
"context"
"fmt"
"github.com/canonical/lxd/shared/logger"
microCli "github.com/canonical/microcluster/client"

"github.com/canonical/microcluster/microcluster"
"github.com/spf13/cobra"

"github.com/canonical/microceph/microceph/client"
)

type cmdClusterRemove struct {
Expand Down Expand Up @@ -36,15 +41,120 @@ func (c *cmdClusterRemove) Run(cmd *cobra.Command, args []string) error {
return err
}

client, err := m.LocalClient()
cli, err := m.LocalClient()
if err != nil {
return err
}

err = client.DeleteClusterMember(context.Background(), args[0], c.flagForce)
return removeNode(cli, args[0], c.flagForce)
}

// removeNode removes the cluster member named node.
//
// The removal runs in three steps: prerequisite checks (skipped when force is
// set), deletion of the node's services, and deletion of the member from the
// cluster database. With force set, a failure while deleting services is
// logged as a warning instead of aborting the removal. The result of the
// final cluster database deletion is always returned to the caller.
func removeNode(cli *microCli.Client, node string, force bool) error {
	logger.Debugf("Removing cluster member %v, force: %v", node, force)

	// Prerequisites are only verified for a non-forced removal.
	if !force {
		if prereqErr := checkPrerequisites(cli, node); prereqErr != nil {
			return prereqErr
		}
	}

	// Remove the node's services from ceph; forcing downgrades a failure
	// here to a warning.
	if svcErr := deleteNodeServices(cli, node); svcErr != nil {
		if !force {
			return svcErr
		}
		logger.Warnf("Error deleting services from node %v: %v", node, svcErr)
	}

	// Finally drop the member from the cluster database.
	dbErr := client.MClient.DeleteClusterMember(cli, node, force)
	logger.Debugf("DeleteClusterMember %v: %v", node, dbErr)
	return dbErr
}

// checkPrerequisites verifies that the node called name can be removed.
//
// It returns an error when any of the following holds:
//   - the cluster members, disks or services cannot be fetched (the
//     underlying error is wrapped, so errors.Is / errors.As work on it),
//   - name is not among the cluster members,
//   - a disk is still located on name,
//   - no mon, mgr or mds service exists on a node other than name.
//
// It returns nil when all checks pass.
func checkPrerequisites(cli *microCli.Client, name string) error {
	// check if member exists
	clusterMembers, err := client.MClient.GetClusterMembers(cli)
	if err != nil {
		return fmt.Errorf("Error getting cluster members: %w", err)
	}
	found := false
	for _, member := range clusterMembers {
		if member == name {
			found = true
			break // one match is enough
		}
	}
	if !found {
		return fmt.Errorf("Node %v not found", name)
	}

	// check if any OSDs present
	disks, err := client.MClient.GetDisks(cli)
	if err != nil {
		return fmt.Errorf("Error getting disks: %w", err)
	}
	found = false
	for _, disk := range disks {
		if disk.Location == name {
			found = true
			break // one disk on the node is enough to refuse removal
		}
	}
	logger.Debugf("Disks: %v, found: %v", disks, found)
	if found {
		return fmt.Errorf("Node %v still has disks configured, remove before proceeding", name)
	}

	// check if this node has the last mon, mgr or mds
	services, err := client.MClient.GetServices(cli)
	if err != nil {
		return fmt.Errorf("Error getting services: %w", err)
	}
	// Track, per required service, whether an instance exists on a node
	// other than the one being removed.
	foundMap := map[string]bool{
		"mon": false,
		"mgr": false,
		"mds": false,
	}
	for _, service := range services {
		if service.Location == name {
			// services on the node being removed do not count
			continue
		}
		foundMap[service.Service] = true
	}
	logger.Debugf("Services: %v, foundMap: %v", services, foundMap)
	if !foundMap["mon"] || !foundMap["mgr"] || !foundMap["mds"] {
		return fmt.Errorf("Need at least one mon, mds, and mgr besides %v", name)
	}

	return nil
}

// deleteNodeServices deletes every service whose location is the node called
// name.
//
// An error is returned only when the list of services cannot be fetched. A
// failure to delete an individual service is logged as a warning and the loop
// carries on with the remaining services; such failures are not reported to
// the caller.
func deleteNodeServices(cli *microCli.Client, name string) error {
	services, err := client.MClient.GetServices(cli)
	if err != nil {
		return err
	}

	for _, svc := range services {
		logger.Debugf("Check for deletion: %s", svc)
		if svc.Location != name {
			// service lives on another node, leave it alone
			continue
		}

		logger.Debugf("Deleting service %s", svc)
		if delErr := client.MClient.DeleteService(cli, svc.Location, svc.Service); delErr != nil {
			logger.Warnf("Fault deleting service %v on node %v: %v", svc.Service, svc.Location, delErr)
		}
	}

	return nil
}
Loading

0 comments on commit fa596e0

Please sign in to comment.