Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement node removal #241

Merged
merged 3 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,20 @@ jobs:
lxc exec node-head -- sh -c "microceph status" | grep -F -A 1 node-wrk1 | grep -E "^ Services: osd$"
lxc exec node-head -- sh -c "microceph status" | grep -F -A 1 node-wrk3 | grep -E "^ Services: mds, mgr, mon$"

# Runs the enable_services helper against node-wrk1 from the head node.
# NOTE(review): presumably this brings up extra mon/mgr/mds daemons on wrk1 so
# the later "mon: 3 daemons" check still holds once wrk3 is removed — confirm
# against enable_services in actionutils.sh.
- name: Enable services on wrk1
run: ~/actionutils.sh headexec enable_services node-wrk1

# Removes node-wrk3 via the remove_node helper, then verifies that the node no
# longer appears in "microceph status" and that ceph still reports three mons.
- name: Test remove node wrk3
  run: |
    set -uex
    ~/actionutils.sh headexec remove_node node-wrk3
    if lxc exec node-head -- sh -c "microceph status" | grep -q "^- node-wrk3 " ; then
      echo "Failed: node-wrk3 still present"
      exit 1
    fi
    # grep -F instead of the obsolescent fgrep, matching the other checks in this workflow
    lxc exec node-head -- sh -c "microceph.ceph -s" | grep -F "mon: 3 daemons"


upgrade-quincy-tests:
name: Test quincy upgrades
runs-on: ubuntu-22.04
Expand Down
16 changes: 8 additions & 8 deletions microceph/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ import (
"time"

"github.com/canonical/lxd/shared/api"
"github.com/canonical/microcluster/client"
microCli "github.com/canonical/microcluster/client"

"github.com/canonical/microceph/microceph/api/types"
)

func SetConfig(ctx context.Context, c *client.Client, data *types.Config) error {
func SetConfig(ctx context.Context, c *microCli.Client, data *types.Config) error {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*200)
defer cancel()

Expand All @@ -26,7 +26,7 @@ func SetConfig(ctx context.Context, c *client.Client, data *types.Config) error
return nil
}

func ClearConfig(ctx context.Context, c *client.Client, data *types.Config) error {
func ClearConfig(ctx context.Context, c *microCli.Client, data *types.Config) error {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*200)
defer cancel()

Expand All @@ -38,7 +38,7 @@ func ClearConfig(ctx context.Context, c *client.Client, data *types.Config) erro
return nil
}

func GetConfig(ctx context.Context, c *client.Client, data *types.Config) (types.Configs, error) {
func GetConfig(ctx context.Context, c *microCli.Client, data *types.Config) (types.Configs, error) {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*5)
defer cancel()

Expand All @@ -53,7 +53,7 @@ func GetConfig(ctx context.Context, c *client.Client, data *types.Config) (types
}

// AddDisk requests Ceph sets up a new OSD.
func AddDisk(ctx context.Context, c *client.Client, data *types.DisksPost) error {
func AddDisk(ctx context.Context, c *microCli.Client, data *types.DisksPost) error {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*120)
defer cancel()

Expand All @@ -66,7 +66,7 @@ func AddDisk(ctx context.Context, c *client.Client, data *types.DisksPost) error
}

// GetDisks returns the list of configured disks.
func GetDisks(ctx context.Context, c *client.Client) (types.Disks, error) {
func GetDisks(ctx context.Context, c *microCli.Client) (types.Disks, error) {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*5)
defer cancel()

Expand All @@ -81,7 +81,7 @@ func GetDisks(ctx context.Context, c *client.Client) (types.Disks, error) {
}

// GetResources returns the list of storage devices on the system.
func GetResources(ctx context.Context, c *client.Client) (*api.ResourcesStorage, error) {
func GetResources(ctx context.Context, c *microCli.Client) (*api.ResourcesStorage, error) {
queryCtx, cancel := context.WithTimeout(ctx, time.Second*5)
defer cancel()

Expand All @@ -96,7 +96,7 @@ func GetResources(ctx context.Context, c *client.Client) (*api.ResourcesStorage,
}

// RemoveDisk requests Ceph removes an OSD.
func RemoveDisk(ctx context.Context, c *client.Client, data *types.DisksDelete) error {
func RemoveDisk(ctx context.Context, c *microCli.Client, data *types.DisksDelete) error {
timeout := time.Second * time.Duration(data.Timeout+5) // wait a bit longer than the operation timeout
queryCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
Expand Down
59 changes: 59 additions & 0 deletions microceph/client/wrap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package client

import (
"context"
"github.com/canonical/microceph/microceph/api/types"

microCli "github.com/canonical/microcluster/client"
)

// ClientInterface collects the client calls used by higher-level commands
// behind a single interface, so unit tests can swap in a mock.
type ClientInterface interface {
	// GetClusterMembers returns the names of the cluster members.
	GetClusterMembers(cli *microCli.Client) ([]string, error)
	// GetDisks returns the configured disks.
	GetDisks(cli *microCli.Client) (types.Disks, error)
	// GetServices returns the known services.
	GetServices(cli *microCli.Client) (types.Services, error)
	// DeleteService deletes the given service on the target node.
	DeleteService(cli *microCli.Client, target string, service string) error
	// DeleteClusterMember removes the named member from the cluster.
	DeleteClusterMember(cli *microCli.Client, name string, force bool) error
}

// ClientImpl is the stateless default implementation of ClientInterface; its
// methods forward to the real client calls using a background context.
type ClientImpl struct{}

// GetClusterMembers returns the names of all cluster members.
//
// Only names are returned because the Member type is internal to the
// microcluster client. On failure the error from the underlying client call
// is returned unchanged together with a nil slice.
func (c ClientImpl) GetClusterMembers(cli *microCli.Client) ([]string, error) {
	members, err := cli.GetClusterMembers(context.Background())
	if err != nil {
		return nil, err
	}

	// Length 0 with capacity only: a non-zero length here would leave empty
	// strings in front of the appended names.
	memberNames := make([]string, 0, len(members))
	for _, member := range members {
		memberNames = append(memberNames, member.Name)
	}

	return memberNames, nil
}

// GetDisks forwards to the package-level GetDisks using a background context.
func (c ClientImpl) GetDisks(cli *microCli.Client) (types.Disks, error) {
	ctx := context.Background()
	return GetDisks(ctx, cli)
}

// GetServices forwards to the package-level GetServices using a background
// context.
func (c ClientImpl) GetServices(cli *microCli.Client) (types.Services, error) {
	ctx := context.Background()
	return GetServices(ctx, cli)
}

// DeleteService forwards to the package-level DeleteService using a
// background context, passing the target and service through unchanged.
func (c ClientImpl) DeleteService(cli *microCli.Client, target string, service string) error {
	ctx := context.Background()
	return DeleteService(ctx, cli, target, service)
}

// DeleteClusterMember calls the DeleteClusterMember method of the given
// microcluster client with a background context.
func (c ClientImpl) DeleteClusterMember(cli *microCli.Client, name string, force bool) error {
	ctx := context.Background()
	return cli.DeleteClusterMember(ctx, name, force)
}

// MClient is the package-level ClientInterface instance; it defaults to
// ClientImpl and serves as the mocking point for unit tests.
var MClient ClientInterface = ClientImpl{}
114 changes: 112 additions & 2 deletions microceph/cmd/microceph/cluster_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@ package main

import (
"context"
"fmt"
"github.com/canonical/lxd/shared/logger"
microCli "github.com/canonical/microcluster/client"

"github.com/canonical/microcluster/microcluster"
"github.com/spf13/cobra"

"github.com/canonical/microceph/microceph/client"
)

type cmdClusterRemove struct {
Expand Down Expand Up @@ -36,15 +41,120 @@ func (c *cmdClusterRemove) Run(cmd *cobra.Command, args []string) error {
return err
}

client, err := m.LocalClient()
cli, err := m.LocalClient()
if err != nil {
return err
}

err = client.DeleteClusterMember(context.Background(), args[0], c.flagForce)
return removeNode(cli, args[0], c.flagForce)
}

// removeNode removes the named node from the cluster.
//
// Unless force is set, the prerequisites are checked first and any failure
// aborts the removal. The node's services are then deleted, and finally the
// member is deleted from the cluster database. With force set, an error from
// the service deletion step is only logged as a warning.
func removeNode(cli *microCli.Client, node string, force bool) error {
	logger.Debugf("Removing cluster member %v, force: %v", node, force)

	// Prerequisites are only enforced for non-forced removals.
	if !force {
		if err := checkPrerequisites(cli, node); err != nil {
			return err
		}
	}

	// Remove the node's services from ceph.
	if svcErr := deleteNodeServices(cli, node); svcErr != nil {
		if !force {
			return svcErr
		}
		// A forced removal carries on despite the failure.
		logger.Warnf("Error deleting services from node %v: %v", node, svcErr)
	}

	// Remove the member from the cluster db.
	dbErr := client.MClient.DeleteClusterMember(cli, node, force)
	logger.Debugf("DeleteClusterMember %v: %v", node, dbErr)
	return dbErr
}

// checkPrerequisites verifies that the named node can be removed.
//
// It returns an error when:
//   - the cluster members, disks or services cannot be fetched (the
//     underlying error is wrapped and reachable via errors.Is / errors.As),
//   - the node is not among the cluster members,
//   - a disk is still located on the node, or
//   - no mon, mgr or mds service is located on some other node.
//
// It returns nil when none of these conditions apply.
func checkPrerequisites(cli *microCli.Client, name string) error {
	// check if member exists
	clusterMembers, err := client.MClient.GetClusterMembers(cli)
	if err != nil {
		return fmt.Errorf("Error getting cluster members: %w", err)
	}
	found := false
	for _, member := range clusterMembers {
		if member == name {
			found = true
			break
		}
	}
	if !found {
		return fmt.Errorf("Node %v not found", name)
	}

	// check if any OSDs present
	disks, err := client.MClient.GetDisks(cli)
	if err != nil {
		return fmt.Errorf("Error getting disks: %w", err)
	}
	found = false
	for _, disk := range disks {
		if disk.Location == name {
			found = true
			break
		}
	}
	logger.Debugf("Disks: %v, found: %v", disks, found)
	if found {
		return fmt.Errorf("Node %v still has disks configured, remove before proceeding", name)
	}

	// check if this node has the last mon, mgr or mds
	services, err := client.MClient.GetServices(cli)
	if err != nil {
		return fmt.Errorf("Error getting services: %w", err)
	}
	// Tracks, per required service, whether an instance exists on a node
	// other than the one being removed.
	foundMap := map[string]bool{
		"mon": false,
		"mgr": false,
		"mds": false,
	}
	for _, service := range services {
		if service.Location == name {
			// services on the node being removed do not count
			continue
		}
		foundMap[service.Service] = true
	}
	logger.Debugf("Services: %v, foundMap: %v", services, foundMap)
	if !foundMap["mon"] || !foundMap["mgr"] || !foundMap["mds"] {
		return fmt.Errorf("Need at least one mon, mds, and mgr besides %v", name)
	}

	return nil
}

// deleteNodeServices deletes every service located on the named node.
//
// All matching services are attempted even if one deletion fails; each
// failure is logged as a warning. The first failure (if any) is returned so
// the caller can decide whether it is fatal. An error fetching the service
// list is returned immediately.
func deleteNodeServices(cli *microCli.Client, name string) error {
	services, err := client.MClient.GetServices(cli)
	if err != nil {
		return err
	}

	// Remember the first failure instead of dropping it, while still trying
	// the remaining services.
	var firstErr error
	for _, service := range services {
		logger.Debugf("Check for deletion: %s", service)
		if service.Location != name {
			continue
		}
		logger.Debugf("Deleting service %s", service)
		err = client.MClient.DeleteService(cli, service.Location, service.Service)
		if err != nil {
			logger.Warnf("Fault deleting service %v on node %v: %v", service.Service, service.Location, err)
			if firstErr == nil {
				firstErr = err
			}
		}
	}
	return firstErr
}
Loading