Adds custom parameters to MicroCeph Bootstrap process (#266)
Adds the following parameters to the bootstrap process (a usage sketch follows below):
1. `--mon-ip`
2. `--public-network`
3. `--cluster-network`

---------

Signed-off-by: Utkarsh Bhatt <[email protected]>
UtkarshBhatthere authored Dec 1, 2023
1 parent 3d1e909 commit 287ee68
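
The new flags are consumed by `microceph cluster bootstrap`. A minimal usage sketch, assuming a host with an address on each subnet (the addresses below are illustrative, not taken from this commit):

    # Pin the mon address and both networks explicitly at bootstrap time.
    sudo microceph cluster bootstrap \
        --mon-ip 10.0.0.10 \
        --public-network 10.0.0.0/24 \
        --cluster-network 10.1.0.0/24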
Showing 17 changed files with 738 additions and 139 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/candidate-upgrade.yml
@@ -41,7 +41,7 @@ jobs:

- name: Add 3 OSDs
run: |
for c in node-head node-wrk1 node-wrk2 ; do
for c in node-wrk0 node-wrk1 node-wrk2 ; do
~/actionutils.sh add_osd_to_node $c
done
~/actionutils.sh headexec wait_for_osds 3
34 changes: 18 additions & 16 deletions .github/workflows/tests.yml
@@ -218,40 +218,40 @@ jobs:
run: ~/actionutils.sh install_multinode

- name: Bootstrap
run: ~/actionutils.sh bootstrap_head
run: ~/actionutils.sh bootstrap_head custom

- name: Setup cluster
run: ~/actionutils.sh cluster_nodes
run: ~/actionutils.sh cluster_nodes custom

- name: Add 2 OSDs
run: |
for c in node-wrk1 node-wrk2 ; do
~/actionutils.sh add_osd_to_node $c
done
lxc exec node-head -- sh -c "microceph.ceph -s"
lxc exec node-wrk0 -- sh -c "microceph.ceph -s"
- name: Test failure domain scale up
run: |
set -uex
# We still have failure domain OSD
lxc exec node-head -- sh -c "sudo microceph.ceph config get mon osd_pool_default_crush_rule" | fgrep -x 1
lxc exec node-wrk0 -- sh -c "sudo microceph.ceph config get mon osd_pool_default_crush_rule" | fgrep -x 1
# Add a 3rd OSD, should switch to host failure domain
~/actionutils.sh add_osd_to_node node-head
~/actionutils.sh add_osd_to_node node-wrk0
~/actionutils.sh headexec wait_for_osds 3
sleep 1
lxc exec node-head -- sh -c "sudo microceph.ceph -s"
lxc exec node-wrk0 -- sh -c "sudo microceph.ceph -s"
# Now default to host rule
lxc exec node-head -- sh -c "sudo microceph.ceph config get mon osd_pool_default_crush_rule" | fgrep -x 2
lxc exec node-wrk0 -- sh -c "sudo microceph.ceph config get mon osd_pool_default_crush_rule" | fgrep -x 2
- name: Test 3 osds present
run: |
set -uex
lxc exec node-head -- sh -c "microceph.ceph -s" | egrep "osd: 3 osds: 3 up.*3 in"
lxc exec node-wrk0 -- sh -c "microceph.ceph -s" | egrep "osd: 3 osds: 3 up.*3 in"
- name: Test crush rules
run: |
set -uex
lxc exec node-head -- sh -c "microceph.ceph osd crush rule ls" | grep -F microceph_auto_host
lxc exec node-head -- sh -c "microceph.ceph osd pool ls detail" | grep -F "crush_rule 2"
lxc exec node-wrk0 -- sh -c "microceph.ceph osd crush rule ls" | grep -F microceph_auto_host
lxc exec node-wrk0 -- sh -c "microceph.ceph osd pool ls detail" | grep -F "crush_rule 2"
- name: Add another OSD
run: |
@@ -261,11 +261,13 @@ jobs:
- name: Remove OSD again
run: |
set -uex
lxc exec node-head -- sh -c "microceph disk remove 4"
lxc exec node-head -- sh -c "microceph.ceph -s" | egrep "osd: 3 osds: 3 up.*3 in"
lxc exec node-wrk0 -- sh -c "microceph disk remove 4"
lxc exec node-wrk0 -- sh -c "microceph.ceph -s" | egrep "osd: 3 osds: 3 up.*3 in"
- name: Test migrate services
run: ~/actionutils.sh test_migration node-wrk1 node-wrk3
run: |
set -uex
~/actionutils.sh test_migration node-wrk1 node-wrk3
- name: Enable services on wrk1
run: ~/actionutils.sh headexec enable_services node-wrk1
@@ -274,11 +276,11 @@ jobs:
run: |
set -uex
~/actionutils.sh headexec remove_node node-wrk3
if lxc exec node-head -- sh -c "microceph status" | grep -q "^- node-wrk3 " ; then
if lxc exec node-wrk0 -- sh -c "microceph status" | grep -q "^- node-wrk3 " ; then
echo "Failed: node-wrk3 still present"
exit 1
fi
lxc exec node-head -- sh -c "microceph.ceph -s" | fgrep "mon: 3 daemons"
lxc exec node-wrk0 -- sh -c "microceph.ceph -s" | fgrep "mon: 3 daemons"
- name: Test client configurations
run: ~/actionutils.sh check_client_configs
@@ -374,7 +376,7 @@ jobs:

- name: Add 3 OSDs
run: |
for c in node-head node-wrk1 node-wrk2 ; do
for c in node-wrk0 node-wrk1 node-wrk2 ; do
~/actionutils.sh add_osd_to_node $c
done
~/actionutils.sh headexec wait_for_osds 3
6 changes: 6 additions & 0 deletions docs/reference/commands/cluster.rst
@@ -58,6 +58,12 @@ Usage:
microceph cluster bootstrap [flags]
Flags:

.. code-block:: none
--mon-ip string Public address for bootstrapping ceph mon service.
``config``
----------

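The docs hunk above only documents `--mon-ip`, but per `prepareCephBootstrapData` in the bootstrap.go changes below, the flags can also be given partially and the missing values are deduced. A hedged sketch of that behaviour (addresses illustrative):

    # Only --public-network given: the mon IP is picked from a host address on that subnet,
    # and the cluster network defaults to the public network.
    sudo microceph cluster bootstrap --public-network 10.0.0.0/24

    # Only --mon-ip given: the public network is deduced from the interface carrying that IP.
    sudo microceph cluster bootstrap --mon-ip 10.0.0.10
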
122 changes: 101 additions & 21 deletions microceph/ceph/bootstrap.go
@@ -7,35 +7,45 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/pborman/uuid"

apiTypes "github.com/canonical/microceph/microceph/api/types"
"github.com/canonical/microceph/microceph/common"
"github.com/canonical/microceph/microceph/database"
)

// Bootstrap will initialize a new Ceph deployment.
func Bootstrap(s common.StateInterface) error {
func Bootstrap(s common.StateInterface, data common.BootstrapConfig) error {
pathConsts := common.GetPathConst()
pathFileMode := common.GetPathFileMode()

// Create our various paths.
for path, perm := range pathFileMode {
err := os.MkdirAll(path, perm)
if err != nil {
return fmt.Errorf("Unable to create %q: %w", path, err)
return fmt.Errorf("unable to create %q: %w", path, err)
}
}

// Generate a new FSID.
fsid := uuid.NewRandom().String()
conf := newCephConfig(pathConsts.ConfPath)
err := conf.WriteConfig(
err := prepareCephBootstrapData(s, &data)
if err != nil {
return err
}

err = conf.WriteConfig(
map[string]any{
"fsid": fsid,
"runDir": pathConsts.RunPath,
"monitors": s.ClusterState().Address().Hostname(),
"addr": s.ClusterState().Address().Hostname(),
"fsid": fsid,
"runDir": pathConsts.RunPath,
// First monitor bootstrap IP as passed to microcluster.
"monitors": data.MonIp,
"pubNet": data.PublicNet,
"ipv4": strings.Contains(data.PublicNet, "."),
"ipv6": strings.Contains(data.PublicNet, ":"),
},
0644,
)
@@ -52,10 +62,10 @@ func Bootstrap(s common.StateInterface) error {

adminKey, err := parseKeyring(filepath.Join(pathConsts.ConfPath, "ceph.client.admin.keyring"))
if err != nil {
return fmt.Errorf("Failed parsing admin keyring: %w", err)
return fmt.Errorf("failed parsing admin keyring: %w", err)
}

err = createMonMap(s, path, fsid)
err = createMonMap(s, path, fsid, data.MonIp)
if err != nil {
return err
}
@@ -86,7 +96,7 @@ func Bootstrap(s common.StateInterface) error {
}

// Update the database.
err = updateDatabase(s, fsid, adminKey)
err = populateDatabase(s, fsid, adminKey, data)
if err != nil {
return err
}
@@ -102,10 +112,68 @@ func Bootstrap(s common.StateInterface) error {
return err
}

// Configure defaults cluster configs for network.
err = setDefaultNetwork(data.ClusterNet)
if err != nil {
return err
}

// Re-generate the configuration from the database.
err = UpdateConfig(s)
if err != nil {
return fmt.Errorf("Failed to re-generate the configuration: %w", err)
return fmt.Errorf("failed to re-generate the configuration: %w", err)
}

return nil
}

// setDefaultNetwork configures the cluster network on mon KV store.
func setDefaultNetwork(cn string) error {
// Cluster Network
err := SetConfigItem(apiTypes.Config{
Key: "cluster_network",
Value: cn,
})
if err != nil {
return err
}

return nil
}

func prepareCephBootstrapData(s common.StateInterface, data *common.BootstrapConfig) error {
var err error

// if no mon-ip is provided, either deduce from public network or fallback to default.
if len(data.MonIp) == 0 {
if len(data.PublicNet) == 0 {
// Use default value if public address is also not provided.
data.MonIp = s.ClusterState().Address().Hostname()
} else {
// deduce mon-ip from the public network parameter.
data.MonIp, err = common.Network.FindIpOnSubnet(data.PublicNet)
if err != nil {
return fmt.Errorf("failed to locate %s on host: %w", data.MonIp, err)
}
}
}

if len(data.PublicNet) != 0 {
// Verify that the public network and mon-ip params are coherent.
if !common.Network.IsIpOnSubnet(data.MonIp, data.PublicNet) {
return fmt.Errorf("monIp %s is not available on public network %s", data.MonIp, data.PublicNet)
}
} else {
// Deduce Public network based on mon-ip param.
data.PublicNet, err = common.Network.FindNetworkAddress(data.MonIp)
if err != nil {
return fmt.Errorf("failed to locate %s on host: %w", data.MonIp, err)
}
}

if len(data.ClusterNet) == 0 {
// Cluster Network defaults to Public Network.
data.ClusterNet = data.PublicNet
}

return nil
@@ -137,16 +205,16 @@ func createKeyrings(confPath string) (string, error) {
return path, nil
}

func createMonMap(s common.StateInterface, path string, fsid string) error {
func createMonMap(s common.StateInterface, path string, fsid string, address string) error {
// Generate initial monitor map.
err := genMonmap(filepath.Join(path, "mon.map"), fsid)
if err != nil {
return fmt.Errorf("Failed to generate monitor map: %w", err)
return fmt.Errorf("failed to generate monitor map: %w", err)
}

err = addMonmap(filepath.Join(path, "mon.map"), s.ClusterState().Name(), s.ClusterState().Address().Hostname())
err = addMonmap(filepath.Join(path, "mon.map"), s.ClusterState().Name(), address)
if err != nil {
return fmt.Errorf("Failed to add monitor map: %w", err)
return fmt.Errorf("failed to add monitor map: %w", err)
}

return nil
@@ -194,36 +262,48 @@ func initMgr(s common.StateInterface, dataPath string) error {
return nil
}

func updateDatabase(s common.StateInterface, fsid string, adminKey string) error {
// populateDatabase injects the bootstrap entries to the internal database.
func populateDatabase(s common.StateInterface, fsid string, adminKey string, data common.BootstrapConfig) error {
if s.ClusterState().Database == nil {
return fmt.Errorf("no database")
}
err := s.ClusterState().Database.Transaction(s.ClusterState().Context, func(ctx context.Context, tx *sql.Tx) error {
// Record the roles.
_, err := database.CreateService(ctx, tx, database.Service{Member: s.ClusterState().Name(), Service: "mon"})
if err != nil {
return fmt.Errorf("Failed to record role: %w", err)
return fmt.Errorf("failed to record role: %w", err)
}

_, err = database.CreateService(ctx, tx, database.Service{Member: s.ClusterState().Name(), Service: "mgr"})
if err != nil {
return fmt.Errorf("Failed to record role: %w", err)
return fmt.Errorf("failed to record role: %w", err)
}

_, err = database.CreateService(ctx, tx, database.Service{Member: s.ClusterState().Name(), Service: "mds"})
if err != nil {
return fmt.Errorf("Failed to record role: %w", err)
return fmt.Errorf("failed to record role: %w", err)
}

// Record the configuration.
_, err = database.CreateConfigItem(ctx, tx, database.ConfigItem{Key: "fsid", Value: fsid})
if err != nil {
return fmt.Errorf("Failed to record fsid: %w", err)
return fmt.Errorf("failed to record fsid: %w", err)
}

_, err = database.CreateConfigItem(ctx, tx, database.ConfigItem{Key: "keyring.client.admin", Value: adminKey})
if err != nil {
return fmt.Errorf("Failed to record keyring: %w", err)
return fmt.Errorf("failed to record keyring: %w", err)
}

key := fmt.Sprintf("mon.host.%s", s.ClusterState().Name())
_, err = database.CreateConfigItem(ctx, tx, database.ConfigItem{Key: key, Value: data.MonIp})
if err != nil {
return fmt.Errorf("failed to record mon host: %w", err)
}

_, err = database.CreateConfigItem(ctx, tx, database.ConfigItem{Key: "public_network", Value: data.PublicNet})
if err != nil {
return fmt.Errorf("failed to record public_network: %w", err)
}

return nil
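Since `setDefaultNetwork` above stores the cluster network in the mon KV store, a post-bootstrap check can reuse the `microceph.ceph config get mon ...` pattern already used in tests.yml. A hedged verification sketch (node name follows the workflow's naming):

    lxc exec node-wrk0 -- sh -c "sudo microceph.ceph config get mon cluster_network"
    # The mon address reported here should match the --mon-ip passed at bootstrap.
    lxc exec node-wrk0 -- sh -c "sudo microceph.ceph mon dump"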