Skip to content

Commit

Permalink
Fixes for disk remove issues
Browse files Browse the repository at this point in the history
Fix regexp for stopping OSD services.

Don't treat a failure to stop an OSD service as fatal; it might already
be down.

Increase the default timeout and retries; the previous defaults were
overly optimistic.

Test cleanup.

Signed-off-by: Peter Sabaini <[email protected]>
  • Loading branch information
sabaini committed Oct 9, 2023
1 parent d6334c1 commit 0353dd0
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 14 deletions.
13 changes: 5 additions & 8 deletions microceph/ceph/osd.go
Original file line number Diff line number Diff line change
Expand Up @@ -709,12 +709,9 @@ func doRemoveOSD(ctx context.Context, s common.StateInterface, osd int64, bypass
return err
}
}
// stop the OSD service
// stop the OSD service, but don't fail if it's not running
if isPresent {
err = killOSD(osd)
}
if err != nil {
return err
_ = killOSD(osd)
}
// perform safety check for destroying
if isPresent && !bypassSafety {
Expand Down Expand Up @@ -775,7 +772,7 @@ func outDownOSD(osd int64) error {
func safetyCheckStop(osd int64) error {
var safeStop bool

retries := 12
retries := 16
var backoff time.Duration

for i := 0; i < retries; i++ {
Expand All @@ -799,7 +796,7 @@ func safetyCheckStop(osd int64) error {
func safetyCheckDestroy(osd int64) error {
var safeDestroy bool

retries := 12
retries := 16
var backoff time.Duration

for i := 0; i < retries; i++ {
Expand Down Expand Up @@ -878,7 +875,7 @@ func haveOSDInCeph(osd int64) (bool, error) {

// killOSD terminates the osd process for an osd.id
func killOSD(osd int64) error {
cmdline := fmt.Sprintf("ceph-osd .* --id %d", osd)
cmdline := fmt.Sprintf("ceph-osd .* --id %d$", osd)
_, err := processExec.RunCommand("pkill", "-f", cmdline)
if err != nil {
logger.Errorf("Failed to kill osd.%d: %v", osd, err)
Expand Down
5 changes: 0 additions & 5 deletions microceph/ceph/osd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ func addCrushRuleLsExpectations(r *mocks.Runner) {
r.On("RunCommand", cmdAny("ceph", 4)...).Return("microceph_auto_osd", nil).Once()
}

// addCrushRuleCreateExpectations registers one expected RunCommand call on
// the mock runner r, standing in for `ceph osd crush rule create-replicated`.
// The expectation is built from cmdAny("ceph", 7), is configured to return
// "ok" with a nil error, and is marked Once().
// NOTE(review): cmdAny is defined outside this view; presumably it matches
// "ceph" followed by seven arbitrary arguments — confirm against its definition.
func addCrushRuleCreateExpectations(r *mocks.Runner) {
r.On("RunCommand", cmdAny("ceph", 7)...).Return("ok", nil).Once()
}

// Expect: run ceph osd crush rule dump
func addCrushRuleDumpExpectations(r *mocks.Runner) {
json := `{ "rule_id": 77 }`
Expand Down
2 changes: 1 addition & 1 deletion microceph/cmd/microceph/disk_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func (c *cmdDiskRemove) Command() *cobra.Command {
RunE: c.Run,
}

cmd.PersistentFlags().Int64Var(&c.flagTimeout, "timeout", 300, "Timeout to wait for safe removal (seconds), default=300")
cmd.PersistentFlags().Int64Var(&c.flagTimeout, "timeout", 1800, "Timeout to wait for safe removal (seconds), default=1800")
cmd.PersistentFlags().BoolVar(&c.flagBypassSafety, "bypass-safety-checks", false, "Bypass safety checks")
cmd.PersistentFlags().BoolVar(&c.flagConfirmDowngrade, "confirm-failure-domain-downgrade", false, "Confirm failure domain downgrade if required")

Expand Down

0 comments on commit 0353dd0

Please sign in to comment.