From 1d284b720f5b66979475365aa1553bebb347b624 Mon Sep 17 00:00:00 2001 From: Jacob Weinstock Date: Mon, 13 Mar 2023 20:43:35 -0600 Subject: [PATCH] Do Rufio/BMC actions before creating a workflow: This is to avoid a race condition that could occur if a machine is running HookOS and Tink worker is connected and running when a workflow is created. The workflow would start to execute but then be cut off when the reboot action is called. This has been shown to cause undefined behavior. Signed-off-by: Jacob Weinstock --- controllers/machine.go | 47 ++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/controllers/machine.go b/controllers/machine.go index 898e7bf2..4b361d26 100644 --- a/controllers/machine.go +++ b/controllers/machine.go @@ -143,37 +143,40 @@ func (mrc *machineReconcileContext) Reconcile() error { } func (mrc *machineReconcileContext) reconcile(hw *tinkv1.Hardware) error { - if !isHardwareReady(hw) { - wf, err := mrc.ensureTemplateAndWorkflow(hw) + if isHardwareReady(hw) { + mrc.log.Info("Marking TinkerbellMachine as Ready") + mrc.tinkerbellMachine.Status.Ready = true - if ensureJobErr := mrc.ensureHardwareProvisionJob(hw); ensureJobErr != nil { - return fmt.Errorf("failed to ensure hardware ready for provisioning: %w", ensureJobErr) - } + return nil + } - switch { - case errors.Is(err, &errRequeueRequested{}): - return nil - case err != nil: - return fmt.Errorf("ensure template and workflow returned: %w", err) - } + if ensureJobErr := mrc.ensureHardwareProvisionJob(hw); ensureJobErr != nil { + return fmt.Errorf("failed to ensure hardware ready for provisioning: %w", ensureJobErr) + } - s := wf.GetCurrentActionState() + wf, err := mrc.ensureTemplateAndWorkflow(hw) - if s == tinkv1.WorkflowStateFailed || s == tinkv1.WorkflowStateTimeout { - return errWorkflowFailed - } + switch { + case errors.Is(err, &errRequeueRequested{}): + return nil + case err != nil: + return fmt.Errorf("ensure template and 
workflow returned: %w", err) + } - if !lastActionStarted(wf) { - return nil - } + s := wf.GetCurrentActionState() + if s == tinkv1.WorkflowStateFailed || s == tinkv1.WorkflowStateTimeout { + return errWorkflowFailed + } - if err := mrc.patchHardwareStates(hw, inUse, provisioned); err != nil { - return fmt.Errorf("failed to patch hardware: %w", err) - } + if !lastActionStarted(wf) { + return nil } - mrc.log.Info("Marking TinkerbellMachine as Ready") + if err := mrc.patchHardwareStates(hw, inUse, provisioned); err != nil { + return fmt.Errorf("failed to patch hardware: %w", err) + } + mrc.log.Info("Marking TinkerbellMachine as Ready") mrc.tinkerbellMachine.Status.Ready = true return nil