From 5ba199e91981fb6292f46eb13a87663dc769e9ab Mon Sep 17 00:00:00 2001 From: Andreas Janning Date: Fri, 6 Sep 2024 11:19:27 +0200 Subject: [PATCH] Terminate instances immediately if they fail to launch. This gets rid of zombie offline nodes in Jenkins that failed to start in the cloud. --- .../ComputeEngineComputerLauncher.java | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java index b0b0be1c..3bd94c50 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java @@ -154,14 +154,16 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) { } if (opError != null) { LOGGER.info(String.format( - "Launch failed while waiting for operation %s to complete. Operation error was %s", + "Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance.", insertOperationId, opError.getErrors().get(0).getMessage())); + terminateNode(computer, listener); return; } } catch (InterruptedException e) { LOGGER.info(String.format( - "Launch failed while waiting for operation %s to complete. Operation error was %s", + "Launch failed while waiting for operation %s to complete. Operation error was %s. 
Terminating instance", insertOperationId, opError.getErrors().get(0).getMessage())); + terminateNode(computer, listener); return; } @@ -214,19 +216,23 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) { launch(computer, listener); } catch (IOException ioe) { ioe.printStackTrace(listener.error(ioe.getMessage())); - node = (ComputeEngineInstance) slaveComputer.getNode(); - if (node != null) { - try { - node.terminate(); - } catch (Exception e) { - listener.error(String.format("Failed to terminate node %s", node.getDisplayName())); - } - } + terminateNode(slaveComputer, listener); } catch (InterruptedException ie) { } } + private static void terminateNode(SlaveComputer slaveComputer, TaskListener listener) { + ComputeEngineInstance node = (ComputeEngineInstance) slaveComputer.getNode(); + if (node != null) { + try { + node.terminate(); + } catch (Exception e) { + listener.error(String.format("Failed to terminate node %s", node.getDisplayName())); + } + } + } + private boolean testCommand( ComputeEngineComputer computer, Connection conn,