Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Terminate the instance when 404 occurred. #489

Merged
merged 8 commits into from
Dec 12, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.google.jenkins.plugins.computeengine;

import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.services.compute.model.AccessConfig;
import com.google.api.services.compute.model.Instance;
import com.google.api.services.compute.model.NetworkInterface;
Expand All @@ -40,6 +41,7 @@
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.SocketTimeoutException;
import java.util.Base64;
import java.util.Optional;
import java.util.logging.Level;
Expand Down Expand Up @@ -154,14 +156,16 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
}
if (opError != null) {
LOGGER.info(String.format(
"Launch failed while waiting for operation %s to complete. Operation error was %s",
"Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance.",
insertOperationId, opError.getErrors().get(0).getMessage()));
terminateNode(computer, listener);
return;
}
} catch (InterruptedException e) {
LOGGER.info(String.format(
"Launch failed while waiting for operation %s to complete. Operation error was %s",
"Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance",
insertOperationId, opError.getErrors().get(0).getMessage()));
terminateNode(computer, listener);
return;
}

Expand Down Expand Up @@ -214,19 +218,23 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
launch(computer, listener);
} catch (IOException ioe) {
ioe.printStackTrace(listener.error(ioe.getMessage()));
node = (ComputeEngineInstance) slaveComputer.getNode();
if (node != null) {
try {
node.terminate();
} catch (Exception e) {
listener.error(String.format("Failed to terminate node %s", node.getDisplayName()));
}
}
terminateNode(slaveComputer, listener);
} catch (InterruptedException ie) {

}
}

private static void terminateNode(SlaveComputer slaveComputer, TaskListener listener) {
ComputeEngineInstance node = (ComputeEngineInstance) slaveComputer.getNode();
if (node != null) {
Artmorse marked this conversation as resolved.
Show resolved Hide resolved
try {
node.terminate();
} catch (Exception e) {
listener.error(String.format("Failed to terminate node %s", node.getDisplayName()));
}
}
}

private boolean testCommand(
ComputeEngineComputer computer,
Connection conn,
Expand Down Expand Up @@ -343,6 +351,10 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
+ ")");
}
Instance instance = computer.refreshInstance();
// the instance will be null when the node is terminated
if (instance == null) {
return null;
}

String host = "";

Expand Down Expand Up @@ -410,10 +422,25 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
SSH_TIMEOUT_MILLIS);
logInfo(computer, listener, "Connected via SSH.");
return conn;
} catch (IOException e) {
} catch (GoogleJsonResponseException e) {
if (e.getStatusCode() == 404) {
log(
LOGGER,
Level.SEVERE,
listener,
String.format("Instance %s not found. Terminating instance.", computer.getName()));
terminateNode(computer, listener);
}
} catch (SocketTimeoutException e) {
// keep retrying until SSH comes up
logInfo(computer, listener, "Failed to connect via ssh: " + e.getMessage());
logInfo(computer, listener, "Waiting for SSH to come up. Sleeping 5.");
logInfo(computer, listener, String.format("Failed to connect via ssh: %s", e.getMessage()));
logInfo(
computer,
listener,
String.format("Waiting for SSH to come up. Sleeping %d.", SSH_SLEEP_MILLIS / 1000));
Thread.sleep(SSH_SLEEP_MILLIS);
} catch (IOException e) {
Artmorse marked this conversation as resolved.
Show resolved Hide resolved
logWarning(computer, listener, String.format("An error occured: %s", e.getMessage()));
Thread.sleep(SSH_SLEEP_MILLIS);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@

package com.google.jenkins.plugins.computeengine;

import static com.google.jenkins.plugins.computeengine.ComputeEngineCloud.CLOUD_ID_LABEL_KEY;

import com.google.cloud.graphite.platforms.plugin.client.ComputeClient.OperationException;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.jenkins.plugins.computeengine.ssh.GoogleKeyCredential;
import edu.umd.cs.findbugs.annotations.Nullable;
import hudson.Extension;
Expand All @@ -30,6 +33,7 @@
import hudson.slaves.RetentionStrategy;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -130,7 +134,6 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted
.createSnapshotSync(cloud.getProjectId(), this.zone, this.getNodeName(), createSnapshotTimeout);
}

// If the instance is running, attempt to terminate it. This is an async call and we
// return immediately, hoping for the best.
cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), zone, name);
} catch (CloudNotFoundException cnfe) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ private Optional<Connection> bootstrap(
logInfo(computer, listener, "Authenticating as " + node.getSshUser());
try {
bootstrapConn = connectToSsh(computer, listener);
if (bootstrapConn == null) {
break;
}
isAuthenticated = bootstrapConn.authenticateWithPublicKey(
node.getSshUser(),
Secret.toString(keyCred.getPrivateKey()).toCharArray(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ private Optional<Connection> bootstrap(ComputeEngineComputer computer, TaskListe
logInfo(computer, listener, "Authenticating as " + node.getSshUser());
try {
bootstrapConn = connectToSsh(computer, listener);
if (bootstrapConn == null) {
break;
}
isAuthenticated = authenticateSSH(node.getSshUser(), windowsConfig, bootstrapConn, listener);
} catch (IOException e) {
logException(computer, listener, "Exception trying to authenticate", e);
Expand Down
Loading