Skip to content

Commit

Permalink
add an integration test for spot vms with maximum run durations
Browse files Browse the repository at this point in the history
  • Loading branch information
gbhat618 committed Dec 13, 2024
1 parent d57ea0c commit 579410e
Show file tree
Hide file tree
Showing 8 changed files with 285 additions and 10 deletions.
67 changes: 67 additions & 0 deletions docs/integration-tests.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Integration Tests

* GCP Project
* Create a service account with the relevant access - see [IAM Credentials](Home.md#iam-credentials)
* Most of the tests require a VM with Java pre-installed at `/usr/bin/java`.
One or two tests do not require Java to be pre-installed: they supply a `startup-script` to the gcloud APIs, which installs `java` on a plain Debian Linux image.
* You can create an image with `java` preinstalled as follows:
```bash
project=<your-project>
zone=<your-zone>

# Create a debian based VM
gcloud compute instances create java-install-instance \
--project=$project \
--zone=$zone \
--machine-type=e2-medium \
--image-project=debian-cloud \
--image-family=debian-12

# Wait for the machine to start and accept SSH connections, then install Java via SSH
gcloud compute ssh java-install-instance \
--project=$project \
--zone=$zone \
--command="sudo apt-get update && sudo apt-get install -y openjdk-17-jdk"

# Ensure java is installed and print the java path
gcloud compute ssh java-install-instance \
--project=$project \
--zone=$zone \
--command="java -version"

gcloud compute ssh java-install-instance \
--project=$project \
--zone=$zone \
--command="which java"

# For creating image, you need to first stop the VM
gcloud compute instances stop java-install-instance \
--project=$project \
--zone=$zone

# Create an image from the VM
gcloud compute images create java-debian-12-image \
--source-disk=java-install-instance \
--source-disk-zone=$zone \
--project=$project \
--family=custom-java-debian-family

# Delete the VM
gcloud compute instances delete java-install-instance \
--project=$project \
--zone=$zone
```
* Export these environment variables
```bash
export GOOGLE_PROJECT_ID=<your-project>
export GOOGLE_SA_NAME=<name of the SA created in first step>
export GOOGLE_CREDENTIALS_FILE=<full path to the SA JSON file>
export GOOGLE_ZONE=<your-compute-zone>
export GOOGLE_REGION=<your-compute-region>
export GOOGLE_BOOT_DISK_PROJECT_ID=<your-project>
export GOOGLE_BOOT_DISK_IMAGE_NAME=java-debian-12-image # this is created in previous step
```
* Execute an integration test (example)
```bash
mvn clean test -Dtest=ComputeEngineCloudRestartPreemptedIT#testIfNodeWasPreempted
```
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void onConnected(TaskListener listener) {
ComputeEngineInstance node = getNode();
if (node != null) {
node.onConnected();
if (getPreemptible()) {
if (getPreemptible()) { // TODO: maybe need to handle similarly when `maxRunDuration` is set.

Check warning on line 47 in src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 47 is not covered by tests
String nodeName = node.getNodeName();
final String msg = "Instance " + nodeName + " is preemptive, setting up preemption listener";
log.log(Level.INFO, msg);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -510,23 +510,27 @@ private Tags tags() {
private Scheduling scheduling() {
Scheduling scheduling = new Scheduling();
long maxRunDurationSeconds = 0;
if (provisioningType != null) {
if (provisioningType != null) { // check `null` for backward compatibility

Check warning on line 513 in src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 513 is only partially covered, one branch is missing
ProvisioningTypeValue ptValue = provisioningType.getValue();
if (ptValue == PREEMPTIBLE) {
scheduling.setPreemptible(true);
} else if (provisioningType.getValue() == SPOT) {
maxRunDurationSeconds = ((SpotVm) provisioningType).getMaxRunDurationSeconds();
scheduling.setProvisioningModel("SPOT");
// only the instance is deleted, the disk deletion is based on bootDiskAutoDelete config value
scheduling.setInstanceTerminationAction("DELETE");
} else {
maxRunDurationSeconds = ((Standard) provisioningType).getMaxRunDurationSeconds();
}
if (maxRunDurationSeconds > 0) {
GenericJson j = new GenericJson();
j.set("seconds", maxRunDurationSeconds);
scheduling.set("maxRunDuration", j);
// only the instance is deleted, the disk deletion is based on bootDiskAutoDelete config value
/* Note: only the instance is set to be deleted here, not the disk. Disk deletion is controlled by the
`bootDiskAutoDelete` config value. For instance termination at `maxRunDuration`, GCP supports two
termination actions: DELETE and STOP.
For Jenkins agents, DELETE is more appropriate. If the agent instance is needed again, it can be
recreated from the disk; anyone who anticipates this should configure `bootDiskAutoDelete` so that
the disk is not deleted.
*/
scheduling.setInstanceTerminationAction("DELETE");

Check warning on line 534 in src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 514-534 are not covered by tests
}
} else if (preemptible) { // keeping the check for `preemptible` for backward compatibility

Check warning on line 536 in src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 536 is only partially covered, one branch is missing
Expand Down Expand Up @@ -1007,6 +1011,8 @@ public InstanceConfiguration build() {
instanceConfiguration.setNumExecutorsStr(this.numExecutorsStr);
instanceConfiguration.setStartupScript(this.startupScript);
instanceConfiguration.setMinCpuPlatform(this.minCpuPlatform);
// even though `preemptible` is deprecated, we still set it here for backward compatibility
instanceConfiguration.setPreemptible(this.preemptible);
instanceConfiguration.setProvisioningType(this.provisioningType);
instanceConfiguration.setLabelString(this.labels);
instanceConfiguration.setRunAsUser(this.runAsUser);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.google.jenkins.plugins.credentials.oauth.GoogleOAuth2Credentials;
import com.google.jenkins.plugins.credentials.oauth.GoogleRobotCredentials;
import hudson.AbortException;
import hudson.Main;
import hudson.model.ItemGroup;
import hudson.security.ACL;
import java.io.IOException;
Expand Down Expand Up @@ -73,6 +74,20 @@ private static GoogleRobotCredentials getRobotCredentials(
ItemGroup itemGroup, List<DomainRequirement> domainRequirements, String credentialsId)
throws AbortException {

/* During the integration tests, the `credentialsId` parameter is set to the project ID, but the
credential actually created within Jenkins has a random UUID as its `id`.
As a result, `CredentialsMatchers.firstOrNull` returned `null` because of `CredentialsMatchers.withId(credentialsId)`.
*/
if (Main.isUnitTest) {

Check warning on line 82 in src/main/java/com/google/jenkins/plugins/computeengine/client/ClientUtil.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 82 is only partially covered, one branch is missing
var credentialList = CredentialsProvider.lookupCredentials(
GoogleOAuth2Credentials.class, itemGroup, ACL.SYSTEM, domainRequirements);
if (!credentialList.isEmpty()) {

Check warning on line 85 in src/main/java/com/google/jenkins/plugins/computeengine/client/ClientUtil.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 85 is only partially covered, one branch is missing
return (GoogleRobotCredentials) credentialList.get(0);
}
return null;

Check warning on line 88 in src/main/java/com/google/jenkins/plugins/computeengine/client/ClientUtil.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered line

Line 88 is not covered by tests
}

GoogleOAuth2Credentials credentials = CredentialsMatchers.firstOrNull(
CredentialsProvider.lookupCredentials(
GoogleOAuth2Credentials.class, itemGroup, ACL.SYSTEM, domainRequirements),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public static void teardown() throws IOException {

@Test
public void testGetImage() throws Exception {
Image image = client.getImage("debian-cloud", "debian-9-stretch-v20180820");
Image image = client.getImage("debian-cloud", "debian-12-bookworm-v20241210");
assertNotNull(image);
assertEquals("READY", image.getStatus());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,13 @@ class ITUtil {
private static final String CONFIG_DESC = "integration";
private static final String BOOT_DISK_TYPE = ZONE_BASE + "/diskTypes/pd-ssd";
private static final boolean BOOT_DISK_AUTODELETE = true;
private static final String BOOT_DISK_PROJECT_ID =
windows ? System.getenv("GOOGLE_BOOT_DISK_PROJECT_ID") : "debian-cloud";
private static final String BOOT_DISK_IMAGE_NAME = windows
private static final String BOOT_DISK_PROJECT_ID = System.getenv("GOOGLE_BOOT_DISK_PROJECT_ID") != null
? System.getenv("GOOGLE_BOOT_DISK_PROJECT_ID")
: "debian" + "-cloud";
private static final String BOOT_DISK_IMAGE_NAME = System.getenv("GOOGLE_BOOT_DISK_IMAGE_NAME") != null
? String.format(
"projects/%s/global/images/%s", BOOT_DISK_PROJECT_ID, System.getenv("GOOGLE_BOOT_DISK_IMAGE_NAME"))
: "projects/debian-cloud/global/images/family/debian-9";
: "projects/debian-cloud/global/images/family/debian-12";
private static final String BOOT_DISK_SIZE_GB_STR = windows ? "50" : "10";
private static final Node.Mode NODE_MODE = Node.Mode.EXCLUSIVE;
private static final String ACCELERATOR_NAME = "";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2024 CloudBees, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.jenkins.plugins.computeengine.integration;

import static com.google.jenkins.plugins.computeengine.integration.ITUtil.LABEL;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.PROJECT_ID;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.ZONE;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.execute;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.getLabel;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.initClient;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.initCloud;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.initCredentials;
import static com.google.jenkins.plugins.computeengine.integration.ITUtil.windows;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeFalse;

import com.google.api.client.util.ArrayMap;
import com.google.api.services.compute.model.Instance;
import com.google.api.services.compute.model.Scheduling;
import com.google.cloud.graphite.platforms.plugin.client.ComputeClient;
import com.google.jenkins.plugins.computeengine.ComputeEngineCloud;
import hudson.model.FreeStyleBuild;
import hudson.model.FreeStyleProject;
import hudson.model.Result;
import hudson.model.labels.LabelAtom;
import hudson.slaves.NodeProvisioner.PlannedNode;
import hudson.tasks.Builder;
import io.jenkins.plugins.casc.ConfigurationAsCode;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import lombok.extern.java.Log;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.jvnet.hudson.test.JenkinsRule;

/**
 * Integration test for agents provisioned as Spot VMs with a maximum run duration, configured
 * through Configuration as Code ({@code casc-max-run-duration-agent-it.yml}).
 *
 * <p>The test provisions one agent, checks the scheduling settings reported for the instance, runs
 * a build on it, waits for the configured max run duration, and then verifies that no labelled
 * instance remains and that a subsequent build runs on a different agent.
 */
@Log
public class MaxRunDurationCasCIT {

    /** Must match {@code maxRunDurationSeconds} in {@code casc-max-run-duration-agent-it.yml}. */
    private static final int MAX_RUN_DURATION_SECONDS = 180;

    /** Text that precedes the agent name in the console log line of a build that ran on an agent. */
    private static final String REMOTE_BUILD_MARKER = "Building remotely on";

    @ClassRule
    public static Timeout timeout = new Timeout(20, TimeUnit.MINUTES);

    @ClassRule
    public static JenkinsRule j = new JenkinsRule();

    private static ComputeClient client;
    private static ComputeEngineCloud cloud;
    private static Map<String, String> label = getLabel(MaxRunDurationCasCIT.class);

    /** Sets up credentials, the default integration cloud and the compute client (skipped on Windows). */
    @BeforeClass
    public static void init() throws Exception {
        assumeFalse(windows);
        log.info("init");
        initCredentials(j);
        cloud = initCloud(j);
        client = initClient(j, label, log);
    }

    /**
     * Provisions a Spot VM agent from the CasC configuration and verifies its scheduling settings,
     * that a build runs on it, that no labelled instance remains after the max run duration has
     * elapsed, and that a second build runs successfully on a different agent.
     */
    @Test
    public void testMaxRunDurationDeletesAndNoNewBuilds() throws Exception {
        assumeFalse(windows);
        ConfigurationAsCode.get()
                .configure(Objects.requireNonNull(
                                this.getClass().getResource("casc-max-run-duration-agent-it.yml"),
                                "casc-max-run-duration-agent-it.yml not found next to the test class")
                        .toString());
        // Named differently from the static `cloud` field on purpose: this is the cloud created by CasC.
        ComputeEngineCloud cascCloud = (ComputeEngineCloud) j.jenkins.clouds.getByName("gce-integration");
        cascCloud.getConfigurations().get(0).setGoogleLabels(label);
        Collection<PlannedNode> planned = cascCloud.provision(new LabelAtom(LABEL), 1);
        PlannedNode plannedNode = planned.iterator().next();
        plannedNode.future.get(); // wait for the node creation to finish
        Instance instance = client.getInstance(PROJECT_ID, ZONE, plannedNode.displayName);
        String instanceName = instance.getName();
        log.info("Instance: " + instanceName);

        // assert the scheduling configurations.
        Scheduling sch = instance.getScheduling();
        assertEquals("SPOT", sch.get("provisioningModel"));
        assertEquals("DELETE", sch.get("instanceTerminationAction"));
        Object maxRunDuration = sch.get("maxRunDuration");
        assertTrue("maxRunDuration is missing from the instance scheduling", maxRunDuration instanceof Map);
        // String.valueOf keeps the check independent of how the JSON parser represents the number.
        assertEquals(
                MAX_RUN_DURATION_SECONDS,
                Integer.parseInt(String.valueOf(((Map<?, ?>) maxRunDuration).get("seconds"))));
        log.info("instance scheduling configs are correct");

        // try to execute a build on the agent
        FreeStyleProject fp = j.createFreeStyleProject();
        Builder step = execute(Commands.ECHO, "hello world");
        fp.getBuildersList().add(step);
        fp.setAssignedLabel(new LabelAtom(LABEL));
        Future<FreeStyleBuild> buildFuture = fp.scheduleBuild2(0);
        FreeStyleBuild build = buildFuture.get();
        assertEquals(Result.SUCCESS, build.getResult());
        String agent1 = printLogsAndReturnAgentName(build);
        log.info("first build completed");
        assertEquals(instanceName, agent1);

        // wait for the max run duration to make sure the instance is fully deleted due to `maxRunDuration`
        log.info("sleeping " + MAX_RUN_DURATION_SECONDS + "s to make sure the instance is deleted");
        TimeUnit.SECONDS.sleep(MAX_RUN_DURATION_SECONDS);
        log.info("sleeping completed");

        // assert there are no nodes remaining;
        assertTrue(client.listInstancesWithLabel(PROJECT_ID, label).isEmpty());

        // trigger another build, notice a new instance is being created
        log.info("proceeding to 2nd build, after no remaining instances");
        buildFuture = fp.scheduleBuild2(0);
        build = buildFuture.get();
        assertEquals(Result.SUCCESS, build.getResult());
        String agent2 = printLogsAndReturnAgentName(build);
        log.info("second build completed");

        // Without this check a missing agent name (null) would trivially differ from agent1.
        assertTrue("second build did not report the agent it ran on", agent2 != null);
        assertNotEquals(agent1, agent2);
    }

    /**
     * Logs the last 50 lines of the build's console output and extracts the agent name.
     *
     * @param build the build whose log is inspected
     * @return the token following {@code "Building remotely on"} in the last log line that contains
     *     it, or {@code null} when no such line (or no token after the marker) is present
     * @throws IOException if the build log cannot be read
     */
    private static String printLogsAndReturnAgentName(FreeStyleBuild build) throws IOException {
        List<String> logs = build.getLog(50);
        String agentName = null;
        for (String line : logs) {
            int markerAt = line.indexOf(REMOTE_BUILD_MARKER);
            if (markerAt >= 0) {
                // Take the first whitespace-delimited token after the marker, wherever the marker sits.
                String[] tokens = line.substring(markerAt + REMOTE_BUILD_MARKER.length())
                        .trim()
                        .split("\\s+");
                if (!tokens[0].isEmpty()) {
                    agentName = tokens[0];
                }
            }
            log.info(line);
        }
        return agentName;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
jenkins:
clouds:
- computeEngine:
cloudName: integration
projectId: ${env.GOOGLE_PROJECT_ID}
instanceCapStr: 10
credentialsId: ${env.GOOGLE_PROJECT_ID}
configurations:
- namePrefix: max-run-duration
description: max-run-duration
launchTimeoutSecondsStr: ''
retentionTimeMinutesStr: ''
mode: EXCLUSIVE
labelString: integration
numExecutorsStr: 1
runAsUser: jenkins
remoteFs: ''
oneShot: true
createSnapshot: false
region: "https://www.googleapis.com/compute/v1/projects/${env.GOOGLE_PROJECT_ID}/regions/${env.GOOGLE_REGION}"
zone: "https://www.googleapis.com/compute/v1/projects/${env.GOOGLE_PROJECT_ID}/zones/${env.GOOGLE_ZONE}"
template: '' # tried not setting, added when 'saved' in UI
machineType: "https://www.googleapis.com/compute/v1/projects/${env.GOOGLE_PROJECT_ID}/zones/${env.GOOGLE_ZONE}/machineTypes/n1-standard-1"
javaExecPath: '/usr/bin/java'
provisioningType:
SpotVm:
maxRunDurationSeconds: 180
networkConfiguration:
autofilled:
network: default
subnetwork: default
networkTags: "jenkins-agent ssh"
networkInterfaceIpStackMode:
singleStack:
externalIPV4Address: true
useInternalAddress: false
bootDiskSourceImageProject: ${env.GOOGLE_BOOT_DISK_PROJECT_ID}
bootDiskSourceImageName: "projects/${env.GOOGLE_BOOT_DISK_PROJECT_ID}/global/images/${env.GOOGLE_BOOT_DISK_IMAGE_NAME}"
bootDiskType: "https://www.googleapis.com/compute/v1/projects/${env.GOOGLE_PROJECT_ID}/zones/${env.GOOGLE_ZONE}/diskTypes/pd-standard"
bootDiskSizeGbStr: 10
bootDiskAutoDelete: true
serviceAccountEmail: "${env.GOOGLE_SA_NAME}@${env.GOOGLE_PROJECT_ID}.iam.gserviceaccount.com"

0 comments on commit 579410e

Please sign in to comment.