Skip to content
This repository has been archived by the owner on May 5, 2024. It is now read-only.

Commit

Permalink
Adding retry logic to git clone operation. KP is having issues where …
Browse files Browse the repository at this point in the history
…clones are failing and a node will fail continuously on one job after the next while in a broken state (e.g. disk full, network connectivity issue, etc.) and empty the queue. This is a stopgap to slow the node down and retry the operation a set number of times with a set delay, both of which can be configured in the environment. The next step will be to add self-repair capabilities to the script, like freeing disk space or having the node take itself offline until someone can check on the issue(s).
  • Loading branch information
Thomas Branch authored and Thomas Branch committed Sep 16, 2021
1 parent 2826a77 commit 76b9f0c
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 17 deletions.
2 changes: 2 additions & 0 deletions bin/stampede-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ const conf = require("rc")("stampede", {
logQueuePath: null,
// Heartbeat
heartbeatInterval: 15000,
cloneRetryInterval: 1 * 60 * 1000, // delay between clone retries, in milliseconds (default: one minute)
cloneRetryAttempts: 3 // retry 3 times
});

// Configure winston logging
Expand Down
71 changes: 54 additions & 17 deletions lib/workingDirectory.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const fs = require("fs");
const { exec } = require("child_process");
const git = require("git-last-commit");

// Module-level counter of clone retry attempts. It is read and updated by
// prepareWorkingDirectory; because it lives at module scope, its value
// persists between calls.
let retries = 0;
/**
* prepare the working directory
* @param {*} taskExecutionConfig
Expand Down Expand Up @@ -79,7 +80,7 @@ async function prepareWorkingDirectory(taskExecutionConfig, conf, logger) {

// Perform the clone
if (gitOperations.clone != null) {
const cloneResult = await cloneRepo(
let cloneResult = await cloneRepo(
gitRepoURL,
gitOperations.clone,
dir,
Expand All @@ -88,11 +89,36 @@ async function prepareWorkingDirectory(taskExecutionConfig, conf, logger) {
logger,
conf.workspaceRoot
);
if (cloneResult === false) {
if (conf.cloneRetryAttempts && cloneResult !== true) {
if (retries > conf.cloneRetryAttempts) {
while (true) {
await delay(conf.cloneRetryInterval);
cloneResult = await cloneRepo(
gitRepoURL,
gitOperations.clone,
dir,
gitOperations.depth,
taskExecutionConfig.gitCloneOptions,
logger,
conf.workspaceRoot
);
retries++;
if (retries > conf.cloneRetryAttempts || cloneResult === true) {
break; // we run until we get a positive clone result or we've run out of retries
}
}
}
}
retries = 0; // reset our retry attempts
if (cloneResult !== true) {
retries++;
let err = "Unable to clone the repository, please contact the service desk and report the issue.\n"
+ cloneResult + "\n"
+ "Directory: " + dir + "\n"
+ "Repo URL: " + gitRepoURL + "\n"
return {
error: "clone-error",
message:
"Unable to clone the repository, please contact the service desk and report the issue.",
message: err,
};
}
}
Expand All @@ -111,7 +137,7 @@ async function prepareWorkingDirectory(taskExecutionConfig, conf, logger) {
// Perform the merge
if (gitOperations.merge == true) {
const mergeResult = await gitMerge(gitOperations.mergeBase, dir, logger);
if (mergeResult === false) {
if (mergeResult == false) {
return {
error: "merge-error",
message:
Expand All @@ -138,6 +164,13 @@ async function prepareWorkingDirectory(taskExecutionConfig, conf, logger) {
};
}

/**
 * Build a promise that settles after a timer elapses, so that an async
 * caller can `await` it to pause.
 * @param {number} ms - how long to wait, in milliseconds
 * @returns {Promise<void>} resolves (with no value) once the timer fires
 */
const delay = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

/**
* Clone the repository to our working directory
* @param {*} cloneUrl
Expand Down Expand Up @@ -171,30 +204,34 @@ async function cloneRepo(
workingDirectory;
logger.verbose("clone: " + cloneCommand);
return new Promise((resolve) => {
let success = true;
exec(cloneCommand, { cwd: workspaceRoot }, (error, stdout, stderr) => {
if (error) {
success = false;
logger.error(`cloneRepo error: ${error}`);
const cloneErrorLog =
"cloneRepo error: " +
error +
"\n" +
"stdout: " +
stdout +
"\n" +
"stderr: " +
stderr +
"\n";
try {
const cloneErrorLog =
"cloneRepo error: " +
error +
"\n" +
"stdout: " +
stdout +
"\n" +
"stderr: " +
stderr +
"\n";
fs.writeFileSync(workingDirectory + "/cloneerror.log", cloneErrorLog);
} catch (e) {
logger.error(`error writing out clone log: ` + e);
}
resolve(false);
resolve(cloneErrorLog); // no retries attempted
return;
}
logger.verbose(`stdout: ${stdout}`);
logger.verbose(`stderr: ${stderr}`);
resolve(true);
if (success) {
resolve(true); // original attempt or a retry succeeded
}
});
});
}
Expand Down

0 comments on commit 76b9f0c

Please sign in to comment.