From 76b9f0c293277205a92e166845b9c961e07f44b9 Mon Sep 17 00:00:00 2001 From: Thomas Branch Date: Thu, 16 Sep 2021 15:59:04 -0400 Subject: [PATCH] Adding retry logic to git clone operation. KP is having issues where clones are failing and a node will fail continuously on one job after the next while in a broken state (e.g. disk full, network connectivity issue, etc.) and empty the queue. This is a stopgap to slow the node down and retry the operation a set number of times w/ a set delay that can be configured in the environment. The next step will be to add self-repair capabilities to the script, such as freeing disk space or having the node take itself offline until someone can check on the issue(s). --- bin/stampede-worker.js | 2 ++ lib/workingDirectory.js | 71 +++++++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/bin/stampede-worker.js b/bin/stampede-worker.js index db2a66e..1422612 100755 --- a/bin/stampede-worker.js +++ b/bin/stampede-worker.js @@ -53,6 +53,8 @@ const conf = require("rc")("stampede", { logQueuePath: null, // Heartbeat heartbeatInterval: 15000, + cloneRetryInterval: 1 * 60 * 1000, // retry every minute; must be specified in milliseconds + cloneRetryAttempts: 3 // retry 3 times }); // Configure winston logging diff --git a/lib/workingDirectory.js b/lib/workingDirectory.js index 78abe91..f800b88 100644 --- a/lib/workingDirectory.js +++ b/lib/workingDirectory.js @@ -3,6 +3,7 @@ const fs = require("fs"); const { exec } = require("child_process"); const git = require("git-last-commit"); +let retries = 0; /** * prepare the working directory * @param {*} taskExecutionConfig @@ -79,7 +80,7 @@ async function prepareWorkingDirectory(taskExecutionConfig, conf, logger) { // Perform the clone if (gitOperations.clone != null) { - const cloneResult = await cloneRepo( + let cloneResult = await cloneRepo( gitRepoURL, gitOperations.clone, dir, @@ -88,11 +89,36 @@ async function 
prepareWorkingDirectory(taskExecutionConfig, conf, logger) { logger, conf.workspaceRoot ); - if (cloneResult === false) { + if (conf.cloneRetryAttempts && cloneResult !== true) { + if (retries > conf.cloneRetryAttempts) { + while (true) { + await delay(conf.cloneRetryInterval); + cloneResult = await cloneRepo( + gitRepoURL, + gitOperations.clone, + dir, + gitOperations.depth, + taskExecutionConfig.gitCloneOptions, + logger, + conf.workspaceRoot + ); + retries++; + if (retries > conf.cloneRetryAttempts || cloneResult === true) { + break; // we run until we get a positive clone result or we've run out of retries + } + } + } + } + retries = 0; // reset our retry attempts + if (cloneResult !== true) { + retries++; + let err = "Unable to clone the repository, please contact the service desk and report the issue.\n" + + cloneResult + "\n" + + "Directory: " + dir + "\n" + + "Repo URL: " + gitRepoURL + "\n" return { error: "clone-error", - message: - "Unable to clone the repository, please contact the service desk and report the issue.", + message: err, }; } } @@ -111,7 +137,7 @@ async function prepareWorkingDirectory(taskExecutionConfig, conf, logger) { // Perform the merge if (gitOperations.merge == true) { const mergeResult = await gitMerge(gitOperations.mergeBase, dir, logger); - if (mergeResult === false) { + if (mergeResult == false) { return { error: "merge-error", message: @@ -138,6 +164,13 @@ async function prepareWorkingDirectory(taskExecutionConfig, conf, logger) { }; } +/** + * Pauses execution for specified ms + * @param {*} ms + * @returns + */ +const delay = ms => new Promise(res => setTimeout(res, ms)); + /** * Clone the repository to our working directory * @param {*} cloneUrl @@ -171,30 +204,34 @@ async function cloneRepo( workingDirectory; logger.verbose("clone: " + cloneCommand); return new Promise((resolve) => { + let success = true; exec(cloneCommand, { cwd: workspaceRoot }, (error, stdout, stderr) => { if (error) { + success = false; 
logger.error(`cloneRepo error: ${error}`); + const cloneErrorLog = + "cloneRepo error: " + + error + + "\n" + + "stdout: " + + stdout + + "\n" + + "stderr: " + + stderr + + "\n"; try { - const cloneErrorLog = - "cloneRepo error: " + - error + - "\n" + - "stdout: " + - stdout + - "\n" + - "stderr: " + - stderr + - "\n"; fs.writeFileSync(workingDirectory + "/cloneerror.log", cloneErrorLog); } catch (e) { logger.error(`error writing out clone log: ` + e); } - resolve(false); + resolve(cloneErrorLog); // no retries attempted return; } logger.verbose(`stdout: ${stdout}`); logger.verbose(`stderr: ${stderr}`); - resolve(true); + if (success) { + resolve(true); // original attempt or a retry succeeded + } }); }); }