From 1fb3dc822d5c90cb3da987f12e93da74c7fb9d1c Mon Sep 17 00:00:00 2001
From: Michael Woolnough <mw31@sanger.ac.uk>
Date: Thu, 5 Sep 2024 13:39:14 +0100
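Subject: [PATCH] Replace finish.sh with a --partial_dir_finish flag on multi.

Running 'wrstat multi' with -c/--partial_dir_finish skips the walk, static
copy and partial-sentinel scheduling and schedules only the basedir and tidy
jobs for a partial run. This replaces the standalone finish.sh script, which
ran 'wrstat basedir' followed by 'wrstat tidy'.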

---
 cmd/multi.go |  23 ++++++----
 finish.sh    | 120 ---------------------------------------------------
 main_test.go |  58 ++++++++++++++++++++++---
 3 files changed, 67 insertions(+), 134 deletions(-)
 delete mode 100755 finish.sh

diff --git a/cmd/multi.go b/cmd/multi.go
index 1e105993..cdf696a1 100644
--- a/cmd/multi.go
+++ b/cmd/multi.go
@@ -56,6 +56,7 @@ var (
 	partialDirMerge     string
 	partialDirClean     bool
 	createPartial       bool
+	finishPartial       bool
 	multiInodes         int
 	multiStatJobs       int
 	multiCh             string
@@ -161,6 +162,7 @@ func init() {
 		"from specified directory after merging")
 	multiCmd.Flags().BoolVarP(&createPartial, "create_partial_dir", "p", false, "perform the walk, "+
 		"stat, and combine steps only")
+	multiCmd.Flags().BoolVarP(&finishPartial, "partial_dir_finish", "c", false, "perform the basedir and tidy step on a partial run")
 	multiCmd.Flags().IntVarP(&multiInodes, "inodes_per_stat", "n",
 		defaultInodesPerJob, "number of inodes per parallel stat job")
 	multiCmd.Flags().IntVarP(&multiStatJobs, "num_stat_jobs", "j",
@@ -215,17 +217,19 @@ func doMultiScheduling(args []string, sudo bool) error {
 		return err
 	}
 
-	scheduleWalkJobs(outputRoot, args, unique, multiStatJobs, multiInodes, multiCh, forcedQueue, s)
+	if !finishPartial {
+		scheduleWalkJobs(outputRoot, args, unique, multiStatJobs, multiInodes, multiCh, forcedQueue, s)
 
-	if partialDirMerge != "" {
-		unique = scheduleStaticCopy(outputRoot, unique, partialDirMerge, partialDirClean, s)
-	}
+		if partialDirMerge != "" {
+			unique = scheduleStaticCopy(outputRoot, unique, partialDirMerge, partialDirClean, s)
+		}
 
-	if createPartial {
-		s.DisableSudo()
-		schedulePartialSentinel(outputRoot, unique, s)
+		if createPartial {
+			s.DisableSudo()
+			schedulePartialSentinel(outputRoot, unique, s)
 
-		return nil
+			return nil
+		}
 	}
 
 	scheduleBasedirsJob(outputRoot, unique, s)
@@ -349,7 +353,8 @@ func copyReqs() *jqs.Requirements {
 }
 
 func scheduleStaticCopy(outputRoot, unique, partialDirMerge string, partialDirClean bool,
-	s *scheduler.Scheduler) string {
+	s *scheduler.Scheduler,
+) string {
 	var remove string
 
 	if partialDirClean {
diff --git a/finish.sh b/finish.sh
deleted file mode 100755
index f1cf9bf9..00000000
--- a/finish.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail;
-IFS=$'\n\t';
-
-declare DATE="$(date "+%Y%m%d")";
-declare -a TIDY_FLAGS=();
-declare -a BASEDIR_FLAGS=();
-declare OUTPUT="";
-declare FINAL="";
-declare WRSTAT="wrstat";
-declare SET=(false false false false false false)
-
-help() {
-	echo "Usage: $0 [-b BASEDIRS_CONFIG] -f FINAL_OUTDIR -o OWNERS_FILE -q QUOTA_FILE [-w WRSTAT_EXE] WORKING_DIR";
-	echo;
-	echo "This script is used to run the final two steps of a full WRStat run, the 'basedirs' and 'tidy' steps.";
-	echo;
-	echo "	-b,--config	path to basedirs config file.";
-	echo "	-d,--date	date for output files. [default: $DATE]";
-	echo "	-f,--final	final output directory.";
-	echo "	-h,--help	print this help text.";
-	echo "	-o,--owners	gid,owner csv file.";
-	echo "	-q,--quota	csv of gid,disk,size_quota,inode_quota.";
-	echo "	-w,--wrstat	location of wrstat executable. [default: $WRSTAT]";
-}
-
-error() {
-	echo -e "Error: $1.\n" >&2;
-	help >&2;
-
-	exit 1;
-}
-
-set() {
-	if "${SET[$1]}"; then
-		error "Can only set flag $2 once.";
-	fi;
-
-	if [ -z "$3" ]; then
-		error "Value of $2 cannot be empty";
-	fi;
-
-	SET[$1]=true;
-}
-
-while [ $# -gt 0 ]; do
-	case "$1" in
-	"-q"|"--quota")
-		set 0 "$1" "${2:-}";
-
-		shift;
-
-		BASEDIR_FLAGS+=( "-q" "$1" );;
-	"-o"|"--owners")
-		set 1 "$1" "${2:-}";
-
-		shift;
-
-		BASEDIR_FLAGS+=( "-o" "$1" );;
-	"-f"|"--final")
-		set 2 "$1" "${2:-}";
-
-		shift;
-
-		FINAL="$1";
-
-		TIDY_FLAGS+=( "-f" "$1" );;
-	"-b"|"--config")
-		set 3 "$1" "${2:-}";
-
-		shift;
-
-		BASEDIR_FLAGS+=( "-b" "$1" );;
-	"-d"|"--date")
-		set 4 "$1" "${2:-}";
-
-		shift;
-
-		DATE="$1";;
-	"-w"|"--wrstat")
-		set 5 "$1" "${2:-}";
-
-		shift;
-
-		WRSTAT="$1";;
-	"-h"|"--help")
-		help;
-
-		exit 0;;
-	*)
-		if [ -s "$OUTPUT" ]; then
-			error "Can only set a single output directory."
-		fi;
-
-		OUTPUT="$1";;
-	esac;
-
-	shift;
-done;
-
-declare -i flag=0;
-
-for var in "Quota CSV" "Owners CSV" "Final Output Directory"; do
-	if [ "${SET[$flag]}" = "false" ]; then
-		error "$var is required.";
-	fi;
-
-	flag=$(( $flag + 1 ));
-done;
-
-if [ -z "$OUTPUT" ]; then
-	error "No Working Directory specified.";
-fi;
-
-TIDY_FLAGS+=( "-d" "$DATE" "$OUTPUT" );
-BASEDIR_FLAGS+=( "$OUTPUT" "$FINAL" );
-
-"$WRSTAT" basedir "${BASEDIR_FLAGS[@]}";
-"$WRSTAT" tidy "${TIDY_FLAGS[@]}";
diff --git a/main_test.go b/main_test.go
index 82cc3b42..10e9ff94 100644
--- a/main_test.go
+++ b/main_test.go
@@ -192,11 +192,11 @@ func multiTests(t *testing.T, subcommand ...string) {
 	date := time.Now().Format("20060102")
 
 	Convey("A partial 'wrstat multi' command produces the correct jobs to run, with sudo enabled", func() {
-		testPartial(t, true, subcommand, date, walkReqs, touchReqs, combineReqs)
+		testPartial(t, true, subcommand, date, walkReqs, touchReqs, combineReqs, baseDirsReqs, tidyReqs)
 	})
 
 	Convey("A partial 'wrstat multi' command produces the correct jobs to run, with sudo not enabled", func() {
-		testPartial(t, false, subcommand, date, walkReqs, touchReqs, combineReqs)
+		testPartial(t, false, subcommand, date, walkReqs, touchReqs, combineReqs, baseDirsReqs, tidyReqs)
 	})
 
 	Convey("A full 'wrstat multi' command produces the correct jobs to run", func() {
@@ -430,7 +430,8 @@ func multiTests(t *testing.T, subcommand ...string) {
 }
 
 func testPartial(t *testing.T, sudo bool, subcommand []string, date string, walkReqs,
-	touchReqs, combineReqs *scheduler.Requirements) {
+	touchReqs, combineReqs, baseDirsReqs, tidyReqs *scheduler.Requirements,
+) {
 	t.Helper()
 
 	workingDir := t.TempDir()
@@ -533,6 +534,51 @@ func testPartial(t *testing.T, sudo bool, subcommand []string, date string, walk
 	}
 
 	So(jobs, ShouldResemble, expectation)
+
+	Convey("…finishing the partial run runs the correct jobs", func() {
+		workingDir := t.TempDir()
+		_, _, jobs, err := runWRStat(append(subcommand, "-w", workingDir, "-f", "final_output", "-q", "quota_file",
+			"-o", "owners_file", "-c", "/some/path", "/some-other/path")...)
+		So(err, ShouldBeNil)
+
+		So(len(jobs), ShouldEqual, 2)
+
+		repGroup := jobs[0].RepGroup[len(jobs[0].RepGroup)-20:]
+		expectation := []*jobqueue.Job{
+			{
+				Cmd: fmt.Sprintf("%s basedir -q \"quota_file\" -o \"owners_file\"  \"%s/%s\" \"final_output\"",
+					exe, workingDir, repGroup),
+				CwdMatters:   true,
+				RepGroup:     fmt.Sprintf("wrstat-basedir-%s-%s", date, repGroup),
+				ReqGroup:     "wrstat-basedir",
+				Requirements: baseDirsReqs,
+				Override:     1,
+				Retries:      30,
+				DepGroups:    []string{repGroup + ".basedir"},
+				Dependencies: jobqueue.Dependencies{
+					{
+						DepGroup: repGroup,
+					},
+				},
+			},
+			{
+				Cmd:          fmt.Sprintf("%s tidy -f final_output -d %s %s/%s", exe, date, workingDir, repGroup),
+				CwdMatters:   true,
+				RepGroup:     fmt.Sprintf("wrstat-tidy-final_output-%s-%s", date, repGroup),
+				ReqGroup:     "wrstat-tidy",
+				Requirements: tidyReqs,
+				Override:     1,
+				Retries:      30,
+				Dependencies: jobqueue.Dependencies{
+					{
+						DepGroup: repGroup + ".basedir",
+					},
+				},
+			},
+		}
+
+		So(jobs, ShouldResemble, expectation)
+	})
 }
 
 func TestMulti(t *testing.T) {
@@ -588,8 +634,10 @@ func TestWalk(t *testing.T) {
 		So(jobs, ShouldResemble, jobsExpectation)
 
 		expected := ""
-		for _, subPath := range []string{"", "/a", "/a/b", "/a/b/c", "/a/b/c/d", "/a/b/c/d/e",
-			"/a/b/c/test.txt", "/a/b/f", "/a/b/f/tes\nt2.csv", "/a/g", "/a/g/h", "/a/test3"} {
+		for _, subPath := range []string{
+			"", "/a", "/a/b", "/a/b/c", "/a/b/c/d", "/a/b/c/d/e",
+			"/a/b/c/test.txt", "/a/b/f", "/a/b/f/tes\nt2.csv", "/a/g", "/a/g/h", "/a/test3",
+		} {
 			expected += encode.Base64Encode(tmp+subPath) + "\n"
 		}