polishing the mass-importer staging

- make it an option (-g, --staging) - play nicely with --append - typo fixes
cbg-ethz · Mar 30, 2022 · 5054a3a · 5054a3a
1 parent 5bf2bf1
commit 5054a3a
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 29 deletions.
diff --git a/utils/README.md b/utils/README.md
@@ -118,6 +118,7 @@ Usage: ./sort_samples_dumb -f <DIR> -b <BATCH> [-l <LEN>] [-L {''|--link|--symbo
 	-L : link parameter to pass to cp when copying (default: --link)
 	-t : tsv file (default: samples.<BATCH>.tsv)
 	-T : do not truncate (empty) the file before starting
+	-g : store list in .tsv.staging instead and only rename into final .tsv if successful
 	-D : samples have duplicates (e.g.: across lanes)
 	-p : prefix to prepend to fastq files (e.g.: for fusing runs)
 	-s : suffix to append to fastq files (e.g.: for fusing runs)
@@ -183,6 +184,7 @@ optional arguments:
   --force               Force overwriting any existing file when moving
   -s, --summary         Only display a summary of datasets, not an exhaustive list of all samples
   -a, --append          Append to the end of movedatafiles.sh, instead of overwriting (use when calling from an external combiner wrapper)
+  -g, --staging         Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.
   -n, --noempty         skip fastq.gz files with bad yield (0 reads)
   -p TSV, --patchmap TSV
                         patchmap file to rename samples
@@ -239,6 +241,7 @@ optional arguments:
   -b LAB, --batch LAB   generate batch description
   -s, --summary         Only display a summary of datasets, not an exhaustive list of all samples
   -a, --append          Append to the end of movedatafiles.sh, instead of overwriting (use when calling from an external combiner wrapper)
+  -g, --staging         Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.
   -l, --forcelanes      Explicitly look for sample in each lane (for replicates across lanes)
   -p TSV, --patchmap TSV
                         patchmap file to rename samples

diff --git a/utils/sort_samples_demultiplexstats b/utils/sort_samples_demultiplexstats
@@ -29,6 +29,8 @@ argparser.add_argument('-s', '--summary', required=False,
 	action='store_true', dest='summary', help="Only display a summary of datasets, not an exhaustive list of all samples")
 argparser.add_argument('-a', '--append', required=False,
 	action='store_true', dest='append', help="Append to the end of movedatafiles.sh, instead of overwritting\n(use when calling from an external combiner wrapper)")
+argparser.add_argument('-g', '--staging', required=False,
+	action='store_true', dest='staging', help="Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.")
 argparser.add_argument('-n', '--noempty', required=False,
 	action='store_true', dest='noempty', help="skip fastq.gz files with bad yield (0 reads)")
 argparser.add_argument('-p', '--patchmap', metavar='TSV', required=False, default=None,
@@ -42,6 +44,7 @@ sampleset=args.outdir
 link=args.link
 append=args.append
 noempty=args.noempty
+staging_suffix='.staging' if args.staging else ''
 
 statsjson=os.path.join(statsdir, 'Stats/Stats.json')
 
@@ -117,7 +120,7 @@ if not os.path.isdir(sampleset):
 
 # output files
 batch=f"{rundate}_{flowcell}"
-tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv'), 'wt')
+tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv{staging_suffix}'), 'wt')
 # shell script file with all moving instructions inside
 sh=open(os.path.join(sampleset,'movedatafiles.sh'), 'at' if append else 'wt')
 
@@ -150,7 +153,7 @@ X() {
 # per batch directory checks
 print(r"[[ -d '%(download)s' ]] || fail 'No download directory:' '%(download)s'" % {'download':fastqdir}, file=sh)
 if qcdir:
-    print(r"[[ -d '%(qc)s' ]] || fail 'No download directory:' '%(qc)s'" % {'qc': qcdir}, file=sh)
+	print(r"[[ -d '%(qc)s' ]] || fail 'No download directory:' '%(qc)s'" % {'qc': qcdir}, file=sh)
 
 
 # parse info about samples
@@ -197,13 +200,16 @@ for file in "${fastq[@]}"; do
 			print('done', file=sh)
 
 
-# coda: return status
-if not append: print(f"""
-if (( ALLOK )); then
-	echo All Ok
-	exit 0
-else
+# coda: rename staging and return status
+if args.staging:
+	print(f"(( ALLOK )) && mv -v {sampleset}/samples.{batch}.tsv{staging_suffix} {sampleset}/samples.{batch}.tsv", file=sh)
+
+if not append: print("""
+if (( ! ALLOK )); then
 	echo Some errors
 	exit 1
 fi;
+
+echo All Ok
+exit 0
 """, file=sh)
diff --git a/utils/sort_samples_dumb b/utils/sort_samples_dumb
@@ -28,6 +28,7 @@ usage() { echo "Usage: $0 -f <DIR> -b <BATCH> [-l <LEN>] [-L {''|--link|--symbol
 	-L : link parameter to pass to cp when copying (default: --link)
 	-t : tsv file (default: samples.<BATCH>.tsv)
 	-T : do not truncate (empty) the file before starting
+	-g : store list in .tsv.staging instead and only rename into final .tsv if successful
 	-D : sample have duplicates (e.g.: across lanes)
 	-p : prefix to prepend to fastq files (e.g.: for fusing runs)
 	-s : suffix to append to fastq files (e.g.: for fusing runs)
@@ -42,8 +43,9 @@ prefix=''
 suffix=''
 quiet=0
 duplicates=0
+staging=0
 mode=
-while getopts "f:b:Dl:L:p:s:o:m:t:Tqh" o; do
+while getopts "f:b:Dl:L:p:s:o:m:t:Tgqh" o; do
 	case "${o}" in
 		f)	fastq_dir="${OPTARG}"
 			# shellcheck disable=SC2206
@@ -67,6 +69,7 @@ while getopts "f:b:Dl:L:p:s:o:m:t:Tqh" o; do
 			[[ $mode =~ ^[0-7]{,4}$ ]]	|| fail "Invalid characters <${mode//[0-7]/}> in <${mode}>" 'mode should be an octal chmod value, see <mkdir --help> for informations'
 			;;
 		t)	tsv="${OPTARG}"	;;
+		g)	staging=1	;;
 		T)	truncate=0	;;
 		q)	quiet=1	;;
 		h)	usage 0	;;
@@ -78,8 +81,14 @@ done
 : "${batch_name:?missing mandatory batch name, use option -b}"
 : "${out_dir:?missing mandatory output dir use option -o}"
 
-: "${tsv:=${out_dir}/samples.${batch_name}.tsv.staging}"
+: "${tsv:=${out_dir}/samples.${batch_name}.tsv}"
 
+if (( staging )); then
+	staging_suffix=".staging"
+	tsv="${tsv}${staging_suffix}"
+else
+	staging_suffix=
+fi
 
 # RegEx
 
@@ -224,12 +233,11 @@ if (( numdup )); then
 		warn "$(( paired + numsam )) samples, but only ${numdup} duplicates" "(missing: $((numdup % ( paired + numsam ) )) )"
 	fi
 fi
-if (( ALLOK )); then
-        mv -v ${tsv} ${tsv//\.staging/}
-        info All Ok
-        exit 0
-else
-        warn Some errors
-        exit 1
+if (( ! ALLOK )); then
+	warn Some errors
+	exit 1
 fi
 
+(( staging )) && mv -v "${tsv}" "${tsv%%${staging_suffix}}"
+info All Ok
+exit 0
diff --git a/utils/sort_samples_jobinfo b/utils/sort_samples_jobinfo
@@ -32,6 +32,8 @@ argparser.add_argument('-s', '--summary', required=False,
 	action='store_true', dest='summary', help="Only display a summary of datasets, not an exhaustive list of all samples")
 argparser.add_argument('-a', '--append', required=False,
 	action='store_true', dest='append', help="Append to the end of movedatafiles.sh, instead of overwritting\n(use when calling from an external combiner wrapper)")
+argparser.add_argument('-g', '--staging', required=False,
+	action='store_true', dest='staging', help="Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.")
 argparser.add_argument('-l', '--forcelanes', required=False,
 	action='store_true', dest='forcelanes', help="Explicitly look for sample in each lane (for replicates across lanes)")
 argparser.add_argument('-p', '--patchmap', metavar='TSV', required=False, default=None,
@@ -44,6 +46,8 @@ sampleset=args.outdir
 link=args.link
 append=args.append
 lab = args.batch
+staging_suffix='.staging' if args.staging else ''
+
 
 # parse the chmod parameter
 try:
@@ -136,7 +140,7 @@ if not os.path.isdir(sampleset):
 
 # output files
 batch=f"{date}_{flowcell}"
-tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv.staging'), 'wt')
+tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv{staging_suffix}'), 'wt')
 # shell script file with all moving instructions inside
 sh=open(os.path.join(sampleset,'movedatafiles.sh'), 'at' if append else 'wt')
 
@@ -230,19 +234,16 @@ if args.batch:
 	with open(os.path.join(sampleset,f'batch.{batch}.yaml'), 'wt') as byml:
 		print(yaml.dump({'type':'jobinfo','lab':lab,'runfolder':runfolder,'date':date,'instrument':instr,'runnum':runnum,'flowcell':flowcell,'lanes':lanes,'library':library,'folder':folder}, sort_keys=False), file=byml)
 
-# coda: return status
-if not append: print(f"""
-if (( !ALLOK )); then
-        echo Some errors
-        exit 1
-fi;
-
-""", file=sh)
+# coda: rename staging and return status
+if args.staging:
+	print(f"(( ALLOK )) && mv -v {sampleset}/samples.{batch}.tsv{staging_suffix} {sampleset}/samples.{batch}.tsv", file=sh)
 
-print(f"mv -v {sampleset}/sample.{batch}.tsv.staging samples {sampleset}/sample.{batch}.tsv", file=sh)
+if not append: print("""
+if (( ! ALLOK )); then
+	echo Some errors
+	exit 1
+fi;
 
-print("""
 echo All Ok
 exit 0
 """, file=sh)
-