Skip to content

Commit

Permalink
polishing the mass-importer staging
Browse files Browse the repository at this point in the history
 - make it an option (-g, --staging)
 - play nicely with --append
 - typo fixes
  • Loading branch information
DrYak committed Mar 30, 2022
1 parent 5bf2bf1 commit 5054a3a
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 29 deletions.
3 changes: 3 additions & 0 deletions utils/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Usage: ./sort_samples_dumb -f <DIR> -b <BATCH> [-l <LEN>] [-L {''|--link|--symbo
-L : link parameter to pass to cp when copying (default: --link)
-t : tsv file (default: samples.<BATCH>.tsv)
-T : do not truncate (empty) the file before starting
-g : store list in .tsv.staging instead and only rename into final .tsv if successful
-D : samples have duplicates (e.g.: across lanes)
-p : prefix to prepend to fastq files (e.g.: for fusing runs)
-s : suffix to append to fastq files (e.g.: for fusing runs)
Expand Down Expand Up @@ -183,6 +184,7 @@ optional arguments:
--force Force overwriting any existing file when moving
-s, --summary Only display a summary of datasets, not an exhaustive list of all samples
-a, --append Append to the end of movedatafiles.sh, instead of overwriting (use when calling from an external combiner wrapper)
-g, --staging Write the samples list to .tsv.staging and only rename it to the final .tsv at the end of movedatafiles.sh if there were no errors.
-n, --noempty skip fastq.gz files with bad yield (0 reads)
-p TSV, --patchmap TSV
patchmap file to rename samples
Expand Down Expand Up @@ -239,6 +241,7 @@ optional arguments:
-b LAB, --batch LAB generate batch description
-s, --summary Only display a summary of datasets, not an exhaustive list of all samples
-a, --append Append to the end of movedatafiles.sh, instead of overwriting (use when calling from an external combiner wrapper)
-g, --staging Write the samples list to .tsv.staging and only rename it to the final .tsv at the end of movedatafiles.sh if there were no errors.
-l, --forcelanes Explicitly look for sample in each lane (for replicates across lanes)
-p TSV, --patchmap TSV
patchmap file to rename samples
Expand Down
22 changes: 14 additions & 8 deletions utils/sort_samples_demultiplexstats
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ argparser.add_argument('-s', '--summary', required=False,
action='store_true', dest='summary', help="Only display a summary of datasets, not an exhaustive list of all samples")
argparser.add_argument('-a', '--append', required=False,
action='store_true', dest='append', help="Append to the end of movedatafiles.sh, instead of overwritting\n(use when calling from an external combiner wrapper)")
argparser.add_argument('-g', '--staging', required=False,
action='store_true', dest='staging', help="Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.")
argparser.add_argument('-n', '--noempty', required=False,
action='store_true', dest='noempty', help="skip fastq.gz files with bad yield (0 reads)")
argparser.add_argument('-p', '--patchmap', metavar='TSV', required=False, default=None,
Expand All @@ -42,6 +44,7 @@ sampleset=args.outdir
link=args.link
append=args.append
noempty=args.noempty
staging_suffix='.staging' if args.staging else ''

statsjson=os.path.join(statsdir, 'Stats/Stats.json')

Expand Down Expand Up @@ -117,7 +120,7 @@ if not os.path.isdir(sampleset):

# output files
batch=f"{rundate}_{flowcell}"
tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv'), 'wt')
tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv{staging_suffix}'), 'wt')
# shell script file with all moving instructions inside
sh=open(os.path.join(sampleset,'movedatafiles.sh'), 'at' if append else 'wt')

Expand Down Expand Up @@ -150,7 +153,7 @@ X() {
# per batch directory checks
print(r"[[ -d '%(download)s' ]] || fail 'No download directory:' '%(download)s'" % {'download':fastqdir}, file=sh)
if qcdir:
print(r"[[ -d '%(qc)s' ]] || fail 'No download directory:' '%(qc)s'" % {'qc': qcdir}, file=sh)
print(r"[[ -d '%(qc)s' ]] || fail 'No download directory:' '%(qc)s'" % {'qc': qcdir}, file=sh)


# parse info about samples
Expand Down Expand Up @@ -197,13 +200,16 @@ for file in "${fastq[@]}"; do
print('done', file=sh)


# coda: return status
if not append: print(f"""
if (( ALLOK )); then
echo All Ok
exit 0
else
# coda: rename staging and return status
if args.staging:
print(f"(( ALLOK )) && mv -v {sampleset}/samples.{batch}.tsv{staging_suffix} {sampleset}/samples.{batch}.tsv", file=sh)

if not append: print("""
if (( ! ALLOK )); then
echo Some errors
exit 1
fi;
echo All Ok
exit 0
""", file=sh)
26 changes: 17 additions & 9 deletions utils/sort_samples_dumb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ usage() { echo "Usage: $0 -f <DIR> -b <BATCH> [-l <LEN>] [-L {''|--link|--symbol
-L : link parameter to pass to cp when copying (default: --link)
-t : tsv file (default: samples.<BATCH>.tsv)
-T : do not truncate (empty) the file before starting
-g : store list in .tsv.staging instead and only rename into final .tsv if successful
-D : sample have duplicates (e.g.: across lanes)
-p : prefix to prepend to fastq files (e.g.: for fusing runs)
-s : suffix to append to fastq files (e.g.: for fusing runs)
Expand All @@ -42,8 +43,9 @@ prefix=''
suffix=''
quiet=0
duplicates=0
staging=0
mode=
while getopts "f:b:Dl:L:p:s:o:m:t:Tqh" o; do
while getopts "f:b:Dl:L:p:s:o:m:t:Tgqh" o; do
case "${o}" in
f) fastq_dir="${OPTARG}"
# shellcheck disable=SC2206
Expand All @@ -67,6 +69,7 @@ while getopts "f:b:Dl:L:p:s:o:m:t:Tqh" o; do
[[ $mode =~ ^[0-7]{,4}$ ]] || fail "Invalid characters <${mode//[0-7]/}> in <${mode}>" 'mode should be an octal chmod value, see <mkdir --help> for informations'
;;
t) tsv="${OPTARG}" ;;
g) staging=1 ;;
T) truncate=0 ;;
q) quiet=1 ;;
h) usage 0 ;;
Expand All @@ -78,8 +81,14 @@ done
: "${batch_name:?missing mandatory batch name, use option -b}"
: "${out_dir:?missing mandatory output dir use option -o}"

: "${tsv:=${out_dir}/samples.${batch_name}.tsv.staging}"
: "${tsv:=${out_dir}/samples.${batch_name}.tsv}"

if (( staging )); then
staging_suffix=".staging"
tsv="${tsv}${staging_suffix}"
else
staging_suffix=
fi

# RegEx

Expand Down Expand Up @@ -224,12 +233,11 @@ if (( numdup )); then
warn "$(( paired + numsam )) samples, but only ${numdup} duplicates" "(missing: $((numdup % ( paired + numsam ) )) )"
fi
fi
if (( ALLOK )); then
mv -v ${tsv} ${tsv//\.staging/}
info All Ok
exit 0
else
warn Some errors
exit 1
if (( ! ALLOK )); then
warn Some errors
exit 1
fi

(( staging )) && mv -v "${tsv}" "${tsv%%${staging_suffix}}"
info All Ok
exit 0
25 changes: 13 additions & 12 deletions utils/sort_samples_jobinfo
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ argparser.add_argument('-s', '--summary', required=False,
action='store_true', dest='summary', help="Only display a summary of datasets, not an exhaustive list of all samples")
argparser.add_argument('-a', '--append', required=False,
action='store_true', dest='append', help="Append to the end of movedatafiles.sh, instead of overwritting\n(use when calling from an external combiner wrapper)")
argparser.add_argument('-g', '--staging', required=False,
action='store_true', dest='staging', help="Write samples list in .tsv.staging and only rename them to the final .tsv at the end of movedatafiles.sh if there were no errors.")
argparser.add_argument('-l', '--forcelanes', required=False,
action='store_true', dest='forcelanes', help="Explicitly look for sample in each lane (for replicates across lanes)")
argparser.add_argument('-p', '--patchmap', metavar='TSV', required=False, default=None,
Expand All @@ -44,6 +46,8 @@ sampleset=args.outdir
link=args.link
append=args.append
lab = args.batch
staging_suffix='.staging' if args.staging else ''


# parse the chmod parameter
try:
Expand Down Expand Up @@ -136,7 +140,7 @@ if not os.path.isdir(sampleset):

# output files
batch=f"{date}_{flowcell}"
tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv.staging'), 'wt')
tsv=open(os.path.join(sampleset,f'samples.{batch}.tsv{staging_suffix}'), 'wt')
# shell script file with all moving instructions inside
sh=open(os.path.join(sampleset,'movedatafiles.sh'), 'at' if append else 'wt')

Expand Down Expand Up @@ -230,19 +234,16 @@ if args.batch:
with open(os.path.join(sampleset,f'batch.{batch}.yaml'), 'wt') as byml:
print(yaml.dump({'type':'jobinfo','lab':lab,'runfolder':runfolder,'date':date,'instrument':instr,'runnum':runnum,'flowcell':flowcell,'lanes':lanes,'library':library,'folder':folder}, sort_keys=False), file=byml)

# coda: return status
if not append: print(f"""
if (( !ALLOK )); then
echo Some errors
exit 1
fi;
""", file=sh)
# coda: rename staging and return status
if args.staging:
print(f"(( ALLOK )) && mv -v {sampleset}/samples.{batch}.tsv{staging_suffix} {sampleset}/samples.{batch}.tsv", file=sh)

print(f"mv -v {sampleset}/sample.{batch}.tsv.staging samples {sampleset}/sample.{batch}.tsv", file=sh)
if not append: print("""
if (( ! ALLOK )); then
echo Some errors
exit 1
fi;
print("""
echo All Ok
exit 0
""", file=sh)

0 comments on commit 5054a3a

Please sign in to comment.