Skip to content

Commit

Permalink
Add a population clustering fraction cutoff argument (#39)
Browse files Browse the repository at this point in the history
* Add population cutoff parameter

* Clean MIPWrangler scripts

* Update changelog
  • Loading branch information
arisp99 authored May 9, 2022
1 parent 4376805 commit 25ef3b9
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 44 deletions.
8 changes: 6 additions & 2 deletions MIPTools.def
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,8 @@ From: amd64/ubuntu:20.04
echo " -c Number of available processors to use. Default: 1."
echo " -e Required. A unique ID given to each sequencing run by"
echo " the user."
echo " -f The population fraction cutoff used by MIPWrangler."
echo " Default: 0.005."
echo " -h Print the help page."
echo " -k Keep intermediate files generated by MIPWrangler."
echo " -l Required. File providing a list of samples with "
Expand Down Expand Up @@ -354,12 +356,14 @@ From: amd64/ubuntu:20.04
min_capture_length="none"
stitch_options="none"
keep_files=""
population_fraction_cutoff=0.005

# Parse options
while getopts "c:e:hkl:m:n:p:s:w:x:" opt; do
while getopts "c:e:f:hkl:m:n:p:s:w:x:" opt; do
case ${opt} in
c) cpu_count=${OPTARG} ;;
e) experiment_id=${OPTARG} ;;
f) population_fraction_cutoff=${OPTARG} ;;
h) help
exit 1 ;;
k) keep_files=-k ;;
Expand Down Expand Up @@ -394,7 +398,7 @@ From: amd64/ubuntu:20.04
-c ${cpu_count} -e ${experiment_id} ${keep_files} \
-l /opt/analysis/${sample_list} -m ${min_capture_length} \
-n ${server_number} -p ${probe_sets} -s ${sample_sets} \
-w ${cluster_script} -x ${stitch_options}
-w ${cluster_script} -x ${stitch_options} -f ${population_fraction_cutoff}

# Run wrangler scripts.
# The dot space is used to let the sourced script modify the current
Expand Down
17 changes: 12 additions & 5 deletions base_resources/MIPWrangler_scripts/runMIPWranglerCurrent.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
#!/usr/bin/env bash
if [[ $# -ne 2 ]]; then
echo "Illegal number of parameters, needs 2 argument, 1) name of mip server number, 2) num of threads to use"
exit

# Validate the positional arguments before running any MIPWrangler step.
# Expected arguments:
#   $1 - the name of the MIP server number
#   $2 - the number of threads to use
#   $3 - the population clustering fraction cutoff
# On a wrong argument count a usage message is written to stderr and the
# script exits with status 2.
if [[ $# -ne 3 ]]; then
  # printf emits one line per argument. Bash's builtin echo does not expand
  # "\n" without -e, so building the message with embedded "\n" and echoing
  # it prints the backslash sequences literally on a single line.
  printf '%s\n' \
    "Illegal number of parameters. Needs three arguments:" \
    "1) The name of the MIP server number" \
    "2) The number of threads to use" \
    "3) The population clustering fraction cutoff." >&2
  exit 2
fi

# Correct barcodes
# Expansions are quoted so a working directory containing whitespace or glob
# characters is passed to MIPWrangler as a single argument.
MIPWrangler mipBarcodeCorrectionMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipBarcodeCorrecting_run1 --allowableErrors 6
MIPWrangler mipCorrectForContamWithSameBarcodesMultiple \
  --masterDir "$(realpath ./)" --numThreads "$2" --overWriteDirs \
  --overWriteLog --logFile mipCorrectForContamWithSameBarcodes_run1

# Cluster barcodes and MIPs
MIPWrangler mipClusteringMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipClustering_run1 \
  --par /opt/resources/clustering_pars/illumina_collapseHomoploymers.pars.txt \
  --countEndGaps
MIPWrangler mipPopulationClusteringMultiple --masterDir $(realpath ./) --numThreads $2 --overWriteDirs --overWriteLog --logFile mipPopClustering_run1 --cutoff 0 --countEndGaps
#nohup MIPWrangler mav --masterDir $(realpath ./) --numThreads $2 --port $((10000+$1)) --name mip$1 &
MIPWrangler mipPopulationClusteringMultiple --masterDir $(realpath ./) --numThreads $2 --overWriteDirs --overWriteLog --logFile mipPopClustering_run1 --cutoff 0 --countEndGaps --fraccutoff $3

This file was deleted.

15 changes: 11 additions & 4 deletions base_resources/MIPWrangler_scripts/runMIPWranglerSwga.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
#!/usr/bin/env bash
if [[ $# -ne 2 ]]; then
echo "Illegal number of parameters, needs 2 argument, 1) name of mip server number, 2) num of threads to use"
exit

# Validate the positional arguments before running any MIPWrangler step.
# Expected arguments:
#   $1 - the name of the MIP server number
#   $2 - the number of threads to use
#   $3 - the population clustering fraction cutoff
# On a wrong argument count a usage message is written to stderr and the
# script exits with status 2.
if [[ $# -ne 3 ]]; then
  # printf emits one line per argument. Bash's builtin echo does not expand
  # "\n" without -e, so building the message with embedded "\n" and echoing
  # it prints the backslash sequences literally on a single line.
  printf '%s\n' \
    "Illegal number of parameters. Needs three arguments:" \
    "1) The name of the MIP server number" \
    "2) The number of threads to use" \
    "3) The population clustering fraction cutoff." >&2
  exit 2
fi

# Correct barcodes
# Expansions are quoted so a working directory containing whitespace or glob
# characters is passed to MIPWrangler as a single argument.
MIPWrangler mipBarcodeCorrectionMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipBarcodeCorrecting_run1 --allowableErrors 6
MIPWrangler mipCorrectForContamWithSameBarcodesMultiple \
  --masterDir "$(realpath ./)" --numThreads "$2" --overWriteDirs \
  --overWriteLog --logFile mipCorrectForContamWithSameBarcodes_run1

# Cluster barcodes and MIPs
MIPWrangler mipClusteringMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipClustering_run1 \
  --par /opt/resources/clustering_pars/illumina_swga.pars.txt \
  --countEndGaps
# The fraction cutoff required as $3 is forwarded via --fraccutoff so the
# value requested by the caller actually reaches population clustering.
# NOTE(review): assumes 0.005 (the wrangler app default) matches MIPWrangler's
# built-in default, making this a no-op for default runs - confirm.
MIPWrangler mipPopulationClusteringMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipPopClustering_run1 --cutoff 0 --countEndGaps \
  --fraccutoff "$3"
#nohup MIPWrangler mav --masterDir $(realpath ./) --numThreads $2 --port $((10000+$1)) --name mip$1 &
15 changes: 11 additions & 4 deletions base_resources/MIPWrangler_scripts/runMIPWranglerSwgaPop.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
#!/usr/bin/env bash
if [[ $# -ne 2 ]]; then
echo "Illegal number of parameters, needs 2 argument, 1) name of mip server number, 2) num of threads to use"
exit

# Validate the positional arguments before running any MIPWrangler step.
# Expected arguments:
#   $1 - the name of the MIP server number
#   $2 - the number of threads to use
#   $3 - the population clustering fraction cutoff
# On a wrong argument count a usage message is written to stderr and the
# script exits with status 2.
if [[ $# -ne 3 ]]; then
  # printf emits one line per argument. Bash's builtin echo does not expand
  # "\n" without -e, so building the message with embedded "\n" and echoing
  # it prints the backslash sequences literally on a single line.
  printf '%s\n' \
    "Illegal number of parameters. Needs three arguments:" \
    "1) The name of the MIP server number" \
    "2) The number of threads to use" \
    "3) The population clustering fraction cutoff." >&2
  exit 2
fi

# Correct barcodes
# Expansions are quoted so a working directory containing whitespace or glob
# characters is passed to MIPWrangler as a single argument.
MIPWrangler mipBarcodeCorrectionMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipBarcodeCorrecting_run1 --allowableErrors 6
MIPWrangler mipCorrectForContamWithSameBarcodesMultiple \
  --masterDir "$(realpath ./)" --numThreads "$2" --overWriteDirs \
  --overWriteLog --logFile mipCorrectForContamWithSameBarcodes_run1

# Cluster barcodes and MIPs
MIPWrangler mipClusteringMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipClustering_run1 \
  --par /opt/resources/clustering_pars/illumina_swga_pop.pars.txt \
  --countEndGaps
# The fraction cutoff required as $3 is forwarded via --fraccutoff so the
# value requested by the caller actually reaches population clustering.
# NOTE(review): assumes 0.005 (the wrangler app default) matches MIPWrangler's
# built-in default, making this a no-op for default runs - confirm.
MIPWrangler mipPopulationClusteringMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipPopClustering_run1 --cutoff 0 --countEndGaps \
  --fraccutoff "$3"
#nohup MIPWrangler mav --masterDir $(realpath ./) --numThreads $2 --port $((10000+$1)) --name mip$1 &
15 changes: 11 additions & 4 deletions bin/runMIPWranglerCurrent.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
#!/usr/bin/env bash
if [[ $# -ne 2 ]]; then
echo "Illegal number of parameters. Needs 2 arguments: 1) name of mip server number, 2) num of threads to use." >&2

# Validate the positional arguments before running any MIPWrangler step.
# Expected arguments:
#   $1 - the name of the MIP server number
#   $2 - the number of threads to use
#   $3 - the population clustering fraction cutoff
# On a wrong argument count a usage message is written to stderr and the
# script exits with status 2.
if [[ $# -ne 3 ]]; then
  # printf emits one line per argument. Bash's builtin echo does not expand
  # "\n" without -e, so building the message with embedded "\n" and echoing
  # it prints the backslash sequences literally on a single line.
  printf '%s\n' \
    "Illegal number of parameters. Needs three arguments:" \
    "1) The name of the MIP server number" \
    "2) The number of threads to use" \
    "3) The population clustering fraction cutoff." >&2
  exit 2
fi

# Correct barcodes
# Expansions are quoted so a working directory containing whitespace or glob
# characters is passed to MIPWrangler as a single argument.
MIPWrangler mipBarcodeCorrectionMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipBarcodeCorrecting_run1 --allowableErrors 6
MIPWrangler mipCorrectForContamWithSameBarcodesMultiple \
  --masterDir "$(realpath ./)" --numThreads "$2" --overWriteDirs \
  --overWriteLog --logFile mipCorrectForContamWithSameBarcodes_run1

# Cluster barcodes and MIPs
MIPWrangler mipClusteringMultiple --masterDir "$(realpath ./)" \
  --numThreads "$2" --overWriteDirs --overWriteLog \
  --logFile mipClustering_run1 \
  --par /opt/resources/clustering_pars/illumina_collapseHomoploymers.pars.txt \
  --countEndGaps
MIPWrangler mipPopulationClusteringMultiple --masterDir $(realpath ./) --numThreads $2 --overWriteDirs --overWriteLog --logFile mipPopClustering_run1 --cutoff 0 --countEndGaps
#nohup MIPWrangler mav --masterDir $(realpath ./) --numThreads $2 --port $((10000+$1)) --name mip$1 &
MIPWrangler mipPopulationClusteringMultiple --masterDir $(realpath ./) --numThreads $2 --overWriteDirs --overWriteLog --logFile mipPopClustering_run1 --cutoff 0 --countEndGaps --fraccutoff $3
11 changes: 0 additions & 11 deletions bin/runMIPWranglerNoCutoffCurrent.sh

This file was deleted.

6 changes: 4 additions & 2 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ MIPTools (development version)
New Features
------------

- Add an additional argument to the ``wrangler`` app to control the population
clustering fraction cutoff defined by :github:repo:`MIPWrangler
<bailey-lab/MIPWrangler>` (:github:user:`arisp99`, :github:pull:`39`).
- Add the capability to freeze software version numbers when building the
container. Additionally, the version number for key software tools has been
fixed (:github:user:`arisp99`,
:github:pull:`32`).
fixed (:github:user:`arisp99`, :github:pull:`32`).
- Install :github:repo:`mipscripts <bailey-lab/mipscripts>`, which contains
additional tools for analysis pipelines.
- Perform additional argument parsing to ensure arguments are formatted
Expand Down
2 changes: 2 additions & 0 deletions docs/app-reference/wrangler-app.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Options
# Optional
-c Number of available processors to use.
-f Population fraction cutoff used by MIPWrangler.
-h Print the help page.
-k Keep intermediate files generated by MIPWrangler.
-m Minimum capture length for stitching excluding probe arms.
Expand All @@ -44,6 +45,7 @@ Defaults
# Optional
-c Default: 1
-f Default: 0.005
-k Default: false
-m Default: 100
-n Default: 1
Expand Down
16 changes: 15 additions & 1 deletion src/generate_wrangler_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@
help="Minimum capture length for stitching, excluding probe arms.",
type=int,
)
# Population clustering fraction cutoff (-f). Parsed as a float with a default
# of 0.005; the parsed value is later stringified and handed to the cluster
# script as its third positional argument, after the server number and the
# CPU count.
parser.add_argument(
"-f",
"--population-fraction-cutoff",
help="Population clustering fraction cutoff.",
default=0.005,
type=float,
)

# Parse arguments from command line
args = vars(parser.parse_args())
Expand Down Expand Up @@ -312,7 +319,14 @@
)
wrangler_commands = [
["cd", "analysis"],
["nohup", "bash", cluster_script, str(server_num), str(cpu_count)],
[
"nohup",
"bash",
cluster_script,
str(server_num),
str(cpu_count),
str(args["population_fraction_cutoff"]),
],
["mv", os.path.join(analysis_dir, "analysis/logs"), analysis_dir],
["mv", os.path.join(analysis_dir, "analysis/scripts"), analysis_dir],
["mv", os.path.join(analysis_dir, "analysis/resources"), analysis_dir],
Expand Down

0 comments on commit 25ef3b9

Please sign in to comment.