From b4b81a1a13dad81fc826c91b32e2169d983244c6 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 11 Oct 2024 16:19:44 +0200 Subject: [PATCH 1/9] output multiple depth files --- modules/local/convert_depths.nf | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modules/local/convert_depths.nf b/modules/local/convert_depths.nf index f61e0c29..9d6f0e67 100644 --- a/modules/local/convert_depths.nf +++ b/modules/local/convert_depths.nf @@ -11,14 +11,22 @@ process CONVERT_DEPTHS { output: // need to add empty val because representing reads as we dont want maxbin to calculate for us. - tuple val(meta), path(fasta), val([]), path("*_mb2_depth.txt"), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path(fasta), val([]), path("*_mb2_depth_*.txt"), emit: output + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" """ gunzip -f $depth - bioawk -t '{ { if (NR > 1) { { print \$1, \$3 } } } }' ${depth.toString() - '.gz'} > ${prefix}_mb2_depth.txt + + # Determine the number of abundance columns + n_abund=\$(awk 'NR==1 {print int((NF-3)/2)}' ${depth.toString() - '.gz'}) + + # Generate abundance files for each read set + for i in \$(seq 1 \$n_abund); do + col=\$((i*2+2)) + bioawk -t '{if (NR > 1) {print \$1, \$'"\$col"'}}' ${depth.toString() - '.gz'} > ${prefix}_mb2_depth_\$i.txt + done cat <<-END_VERSIONS > versions.yml "${task.process}": From a6a0732b3b221757f1e13559fd38b9ef63e6aae3 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 11 Oct 2024 16:20:37 +0200 Subject: [PATCH 2/9] accept multiple depth files --- modules/nf-core/maxbin2/main.nf | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/maxbin2/main.nf b/modules/nf-core/maxbin2/main.nf index d5f49344..f67f64d0 100644 --- a/modules/nf-core/maxbin2/main.nf +++ b/modules/nf-core/maxbin2/main.nf @@ -18,7 +18,8 @@ process MAXBIN2 { tuple val(meta), path("*.noclass.gz") , emit: unbinned_fasta 
tuple val(meta), path("*.tooshort.gz"), emit: tooshort_fasta tuple val(meta), path("*_bin.tar.gz") , emit: marker_bins , optional: true - tuple val(meta), path("*_gene.tar.gz"), emit: marker_genes, optional: true + tuple val(meta), path("*.abundance") , emit: marker_genes, optional: true + tuple val(meta), path("*_gene.tar.gz"), emit: abundance , optional: true path "versions.yml" , emit: versions when: @@ -27,7 +28,15 @@ process MAXBIN2 { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def associate_files = reads ? "-reads $reads" : "-abund $abund" + def associate_files = "" + if ( reads ) { + associate_files = "-reads $reads" + } else if ( abund instanceof List ) { + associate_files = "-abund ${abund[0]}" + for (i in 2..abund.size()) { associate_files += " -abund$i ${abund[i-1]}" } + } else { + associate_files = "-abund $abund" + } """ mkdir input/ && mv $contigs input/ run_MaxBin.pl \\ From 0184057a01118ff1ae52bb98ee96657cfbede240 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 11 Oct 2024 16:20:55 +0200 Subject: [PATCH 3/9] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 494e967b..31a83120 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#690](https://github.com/nf-core/mag/pull/690) - MaxBin2 now uses the abundance information from different samples as expected (reported by @uel3 and fixed by @d4straub) + ### `Dependencies` ### `Deprecated` From a2ffbf0c40b6b2e695162a29ba559183aa97a547 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 11 Oct 2024 16:28:41 +0200 Subject: [PATCH 4/9] nf-core modules patch --- modules.json | 211 ++++++++++++++++++++------- modules/nf-core/maxbin2/main.nf | 4 +- modules/nf-core/maxbin2/maxbin2.diff | 30 ++++ 3 files changed, 190 insertions(+), 55 deletions(-) create mode 100644 
modules/nf-core/maxbin2/maxbin2.diff diff --git a/modules.json b/modules.json index 5f4eb8bb..368d8685 100644 --- a/modules.json +++ b/modules.json @@ -8,244 +8,341 @@ "adapterremoval": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "aria2": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/aria2/aria2.diff" }, "bbmap/bbnorm": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/consensus": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "centrifuge/centrifuge": { "branch": "master", "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "centrifuge/kreport": { "branch": "master", "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/centrifuge/kreport/centrifuge-kreport.diff" }, "checkm/lineagewf": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "checkm/qa": { "branch": "master", "git_sha": 
"911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "concoct/concoct": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": ["fasta_binning_concoct"] + "installed_by": [ + "fasta_binning_concoct" + ] }, "concoct/concoctcoveragetable": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": ["fasta_binning_concoct"] + "installed_by": [ + "fasta_binning_concoct" + ] }, "concoct/cutupfasta": { "branch": "master", "git_sha": "73a6d7e6077b88aba1c5d6805635d79d6718270c", - "installed_by": ["fasta_binning_concoct"] + "installed_by": [ + "fasta_binning_concoct" + ] }, "concoct/extractfastabins": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": ["fasta_binning_concoct"] + "installed_by": [ + "fasta_binning_concoct" + ] }, "concoct/mergecutupclustering": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": ["fasta_binning_concoct"] + "installed_by": [ + "fasta_binning_concoct" + ] }, "dastool/dastool": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "dastool/fastatocontig2bin": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastp": { "branch": "master", "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "freebayes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genomad/download": { "branch": "master", "git_sha": 
"ca813f3f73adedf3547a5a677e992d9d43a71870", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "genomad/endtoend": { "branch": "master", "git_sha": "ca813f3f73adedf3547a5a677e992d9d43a71870", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gtdbtk/classifywf": { "branch": "master", "git_sha": "7b9ce4b817926f17ec82cc0099d2d0ff095a2fac", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunc/downloaddb": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunc/mergecheckm": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunc/run": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krakentools/kreport2krona": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/kronadb": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "maxbin2": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ], + "patch": "modules/nf-core/maxbin2/maxbin2.diff" }, "metabat2/jgisummarizebamcontigdepths": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, 
"metabat2/metabat2": { "branch": "master", "git_sha": "d2e220fdec3aa2f4482c70017df4cdf8a4c94f27", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "metaeuk/easypredict": { "branch": "master", "git_sha": "30d06da5bd7ae67be32758bf512cd75a4325d386", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mmseqs/databases": { "branch": "master", "git_sha": "699e078133f580548aeb43114f93ac29928c6143", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "nanolyse": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "nanoplot": { "branch": "master", "git_sha": "3135090b46f308a260fc9d5991d7d2f9c0785309", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "porechop/porechop": { "branch": "master", "git_sha": "1d68c7f248d1a480c5959548a9234602b771199e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "prodigal": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "prokka": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pydamage/analyze": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pydamage/filter": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "seqtk/mergepe": { "branch": "master", 
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tiara/tiara": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -254,25 +351,33 @@ "fasta_binning_concoct": { "branch": "master", "git_sha": "c60c14b285b89bdd0607e371417dadb80385ad6e", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/maxbin2/main.nf b/modules/nf-core/maxbin2/main.nf index f67f64d0..5c558856 100644 --- a/modules/nf-core/maxbin2/main.nf +++ b/modules/nf-core/maxbin2/main.nf @@ -18,8 +18,8 @@ process MAXBIN2 { tuple val(meta), path("*.noclass.gz") , emit: unbinned_fasta tuple val(meta), path("*.tooshort.gz"), emit: tooshort_fasta tuple val(meta), path("*_bin.tar.gz") , emit: marker_bins , optional: true - tuple val(meta), path("*.abundance") , emit: marker_genes, optional: true - tuple val(meta), path("*_gene.tar.gz"), emit: abundance , optional: true + tuple val(meta), path("*.abundance") , emit: abundance , optional: true + tuple val(meta), path("*_gene.tar.gz"), emit: marker_genes, optional: true path "versions.yml" , emit: versions 
when: diff --git a/modules/nf-core/maxbin2/maxbin2.diff b/modules/nf-core/maxbin2/maxbin2.diff new file mode 100644 index 00000000..5f7e04c6 --- /dev/null +++ b/modules/nf-core/maxbin2/maxbin2.diff @@ -0,0 +1,30 @@ +Changes in module 'nf-core/maxbin2' +--- modules/nf-core/maxbin2/main.nf ++++ modules/nf-core/maxbin2/main.nf +@@ -18,6 +18,7 @@ + tuple val(meta), path("*.noclass.gz") , emit: unbinned_fasta + tuple val(meta), path("*.tooshort.gz"), emit: tooshort_fasta + tuple val(meta), path("*_bin.tar.gz") , emit: marker_bins , optional: true ++ tuple val(meta), path("*.abundance") , emit: abundance , optional: true + tuple val(meta), path("*_gene.tar.gz"), emit: marker_genes, optional: true + path "versions.yml" , emit: versions + +@@ -27,7 +28,15 @@ + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def associate_files = reads ? "-reads $reads" : "-abund $abund" ++ def associate_files = "" ++ if ( reads ) { ++ associate_files = "-reads $reads" ++ } else if ( abund instanceof List ) { ++ associate_files = "-abund ${abund[0]}" ++ for (i in 2..abund.size()) { associate_files += " -abund$i ${abund[i-1]}" } ++ } else { ++ associate_files = "-abund $abund" ++ } + """ + mkdir input/ && mv $contigs input/ + run_MaxBin.pl \\ + +************************************************************ From a17b1a4b852a4e88fc515adedd03ff4ec2619537 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 11 Oct 2024 16:35:53 +0200 Subject: [PATCH 5/9] nf-core modules patch now for modules.json --- modules.json | 210 +++++++++++++-------------------------------------- 1 file changed, 53 insertions(+), 157 deletions(-) diff --git a/modules.json b/modules.json index 368d8685..63fbc190 100644 --- a/modules.json +++ b/modules.json @@ -8,341 +8,245 @@ "adapterremoval": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "aria2": { "branch": "master", "git_sha": 
"911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/aria2/aria2.diff" }, "bbmap/bbnorm": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/consensus": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "centrifuge/centrifuge": { "branch": "master", "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "centrifuge/kreport": { "branch": "master", "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/centrifuge/kreport/centrifuge-kreport.diff" }, "checkm/lineagewf": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "checkm/qa": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "concoct/concoct": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": [ - "fasta_binning_concoct" - ] + "installed_by": ["fasta_binning_concoct"] }, "concoct/concoctcoveragetable": { "branch": "master", "git_sha": 
"baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": [ - "fasta_binning_concoct" - ] + "installed_by": ["fasta_binning_concoct"] }, "concoct/cutupfasta": { "branch": "master", "git_sha": "73a6d7e6077b88aba1c5d6805635d79d6718270c", - "installed_by": [ - "fasta_binning_concoct" - ] + "installed_by": ["fasta_binning_concoct"] }, "concoct/extractfastabins": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": [ - "fasta_binning_concoct" - ] + "installed_by": ["fasta_binning_concoct"] }, "concoct/mergecutupclustering": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", - "installed_by": [ - "fasta_binning_concoct" - ] + "installed_by": ["fasta_binning_concoct"] }, "dastool/dastool": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "dastool/fastatocontig2bin": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastp": { "branch": "master", "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "freebayes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genomad/download": { "branch": "master", "git_sha": "ca813f3f73adedf3547a5a677e992d9d43a71870", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "genomad/endtoend": { "branch": "master", "git_sha": "ca813f3f73adedf3547a5a677e992d9d43a71870", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gtdbtk/classifywf": { "branch": "master", "git_sha": "7b9ce4b817926f17ec82cc0099d2d0ff095a2fac", - 
"installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunc/downloaddb": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunc/mergecheckm": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunc/run": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krakentools/kreport2krona": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/kronadb": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "krona/ktimporttaxonomy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "maxbin2": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/maxbin2/maxbin2.diff" }, "metabat2/jgisummarizebamcontigdepths": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "metabat2/metabat2": { "branch": "master", "git_sha": "d2e220fdec3aa2f4482c70017df4cdf8a4c94f27", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "metaeuk/easypredict": { "branch": "master", "git_sha": "30d06da5bd7ae67be32758bf512cd75a4325d386", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mmseqs/databases": { "branch": "master", 
"git_sha": "699e078133f580548aeb43114f93ac29928c6143", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "nanolyse": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "nanoplot": { "branch": "master", "git_sha": "3135090b46f308a260fc9d5991d7d2f9c0785309", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "porechop/porechop": { "branch": "master", "git_sha": "1d68c7f248d1a480c5959548a9234602b771199e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "prodigal": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "prokka": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pydamage/analyze": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pydamage/filter": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "seqtk/mergepe": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tiara/tiara": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "untar": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - 
"installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -351,33 +255,25 @@ "fasta_binning_concoct": { "branch": "master", "git_sha": "c60c14b285b89bdd0607e371417dadb80385ad6e", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From a29b48bbbc0cb5f9d752d7ca65f11ea4d8b36c24 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 14 Oct 2024 09:39:31 +0200 Subject: [PATCH 6/9] publish summary and abundance files in results --- conf/modules.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index b226ba01..be9fb47b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -657,6 +657,11 @@ process { path: { "${params.outdir}/GenomeBinning/MaxBin2/discarded" }, mode: params.publish_dir_mode, pattern: '*.tooshort.gz' + ], + [ + path: { "${params.outdir}/GenomeBinning/MaxBin2/" }, + mode: params.publish_dir_mode, + pattern: '*.{summary,abundance}' ] ] ext.prefix = { "${meta.assembler}-MaxBin2-${meta.id}" } From cde08a898715a5a1da4bdb5012613340fbacc6ab Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 14 Oct 2024 12:57:52 +0200 Subject: [PATCH 7/9] use sample names for MaxBin2 abund --- modules/local/convert_depths.nf | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/modules/local/convert_depths.nf b/modules/local/convert_depths.nf index 9d6f0e67..0c54e5c6 100644 --- 
a/modules/local/convert_depths.nf +++ b/modules/local/convert_depths.nf @@ -11,8 +11,8 @@ process CONVERT_DEPTHS { output: // need to add empty val because representing reads as we dont want maxbin to calculate for us. - tuple val(meta), path(fasta), val([]), path("*_mb2_depth_*.txt"), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path(fasta), val([]), path("*.abund"), emit: output + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -22,10 +22,15 @@ process CONVERT_DEPTHS { # Determine the number of abundance columns n_abund=\$(awk 'NR==1 {print int((NF-3)/2)}' ${depth.toString() - '.gz'}) + # Get column names + read -r header<${depth.toString() - '.gz'} + header=(\$header) + # Generate abundance files for each read set for i in \$(seq 1 \$n_abund); do col=\$((i*2+2)) - bioawk -t '{if (NR > 1) {print \$1, \$'"\$col"'}}' ${depth.toString() - '.gz'} > ${prefix}_mb2_depth_\$i.txt + name=\$( echo \${header[\$col-1]} | sed s/\\.bam\$// ) + bioawk -t '{if (NR > 1) {print \$1, \$'"\$col"'}}' ${depth.toString() - '.gz'} > \${name}.abund done cat <<-END_VERSIONS > versions.yml From e6875dea3a5d09d341ffe5869307c0ee8e523e0f Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 14 Oct 2024 13:00:42 +0200 Subject: [PATCH 8/9] force install latest MaxBin2 module --- modules.json | 5 +- modules/nf-core/maxbin2/environment.yml | 5 + modules/nf-core/maxbin2/main.nf | 5 +- modules/nf-core/maxbin2/maxbin2.diff | 30 ---- modules/nf-core/maxbin2/meta.yml | 166 ++++++++++++------ modules/nf-core/maxbin2/tests/main.nf.test | 47 +++++ .../nf-core/maxbin2/tests/main.nf.test.snap | 59 +++++++ 7 files changed, 231 insertions(+), 86 deletions(-) create mode 100644 modules/nf-core/maxbin2/environment.yml delete mode 100644 modules/nf-core/maxbin2/maxbin2.diff create mode 100644 modules/nf-core/maxbin2/tests/main.nf.test create mode 100644 modules/nf-core/maxbin2/tests/main.nf.test.snap diff --git a/modules.json b/modules.json 
index 25ab1b6c..21267891 100644 --- a/modules.json +++ b/modules.json @@ -169,9 +169,8 @@ }, "maxbin2": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], - "patch": "modules/nf-core/maxbin2/maxbin2.diff" + "git_sha": "283613159e079152f1336cef0db1c836086206e0", + "installed_by": ["modules"] }, "metabat2/jgisummarizebamcontigdepths": { "branch": "master", diff --git a/modules/nf-core/maxbin2/environment.yml b/modules/nf-core/maxbin2/environment.yml new file mode 100644 index 00000000..8a881999 --- /dev/null +++ b/modules/nf-core/maxbin2/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::maxbin2=2.2.7 diff --git a/modules/nf-core/maxbin2/main.nf b/modules/nf-core/maxbin2/main.nf index 5c558856..845c8e4e 100644 --- a/modules/nf-core/maxbin2/main.nf +++ b/modules/nf-core/maxbin2/main.nf @@ -2,7 +2,7 @@ process MAXBIN2 { tag "$meta.id" label 'process_medium' - conda "bioconda::maxbin2=2.2.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/maxbin2:2.2.7--he1b5a44_2' : 'biocontainers/maxbin2:2.2.7--he1b5a44_2' }" @@ -13,12 +13,12 @@ process MAXBIN2 { output: tuple val(meta), path("*.fasta.gz") , emit: binned_fastas tuple val(meta), path("*.summary") , emit: summary + tuple val(meta), path("*.abundance") , emit: abundance , optional: true tuple val(meta), path("*.log.gz") , emit: log tuple val(meta), path("*.marker.gz") , emit: marker_counts tuple val(meta), path("*.noclass.gz") , emit: unbinned_fasta tuple val(meta), path("*.tooshort.gz"), emit: tooshort_fasta tuple val(meta), path("*_bin.tar.gz") , emit: marker_bins , optional: true - tuple val(meta), path("*.abundance") , emit: abundance , optional: true tuple val(meta), path("*_gene.tar.gz"), emit: marker_genes, optional: true path "versions.yml" , emit: versions @@ -28,6 +28,7 @@ process MAXBIN2 { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + if (reads && abund) { error("ERROR: MaxBin2 can only accept one of `reads` or `abund`, no both. 
Check input.") } def associate_files = "" if ( reads ) { associate_files = "-reads $reads" diff --git a/modules/nf-core/maxbin2/maxbin2.diff b/modules/nf-core/maxbin2/maxbin2.diff deleted file mode 100644 index 5f7e04c6..00000000 --- a/modules/nf-core/maxbin2/maxbin2.diff +++ /dev/null @@ -1,30 +0,0 @@ -Changes in module 'nf-core/maxbin2' ---- modules/nf-core/maxbin2/main.nf -+++ modules/nf-core/maxbin2/main.nf -@@ -18,6 +18,7 @@ - tuple val(meta), path("*.noclass.gz") , emit: unbinned_fasta - tuple val(meta), path("*.tooshort.gz"), emit: tooshort_fasta - tuple val(meta), path("*_bin.tar.gz") , emit: marker_bins , optional: true -+ tuple val(meta), path("*.abundance") , emit: abundance , optional: true - tuple val(meta), path("*_gene.tar.gz"), emit: marker_genes, optional: true - path "versions.yml" , emit: versions - -@@ -27,7 +28,15 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -- def associate_files = reads ? "-reads $reads" : "-abund $abund" -+ def associate_files = "" -+ if ( reads ) { -+ associate_files = "-reads $reads" -+ } else if ( abund instanceof List ) { -+ associate_files = "-abund ${abund[0]}" -+ for (i in 2..abund.size()) { associate_files += " -abund$i ${abund[i-1]}" } -+ } else { -+ associate_files = "-abund $abund" -+ } - """ - mkdir input/ && mv $contigs input/ - run_MaxBin.pl \\ - -************************************************************ diff --git a/modules/nf-core/maxbin2/meta.yml b/modules/nf-core/maxbin2/meta.yml index 7971d481..9546afb1 100644 --- a/modules/nf-core/maxbin2/meta.yml +++ b/modules/nf-core/maxbin2/meta.yml @@ -11,69 +11,133 @@ keywords: - contigs tools: - maxbin2: - description: MaxBin is software for binning assembled metagenomic sequences based on an Expectation-Maximization algorithm. + description: MaxBin is software for binning assembled metagenomic sequences based + on an Expectation-Maximization algorithm. 
homepage: https://sourceforge.net/projects/maxbin/ documentation: https://sourceforge.net/projects/maxbin/ tool_dev_url: https://sourceforge.net/projects/maxbin/ doi: "10.1093/bioinformatics/btv638" licence: ["BSD 3-clause"] - + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - contigs: - type: file - description: Multi FASTA file containing assembled contigs of a given sample - pattern: "*.fasta" - - reads: - type: file - description: Reads used to assemble contigs in FASTA or FASTQ format. Do not supply at the same time as abundance files. - pattern: "*.fasta" - - abund: - type: file - description: Contig abundance files, i.e. reads against each contig. See MaxBin2 README for details. Do not supply at the same time as read files. - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - contigs: + type: file + description: Multi FASTA file containing assembled contigs of a given sample + pattern: "*.fasta" + - reads: + type: file + description: Reads used to assemble contigs in FASTA or FASTQ format. Do not + supply at the same time as abundance files. + pattern: "*.fasta" + - abund: + type: list + description: One or more contig abundance files, i.e. average depth of reads against each contig. See MaxBin2 + README for details. Do not supply at the same time as read files. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - binned_fastas: - type: file - description: Binned contigs, one per bin designated with numeric IDs - pattern: "*.fasta.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.fasta.gz": + type: file + description: Binned contigs, one per bin designated with numeric IDs + pattern: "*.fasta.gz" - summary: - type: file - description: Summary file describing which contigs are being classified into which bin - pattern: "*.summary" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.summary": + type: file + description: Summary file describing which contigs are being classified into + which bin + pattern: "*.summary" + - abundance: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.abundance": + type: file + description: Abundance of each bin if multiple abundance files were supplied + to MaxBin2 + pattern: "*.abundance" - log: - type: file - description: Log file recording the core steps of MaxBin algorithm - pattern: "*.log.gz" - - marker: - type: file - description: Marker gene presence numbers for each bin - pattern: "*.marker.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log.gz": + type: file + description: Log file recording the core steps of MaxBin algorithm + pattern: "*.log.gz" + - marker_counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.marker.gz": + type: file + description: Marker counts + pattern: "*.marker.gz" - unbinned_fasta: - type: file - description: All sequences that pass the minimum length threshold but are not classified successfully. - pattern: "*.noclass.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.noclass.gz": + type: file + description: All sequences that pass the minimum length threshold but are not + classified successfully.
+ pattern: "*.noclass.gz" - tooshort_fasta: - type: file - description: All sequences that do not meet the minimum length threshold. - pattern: "*.tooshort.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tooshort.gz": + type: file + description: All sequences that do not meet the minimum length threshold. + pattern: "*.tooshort.gz" + - marker_bins: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_bin.tar.gz": + type: file + description: Marker bins + pattern: "*_bin.tar.gz" - marker_genes: - type: file - description: All sequences that do not meet the minimum length threshold. - pattern: "*.marker_of_each_gene.tar.gz" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_gene.tar.gz": + type: file + description: Marker genes + pattern: "*_gene.tar.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/maxbin2/tests/main.nf.test b/modules/nf-core/maxbin2/tests/main.nf.test new file mode 100644 index 00000000..efb23c2b --- /dev/null +++ b/modules/nf-core/maxbin2/tests/main.nf.test @@ -0,0 +1,47 @@ + +nextflow_process { + + name "Test Process MAXBIN2" + script "../main.nf" + process "MAXBIN2" + + tag "modules" + tag "modules_nfcore" + tag "maxbin2" + + test("test-maxbin2") { + + when { + process { + """ + input[0] = [ + [ id:'test1', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz', checkIfExists: true), + [] + ] + + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot( + process.out.binned_fastas, + process.out.summary, + file(process.out.log[0][1]).name, + process.out.marker_counts, + file(process.out.unbinned_fasta[0][1]).name, // empty + process.out.tooshort_fasta, + file(process.out.marker_bins[0][1]).name, // unstable + process.out.marker_genes, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/maxbin2/tests/main.nf.test.snap b/modules/nf-core/maxbin2/tests/main.nf.test.snap new file mode 100644 index 00000000..caecef8e --- /dev/null +++ b/modules/nf-core/maxbin2/tests/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "test-maxbin2": { + "content": [ + [ + [ + { + "id": "test1", + "single_end": false + }, + [ + "test1.001.fasta.gz:md5,92eeca569534d770af91a1c07e62afa9", + "test1.002.fasta.gz:md5,628ef3b2e6647aed95511c28ea0dc229" + ] + ] + ], + [ + [ + { + "id": "test1", + "single_end": false + }, + "test1.summary:md5,7cdbedbfadd7a96203bdeca55ad822da" + ] + ], + "test1.log.gz", + [ + [ + { + "id": "test1", + "single_end": false + }, + "test1.marker.gz:md5,928994e84b9d723a8a48841432e1a262" + ] + ], + "test1.noclass.gz", + [ + [ + { + "id": "test1", + "single_end": false + }, + "test1.tooshort.gz:md5,b4e48e83637217aa9eba7f27f5990b24" + ] + ], + "test1.marker_of_each_bin.tar.gz", + [ + + ], + [ + "versions.yml:md5,a8b5754ee5df020d62ff25306376fc0a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T14:56:43.557114" + } +} \ No newline at end of file From f2859c77100cb1d6ec1fbdb6a8256b9a6983dd05 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 16 Oct 2024 15:33:08 +0200 Subject: [PATCH 9/9] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 983de13a..3aa6b0eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#674](https://github.com/nf-core/mag/pull/674) - Make longread preprocessing a subworkflow (added by @muabnezor) - [#674](https://github.com/nf-core/mag/pull/674) - Add porechop and filtlong logs to multiqc (added by @muabnezor) - [#674](https://github.com/nf-core/mag/pull/674) - Change local filtlong module to the official nf-core/filtlong module (added by @muabnezor) -- [#690](https://github.com/nf-core/mag/pull/690) - MaxBin2 is using the abundance information from different samples now as expected (reported by @uel3 and fixed by @d4straub) +- [#690](https://github.com/nf-core/mag/pull/690) - MaxBin2 now uses the abundance information from different samples rather than an average (reported by @uel3 and fixed by @d4straub) ### `Dependencies`