From 28cbdd0c81fc7c65c594cb2d22b3f1e7708728e4 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 17:31:46 +0200 Subject: [PATCH 001/110] Remove old test --- tests/test_default.yml | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 tests/test_default.yml diff --git a/tests/test_default.yml b/tests/test_default.yml deleted file mode 100644 index d7994f56..00000000 --- a/tests/test_default.yml +++ /dev/null @@ -1,7 +0,0 @@ -- name: Run default pipeline - command: nextflow run main.nf -profile test --outdir results --genome GRCh37 - tags: - - default - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e From 5f6517646aabb414b3a93adba40c4d13d9134cec Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 17:33:20 +0200 Subject: [PATCH 002/110] Add nf-test --- nf-test.config | 3 +- tests/pipeline/test_all.nf.test | 142 ++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 tests/pipeline/test_all.nf.test diff --git a/nf-test.config b/nf-test.config index b466a958..607a2e68 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,12 +1,13 @@ config { // location for all nf-tests - testsDir "." 
+ testsDir "tests/pipeline" // nf-test directory including temporary files for each test workDir System.getenv("NXF_TEST_DIR") ?: ".nf-test" // location of an optional nextflow.config file specific for executing tests configFile "tests/config/nf-test.config" + configFile "./nextflow.config" // run all test with the defined docker profile from the main nextflow.config profile "" diff --git a/tests/pipeline/test_all.nf.test b/tests/pipeline/test_all.nf.test new file mode 100644 index 00000000..cb401dc5 --- /dev/null +++ b/tests/pipeline/test_all.nf.test @@ -0,0 +1,142 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "pipeline" + tag "pipeline/phaseimpute" + + + test("Check test_glimpse1") { + config "../../conf/test.config" + when { + params { + outdir = "$outputDir" + max_memory = "5.GB" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip().size() == 1756 } + ) + } + + } + + /*test("Check test_glimpse2") { + config "../../conf/test_glimpse2.config" + when { + params { + outdir = "$outputDir" + max_memory = "5.GB" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path("$outputDir/imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz").linesGzip().size() == 1756 } + ) + } + + }*/ + + test("Check test_quilt") { + config "../../conf/test_quilt.config" + when { + params { + outdir = "$outputDir" + max_memory = "5.GB" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path("$outputDir/imputation/quilt/concat/NA12878_quilt.vcf.gz").linesGzip().size() == 1756 } + ) + } + + } + + test("Check test_stitch") { + config "../../conf/test_stitch.config" + when { + params { + outdir = "$outputDir" + max_memory = "5.GB" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path("$outputDir/imputation/stitch/concat/NA12878_stitch.vcf.gz").linesGzip().size() == 1756 } + ) + } + + } + + 
test("Check test_sim") { + config "../../conf/test_sim.config" + when { + params { + outdir = "$outputDir" + max_memory = "5.GB" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/simulation/"), + ).match() } + ) + } + + } + + test("Check test_validate") { + config "../../conf/test_validate.config" + when { + params { + outdir = "$outputDir" + max_memory = "5.GB" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("$outputDir/validate/"), + ).match() } + ) + } + + } + + test("Check test_all") { + config "../../conf/test_all.config" + when { + params { + outdir = "$outputDir" + max_memory = "5.GB" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip().size() == 1756 } + { assert snapshot( + path("$outputDir/simulation/"), + ).match() } + ) + } + + } + +} From d43c59bbae3527b2909b5ad503628f9f8afd3d3b Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 13 May 2024 16:58:14 +0200 Subject: [PATCH 003/110] Uniformize config files --- conf/steps/imputation_glimpse1.config | 36 ++++++--------- conf/steps/imputation_quilt.config | 27 +++--------- conf/steps/imputation_stitch.config | 63 +++++++-------------------- conf/steps/initialisation.config | 6 +-- conf/steps/panel_prep.config | 12 +++-- conf/steps/simulation.config | 13 +----- conf/steps/validation.config | 25 ++++------- conf/test_all.config | 2 +- conf/test_quilt.config | 2 +- conf/test_sim.config | 2 +- conf/test_stitch.config | 2 +- conf/test_validate.config | 2 +- 12 files changed, 58 insertions(+), 134 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 521c8beb..96aa857d 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -13,13 +13,9 @@ process { // Configuration for the glimpse1 imputation subworkflow withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:.*' { - publishDir = [ - path: { "${params.outdir}/imputation/glimpse1/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: false - ] + publishDir = [ enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_MPILEUP' { ext.args = [ "-I", @@ -34,18 +30,18 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_ANNOTATE' { - ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" + ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.annotate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_INDEX' { - ext.args = "--tbi" + ext.args = "--tbi" } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' { publishDir = [ - path: { "${params.outdir}/imputation/glimpse1/" }, - mode: params.publish_dir_mode, + path : { "${params.outdir}/imputation/glimpse1/" }, + mode : params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } @@ -60,9 +56,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_PHASE' { - ext.args = [ - "--impute-reference-only-variants" - ].join(' ') + ext.args = ["--impute-reference-only-variants"].join(' ') ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.phase" } ext.suffix = "bcf" publishDir = [ enabled: false ] @@ -79,24 +73,20 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:INDEX_LIGATE' { publishDir = [ path: { "${params.outdir}/imputation/glimpse1" } + mode: params.publish_dir_mode, ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:.*' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:.*' { publishDir = [ - [ - path: { "${params.outdir}/imputation/glimpse1/concat" }, - mode: params.publish_dir_mode, - ], + path: { "${params.outdir}/imputation/glimpse1/concat" }, + mode: params.publish_dir_mode, ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:BCFTOOLS_CONCAT' { - ext.args = {[ - "--ligate", - "--output-type z", - ].join(" ").trim()} - ext.prefix = { "${meta.id}_glimpse1" } + ext.args = ["--ligate", "--output-type z"].join(' ') + ext.prefix = { "${meta.id}_glimpse1" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:BCFTOOLS_INDEX' { diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config index f43f5119..9801d2ab 100644 --- a/conf/steps/imputation_quilt.config +++ b/conf/steps/imputation_quilt.config @@ -12,14 +12,6 @@ process { - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:.*' { publishDir = [ path: { "${params.outdir}/prep_panel/chunks/" }, @@ -39,10 +31,8 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:.*' { publishDir = [ - [ - path: { "${params.outdir}/imputation/quilt/" }, - mode: 
params.publish_dir_mode, - ], + path: { "${params.outdir}/imputation/quilt/" }, + mode: params.publish_dir_mode, ] } @@ -67,19 +57,14 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:.*' { publishDir = [ - [ - path: { "${params.outdir}/imputation/quilt/concat" }, - mode: params.publish_dir_mode, - ], + path: { "${params.outdir}/imputation/quilt/concat" }, + mode: params.publish_dir_mode, ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:BCFTOOLS_CONCAT' { - ext.args = {[ - "--ligate", - "--output-type z", - ].join(" ").trim()} - ext.prefix = { "${meta.id}_quilt" } + ext.args = ["--ligate", "--output-type z",].join(' ') + ext.prefix = { "${meta.id}_quilt" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:BCFTOOLS_INDEX' { diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config index 920255e0..ea3395f3 100644 --- a/conf/steps/imputation_stitch.config +++ b/conf/steps/imputation_stitch.config @@ -12,14 +12,6 @@ process { - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:.*' { publishDir = [ path: { "${params.outdir}/prep_panel/posfile/" }, @@ -29,43 +21,26 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:GAWK' { - ext.args = "'{ key = \$1 FS \$2 } !seen[key]++'" - ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } - ext.suffix = ".txt" + ext.args = "'{ key = \$1 FS \$2 } !seen[key]++'" // Remove duplicates + ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } + ext.suffix = "txt" } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_INPUT_STITCH:BCFTOOLS_NORM' { - ext.args = '-m +any --output-type z' - ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } - maxRetries = 2 + ext.args = '-m +any --output-type z' + ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } publishDir = 
[enabled: false] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_INPUT_STITCH:BCFTOOLS_VIEW' { - ext.args = '-v snps -Oz' - ext.prefix = { "${meta.id}_${meta.chr}_biallelic" } - maxRetries = 2 - publishDir = [enabled: false] - - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_INPUT_STITCH:BCFTOOLS_INDEX' { - maxRetries = 2 - publishDir = [enabled: false] - - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_INPUT_STITCH:BCFTOOLS_INDEX_2' { - ext.args = '--tbi' - maxRetries = 2 + ext.args = '-v snps -Oz' + ext.prefix = { "${meta.id}_${meta.chr}_biallelic" } publishDir = [enabled: false] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:BCFTOOLS_QUERY' { - ext.args = [ - "-f'%CHROM\t%POS\t%REF\t%ALT\\n'", - ].join(' ') + ext.args = "-f'%CHROM\t%POS\t%REF\t%ALT\\n'" ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } publishDir = [enabled: false] } @@ -79,33 +54,25 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:BCFTOOLS_INDEX' { - ext.args = '--tbi' - maxRetries = 2 + ext.args = '--tbi' publishDir = [enabled: false] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:.*' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:.*' { publishDir = [ - [ - path: { "${params.outdir}/imputation/stitch/concat" }, - mode: params.publish_dir_mode, - ], + path: { "${params.outdir}/imputation/stitch/concat" }, + mode: params.publish_dir_mode, ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:BCFTOOLS_CONCAT' { - ext.args = {[ - "--ligate", - "--output-type z", - ].join(" ").trim()} - ext.prefix = { "${meta.id}_stitch" } + ext.args = ["--ligate", "--output-type z"].join(' ') + ext.prefix = { "${meta.id}_stitch" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_STITCH:BCFTOOLS_INDEX' { - ext.args = "--tbi" + ext.args = "--tbi" ext.prefix = { "${meta.id}_stitch" } } - - } diff --git a/conf/steps/initialisation.config b/conf/steps/initialisation.config index 7cc8daf5..59d44707 100644 --- 
a/conf/steps/initialisation.config +++ b/conf/steps/initialisation.config @@ -12,10 +12,6 @@ process { withName: 'PIPELINE_INITIALISATION:.*' { - publishDir = [ - path: { "${params.outdir}/initialisation/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - enabled: false - ] + publishDir = [ enabled: false ] } } diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index c5e0d944..99d8028b 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -20,11 +20,15 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:VCF_CHR_RENAME:BCFTOOLS_ANNOTATE' { - ext.args = [ - "-Oz", - "--no-version" - ].join(' ') + ext.args = ["-Oz", "--no-version"].join(' ') + ext.prefix = { "${meta.id}_chrrename" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHR_CHECK:VCF_CHR_RENAME:GAWK' { + ext.args = 'BEGIN {FS="\\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { sub(/^chr/, "", $1); if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' ext.prefix = { "${meta.id}_chrrename" } + ext.suffix = "txt" publishDir = [ enabled: false ] } diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index 412c82a4..ff894175 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -12,16 +12,9 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_REGION:.*' { - publishDir = [ - path: { "${params.outdir}/simulation/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: false - ] + publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_REGION:SAMTOOLS_VIEW' { - ext.args = [ - ].join(' ') ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:.*' { @@ -32,13 +25,9 @@ process { ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_COVERAGE' { - ext.args = [ - ].join(' ') ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.stats" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { - ext.args = [ - ].join(' ') ext.prefix = { "${meta.id}_D${meta.depth}_R${meta.region.replace(':','_')}" } } } diff --git a/conf/steps/validation.config b/conf/steps/validation.config index d84d2c63..ae74def8 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -13,12 +13,7 @@ process { // Configuration for the validation step withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:.*' { - publishDir = [ - path: { "${params.outdir}/validation/truth" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: false - ] + publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_MPILEUP' { ext.args = [ @@ -34,32 +29,29 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_ANNOTATE' { - ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" + ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz"].join(' ') ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.annotate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_INDEX' { - ext.args = "--tbi" + ext.args = "--tbi" } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { + ext.prefix = { "${meta.id}_truth_concat" } publishDir = [ path: { "${params.outdir}/validation/concat" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - ext.prefix = { "${meta.id}_truth_concat" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { - ext.args = {[ - "--ligate", - "--output-type z", - ].join(" ").trim()} + ext.args = ["--ligate", "--output-type z",].join(' ') } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_INDEX' { - ext.args = "--tbi" + ext.args = "--tbi" } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { @@ -71,12 +63,13 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GLIMPSE2_CONCORDANCE' { + ext.args = "--out-r2-per-site" ext.prefix = { "${meta.id}.concordance" } - ext.args = "--out-r2-per-site" publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:CONCATENATE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { + ext.args = "'(NR == 1) || (FNR > 1)'" // Skip header line ext.suffix = { "txt" } } diff --git a/conf/test_all.config b/conf/test_all.config index 3e95cb32..fcd7e356 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -25,7 +25,7 @@ params { depth = 1 // Genome references - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" panel = "${projectDir}/tests/csv/panel.csv" phased = true map = "${projectDir}/tests/csv/map.csv" diff --git a/conf/test_quilt.config b/conf/test_quilt.config index 27d31445..decaf5ec 100644 --- a/conf/test_quilt.config +++ b/conf/test_quilt.config @@ -24,7 +24,7 @@ params { input_region = "${projectDir}/tests/csv/region.csv" // Genome references - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" panel = 
"${projectDir}/tests/csv/panel.csv" phased = true diff --git a/conf/test_sim.config b/conf/test_sim.config index 6f18229a..d19d3aa7 100644 --- a/conf/test_sim.config +++ b/conf/test_sim.config @@ -25,6 +25,6 @@ params { depth = 1 // Genome references - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" step = "simulate" } diff --git a/conf/test_stitch.config b/conf/test_stitch.config index 11508421..1ef0f62f 100644 --- a/conf/test_stitch.config +++ b/conf/test_stitch.config @@ -24,7 +24,7 @@ params { input_region = "${projectDir}/tests/csv/region.csv" // Genome references - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" posfile = "${projectDir}/tests/csv/posfile.csv" // Impute parameters diff --git a/conf/test_validate.config b/conf/test_validate.config index c92c39bb..d47a4dbb 100644 --- a/conf/test_validate.config +++ b/conf/test_validate.config @@ -25,7 +25,7 @@ params { input_region = "${projectDir}/tests/csv/region.csv" // Genome references - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/reference_genome/21_22/hs38DH.chr21_22.fa" + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" panel = "${projectDir}/tests/csv/panel.csv" phased = true map = "${projectDir}/tests/csv/map.csv" From ffa00d057d0c1019995b5abbe042d375f735fcf6 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 13 May 2024 16:58:46 +0200 Subject: [PATCH 004/110] Uniformize subworkflows and process --- subworkflows/local/vcf_chr_rename/main.nf | 4 +- .../local/vcf_concordance_glimpse2/main.nf | 12 +++--- .../tests/main.nf.test | 40 +++++++++---------- .../tests/nextflow.config | 2 +- 
.../vcf_normalize_bcftools.nf | 4 +- subworkflows/local/vcf_region/main.nf | 8 ++-- .../local/vcf_sites_extract_bcftools/main.nf | 6 +-- 7 files changed, 37 insertions(+), 39 deletions(-) diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index 20c2e967..f169ecb9 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -1,6 +1,6 @@ include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate' include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' -include { GAWK as FAITOCHR } from '../../../modules/nf-core/gawk' +include { GAWK } from '../../../modules/nf-core/gawk' workflow VCF_CHR_RENAME { take: @@ -12,7 +12,7 @@ workflow VCF_CHR_RENAME { ch_versions = Channel.empty() // Generate the chromosome renaming file - FAITOCHR( + GAWK( ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }, Channel.of( 'BEGIN {FS="\\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { sub(/^chr/, "", $1); if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' diff --git a/subworkflows/local/vcf_concordance_glimpse2/main.nf b/subworkflows/local/vcf_concordance_glimpse2/main.nf index bc76d0f2..5bc0d74e 100644 --- a/subworkflows/local/vcf_concordance_glimpse2/main.nf +++ b/subworkflows/local/vcf_concordance_glimpse2/main.nf @@ -1,5 +1,5 @@ include { GLIMPSE2_CONCORDANCE } from '../../../modules/nf-core/glimpse2/concordance' -include { GAWK as CONCATENATE } from '../../../modules/nf-core/gawk' +include { GAWK } from '../../../modules/nf-core/gawk' include { ADD_COLUMNS } from '../../../modules/local/addcolumns' include { GUNZIP } from '../../../modules/nf-core/gunzip' @@ -43,18 +43,16 @@ workflow VCF_CONCORDANCE_GLIMPSE2 { ADD_COLUMNS(GUNZIP.out.gunzip) ch_versions = ch_versions.mix(ADD_COLUMNS.out.versions.first()) - CONCATENATE( + GAWK( ADD_COLUMNS.out.txt .map{meta, txt -> [["id":"TestQuality"], txt]} 
.groupTuple(), - Channel.of( - '(NR == 1) || (FNR > 1)' - ).collectFile(name:"program.txt") + [] ) - ch_versions = ch_versions.mix(CONCATENATE.out.versions.first()) + ch_versions = ch_versions.mix(GAWK.out.versions.first()) emit: - stats = CONCATENATE.out.output // [ meta, txt ] + stats = GAWK.out.output // [ meta, txt ] versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files } diff --git a/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test b/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test index e1aca9a5..44da70ef 100644 --- a/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test +++ b/subworkflows/local/vcf_concordance_glimpse2/tests/main.nf.test @@ -101,42 +101,42 @@ nextflow_workflow { allele_freq = Channel.fromList([ [ [panel:'1000GP', chr:'21'], // meta map - file(params.phaseimpute_testdata_path + "panel/21/1000GP.chr21.s.norel.sites.vcf.gz",checkIfExists:true), - file(params.phaseimpute_testdata_path + "panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi",checkIfExists:true) + file(params.pipelines_testdata_base_path + "panel/21/1000GP.chr21.s.norel.sites.vcf.gz",checkIfExists:true), + file(params.pipelines_testdata_base_path + "panel/21/1000GP.chr21.s.norel.sites.vcf.gz.csi",checkIfExists:true) ], [ [panel:'1000GP', chr:'22'], // meta map - file(params.phaseimpute_testdata_path + "panel/22/1000GP.chr22.s.norel.sites.vcf.gz",checkIfExists:true), - file(params.phaseimpute_testdata_path + "panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi",checkIfExists:true) + file(params.pipelines_testdata_base_path + "panel/22/1000GP.chr22.s.norel.sites.vcf.gz",checkIfExists:true), + file(params.pipelines_testdata_base_path + "panel/22/1000GP.chr22.s.norel.sites.vcf.gz.csi",checkIfExists:true) ] ]) truth = Channel.fromList([ [[id:'NA12878', chr:'21', region:'chr21:16570000-16610000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s.bcf",checkIfExists:true), - 
file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s.bcf.csi",checkIfExists:true)], + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s.bcf.csi",checkIfExists:true)], [[id:'NA12878', chr:'22', region:'chr22:16570000-16610000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s.bcf",checkIfExists:true), - file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s.bcf.csi",checkIfExists:true)], + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s.bcf.csi",checkIfExists:true)], [[id:'NA19401', chr:'21', region:'chr21:16570000-16610000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s.bcf",checkIfExists:true), - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s.bcf.csi",checkIfExists:true)], + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s.bcf.csi",checkIfExists:true)], [[id:'NA19401', chr:'22', region:'chr22:16570000-16610000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s.bcf",checkIfExists:true), - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s.bcf.csi",checkIfExists:true)] + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s.bcf.csi",checkIfExists:true)] ]) estimate = Channel.fromList([ [[id:'NA12878', chr:'21', region:'chr21:16650000-16700000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s_imputed.bcf",checkIfExists:true), - 
file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s_imputed.bcf.csi",checkIfExists:true)], + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s_imputed.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s_imputed.bcf.csi",checkIfExists:true)], [[id:'NA12878', chr:'22', region:'chr22:16650000-16700000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s_imputed.bcf",checkIfExists:true), - file(params.phaseimpute_testdata_path + "individuals/NA12878/NA12878.s_imputed.bcf.csi",checkIfExists:true)], + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s_imputed.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA12878/NA12878.s_imputed.bcf.csi",checkIfExists:true)], [[id:'NA19401', chr:'21', region:'chr21:16650000-16700000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s_imputed.bcf",checkIfExists:true), - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s_imputed.bcf.csi",checkIfExists:true)], + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s_imputed.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s_imputed.bcf.csi",checkIfExists:true)], [[id:'NA19401', chr:'22', region:'chr22:16650000-16700000'], // meta map - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s_imputed.bcf",checkIfExists:true), - file(params.phaseimpute_testdata_path + "individuals/NA19401/NA19401.s_imputed.bcf.csi",checkIfExists:true)] + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s_imputed.bcf",checkIfExists:true), + file(params.pipelines_testdata_base_path + "individuals/NA19401/NA19401.s_imputed.bcf.csi",checkIfExists:true)] ]) input[0] = estimate input[1] = truth diff --git 
a/subworkflows/local/vcf_concordance_glimpse2/tests/nextflow.config b/subworkflows/local/vcf_concordance_glimpse2/tests/nextflow.config index 8b9e3c3f..97bf81c8 100644 --- a/subworkflows/local/vcf_concordance_glimpse2/tests/nextflow.config +++ b/subworkflows/local/vcf_concordance_glimpse2/tests/nextflow.config @@ -3,7 +3,7 @@ params { } process { - withName: 'VCF_CONCORDANCE_GLIMPSE2:CONCATENATE' { + withName: 'VCF_CONCORDANCE_GLIMPSE2:GAWK' { ext.suffix = "txt" } } diff --git a/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf b/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf index 2f004352..312cca57 100644 --- a/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf +++ b/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf @@ -22,10 +22,10 @@ workflow VCF_NORMALIZE_BCFTOOLS { BCFTOOLS_NORM(ch_vcf, ch_fasta) // Index multiallelic VCF - BCFTOOLS_INDEX(BCFTOOLS_NORM.out.vcf) + BCFTOOLS_INDEX_1(BCFTOOLS_NORM.out.vcf) // Join multiallelic VCF and TBI - ch_multiallelic_vcf_tbi = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_INDEX.out.tbi) + ch_multiallelic_vcf_tbi = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) // Remove all multiallelic records: BCFTOOLS_VIEW(ch_multiallelic_vcf_tbi, [], [], []) diff --git a/subworkflows/local/vcf_region/main.nf b/subworkflows/local/vcf_region/main.nf index d4337793..815c882b 100644 --- a/subworkflows/local/vcf_region/main.nf +++ b/subworkflows/local/vcf_region/main.nf @@ -1,5 +1,5 @@ include { BCFTOOLS_VIEW as VIEW_VCF_REGION } from '../../../modules/nf-core/bcftools/view/main.nf' -include { BCFTOOLS_INDEX as VCF_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' workflow VCF_REGION { @@ -22,11 +22,11 @@ workflow VCF_REGION { VIEW_VCF_REGION(ch_input_region, [], [], []) ch_versions = ch_versions.mix(VIEW_VCF_REGION.out.versions.first()) - VCF_INDEX(VIEW_VCF_REGION.out.vcf) - 
ch_versions = ch_versions.mix(VCF_INDEX.out.versions.first()) + BCFTOOLS_INDEX(VIEW_VCF_REGION.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) ch_vcf_region = VIEW_VCF_REGION.out.vcf - .combine(VCF_INDEX.out.csi) + .combine(BCFTOOLS_INDEX.out.csi) emit: vcf_region = ch_vcf_region // channel: [ metaIR, vcf, index ] diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index 9755da9e..202d996c 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -24,11 +24,11 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { ch_panel_sites = VIEW_VCF_SITES.out.vcf.combine(BCFTOOLS_INDEX_2.out.csi, by:0) // Convert to TSV with structure for Glimpse - BCFTOOLS_QUERY(ch_panel_sites, [], [], []) - ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + BCFTOOLS_QUERY_TSV(ch_panel_sites, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY_TSV.out.versions.first()) // Compress TSV - TABIX_BGZIP(BCFTOOLS_QUERY.out.output) + TABIX_BGZIP(BCFTOOLS_QUERY_TSV.out.output) ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) // Index compressed TSV From 773185d3738332fb4c414e61c6f5a636a5e010ad Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 13 May 2024 17:01:47 +0200 Subject: [PATCH 005/110] Uniformize subworkflow and main.nf --- subworkflows/local/bam_impute_stitch/main.nf | 31 ++++++++++++++++++++ tests/config/nf-test.config | 1 - workflows/phaseimpute/main.nf | 2 +- 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 subworkflows/local/bam_impute_stitch/main.nf diff --git a/subworkflows/local/bam_impute_stitch/main.nf b/subworkflows/local/bam_impute_stitch/main.nf new file mode 100644 index 00000000..ea162fd0 --- /dev/null +++ b/subworkflows/local/bam_impute_stitch/main.nf @@ -0,0 +1,31 @@ +include { STITCH } from '../../../modules/nf-core/stitch/main' +include { 
BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' + + +workflow BAM_IMPUTE_STITCH { + + take: + ch_parameters + ch_samples + ch_fasta + + main: + + ch_versions = Channel.empty() + + // Run STITCH + seed = params.seed + STITCH( ch_samples, ch_parameters, ch_fasta, seed ) + + // Index imputed annotated VCF + BCFTOOLS_INDEX(STITCH.out.vcf) + + // Join VCFs and TBIs + ch_vcf_tbi = STITCH.out.vcf.join(BCFTOOLS_INDEX.out.tbi) + + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ meta, vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] + +} diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config index 32ca7b47..417172e2 100644 --- a/tests/config/nf-test.config +++ b/tests/config/nf-test.config @@ -3,7 +3,6 @@ params { singularity_pull_docker_container = false test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - phaseimpute_testdata_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' } process { diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 2ac06d02..ac7a3479 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -43,7 +43,7 @@ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/ // STITCH subworkflows include { PREPARE_INPUT_STITCH } from '../../subworkflows/local/prepare_input_stitch/prepare_input_stitch' -include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch/bam_impute_stitch' +include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' // CONCAT subworkflows From 7f46a464831f1d0085760c234d8df0e8ce7653bc Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:05:52 +0200 Subject: [PATCH 006/110] Rename to main.nf 
--- .../{bam_impute_quilt.nf => main.nf} | 0 .../bam_impute_stitch/bam_impute_stitch.nf | 31 ------------------- .../{prepare_input_stitch.nf => main.nf} | 0 .../main.nf} | 0 .../{vcf_chunk_glimpse.nf => main.nf} | 0 .../{vcf_normalize_bcftools.nf => main.nf} | 0 6 files changed, 31 deletions(-) rename subworkflows/local/bam_impute_quilt/{bam_impute_quilt.nf => main.nf} (100%) delete mode 100644 subworkflows/local/bam_impute_stitch/bam_impute_stitch.nf rename subworkflows/local/prepare_input_stitch/{prepare_input_stitch.nf => main.nf} (100%) rename subworkflows/local/{prepare_input_stitch/prepare_posfile_tsv.nf => prepare_posfile_tsv/main.nf} (100%) rename subworkflows/local/vcf_chunk_glimpse/{vcf_chunk_glimpse.nf => main.nf} (100%) rename subworkflows/local/vcf_normalize_bcftools/{vcf_normalize_bcftools.nf => main.nf} (100%) diff --git a/subworkflows/local/bam_impute_quilt/bam_impute_quilt.nf b/subworkflows/local/bam_impute_quilt/main.nf similarity index 100% rename from subworkflows/local/bam_impute_quilt/bam_impute_quilt.nf rename to subworkflows/local/bam_impute_quilt/main.nf diff --git a/subworkflows/local/bam_impute_stitch/bam_impute_stitch.nf b/subworkflows/local/bam_impute_stitch/bam_impute_stitch.nf deleted file mode 100644 index ea162fd0..00000000 --- a/subworkflows/local/bam_impute_stitch/bam_impute_stitch.nf +++ /dev/null @@ -1,31 +0,0 @@ -include { STITCH } from '../../../modules/nf-core/stitch/main' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' - - -workflow BAM_IMPUTE_STITCH { - - take: - ch_parameters - ch_samples - ch_fasta - - main: - - ch_versions = Channel.empty() - - // Run STITCH - seed = params.seed - STITCH( ch_samples, ch_parameters, ch_fasta, seed ) - - // Index imputed annotated VCF - BCFTOOLS_INDEX(STITCH.out.vcf) - - // Join VCFs and TBIs - ch_vcf_tbi = STITCH.out.vcf.join(BCFTOOLS_INDEX.out.tbi) - - - emit: - vcf_tbi = ch_vcf_tbi // channel: [ meta, vcf, tbi ] - versions = ch_versions // channel: [ 
versions.yml ] - -} diff --git a/subworkflows/local/prepare_input_stitch/prepare_input_stitch.nf b/subworkflows/local/prepare_input_stitch/main.nf similarity index 100% rename from subworkflows/local/prepare_input_stitch/prepare_input_stitch.nf rename to subworkflows/local/prepare_input_stitch/main.nf diff --git a/subworkflows/local/prepare_input_stitch/prepare_posfile_tsv.nf b/subworkflows/local/prepare_posfile_tsv/main.nf similarity index 100% rename from subworkflows/local/prepare_input_stitch/prepare_posfile_tsv.nf rename to subworkflows/local/prepare_posfile_tsv/main.nf diff --git a/subworkflows/local/vcf_chunk_glimpse/vcf_chunk_glimpse.nf b/subworkflows/local/vcf_chunk_glimpse/main.nf similarity index 100% rename from subworkflows/local/vcf_chunk_glimpse/vcf_chunk_glimpse.nf rename to subworkflows/local/vcf_chunk_glimpse/main.nf diff --git a/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf b/subworkflows/local/vcf_normalize_bcftools/main.nf similarity index 100% rename from subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf rename to subworkflows/local/vcf_normalize_bcftools/main.nf From 29108c0d0c755ff4b3590457a00287b516089725 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:11:10 +0200 Subject: [PATCH 007/110] Set all sbwf call without main --- subworkflows/local/bam_downsample/main.nf | 6 +++--- subworkflows/local/bam_region/main.nf | 4 ++-- subworkflows/local/compute_gl/main.nf | 6 +++--- subworkflows/local/get_region/main.nf | 2 +- subworkflows/local/prepare_posfile_tsv/main.nf | 2 +- subworkflows/local/vcf_chr_check/main.nf | 6 +++--- subworkflows/local/vcf_chunk_glimpse/main.nf | 6 +++--- subworkflows/local/vcf_impute_glimpse2/main.nf | 8 ++++---- .../local/vcf_normalize_bcftools/main.nf | 16 ++++++++-------- subworkflows/local/vcf_phase_panel/main.nf | 2 +- subworkflows/local/vcf_region/main.nf | 4 ++-- .../local/vcf_sites_extract_bcftools/main.nf | 10 +++++----- workflows/phaseimpute/main.nf | 
16 +++++++++------- 13 files changed, 45 insertions(+), 43 deletions(-) diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 106cf2a3..13f6a41e 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -1,6 +1,6 @@ -include { SAMTOOLS_COVERAGE } from '../../../modules/nf-core/samtools/coverage/main.nf' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main.nf' -include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view/main.nf' +include { SAMTOOLS_COVERAGE } from '../../../modules/nf-core/samtools/coverage' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' workflow BAM_DOWNSAMPLE { diff --git a/subworkflows/local/bam_region/main.nf b/subworkflows/local/bam_region/main.nf index 1968cd38..1900ee98 100644 --- a/subworkflows/local/bam_region/main.nf +++ b/subworkflows/local/bam_region/main.nf @@ -1,5 +1,5 @@ -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main.nf' -include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view/main.nf' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' workflow BAM_REGION { diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/compute_gl/main.nf index b11623d4..5fa462fa 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/compute_gl/main.nf @@ -1,6 +1,6 @@ -include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main.nf' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main.nf' +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup' +include { BCFTOOLS_INDEX } from 
'../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate' workflow COMPUTE_GL { diff --git a/subworkflows/local/get_region/main.nf b/subworkflows/local/get_region/main.nf index 58f84e10..866ae08f 100644 --- a/subworkflows/local/get_region/main.nf +++ b/subworkflows/local/get_region/main.nf @@ -1,4 +1,4 @@ -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' workflow GET_REGION { take: diff --git a/subworkflows/local/prepare_posfile_tsv/main.nf b/subworkflows/local/prepare_posfile_tsv/main.nf index 0612d9bc..c11f3195 100644 --- a/subworkflows/local/prepare_posfile_tsv/main.nf +++ b/subworkflows/local/prepare_posfile_tsv/main.nf @@ -1,4 +1,4 @@ -include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query/main' +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' include { GAWK } from '../../../modules/nf-core/gawk' diff --git a/subworkflows/local/vcf_chr_check/main.nf b/subworkflows/local/vcf_chr_check/main.nf index 3c9d5f79..c43501a1 100644 --- a/subworkflows/local/vcf_chr_check/main.nf +++ b/subworkflows/local/vcf_chr_check/main.nf @@ -1,6 +1,6 @@ -include { VCFCHREXTRACT as VCFCHRBFR } from '../../../modules/local/vcfchrextract/main.nf' -include { VCFCHREXTRACT as VCFCHRAFT } from '../../../modules/local/vcfchrextract/main.nf' -include { VCF_CHR_RENAME } from '../vcf_chr_rename/main.nf' +include { VCFCHREXTRACT as VCFCHRBFR } from '../../../modules/local/vcfchrextract' +include { VCFCHREXTRACT as VCFCHRAFT } from '../../../modules/local/vcfchrextract' +include { VCF_CHR_RENAME } from '../vcf_chr_rename' workflow VCF_CHR_CHECK { take: diff --git a/subworkflows/local/vcf_chunk_glimpse/main.nf b/subworkflows/local/vcf_chunk_glimpse/main.nf index cc16b9b0..cd4ca25e 100644 --- a/subworkflows/local/vcf_chunk_glimpse/main.nf +++ 
b/subworkflows/local/vcf_chunk_glimpse/main.nf @@ -1,6 +1,6 @@ -include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk/main' -include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main' -include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main' +include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk' +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk' +include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference' workflow VCF_CHUNK_GLIMPSE { diff --git a/subworkflows/local/vcf_impute_glimpse2/main.nf b/subworkflows/local/vcf_impute_glimpse2/main.nf index 32a20972..3cef9206 100644 --- a/subworkflows/local/vcf_impute_glimpse2/main.nf +++ b/subworkflows/local/vcf_impute_glimpse2/main.nf @@ -1,7 +1,7 @@ -include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main' -include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' -include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index/main.nf' +include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' +include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index' workflow VCF_IMPUTE_GLIMPSE2 { diff --git a/subworkflows/local/vcf_normalize_bcftools/main.nf b/subworkflows/local/vcf_normalize_bcftools/main.nf index 312cca57..f9991537 100644 --- a/subworkflows/local/vcf_normalize_bcftools/main.nf +++ b/subworkflows/local/vcf_normalize_bcftools/main.nf @@ -1,11 +1,11 @@ -include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_VIEW } from 
'../../../modules/nf-core/bcftools/view/main' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2} from '../../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3} from '../../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert/main' -include { BCFTOOLS_VIEW as BCFTOOLS_REMOVE } from '../../../modules/nf-core/bcftools/view/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_4} from '../../../modules/nf-core/bcftools/index/main' +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2} from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3} from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert' +include { BCFTOOLS_VIEW as BCFTOOLS_REMOVE } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_4} from '../../../modules/nf-core/bcftools/index' workflow VCF_NORMALIZE_BCFTOOLS { diff --git a/subworkflows/local/vcf_phase_panel/main.nf b/subworkflows/local/vcf_phase_panel/main.nf index a54ca8e6..d100c189 100644 --- a/subworkflows/local/vcf_phase_panel/main.nf +++ b/subworkflows/local/vcf_phase_panel/main.nf @@ -1,4 +1,4 @@ -include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/nf-core/vcf_phase_shapeit5/main' +include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/nf-core/vcf_phase_shapeit5' workflow VCF_PHASE_PANEL { take: diff --git a/subworkflows/local/vcf_region/main.nf b/subworkflows/local/vcf_region/main.nf index 815c882b..e7aae8d4 100644 --- a/subworkflows/local/vcf_region/main.nf +++ b/subworkflows/local/vcf_region/main.nf @@ -1,5 +1,5 @@ 
-include { BCFTOOLS_VIEW as VIEW_VCF_REGION } from '../../../modules/nf-core/bcftools/view/main.nf' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_VIEW as VIEW_VCF_REGION } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' workflow VCF_REGION { diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index 202d996c..191d4ae6 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -1,8 +1,8 @@ -include { BCFTOOLS_VIEW as VIEW_VCF_SITES } from '../../../modules/nf-core/bcftools/view/main.nf' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index/main.nf' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query/main.nf' +include { BCFTOOLS_VIEW as VIEW_VCF_SITES } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix' +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' workflow VCF_SITES_EXTRACT_BCFTOOLS { take: diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index ac7a3479..2f60b2a0 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -7,7 +7,7 @@ // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../../modules/nf-core/multiqc/main' +include { MULTIQC } from '../../modules/nf-core/multiqc' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc 
} from '../../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' @@ -24,25 +24,27 @@ include { BAM_DOWNSAMPLE } from '../../subworkflows/ // Panelprep subworkflows include { VCF_CHR_CHECK } from '../../subworkflows/local/vcf_chr_check' -include { VCF_NORMALIZE_BCFTOOLS } from '../../subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools' +include { VCF_NORMALIZE_BCFTOOLS } from '../../subworkflows/local/vcf_normalize_bcftools' include { VCF_SITES_EXTRACT_BCFTOOLS } from '../../subworkflows/local/vcf_sites_extract_bcftools' include { VCF_PHASE_PANEL } from '../../subworkflows/local/vcf_phase_panel' -include { PREPARE_POSFILE_TSV } from '../../subworkflows/local/prepare_input_stitch/prepare_posfile_tsv' +include { PREPARE_POSFILE_TSV } from '../../subworkflows/local/prepare_posfile_tsv' -// GLIMPSE subworkflows +// GLIMPSE1 subworkflows include { VCF_IMPUTE_GLIMPSE as VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/nf-core/vcf_impute_glimpse' include { COMPUTE_GL as GL_TRUTH } from '../../subworkflows/local/compute_gl' include { COMPUTE_GL as GL_INPUT } from '../../subworkflows/local/compute_gl' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' + +// GLIMPSE2 subworkflows include { VCF_IMPUTE_GLIMPSE2 } from '../../subworkflows/local/vcf_impute_glimpse2' // QUILT subworkflows -include { VCF_CHUNK_GLIMPSE } from '../../subworkflows/local/vcf_chunk_glimpse/vcf_chunk_glimpse' -include { BAM_IMPUTE_QUILT } from '../../subworkflows/local/bam_impute_quilt/bam_impute_quilt' +include { VCF_CHUNK_GLIMPSE } from '../../subworkflows/local/vcf_chunk_glimpse' +include { BAM_IMPUTE_QUILT } from '../../subworkflows/local/bam_impute_quilt' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/local/vcf_concatenate_bcftools' // STITCH subworkflows -include { PREPARE_INPUT_STITCH } from 
'../../subworkflows/local/prepare_input_stitch/prepare_input_stitch' +include { PREPARE_INPUT_STITCH } from '../../subworkflows/local/prepare_input_stitch' include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' From 0d7a4e08ea8c841a66b4da189755f7318e753e70 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:19:13 +0200 Subject: [PATCH 008/110] Normalize panel_prep config --- conf/steps/panel_prep.config | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 99d8028b..2172ae14 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -41,7 +41,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_NORM' { ext.args = '-m +any --no-version --output-type z' ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } - maxRetries = 2 publishDir = [ enabled: false ] } @@ -53,7 +52,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_VIEW' { ext.args = '-v snps -m 2 -M 2 -Oz' ext.prefix = { "${meta.id}_${meta.chr}_biallelic_snps" } - maxRetries = 2 publishDir = [ enabled: false ] } @@ -64,7 +62,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} - maxRetries = 2 publishDir = [ path: { "${params.outdir}/prep_panel/haplegend/" }, mode: params.publish_dir_mode, @@ -76,7 +73,6 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_REMOVE' { ext.args = { "-Oz -s^${params.remove_samples}" } ext.prefix = { "${meta.id}_${meta.chr}_biallelic_removed_samples" } - maxRetries = 2 publishDir = [ enabled: false ] } @@ -109,7 +105,6 @@ process { } withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX_2' { - maxRetries = 2 ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" } publishDir = [ path: { "${params.outdir}/prep_panel/sites/vcf/" }, @@ -119,9 +114,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_QUERY' { - ext.args = [ - "-f'%CHROM\t%POS\t%REF,%ALT\\n'", - ].join(' ') + ext.args = ["-f'%CHROM\t%POS\t%REF,%ALT\\n'"].join(' ') ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites_tsv" } publishDir = [ enabled: false ] } @@ -151,10 +144,7 @@ process { // (Optional) Subworkflow: Phasing withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_PHASE_PANEL:VCF_PHASE_SHAPEIT5:BEDTOOLS_MAKEWINDOWS' { - ext.args = [ - '-w 60000', - '-s 40000' - ].join(' ') + ext.args = ['-w 60000', '-s 40000'].join(' ') ext.prefix = { "${meta.id}_chunks" } publishDir = [ enabled: false ] } @@ -165,7 +155,6 @@ process { } // Subworkflow: Make chunks - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:.*' { publishDir = [ path: { "${params.outdir}/prep_panel/chunks/" }, @@ -174,11 +163,8 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { - ext.args = [ - "--window-mb 2.0" - ].join(' ') + ext.args = ["--window-mb 2.0"].join(' ') ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse2" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, From 012ee727afa3a7bb8702bef6db6d2ed3113c06c0 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:49:28 +0200 Subject: [PATCH 009/110] Uniformize conf and workflows --- conf/steps/imputation_glimpse1.config | 9 ++-- conf/steps/imputation_quilt.config | 20 ++------ conf/steps/imputation_stitch.config | 23 ++------- conf/steps/panel_prep.config | 41 +++++++++------- conf/steps/validation.config | 3 ++ subworkflows/local/bam_impute_quilt/main.nf | 4 +- subworkflows/local/bam_impute_stitch/main.nf | 4 +- .../local/prepare_posfile_tsv/main.nf | 4 
+- subworkflows/local/vcf_chr_rename/main.nf | 11 ++--- subworkflows/local/vcf_chunk_glimpse/main.nf | 48 +++++++++++-------- .../local/vcf_normalize_bcftools/main.nf | 25 +++++----- .../local/vcf_sites_extract_bcftools/main.nf | 26 +++++----- 12 files changed, 101 insertions(+), 117 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 96aa857d..1e0d0731 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -12,6 +12,8 @@ process { // Configuration for the glimpse1 imputation subworkflow + + // Call the variants before imputation withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:.*' { publishDir = [ enabled: false ] } @@ -38,6 +40,7 @@ process { ext.args = "--tbi" } + // Impute with GLIMPSE1 withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' { publishDir = [ path : { "${params.outdir}/imputation/glimpse1/" }, @@ -47,10 +50,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_CHUNK' { - ext.args = [ - "--window-size 200000", - "--buffer-size 20000" - ].join(' ') + ext.args = ["--window-size 200000", "--buffer-size 20000"].join(' ') ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.chunk" } publishDir = [ enabled: false ] } @@ -77,6 +77,7 @@ process { ] } + // Concatenate the imputed chunks withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE1:.*' { publishDir = [ path: { "${params.outdir}/imputation/glimpse1/concat" }, diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config index 9801d2ab..32f8d270 100644 --- a/conf/steps/imputation_quilt.config +++ b/conf/steps/imputation_quilt.config @@ -12,23 +12,6 @@ process { - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:.*' { - publishDir = [ - path: { "${params.outdir}/prep_panel/chunks/" }, - mode: params.publish_dir_mode, - enabled: true - ] - } - - withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { - ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse1" } - publishDir = [ - path: { "${params.outdir}/prep_panel/chunks/glimpse1/" }, - mode: params.publish_dir_mode, - enabled: true - ] - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:.*' { publishDir = [ path: { "${params.outdir}/imputation/quilt/" }, @@ -36,6 +19,7 @@ process { ] } + // Impute quilt withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:QUILT_QUILT' { ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute" } publishDir = [enabled: false] @@ -46,6 +30,7 @@ process { publishDir = [enabled: false] } + // Annotate quilt imputed VCFs withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_ANNOTATE' { ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute.annotate" } @@ -55,6 +40,7 @@ process { ext.args = "--tbi" } + // Concatenate quilt imputed VCFs withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT:.*' { publishDir = [ path: { "${params.outdir}/imputation/quilt/concat" }, diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config index ea3395f3..31bee00e 100644 --- a/conf/steps/imputation_stitch.config +++ b/conf/steps/imputation_stitch.config @@ -20,29 +20,16 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:GAWK' { - ext.args = "'{ key = \$1 FS \$2 } !seen[key]++'" // Remove duplicates + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:BCFTOOLS_QUERY' { + ext.args = "-f'%CHROM\t%POS\t%REF\t%ALT\\n'" ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } - ext.suffix = "txt" - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_INPUT_STITCH:BCFTOOLS_NORM' { - ext.args = '-m +any --output-type z' - ext.prefix = { "${meta.id}_${meta.chr}_multiallelic" } publishDir = [enabled: false] } - withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_INPUT_STITCH:BCFTOOLS_VIEW' { - ext.args = '-v snps -Oz' - ext.prefix = { "${meta.id}_${meta.chr}_biallelic" } - publishDir = [enabled: false] - - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:BCFTOOLS_QUERY' { - ext.args = "-f'%CHROM\t%POS\t%REF\t%ALT\\n'" + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:GAWK' { + ext.args = "'{ key = \$1 FS \$2 } !seen[key]++'" // Remove duplicates ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } - publishDir = [enabled: false] + ext.suffix = "txt" } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:.*' { diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 2172ae14..8d793df4 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -33,7 +33,6 @@ process { } // Subworkflow: VCF_NORMALIZE_BCFTOOLS - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:.*' { publishDir = [ enabled: false ] } @@ -44,12 +43,12 @@ process { publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_1' { ext.args = "--tbi" publishDir = [enabled: false] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_VIEW' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_DEL_MLT_ALL' { ext.args = '-v snps -m 2 -M 2 -Oz' ext.prefix = { "${meta.id}_${meta.chr}_biallelic_snps" } publishDir = [ enabled: false ] @@ -60,34 +59,33 @@ process { publishDir = [enabled: false] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { - ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} - publishDir = [ - path: { "${params.outdir}/prep_panel/haplegend/" }, - mode: params.publish_dir_mode, - enabled: true - ] - } - // (Optional) Subworkflow: Remove samples from panel - withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_REMOVE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_DEL_SPL' { ext.args = { "-Oz -s^${params.remove_samples}" } ext.prefix = { "${meta.id}_${meta.chr}_biallelic_removed_samples" } publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_4' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_3' { ext.args = "--tbi" publishDir = [enabled: false] } - // Subworkflow: VCF_SITES_EXTRACT_BCFTOOLS + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { + ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} + publishDir = [ + path: { "${params.outdir}/prep_panel/haplegend/" }, + mode: params.publish_dir_mode, + enabled: true + ] + } + // Subworkflow: VCF_SITES_EXTRACT_BCFTOOLS withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:.*' { publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:VIEW_VCF_SITES' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_VIEW' { ext.args = [ "-G", "-m 2", @@ -104,7 +102,7 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX_2' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX' { ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" } publishDir = [ path: { "${params.outdir}/prep_panel/sites/vcf/" }, @@ -163,6 +161,15 @@ process { ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse1" } + publishDir = [ + path: { "${params.outdir}/prep_panel/chunks/glimpse1/" }, + mode: params.publish_dir_mode, + enabled: true + ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { ext.args = ["--window-mb 2.0"].join(' ') ext.prefix = { 
"${meta.panel}_${meta.chr}_chunks_glimpse2" } diff --git a/conf/steps/validation.config b/conf/steps/validation.config index ae74def8..a1c22475 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -12,6 +12,8 @@ process { // Configuration for the validation step + + // Compute genotype likelihoods for the truth set withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:.*' { publishDir = [ enabled: false ] } @@ -54,6 +56,7 @@ process { ext.args = "--tbi" } + // Validation subworkflow withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { publishDir = [ path: { "${params.outdir}/validation/" }, diff --git a/subworkflows/local/bam_impute_quilt/main.nf b/subworkflows/local/bam_impute_quilt/main.nf index 3e7cf8f9..c7993088 100644 --- a/subworkflows/local/bam_impute_quilt/main.nf +++ b/subworkflows/local/bam_impute_quilt/main.nf @@ -1,5 +1,5 @@ -include { QUILT_QUILT } from '../../../modules/nf-core/quilt/quilt' -include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate' +include { QUILT_QUILT } from '../../../modules/nf-core/quilt/quilt' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate' include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' diff --git a/subworkflows/local/bam_impute_stitch/main.nf b/subworkflows/local/bam_impute_stitch/main.nf index ea162fd0..b2f1b659 100644 --- a/subworkflows/local/bam_impute_stitch/main.nf +++ b/subworkflows/local/bam_impute_stitch/main.nf @@ -1,5 +1,5 @@ -include { STITCH } from '../../../modules/nf-core/stitch/main' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' +include { STITCH } from '../../../modules/nf-core/stitch' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' workflow BAM_IMPUTE_STITCH { diff --git 
a/subworkflows/local/prepare_posfile_tsv/main.nf b/subworkflows/local/prepare_posfile_tsv/main.nf index c11f3195..8390039d 100644 --- a/subworkflows/local/prepare_posfile_tsv/main.nf +++ b/subworkflows/local/prepare_posfile_tsv/main.nf @@ -1,5 +1,5 @@ -include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' -include { GAWK } from '../../../modules/nf-core/gawk' +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' +include { GAWK } from '../../../modules/nf-core/gawk' workflow PREPARE_POSFILE_TSV { diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index f169ecb9..8b47c8c1 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -12,19 +12,14 @@ workflow VCF_CHR_RENAME { ch_versions = Channel.empty() // Generate the chromosome renaming file - GAWK( - ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }, - Channel.of( - 'BEGIN {FS="\\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { sub(/^chr/, "", $1); if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' - ).collectFile(name:"program.txt") - ) - ch_versions = ch_versions.mix(FAITOCHR.out.versions) + GAWK(ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }, []) + ch_versions = ch_versions.mix(GAWK.out.versions) // Rename the chromosome without prefix BCFTOOLS_ANNOTATE( ch_vcf // channel: [ [id], vcf, index ] .combine(Channel.of([[],[],[]])) - .combine(FAITOCHR.out.output.map{it[1]}) + .combine(GAWK.out.output.map{it[1]}) ) ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) diff --git a/subworkflows/local/vcf_chunk_glimpse/main.nf b/subworkflows/local/vcf_chunk_glimpse/main.nf index cd4ca25e..7172b1f2 100644 --- a/subworkflows/local/vcf_chunk_glimpse/main.nf +++ b/subworkflows/local/vcf_chunk_glimpse/main.nf @@ -1,6 +1,6 @@ -include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk' -include { 
GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk' -include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference' +include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk' +include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk' +include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference' workflow VCF_CHUNK_GLIMPSE { @@ -21,17 +21,20 @@ workflow VCF_CHUNK_GLIMPSE { // Rearrange chunks into channel for QUILT ch_chunks_quilt = GLIMPSE_CHUNK.out.chunk_chr - .splitText() - .map { metamap, line -> - def fields = line.split("\t") - def startEnd = fields[2].split(':')[1].split('-') - [metamap, metamap.chr, startEnd[0], startEnd[1]] - } + .splitText() + .map { metamap, line -> + def fields = line.split("\t") + def startEnd = fields[2].split(':')[1].split('-') + [metamap, metamap.chr, startEnd[0], startEnd[1]] + } // Rearrange chunks into channel for GLIMPSE1 ch_chunks_glimpse1 = GLIMPSE_CHUNK.out.chunk_chr - .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) - .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + .splitCsv( + header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], + sep: "\t", skip: 0 + ) + .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} // Make chunks with Glimpse2 (does not work with "sequential" mode) chunk_model = "recursive" @@ -41,10 +44,13 @@ workflow VCF_CHUNK_GLIMPSE { // Rearrange channels ch_chunks_glimpse2 = GLIMPSE2_CHUNK.out.chunk_chr - .splitCsv(header: ['ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', - 'WindowMb', 'NbTotVariants', 'NbComVariants'], - sep: "\t", skip: 0) - .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]} + .splitCsv( + header: [ + 'ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm', + 'WindowMb', 'NbTotVariants', 'NbComVariants' + ], sep: "\t", skip: 0 + ) + .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]} // Split 
reference panel in bin files // Segmentation fault occurs in small-sized panels @@ -53,14 +59,14 @@ workflow VCF_CHUNK_GLIMPSE { ch_bins = [[]] if (params.binaryref == true) { - // Create channel to split reference - split_input = ch_reference.combine(ch_chunks_glimpse2, by: 0) + // Create channel to split reference + split_input = ch_reference.combine(ch_chunks_glimpse2, by: 0) - // Create a binary reference panel for quick reading time - GLIMPSE2_SPLITREFERENCE( split_input, ch_map ) - ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() ) + // Create a binary reference panel for quick reading time + GLIMPSE2_SPLITREFERENCE( split_input, ch_map ) + ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() ) - ch_bins = GLIMPSE2_SPLITREFERENCE.out.bin_ref + ch_bins = GLIMPSE2_SPLITREFERENCE.out.bin_ref } emit: diff --git a/subworkflows/local/vcf_normalize_bcftools/main.nf b/subworkflows/local/vcf_normalize_bcftools/main.nf index f9991537..768b58ce 100644 --- a/subworkflows/local/vcf_normalize_bcftools/main.nf +++ b/subworkflows/local/vcf_normalize_bcftools/main.nf @@ -1,11 +1,10 @@ -include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm' -include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2} from '../../../modules/nf-core/bcftools/index' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3} from '../../../modules/nf-core/bcftools/index' -include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert' -include { BCFTOOLS_VIEW as BCFTOOLS_REMOVE } from '../../../modules/nf-core/bcftools/view' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_4} from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { 
BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3 } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_VIEW as BCFTOOLS_DEL_MLT_ALL } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_VIEW as BCFTOOLS_DEL_SPL } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert' workflow VCF_NORMALIZE_BCFTOOLS { @@ -28,19 +27,19 @@ workflow VCF_NORMALIZE_BCFTOOLS { ch_multiallelic_vcf_tbi = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) // Remove all multiallelic records: - BCFTOOLS_VIEW(ch_multiallelic_vcf_tbi, [], [], []) + BCFTOOLS_DEL_MLT_ALL(ch_multiallelic_vcf_tbi, [], [], []) // Index biallelic VCF - BCFTOOLS_INDEX_2(BCFTOOLS_VIEW.out.vcf) + BCFTOOLS_INDEX_2(BCFTOOLS_DEL_MLT_ALL.out.vcf) // Join biallelic VCF and TBI - ch_biallelic_vcf_tbi = BCFTOOLS_VIEW.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) + ch_biallelic_vcf_tbi = BCFTOOLS_DEL_MLT_ALL.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) // (Optional) Remove benchmarking samples (e.g. 
NA12878) from the reference panel if (!(params.remove_samples == null)){ BCFTOOLS_REMOVE(ch_biallelic_vcf_tbi, [], [], []) - BCFTOOLS_INDEX_4(BCFTOOLS_REMOVE.out.vcf) - ch_biallelic_vcf_tbi = BCFTOOLS_REMOVE.out.vcf.join(BCFTOOLS_INDEX_4.out.tbi) + BCFTOOLS_INDEX_3(BCFTOOLS_REMOVE.out.vcf) + ch_biallelic_vcf_tbi = BCFTOOLS_REMOVE.out.vcf.join(BCFTOOLS_INDEX_3.out.tbi) } // Convert VCF to Hap and Legend files diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index 191d4ae6..a2a4c473 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -1,8 +1,8 @@ -include { BCFTOOLS_VIEW as VIEW_VCF_SITES } from '../../../modules/nf-core/bcftools/view' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix' -include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' +include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix' +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' workflow VCF_SITES_EXTRACT_BCFTOOLS { take: @@ -13,22 +13,22 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { ch_versions = Channel.empty() // Extract sites positions - VIEW_VCF_SITES( ch_vcf,[], [], []) - ch_versions = ch_versions.mix(VIEW_VCF_SITES.out.versions.first()) + BCFTOOLS_VIEW(ch_vcf, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first()) // Index extracted sites - BCFTOOLS_INDEX_2(VIEW_VCF_SITES.out.vcf) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX_2.out.versions.first()) + 
BCFTOOLS_INDEX(BCFTOOLS_VIEW.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions.first()) // Join extracted sites and index - ch_panel_sites = VIEW_VCF_SITES.out.vcf.combine(BCFTOOLS_INDEX_2.out.csi, by:0) + ch_panel_sites = BCFTOOLS_VIEW.out.vcf.combine(BCFTOOLS_INDEX.out.csi, by:0) // Convert to TSV with structure for Glimpse - BCFTOOLS_QUERY_TSV(ch_panel_sites, [], [], []) - ch_versions = ch_versions.mix(BCFTOOLS_QUERY_TSV.out.versions.first()) + BCFTOOLS_QUERY(ch_panel_sites, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) // Compress TSV - TABIX_BGZIP(BCFTOOLS_QUERY_TSV.out.output) + TABIX_BGZIP(BCFTOOLS_QUERY.out.output) ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) // Index compressed TSV From 71e95b664493c18617b2c28f0dbbcbd9b0b3fb19 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 11:18:20 +0200 Subject: [PATCH 010/110] Fix config --- conf/steps/imputation_glimpse1.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 1e0d0731..b79e2f6c 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -72,7 +72,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:INDEX_LIGATE' { publishDir = [ - path: { "${params.outdir}/imputation/glimpse1" } + path: { "${params.outdir}/imputation/glimpse1" }, mode: params.publish_dir_mode, ] } From 3ded0ac4ab1f00cc4c5b3a20edddbdd321f3108a Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 15:06:58 +0200 Subject: [PATCH 011/110] Add channel description --- modules/nf-core/glimpse2/chunk/main.nf | 3 +- subworkflows/local/bam_impute_quilt/main.nf | 16 ++++---- subworkflows/local/bam_impute_stitch/main.nf | 15 ++++---- subworkflows/local/bam_region/main.nf | 4 +- subworkflows/local/compute_gl/main.nf | 6 +-- subworkflows/local/get_region/main.nf | 4 +- 
.../local/prepare_input_stitch/main.nf | 12 +++--- .../local/prepare_posfile_tsv/main.nf | 11 +++--- .../utils_nfcore_phaseimpute_pipeline/main.nf | 21 ++++------- subworkflows/local/vcf_chr_check/main.nf | 8 ++-- subworkflows/local/vcf_chr_rename/main.nf | 2 +- subworkflows/local/vcf_chunk_glimpse/main.nf | 37 ++++++++++++------- .../local/vcf_concatenate_bcftools/main.nf | 8 ++-- .../local/vcf_concordance_glimpse2/main.nf | 11 +++--- .../local/vcf_impute_glimpse2/main.nf | 9 +++-- .../local/vcf_normalize_bcftools/main.nf | 17 +++++++-- subworkflows/local/vcf_phase_panel/main.nf | 26 ++++++------- subworkflows/local/vcf_region/main.nf | 14 +++---- .../local/vcf_sites_extract_bcftools/main.nf | 6 +-- workflows/phaseimpute/main.nf | 20 +++++----- 20 files changed, 133 insertions(+), 117 deletions(-) diff --git a/modules/nf-core/glimpse2/chunk/main.nf b/modules/nf-core/glimpse2/chunk/main.nf index 4ff4b2a7..3fcc1757 100644 --- a/modules/nf-core/glimpse2/chunk/main.nf +++ b/modules/nf-core/glimpse2/chunk/main.nf @@ -17,8 +17,7 @@ process GLIMPSE2_CHUNK { 'biocontainers/glimpse-bio:2.0.0--hf340a29_0' }" input: - tuple val(meta) , path(input), path(input_index), val(region) - tuple val(meta2), path(map) + tuple val(meta) , path(input), path(input_index), val(region), path(map) val(model) output: diff --git a/subworkflows/local/bam_impute_quilt/main.nf b/subworkflows/local/bam_impute_quilt/main.nf index c7993088..077df87a 100644 --- a/subworkflows/local/bam_impute_quilt/main.nf +++ b/subworkflows/local/bam_impute_quilt/main.nf @@ -8,7 +8,7 @@ workflow BAM_IMPUTE_QUILT { take: ch_hap_legend // channel: [ [panel, chr], hap, legend ] - ch_input // channel: [ [id, chr], bam, bai ] + ch_input // channel: [ [id, chr, region], bam, bai ] ch_chunks // channel: [ [panel, chr], start_coordinate, end_coordinate, number ] @@ -37,15 +37,15 @@ workflow BAM_IMPUTE_QUILT { } ch_quilt = ch_input - .map{ metaIC, bam, bai -> [metaIC.subMap("chr"), metaIC, bam, bai]} + .map{ metaICR, bam, 
bai -> [metaICR.subMap("chr"), metaICR, bam, bai]} .combine(ch_hap_chunks - .map{ metaIC, hap, legend, chr, start, end, ngen, buffer, gmap -> - [metaIC.subMap("chr"), metaIC, hap, legend, chr, start, end, ngen, buffer, gmap] + .map{ metaPC, hap, legend, chr, start, end, ngen, buffer, gmap -> + [metaPC.subMap("chr"), metaPC, hap, legend, chr, start, end, ngen, buffer, gmap] }, by:0 ) .map { - metaC, metaIC, bam, bai, metaPC, hap, legend, chr, start, end, ngen, buffer, gmap -> - [metaIC + ["panel": metaPC.id], bam, bai, hap, legend, chr, start, end, ngen, buffer, gmap] + metaC, metaICR, bam, bai, metaPC, hap, legend, chr, start, end, ngen, buffer, gmap -> + [metaICR + ["panel": metaPC.id], bam, bai, hap, legend, chr, start, end, ngen, buffer, gmap] } // Run QUILT @@ -71,6 +71,6 @@ workflow BAM_IMPUTE_QUILT { ch_vcf_tbi = BCFTOOLS_ANNOTATE.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) emit: - vcf_tbi = ch_vcf_tbi // channel: [ meta, vcf, tbi ] - versions = ch_versions // channel: [ versions.yml ] + vcf_tbi = ch_vcf_tbi // channel: [ [id, panel, chr, region], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/bam_impute_stitch/main.nf b/subworkflows/local/bam_impute_stitch/main.nf index b2f1b659..2bd05078 100644 --- a/subworkflows/local/bam_impute_stitch/main.nf +++ b/subworkflows/local/bam_impute_stitch/main.nf @@ -5,27 +5,28 @@ include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' workflow BAM_IMPUTE_STITCH { take: - ch_parameters - ch_samples - ch_fasta + ch_parameters // channel: [ [chr], posfile, input, rdata, chr, k_val, ngen] + ch_bam // channel: [ [id, chr], bam, bai, bamlist ] + ch_fasta // channel: [ [genome], fa, fai ] main: ch_versions = Channel.empty() - // Run STITCH seed = params.seed - STITCH( ch_samples, ch_parameters, ch_fasta, seed ) + STITCH( ch_bam, ch_parameters, ch_fasta, seed ) + ch_versions = ch_versions.mix(STITCH.out.versions) // Index imputed annotated VCF 
BCFTOOLS_INDEX(STITCH.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions) // Join VCFs and TBIs ch_vcf_tbi = STITCH.out.vcf.join(BCFTOOLS_INDEX.out.tbi) emit: - vcf_tbi = ch_vcf_tbi // channel: [ meta, vcf, tbi ] - versions = ch_versions // channel: [ versions.yml ] + vcf_tbi = ch_vcf_tbi // channel: [ [id, chr], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/bam_region/main.nf b/subworkflows/local/bam_region/main.nf index 1900ee98..1078bdb1 100644 --- a/subworkflows/local/bam_region/main.nf +++ b/subworkflows/local/bam_region/main.nf @@ -34,6 +34,6 @@ workflow BAM_REGION { .combine(SAMTOOLS_INDEX.out.bai, by: 0) emit: - bam_region = ch_bam_region // channel: [ metaIGCR, bam, index ] - versions = ch_versions // channel: [ versions.yml ] + bam_region = ch_bam_region // channel: [ metaIGCR, bam, index ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/compute_gl/main.nf index 5fa462fa..b3f6b8cc 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/compute_gl/main.nf @@ -7,7 +7,7 @@ workflow COMPUTE_GL { take: ch_input // channel: [ [id, chr, region], bam, bai ] ch_target // channel: [ [panel, chr], sites, tsv] - ch_fasta // channel: [ [ref], fasta, fai] + ch_fasta // channel: [ [genome], fasta, fai] main: @@ -46,7 +46,7 @@ workflow COMPUTE_GL { ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_MPILEUP.out.stats.map{ it[1] }) emit: - vcf = ch_output // channel: [ [id, panel], vcf, tbi ] - versions = ch_versions // channel: [ versions.yml ] + vcf = ch_output // channel: [ [id, panel, chr, region], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files } diff --git a/subworkflows/local/get_region/main.nf b/subworkflows/local/get_region/main.nf index 866ae08f..68b5f548 100644 --- a/subworkflows/local/get_region/main.nf +++ b/subworkflows/local/get_region/main.nf 
@@ -3,7 +3,7 @@ include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/ workflow GET_REGION { take: input_region // Region string to use ["all", "chr1", "chr1:0-1000"] - ch_fasta // [[meta], fasta, fai] + ch_fasta // [[genome], fasta, fai] main: ch_versions = Channel.empty() @@ -28,6 +28,6 @@ workflow GET_REGION { } emit: - regions = ch_regions // channel: [ meta, region ] + regions = ch_regions // channel: [ [chr, region], region ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/prepare_input_stitch/main.nf b/subworkflows/local/prepare_input_stitch/main.nf index 6b2e7a37..f6d3c8d5 100644 --- a/subworkflows/local/prepare_input_stitch/main.nf +++ b/subworkflows/local/prepare_input_stitch/main.nf @@ -1,9 +1,9 @@ workflow PREPARE_INPUT_STITCH { take: - ch_posfile - ch_fasta - ch_input_impute + ch_posfile // channel: [ [chr], posfile ] + ch_fasta // channel: [ [genome], fa, fai ] + ch_input_impute // channel: [ [id, chr, region], bam, bai ] main: @@ -44,8 +44,8 @@ workflow PREPARE_INPUT_STITCH { .collect() emit: - stitch_parameters - stitch_samples - versions = ch_versions // channel: [ versions.yml ] + stitch_parameters = stitch_parameters // channel: [ [chr], posfile, [], [], chr, k_val, ngen ] + stitch_samples = stitch_samples // channel: [ [id], bam, bai, bamlist ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/prepare_posfile_tsv/main.nf b/subworkflows/local/prepare_posfile_tsv/main.nf index 8390039d..abd00d7e 100644 --- a/subworkflows/local/prepare_posfile_tsv/main.nf +++ b/subworkflows/local/prepare_posfile_tsv/main.nf @@ -5,8 +5,7 @@ include { GAWK } from '../../../modules/nf-core/gawk' workflow PREPARE_POSFILE_TSV { take: - ch_panel_sites - ch_fasta + ch_panel_sites // channel: [ [id, chr], vcf, csi ] main: @@ -14,13 +13,15 @@ workflow PREPARE_POSFILE_TSV { // Convert position file to tab-separated file BCFTOOLS_QUERY(ch_panel_sites, [], [], []) - ch_posfile = 
BCFTOOLS_QUERY.out.output + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions) + ch_posfile = BCFTOOLS_QUERY.out.output // Remove multiallelic positions from tsv GAWK(ch_posfile, []) + ch_versions = ch_versions.mix(GAWK.out.versions) emit: - posfile = GAWK.out.output // channel: [ [id, chr], txt ] - versions = ch_versions // channel: [ versions.yml ] + posfile = GAWK.out.output // channel: [ [id, chr], tsv ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 6961c1d6..652d0c0e 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -159,12 +159,9 @@ workflow PIPELINE_INITIALISATION { // if (params.input_region == null){ // #TODO Add support for string input - GET_REGION ( - "all", - ch_ref_gen - ) - ch_versions = ch_versions.mix(GET_REGION.out.versions) - ch_regions = GET_REGION.out.regions + GET_REGION ("all", ch_ref_gen) + ch_versions = ch_versions.mix(GET_REGION.out.versions) + ch_regions = GET_REGION.out.regions } else if (params.input_region.endsWith(".csv")) { println "Region file provided as input is a csv file" ch_regions = Channel.fromSamplesheet("input_region") @@ -212,13 +209,11 @@ workflow PIPELINE_INITIALISATION { // if (params.posfile) { - ch_posfile = Channel - .fromSamplesheet("posfile") - .map { - meta, file -> - [ meta, file ] - }} else { - ch_posfile = [[]] + ch_posfile = Channel + .fromSamplesheet("posfile") + .map {meta, file -> [ meta, file ]} + } else { + ch_posfile = [[]] } emit: diff --git a/subworkflows/local/vcf_chr_check/main.nf b/subworkflows/local/vcf_chr_check/main.nf index c43501a1..b5d6508f 100644 --- a/subworkflows/local/vcf_chr_check/main.nf +++ b/subworkflows/local/vcf_chr_check/main.nf @@ -5,14 +5,14 @@ include { VCF_CHR_RENAME } from '../vcf_chr_rename' workflow VCF_CHR_CHECK { take: 
ch_vcf // channel: [ [id], vcf, index ] - ch_fasta // channel: [ [id], fasta, fai ] + ch_fasta // channel: [ [genome], fasta, fai ] main: ch_versions = Channel.empty() // Get contig names from the VCF - VCFCHRBFR(ch_vcf.map{ metaV, vcf, csi -> [metaV, vcf] }) + VCFCHRBFR(ch_vcf.map{ meta, vcf, csi -> [meta, vcf] }) ch_versions = ch_versions.mix(VCFCHRBFR.out.versions) // Check if the contig names are the same as the reference @@ -27,7 +27,7 @@ workflow VCF_CHR_CHECK { ch_versions = ch_versions.mix(VCF_CHR_RENAME.out.versions) // Check if modification has solved the problem - VCFCHRAFT(VCF_CHR_RENAME.out.vcf_renamed.map{ metaV, vcf, csi -> [metaV, vcf] }) + VCFCHRAFT(VCF_CHR_RENAME.out.vcf_renamed.map{ meta, vcf, csi -> [meta, vcf] }) ch_versions = ch_versions.mix(VCFCHRAFT.out.versions) chr_disjoint_after = check_chr(VCFCHRAFT.out.chr, VCF_CHR_RENAME.out.vcf_renamed, ch_fasta) @@ -49,7 +49,7 @@ workflow VCF_CHR_CHECK { .mix(ch_vcf_renamed) emit: - vcf = ch_vcf_out // [ meta, vcf, csi ] + vcf = ch_vcf_out // [ [id], vcf, csi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index 8b47c8c1..5a17f3ff 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -30,6 +30,6 @@ workflow VCF_CHR_RENAME { .combine(BCFTOOLS_INDEX.out.csi, by:0) emit: - vcf_renamed = ch_vcf_renamed // [ meta, vcf, csi ] + vcf_renamed = ch_vcf_renamed // [ [id], vcf, csi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_chunk_glimpse/main.nf b/subworkflows/local/vcf_chunk_glimpse/main.nf index 7172b1f2..0a1aa439 100644 --- a/subworkflows/local/vcf_chunk_glimpse/main.nf +++ b/subworkflows/local/vcf_chunk_glimpse/main.nf @@ -5,15 +5,16 @@ include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/spli workflow VCF_CHUNK_GLIMPSE { take: - ch_reference // channel: [ val(meta),vcf ] - ch_map 
// channel (optional): [ meta, map ] + ch_reference // channel: [ [panel, chr], vcf, csi ] + ch_map // channel (optional): [ [chr], map ] main: ch_versions = Channel.empty() - + ch_reference.view() + ch_map.view() // Add chromosome to channel - ch_vcf_csi_chr = ch_reference.map{meta, vcf, csi -> [meta, vcf, csi, meta.chr]} + ch_vcf_csi_chr = ch_reference.map{metaPC, vcf, csi -> [metaPC, vcf, csi, metaPC.chr]} // Make chunks with Glimpse1 GLIMPSE_CHUNK(ch_vcf_csi_chr) @@ -22,10 +23,10 @@ workflow VCF_CHUNK_GLIMPSE { // Rearrange chunks into channel for QUILT ch_chunks_quilt = GLIMPSE_CHUNK.out.chunk_chr .splitText() - .map { metamap, line -> + .map { metaPC, line -> def fields = line.split("\t") def startEnd = fields[2].split(':')[1].split('-') - [metamap, metamap.chr, startEnd[0], startEnd[1]] + [metaPC, metaPC.chr, startEnd[0], startEnd[1]] } // Rearrange chunks into channel for GLIMPSE1 @@ -34,12 +35,20 @@ workflow VCF_CHUNK_GLIMPSE { header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0 ) - .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + .map { metaPC, it -> [metaPC, it["RegionIn"], it["RegionOut"]]} // Make chunks with Glimpse2 (does not work with "sequential" mode) chunk_model = "recursive" - GLIMPSE2_CHUNK ( ch_vcf_csi_chr, ch_map, chunk_model ) + ch_input_glimpse2 = ch_vcf_csi_chr + .map{ + metaPC, vcf, csi, chr -> [metaPC.subMap("chr"), metaPC, vcf, csi, chr] + } + .join(ch_map) + .map{ + metaC, metaPC, vcf, csi, chr, gmap -> [metaPC, vcf, csi, chr, gmap] + } + GLIMPSE2_CHUNK ( ch_input_glimpse2, chunk_model ) ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() ) // Rearrange channels @@ -50,7 +59,7 @@ workflow VCF_CHUNK_GLIMPSE { 'WindowMb', 'NbTotVariants', 'NbComVariants' ], sep: "\t", skip: 0 ) - .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]]} + .map { metaPC, it -> [metaPC, it["RegionBuf"], it["RegionCnk"]]} // Split reference panel in bin files // Segmentation fault occurs in 
small-sized panels @@ -60,7 +69,7 @@ workflow VCF_CHUNK_GLIMPSE { if (params.binaryref == true) { // Create channel to split reference - split_input = ch_reference.combine(ch_chunks_glimpse2, by: 0) + split_input = ch_reference.join(ch_chunks_glimpse2) // Create a binary reference panel for quick reading time GLIMPSE2_SPLITREFERENCE( split_input, ch_map ) @@ -70,9 +79,9 @@ workflow VCF_CHUNK_GLIMPSE { } emit: - chunks_quilt = ch_chunks_quilt // channel: [ val(meta), chr, start, end ] - chunks_glimpse1 = ch_chunks_glimpse1 // channel: [ val(meta), chr, region1, region2 ] - chunks_glimpse2 = ch_chunks_glimpse2 // channel: [ val(meta), chr, region1, region2 ] - binary = ch_bins // channel: [ [meta], bin] + chunks_quilt = ch_chunks_quilt // channel: [ [panel, chr], chr, start, end ] + chunks_glimpse1 = ch_chunks_glimpse1 // channel: [ [panel, chr], chr, region1, region2 ] + chunks_glimpse2 = ch_chunks_glimpse2 // channel: [ [panel, chr], chr, region1, region2 ] + binary = ch_bins // channel: [ [panel, chr], bin] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_concatenate_bcftools/main.nf b/subworkflows/local/vcf_concatenate_bcftools/main.nf index 583b6070..627a2e52 100644 --- a/subworkflows/local/vcf_concatenate_bcftools/main.nf +++ b/subworkflows/local/vcf_concatenate_bcftools/main.nf @@ -4,14 +4,14 @@ include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' workflow VCF_CONCATENATE_BCFTOOLS { take: - ch_vcf_tbi // channel: [ val(meta), vcf, tbi ] + ch_vcf_tbi // channel: [ [id, chr], vcf, tbi ] main: ch_versions = Channel.empty() - // Remove chromosome from meta - ch_vcf_tbi_grouped = ch_vcf_tbi.map{ meta, vcf, tbi -> [['id' : meta.id], vcf, tbi] } + // Keep only id from meta + ch_vcf_tbi_grouped = ch_vcf_tbi.map{ metaI, vcf, tbi -> [metaI.subMap("id"), vcf, tbi] } // Group by ID ch_vcf_tbi_grouped = ch_vcf_tbi_grouped.groupTuple( by:0 ) @@ -28,6 +28,6 @@ workflow VCF_CONCATENATE_BCFTOOLS { ch_vcf_tbi_join = 
BCFTOOLS_CONCAT.out.vcf.join(BCFTOOLS_INDEX.out.tbi) emit: - vcf_tbi_join = ch_vcf_tbi_join // channel: [ meta, vcf, tbi ] + vcf_tbi_join = ch_vcf_tbi_join // channel: [ [id], vcf, tbi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_concordance_glimpse2/main.nf b/subworkflows/local/vcf_concordance_glimpse2/main.nf index 5bc0d74e..3f07ac73 100644 --- a/subworkflows/local/vcf_concordance_glimpse2/main.nf +++ b/subworkflows/local/vcf_concordance_glimpse2/main.nf @@ -6,10 +6,10 @@ include { GUNZIP } from '../../../modules/nf-core/gunzip' workflow VCF_CONCORDANCE_GLIMPSE2 { take: - ch_vcf_emul // VCF file with imputed genotypes [[id, chr, region, panel, simulate, tools], vcf, csi] - ch_vcf_truth // VCF file with truth genotypes [[id, chr, region], vcf, csi] - ch_vcf_freq // VCF file with panel frequencies [[panel, chr], vcf, csi] - ch_region // Regions to process [[chr, region], region] + ch_vcf_emul // VCF file with imputed genotypes [ [id], vcf, csi] + ch_vcf_truth // VCF file with truth genotypes [ [id], vcf, csi] + ch_vcf_freq // VCF file with panel frequencies [ [panel], vcf, csi] + ch_region // Regions to process [ [chr, region], region] main: @@ -40,6 +40,7 @@ workflow VCF_CONCORDANCE_GLIMPSE2 { GUNZIP(GLIMPSE2_CONCORDANCE.out.errors_grp) ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) + ADD_COLUMNS(GUNZIP.out.gunzip) ch_versions = ch_versions.mix(ADD_COLUMNS.out.versions.first()) @@ -52,7 +53,7 @@ workflow VCF_CONCORDANCE_GLIMPSE2 { ch_versions = ch_versions.mix(GAWK.out.versions.first()) emit: - stats = GAWK.out.output // [ meta, txt ] + stats = GAWK.out.output // [ [all], txt ] versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files } diff --git a/subworkflows/local/vcf_impute_glimpse2/main.nf b/subworkflows/local/vcf_impute_glimpse2/main.nf index 3cef9206..f5e9275e 100644 --- a/subworkflows/local/vcf_impute_glimpse2/main.nf +++ b/subworkflows/local/vcf_impute_glimpse2/main.nf 
@@ -6,10 +6,10 @@ include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftoo workflow VCF_IMPUTE_GLIMPSE2 { take: - ch_input // channel (mandatory): [ meta, vcf, csi, infos ] - ch_panel // channel (mandatory): [ meta, vcf, csi, region ] - ch_chunks // channel (optional): [ meta, region1, region2 ] - ch_fasta + ch_input // channel (mandatory): [ [id], vcf, csi, infos ] + ch_panel // channel (mandatory): [ [panel, chr, region], vcf, csi, region ] + ch_chunks // channel (optional): [ [chr], region1, region2 ] + ch_fasta // channel (mandatory): [ [genome], fa, fai ] main: @@ -40,6 +40,7 @@ workflow VCF_IMPUTE_GLIMPSE2 { //Impute with Glimpse2 GLIMPSE2_PHASE(ch_input_glimpse2, ch_fasta) // Error: AC/AN INFO fields in VCF are inconsistent with GT field, update the values in the VCF + ch_versions = ch_versions.mix(GLIMPSE2_PHASE.out.versions) emit: versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/local/vcf_normalize_bcftools/main.nf b/subworkflows/local/vcf_normalize_bcftools/main.nf index 768b58ce..093d7ce2 100644 --- a/subworkflows/local/vcf_normalize_bcftools/main.nf +++ b/subworkflows/local/vcf_normalize_bcftools/main.nf @@ -19,37 +19,46 @@ workflow VCF_NORMALIZE_BCFTOOLS { // Join duplicated biallelic sites into multiallelic records BCFTOOLS_NORM(ch_vcf, ch_fasta) + ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions) // Index multiallelic VCF BCFTOOLS_INDEX_1(BCFTOOLS_NORM.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions) // Join multiallelic VCF and TBI ch_multiallelic_vcf_tbi = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) // Remove all multiallelic records: BCFTOOLS_DEL_MLT_ALL(ch_multiallelic_vcf_tbi, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_DEL_MLT_ALL.out.versions) // Index biallelic VCF BCFTOOLS_INDEX_2(BCFTOOLS_DEL_MLT_ALL.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_2.out.versions) // Join biallelic VCF and TBI ch_biallelic_vcf_tbi = 
BCFTOOLS_DEL_MLT_ALL.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) // (Optional) Remove benchmarking samples (e.g. NA12878) from the reference panel if (!(params.remove_samples == null)){ - BCFTOOLS_REMOVE(ch_biallelic_vcf_tbi, [], [], []) - BCFTOOLS_INDEX_3(BCFTOOLS_REMOVE.out.vcf) - ch_biallelic_vcf_tbi = BCFTOOLS_REMOVE.out.vcf.join(BCFTOOLS_INDEX_3.out.tbi) + BCFTOOLS_DEL_SPL(ch_biallelic_vcf_tbi, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_DEL_SPL.out.versions) + + BCFTOOLS_INDEX_3(BCFTOOLS_DEL_SPL.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_3.out.versions) + + ch_biallelic_vcf_tbi = BCFTOOLS_DEL_SPL.out.vcf.join(BCFTOOLS_INDEX_3.out.tbi) } // Convert VCF to Hap and Legend files BCFTOOLS_CONVERT(ch_biallelic_vcf_tbi, ch_fasta, []) + ch_versions = ch_versions.mix(BCFTOOLS_CONVERT.out.versions) // Output hap and legend files ch_hap_legend = BCFTOOLS_CONVERT.out.hap.join(BCFTOOLS_CONVERT.out.legend) emit: vcf_tbi = ch_biallelic_vcf_tbi // channel: [ [id, chr], vcf, tbi ] - hap_legend = ch_hap_legend // channel: [ [id, chr] '.hap', '.legend' ] + hap_legend = ch_hap_legend // channel: [ [id, chr], '.hap', '.legend' ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_phase_panel/main.nf b/subworkflows/local/vcf_phase_panel/main.nf index d100c189..fd006506 100644 --- a/subworkflows/local/vcf_phase_panel/main.nf +++ b/subworkflows/local/vcf_phase_panel/main.nf @@ -2,10 +2,10 @@ include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/n workflow VCF_PHASE_PANEL { take: - ch_vcf // channel: [ [id, chr], vcf, index ] - ch_panel_norm - ch_panel_sites - ch_panel_tsv + ch_vcf // channel: [ [id, chr, region], vcf, index ] + ch_panel_norm // channel: [ [panel, chr], norm, index ] + ch_panel_sites // channel: [ [panel, chr], sites, index ] + ch_panel_tsv // channel: [ [panel, chr], tsv, index ] main: @@ -14,7 +14,7 @@ workflow VCF_PHASE_PANEL { // Phase panel if (params.phased == false) { VCF_PHASE_SHAPEIT5(ch_vcf - .map 
{ meta, vcf, csi -> [meta, vcf, csi, [], meta.region] }, + .map { metaICR, vcf, csi -> [metaICR, vcf, csi, [], metaICR.region] }, Channel.of([[],[],[]]).collect(), Channel.of([[],[],[]]).collect(), Channel.of([[],[]]).collect()) @@ -26,15 +26,15 @@ workflow VCF_PHASE_PANEL { } ch_panel = ch_panel_norm - .combine(ch_panel_sites, by: 0) - .combine(ch_panel_tsv, by: 0) - .combine(ch_panel_phased, by: 0) - .map{ metaIC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [[panel:metaIC.id, chr:metaIC.chr ], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] - } + .combine(ch_panel_sites, by: 0) + .combine(ch_panel_tsv, by: 0) + .combine(ch_panel_phased, by: 0) + .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + -> [[panel:metaPC.id, chr:metaPC.chr ], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] + } emit: - vcf_tbi = ch_panel_phased - panel = ch_panel + vcf_tbi = ch_panel_phased // channel: [ [id, chr], vcf, index ] + panel = ch_panel // channel: [ [panel, chr], norm, n_index, sites, s_index, tsv, t_index, phased, p_index ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_region/main.nf b/subworkflows/local/vcf_region/main.nf index e7aae8d4..04d568a0 100644 --- a/subworkflows/local/vcf_region/main.nf +++ b/subworkflows/local/vcf_region/main.nf @@ -5,19 +5,19 @@ include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcft workflow VCF_REGION { take: ch_vcf // channel: [ [id], vcf ] - ch_region // channel: [ [region], val(region) ] - ch_fasta // channel: [ fasta ] + ch_region // channel: [ [chr, region], region ] main: ch_versions = Channel.empty() - // Filter the region of interest of the panel file + // Filter the region of interest of the vcf file ch_input_region = ch_vcf - .combine(ch_fasta) .combine(ch_region) - .map{ metaI, vcf, index, fasta, metaR, region -> - [metaI + metaR, vcf, index, region+",chr"+region]} + .map{ + metaI, vcf, index, metaCR, region 
-> + [metaI + metaCR, vcf, index, region+",chr"+region] + } VIEW_VCF_REGION(ch_input_region, [], [], []) ch_versions = ch_versions.mix(VIEW_VCF_REGION.out.versions.first()) @@ -29,7 +29,7 @@ workflow VCF_REGION { .combine(BCFTOOLS_INDEX.out.csi) emit: - vcf_region = ch_vcf_region // channel: [ metaIR, vcf, index ] + vcf_region = ch_vcf_region // channel: [ [id, chr, region], vcf, index ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index a2a4c473..68db5f6f 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -39,8 +39,8 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { ch_panel_tsv = TABIX_BGZIP.out.output.combine(TABIX_TABIX.out.tbi, by: 0) emit: - panel_tsv = ch_panel_tsv - vcf_tbi = ch_vcf - panel_sites = ch_panel_sites + panel_tsv = ch_panel_tsv // channel: [ [id, chr], tsv, tbi ] + vcf_tbi = ch_vcf // channel: [ [id, chr], vcf, tbi ] + panel_sites = ch_panel_sites // channel: [ [id, chr], vcf, csi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 2f60b2a0..2b757cde 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -136,8 +136,8 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(VCF_SITES_EXTRACT_BCFTOOLS.out.versions) // Prepare posfile stitch - PREPARE_POSFILE_TSV(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites, ch_fasta) - ch_versions = ch_versions.mix(PREPARE_POSFILE_TSV.out.versions) + PREPARE_POSFILE_TSV(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) + ch_versions = ch_versions.mix(PREPARE_POSFILE_TSV.out.versions) // If required, phase panel (currently not working, a test should be added) // Phase panel with tool of choice (e.g. 
SHAPEIT5) @@ -167,7 +167,7 @@ workflow PHASEIMPUTE { // Create chunks from reference VCF VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) - ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) + ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) } if (params.step.split(',').contains("impute") || params.step.split(',').contains("all")) { @@ -211,7 +211,7 @@ workflow PHASEIMPUTE { // Concatenate by chromosomes CONCAT_GLIMPSE1(output_glimpse1) - ch_versions = ch_versions.mix(CONCAT_GLIMPSE1.out.versions) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE1.out.versions) // Add results to input validate ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE1.out.vcf_tbi_join) @@ -242,7 +242,7 @@ workflow PHASEIMPUTE { // Obtain the user's posfile if provided or calculate it from ref panel file if (params.posfile ) { // User supplied posfile - ch_posfile = ch_posfile + ch_posfile = ch_posfile } else if (params.panel && params.step.split(',').contains("panelprep")) { // Panelprep posfile ch_posfile = PREPARE_POSFILE_TSV.out.posfile } else { @@ -250,20 +250,20 @@ workflow PHASEIMPUTE { } // Prepare inputs PREPARE_INPUT_STITCH(ch_posfile, ch_fasta, ch_input_impute) - ch_versions = ch_versions.mix(PREPARE_INPUT_STITCH.out.versions) + ch_versions = ch_versions.mix(PREPARE_INPUT_STITCH.out.versions) // Impute with STITCH BAM_IMPUTE_STITCH ( PREPARE_INPUT_STITCH.out.stitch_parameters, PREPARE_INPUT_STITCH.out.stitch_samples, ch_fasta ) - ch_versions = ch_versions.mix(BAM_IMPUTE_STITCH.out.versions) + ch_versions = ch_versions.mix(BAM_IMPUTE_STITCH.out.versions) // Output channel to concat ch_impute_output = ch_impute_output.mix(BAM_IMPUTE_STITCH.out.vcf_tbi) // Concatenate by chromosomes CONCAT_STITCH(BAM_IMPUTE_STITCH.out.vcf_tbi) - ch_versions = ch_versions.mix(CONCAT_STITCH.out.versions) + ch_versions = ch_versions.mix(CONCAT_STITCH.out.versions) // Add results to input validate ch_input_validate = ch_input_validate.mix(CONCAT_STITCH.out.vcf_tbi_join) @@ -282,7 
+282,7 @@ workflow PHASEIMPUTE { // Concatenate by chromosomes CONCAT_QUILT(BAM_IMPUTE_QUILT.out.vcf_tbi) - ch_versions = ch_versions.mix(CONCAT_QUILT.out.versions) + ch_versions = ch_versions.mix(CONCAT_QUILT.out.versions) // Add results to input validate ch_input_validate = ch_input_validate.mix(CONCAT_QUILT.out.vcf_tbi_join) @@ -327,7 +327,7 @@ workflow PHASEIMPUTE { ch_region ) ch_multiqc_files = ch_multiqc_files.mix(VCF_CONCORDANCE_GLIMPSE2.out.multiqc_files) - ch_versions = ch_versions.mix(VCF_CONCORDANCE_GLIMPSE2.out.versions) + ch_versions = ch_versions.mix(VCF_CONCORDANCE_GLIMPSE2.out.versions) } if (params.step.split(',').contains("refine")) { From c74df72669cced9ca2ef13b945944106c67d2afe Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 15:12:26 +0200 Subject: [PATCH 012/110] Delete white space --- subworkflows/local/vcf_concordance_glimpse2/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/vcf_concordance_glimpse2/main.nf b/subworkflows/local/vcf_concordance_glimpse2/main.nf index 3f07ac73..e7d696c4 100644 --- a/subworkflows/local/vcf_concordance_glimpse2/main.nf +++ b/subworkflows/local/vcf_concordance_glimpse2/main.nf @@ -40,7 +40,7 @@ workflow VCF_CONCORDANCE_GLIMPSE2 { GUNZIP(GLIMPSE2_CONCORDANCE.out.errors_grp) ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) - + ADD_COLUMNS(GUNZIP.out.gunzip) ch_versions = ch_versions.mix(ADD_COLUMNS.out.versions.first()) From 0ee1847a7a17fdc0b7adb7fe17043baf665216d8 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 15:13:32 +0200 Subject: [PATCH 013/110] Patch glimpse2 chunk for map --- modules.json | 153 +++++++++++++----- .../glimpse2/chunk/glimpse2-chunk.diff | 15 ++ 2 files changed, 130 insertions(+), 38 deletions(-) create mode 100644 modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff diff --git a/modules.json b/modules.json index 101f6723..57ed12bf 100644 --- a/modules.json +++ b/modules.json @@ -8,164 +8,229 @@ 
"bcftools/annotate": { "branch": "master", "git_sha": "2ad29c2aed06d815d9f68ad7ba20b3b1c574ce9c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/concat/bcftools-concat.diff" }, "bcftools/convert": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["multiple_impute_glimpse2", "vcf_impute_glimpse", "vcf_phase_shapeit5"] + "installed_by": [ + "multiple_impute_glimpse2", + "vcf_impute_glimpse", + "vcf_phase_shapeit5" + ] }, "bcftools/mpileup": { "branch": "master", "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/query": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/view/bcftools-view.diff" }, "bedtools/makewindows": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": 
"de45447d060b8c8b98575bc637a4a575fd0638e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gawk": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "glimpse/chunk": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse/ligate": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse/phase": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse2/chunk": { "branch": "master", "git_sha": "14ba46490cae3c78ed8e8f48d2c0f8f3be1e7c03", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ], + "patch": "modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff" }, "glimpse2/concordance": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "glimpse2/ligate": { "branch": "master", "git_sha": "09d793219114004f268b98663b12f8062097a8c5", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ] }, "glimpse2/phase": { "branch": "master", "git_sha": "9c71d32e372650e8bb3e1fb15339017aad5e3f7f", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ] }, "glimpse2/splitreference": { "branch": "master", "git_sha": "fa12139827a18b324bd63fce654818586a8e9cc7", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ] }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", - "installed_by": ["modules"] + "installed_by": [ + 
"modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "quilt/quilt": { "branch": "master", "git_sha": "46265545d61e7f482adf40de941cc9a94e479bbe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/coverage": { "branch": "master", "git_sha": "38afbe42f7db7f19c7a89607c0a71c68f3be3131", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" }, "samtools/faidx": { "branch": "master", "git_sha": "f153f1f10e1083c49935565844cccb7453021682", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "shapeit5/ligate": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "shapeit5/phasecommon": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "stitch": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/stitch/stitch.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] } } }, @@ -174,35 +239,47 @@ "multiple_impute_glimpse2": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_impute_glimpse": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_phase_shapeit5": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff b/modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff new file mode 100644 index 00000000..224fae67 --- /dev/null +++ b/modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff @@ -0,0 +1,15 @@ +Changes in module 'nf-core/glimpse2/chunk' +--- modules/nf-core/glimpse2/chunk/main.nf ++++ modules/nf-core/glimpse2/chunk/main.nf +@@ -17,8 +17,7 @@ + 'biocontainers/glimpse-bio:2.0.0--hf340a29_0' }" + + input: +- tuple val(meta) , path(input), path(input_index), val(region) +- tuple val(meta2), path(map) ++ tuple val(meta) , path(input), path(input_index), val(region), path(map) + val(model) + + output: + +************************************************************ From 7f9a558ff4cf5eb2ee2483817c528b5c3b5d954a Mon Sep 17 00:00:00 2001 From: 
LouisLeNezet Date: Wed, 15 May 2024 15:19:42 +0200 Subject: [PATCH 014/110] Fix prettier --- modules.json | 152 +++++++++++++-------------------------------------- 1 file changed, 38 insertions(+), 114 deletions(-) diff --git a/modules.json b/modules.json index 57ed12bf..0d14775b 100644 --- a/modules.json +++ b/modules.json @@ -8,229 +8,165 @@ "bcftools/annotate": { "branch": "master", "git_sha": "2ad29c2aed06d815d9f68ad7ba20b3b1c574ce9c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/concat/bcftools-concat.diff" }, "bcftools/convert": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "multiple_impute_glimpse2", - "vcf_impute_glimpse", - "vcf_phase_shapeit5" - ] + "installed_by": ["multiple_impute_glimpse2", "vcf_impute_glimpse", "vcf_phase_shapeit5"] }, "bcftools/mpileup": { "branch": "master", "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/query": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], 
"patch": "modules/nf-core/bcftools/view/bcftools-view.diff" }, "bedtools/makewindows": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gawk": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "glimpse/chunk": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse/ligate": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse/phase": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse2/chunk": { "branch": "master", "git_sha": "14ba46490cae3c78ed8e8f48d2c0f8f3be1e7c03", - "installed_by": [ - "multiple_impute_glimpse2" - ], + "installed_by": ["multiple_impute_glimpse2"], "patch": "modules/nf-core/glimpse2/chunk/glimpse2-chunk.diff" }, "glimpse2/concordance": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "glimpse2/ligate": { "branch": "master", "git_sha": "09d793219114004f268b98663b12f8062097a8c5", - "installed_by": [ - "multiple_impute_glimpse2" - ] + "installed_by": ["multiple_impute_glimpse2"] }, "glimpse2/phase": { "branch": "master", "git_sha": "9c71d32e372650e8bb3e1fb15339017aad5e3f7f", - "installed_by": [ - "multiple_impute_glimpse2" - ] + "installed_by": ["multiple_impute_glimpse2"] }, "glimpse2/splitreference": { 
"branch": "master", "git_sha": "fa12139827a18b324bd63fce654818586a8e9cc7", - "installed_by": [ - "multiple_impute_glimpse2" - ] + "installed_by": ["multiple_impute_glimpse2"] }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "quilt/quilt": { "branch": "master", "git_sha": "46265545d61e7f482adf40de941cc9a94e479bbe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/coverage": { "branch": "master", "git_sha": "38afbe42f7db7f19c7a89607c0a71c68f3be3131", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" }, "samtools/faidx": { "branch": "master", "git_sha": "f153f1f10e1083c49935565844cccb7453021682", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "shapeit5/ligate": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "shapeit5/phasecommon": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "stitch": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": 
"modules/nf-core/stitch/stitch.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -239,47 +175,35 @@ "multiple_impute_glimpse2": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_impute_glimpse": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_phase_shapeit5": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From d99c6feb4d935798e1f76a17a5e781e0ee144aee Mon Sep 17 00:00:00 2001 From: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> Date: Wed, 15 May 2024 22:04:02 +0200 Subject: [PATCH 015/110] Update subworkflows/local/vcf_chunk_glimpse/main.nf Co-authored-by: Anabella Trigila <18577080+atrigila@users.noreply.github.com> --- subworkflows/local/vcf_chunk_glimpse/main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/subworkflows/local/vcf_chunk_glimpse/main.nf 
b/subworkflows/local/vcf_chunk_glimpse/main.nf index 0a1aa439..f01c70ca 100644 --- a/subworkflows/local/vcf_chunk_glimpse/main.nf +++ b/subworkflows/local/vcf_chunk_glimpse/main.nf @@ -11,8 +11,6 @@ workflow VCF_CHUNK_GLIMPSE { main: ch_versions = Channel.empty() - ch_reference.view() - ch_map.view() // Add chromosome to channel ch_vcf_csi_chr = ch_reference.map{metaPC, vcf, csi -> [metaPC, vcf, csi, metaPC.chr]} From 5c5e8b3abc11500bcfe3753f82083b4026b3f4bd Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 22:08:18 +0200 Subject: [PATCH 016/110] Move to BCFTOOLS_INDEX for glimpse sbwf --- conf/steps/imputation_glimpse1.config | 4 ++-- .../nf-core/vcf_impute_glimpse/main.nf | 22 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index b79e2f6c..868c4143 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -62,7 +62,7 @@ process { publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:INDEX_PHASE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { publishDir = [ enabled: false ] } @@ -70,7 +70,7 @@ process { ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.ligate" } } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:INDEX_LIGATE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { publishDir = [ path: { "${params.outdir}/imputation/glimpse1" }, mode: params.publish_dir_mode, diff --git a/subworkflows/nf-core/vcf_impute_glimpse/main.nf b/subworkflows/nf-core/vcf_impute_glimpse/main.nf index 94262e34..e2ff6b1b 100644 --- a/subworkflows/nf-core/vcf_impute_glimpse/main.nf +++ b/subworkflows/nf-core/vcf_impute_glimpse/main.nf @@ -1,8 +1,8 @@ -include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk/main' -include { GLIMPSE_PHASE } from 
'../../../modules/nf-core/glimpse/phase/main' -include { GLIMPSE_LIGATE } from '../../../modules/nf-core/glimpse/ligate/main' -include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index/main.nf' -include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index/main.nf' +include { GLIMPSE_CHUNK } from '../../../modules/nf-core/glimpse/chunk/main' +include { GLIMPSE_PHASE } from '../../../modules/nf-core/glimpse/phase/main' +include { GLIMPSE_LIGATE } from '../../../modules/nf-core/glimpse/ligate/main' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index/main.nf' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index/main.nf' workflow VCF_IMPUTE_GLIMPSE { @@ -33,13 +33,13 @@ workflow VCF_IMPUTE_GLIMPSE { GLIMPSE_PHASE ( phase_input ) // [meta, vcf, index, sample_infos, regionin, regionout, ref, ref_index, map] ch_versions = ch_versions.mix(GLIMPSE_PHASE.out.versions ) - INDEX_PHASE ( GLIMPSE_PHASE.out.phased_variants ) - ch_versions = ch_versions.mix( INDEX_PHASE.out.versions ) + BCFTOOLS_INDEX_1 ( GLIMPSE_PHASE.out.phased_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions ) // Ligate all phased files in one and index it ligate_input = GLIMPSE_PHASE.out.phased_variants .groupTuple( by: 0 ) - .combine( INDEX_PHASE.out.csi + .combine( BCFTOOLS_INDEX_1.out.csi .groupTuple( by: 0 ), by: 0 ) @@ -47,13 +47,13 @@ workflow VCF_IMPUTE_GLIMPSE { GLIMPSE_LIGATE ( ligate_input ) ch_versions = ch_versions.mix(GLIMPSE_LIGATE.out.versions ) - INDEX_LIGATE ( GLIMPSE_LIGATE.out.merged_variants ) - ch_versions = ch_versions.mix( INDEX_LIGATE.out.versions ) + BCFTOOLS_INDEX_2 ( GLIMPSE_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions ) emit: chunk_chr = GLIMPSE_CHUNK.out.chunk_chr // channel: [ val(meta), txt ] merged_variants = GLIMPSE_LIGATE.out.merged_variants // channel: [ val(meta), 
bcf ] - merged_variants_index = INDEX_LIGATE.out.csi // channel: [ val(meta), csi ] + merged_variants_index = BCFTOOLS_INDEX_2.out.csi // channel: [ val(meta), csi ] versions = ch_versions // channel: [ versions.yml ] } From acd46d6c450d2ea3ef23695cbeba19bfa3d58daf Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 20:34:41 +0200 Subject: [PATCH 017/110] Move ci to nf-test --- .github/workflows/ci.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03f6bcdc..d55b03e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,7 @@ on: env: NXF_ANSI_LOG: false + NFTEST_VER: "0.8.4" concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -18,11 +19,12 @@ concurrency: jobs: test: - name: Run pipeline with test data + name: Run nf-test with standard profiles # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/phaseimpute') }}" runs-on: ubuntu-latest strategy: + fail-fast: false matrix: NXF_VER: - "23.04.0" @@ -44,9 +46,20 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Install nf-test + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER + sudo mv nf-test /usr/local/bin/ + - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile "${{ matrix.TEST_PROFILE }}",docker --outdir ./results + nf-test test --tag "${{ matrix.TEST_PROFILE }}"" --profile docker --junitxml=test.xml + + - name: Output log on failure + if: failure() + run: | + sudo apt install 
bat > /dev/null + batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/output/pipeline_info/software_versions.yml From 17a840b678ccbc69b5f12994acb1c8d64e1b4c4e Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 20:37:06 +0200 Subject: [PATCH 018/110] Fix config file --- conf/steps/panel_prep.config | 4 ++++ conf/steps/validation.config | 13 ++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 8d793df4..e292dd01 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -152,6 +152,10 @@ process { publishDir = [ enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_PANEL:BCFTOOLS_INDEX' { + ext.args = "--tbi" + } + // Subworkflow: Make chunks withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:.*' { publishDir = [ diff --git a/conf/steps/validation.config b/conf/steps/validation.config index a1c22475..7a662e85 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -71,17 +71,16 @@ process { publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { - ext.args = "'(NR == 1) || (FNR > 1)'" // Skip header line - ext.suffix = { "txt" } - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GUNZIP' { publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:ADD_COLUMNS' { - ext.prefix = { "${meta.id}_D${meta.depth}_P${meta.panel}_SNP" } - publishDir = [ enabled: false ] + ext.prefix = { "${meta.id}_D${meta.depth}_P${meta.panel}_C${meta.chr}_SNP" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { + ext.args = "'(NR == 1) || (FNR > 1)'" // Skip header line + ext.suffix = { "txt" } } } From a346e9e41546b0e97aa106ff6401399d814f49c5 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 20:38:15 +0200 Subject: [PATCH 
019/110] Add documentation --- CHANGELOG.md | 1 + docs/development.md | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52cf7bb4..b189d41e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a csv, update modules and subworkflows (glimpse1/2 and shapeit5) - [#40](https://github.com/nf-core/phaseimpute/pull/40) - Add STITCH method. Reorganize panelprep subworkflows. - [#51](https://github.com/nf-core/phaseimpute/pull/51) - Update all process and fix linting errors. Remove fastqc added by the template. +- [#56](https://github.com/nf-core/phaseimpute/pull/56) - Move to nf-test to check the output files names generated. Fix validation and concatenation by chromosomes missing. ### `Fixed` diff --git a/docs/development.md b/docs/development.md index c2bd3d19..53527908 100644 --- a/docs/development.md +++ b/docs/development.md @@ -21,6 +21,8 @@ ## Run tests +### Launch with Nextflow + ```bash nextflow run main.nf -profile singularity,test --outdir results -resume nextflow run main.nf -profile singularity,test_sim --outdir results -resume @@ -29,6 +31,12 @@ nextflow run main.nf -profile singularity,test_all --outdir results -resume nextflow run main.nf -profile singularity,test_quilt --outdir results -resume ``` +### Launch with nf-test + +```bash +nf-test test --verbose --profile singularity +``` + ## Problematic ### Channel management and combination From 399f814b09e294afc446d526f793486739028f07 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 20:40:05 +0200 Subject: [PATCH 020/110] Fix sbwf and concatenation of chromosomes --- subworkflows/local/bam_region/main.nf | 2 +- subworkflows/local/vcf_concatenate_bcftools/main.nf | 2 +- subworkflows/local/vcf_concordance_glimpse2/main.nf | 6 +++--- workflows/phaseimpute/main.nf | 10 
+++++----- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/bam_region/main.nf b/subworkflows/local/bam_region/main.nf index 1078bdb1..c28ebd82 100644 --- a/subworkflows/local/bam_region/main.nf +++ b/subworkflows/local/bam_region/main.nf @@ -34,6 +34,6 @@ workflow BAM_REGION { .combine(SAMTOOLS_INDEX.out.bai, by: 0) emit: - bam_region = ch_bam_region // channel: [ metaIGCR, bam, index ] + bam_region = ch_bam_region // channel: [ [id, chr, region], bam, index ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_concatenate_bcftools/main.nf b/subworkflows/local/vcf_concatenate_bcftools/main.nf index 627a2e52..70ea926c 100644 --- a/subworkflows/local/vcf_concatenate_bcftools/main.nf +++ b/subworkflows/local/vcf_concatenate_bcftools/main.nf @@ -11,7 +11,7 @@ workflow VCF_CONCATENATE_BCFTOOLS { ch_versions = Channel.empty() // Keep only id from meta - ch_vcf_tbi_grouped = ch_vcf_tbi.map{ metaI, vcf, tbi -> [metaI.subMap("id"), vcf, tbi] } + ch_vcf_tbi_grouped = ch_vcf_tbi.map{ metaI, vcf, tbi -> [metaI.subMap("id") + ["chr": "all"], vcf, tbi] } // Group by ID ch_vcf_tbi_grouped = ch_vcf_tbi_grouped.groupTuple( by:0 ) diff --git a/subworkflows/local/vcf_concordance_glimpse2/main.nf b/subworkflows/local/vcf_concordance_glimpse2/main.nf index e7d696c4..5e37f710 100644 --- a/subworkflows/local/vcf_concordance_glimpse2/main.nf +++ b/subworkflows/local/vcf_concordance_glimpse2/main.nf @@ -6,8 +6,8 @@ include { GUNZIP } from '../../../modules/nf-core/gunzip' workflow VCF_CONCORDANCE_GLIMPSE2 { take: - ch_vcf_emul // VCF file with imputed genotypes [ [id], vcf, csi] - ch_vcf_truth // VCF file with truth genotypes [ [id], vcf, csi] + ch_vcf_emul // VCF file with imputed genotypes [ [id, chr, region, panel], vcf, csi] + ch_vcf_truth // VCF file with truth genotypes [ [id, chr, region, panel], vcf, csi] ch_vcf_freq // VCF file with panel frequencies [ [panel], vcf, csi] ch_region // Regions to process [ 
[chr, region], region] @@ -21,7 +21,7 @@ workflow VCF_CONCORDANCE_GLIMPSE2 { .combine(ch_vcf_freq) .combine(ch_region.map{[it[1]]}.collect().toList()) .map{metaI, emul, e_csi, truth, t_csi, metaP, freq, f_csi, regions -> - [metaI, emul, e_csi, truth, t_csi, freq, f_csi, [], regions] + [metaI + ["panel":metaP.id], emul, e_csi, truth, t_csi, freq, f_csi, [], regions] } GLIMPSE2_CONCORDANCE ( diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 2b757cde..99dfa3c4 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -310,14 +310,14 @@ workflow PHASEIMPUTE { ch_multiqc_files = ch_multiqc_files.mix(GL_TRUTH.out.multiqc_files) ch_versions = ch_versions.mix(GL_TRUTH.out.versions) + // Concatenate by chromosomes + CONCAT_TRUTH(GL_TRUTH.out.vcf) + ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + // Mix the original vcf and the computed vcf ch_truth_vcf = ch_truth.vcf .map { [it[0], it[1], it[2]] } - .mix(GL_TRUTH.out.vcf) - - // Concatenate by chromosomes - // CONCAT_TRUTH(ch_truth_vcf) - // ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + .mix(CONCAT_TRUTH.out.vcf_tbi_join) // Compute concordance analysis VCF_CONCORDANCE_GLIMPSE2( From ba57af75721fe0f500746a069c7b2055a4676616 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 20:40:37 +0200 Subject: [PATCH 021/110] Delete whitespace --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index f0747cc9..0cb6d0dd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,7 +224,7 @@ profiles { test_all { includeConfig 'conf/test_all.config' } test_quilt { includeConfig 'conf/test_quilt.config' } test_stitch { includeConfig 'conf/test_stitch.config' } - test_glimpse2 { includeConfig 'conf/test_glimpse2.config' } + test_glimpse2 { includeConfig 'conf/test_glimpse2.config' } } From ada2331f8592335c214f5537dcfdbc1fcd90753c Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: 
Fri, 17 May 2024 20:41:05 +0200 Subject: [PATCH 022/110] Fix nf-test and assert files name. Add snapshot --- tests/pipeline/test_all.nf.test | 96 +++++++-- tests/pipeline/test_all.nf.test.snap | 286 +++++++++++++++++++++++++++ 2 files changed, 368 insertions(+), 14 deletions(-) create mode 100644 tests/pipeline/test_all.nf.test.snap diff --git a/tests/pipeline/test_all.nf.test b/tests/pipeline/test_all.nf.test index cb401dc5..6d5d7b0e 100644 --- a/tests/pipeline/test_all.nf.test +++ b/tests/pipeline/test_all.nf.test @@ -6,10 +6,13 @@ nextflow_pipeline { tag "pipeline/phaseimpute" + test("Check test_glimpse1") { + tag "test_glimpse1" config "../../conf/test.config" when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_memory = "5.GB" } @@ -18,16 +21,25 @@ nextflow_pipeline { then { assertAll( { assert workflow.success }, - { assert path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip().size() == 1756 } + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip.size() + ).match() + } ) } } - - /*test("Check test_glimpse2") { + /* + test("Check test_glimpse2") { + tag "test_glimpse2" config "../../conf/test_glimpse2.config" when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_memory = "5.GB" } @@ -36,16 +48,24 @@ nextflow_pipeline { then { assertAll( { assert workflow.success }, - { assert path("$outputDir/imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz").linesGzip().size() == 1756 } + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + 
path("$outputDir/imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz").linesGzip.size() + ).match() + } ) } - }*/ test("Check test_quilt") { + tag "test_quilt" config "../../conf/test_quilt.config" when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_memory = "5.GB" } @@ -54,16 +74,25 @@ nextflow_pipeline { then { assertAll( { assert workflow.success }, - { assert path("$outputDir/imputation/quilt/concat/NA12878_quilt.vcf.gz").linesGzip().size() == 1756 } + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/quilt/concat/NA12878_quilt.vcf.gz").linesGzip.size() + ).match() + } ) } } test("Check test_stitch") { + tag "test_stitch" config "../../conf/test_stitch.config" when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_memory = "5.GB" } @@ -72,16 +101,25 @@ nextflow_pipeline { then { assertAll( { assert workflow.success }, - { assert path("$outputDir/imputation/stitch/concat/NA12878_stitch.vcf.gz").linesGzip().size() == 1756 } + { assert snapshot( + path("${outputDir}/imputation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/stitch/concat/all_samples_stitch.vcf.gz").linesGzip.size() + ).match() + } ) } } test("Check test_sim") { + tag "test_sim" config "../../conf/test_sim.config" when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_memory = "5.GB" } @@ -91,17 +129,21 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/simulation/"), + path("${outputDir}/simulation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), ).match() } ) } - } 
test("Check test_validate") { + tag "test_validate" config "../../conf/test_validate.config" when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_memory = "5.GB" } @@ -111,7 +153,10 @@ nextflow_pipeline { assertAll( { assert workflow.success }, { assert snapshot( - path("$outputDir/validate/"), + path("${outputDir}/validation/") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten() ).match() } ) } @@ -119,9 +164,11 @@ nextflow_pipeline { } test("Check test_all") { + tag "test_all" config "../../conf/test_all.config" when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_memory = "5.GB" } @@ -130,13 +177,34 @@ nextflow_pipeline { then { assertAll( { assert workflow.success }, - { assert path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip().size() == 1756 } { assert snapshot( - path("$outputDir/simulation/"), - ).match() } + path("${outputDir}/simulation") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/imputation") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/prep_panel") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("${outputDir}/validation") + .list() + .collect { getRecursiveFileNames(it, outputDir) } + .flatten(), + path("$outputDir/imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz").linesGzip.size() + ).match() + } ) } - } +} +def getRecursiveFileNames(fileOrDir, outputDir) { + if(file(fileOrDir.toString()).isDirectory()) { + return fileOrDir.list().collect { getRecursiveFileNames(it, outputDir) } + } + return fileOrDir.toString().replace("${outputDir}/", "") } diff --git a/tests/pipeline/test_all.nf.test.snap b/tests/pipeline/test_all.nf.test.snap new file mode 100644 index 
00000000..eeba142e --- /dev/null +++ b/tests/pipeline/test_all.nf.test.snap @@ -0,0 +1,286 @@ +{ + "Check test_stitch": { + "content": [ + [ + "imputation/stitch/RData/EM.all.chr22.RData", + "imputation/stitch/RData/end.chr22.RData", + "imputation/stitch/RData/endEM.chr22.RData", + "imputation/stitch/RData/sampleNames.chr22.RData", + "imputation/stitch/RData/start.chr22.RData", + "imputation/stitch/RData/startEM.chr22.RData", + "imputation/stitch/concat/all_samples_stitch.vcf.gz", + "imputation/stitch/concat/all_samples_stitch.vcf.gz.tbi", + "imputation/stitch/concat/versions.yml", + "imputation/stitch/input/sample.1.input.chr22.RData", + "imputation/stitch/input/sample.2.input.chr22.RData", + "imputation/stitch/input/sample.3.input.chr22.RData", + "imputation/stitch/plots/alphaMat.chr22.all.s.1.png", + "imputation/stitch/plots/alphaMat.chr22.normalized.s.1.png", + "imputation/stitch/plots/hapSum.chr22.s.1.png", + "imputation/stitch/plots/hapSum_log.chr22.s.1.png", + "imputation/stitch/plots/metricsForPostImputationQC.chr22.sample.jpg", + "imputation/stitch/plots/metricsForPostImputationQCChromosomeWide.chr22.sample.jpg", + "imputation/stitch/plots/r2.chr22.goodonly.jpg", + "imputation/stitch/stitch.chr22.vcf.gz", + "imputation/stitch/versions.yml" + ], + 927 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T19:35:17.258722111" + }, + "Check test_all": { + "content": [ + [ + "simulation/NA12878_D1_Rchr21_16570000-16610000.bam", + "simulation/NA12878_D1_Rchr21_16570000-16610000.bam.bai", + "simulation/NA12878_D1_Rchr22_16570000-16610000.bam", + "simulation/NA12878_D1_Rchr22_16570000-16610000.bam.bai", + "simulation/NA12878_Rchr21_16570000-16610000.stats.txt", + "simulation/NA12878_Rchr22_16570000-16610000.stats.txt", + "simulation/NA19401_D1_Rchr21_16570000-16610000.bam", + "simulation/NA19401_D1_Rchr21_16570000-16610000.bam.bai", + "simulation/NA19401_D1_Rchr22_16570000-16610000.bam", + 
"simulation/NA19401_D1_Rchr22_16570000-16610000.bam.bai", + "simulation/NA19401_Rchr21_16570000-16610000.stats.txt", + "simulation/NA19401_Rchr22_16570000-16610000.stats.txt", + "simulation/NA20359_D1_Rchr21_16570000-16610000.bam", + "simulation/NA20359_D1_Rchr21_16570000-16610000.bam.bai", + "simulation/NA20359_D1_Rchr22_16570000-16610000.bam", + "simulation/NA20359_D1_Rchr22_16570000-16610000.bam.bai", + "simulation/NA20359_Rchr21_16570000-16610000.stats.txt", + "simulation/NA20359_Rchr22_16570000-16610000.stats.txt" + ], + [ + "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", + 
"imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/versions.yml", + "imputation/glimpse1/versions.yml" + ], + [ + "prep_panel/chunks/glimpse1/1000GP.s.norel_chr21_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse1/1000GP.s.norel_chr22_chunks_glimpse1.txt", + "prep_panel/chunks/glimpse1/versions.yml", + "prep_panel/chunks/glimpse2/1000GP.s.norel_chr21_chunks_glimpse2.txt", + "prep_panel/chunks/glimpse2/1000GP.s.norel_chr22_chunks_glimpse2.txt", + "prep_panel/chunks/glimpse2/versions.yml", + "prep_panel/haplegend/1000GP.s.norel_chr21.hap.gz", + "prep_panel/haplegend/1000GP.s.norel_chr21.legend.gz", + "prep_panel/haplegend/1000GP.s.norel_chr21.samples", + "prep_panel/haplegend/1000GP.s.norel_chr22.hap.gz", + "prep_panel/haplegend/1000GP.s.norel_chr22.legend.gz", + "prep_panel/haplegend/1000GP.s.norel_chr22.samples", + "prep_panel/haplegend/versions.yml", + "prep_panel/posfile/1000GP.s.norel_chr21_posfile_stitch.txt", + "prep_panel/posfile/1000GP.s.norel_chr22_posfile_stitch.txt", + "prep_panel/posfile/versions.yml", + "prep_panel/sites/tsv/1000GP.s.norel_chr21_glimpse1_sites_tsv.txt.gz", + "prep_panel/sites/tsv/1000GP.s.norel_chr21_glimpse1_sites_tsv.txt.gz.tbi", + "prep_panel/sites/tsv/1000GP.s.norel_chr22_glimpse1_sites_tsv.txt.gz", + "prep_panel/sites/tsv/1000GP.s.norel_chr22_glimpse1_sites_tsv.txt.gz.tbi", + "prep_panel/sites/tsv/versions.yml", + "prep_panel/sites/vcf/1000GP.s.norel_chr21_glimpse1_sites.vcf.gz", + "prep_panel/sites/vcf/1000GP.s.norel_chr21_glimpse1_sites.vcf.gz.csi", + "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz", + "prep_panel/sites/vcf/1000GP.s.norel_chr22_glimpse1_sites.vcf.gz.csi", + "prep_panel/sites/vcf/versions.yml" + ], + [ + "validation/NA12878.concordance.error.cal.txt.gz", + 
"validation/NA12878.concordance.error.grp.txt.gz", + "validation/NA12878.concordance.error.spl.txt.gz", + "validation/NA12878.concordance.rsquare.grp.txt.gz", + "validation/NA12878.concordance.rsquare.spl.txt.gz", + "validation/NA12878.concordance_r2_sites.txt.gz", + "validation/NA12878_Dnull_P1000GP.s.norel_Call_SNP.txt", + "validation/NA19401.concordance.error.cal.txt.gz", + "validation/NA19401.concordance.error.grp.txt.gz", + "validation/NA19401.concordance.error.spl.txt.gz", + "validation/NA19401.concordance.rsquare.grp.txt.gz", + "validation/NA19401.concordance.rsquare.spl.txt.gz", + "validation/NA19401.concordance_r2_sites.txt.gz", + "validation/NA19401_Dnull_P1000GP.s.norel_Call_SNP.txt", + "validation/NA20359.concordance.error.cal.txt.gz", + "validation/NA20359.concordance.error.grp.txt.gz", + "validation/NA20359.concordance.error.spl.txt.gz", + "validation/NA20359.concordance.rsquare.grp.txt.gz", + "validation/NA20359.concordance.rsquare.spl.txt.gz", + "validation/NA20359.concordance_r2_sites.txt.gz", + "validation/NA20359_Dnull_P1000GP.s.norel_Call_SNP.txt", + "validation/TestQuality.txt", + "validation/concat/NA12878_truth_concat.vcf.gz", + "validation/concat/NA12878_truth_concat.vcf.gz.tbi", + "validation/concat/NA19401_truth_concat.vcf.gz", + "validation/concat/NA19401_truth_concat.vcf.gz.tbi", + "validation/concat/NA20359_truth_concat.vcf.gz", + "validation/concat/NA20359_truth_concat.vcf.gz.tbi" + ], + 1779 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T20:26:31.380656045" + }, + "Check test_validate": { + "content": [ + [ + "validation/NA12878.concordance.error.cal.txt.gz", + "validation/NA12878.concordance.error.grp.txt.gz", + "validation/NA12878.concordance.error.spl.txt.gz", + "validation/NA12878.concordance.rsquare.grp.txt.gz", + "validation/NA12878.concordance.rsquare.spl.txt.gz", + "validation/NA12878.concordance_r2_sites.txt.gz", + "validation/NA12878_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", + 
"validation/NA12878_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", + "validation/NA19401.concordance.error.cal.txt.gz", + "validation/NA19401.concordance.error.grp.txt.gz", + "validation/NA19401.concordance.error.spl.txt.gz", + "validation/NA19401.concordance.rsquare.grp.txt.gz", + "validation/NA19401.concordance.rsquare.spl.txt.gz", + "validation/NA19401.concordance_r2_sites.txt.gz", + "validation/NA19401_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", + "validation/NA19401_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", + "validation/NA20359.concordance.error.cal.txt.gz", + "validation/NA20359.concordance.error.grp.txt.gz", + "validation/NA20359.concordance.error.spl.txt.gz", + "validation/NA20359.concordance.rsquare.grp.txt.gz", + "validation/NA20359.concordance.rsquare.spl.txt.gz", + "validation/NA20359.concordance_r2_sites.txt.gz", + "validation/NA20359_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", + "validation/NA20359_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", + "validation/TestQuality.txt" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T19:38:29.792086544" + }, + "Check test_quilt": { + "content": [ + [ + "imputation/quilt/NA12878_Rchr21_16570000-16610000.impute.annotate.vcf.gz", + "imputation/quilt/NA12878_Rchr21_16570000-16610000.impute.annotate.vcf.gz.tbi", + "imputation/quilt/NA12878_Rchr22_16570000-16610000.impute.annotate.vcf.gz", + "imputation/quilt/NA12878_Rchr22_16570000-16610000.impute.annotate.vcf.gz.tbi", + "imputation/quilt/NA19401_Rchr21_16570000-16610000.impute.annotate.vcf.gz", + "imputation/quilt/NA19401_Rchr21_16570000-16610000.impute.annotate.vcf.gz.tbi", + "imputation/quilt/NA19401_Rchr22_16570000-16610000.impute.annotate.vcf.gz", + "imputation/quilt/NA19401_Rchr22_16570000-16610000.impute.annotate.vcf.gz.tbi", + "imputation/quilt/NA20359_Rchr21_16570000-16610000.impute.annotate.vcf.gz", + "imputation/quilt/NA20359_Rchr21_16570000-16610000.impute.annotate.vcf.gz.tbi", + 
"imputation/quilt/NA20359_Rchr22_16570000-16610000.impute.annotate.vcf.gz", + "imputation/quilt/NA20359_Rchr22_16570000-16610000.impute.annotate.vcf.gz.tbi", + "imputation/quilt/concat/NA12878_quilt.vcf.gz", + "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA19401_quilt.vcf.gz", + "imputation/quilt/concat/NA19401_quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA20359_quilt.vcf.gz", + "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", + "imputation/quilt/concat/versions.yml", + "imputation/quilt/versions.yml" + ], + 1779 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T19:34:26.554486719" + }, + "Check test_sim": { + "content": [ + [ + "simulation/NA12878_D1_Rchr21_16570000-16610000.bam", + "simulation/NA12878_D1_Rchr21_16570000-16610000.bam.bai", + "simulation/NA12878_D1_Rchr22_16570000-16610000.bam", + "simulation/NA12878_D1_Rchr22_16570000-16610000.bam.bai", + "simulation/NA12878_Rchr21_16570000-16610000.stats.txt", + "simulation/NA12878_Rchr22_16570000-16610000.stats.txt", + "simulation/NA19401_D1_Rchr21_16570000-16610000.bam", + "simulation/NA19401_D1_Rchr21_16570000-16610000.bam.bai", + "simulation/NA19401_D1_Rchr22_16570000-16610000.bam", + "simulation/NA19401_D1_Rchr22_16570000-16610000.bam.bai", + "simulation/NA19401_Rchr21_16570000-16610000.stats.txt", + "simulation/NA19401_Rchr22_16570000-16610000.stats.txt", + "simulation/NA20359_D1_Rchr21_16570000-16610000.bam", + "simulation/NA20359_D1_Rchr21_16570000-16610000.bam.bai", + "simulation/NA20359_D1_Rchr22_16570000-16610000.bam", + "simulation/NA20359_D1_Rchr22_16570000-16610000.bam.bai", + "simulation/NA20359_Rchr21_16570000-16610000.stats.txt", + "simulation/NA20359_Rchr22_16570000-16610000.stats.txt" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T19:36:21.014655792" + }, + "Check test_glimpse1": { + "content": [ + [ + 
"imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.phase.bcf.csi", + "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/versions.yml", + "imputation/glimpse1/versions.yml" + ], + 1779 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-17T19:31:46.118712605" + } +} \ No newline at end of file From 744c105cfcc351804daac5070c57f9fdaf656d13 Mon Sep 17 00:00:00 2001 From: LouisLeNezet 
Date: Fri, 17 May 2024 21:52:35 +0200 Subject: [PATCH 023/110] Fix ci.yml --- .github/workflows/ci.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d55b03e3..fc0a2a10 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,14 +52,5 @@ jobs: sudo mv nf-test /usr/local/bin/ - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix run: | - nf-test test --tag "${{ matrix.TEST_PROFILE }}"" --profile docker --junitxml=test.xml - - - name: Output log on failure - if: failure() - run: | - sudo apt install bat > /dev/null - batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/output/pipeline_info/software_versions.yml + nf-test test --tag "${{ matrix.TEST_PROFILE }}" --profile docker From 780fbf6bf6e748c665f51ac29dd2dd02ff91bb07 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 21:53:52 +0200 Subject: [PATCH 024/110] Move test to workflows folder for uniformization --- docs/development.md | 2 +- nf-test.config | 2 +- .../phaseimpute/tests}/test_all.nf.test | 14 +++---- .../phaseimpute/tests}/test_all.nf.test.snap | 40 +------------------ 4 files changed, 11 insertions(+), 47 deletions(-) rename {tests/pipeline => workflows/phaseimpute/tests}/test_all.nf.test (94%) rename {tests/pipeline => workflows/phaseimpute/tests}/test_all.nf.test.snap (85%) diff --git a/docs/development.md b/docs/development.md index 53527908..1c32398f 100644 --- a/docs/development.md +++ b/docs/development.md @@ -34,7 +34,7 @@ nextflow run main.nf -profile singularity,test_quilt --outdir results -resume ### Launch with nf-test ```bash -nf-test test --verbose --profile singularity +nf-test test --verbose --profile singularity --tag test_all ``` ## Problematic diff
--git a/nf-test.config b/nf-test.config index 607a2e68..69ad4f8b 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,6 +1,6 @@ config { // location for all nf-tests - testsDir "tests/pipeline" + testsDir "workflows" // nf-test directory including temporary files for each test workDir System.getenv("NXF_TEST_DIR") ?: ".nf-test" diff --git a/tests/pipeline/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test similarity index 94% rename from tests/pipeline/test_all.nf.test rename to workflows/phaseimpute/tests/test_all.nf.test index 6d5d7b0e..8c14b540 100644 --- a/tests/pipeline/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -9,7 +9,7 @@ nextflow_pipeline { test("Check test_glimpse1") { tag "test_glimpse1" - config "../../conf/test.config" + config "../../../conf/test.config" when { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' @@ -36,7 +36,7 @@ nextflow_pipeline { /* test("Check test_glimpse2") { tag "test_glimpse2" - config "../../conf/test_glimpse2.config" + config "../../../conf/test_glimpse2.config" when { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' @@ -62,7 +62,7 @@ nextflow_pipeline { test("Check test_quilt") { tag "test_quilt" - config "../../conf/test_quilt.config" + config "../../../conf/test_quilt.config" when { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' @@ -89,7 +89,7 @@ nextflow_pipeline { test("Check test_stitch") { tag "test_stitch" - config "../../conf/test_stitch.config" + config "../../../conf/test_stitch.config" when { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' @@ -116,7 +116,7 @@ nextflow_pipeline { test("Check test_sim") { tag "test_sim" - config "../../conf/test_sim.config" + config "../../../conf/test_sim.config" when { 
params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' @@ -140,7 +140,7 @@ nextflow_pipeline { test("Check test_validate") { tag "test_validate" - config "../../conf/test_validate.config" + config "../../../conf/test_validate.config" when { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' @@ -165,7 +165,7 @@ nextflow_pipeline { test("Check test_all") { tag "test_all" - config "../../conf/test_all.config" + config "../../../conf/test_all.config" when { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' diff --git a/tests/pipeline/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap similarity index 85% rename from tests/pipeline/test_all.nf.test.snap rename to workflows/phaseimpute/tests/test_all.nf.test.snap index eeba142e..ffd7e33b 100644 --- a/tests/pipeline/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -111,26 +111,8 @@ "prep_panel/sites/vcf/versions.yml" ], [ - "validation/NA12878.concordance.error.cal.txt.gz", - "validation/NA12878.concordance.error.grp.txt.gz", - "validation/NA12878.concordance.error.spl.txt.gz", - "validation/NA12878.concordance.rsquare.grp.txt.gz", - "validation/NA12878.concordance.rsquare.spl.txt.gz", - "validation/NA12878.concordance_r2_sites.txt.gz", "validation/NA12878_Dnull_P1000GP.s.norel_Call_SNP.txt", - "validation/NA19401.concordance.error.cal.txt.gz", - "validation/NA19401.concordance.error.grp.txt.gz", - "validation/NA19401.concordance.error.spl.txt.gz", - "validation/NA19401.concordance.rsquare.grp.txt.gz", - "validation/NA19401.concordance.rsquare.spl.txt.gz", - "validation/NA19401.concordance_r2_sites.txt.gz", "validation/NA19401_Dnull_P1000GP.s.norel_Call_SNP.txt", - "validation/NA20359.concordance.error.cal.txt.gz", - "validation/NA20359.concordance.error.grp.txt.gz", - 
"validation/NA20359.concordance.error.spl.txt.gz", - "validation/NA20359.concordance.rsquare.grp.txt.gz", - "validation/NA20359.concordance.rsquare.spl.txt.gz", - "validation/NA20359.concordance_r2_sites.txt.gz", "validation/NA20359_Dnull_P1000GP.s.norel_Call_SNP.txt", "validation/TestQuality.txt", "validation/concat/NA12878_truth_concat.vcf.gz", @@ -146,33 +128,15 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-17T20:26:31.380656045" + "timestamp": "2024-05-17T21:48:07.027867847" }, "Check test_validate": { "content": [ [ - "validation/NA12878.concordance.error.cal.txt.gz", - "validation/NA12878.concordance.error.grp.txt.gz", - "validation/NA12878.concordance.error.spl.txt.gz", - "validation/NA12878.concordance.rsquare.grp.txt.gz", - "validation/NA12878.concordance.rsquare.spl.txt.gz", - "validation/NA12878.concordance_r2_sites.txt.gz", "validation/NA12878_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", "validation/NA12878_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", - "validation/NA19401.concordance.error.cal.txt.gz", - "validation/NA19401.concordance.error.grp.txt.gz", - "validation/NA19401.concordance.error.spl.txt.gz", - "validation/NA19401.concordance.rsquare.grp.txt.gz", - "validation/NA19401.concordance.rsquare.spl.txt.gz", - "validation/NA19401.concordance_r2_sites.txt.gz", "validation/NA19401_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", "validation/NA19401_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", - "validation/NA20359.concordance.error.cal.txt.gz", - "validation/NA20359.concordance.error.grp.txt.gz", - "validation/NA20359.concordance.error.spl.txt.gz", - "validation/NA20359.concordance.rsquare.grp.txt.gz", - "validation/NA20359.concordance.rsquare.spl.txt.gz", - "validation/NA20359.concordance_r2_sites.txt.gz", "validation/NA20359_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", "validation/NA20359_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", "validation/TestQuality.txt" @@ -182,7 +146,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": 
"2024-05-17T19:38:29.792086544" + "timestamp": "2024-05-17T21:42:10.644445872" }, "Check test_quilt": { "content": [ From bf8a6e56fb1ea40e443aef03a8e838b9d27d5979 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 22:01:19 +0200 Subject: [PATCH 025/110] Set max usage to nf-test.config --- tests/config/nf-test.config | 4 +++- workflows/phaseimpute/tests/test_all.nf.test | 11 ++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config index 417172e2..2099d775 100644 --- a/tests/config/nf-test.config +++ b/tests/config/nf-test.config @@ -3,10 +3,12 @@ params { singularity_pull_docker_container = false test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + max_memory = '5.GB' + max_cpus = 4 } process { - cpus = 2 + cpus = 4 memory = 3.GB time = 2.h } diff --git a/workflows/phaseimpute/tests/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test index 8c14b540..c3c4d92e 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -7,14 +7,13 @@ nextflow_pipeline { - test("Check test_glimpse1") { - tag "test_glimpse1" + test("Check test") { + tag "test" config "../../../conf/test.config" when { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" - max_memory = "5.GB" } } @@ -41,7 +40,6 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" - max_memory = "5.GB" } } @@ -67,7 +65,6 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" - max_memory = "5.GB" } } @@ -94,7 +91,6 @@ nextflow_pipeline { params 
{ pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" - max_memory = "5.GB" } } @@ -121,7 +117,6 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" - max_memory = "5.GB" } } @@ -145,7 +140,6 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" - max_memory = "5.GB" } } @@ -170,7 +164,6 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" - max_memory = "5.GB" } } From 28bbd86f054cc2eb4c02ae8078421d8f4f562483 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 22:05:43 +0200 Subject: [PATCH 026/110] Reduce max_cpus --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 0cb6d0dd..7d761717 100644 --- a/nextflow.config +++ b/nextflow.config @@ -85,7 +85,7 @@ params { // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' - max_cpus = 16 + max_cpus = 4 max_time = '240.h' // Schema validation default options From f702748d54ab4f74ea187971f9567f3934a7cb2c Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 22:08:43 +0200 Subject: [PATCH 027/110] Change order config --- nextflow.config | 2 +- nf-test.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 7d761717..0cb6d0dd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -85,7 +85,7 @@ params { // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' - max_cpus = 4 + max_cpus = 16 max_time = '240.h' // Schema validation default options diff --git a/nf-test.config b/nf-test.config index 
69ad4f8b..066bcd28 100644 --- a/nf-test.config +++ b/nf-test.config @@ -6,8 +6,8 @@ config { workDir System.getenv("NXF_TEST_DIR") ?: ".nf-test" // location of an optional nextflow.config file specific for executing tests - configFile "tests/config/nf-test.config" configFile "./nextflow.config" + configFile "tests/config/nf-test.config" // run all test with the defined docker profile from the main nextflow.config profile "" From 640900139145611553c5c1161d58997f78e37a98 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 22:13:34 +0200 Subject: [PATCH 028/110] Max cpus in .nf.test --- workflows/phaseimpute/tests/test_all.nf.test | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/workflows/phaseimpute/tests/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test index c3c4d92e..6bb7a3a4 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -14,6 +14,7 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" + max_cpus = 4 } } @@ -40,6 +41,7 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" + max_cpus = 4 } } @@ -65,6 +67,7 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" + max_cpus = 4 } } @@ -91,6 +94,7 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" + max_cpus = 4 } } @@ -117,6 +121,7 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" + max_cpus = 4 } } @@ -140,6 +145,7 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 
'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" + max_cpus = 4 } } @@ -164,6 +170,7 @@ nextflow_pipeline { params { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" + max_cpus = 4 } } From 0cef5547890c9668642028397c8a0704cceb36d8 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 17 May 2024 22:18:24 +0200 Subject: [PATCH 029/110] Add max_memory --- tests/config/nf-test.config | 2 -- workflows/phaseimpute/tests/test_all.nf.test | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/config/nf-test.config b/tests/config/nf-test.config index 2099d775..6ca1e66c 100644 --- a/tests/config/nf-test.config +++ b/tests/config/nf-test.config @@ -3,8 +3,6 @@ params { singularity_pull_docker_container = false test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - max_memory = '5.GB' - max_cpus = 4 } process { diff --git a/workflows/phaseimpute/tests/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test index 6bb7a3a4..12394c2b 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -15,6 +15,7 @@ nextflow_pipeline { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_cpus = 4 + max_memory = '5.GB' } } @@ -42,6 +43,7 @@ nextflow_pipeline { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_cpus = 4 + max_memory = '5.GB' } } @@ -68,6 +70,7 @@ nextflow_pipeline { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_cpus = 4 + max_memory = '5.GB' } } @@ -95,6 +98,7 @@ nextflow_pipeline { 
pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_cpus = 4 + max_memory = '5.GB' } } @@ -122,6 +126,7 @@ nextflow_pipeline { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_cpus = 4 + max_memory = '5.GB' } } @@ -146,6 +151,7 @@ nextflow_pipeline { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_cpus = 4 + max_memory = '5.GB' } } @@ -171,6 +177,7 @@ nextflow_pipeline { pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/data/' outdir = "$outputDir" max_cpus = 4 + max_memory = '5.GB' } } From 2f3ff98dd34cd0738fc02fb62cea5f9cbfd67cf4 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 20 May 2024 11:27:05 +0200 Subject: [PATCH 030/110] Remove intermediary files from output --- conf/steps/imputation_glimpse1.config | 11 ++--------- conf/steps/imputation_quilt.config | 2 ++ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 868c4143..db6f2813 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -42,11 +42,7 @@ process { // Impute with GLIMPSE1 withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' { - publishDir = [ - path : { "${params.outdir}/imputation/glimpse1/" }, - mode : params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_CHUNK' { @@ -71,10 +67,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { - publishDir = [ - path: { "${params.outdir}/imputation/glimpse1" }, - mode: params.publish_dir_mode, - ] + publishDir = [ enabled: false ] } // Concatenate the imputed chunks diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config index 32f8d270..98706137 100644 --- a/conf/steps/imputation_quilt.config +++ b/conf/steps/imputation_quilt.config @@ -34,10 +34,12 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_ANNOTATE' { ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute.annotate" } + publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_INDEX_2' { ext.args = "--tbi" + publishDir = [ enabled: false ] } // Concatenate quilt imputed VCFs From f175c85a21ef10a0119808da372b2202dfc1d0ca Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 20 May 2024 13:57:35 +0200 Subject: [PATCH 031/110] Change BAM downsampling to 1 per individual --- conf/steps/simulation.config | 15 ++ modules.json | 5 + .../nf-core/samtools/merge/environment.yml | 8 + modules/nf-core/samtools/merge/main.nf | 60 +++++ modules/nf-core/samtools/merge/meta.yml | 83 +++++++ .../nf-core/samtools/merge/tests/index.config | 3 + .../nf-core/samtools/merge/tests/main.nf.test | 137 +++++++++++ .../samtools/merge/tests/main.nf.test.snap | 228 ++++++++++++++++++ modules/nf-core/samtools/merge/tests/tags.yml | 2 + subworkflows/local/bam_downsample/main.nf | 37 ++- workflows/phaseimpute/main.nf | 3 +- 11 files changed, 571 insertions(+), 10 deletions(-) create mode 100644 modules/nf-core/samtools/merge/environment.yml create mode 100644 modules/nf-core/samtools/merge/main.nf create mode 100644 
modules/nf-core/samtools/merge/meta.yml create mode 100644 modules/nf-core/samtools/merge/tests/index.config create mode 100644 modules/nf-core/samtools/merge/tests/main.nf.test create mode 100644 modules/nf-core/samtools/merge/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/merge/tests/tags.yml diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index ff894175..0666560a 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -25,9 +25,24 @@ process { ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_COVERAGE' { + publishDir = [ + path: { "${params.outdir}/simulation/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.stats" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_VIEW' { ext.prefix = { "${meta.id}_D${meta.depth}_R${meta.region.replace(':','_')}" } + publishDir = [ enabled: false ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_1' { + publishDir = [ enabled: false ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { + ext.prefix = { "${meta.id}_${meta.depth}x" } + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_2' { + ext.args = "" } } diff --git a/modules.json b/modules.json index 0d14775b..2e48ab45 100644 --- a/modules.json +++ b/modules.json @@ -136,6 +136,11 @@ "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, + "samtools/merge": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["modules"] + }, "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 00000000..fc669b1b --- 
/dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +name: samtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 00000000..58803d42 --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(input_files, stageAs: "?/*"), path(index, stageAs: "?/*") + tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? 
input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? "touch ${prefix}.${index_type}" : "" + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 00000000..2e8f3dbb --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,83 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 00000000..8c5668cf --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 00000000..40b36e82 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,137 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("bams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + 
} + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert snapshot(process.out.cram).match("bams_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } + ) + } + } + + test("crams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, + { assert snapshot(process.out.bam).match("crams_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, + { assert snapshot(process.out.csi).match("crams_csi") }, + { assert snapshot(process.out.versions).match("crams_versions") } + ) + } + } + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success 
}, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bams_stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..f7da7699 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,228 @@ +{ + "crams_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.647389" + }, + "bams_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.937013" + }, + "bams_crai": { + 
"content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.928616" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.923289" + }, + "bams_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.925716" + }, + "crams_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.655959" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.319539" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:33.782637377" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.92719" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.940498" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.328852" + }, + "bams_stub_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:42.594476052" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.324219" + }, + "bams_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:50:19.933153" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:04.805335656" + }, + "crams_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.650652" + }, + "crams_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:25.889394689" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.33292" + }, + "crams_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.653512" + }, + "bams_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.943839" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml new file mode 100644 index 00000000..b869abcb --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/merge: + - "modules/nf-core/samtools/merge/**" diff --git a/subworkflows/local/bam_downsample/main.nf b/subworkflows/local/bam_downsample/main.nf index 13f6a41e..0270731f 100644 --- a/subworkflows/local/bam_downsample/main.nf +++ b/subworkflows/local/bam_downsample/main.nf @@ -1,6 +1,8 @@ -include { SAMTOOLS_COVERAGE } from '../../../modules/nf-core/samtools/coverage' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' -include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_COVERAGE } from '../../../modules/nf-core/samtools/coverage' 
+include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_1 } from '../../../modules/nf-core/samtools/index' +include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_2 } from '../../../modules/nf-core/samtools/index' workflow BAM_DOWNSAMPLE { @@ -15,7 +17,7 @@ workflow BAM_DOWNSAMPLE { // Add region to channel ch_coverage = ch_bam .map{ metaICR, bam, index -> - [ metaICR, bam, index, metaICR["region"] ] + [ metaICR, bam, index, metaICR.region ] } // Get coverage of the region @@ -52,15 +54,32 @@ workflow BAM_DOWNSAMPLE { ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) // Index result - SAMTOOLS_INDEX(SAMTOOLS_VIEW.out.bam) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + SAMTOOLS_INDEX_1(SAMTOOLS_VIEW.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_1.out.versions.first()) // Aggregate bam and index ch_bam_emul = SAMTOOLS_VIEW.out.bam - .combine(SAMTOOLS_INDEX.out.bai, by:0) + .combine(SAMTOOLS_INDEX_1.out.bai, by:0) + + SAMTOOLS_MERGE( + ch_bam_emul + .map{ + metaICRD, bam, index -> [metaICRD.subMap("id", "depth"), bam, index] + } + .groupTuple() + .map{ metaID, bam, index -> + [ metaID + ["chr": "all"], bam, index ] + }, + ch_fasta + ) + SAMTOOLS_INDEX_2(SAMTOOLS_MERGE.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_2.out.versions.first()) + + ch_bam_emul_all = SAMTOOLS_MERGE.out.bam + .combine(SAMTOOLS_INDEX_2.out.bai, by:0) emit: - bam_emul = ch_bam_emul // channel: [ [id, genome, chr, region, depth], bam, bai ] - coverage = SAMTOOLS_COVERAGE.out.coverage // channel: [ [id, genome, chr, region, depth], txt ] + bam_emul = ch_bam_emul_all // channel: [ [id, chr, region, depth], bam, bai ] + coverage = SAMTOOLS_COVERAGE.out.coverage // channel: [ [id, chr, region, depth], txt ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf 
b/workflows/phaseimpute/main.nf index 99dfa3c4..0577115b 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -21,6 +21,7 @@ include { getAllFilesExtension } from '../../subworkflows/local/utils_nfc // Simulate subworkflows include { BAM_REGION } from '../../subworkflows/local/bam_region' include { BAM_DOWNSAMPLE } from '../../subworkflows/local/bam_downsample' +include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' // Panelprep subworkflows include { VCF_CHR_CHECK } from '../../subworkflows/local/vcf_chr_check' @@ -111,7 +112,7 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(BAM_DOWNSAMPLE.out.versions) ch_multiqc_files = ch_multiqc_files.mix(BAM_DOWNSAMPLE.out.coverage.map{ [it[1]] }) ch_input_impute = BAM_DOWNSAMPLE.out.bam_emul - ch_input_validate_truth = BAM_REGION.out.bam_region + ch_input_validate_truth = ch_input_sim } if (params.genotype) { From cf841722bf6641f5bfb85a530989743d863230ec Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 20 May 2024 14:01:08 +0200 Subject: [PATCH 032/110] Patch samtools merge --- modules.json | 3 ++- .../nf-core/samtools/merge/samtools-merge.diff | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 modules/nf-core/samtools/merge/samtools-merge.diff diff --git a/modules.json b/modules.json index 2e48ab45..377c0dce 100644 --- a/modules.json +++ b/modules.json @@ -139,7 +139,8 @@ "samtools/merge": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, "samtools/view": { "branch": "master", diff --git a/modules/nf-core/samtools/merge/samtools-merge.diff b/modules/nf-core/samtools/merge/samtools-merge.diff new file mode 100644 index 00000000..0a7b4c0b --- /dev/null +++ b/modules/nf-core/samtools/merge/samtools-merge.diff @@ -0,0 +1,17 @@ +Changes in module 'nf-core/samtools/merge' 
+--- modules/nf-core/samtools/merge/main.nf ++++ modules/nf-core/samtools/merge/main.nf +@@ -8,9 +8,8 @@ + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: +- tuple val(meta), path(input_files, stageAs: "?/*") +- tuple val(meta2), path(fasta) +- tuple val(meta3), path(fai) ++ tuple val(meta), path(input_files, stageAs: "?/*"), path(index, stageAs: "?/*") ++ tuple val(meta2), path(fasta), path(fai) + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + +************************************************************ From 3937ba845ac90c4aadfc214904de9d74352dcbba Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 20 May 2024 14:53:23 +0200 Subject: [PATCH 033/110] Set validation by whole samples --- conf/steps/validation.config | 2 +- main.nf | 30 ++++++------------- subworkflows/local/compute_gl/main.nf | 9 +++--- .../local/vcf_concordance_glimpse2/main.nf | 4 +-- workflows/phaseimpute/main.nf | 6 +--- 5 files changed, 17 insertions(+), 34 deletions(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index 7a662e85..e826f6fb 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -76,7 +76,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:ADD_COLUMNS' { - ext.prefix = { "${meta.id}_D${meta.depth}_P${meta.panel}_C${meta.chr}_SNP" } + ext.prefix = { "${meta.id}_P${meta.panel}_SNP" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { diff --git a/main.nf b/main.nf index 1e1c3dd7..d5fc2571 100644 --- a/main.nf +++ b/main.nf @@ -49,38 +49,26 @@ workflow NFCORE_PHASEIMPUTE { // Initialise input channels // - input_impute = Channel.empty() - input_simulate = Channel.empty() - input_validate = Channel.empty() + ch_input_impute = Channel.empty() + ch_input_simulate = Channel.empty() + ch_input_validate = Channel.empty() if (params.step.split(',').contains("impute")) { - input_impute = ch_input - .combine(ch_regions) - .map { metaI, 
file, index, metaCR, region -> - [ metaI+metaCR, file, index ] - } + ch_input_impute = ch_input } else if (params.step.split(',').contains("simulate") || params.step.split(',').contains("all")) { - input_simulate = ch_input + ch_input_simulate = ch_input } else if (params.step.split(',').contains("validate")) { - input_validate = ch_input - .combine(ch_regions) - .map { metaI, file, index, metaCR, region -> - [ metaI+metaCR, file, index ] - } + ch_input_validate = ch_input ch_input_truth = ch_input_truth - .combine(ch_regions) - .map { metaI, file, index, metaCR, region -> - [ metaI+metaCR, file, index ] - } } // // WORKFLOW: Run pipeline // PHASEIMPUTE ( - input_impute, - input_simulate, - input_validate, + ch_input_impute, + ch_input_simulate, + ch_input_validate, ch_input_truth, ch_fasta, ch_panel, diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/compute_gl/main.nf index b3f6b8cc..fd3b0f63 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/compute_gl/main.nf @@ -5,7 +5,7 @@ include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/an workflow COMPUTE_GL { take: - ch_input // channel: [ [id, chr, region], bam, bai ] + ch_input // channel: [ [id], bam, bai ] ch_target // channel: [ [panel, chr], sites, tsv] ch_fasta // channel: [ [genome], fasta, fai] @@ -15,10 +15,9 @@ workflow COMPUTE_GL { ch_multiqc_files = Channel.empty() ch_mpileup = ch_input - .map{metaICR, bam, bai -> [metaICR.subMap("chr"), metaICR, bam, bai]} - .combine(ch_target.map{metaPC, sites, tsv -> [metaPC.subMap("chr"), metaPC, sites, tsv]}, by:0) - .map{metaC, metaICR, bam, bai, metaPC, sites, tsv -> - [metaICR + metaPC, bam, sites, tsv] + .combine(ch_target) + .map{metaI, bam, bai, metaPC, sites, tsv -> + [metaI + metaPC, bam, sites, tsv] } BCFTOOLS_MPILEUP( diff --git a/subworkflows/local/vcf_concordance_glimpse2/main.nf b/subworkflows/local/vcf_concordance_glimpse2/main.nf index 5e37f710..1fb46575 100644 --- 
a/subworkflows/local/vcf_concordance_glimpse2/main.nf +++ b/subworkflows/local/vcf_concordance_glimpse2/main.nf @@ -6,8 +6,8 @@ include { GUNZIP } from '../../../modules/nf-core/gunzip' workflow VCF_CONCORDANCE_GLIMPSE2 { take: - ch_vcf_emul // VCF file with imputed genotypes [ [id, chr, region, panel], vcf, csi] - ch_vcf_truth // VCF file with truth genotypes [ [id, chr, region, panel], vcf, csi] + ch_vcf_emul // VCF file with imputed genotypes [ [id], vcf, csi] + ch_vcf_truth // VCF file with truth genotypes [ [id], vcf, csi] ch_vcf_freq // VCF file with panel frequencies [ [panel], vcf, csi] ch_region // Regions to process [ [chr, region], region] diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 0577115b..d7b1a6a5 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -153,11 +153,7 @@ workflow PHASEIMPUTE { .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index -> [metaPC, sites, tsv] } - CONCAT_PANEL(VCF_PHASE_PANEL.out.panel - .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [[id:metaPC.panel], sites, s_index] - } - ) + CONCAT_PANEL(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) ch_panel_sites = CONCAT_PANEL.out.vcf_tbi_join ch_versions = ch_versions.mix(CONCAT_PANEL.out.versions) From 3fd5bc9ab7835e388b41abf2fe55e9bd3e0dffde Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 20 May 2024 15:12:33 +0200 Subject: [PATCH 034/110] Change chromosomes usage --- conf/steps/imputation_glimpse1.config | 4 +- conf/steps/validation.config | 22 ++--------- subworkflows/local/vcf_phase_panel/main.nf | 12 ------ workflows/phaseimpute/main.nf | 45 +++++++++------------- 4 files changed, 24 insertions(+), 59 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index db6f2813..fcf4777b 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -28,12 +28,12 @@ process { "-Aim", 
"-C alleles" ].join(' ') - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.call" } + ext.prefix = { "${meta.id}.call" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_ANNOTATE' { ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.annotate" } + ext.prefix = { "${meta.id}.annotate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_INDEX' { diff --git a/conf/steps/validation.config b/conf/steps/validation.config index e826f6fb..ce55ba5a 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -17,6 +17,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:.*' { publishDir = [ enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_MPILEUP' { ext.args = [ "-I", @@ -27,35 +28,18 @@ process { "-Aim", "-C alleles" ].join(' ') - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}_truth.call" } + ext.prefix = { "${meta.id}_truth.call" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_ANNOTATE' { ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz"].join(' ') - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.annotate" } + ext.prefix = { "${meta.id}.annotate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_INDEX' { ext.args = "--tbi" } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { - ext.prefix = { "${meta.id}_truth_concat" } - publishDir = [ - path: { "${params.outdir}/validation/concat" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { - ext.args = ["--ligate", "--output-type z",].join(' ') - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_INDEX' { - ext.args = "--tbi" - } - // Validation subworkflow withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { publishDir = [ diff --git a/subworkflows/local/vcf_phase_panel/main.nf b/subworkflows/local/vcf_phase_panel/main.nf index fd006506..108a1c8c 100644 --- a/subworkflows/local/vcf_phase_panel/main.nf +++ b/subworkflows/local/vcf_phase_panel/main.nf @@ -3,9 +3,6 @@ include { VCF_PHASE_SHAPEIT5 } from '../../../subworkflows/n workflow VCF_PHASE_PANEL { take: ch_vcf // channel: [ [id, chr, region], vcf, index ] - ch_panel_norm // channel: [ [panel, chr], norm, index ] - ch_panel_sites // channel: [ [panel, chr], sites, index ] - ch_panel_tsv // channel: [ [panel, chr], tsv, index ] main: @@ -25,16 +22,7 @@ workflow VCF_PHASE_PANEL { ch_panel_phased = ch_vcf } - ch_panel = ch_panel_norm - .combine(ch_panel_sites, by: 0) - .combine(ch_panel_tsv, by: 0) - .combine(ch_panel_phased, by: 0) - .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [[panel:metaPC.id, chr:metaPC.chr ], norm, n_index, sites, s_index, tsv, t_index, phased, p_index] - } - emit: vcf_tbi = ch_panel_phased // channel: [ [id, chr], vcf, index ] - panel = ch_panel // channel: [ [panel, chr], norm, n_index, sites, s_index, tsv, t_index, phased, p_index ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index d7b1a6a5..1edafa65 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -142,25 +142,24 @@ workflow PHASEIMPUTE { // If required, phase panel (currently not working, a test should be added) // Phase panel with tool of choice (e.g. 
SHAPEIT5) - VCF_PHASE_PANEL(VCF_SITES_EXTRACT_BCFTOOLS.out.vcf_tbi, - VCF_SITES_EXTRACT_BCFTOOLS.out.vcf_tbi, - VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites, - VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv) + VCF_PHASE_PANEL(VCF_SITES_EXTRACT_BCFTOOLS.out.vcf_tbi) ch_versions = ch_versions.mix(VCF_PHASE_PANEL.out.versions) + ch_panel = VCF_SITES_EXTRACT_BCFTOOLS.out.vcf_tbi + .join(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) + .join(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv) + .join(VCF_PHASE_PANEL.out.vcf_tbi) + // Generate channels (to be simplified) - ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel - .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [metaPC, sites, tsv] - } + ch_panel_sites_tsv = ch_panel + .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + -> [metaPC, sites, tsv] + } CONCAT_PANEL(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) - ch_panel_sites = CONCAT_PANEL.out.vcf_tbi_join ch_versions = ch_versions.mix(CONCAT_PANEL.out.versions) - ch_panel_phased = VCF_PHASE_PANEL.out.panel - .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [metaPC, phased, p_index] - } + ch_panel_sites = CONCAT_PANEL.out.vcf_tbi_join + ch_panel_phased = VCF_PHASE_PANEL.out.vcf_tbi // Create chunks from reference VCF VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) @@ -173,7 +172,7 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("glimpse1")) { println "Impute with Glimpse1" // Glimpse1 subworkflow - GL_INPUT( // Compute GL for input data once per panel + GL_INPUT( // Compute GL for input data once per panel by chromosome ch_input_impute, ch_panel_sites_tsv, ch_fasta @@ -183,14 +182,14 @@ workflow PHASEIMPUTE { impute_input = GL_INPUT.out.vcf // [metaIPC, vcf, index] .map {metaIPC, vcf, index -> [metaIPC.subMap("panel", "chr"), metaIPC, vcf, index] } - .combine(ch_panel_phased, by: 0) + .join(ch_panel_phased) .combine(Channel.of([[]])) .map { metaPC, metaIPC, vcf, index, panel, p_index, sample 
-> [metaPC.subMap("chr"), metaIPC, vcf, index, panel, p_index, sample]} - .combine(ch_region - .map {metaCR, region -> [metaCR.subMap("chr"), metaCR, region]}, - by: 0) - .combine(ch_map, by: 0) + .join(ch_region + .map {metaCR, region -> [metaCR.subMap("chr"), metaCR, region]} + ) + .join(ch_map) .map{ metaC, metaIPC, vcf, index, panel, p_index, sample, metaCR, region, map -> [metaIPC+metaCR.subMap("Region"), vcf, index, sample, region, panel, p_index, map] @@ -233,7 +232,6 @@ workflow PHASEIMPUTE { } - if (params.tools.split(',').contains("stitch")) { print("Impute with STITCH") @@ -266,7 +264,6 @@ workflow PHASEIMPUTE { ch_input_validate = ch_input_validate.mix(CONCAT_STITCH.out.vcf_tbi_join) } - if (params.tools.split(',').contains("quilt")) { print("Impute with QUILT") @@ -307,14 +304,10 @@ workflow PHASEIMPUTE { ch_multiqc_files = ch_multiqc_files.mix(GL_TRUTH.out.multiqc_files) ch_versions = ch_versions.mix(GL_TRUTH.out.versions) - // Concatenate by chromosomes - CONCAT_TRUTH(GL_TRUTH.out.vcf) - ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) - // Mix the original vcf and the computed vcf ch_truth_vcf = ch_truth.vcf .map { [it[0], it[1], it[2]] } - .mix(CONCAT_TRUTH.out.vcf_tbi_join) + .mix(GL_TRUTH.out.vcf) // Compute concordance analysis VCF_CONCORDANCE_GLIMPSE2( From 844808df4faffdd3a83c9dff1b6f65b9d55ec1db Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 20 May 2024 19:48:20 +0200 Subject: [PATCH 035/110] Fix chromosome usage --- conf/steps/imputation_quilt.config | 4 ++-- conf/test_all.config | 2 +- docs/development.md | 1 + subworkflows/local/bam_impute_quilt/main.nf | 25 +++++++++------------ subworkflows/local/compute_gl/main.nf | 4 ++-- workflows/phaseimpute/main.nf | 2 +- 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/conf/steps/imputation_quilt.config b/conf/steps/imputation_quilt.config index 98706137..3df6e284 100644 --- a/conf/steps/imputation_quilt.config +++ b/conf/steps/imputation_quilt.config @@ -21,7 +21,7 
@@ process { // Impute quilt withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:QUILT_QUILT' { - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute" } + ext.prefix = { "${meta.id}_C${meta.chr}.impute" } publishDir = [enabled: false] } @@ -33,7 +33,7 @@ process { // Annotate quilt imputed VCFs withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT:BCFTOOLS_ANNOTATE' { ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.impute.annotate" } + ext.prefix = { "${meta.id}_C${meta.chr}.impute.annotate" } publishDir = [ enabled: false ] } diff --git a/conf/test_all.config b/conf/test_all.config index fcd7e356..031c7e39 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -31,5 +31,5 @@ params { map = "${projectDir}/tests/csv/map.csv" step = "all" - tools = "glimpse1" + tools = "quilt" } diff --git a/docs/development.md b/docs/development.md index 1c32398f..770c0ac2 100644 --- a/docs/development.md +++ b/docs/development.md @@ -35,6 +35,7 @@ nextflow run main.nf -profile singularity,test_quilt --outdir results -resume ```bash nf-test test --verbose --profile singularity --tag test_all +nf-test test --verbose --profile singularity --tag test_all --update-snap #To update the snaps of a given test ``` ## Problematic diff --git a/subworkflows/local/bam_impute_quilt/main.nf b/subworkflows/local/bam_impute_quilt/main.nf index 077df87a..742f4aeb 100644 --- a/subworkflows/local/bam_impute_quilt/main.nf +++ b/subworkflows/local/bam_impute_quilt/main.nf @@ -7,8 +7,8 @@ include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bc workflow BAM_IMPUTE_QUILT { take: + ch_input // channel: [ [id], bam, bai ] ch_hap_legend // channel: [ [panel, chr], hap, legend ] - ch_input // channel: [ [id, chr, region], bam, bai ] ch_chunks // channel: [ [panel, chr], start_coordinate, end_coordinate, number ] @@ -26,26 +26,21 @@ workflow BAM_IMPUTE_QUILT { ngen = params.ngen buffer = 
params.buffer - // Rename panel to id - ch_chunks = ch_chunks.map{meta, chr, start, end -> return[['id': meta.panel, 'chr': meta.chr], chr, start, end]} - if (genetic_map_file.isEmpty()) { - ch_hap_chunks = ch_hap_legend.combine(ch_chunks, by:0).map { it + ngen + buffer + [[]] } + ch_hap_chunks = ch_hap_legend.join(ch_chunks).map { it + ngen + buffer + [[]] } } else { // Add ngen and buffer + genetic map file (untested) - ch_hap_chunks = ch_hap_legend.join(ch_chunks, by:0).join(genetic_map_file) + ch_hap_chunks = ch_hap_legend.join(ch_chunks).join(genetic_map_file) } ch_quilt = ch_input - .map{ metaICR, bam, bai -> [metaICR.subMap("chr"), metaICR, bam, bai]} - .combine(ch_hap_chunks - .map{ metaPC, hap, legend, chr, start, end, ngen, buffer, gmap -> - [metaPC.subMap("chr"), metaPC, hap, legend, chr, start, end, ngen, buffer, gmap] - }, by:0 - ) + .combine(ch_hap_chunks) .map { - metaC, metaICR, bam, bai, metaPC, hap, legend, chr, start, end, ngen, buffer, gmap -> - [metaICR + ["panel": metaPC.id], bam, bai, hap, legend, chr, start, end, ngen, buffer, gmap] + metaIC, bam, bai, metaPC, hap, legend, chr, start, end, ngen, buffer, gmap -> + [ + metaIC.subMap("id") + ["panel": metaPC.id, "chr": metaPC.chr], + bam, bai, hap, legend, chr, start, end, ngen, buffer, gmap + ] } // Run QUILT @@ -71,6 +66,6 @@ workflow BAM_IMPUTE_QUILT { ch_vcf_tbi = BCFTOOLS_ANNOTATE.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) emit: - vcf_tbi = ch_vcf_tbi // channel: [ [id, panel, chr, region], vcf, tbi ] + vcf_tbi = ch_vcf_tbi // channel: [ [id, panel], vcf, tbi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/compute_gl/main.nf index fd3b0f63..8afe1a70 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/compute_gl/main.nf @@ -17,7 +17,7 @@ workflow COMPUTE_GL { ch_mpileup = ch_input .combine(ch_target) .map{metaI, bam, bai, metaPC, sites, tsv -> - [metaI + metaPC, bam, sites, tsv] + [metaI + 
["panel": metaPC.id, "chr": metaPC.chr], bam, sites, tsv] } BCFTOOLS_MPILEUP( @@ -45,7 +45,7 @@ workflow COMPUTE_GL { ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_MPILEUP.out.stats.map{ it[1] }) emit: - vcf = ch_output // channel: [ [id, panel, chr, region], vcf, tbi ] + vcf = ch_output // channel: [ [id, panel, chr], vcf, tbi ] versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 1edafa65..f8dde8ed 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -268,7 +268,7 @@ workflow PHASEIMPUTE { print("Impute with QUILT") // Impute BAMs with QUILT - BAM_IMPUTE_QUILT(VCF_NORMALIZE_BCFTOOLS.out.hap_legend, ch_input_impute, VCF_CHUNK_GLIMPSE.out.chunks_quilt) + BAM_IMPUTE_QUILT(ch_input_impute, VCF_NORMALIZE_BCFTOOLS.out.hap_legend, VCF_CHUNK_GLIMPSE.out.chunks_quilt) ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) // Add to output channel From 1510b760e13b6e456304531d6f1fe77aceecd7b9 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 14 May 2024 19:32:02 +0000 Subject: [PATCH 036/110] add chunks params --- assets/chr_22_sample_chunks.txt | 28 +++++++++++++++++++ assets/schema_chunks.json | 24 ++++++++++++++++ main.nf | 3 ++ nextflow_schema.json | 6 +++- .../utils_nfcore_phaseimpute_pipeline/main.nf | 15 ++++++++++ tests/csv/chunks.csv | 2 ++ workflows/phaseimpute/main.nf | 1 + 7 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 assets/chr_22_sample_chunks.txt create mode 100644 assets/schema_chunks.json create mode 100644 tests/csv/chunks.csv diff --git a/assets/chr_22_sample_chunks.txt b/assets/chr_22_sample_chunks.txt new file mode 100644 index 00000000..140ae705 --- /dev/null +++ b/assets/chr_22_sample_chunks.txt @@ -0,0 +1,28 @@ +0 chr22 chr22:10519276-12275757 chr22:10519276-12037013 1517738 28060 +1 chr22 chr22:11837008-15527118 
chr22:12037015-15327085 3290071 28059 +2 chr22 chr22:12901674-17385277 chr22:15327100-17185274 1858175 56117 +3 chr22 chr22:16985310-19055470 chr22:17185309-18855417 1670109 28060 +4 chr22 chr22:18652607-20295493 chr22:18855435-20095485 1240051 28058 +5 chr22 chr22:19895555-21483821 chr22:20095577-21283696 1188120 28059 +6 chr22 chr22:21083688-22696916 chr22:21283708-22496899 1213192 28057 +7 chr22 chr22:22296905-24767355 chr22:22496904-24567322 2070419 56118 +8 chr22 chr22:24367358-25828350 chr22:24567383-25628198 1060816 28059 +9 chr22 chr22:25428329-27109082 chr22:25628335-26909081 1280747 28058 +10 chr22 chr22:26709031-28352205 chr22:26909086-28152066 1242981 28059 +11 chr22 chr22:27952368-29815483 chr22:28152378-29615446 1463069 28058 +12 chr22 chr22:29415446-31242420 chr22:29615475-31042403 1426929 28059 +13 chr22 chr22:30842323-32584404 chr22:31042413-32384346 1341934 28057 +14 chr22 chr22:32184303-33841365 chr22:32384361-33641357 1256997 28060 +15 chr22 chr22:33441326-34995392 chr22:33641388-34795293 1153906 28058 +16 chr22 chr22:34595199-36301899 chr22:34795389-36101899 1306511 28059 +17 chr22 chr22:35901810-37463533 chr22:36101997-37263419 1161423 28058 +18 chr22 chr22:37063419-38704958 chr22:37263425-38504928 1241504 28060 +19 chr22 chr22:38304943-39948058 chr22:38504951-39748047 1243097 28058 +20 chr22 chr22:39548081-41459454 chr22:39748134-41259435 1511302 28059 +21 chr22 chr22:41059335-42754140 chr22:41259436-42554131 1294696 28057 +22 chr22 chr22:42354063-43871119 chr22:42554203-43671018 1116816 28060 +23 chr22 chr22:43471051-44991834 chr22:43671067-44791832 1120766 28058 +24 chr22 chr22:44591757-46111616 chr22:44791836-45911609 1119774 28059 +25 chr22 chr22:45711548-47115293 chr22:45911625-46915275 1003651 28057 +26 chr22 chr22:46715160-49093641 chr22:46915277-48893636 1978360 56117 +27 chr22 chr22:48693630-50807843 chr22:48893661-50807843 1914183 56116 diff --git a/assets/schema_chunks.json b/assets/schema_chunks.json new file mode 100644 index 
00000000..b3cd7382 --- /dev/null +++ b/assets/schema_chunks.json @@ -0,0 +1,24 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/phaseimpute/master/assets/schema_chunks.json", + "title": "nf-core/phaseimpute pipeline - params.chunks schema", + "description": "Schema for the file provided with params.chunks", + "type": "array", + "items": { + "type": "object", + "properties": { + "chr": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Chromosome name must be provided as a string and cannot contain spaces", + "meta": ["chr"] + }, + "file": { + "type": "string", + "pattern": "^\\S+\\.(txt|bin)$", + "errorMessage": "File with chunks per chromosome must be provided. Must have .txt or .bin extension" + } + }, + "required": ["chr", "file"] + } +} diff --git a/main.nf b/main.nf index d5fc2571..6350ea32 100644 --- a/main.nf +++ b/main.nf @@ -41,6 +41,7 @@ workflow NFCORE_PHASEIMPUTE { ch_depth // channel: depth of coverage file [[depth], depth] ch_map // channel: map file for imputation ch_posfile // channel: samplesheet read in from --posfile + ch_chunks // channel: samplesheet read in from --chunks ch_versions // channel: versions of software used main: @@ -76,6 +77,7 @@ workflow NFCORE_PHASEIMPUTE { ch_depth, ch_map, ch_posfile, + ch_chunks, ch_versions ) @@ -119,6 +121,7 @@ workflow { PIPELINE_INITIALISATION.out.depth, PIPELINE_INITIALISATION.out.map, PIPELINE_INITIALISATION.out.posfile, + PIPELINE_INITIALISATION.out.chunks, PIPELINE_INITIALISATION.out.versions ) diff --git a/nextflow_schema.json b/nextflow_schema.json index f4e79f20..36e2feeb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -127,7 +127,11 @@ "properties": { "chunks": { "type": "string", - "description": "Path to comma-separated file containing tab-separated files with the genomic chunks to be used for imputation." 
+ "description": "Path to comma-separated file containing tab-separated files with the genomic chunks to be used for imputation.", + "format": "file-path", + "mimetype": "text/csv", + "schema": "/assets/schema_chunks.json", + "pattern": "^\\S+\\.csv$" } } }, diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 652d0c0e..f00558dc 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -216,6 +216,20 @@ workflow PIPELINE_INITIALISATION { ch_posfile = [[]] } + // + // Create chunks channel + // + + if (params.chunks) { + ch_chunks = Channel + .fromSamplesheet("chunks") + .map { + meta, file -> + [ meta, file ] + }} else { + ch_chunks = [[]] + } + emit: input = ch_input // [ [meta], file, index ] input_truth = ch_input_truth // [ [meta], file, index ] @@ -225,6 +239,7 @@ workflow PIPELINE_INITIALISATION { regions = ch_regions // [ [chr, region], region ] map = ch_map // [ [map], map ] posfile = ch_posfile // [ [chr], txt ] + chunks = ch_chunks // [ [chr], txt ] versions = ch_versions } diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv new file mode 100644 index 00000000..35e818c6 --- /dev/null +++ b/tests/csv/chunks.csv @@ -0,0 +1,2 @@ +chr,file +chr22,"assets/chr_22_sample_chunks.txt" diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index f8dde8ed..3d6b7065 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -76,6 +76,7 @@ workflow PHASEIMPUTE { ch_depth // channel: depth select [ [depth], depth ] ch_map // channel: genetic map [ [chr], map] ch_posfile // channel: posfile [ [chr], txt] + ch_chunks // channel: chunks [ [chr], txt] ch_versions // channel: versions of software used main: From c7d8b740b472bffa1fd148fd8cb9c36d9fc9b28d Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 15 May 
2024 14:19:48 +0000 Subject: [PATCH 037/110] add remote file --- tests/csv/chunks.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv index 35e818c6..1a7e2a08 100644 --- a/tests/csv/chunks.csv +++ b/tests/csv/chunks.csv @@ -1,2 +1,2 @@ chr,file -chr22,"assets/chr_22_sample_chunks.txt" +chr22,"https://github.com/nf-core/test-datasets/raw/f0622133837c9494a965d6cd3957629d4c7415f0/data/panel/22/chr22_chunks_glimpse1.txt" From f7b0e469f076ccdce910538e93624857f9d2b26e Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Thu, 16 May 2024 16:12:36 +0000 Subject: [PATCH 038/110] add ch_chunks to pipeline --- workflows/phaseimpute/main.nf | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 3d6b7065..2bba031a 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -168,6 +168,13 @@ workflow PHASEIMPUTE { } if (params.step.split(',').contains("impute") || params.step.split(',').contains("all")) { + + // if (params.chunks) { + // ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} + // .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) + // .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + // // Use channel ch_chunks for GLIMPSE1 imputation + // } // Output channel of input process ch_impute_output = Channel.empty() if (params.tools.split(',').contains("glimpse1")) { @@ -226,7 +233,10 @@ workflow PHASEIMPUTE { // ch_chunks, // ch_fasta) // } else if (params.chunks) { - // // use provided chunks + // ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} + // .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) + // .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + // Use channel ch_chunks for GLIMPSE2 imputation // } else { // 
error "Either no reference panel was included or you did not set step --panelprep or you did not provide --chunks" // } @@ -268,6 +278,19 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("quilt")) { print("Impute with QUILT") + //Use previous chunks if --step panelprep + if (params.panel && params.step.split(',').contains("panelprep") && !params.chunks) { + ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt + //Use provided chunks if --chunks + } else if (params.chunks) { + ch_chunks_quilt = ch_chunks.map { chr, txt -> [chr, file(txt)]} + .splitText() + .map { metamap, line -> + def fields = line.split("\t") + def startEnd = fields[2].split(':')[1].split('-') + [metamap, metamap.chr, startEnd[0], startEnd[1]] + } + } // Impute BAMs with QUILT BAM_IMPUTE_QUILT(ch_input_impute, VCF_NORMALIZE_BCFTOOLS.out.hap_legend, VCF_CHUNK_GLIMPSE.out.chunks_quilt) ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) From 3a1159bf72c5d355e16aeb72e3a7b6bbc6c8dd51 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 17 May 2024 11:11:25 +0000 Subject: [PATCH 039/110] update schema and sample file --- assets/chr_22_sample_chunks.txt | 56 ++++++++++++++++----------------- assets/schema_chunks.json | 8 ++++- tests/csv/chunks.csv | 4 +-- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/assets/chr_22_sample_chunks.txt b/assets/chr_22_sample_chunks.txt index 140ae705..df989ac7 100644 --- a/assets/chr_22_sample_chunks.txt +++ b/assets/chr_22_sample_chunks.txt @@ -1,28 +1,28 @@ -0 chr22 chr22:10519276-12275757 chr22:10519276-12037013 1517738 28060 -1 chr22 chr22:11837008-15527118 chr22:12037015-15327085 3290071 28059 -2 chr22 chr22:12901674-17385277 chr22:15327100-17185274 1858175 56117 -3 chr22 chr22:16985310-19055470 chr22:17185309-18855417 1670109 28060 -4 chr22 chr22:18652607-20295493 chr22:18855435-20095485 1240051 28058 -5 chr22 chr22:19895555-21483821 chr22:20095577-21283696 1188120 28059 -6 chr22 
chr22:21083688-22696916 chr22:21283708-22496899 1213192 28057 -7 chr22 chr22:22296905-24767355 chr22:22496904-24567322 2070419 56118 -8 chr22 chr22:24367358-25828350 chr22:24567383-25628198 1060816 28059 -9 chr22 chr22:25428329-27109082 chr22:25628335-26909081 1280747 28058 -10 chr22 chr22:26709031-28352205 chr22:26909086-28152066 1242981 28059 -11 chr22 chr22:27952368-29815483 chr22:28152378-29615446 1463069 28058 -12 chr22 chr22:29415446-31242420 chr22:29615475-31042403 1426929 28059 -13 chr22 chr22:30842323-32584404 chr22:31042413-32384346 1341934 28057 -14 chr22 chr22:32184303-33841365 chr22:32384361-33641357 1256997 28060 -15 chr22 chr22:33441326-34995392 chr22:33641388-34795293 1153906 28058 -16 chr22 chr22:34595199-36301899 chr22:34795389-36101899 1306511 28059 -17 chr22 chr22:35901810-37463533 chr22:36101997-37263419 1161423 28058 -18 chr22 chr22:37063419-38704958 chr22:37263425-38504928 1241504 28060 -19 chr22 chr22:38304943-39948058 chr22:38504951-39748047 1243097 28058 -20 chr22 chr22:39548081-41459454 chr22:39748134-41259435 1511302 28059 -21 chr22 chr22:41059335-42754140 chr22:41259436-42554131 1294696 28057 -22 chr22 chr22:42354063-43871119 chr22:42554203-43671018 1116816 28060 -23 chr22 chr22:43471051-44991834 chr22:43671067-44791832 1120766 28058 -24 chr22 chr22:44591757-46111616 chr22:44791836-45911609 1119774 28059 -25 chr22 chr22:45711548-47115293 chr22:45911625-46915275 1003651 28057 -26 chr22 chr22:46715160-49093641 chr22:46915277-48893636 1978360 56117 -27 chr22 chr22:48693630-50807843 chr22:48893661-50807843 1914183 56116 +0 chr22 chr22:10519276-12275757 chr22:10519276-12037013 1517738 28060 +1 chr22 chr22:11837008-15527118 chr22:12037015-15327085 3290071 28059 +2 chr22 chr22:12901674-17385277 chr22:15327100-17185274 1858175 56117 +3 chr22 chr22:16985310-19055470 chr22:17185309-18855417 1670109 28060 +4 chr22 chr22:18652607-20295493 chr22:18855435-20095485 1240051 28058 +5 chr22 chr22:19895555-21483821 chr22:20095577-21283696 1188120 28059 +6 
chr22 chr22:21083688-22696916 chr22:21283708-22496899 1213192 28057 +7 chr22 chr22:22296905-24767355 chr22:22496904-24567322 2070419 56118 +8 chr22 chr22:24367358-25828350 chr22:24567383-25628198 1060816 28059 +9 chr22 chr22:25428329-27109082 chr22:25628335-26909081 1280747 28058 +10 chr22 chr22:26709031-28352205 chr22:26909086-28152066 1242981 28059 +11 chr22 chr22:27952368-29815483 chr22:28152378-29615446 1463069 28058 +12 chr22 chr22:29415446-31242420 chr22:29615475-31042403 1426929 28059 +13 chr22 chr22:30842323-32584404 chr22:31042413-32384346 1341934 28057 +14 chr22 chr22:32184303-33841365 chr22:32384361-33641357 1256997 28060 +15 chr22 chr22:33441326-34995392 chr22:33641388-34795293 1153906 28058 +16 chr22 chr22:34595199-36301899 chr22:34795389-36101899 1306511 28059 +17 chr22 chr22:35901810-37463533 chr22:36101997-37263419 1161423 28058 +18 chr22 chr22:37063419-38704958 chr22:37263425-38504928 1241504 28060 +19 chr22 chr22:38304943-39948058 chr22:38504951-39748047 1243097 28058 +20 chr22 chr22:39548081-41459454 chr22:39748134-41259435 1511302 28059 +21 chr22 chr22:41059335-42754140 chr22:41259436-42554131 1294696 28057 +22 chr22 chr22:42354063-43871119 chr22:42554203-43671018 1116816 28060 +23 chr22 chr22:43471051-44991834 chr22:43671067-44791832 1120766 28058 +24 chr22 chr22:44591757-46111616 chr22:44791836-45911609 1119774 28059 +25 chr22 chr22:45711548-47115293 chr22:45911625-46915275 1003651 28057 +26 chr22 chr22:46715160-49093641 chr22:46915277-48893636 1978360 56117 +27 chr22 chr22:48693630-50807843 chr22:48893661-50807843 1914183 56116 diff --git a/assets/schema_chunks.json b/assets/schema_chunks.json index b3cd7382..a295d003 100644 --- a/assets/schema_chunks.json +++ b/assets/schema_chunks.json @@ -7,6 +7,12 @@ "items": { "type": "object", "properties": { + "panel": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Panel name must be provided as a string and cannot contain spaces", + "meta": ["panel"] + }, "chr": { "type": "string", 
"pattern": "^\\S+$", @@ -19,6 +25,6 @@ "errorMessage": "File with chunks per chromosome must be provided. Must have .txt or .bin extension" } }, - "required": ["chr", "file"] + "required": ["panel", "chr", "file"] } } diff --git a/tests/csv/chunks.csv b/tests/csv/chunks.csv index 1a7e2a08..473031ee 100644 --- a/tests/csv/chunks.csv +++ b/tests/csv/chunks.csv @@ -1,2 +1,2 @@ -chr,file -chr22,"https://github.com/nf-core/test-datasets/raw/f0622133837c9494a965d6cd3957629d4c7415f0/data/panel/22/chr22_chunks_glimpse1.txt" +panel,chr,file +1000GP.s.norel,chr22,"https://github.com/nf-core/test-datasets/raw/phaseimpute/data/panel/22/chr22_chunks_glimpse1.txt" From ce8646491bb316e36799a578ac7e4d372e85f936 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 17 May 2024 22:20:03 +0000 Subject: [PATCH 040/110] indent --- .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index f00558dc..29fdeee8 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -221,13 +221,13 @@ workflow PIPELINE_INITIALISATION { // if (params.chunks) { - ch_chunks = Channel - .fromSamplesheet("chunks") - .map { - meta, file -> - [ meta, file ] + ch_chunks = Channel + .fromSamplesheet("chunks") + .map { + meta, file -> + [ meta, file ] }} else { - ch_chunks = [[]] + ch_chunks = [[]] } emit: From 78a4ef3b7de332ef620f4745a994311d57467b0a Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 17 May 2024 22:20:17 +0000 Subject: [PATCH 041/110] remove file --- assets/chr_22_sample_chunks.txt | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 assets/chr_22_sample_chunks.txt diff --git 
a/assets/chr_22_sample_chunks.txt b/assets/chr_22_sample_chunks.txt deleted file mode 100644 index df989ac7..00000000 --- a/assets/chr_22_sample_chunks.txt +++ /dev/null @@ -1,28 +0,0 @@ -0 chr22 chr22:10519276-12275757 chr22:10519276-12037013 1517738 28060 -1 chr22 chr22:11837008-15527118 chr22:12037015-15327085 3290071 28059 -2 chr22 chr22:12901674-17385277 chr22:15327100-17185274 1858175 56117 -3 chr22 chr22:16985310-19055470 chr22:17185309-18855417 1670109 28060 -4 chr22 chr22:18652607-20295493 chr22:18855435-20095485 1240051 28058 -5 chr22 chr22:19895555-21483821 chr22:20095577-21283696 1188120 28059 -6 chr22 chr22:21083688-22696916 chr22:21283708-22496899 1213192 28057 -7 chr22 chr22:22296905-24767355 chr22:22496904-24567322 2070419 56118 -8 chr22 chr22:24367358-25828350 chr22:24567383-25628198 1060816 28059 -9 chr22 chr22:25428329-27109082 chr22:25628335-26909081 1280747 28058 -10 chr22 chr22:26709031-28352205 chr22:26909086-28152066 1242981 28059 -11 chr22 chr22:27952368-29815483 chr22:28152378-29615446 1463069 28058 -12 chr22 chr22:29415446-31242420 chr22:29615475-31042403 1426929 28059 -13 chr22 chr22:30842323-32584404 chr22:31042413-32384346 1341934 28057 -14 chr22 chr22:32184303-33841365 chr22:32384361-33641357 1256997 28060 -15 chr22 chr22:33441326-34995392 chr22:33641388-34795293 1153906 28058 -16 chr22 chr22:34595199-36301899 chr22:34795389-36101899 1306511 28059 -17 chr22 chr22:35901810-37463533 chr22:36101997-37263419 1161423 28058 -18 chr22 chr22:37063419-38704958 chr22:37263425-38504928 1241504 28060 -19 chr22 chr22:38304943-39948058 chr22:38504951-39748047 1243097 28058 -20 chr22 chr22:39548081-41459454 chr22:39748134-41259435 1511302 28059 -21 chr22 chr22:41059335-42754140 chr22:41259436-42554131 1294696 28057 -22 chr22 chr22:42354063-43871119 chr22:42554203-43671018 1116816 28060 -23 chr22 chr22:43471051-44991834 chr22:43671067-44791832 1120766 28058 -24 chr22 chr22:44591757-46111616 chr22:44791836-45911609 1119774 28059 -25 chr22 
chr22:45711548-47115293 chr22:45911625-46915275 1003651 28057 -26 chr22 chr22:46715160-49093641 chr22:46915277-48893636 1978360 56117 -27 chr22 chr22:48693630-50807843 chr22:48893661-50807843 1914183 56116 From 5046cf5b799d6ab051896314e3fdaae990ab2cc4 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 17 May 2024 22:20:36 +0000 Subject: [PATCH 042/110] add warnings when --panel and other params are used together --- workflows/phaseimpute/main.nf | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 2bba031a..ee98c4fc 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -169,6 +169,13 @@ workflow PHASEIMPUTE { if (params.step.split(',').contains("impute") || params.step.split(',').contains("all")) { + if (params.panel && params.chunks) { + log.warn("Both `--chunks` and `--panel` have been provided. Provided `--chunks` will override `--panel` generated chunks in `--impute` mode.") + } + if (params.panel && params.posfile) { + log.warn("Both `--posfile` and `--panel` have been provided. 
Provided `--posfile` will override `--panel` generated posfile in `--impute` mode.") + } + // if (params.chunks) { // ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} // .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) @@ -284,12 +291,12 @@ workflow PHASEIMPUTE { //Use provided chunks if --chunks } else if (params.chunks) { ch_chunks_quilt = ch_chunks.map { chr, txt -> [chr, file(txt)]} - .splitText() - .map { metamap, line -> - def fields = line.split("\t") - def startEnd = fields[2].split(':')[1].split('-') - [metamap, metamap.chr, startEnd[0], startEnd[1]] - } + .splitText() + .map { metamap, line -> + def fields = line.split("\t") + def startEnd = fields[2].split(':')[1].split('-') + [metamap, metamap.chr, startEnd[0], startEnd[1]] + } } // Impute BAMs with QUILT BAM_IMPUTE_QUILT(ch_input_impute, VCF_NORMALIZE_BCFTOOLS.out.hap_legend, VCF_CHUNK_GLIMPSE.out.chunks_quilt) From 37588feb79f36e5642bc062c135cb0ff3b3551fa Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 17 May 2024 22:20:44 +0000 Subject: [PATCH 043/110] add impute docs --- docs/usage.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index d8741e45..202e23ab 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -165,6 +165,8 @@ For starting from the imputation step, the required flags are: - `--input input.csv`: The samplesheet containing the input sample files in `bam` format. - `--panel samplesheet_reference.csv`: The files in `samplesheet_reference.csv` are the filtered, quality controlled, bi-allelic VCFs obtained from `--step panelprep`. - `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These are produced by `--step panelprep`. +- `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. 
These are produced by `--step panelprep` using `GLIMPSE1`. +- `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). The posfile can be generated with `--step panelprep`. You can find an overview of the results produced by this steps in the [Output](output.md). From 23ee31dd62927dd7cd8d56a56376cbfc82811960 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 17 May 2024 22:32:31 +0000 Subject: [PATCH 044/110] merge branch dev --- conf/steps/validation.config | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index ce55ba5a..e8f5f8ce 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -40,6 +40,23 @@ process { ext.args = "--tbi" } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { + ext.prefix = { "${meta.id}_truth_concat" } + publishDir = [ + path: { "${params.outdir}/validation/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z",].join(' ') + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_INDEX' { + ext.args = "--tbi" + } + // Validation subworkflow withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { publishDir = [ @@ -55,6 +72,11 @@ process { publishDir = [ enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { + ext.args = "'(NR == 1) || (FNR > 1)'" // Skip header line + ext.suffix = { "txt" } + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GUNZIP' { publishDir = [ enabled: false ] } From 2c570de8337b82d1c8cc3c7a8a138c59f3ffc35e Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 18 May 2024 22:30:31 +0000 Subject: [PATCH 045/110] add test panelprep --- nextflow.config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 0cb6d0dd..e4e1ba29 100644 --- a/nextflow.config +++ b/nextflow.config @@ -224,7 +224,9 @@ profiles { test_all { includeConfig 'conf/test_all.config' } test_quilt { includeConfig 'conf/test_quilt.config' } test_stitch { includeConfig 'conf/test_stitch.config' } - test_glimpse2 { includeConfig 'conf/test_glimpse2.config' } + test_glimpse2 { includeConfig 'conf/test_glimpse2.config' } + test_panelprep { includeConfig 'conf/test_panelprep.config'} + } From 8d3135c7416025b4a9633866c2d8ea7634c355ce Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 18 May 2024 22:30:48 +0000 Subject: [PATCH 046/110] add test panelprep --- conf/test_panelprep.config | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 conf/test_panelprep.config diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config new file mode 100644 index 00000000..8b1a9f3f --- 
/dev/null +++ b/conf/test_panelprep.config @@ -0,0 +1,29 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test_panelprep, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check panel prepation step' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '2.GB' + max_time = '1.h' + + // Genome references + fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" + panel = "${projectDir}/tests/csv/panel.csv" + phased = true + + // Step + step = "panelprep" +} From 691b792641a59ce0db69d838f3b82c1296c3b48a Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 18 May 2024 22:36:33 +0000 Subject: [PATCH 047/110] separate steps from tools --- conf/test.config | 4 +++- conf/test_all.config | 5 ++++- conf/test_full.config | 8 ++++++-- conf/test_glimpse2.config | 4 +++- conf/test_panelprep.config | 2 +- conf/test_quilt.config | 4 +++- conf/test_sim.config | 2 ++ conf/test_stitch.config | 4 +++- conf/test_validate.config | 2 ++ 9 files changed, 27 insertions(+), 8 deletions(-) diff --git a/conf/test.config b/conf/test.config index 8a7e5331..85ab9cd6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,9 @@ params { panel = "${projectDir}/tests/csv/panel.csv" phased = true - // Impute parameters + // Pipeline steps step = "panelprep,impute" + + // Impute tools tools = "glimpse1" } diff --git a/conf/test_all.config b/conf/test_all.config index 031c7e39..1d5d27dc 
100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -30,6 +30,9 @@ params { phased = true map = "${projectDir}/tests/csv/map.csv" + // Pipeline steps step = "all" - tools = "quilt" + + // Impute tools + tools = "glimpse1" } diff --git a/conf/test_full.config b/conf/test_full.config index aaa8cb0f..b0991b91 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -26,9 +26,13 @@ params { // Input data input = "${projectDir}/tests/csv/sample_sim_full.csv" panel = "${projectDir}/tests/csv/panel_full.csv" + + // Pipeline steps step = "all" - // Settings - tools = "glimpse1" + // Panelprep optional args remove_samples = "NA12878,NA12891,NA12892" + + // Impute tools + tools = "glimpse1" } diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config index d6823a6d..de645512 100644 --- a/conf/test_glimpse2.config +++ b/conf/test_glimpse2.config @@ -27,7 +27,9 @@ params { panel = "${projectDir}/tests/csv/panel.csv" phased = true - // Impute parameters + // Pipeline steps step = "panelprep,impute" + + // Impute tools tools = "glimpse2" } diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config index 8b1a9f3f..4b5ec319 100644 --- a/conf/test_panelprep.config +++ b/conf/test_panelprep.config @@ -24,6 +24,6 @@ params { panel = "${projectDir}/tests/csv/panel.csv" phased = true - // Step + // Pipeline steps step = "panelprep" } diff --git a/conf/test_quilt.config b/conf/test_quilt.config index decaf5ec..acc6e718 100644 --- a/conf/test_quilt.config +++ b/conf/test_quilt.config @@ -28,7 +28,9 @@ params { panel = "${projectDir}/tests/csv/panel.csv" phased = true - // Impute parameters + // Pipeline steps step = "panelprep,impute" + + // Impute tools tools = "quilt" } diff --git a/conf/test_sim.config b/conf/test_sim.config index d19d3aa7..cd011798 100644 --- a/conf/test_sim.config +++ b/conf/test_sim.config @@ -26,5 +26,7 @@ params { // Genome references fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" + 
+ // Pipeline steps step = "simulate" } diff --git a/conf/test_stitch.config b/conf/test_stitch.config index 1ef0f62f..97ae8ae0 100644 --- a/conf/test_stitch.config +++ b/conf/test_stitch.config @@ -27,7 +27,9 @@ params { fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" posfile = "${projectDir}/tests/csv/posfile.csv" - // Impute parameters + // Pipeline steps step = "impute" + + // Impute tools tools = "stitch" } diff --git a/conf/test_validate.config b/conf/test_validate.config index d47a4dbb..56da9785 100644 --- a/conf/test_validate.config +++ b/conf/test_validate.config @@ -29,5 +29,7 @@ params { panel = "${projectDir}/tests/csv/panel.csv" phased = true map = "${projectDir}/tests/csv/map.csv" + + // Pipeline steps step = "validate" } From 720bcdae40548864aff936c93129c1adeadd3a82 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 18 May 2024 22:37:54 +0000 Subject: [PATCH 048/110] add docs --- docs/usage.md | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 202e23ab..fec18f83 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -80,10 +80,10 @@ or you can specify a custom genome using: ## Running the pipeline -The typical command for running the pipeline is as follows: +The typical command for running the pre-processing of the panel and imputation of samples is as follows: ```bash -nextflow run nf-core/phaseimpute --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --outdir results --genome GRCh37 -profile docker --step panelprep,impute ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
@@ -127,7 +127,7 @@ You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-c ### Running the pipeline -Phaseimpute can be started at different points in the analysis by setting the flag `--step` and the available options `[simulate, panelprep, impute, validate, all]`. You can also run several steps simultaneously by listing the required processes as `--step panelprep,impute` or you can choose to run all steps sequentially by using `--step all`. +nf-core/phaseimpute can be started at different points in the analysis by setting the flag `--step` and the available options `[simulate, panelprep, impute, validate, all]`. You can also run several steps simultaneously by listing the required processes as `--step panelprep,impute` or you can choose to run all steps sequentially by using `--step all`. ### Start with simulation `--step simulate` @@ -142,8 +142,9 @@ The required flags for this mode are: - `--step simulate`: The step to run. - `--input samplesheet.csv`: The samplesheet containing the input sample files in `bam` format. - `--depth`: The final depth of the file [default: 1]. +- `--genome` or `--fasta`: The reference genome of the samples. -You can find an overview of the results produced by this steps in the [Output](output.md). +You can find an overview of the results produced by this step in the [Output](output.md). ### Start with panel preparation `--step panelprep` @@ -155,7 +156,7 @@ For starting from panel preparation, the required flags are `--step panelprep` a nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step panelprep --outdir results --genome GRCh37 -profile docker ``` -You can find an overview of the results produced by this steps in the [Output](output.md). +You can find an overview of the results produced by this step in the [Output](output.md). 
### Start with imputation `--step impute` @@ -163,12 +164,13 @@ For starting from the imputation step, the required flags are: - `--step impute` - `--input input.csv`: The samplesheet containing the input sample files in `bam` format. -- `--panel samplesheet_reference.csv`: The files in `samplesheet_reference.csv` are the filtered, quality controlled, bi-allelic VCFs obtained from `--step panelprep`. -- `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These are produced by `--step panelprep`. -- `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. These are produced by `--step panelprep` using `GLIMPSE1`. -- `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). The posfile can be generated with `--step panelprep`. +- `--genome` or `--fasta`: The reference genome of the samples. +- `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These required files are produced by `--step panelprep` and used as input in: + - `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. These are produced by `--step panelprep` using `GLIMPSE1`. + - `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). The posfile can be generated with `--step panelprep`. + - `--panel panel.csv`: A samplesheet containing the post-processed VCF. This is required by GLIMPSE1. This file can be obtained with `--step panelprep`. -You can find an overview of the results produced by this steps in the [Output](output.md). +You can find an overview of the results produced by this step in the [Output](output.md). 
### Imputation tools `--step impute --tools [glimpse1, quilt, stitch]` @@ -176,8 +178,27 @@ You can choose different software to perform the imputation. In the following se #### QUILT +[QUILT](https://github.com/rwdavies/QUILT) is an R and C++ program for rapid genotype imputation from low-coverage sequence using a large reference panel. The required inputs for this program are bam samples provided in the input samplesheet (`--input`) and a csv file with the genomic chunks (`--chunks`). + +```bash +nextflow run nf-core/phaseimpute --input samplesheet.csv --chunks chunks.csv --step impute --tool quilt --outdir results --genome GRCh37 -profile docker +``` + +The csv provided in `--chunks` must contain two columns [chr, file]. The first column is the chromosome and the file column are txt with the chunks produced by GLIMPSE1, unique to each chromosome. + +```console +chr,file +chr1,chunks_chr1.txt +chr2,chunks_chr2.txt +chr3,chunks_chr3.txt +``` + +The file column should contain a TSV obtained from GLIMPSE1 with the following [structure] (https://github.com/nf-core/test-datasets/blob/phaseimpute/data/panel/22/chr22_chunks_glimpse1.txt). + +If you do not have a csv with chunks, you can provide a reference panel to run the `--step panelprep` which produces a csv with these chunks, which is then used as input for QUILT. You can choose to run both steps sequentially as `--step panelprep,impute` or simply collect the files produced by `--step panelprep`. 
+ ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step impute --tool quilt --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --step panelprep,impute --panel samplesheet_reference.csv --outdir results --genome GRCh37 -profile docker --tools quilt ``` #### STITCH @@ -218,8 +239,10 @@ chr22 16570211 T C #### GLIMPSE1 +[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. This is an example command to run this tool from the `--step impute`: + ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step impute --tool glimpse1 --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step impute --tool glimpse1 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv ``` ### Start with validation `--step validate` From 2cdd0e68f5ec9231b18c393b210dbe082027b8b2 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 18 May 2024 22:42:56 +0000 Subject: [PATCH 049/110] add assertions for required params --- .../utils_nfcore_phaseimpute_pipeline/main.nf | 39 ++++++++++++++++--- workflows/phaseimpute/main.nf | 36 ++++++++++++----- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 29fdeee8..d33d6d50 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -108,15 +108,18 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // - ch_input = Channel + if (params.input) { + ch_input = Channel 
.fromSamplesheet("input") .map { meta, file, index -> [ meta, file, index ] } - - // Check if all extension are identical - getAllFilesExtension(ch_input) + // Check if all extension are identical + getAllFilesExtension(ch_input) + } else { + ch_input = Channel.of([[],[]]) + } // // Create channel from input file provided through params.input_truth // @@ -302,9 +305,35 @@ def validateInputParameters() { assert params.step, "A step must be provided" // Check that at least one tool is provided - if (params.step.split(',').contains("impute") || params.step.split(',').contains("panelprep")) { + if (params.step.split(',').contains("impute")) { assert params.tools, "No tools provided" } + + // Check that input is provided for all steps, except panelprep + if (params.step.split(',').contains("all") || params.step.split(',').contains("impute") || params.step.split(',').contains("simulate") || params.step.split(',').contains("validate")) { + assert params.input, "No input provided" + } + + // Check that posfile and chunks are provided when running impute only. Steps with panelprep generate those files. + if (params.step.split(',').contains("impute") && !params.step.split(',').find { it in ["all", "panelprep"] }) { + // Required by all tools except glimpse2 + if (!params.tools.split(',').contains("glimpse2")) { + assert params.posfile, "No posfile provided for impute" + } + // Required by all tools except STITCH + if (!params.tools.split(',').contains("stitch")) { + assert params.chunks, "No chunks provided for impute" + } + } + + // Emit a warning if both panel and (chunks || posfile) are used as input + if (params.panel && params.chunks) { + log.warn("Both `--chunks` and `--panel` have been provided. Provided `--chunks` will override `--panel` generated chunks in `--step impute` mode.") + } + if (params.panel && params.posfile) { + log.warn("Both `--posfile` and `--panel` have been provided. 
Provided `--posfile` will override `--panel` generated posfile in `--step impute` mode.") + } + } // diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index ee98c4fc..f19b7c46 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -168,20 +168,25 @@ workflow PHASEIMPUTE { } if (params.step.split(',').contains("impute") || params.step.split(',').contains("all")) { - - if (params.panel && params.chunks) { - log.warn("Both `--chunks` and `--panel` have been provided. Provided `--chunks` will override `--panel` generated chunks in `--impute` mode.") - } - if (params.panel && params.posfile) { - log.warn("Both `--posfile` and `--panel` have been provided. Provided `--posfile` will override `--panel` generated posfile in `--impute` mode.") - } - // if (params.chunks) { // ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} // .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) // .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} // // Use channel ch_chunks for GLIMPSE1 imputation // } + + // Params posfile should replace part of ch_panel_sites_tsv (specifically, the .txt) + // The VCF with the sites and post-prepared panel should be used as input in --panel. 
+ + // if (params.posfile) { + // // Use channel ch_posfile for GLIMPSE1 imputation + // ch_panel_sites_tsv = ch_posfile + // } else if (params.panel && params.step.split(',').contains("panelprep") && !params.posfile) { + // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel + // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + // -> [metaPC, sites, tsv] + // } + //} // Output channel of input process ch_impute_output = Channel.empty() if (params.tools.split(',').contains("glimpse1")) { @@ -285,10 +290,10 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("quilt")) { print("Impute with QUILT") - //Use previous chunks if --step panelprep + // Use previous chunks if --step panelprep if (params.panel && params.step.split(',').contains("panelprep") && !params.chunks) { ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt - //Use provided chunks if --chunks + // Use provided chunks if --chunks } else if (params.chunks) { ch_chunks_quilt = ch_chunks.map { chr, txt -> [chr, file(txt)]} .splitText() @@ -315,6 +320,17 @@ workflow PHASEIMPUTE { } if (params.step.split(',').contains("validate") || params.step.split(',').contains("all")) { + + // if (params.posfile) { + // Use channel ch_posfile for validation + // ch_panel_sites_tsv = ch_posfile + // } else if (params.panel && params.step.split(',').contains("panelprep") && !params.posfile) { + // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel + // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + // -> [metaPC, sites, tsv] + // } + //} + ch_truth_vcf = Channel.empty() // Get extension of input files truth_ext = getAllFilesExtension(ch_input_validate_truth) From 6c0f48c67490a8bd7e45255a81b8e9b6838047dd Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sat, 18 May 2024 23:58:37 +0000 Subject: [PATCH 050/110] check params panel in glimpse tools --- .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 8 ++++++-- 
workflows/phaseimpute/main.nf | 10 +++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index d33d6d50..390959c5 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -318,11 +318,15 @@ def validateInputParameters() { if (params.step.split(',').contains("impute") && !params.step.split(',').find { it in ["all", "panelprep"] }) { // Required by all tools except glimpse2 if (!params.tools.split(',').contains("glimpse2")) { - assert params.posfile, "No posfile provided for impute" + assert params.posfile, "No --posfile provided for impute" } // Required by all tools except STITCH if (!params.tools.split(',').contains("stitch")) { - assert params.chunks, "No chunks provided for impute" + assert params.chunks, "No --chunks provided for impute" + } + // Required by GLIMPSE1 and GLIMPSE2 only + if (params.tools.split(',').contains("glimpse")) { + assert params.panel, "No --panel provided for imputation with GLIMPSE" } } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index f19b7c46..58526b57 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -259,12 +259,12 @@ workflow PHASEIMPUTE { print("Impute with STITCH") // Obtain the user's posfile if provided or calculate it from ref panel file - if (params.posfile ) { // User supplied posfile - ch_posfile = ch_posfile - } else if (params.panel && params.step.split(',').contains("panelprep")) { // Panelprep posfile + if (params.posfile) { // User supplied posfile + ch_posfile = ch_posfile + } else if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] }) { // Panelprep posfile ch_posfile = PREPARE_POSFILE_TSV.out.posfile } else { - error "No posfile or reference panel preparation was included" + error "Error with STITCH 
imputation. No posfile or reference panel preparation was included" } // Prepare inputs PREPARE_INPUT_STITCH(ch_posfile, ch_fasta, ch_input_impute) @@ -291,7 +291,7 @@ workflow PHASEIMPUTE { print("Impute with QUILT") // Use previous chunks if --step panelprep - if (params.panel && params.step.split(',').contains("panelprep") && !params.chunks) { + if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt // Use provided chunks if --chunks } else if (params.chunks) { From 1cd2c69869dadca2f71a1ce42b42f3441798c6f1 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 00:21:11 +0000 Subject: [PATCH 051/110] check that input truth is provided when running validation --- .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 390959c5..d81b805b 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -318,16 +318,21 @@ def validateInputParameters() { if (params.step.split(',').contains("impute") && !params.step.split(',').find { it in ["all", "panelprep"] }) { // Required by all tools except glimpse2 if (!params.tools.split(',').contains("glimpse2")) { - assert params.posfile, "No --posfile provided for impute" + assert params.posfile, "No --posfile provided for --step impute" } // Required by all tools except STITCH if (!params.tools.split(',').contains("stitch")) { - assert params.chunks, "No --chunks provided for impute" + assert params.chunks, "No --chunks provided for --step impute" } // Required by GLIMPSE1 and GLIMPSE2 only if (params.tools.split(',').contains("glimpse")) { assert params.panel, "No --panel provided for imputation with 
GLIMPSE" } + + // Check that input_truth is provided when running validate + if (params.step.split(',').find { it in ["all", "validate"] } ) { + assert params.input_truth, "No --input_truth was provided for --step validate" + } } // Emit a warning if both panel and (chunks || posfile) are used as input From 522f25cbb8aff33977c617e6a38d2c0e9e517d0e Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 00:23:44 +0000 Subject: [PATCH 052/110] test all steps with all tools --- conf/test_all.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_all.config b/conf/test_all.config index 1d5d27dc..9cd023e3 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -34,5 +34,5 @@ params { step = "all" // Impute tools - tools = "glimpse1" + tools = "glimpse1,stitch,quilt" } From 5a9de4ccdad497ac06ae3fba8ea8225548a80476 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 16:38:55 +0000 Subject: [PATCH 053/110] add vcffixup module --- modules.json | 5 ++ .../nf-core/vcflib/vcffixup/environment.yml | 9 +++ modules/nf-core/vcflib/vcffixup/main.nf | 46 ++++++++++++++ modules/nf-core/vcflib/vcffixup/meta.yml | 46 ++++++++++++++ .../vcflib/vcffixup/tests/main.nf.test | 61 +++++++++++++++++++ .../vcflib/vcffixup/tests/main.nf.test.snap | 50 +++++++++++++++ .../nf-core/vcflib/vcffixup/tests/tags.yml | 2 + 7 files changed, 219 insertions(+) create mode 100644 modules/nf-core/vcflib/vcffixup/environment.yml create mode 100644 modules/nf-core/vcflib/vcffixup/main.nf create mode 100644 modules/nf-core/vcflib/vcffixup/meta.yml create mode 100644 modules/nf-core/vcflib/vcffixup/tests/main.nf.test create mode 100644 modules/nf-core/vcflib/vcffixup/tests/main.nf.test.snap create mode 100644 modules/nf-core/vcflib/vcffixup/tests/tags.yml diff --git a/modules.json b/modules.json index 377c0dce..dbb7f48d 100644 --- a/modules.json +++ b/modules.json @@ 
-173,6 +173,11 @@ "branch": "master", "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", "installed_by": ["modules"] + }, + "vcflib/vcffixup": { + "branch": "master", + "git_sha": "072cf562e143252953a1b5c5ed38ec55eaa930c8", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/vcflib/vcffixup/environment.yml b/modules/nf-core/vcflib/vcffixup/environment.yml new file mode 100644 index 00000000..d06c4c7b --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "vcflib_vcffixup" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::vcflib=1.0.3" diff --git a/modules/nf-core/vcflib/vcffixup/main.nf b/modules/nf-core/vcflib/vcffixup/main.nf new file mode 100644 index 00000000..43f8ea63 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/main.nf @@ -0,0 +1,46 @@ +process VCFLIB_VCFFIXUP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1': + 'biocontainers/vcflib:1.0.3--hecb563c_1' }" + + input: + tuple val(meta), path(vcf), path(tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ + """ + vcffixup \\ + $vcf | bgzip -c $args > ${prefix}_fixed.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.fixup.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/vcflib/vcffixup/meta.yml b/modules/nf-core/vcflib/vcffixup/meta.yml new file mode 100644 index 00000000..be7eac6a --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/meta.yml @@ -0,0 +1,46 @@ +name: "vcflib_vcffixup" +description: Generates a VCF stream where AC and NS have been generated for each record using sample genotypes. +keywords: + - vcf + - vcflib + - vcflib/vcffixup + - AC/NS/AF +tools: + - "vcflib": + description: "Command-line tools for manipulating VCF files" + homepage: https://github.com/vcflib/vcflib + documentation: https://github.com/vcflib/vcflib#USAGE + doi: "10.1101/2021.05.21.445151" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.{.vcf.gz,vcf}" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/vcflib/vcffixup/tests/main.nf.test b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test new file mode 100644 index 00000000..f819eef6 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process VCFLIB_VCFFIXUP" + script "../main.nf" + process "VCFLIB_VCFFIXUP" + + tag "modules" + tag "modules_nfcore" + tag "vcflib" + tag "vcflib/vcffixup" + + test("sarscov2 - vcf") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - vcf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + + +} diff --git a/modules/nf-core/vcflib/vcffixup/tests/main.nf.test.snap b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test.snap new file mode 100644 index 00000000..957d8354 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/tests/main.nf.test.snap @@ -0,0 +1,50 @@ +{ + "sarscov2 - vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false 
+ }, + "test_fixed.vcf.gz:md5,639ca71cddc7f5444f4376cdf474007e" + ] + ], + "1": [ + "versions.yml:md5,fac118ed81e4ca76d3a75c2f47f4c4ce" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_fixed.vcf.gz:md5,639ca71cddc7f5444f4376cdf474007e" + ] + ], + "versions": [ + "versions.yml:md5,fac118ed81e4ca76d3a75c2f47f4c4ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T18:48:21.949531776" + }, + "sarscov2 - vcf - stub": { + "content": [ + "test.fixup.vcf.gz", + [ + "versions.yml:md5,fac118ed81e4ca76d3a75c2f47f4c4ce" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T18:48:32.365674014" + } +} \ No newline at end of file diff --git a/modules/nf-core/vcflib/vcffixup/tests/tags.yml b/modules/nf-core/vcflib/vcffixup/tests/tags.yml new file mode 100644 index 00000000..51cf4712 --- /dev/null +++ b/modules/nf-core/vcflib/vcffixup/tests/tags.yml @@ -0,0 +1,2 @@ +vcflib/vcffixup: + - "modules/nf-core/vcflib/vcffixup/**" From f067b2fa3b03a05ec1dd4148c3bcb2730e417f8c Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 16:39:13 +0000 Subject: [PATCH 054/110] add glimpse2 config --- conf/steps/imputation_glimpse2.config | 62 +++++++++++++++++++++++++++ nextflow.config | 1 + 2 files changed, 63 insertions(+) create mode 100644 conf/steps/imputation_glimpse2.config diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config new file mode 100644 index 00000000..b5091593 --- /dev/null +++ b/conf/steps/imputation_glimpse2.config @@ -0,0 +1,62 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments 
appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/glimpse2/" }, + mode: params.publish_dir_mode, + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:VCFLIB_VCFFIXUP' { + ext.prefix = { "${meta.panel}_${meta.chr}" } + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1' { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { + ext.args = "--keep-monomorphic-ref-sites" + ext.suffix = "vcf.gz" + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2' { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } + + // Concatenate the imputed chunks + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/glimpse2/concat" }, + mode: params.publish_dir_mode, + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z"].join(' ') + ext.prefix = { "${meta.id}_glimpse2" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:BCFTOOLS_INDEX' { + ext.args = "--tbi" + ext.prefix = { "${meta.id}_glimpse2" } + } + +} diff --git a/nextflow.config b/nextflow.config index e4e1ba29..31ff6faf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -311,6 +311,7 @@ includeConfig 'conf/steps/panel_prep.config' includeConfig 'conf/steps/imputation_glimpse1.config' 
includeConfig 'conf/steps/imputation_quilt.config' includeConfig 'conf/steps/imputation_stitch.config' +includeConfig 'conf/steps/imputation_glimpse2.config' // validation step includeConfig 'conf/steps/validation.config' From b5baf8771998591f8ff8c29e459b3b51061b1830 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 16:39:54 +0000 Subject: [PATCH 055/110] add glimpse2 imputation --- .../local/vcf_impute_glimpse2/main.nf | 37 ++++++++++++++----- workflows/phaseimpute/main.nf | 37 +++++++++++-------- 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/subworkflows/local/vcf_impute_glimpse2/main.nf b/subworkflows/local/vcf_impute_glimpse2/main.nf index f5e9275e..1ccff8f5 100644 --- a/subworkflows/local/vcf_impute_glimpse2/main.nf +++ b/subworkflows/local/vcf_impute_glimpse2/main.nf @@ -1,13 +1,16 @@ -include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase' -include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' -include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index' -include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index' +include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' +include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index' +include { VCFLIB_VCFFIXUP } from '../../../modules/nf-core/vcflib/vcffixup/main' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' workflow VCF_IMPUTE_GLIMPSE2 { take: - ch_input // channel (mandatory): [ [id], vcf, csi, infos ] - ch_panel // channel (mandatory): [ [panel, chr, region], vcf, csi, region ] + 
ch_input // channel (mandatory): [ [id], bam, bai ] + ch_panel // channel (mandatory): [ [panel, chr, region], vcf, tbi ] ch_chunks // channel (optional): [ [chr], region1, region2 ] ch_fasta // channel (mandatory): [ [genome], fa, fai ] @@ -23,6 +26,15 @@ workflow VCF_IMPUTE_GLIMPSE2 { // Add chr as key to input ch_input = ch_input.map{meta, bam, bai -> return[['chr': meta.chr], meta, bam, bai]} + // Fix panel (AC/AN INFO fields in VCF are inconsistent with GT field) + VCFLIB_VCFFIXUP(ch_panel) + + // Index fixed panel + BCFTOOLS_INDEX_1(VCFLIB_VCFFIXUP.out.vcf) + + // Join fixed vcf and tbi + ch_panel = VCFLIB_VCFFIXUP.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) + // Join chunks and panel ch_chunks_panel = ch_chunks.join(ch_panel) @@ -38,10 +50,17 @@ workflow VCF_IMPUTE_GLIMPSE2 { // Remove chr key ch_input_glimpse2 = ch_input_glimpse2.map{ it[1..-1] } - //Impute with Glimpse2 - GLIMPSE2_PHASE(ch_input_glimpse2, ch_fasta) // Error: AC/AN INFO fields in VCF are inconsistent with GT field, update the values in the VCF + // Impute with Glimpse2 + GLIMPSE2_PHASE(ch_input_glimpse2, ch_fasta) ch_versions = ch_versions.mix(GLIMPSE2_PHASE.out.versions) + // Index phased file + BCFTOOLS_INDEX_2(GLIMPSE2_PHASE.out.phased_variants) + + // Join imputed and index files + ch_imputed_vcf_tbi = GLIMPSE2_PHASE.out.phased_variants.join(BCFTOOLS_INDEX_2.out.tbi) + emit: - versions = ch_versions // channel: [ versions.yml ] + vcf_tbi = ch_imputed_vcf_tbi // [ [id, chr, region], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 58526b57..5a5e6582 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -38,6 +38,7 @@ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/ // GLIMPSE2 subworkflows include { VCF_IMPUTE_GLIMPSE2 } from '../../subworkflows/local/vcf_impute_glimpse2' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE2} from 
'../../subworkflows/local/vcf_concatenate_bcftools' // QUILT subworkflows include { VCF_CHUNK_GLIMPSE } from '../../subworkflows/local/vcf_chunk_glimpse' @@ -234,26 +235,32 @@ workflow PHASEIMPUTE { } if (params.tools.split(',').contains("glimpse2")) { - error "Glimpse2 not yet implemented" // Use previous chunks if --step panelprep - // if (params.panel && params.step.split(',').contains("panelprep") && !params.chunks) { - // ch_chunks = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 - - // VCF_IMPUTE_GLIMPSE2(ch_input_impute, - // ch_panel_phased, - // ch_chunks, - // ch_fasta) - // } else if (params.chunks) { - // ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} - // .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) - // .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { + ch_chunks = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 + } else if (params.chunks) { + ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} + .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) + .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} // Use channel ch_chunks for GLIMPSE2 imputation - // } else { - // error "Either no reference panel was included or you did not set step --panelprep or you did not provide --chunks" - // } + } else { + error "Either no reference panel was included or you did not set step --panelprep or you did not provide --chunks" + } + // Run imputation + VCF_IMPUTE_GLIMPSE2(ch_input_impute, + ch_panel_phased, + ch_chunks, + ch_fasta) + // Concatenate by chromosomes + CONCAT_GLIMPSE2(VCF_IMPUTE_GLIMPSE2.out.vcf_tbi) + ch_versions = ch_versions.mix(CONCAT_GLIMPSE2.out.versions) + + // Add results to input validate + ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE2.out.vcf_tbi_join) + ch_input_validate.dump(tag:"ch_input_validate") } if 
(params.tools.split(',').contains("stitch")) { print("Impute with STITCH") From 549307ac3a270009ae8400140e953a3b9a9d52d6 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 16:42:17 +0000 Subject: [PATCH 056/110] allow glimpse2 phase output as vcf.gz --- modules.json | 3 ++- modules/nf-core/glimpse2/phase/glimpse2-phase.diff | 14 ++++++++++++++ modules/nf-core/glimpse2/phase/main.nf | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/glimpse2/phase/glimpse2-phase.diff diff --git a/modules.json b/modules.json index dbb7f48d..f816fb4c 100644 --- a/modules.json +++ b/modules.json @@ -98,7 +98,8 @@ "glimpse2/phase": { "branch": "master", "git_sha": "9c71d32e372650e8bb3e1fb15339017aad5e3f7f", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": ["multiple_impute_glimpse2"], + "patch": "modules/nf-core/glimpse2/phase/glimpse2-phase.diff" }, "glimpse2/splitreference": { "branch": "master", diff --git a/modules/nf-core/glimpse2/phase/glimpse2-phase.diff b/modules/nf-core/glimpse2/phase/glimpse2-phase.diff new file mode 100644 index 00000000..0b622041 --- /dev/null +++ b/modules/nf-core/glimpse2/phase/glimpse2-phase.diff @@ -0,0 +1,14 @@ +Changes in module 'nf-core/glimpse2/phase' +--- modules/nf-core/glimpse2/phase/main.nf ++++ modules/nf-core/glimpse2/phase/main.nf +@@ -22,7 +22,7 @@ + tuple val(meta2), path(fasta_reference), path(fasta_reference_index) + + output: +- tuple val(meta), path("*.{vcf,bcf,bgen}"), emit: phased_variants ++ tuple val(meta), path("*.{vcf.gz,bcf,bgen}"), emit: phased_variants + tuple val(meta), path("*.txt.gz") , emit: stats_coverage, optional: true + path "versions.yml" , emit: versions + + +************************************************************ diff --git a/modules/nf-core/glimpse2/phase/main.nf b/modules/nf-core/glimpse2/phase/main.nf index f61cf022..6e07b441 100644 --- a/modules/nf-core/glimpse2/phase/main.nf +++ 
b/modules/nf-core/glimpse2/phase/main.nf @@ -22,7 +22,7 @@ process GLIMPSE2_PHASE { tuple val(meta2), path(fasta_reference), path(fasta_reference_index) output: - tuple val(meta), path("*.{vcf,bcf,bgen}"), emit: phased_variants + tuple val(meta), path("*.{vcf.gz,bcf,bgen}"), emit: phased_variants tuple val(meta), path("*.txt.gz") , emit: stats_coverage, optional: true path "versions.yml" , emit: versions From 47762ea61861b7132720ad17b7c7d1fdc69dce49 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 16:48:09 +0000 Subject: [PATCH 057/110] add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b189d41e..54cb5e1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#22](https://github.com/nf-core/phaseimpute/pull/20) - Add validation step for concordance analysis. Input channels changed to match inputs steps. Outdir folder organised by steps. Modules config by subworkflows. - [#26](https://github.com/nf-core/phaseimpute/pull/26) - Added QUILT method - [#47](https://github.com/nf-core/phaseimpute/pull/47) - Add possibility to remove samples from reference panel. Add glimpse2 chunking method. Add full-size test parameters. +- [#58](https://github.com/nf-core/phaseimpute/pull/58) - Add external params posfile and chunks. Add glimpse2 phasing and imputation. 
### `Changed` From 4770012db6bc0c80adb980ccc0d01b7d5cc6162c Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 16:54:04 +0000 Subject: [PATCH 058/110] collect versions --- subworkflows/local/vcf_impute_glimpse2/main.nf | 3 +++ workflows/phaseimpute/main.nf | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/vcf_impute_glimpse2/main.nf b/subworkflows/local/vcf_impute_glimpse2/main.nf index 1ccff8f5..24a9f248 100644 --- a/subworkflows/local/vcf_impute_glimpse2/main.nf +++ b/subworkflows/local/vcf_impute_glimpse2/main.nf @@ -28,9 +28,11 @@ workflow VCF_IMPUTE_GLIMPSE2 { // Fix panel (AC/AN INFO fields in VCF are inconsistent with GT field) VCFLIB_VCFFIXUP(ch_panel) + ch_versions = ch_versions.mix(VCFLIB_VCFFIXUP.out.versions) // Index fixed panel BCFTOOLS_INDEX_1(VCFLIB_VCFFIXUP.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions) // Join fixed vcf and tbi ch_panel = VCFLIB_VCFFIXUP.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) @@ -56,6 +58,7 @@ workflow VCF_IMPUTE_GLIMPSE2 { // Index phased file BCFTOOLS_INDEX_2(GLIMPSE2_PHASE.out.phased_variants) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_2.out.versions) // Join imputed and index files ch_imputed_vcf_tbi = GLIMPSE2_PHASE.out.phased_variants.join(BCFTOOLS_INDEX_2.out.tbi) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 5a5e6582..00d757a3 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -253,6 +253,7 @@ workflow PHASEIMPUTE { ch_panel_phased, ch_chunks, ch_fasta) + ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE2.out.versions) // Concatenate by chromosomes CONCAT_GLIMPSE2(VCF_IMPUTE_GLIMPSE2.out.vcf_tbi) @@ -260,7 +261,6 @@ workflow PHASEIMPUTE { // Add results to input validate ch_input_validate = ch_input_validate.mix(CONCAT_GLIMPSE2.out.vcf_tbi_join) - ch_input_validate.dump(tag:"ch_input_validate") } if 
(params.tools.split(',').contains("stitch")) { print("Impute with STITCH") From 427cd8dd1c5bc3cfb15c6a65d3916e785f4a6c96 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 17:53:21 +0000 Subject: [PATCH 059/110] update warning --- subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index d81b805b..ceed6444 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -336,10 +336,10 @@ def validateInputParameters() { } // Emit a warning if both panel and (chunks || posfile) are used as input - if (params.panel && params.chunks) { + if (params.panel && params.chunks && params.step.split(',').find { it in ["all", "panelprep"]} ) { log.warn("Both `--chunks` and `--panel` have been provided. Provided `--chunks` will override `--panel` generated chunks in `--step impute` mode.") } - if (params.panel && params.posfile) { + if (params.panel && params.posfile && params.step.split(',').find { it in ["all", "panelprep"]} ) { log.warn("Both `--posfile` and `--panel` have been provided. 
Provided `--posfile` will override `--panel` generated posfile in `--step impute` mode.") } From 774250aca52dba008a419892e2659dfb54e5aa40 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 17:53:35 +0000 Subject: [PATCH 060/110] add glimpse2 to test all config --- conf/test_all.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_all.config b/conf/test_all.config index 9cd023e3..b12e3ed2 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -34,5 +34,5 @@ params { step = "all" // Impute tools - tools = "glimpse1,stitch,quilt" + tools = "glimpse1,glimpse2,stitch,quilt" } From ce5be3ad59e1f711f5ad35874aa048138f1a6144 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 17:54:04 +0000 Subject: [PATCH 061/110] update docs --- docs/output.md | 14 +++++++++++++- docs/usage.md | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/docs/output.md b/docs/output.md index 20b46c2d..7f8e3069 100644 --- a/docs/output.md +++ b/docs/output.md @@ -82,7 +82,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [STITCH](#quilt) - Perform imputation +- [STITCH](#stitch) - Perform imputation - [Concatenate](#concatenate) - Concatenate all imputed chunks into a single VCF ### STITCH @@ -100,6 +100,18 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d [bcftools concat](https://samtools.github.io/bcftools/bcftools.html) will produce a single VCF from a list of imputed VCFs. 
+## GLIMPSE2 imputation mode + +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + +- [GLIMPSE2](#glimpse2) - Perform imputation +- [Concatenate](#concatenate) - Concatenate all imputed chunks into a single VCF + +### GLIMPSE2 output files + +- `imputation/glimpse2/concat` +- `.*.vcf.gz`: Imputed and concatenated VCF for all the input samples. + ## Reports Reports contain useful metrics and pipeline information for the different modes. diff --git a/docs/usage.md b/docs/usage.md index fec18f83..4bd9c6a8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -156,6 +156,13 @@ For starting from panel preparation, the required flags are `--step panelprep` a nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step panelprep --outdir results --genome GRCh37 -profile docker ``` +The required flags for this mode are: + +- `--step panelprep`: The step to run. +- `--panel reference.csv`: The samplesheet containing the reference panel files in `vcf.gz` format. +- `--phased`: (optional) Whether the reference panel is phased (true|false). +- `--remove_samples`: (optional) A comma-separated list of samples to remove from the reference. + You can find an overview of the results produced by this step in the [Output](output.md). ### Start with imputation `--step impute` @@ -170,9 +177,7 @@ For starting from the imputation step, the required flags are: - `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). The posfile can be generated with `--step panelprep`. - `--panel panel.csv`: A samplesheet containing the post-processed VCF. This is required by GLIMPSE1. This file can be obtained with `--step panelprep`. -You can find an overview of the results produced by this step in the [Output](output.md). 
- -### Imputation tools `--step impute --tools [glimpse1, quilt, stitch]` +### Imputation tools `--step impute --tools [glimpse1, glimpse2, quilt, stitch]` You can choose different software to perform the imputation. In the following sections, the typical commands for running the pipeline with each software are included. @@ -239,16 +244,47 @@ chr22 16570211 T C #### GLIMPSE1 -[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. This is an example command to run this tool from the `--step impute`: +[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. Recommended for many samples at >0.5x coverage and small reference panels. This is an example command to run this tool from the `--step impute`: ```bash nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step impute --tool glimpse1 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv ``` +#### GLIMPSE2 + +[GLIMPSE2](https://github.com/odelaneau/GLIMPSE) is a set of tools for phasing and imputation for low-coverage sequencing datasets. This is an example command to run this tool from the `--step impute`: + +```bash +nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step impute --tools glimpse2 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv +``` + ### Start with validation `--step validate` This step compares a _truth_ VCF to an _imputed_ VCF in order to compute imputation accuracy. +```bash +nextflow run nf-core/phaseimpute --input samplesheet.csv --input_truth truth.csv --step validate --outdir results --genome GRCh37 -profile docker +``` + +The required flags for this mode are: + +- `--step validate`: The step to run.
+- `--input samplesheet.csv`: The samplesheet containing the input sample files in `vcf` format. +- `--input_truth samplesheet.csv`: The samplesheet containing the truth VCF files in `vcf` format. + +### Run all steps sequentially `--step all` + +This mode runs all the previous steps. This requires several flags: + +- `--step all`: The step to run. +- `--input samplesheet.csv`: The samplesheet containing the input sample files in `bam` format. +- `--depth`: The final depth of the input file [default: 1]. +- `--genome` or `--fasta`: The reference genome of the samples. +- `--tools [glimpse1, glimpse2, quilt, stitch]`: A selection of one or more of the available imputation tools. +- `--panel reference.csv`: The samplesheet containing the reference panel files in `vcf.gz` format. +- `--remove_samples`: (optional) A comma-separated list of samples to remove from the reference. +- `--input_truth samplesheet.csv`: The samplesheet containing the truth VCF files in `vcf` format. + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: From 4ae2a71b2ac24091b0f4f4132011b5b50e43c31b Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 22:13:19 +0000 Subject: [PATCH 062/110] add chunk prepare channel module --- .../local/chunk_prepare_channel/main.nf | 32 ++++++++++ workflows/phaseimpute/main.nf | 63 ++++++++----------- 2 files changed, 57 insertions(+), 38 deletions(-) create mode 100644 subworkflows/local/chunk_prepare_channel/main.nf diff --git a/subworkflows/local/chunk_prepare_channel/main.nf b/subworkflows/local/chunk_prepare_channel/main.nf new file mode 100644 index 00000000..26e388b5 --- /dev/null +++ b/subworkflows/local/chunk_prepare_channel/main.nf @@ -0,0 +1,32 @@ +workflow CHUNK_PREPARE_CHANNEL { + + take: + ch_chunks // channel: [ [id, chr], txt ] + tool + + main: + + ch_versions = Channel.empty() + + if(tool == "glimpse"){ + ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} + .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) + .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} + } + + if(tool == "quilt") { + ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} + .splitText() + .map { metamap, line -> + def fields = line.split("\t") + def startEnd = fields[2].split(':')[1].split('-') + [metamap, metamap.chr, startEnd[0], startEnd[1]] + } + } + + + + emit: + chunks = ch_chunks // channel: [ [meta], regionstart, regionend ] + +} diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 00d757a3..f4c6bb73 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -35,6 +35,7 @@ include { VCF_IMPUTE_GLIMPSE as VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/ include { COMPUTE_GL as GL_TRUTH } from '../../subworkflows/local/compute_gl' include { COMPUTE_GL as GL_INPUT } from
'../../subworkflows/local/compute_gl' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' +include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' // GLIMPSE2 subworkflows include { VCF_IMPUTE_GLIMPSE2 } from '../../subworkflows/local/vcf_impute_glimpse2' @@ -169,29 +170,28 @@ workflow PHASEIMPUTE { } if (params.step.split(',').contains("impute") || params.step.split(',').contains("all")) { - // if (params.chunks) { - // ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} - // .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) - // .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} - // // Use channel ch_chunks for GLIMPSE1 imputation - // } - - // Params posfile should replace part of ch_panel_sites_tsv (specifically, the .txt) - // The VCF with the sites and post-prepared panel should be used as input in --panel. - - // if (params.posfile) { - // // Use channel ch_posfile for GLIMPSE1 imputation - // ch_panel_sites_tsv = ch_posfile - // } else if (params.panel && params.step.split(',').contains("panelprep") && !params.posfile) { - // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel - // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - // -> [metaPC, sites, tsv] - // } - //} // Output channel of input process ch_impute_output = Channel.empty() + if (params.tools.split(',').contains("glimpse1")) { println "Impute with Glimpse1" + + if (params.chunks) { + ch_chunks = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks + } + + //Params posfile should replace part of ch_panel_sites_tsv (specifically, the .txt) + //The VCF with the sites and post-prepared panel should be used as input in --panel. 
+ + // if (params.posfile) { + // ch_panel_sites_tsv = ch_posfile + // } else if (params.panel && params.step.split(',').contains("panelprep") && !params.posfile) { + // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel + // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index + // -> [metaPC, sites, tsv] + // } + // } + // Glimpse1 subworkflow GL_INPUT( // Compute GL for input data once per panel by chromosome ch_input_impute, @@ -240,12 +240,7 @@ workflow PHASEIMPUTE { if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { ch_chunks = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 } else if (params.chunks) { - ch_chunks = ch_chunks.map { chr, txt -> [chr, file(txt)]} - .splitCsv(header: ['ID', 'Chr', 'RegionIn', 'RegionOut', 'Size1', 'Size2'], sep: "\t", skip: 0) - .map { meta, it -> [meta, it["RegionIn"], it["RegionOut"]]} - // Use channel ch_chunks for GLIMPSE2 imputation - } else { - error "Either no reference panel was included or you did not set step --panelprep or you did not provide --chunks" + ch_chunks = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks } // Run imputation @@ -265,14 +260,11 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("stitch")) { print("Impute with STITCH") - // Obtain the user's posfile if provided or calculate it from ref panel file - if (params.posfile) { // User supplied posfile - ch_posfile = ch_posfile - } else if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] }) { // Panelprep posfile + // Get posfile from panelprep step if --posfile not supplied + if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] }) { ch_posfile = PREPARE_POSFILE_TSV.out.posfile - } else { - error "Error with STITCH imputation. 
No posfile or reference panel preparation was included" } + // Prepare inputs PREPARE_INPUT_STITCH(ch_posfile, ch_fasta, ch_input_impute) ch_versions = ch_versions.mix(PREPARE_INPUT_STITCH.out.versions) @@ -302,14 +294,9 @@ workflow PHASEIMPUTE { ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt // Use provided chunks if --chunks } else if (params.chunks) { - ch_chunks_quilt = ch_chunks.map { chr, txt -> [chr, file(txt)]} - .splitText() - .map { metamap, line -> - def fields = line.split("\t") - def startEnd = fields[2].split(':')[1].split('-') - [metamap, metamap.chr, startEnd[0], startEnd[1]] - } + ch_chunks_quilt = CHUNK_PREPARE_CHANNEL(ch_chunks, "quilt").out.chunks } + // Impute BAMs with QUILT BAM_IMPUTE_QUILT(ch_input_impute, VCF_NORMALIZE_BCFTOOLS.out.hap_legend, VCF_CHUNK_GLIMPSE.out.chunks_quilt) ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) From 1523336faac4f3616c7b5e601d60c0574b633749 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 22:13:36 +0000 Subject: [PATCH 063/110] correct empty channels --- .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index ceed6444..fe81f1d4 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -118,7 +118,7 @@ workflow PIPELINE_INITIALISATION { // Check if all extension are identical getAllFilesExtension(ch_input) } else { - ch_input = Channel.of([[],[]]) + ch_input = Channel.of([[], [], []]) } // // Create channel from input file provided through params.input_truth @@ -216,7 +216,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("posfile") .map {meta, file -> [ meta, file ]} } else { - ch_posfile = [[]] + ch_posfile = [[[],[]]] } // @@ -230,7 +230,7 
@@ workflow PIPELINE_INITIALISATION { meta, file -> [ meta, file ] }} else { - ch_chunks = [[]] + ch_chunks = [[[],[]]] } emit: @@ -316,8 +316,8 @@ def validateInputParameters() { // Check that posfile and chunks are provided when running impute only. Steps with panelprep generate those files. if (params.step.split(',').contains("impute") && !params.step.split(',').find { it in ["all", "panelprep"] }) { - // Required by all tools except glimpse2 - if (!params.tools.split(',').contains("glimpse2")) { + // Required by all tools except glimpse2 and quilt + if (!params.tools.split(',').find { it in ["glimpse2", "quilt"] }) { assert params.posfile, "No --posfile provided for --step impute" } // Required by all tools except STITCH From cb0342c53753e00b431c395f5216d98f0e5044f0 Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 22:36:21 +0000 Subject: [PATCH 064/110] move vcffixup to panelprep --- conf/steps/imputation_glimpse2.config | 12 +--------- conf/steps/panel_prep.config | 10 +++++++++ .../local/vcf_impute_glimpse2/main.nf | 22 +++---------------- .../local/vcf_normalize_bcftools/main.nf | 13 +++++++++++ 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index b5091593..cf9600b0 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -19,23 +19,13 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:VCFLIB_VCFFIXUP' { - ext.prefix = { "${meta.panel}_${meta.chr}" } - publishDir = [enabled: false] - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1' { - ext.args = "--tbi" - publishDir = [ enabled: false ] - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { ext.args = "--keep-monomorphic-ref-sites" ext.suffix = "vcf.gz" publishDir = [ enabled: false ] } - withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_1' { ext.args = "--tbi" publishDir = [ enabled: false ] } diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index e292dd01..0f874bf9 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -71,6 +71,16 @@ process { publishDir = [enabled: false] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:VCFLIB_VCFFIXUP' { + ext.prefix = { "${meta.panel}_${meta.chr}" } + publishDir = [enabled: false] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_4' { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_CONVERT' { ext.args = {"--haplegendsample ${meta.id}_${meta.chr}"} publishDir = [ diff --git a/subworkflows/local/vcf_impute_glimpse2/main.nf b/subworkflows/local/vcf_impute_glimpse2/main.nf index 24a9f248..e36ac002 100644 --- a/subworkflows/local/vcf_impute_glimpse2/main.nf +++ b/subworkflows/local/vcf_impute_glimpse2/main.nf @@ -1,10 +1,5 @@ include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase' -include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' -include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index' -include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index' -include { VCFLIB_VCFFIXUP } from '../../../modules/nf-core/vcflib/vcffixup/main' include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' workflow VCF_IMPUTE_GLIMPSE2 { @@ -26,17 +21,6 @@ workflow VCF_IMPUTE_GLIMPSE2 { // Add chr as key to input ch_input = ch_input.map{meta, bam, bai -> return[['chr': meta.chr], meta, bam, bai]} - // Fix panel 
(AC/AN INFO fields in VCF are inconsistent with GT field) - VCFLIB_VCFFIXUP(ch_panel) - ch_versions = ch_versions.mix(VCFLIB_VCFFIXUP.out.versions) - - // Index fixed panel - BCFTOOLS_INDEX_1(VCFLIB_VCFFIXUP.out.vcf) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions) - - // Join fixed vcf and tbi - ch_panel = VCFLIB_VCFFIXUP.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) - // Join chunks and panel ch_chunks_panel = ch_chunks.join(ch_panel) @@ -57,11 +41,11 @@ workflow VCF_IMPUTE_GLIMPSE2 { ch_versions = ch_versions.mix(GLIMPSE2_PHASE.out.versions) // Index phased file - BCFTOOLS_INDEX_2(GLIMPSE2_PHASE.out.phased_variants) - ch_versions = ch_versions.mix(BCFTOOLS_INDEX_2.out.versions) + BCFTOOLS_INDEX_1(GLIMPSE2_PHASE.out.phased_variants) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions) // Join imputed and index files - ch_imputed_vcf_tbi = GLIMPSE2_PHASE.out.phased_variants.join(BCFTOOLS_INDEX_2.out.tbi) + ch_imputed_vcf_tbi = GLIMPSE2_PHASE.out.phased_variants.join(BCFTOOLS_INDEX_1.out.tbi) emit: vcf_tbi = ch_imputed_vcf_tbi // [ [id, chr, region], vcf, tbi ] diff --git a/subworkflows/local/vcf_normalize_bcftools/main.nf b/subworkflows/local/vcf_normalize_bcftools/main.nf index 093d7ce2..677edaa6 100644 --- a/subworkflows/local/vcf_normalize_bcftools/main.nf +++ b/subworkflows/local/vcf_normalize_bcftools/main.nf @@ -2,9 +2,11 @@ include { BCFTOOLS_NORM } from '../../../modules/nf-core include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3 } from '../../../modules/nf-core/bcftools/index' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_4 } from '../../../modules/nf-core/bcftools/index' include { BCFTOOLS_VIEW as BCFTOOLS_DEL_MLT_ALL } from '../../../modules/nf-core/bcftools/view' include { BCFTOOLS_VIEW as BCFTOOLS_DEL_SPL } from '../../../modules/nf-core/bcftools/view' 
include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert' +include { VCFLIB_VCFFIXUP } from '../../../modules/nf-core/vcflib/vcffixup/main' workflow VCF_NORMALIZE_BCFTOOLS { @@ -50,6 +52,17 @@ workflow VCF_NORMALIZE_BCFTOOLS { ch_biallelic_vcf_tbi = BCFTOOLS_DEL_SPL.out.vcf.join(BCFTOOLS_INDEX_3.out.tbi) } + // Fix panel (AC/AN INFO fields in VCF are inconsistent with GT field) + VCFLIB_VCFFIXUP(ch_biallelic_vcf_tbi) + ch_versions = ch_versions.mix(VCFLIB_VCFFIXUP.out.versions) + + // Index fixed panel + BCFTOOLS_INDEX_4(VCFLIB_VCFFIXUP.out.vcf) + ch_versions = ch_versions.mix(BCFTOOLS_INDEX_4.out.versions) + + // Join fixed vcf and tbi + ch_biallelic_vcf_tbi = VCFLIB_VCFFIXUP.out.vcf.join(BCFTOOLS_INDEX_4.out.tbi) + // Convert VCF to Hap and Legend files BCFTOOLS_CONVERT(ch_biallelic_vcf_tbi, ch_fasta, []) ch_versions = ch_versions.mix(BCFTOOLS_CONVERT.out.versions) From fc65434cb6e78c0cea902d6216ee1e78668c0bcd Mon Sep 17 00:00:00 2001 From: atrigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 19 May 2024 22:48:01 +0000 Subject: [PATCH 065/110] change "step" for "steps" --- README.md | 16 ++-- conf/test.config | 2 +- conf/test_all.config | 2 +- conf/test_full.config | 2 +- conf/test_glimpse2.config | 2 +- conf/test_panelprep.config | 4 +- conf/test_quilt.config | 2 +- conf/test_sim.config | 2 +- conf/test_stitch.config | 2 +- conf/test_validate.config | 2 +- docs/development.md | 2 +- docs/output.md | 14 ++-- docs/usage.md | 84 +++++++++---------- main.nf | 20 +++-- nextflow.config | 4 +- nextflow_schema.json | 6 +- .../utils_nfcore_phaseimpute_pipeline/main.nf | 26 +++--- workflows/phaseimpute/main.nf | 28 +++---- 18 files changed, 114 insertions(+), 106 deletions(-) diff --git a/README.md b/README.md index e3be393e..cb5fb5b7 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ nextflow run nf-core/phaseimpute \ --input \ --genome "GRCh38" \ --panel \ - --step "panelprep,impute" \ + --steps "panelprep,impute" \ --tools 
"glimpse1" \ --outdir ``` @@ -79,13 +79,13 @@ For more details and further functionality, please refer to the [usage documenta Here is a short description of the different steps of the pipeline. For more information please refer to the [documentation](https://nf-core.github.io/phaseimpute/usage/). -| Step | Flow chart | Description | -| ------------------ | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Panel prep** | phase_metro | The preprocessing mode is responsible to the preparation of the multiple input file that will be used by the phasing process.
The main processes are :
- **Haplotypes phasing** of the reference panel using [**Shapeit5**](https://odelaneau.github.io/shapeit5/).
- **Filter** the reference panel to select only the necessary variants.
- **Chunking the reference panel** in a subset of region for all the chromosomes.
- **Extract** the positions where to perform the imputation. | -| **Impute** | phase_metro | The imputation mode is the core mode of this pipeline.
It is constituted of 3 main steps:
- **Phasing**: Phasing of the target dataset on the reference panel using either:
  - [**Glimpse1**](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html)
  It's come with the necessety to compute the genotype likelihoods of the target dataset.
  This step is done using [BCFTOOLS_mpileup](https://samtools.github.io/bcftools/bcftools.html#mpileup)
  - [**Glimpse2**](https://odelaneau.github.io/GLIMPSE/glimpse2/index.html) For this step the reference panel is transformed to binary chunks.
  - [**Stitch**](https://github.com/rwdavies/stitch)
  - [**Quilt**](https://github.com/rwdavies/QUILT)
- **Ligation**: all the different chunks are merged together.
- **Sampling** (optional) | -| **Simulate** | simulate_metro | The simulation mode is used to create artificial low informative genetic information from high density data. This allow to compare the imputed result to a _truth_ and therefore evaluate the quality of the imputation.
For the moment it is possible to simulate:
- Low-pass data by **downsample** BAM or CRAM using [SAMTOOLS_view -s]() at different depth
- Genotype data by **SNP selecting** the position used by a designated SNP chip.
The simulation mode will also compute the **Genotype likelihoods** of the high density data. | -| **Validate** | concordance_metro | This mode compare two vcf together to compute a summary of the differences between them.
To do so it use either:
- [**Glimpse1**](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html) concordance process.
- [**Glimpse2**](https://odelaneau.github.io/GLIMPSE/glimpse2/index.html) concordance process
- Or convert the two vcf fill to `.zarr` using [**Scikit allele**](https://scikit-allel.readthedocs.io/en/stable/) and [**anndata**](https://anndata.readthedocs.io/en/latest/) before comparing the SNPs. | -| **Postprocessing** | postprocessing_metro | This final process unable to loop the whole pipeline for increasing the performance of the imputation. To do so it filter out the best imputed position and rerun the analysis using this positions [to be developed]. | +| steps | Flow chart | Description | +| ------------------ | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Panel prep** | phase_metro | The preprocessing mode is responsible to the preparation of the multiple input file that will be used by the phasing process.
The main processes are :
- **Haplotypes phasing** of the reference panel using [**Shapeit5**](https://odelaneau.github.io/shapeit5/).
- **Filter** the reference panel to select only the necessary variants.
- **Chunking the reference panel** in a subset of region for all the chromosomes.
- **Extract** the positions where to perform the imputation. | +| **Impute** | phase_metro | The imputation mode is the core mode of this pipeline.
It is constituted of 3 main steps:
- **Phasing**: Phasing of the target dataset on the reference panel using either:
  - [**Glimpse1**](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html)
  It comes with the necessity to compute the genotype likelihoods of the target dataset.
  This step is done using [BCFTOOLS_mpileup](https://samtools.github.io/bcftools/bcftools.html#mpileup)
  - [**Glimpse2**](https://odelaneau.github.io/GLIMPSE/glimpse2/index.html) For this step the reference panel is transformed to binary chunks.
  - [**Stitch**](https://github.com/rwdavies/stitch)
  - [**Quilt**](https://github.com/rwdavies/QUILT)
- **Ligation**: all the different chunks are merged together.
- **Sampling** (optional) | +| **Simulate** | simulate_metro | The simulation mode is used to create artificial low informative genetic information from high density data. This allow to compare the imputed result to a _truth_ and therefore evaluate the quality of the imputation.
For the moment it is possible to simulate:
- Low-pass data by **downsample** BAM or CRAM using [SAMTOOLS_view -s]() at different depth
- Genotype data by **SNP selecting** the position used by a designated SNP chip.
The simulation mode will also compute the **Genotype likelihoods** of the high density data. | +| **Validate** | concordance_metro | This mode compare two vcf together to compute a summary of the differences between them.
To do so, it uses either:
- [**Glimpse1**](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html) concordance process.
- [**Glimpse2**](https://odelaneau.github.io/GLIMPSE/glimpse2/index.html) concordance process
- Or convert the two vcf fill to `.zarr` using [**Scikit allele**](https://scikit-allel.readthedocs.io/en/stable/) and [**anndata**](https://anndata.readthedocs.io/en/latest/) before comparing the SNPs. | +| **Postprocessing** | postprocessing_metro | This final process unable to loop the whole pipeline for increasing the performance of the imputation. To do so it filter out the best imputed position and rerun the analysis using this positions [to be developed]. | ## Pipeline output diff --git a/conf/test.config b/conf/test.config index 85ab9cd6..d881b22e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,7 +29,7 @@ params { phased = true // Pipeline steps - step = "panelprep,impute" + steps = "panelprep,impute" // Impute tools tools = "glimpse1" diff --git a/conf/test_all.config b/conf/test_all.config index b12e3ed2..60a7ef39 100644 --- a/conf/test_all.config +++ b/conf/test_all.config @@ -31,7 +31,7 @@ params { map = "${projectDir}/tests/csv/map.csv" // Pipeline steps - step = "all" + steps = "all" // Impute tools tools = "glimpse1,glimpse2,stitch,quilt" diff --git a/conf/test_full.config b/conf/test_full.config index b0991b91..0e15401a 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -28,7 +28,7 @@ params { panel = "${projectDir}/tests/csv/panel_full.csv" // Pipeline steps - step = "all" + steps = "all" // Panelprep optional args remove_samples = "NA12878,NA12891,NA12892" diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config index de645512..aa1f3bb8 100644 --- a/conf/test_glimpse2.config +++ b/conf/test_glimpse2.config @@ -28,7 +28,7 @@ params { phased = true // Pipeline steps - step = "panelprep,impute" + steps = "panelprep,impute" // Impute tools tools = "glimpse2" diff --git a/conf/test_panelprep.config b/conf/test_panelprep.config index 4b5ec319..b73a7e4c 100644 --- a/conf/test_panelprep.config +++ b/conf/test_panelprep.config @@ -12,7 +12,7 @@ params { config_profile_name = 'Test profile' - config_profile_description = 
'Minimal test dataset to check panel prepation step' + config_profile_description = 'Minimal test dataset to check panel prepation steps' // Limit resources so that this can run on GitHub Actions max_cpus = 2 @@ -25,5 +25,5 @@ params { phased = true // Pipeline steps - step = "panelprep" + steps = "panelprep" } diff --git a/conf/test_quilt.config b/conf/test_quilt.config index acc6e718..0e1d01d7 100644 --- a/conf/test_quilt.config +++ b/conf/test_quilt.config @@ -29,7 +29,7 @@ params { phased = true // Pipeline steps - step = "panelprep,impute" + steps = "panelprep,impute" // Impute tools tools = "quilt" diff --git a/conf/test_sim.config b/conf/test_sim.config index cd011798..42418f06 100644 --- a/conf/test_sim.config +++ b/conf/test_sim.config @@ -28,5 +28,5 @@ params { fasta = params.pipelines_testdata_base_path + "reference_genome/21_22/hs38DH.chr21_22.fa" // Pipeline steps - step = "simulate" + steps = "simulate" } diff --git a/conf/test_stitch.config b/conf/test_stitch.config index 97ae8ae0..9520980b 100644 --- a/conf/test_stitch.config +++ b/conf/test_stitch.config @@ -28,7 +28,7 @@ params { posfile = "${projectDir}/tests/csv/posfile.csv" // Pipeline steps - step = "impute" + steps = "impute" // Impute tools tools = "stitch" diff --git a/conf/test_validate.config b/conf/test_validate.config index 56da9785..332aca75 100644 --- a/conf/test_validate.config +++ b/conf/test_validate.config @@ -31,5 +31,5 @@ params { map = "${projectDir}/tests/csv/map.csv" // Pipeline steps - step = "validate" + steps = "validate" } diff --git a/docs/development.md b/docs/development.md index 770c0ac2..f08fea0a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -61,7 +61,7 @@ All channel need to be identified by a meta map as follow: How to use different schema ? - Use nf-validation - For the moment use different input / step. + For the moment use different input / steps. In the futur, if/else logic will be added in the yml nf-core schema. 
What's the use of dumpcustomsoftware ? diff --git a/docs/output.md b/docs/output.md index 7f8e3069..46a7fe1a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,9 +10,9 @@ The directories listed below will be created in the results directory after the ## Pipeline overview -## Panel preparation outputs `--step panelprep` +## Panel preparation outputs `--steps panelprep` -This step of the pipeline performs a QC of the reference panel data and produces the necessary files for imputation (`--step impute`). It has two optional modes: reference panel phasing with SHAPEIT5 and removal of specified samples from reference panel. +This steps of the pipeline performs a QC of the reference panel data and produces the necessary files for imputation (`--steps impute`). It has two optional modes: reference panel phasing with SHAPEIT5 and removal of specified samples from reference panel. - [Remove Multiallelics](#multiallelics) - Remove multiallelic sites from the reference panel - [Convert](#convert) - Convert reference panel to .hap and .legend files @@ -26,7 +26,7 @@ This step of the pipeline performs a QC of the reference panel data and produces - `*.hap`: a .hap file for the reference panel. - `*.legend*`: a .legend file for the reference panel. -[bcftools](https://samtools.github.io/bcftools/bcftools.html) aids in the conversion of vcf files to .hap and .legend files. A .samples file is also generated. Once that you have generated the hap and legend files for your reference panel, you can skip the reference preparation step and directly submit these files for imputation (to be developed). The hap and legend files are input files used with `--tools quilt`. +[bcftools](https://samtools.github.io/bcftools/bcftools.html) aids in the conversion of vcf files to .hap and .legend files. A .samples file is also generated. 
Once that you have generated the hap and legend files for your reference panel, you can skip the reference preparation steps and directly submit these files for imputation (to be developed). The hap and legend files are input files used with `--tools quilt`. ### Posfile @@ -45,7 +45,7 @@ This step of the pipeline performs a QC of the reference panel data and produces - `*.txt.gz`: TXT file for biallelic SNPs. - `*.tbi`: Index file for TSV. -[bcftools query](https://samtools.github.io/bcftools/bcftools.html) produces VCF (`*.vcf.gz`) files per chromosome. These QCed VCFs can be gathered into a csv and used with all the tools in `--step impute` using the flag `--panel`. +[bcftools query](https://samtools.github.io/bcftools/bcftools.html) produces VCF (`*.vcf.gz`) files per chromosome. These QCed VCFs can be gathered into a csv and used with all the tools in `--steps impute` using the flag `--panel`. In addition, [bcftools query](https://samtools.github.io/bcftools/bcftools.html) produces tab-delimited files (`*_tsv.txt`) and, together with the VCFs, they can be gathered into a samplesheet and directly submitted for imputation with `--tools glimpse1` and `--posfile` (not yet implemented). @@ -54,7 +54,7 @@ In addition, [bcftools query](https://samtools.github.io/bcftools/bcftools.html) - `prep_panel/chunks/` - `*.txt`: TXT file containing the chunks obtained from running Glimpse chunks. -[Glimpse1 chunk](https://odelaneau.github.io/GLIMPSE/) defines chunks where to run imputation. For further reading and documentation see the [Glimpse1 documentation](https://odelaneau.github.io/GLIMPSE/glimpse1/commands.html). Once that you have generated the chunks for your reference panel, you can skip the reference preparation step and directly submit this file for imputation. +[Glimpse1 chunk](https://odelaneau.github.io/GLIMPSE/) defines chunks where to run imputation. 
For further reading and documentation see the [Glimpse1 documentation](https://odelaneau.github.io/GLIMPSE/glimpse1/commands.html). Once that you have generated the chunks for your reference panel, you can skip the reference preparation steps and directly submit this file for imputation. ## QUILT imputation mode @@ -69,7 +69,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `quilt.*.vcf.gz`: Imputed VCF for a specific chunk. - `quilt.*.vcf.gz.tbi`: TBI for the Imputed VCF for a specific chunk. -[quilt](https://github.com/rwdavies/QUILT) performs the imputation. This step will contain the VCF for each of the chunks. +[quilt](https://github.com/rwdavies/QUILT) performs the imputation. This steps will contain the VCF for each of the chunks. ### Concat @@ -91,7 +91,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `stitch.*.vcf.gz`: Imputed VCF for a specific chunk. - `stitch.*.vcf.gz.tbi`: TBI for the Imputed VCF for a specific chunk. -[STITCH](https://github.com/rwdavies/STITCH) performs the imputation. This step will contain the VCF for each of the chunks. +[STITCH](https://github.com/rwdavies/STITCH) performs the imputation. This steps will contain the VCF for each of the chunks. ### Concat diff --git a/docs/usage.md b/docs/usage.md index 4bd9c6a8..dd133eeb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -83,7 +83,7 @@ or you can specify a custom genome using: The typical command for running the pre-processing of the panel and imputation of samples is as follows: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --outdir results --genome GRCh37 -profile docker --step panelprep,impute +nextflow run nf-core/phaseimpute --input samplesheet.csv --outdir results --genome GRCh37 -profile docker --steps panelprep,impute ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
@@ -127,57 +127,57 @@ You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-c ### Running the pipeline -nf-core/phaseimpute can be started at different points in the analysis by setting the flag `--step` and the available options `[simulate, panelprep, impute, validate, all]`. You can also run several steps simultaneously by listing the required processes as `--step panelprep,impute` or you can choose to run all steps sequentially by using `--step all`. +nf-core/phaseimpute can be started at different points in the analysis by setting the flag `--steps` and the available options `[simulate, panelprep, impute, validate, all]`. You can also run several steps simultaneously by listing the required processes as `--steps panelprep,impute` or you can choose to run all steps sequentially by using `--steps all`. -### Start with simulation `--step simulate` +### Start with simulation `--steps simulate` -This step of the pipeline allows to create synthetic low-coverage input files by downsizing high density input data. A typical use case is to obtain low-coverage input data from a sequenced sample. This method is useful for comparing the imputation results to the truth and evaluate the quality of the imputation. You can skip this step if you already have low-pass genome sequencing data. A sample command for this step is: +This steps of the pipeline allows to create synthetic low-coverage input files by downsizing high density input data. A typical use case is to obtain low-coverage input data from a sequenced sample. This method is useful for comparing the imputation results to the truth and evaluate the quality of the imputation. You can skip this steps if you already have low-pass genome sequencing data. 
A sample command for this steps is: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --step simulate --depth 1 --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --steps simulate --depth 1 --outdir results --genome GRCh37 -profile docker ``` The required flags for this mode are: -- `--step simulate`: The step to run. +- `--steps simulate`: The steps to run. - `--input samplesheet.csv`: The samplesheet containing the input sample files in `bam` format. - `--depth`: The final depth of the file [default: 1]. - `--genome` or `--fasta`: The reference genome of the samples. -You can find an overview of the results produced by this step in the [Output](output.md). +You can find an overview of the results produced by this steps in the [Output](output.md). -### Start with panel preparation `--step panelprep` +### Start with panel preparation `--steps panelprep` -This step pre-processes the reference panel in order to be ready for imputation. There are a few quality control steps that are applied to reference panels. These include actions such as removing multiallelic SNPs and indels and removing certain samples from the reference panel (such as related samples). In addition, chunks are produced which are then used in the imputation steps. It is recommended that this step is run once and the produced files are saved, to minimize the cost of reading the reference panel each time. Then, the output files from `--step panelprep` can be used as input in the subsequent imputation steps, such as `--step impute`. +This steps pre-processes the reference panel in order to be ready for imputation. There are a few quality control steps that are applied to reference panels. These include actions such as removing multiallelic SNPs and indels and removing certain samples from the reference panel (such as related samples). In addition, chunks are produced which are then used in the imputation steps. 
It is recommended that this steps is run once and the produced files are saved, to minimize the cost of reading the reference panel each time. Then, the output files from `--steps panelprep` can be used as input in the subsequent imputation steps, such as `--steps impute`. -For starting from panel preparation, the required flags are `--step panelprep` and `--panel samplesheet_reference.csv`. +For starting from panel preparation, the required flags are `--steps panelprep` and `--panel samplesheet_reference.csv`. ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step panelprep --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --steps panelprep --outdir results --genome GRCh37 -profile docker ``` The required flags for this mode are: -- `--step panelprep`: The step to run. +- `--steps panelprep`: The steps to run. - `--panel reference.csv`: The samplesheet containing the reference panel files in `vcf.gz` format. - `--phased`: (optional) Whether the reference panel is phased (true|false). - `--remove_samples`: (optional) A comma-separated list of samples to remove from the reference. -You can find an overview of the results produced by this step in the [Output](output.md). +You can find an overview of the results produced by this steps in the [Output](output.md). -### Start with imputation `--step impute` +### Start with imputation `--steps impute` -For starting from the imputation step, the required flags are: +For starting from the imputation steps, the required flags are: -- `--step impute` +- `--steps impute` - `--input input.csv`: The samplesheet containing the input sample files in `bam` format. - `--genome` or `--fasta`: The reference genome of the samples. -- `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. 
Each imputation tool has their own set of specific flags and input files. These required files are produced by `--step panelprep` and used as input in: - - `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. These are produced by `--step panelprep` using `GLIMPSE1`. - - `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). The posfile can be generated with `--step panelprep`. - - `--panel panel.csv`: A samplesheet containing the post-processed VCF. This is required by GLIMPSE1. This file can be obtained with `--step panelprep`. +- `--tools [glimpse1, quilt, stitch]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These required files are produced by `--steps panelprep` and used as input in: + - `--chunks chunks.csv`: A samplesheet containing chunks per chromosome. These are produced by `--steps panelprep` using `GLIMPSE1`. + - `--posfile posfile.csv`: A samplesheet containing a TSV with the list of positions to genotype per chromosome. These are required by tools (for STITCH/GLIMPSE1). The posfile can be generated with `--steps panelprep`. + - `--panel panel.csv`: A samplesheet containing the post-processed VCF. This is required by GLIMPSE1. This file can be obtained with `--steps panelprep`. -### Imputation tools `--step impute --tools [glimpse1, glimpse2, quilt, stitch]` +### Imputation tools `--steps impute --tools [glimpse1, glimpse2, quilt, stitch]` You can choose different software to perform the imputation. In the following sections, the typical commands for running the pipeline with each software are included. @@ -186,7 +186,7 @@ You can choose different software to perform the imputation. 
In the following se [QUILT](https://github.com/rwdavies/QUILT) is an R and C++ program for rapid genotype imputation from low-coverage sequence using a large reference panel. The required inputs for this program are bam samples provided in the input samplesheet (`--input`) and a csv file with the genomic chunks (`--chunks`). ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --chunks chunks.csv --step impute --tool quilt --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --chunks chunks.csv --steps impute --tools quilt --outdir results --genome GRCh37 -profile docker ``` The csv provided in `--chunks` must contain two columns [chr, file]. The first column is the chromosome and the file column are txt with the chunks produced by GLIMPSE1, unique to each chromosome. @@ -200,26 +200,26 @@ chr3,chunks_chr3.txt The file column should contain a TSV obtained from GLIMPSE1 with the following [structure] (https://github.com/nf-core/test-datasets/blob/phaseimpute/data/panel/22/chr22_chunks_glimpse1.txt). -If you do not have a csv with chunks, you can provide a reference panel to run the `--step panelprep` which produces a csv with these chunks, which is then used as input for QUILT. You can choose to run both steps sequentially as `--step panelprep,impute` or simply collect the files produced by `--step panelprep`. +If you do not have a csv with chunks, you can provide a reference panel to run the `--steps panelprep` which produces a csv with these chunks, which is then used as input for QUILT. You can choose to run both steps sequentially as `--steps panelprep,impute` or simply collect the files produced by `--steps panelprep`. 
```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --step panelprep,impute --panel samplesheet_reference.csv --outdir results --genome GRCh37 -profile docker --tools quilt +nextflow run nf-core/phaseimpute --input samplesheet.csv --steps panelprep,impute --panel samplesheet_reference.csv --outdir results --genome GRCh37 -profile docker --tools quilt ``` #### STITCH [STITCH](https://github.com/rwdavies/STITCH) is an R program for low coverage sequencing genotype imputation without using a reference panel. The required inputs for this program are bam samples provided in the input samplesheet (`--input`) and a tsv file with the list of positions to genotype (`--posfile`). -If you do not have a list of position to genotype, you can provide a reference panel to run the `--step panelprep` which produces a tsv with this list. +If you do not have a list of positions to genotype, you can provide a reference panel to run the `--steps panelprep` which produces a tsv with this list. ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --step panelprep --panel samplesheet_reference.csv --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --steps panelprep --panel samplesheet_reference.csv --outdir results --genome GRCh37 -profile docker ``` -Otherwise, you can provide your own position file in the `--step impute` with STITCH using the the `--posfile` parameter. +Otherwise, you can provide your own position file in the `--steps impute` with STITCH using the `--posfile` parameter. ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --step impute --posfile samplesheet_posfile.csv --tool stitch --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --steps impute --posfile samplesheet_posfile.csv --tools stitch --outdir results --genome GRCh37 -profile docker ``` The csv provided in `--posfile` must contain two columns [chr, file]. 
The first column is the chromosome and the file column are tsvs with the list of positions, unique to each chromosome. @@ -244,39 +244,39 @@ chr22 16570211 T C #### GLIMPSE1 -[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. Recommended for many samples at >0.5x coverage and small reference panels. This is an example command to run this tool from the `--step impute`: +[GLIMPSE1](https://github.com/odelaneau/GLIMPSE/tree/glimpse1) is a set of tools for phasing and imputation for low-coverage sequencing datasets. Recommended for many samples at >0.5x coverage and small reference panels. This is an example command to run this tool from the `--steps impute`: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step impute --tool glimpse1 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv +nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --steps impute --tools glimpse1 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv ``` #### GLIMPSE2 -[GLIMPSE2](https://github.com/odelaneau/GLIMPSE) is a set of tools for phasing and imputation for low-coverage sequencing datasets. This is an example command to run this tool from the `--step impute`: +[GLIMPSE2](https://github.com/odelaneau/GLIMPSE) is a set of tools for phasing and imputation for low-coverage sequencing datasets. 
This is an example command to run this tool from the `--steps impute`: ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --step impute --tool glimpse2 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv +nextflow run nf-core/phaseimpute --input samplesheet.csv --panel samplesheet_reference.csv --steps impute --tools glimpse2 --outdir results --genome GRCh37 -profile docker --posfile posfile.csv --chunks chunks.csv ``` -### Start with validation `--step validate` +### Start with validation `--steps validate` -This step compares a _truth_ VCF to an _imputed_ VCF in order to compute imputation accuracy. +This step compares a _truth_ VCF to an _imputed_ VCF in order to compute imputation accuracy. ```bash -nextflow run nf-core/phaseimpute --input samplesheet.csv --input_truth truth.csv --step validate --outdir results --genome GRCh37 -profile docker +nextflow run nf-core/phaseimpute --input samplesheet.csv --input_truth truth.csv --steps validate --outdir results --genome GRCh37 -profile docker ``` The required flags for this mode are: -- `--step validate`: The step to run. +- `--steps validate`: The steps to run. - `--input samplesheet.csv`: The samplesheet containing the input sample files in `vcf` format. - `--input_truth samplesheet.csv`: The samplesheet containing the truth VCF files in `vcf` format. -### Run all steps sequentially `--step all` +### Run all steps sequentially `--steps all` This mode runs all the previous steps. This requires several flags: -- `--step all`: The step to run. +- `--steps all`: The steps to run. - `--input samplesheet.csv`: The samplesheet containing the input sample files in `bam` format. - `--depth`: The final depth of the input file [default: 1]. - `--genome` or `--fasta`: The reference genome of the samples. @@ -368,13 +368,13 @@ Specify the path to a specific config file (this is a core Nextflow command). 
Se ### Resource requests -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. ### Custom Containers -In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. 
However in some cases the pipeline specified version maybe out of date. +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. @@ -419,13 +419,13 @@ Specify the path to a specific config file (this is a core Nextflow command). Se ### Resource requests -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. 
For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. ### Custom Containers -In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. 
diff --git a/main.nf b/main.nf index 6350ea32..2b784c71 100644 --- a/main.nf +++ b/main.nf @@ -54,12 +54,20 @@ workflow NFCORE_PHASEIMPUTE { ch_input_simulate = Channel.empty() ch_input_validate = Channel.empty() - if (params.step.split(',').contains("impute")) { - ch_input_impute = ch_input - } else if (params.step.split(',').contains("simulate") || params.step.split(',').contains("all")) { - ch_input_simulate = ch_input - } else if (params.step.split(',').contains("validate")) { - ch_input_validate = ch_input + if (params.steps.split(',').contains("impute")) { + input_impute = ch_input + .combine(ch_regions) + .map { metaI, file, index, metaCR, region -> + [ metaI+metaCR, file, index ] + } + } else if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { + input_simulate = ch_input + } else if (params.steps.split(',').contains("validate")) { + input_validate = ch_input + .combine(ch_regions) + .map { metaI, file, index, metaCR, region -> + [ metaI+metaCR, file, index ] + } ch_input_truth = ch_input_truth } diff --git a/nextflow.config b/nextflow.config index 31ff6faf..19738465 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,8 +9,8 @@ // Global default params, used in configs params { - // step - step = null + // steps + steps = null // Input options input = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 36e2feeb..c0336430 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -57,7 +57,7 @@ "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" }, - "step": { + "steps": { "type": "string", "description": "Step to run.", "fa_icon": "fas fa-step-forward", @@ -122,7 +122,7 @@ "imputation_options": { "title": "Imputation options", "type": "object", - "description": "Arguments for the imputation step", + "description": "Arguments for the imputation step", "default": "", "properties": { "chunks": { @@ -276,7 +276,7 @@ "type": "object", "fa_icon": "fab fa-acquisitions-incorporated", "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. 
See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { "max_cpus": { "type": "integer", diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index fe81f1d4..804024b4 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -301,28 +301,28 @@ def validateInputParameters() { assert params.genome == null || params.fasta == null, "Either --genome or --fasta must be provided" assert !(params.genome == null && params.fasta == null), "Only one of --genome or --fasta must be provided" - // Check that a step is provided - assert params.step, "A step must be provided" + // Check that a steps is provided + assert params.steps, "A steps must be provided" // Check that at least one tool is provided - if (params.step.split(',').contains("impute")) { + if (params.steps.split(',').contains("impute")) { assert params.tools, "No tools provided" } // Check that input is provided for all steps, except panelprep - if (params.step.split(',').contains("all") || params.step.split(',').contains("impute") || params.step.split(',').contains("simulate") || params.step.split(',').contains("validate")) { + if (params.steps.split(',').contains("all") || params.steps.split(',').contains("impute") || params.steps.split(',').contains("simulate") || params.steps.split(',').contains("validate")) { assert params.input, "No input provided" } // Check that posfile and chunks are provided when running impute only. Steps with panelprep generate those files. 
- if (params.step.split(',').contains("impute") && !params.step.split(',').find { it in ["all", "panelprep"] }) { + if (params.steps.split(',').contains("impute") && !params.steps.split(',').find { it in ["all", "panelprep"] }) { // Required by all tools except glimpse2 and quilt if (!params.tools.split(',').find { it in ["glimpse2", "quilt"] }) { - assert params.posfile, "No --posfile provided for --step impute" + assert params.posfile, "No --posfile provided for --steps impute" } // Required by all tools except STITCH if (!params.tools.split(',').contains("stitch")) { - assert params.chunks, "No --chunks provided for --step impute" + assert params.chunks, "No --chunks provided for --steps impute" } // Required by GLIMPSE1 and GLIMPSE2 only if (params.tools.split(',').contains("glimpse")) { @@ -330,17 +330,17 @@ def validateInputParameters() { } // Check that input_truth is provided when running validate - if (params.step.split(',').find { it in ["all", "validate"] } ) { - assert params.input_truth, "No --input_truth was provided for --step validate" + if (params.steps.split(',').find { it in ["all", "validate"] } ) { + assert params.input_truth, "No --input_truth was provided for --steps validate" } } // Emit a warning if both panel and (chunks || posfile) are used as input - if (params.panel && params.chunks && params.step.split(',').find { it in ["all", "panelprep"]} ) { - log.warn("Both `--chunks` and `--panel` have been provided. Provided `--chunks` will override `--panel` generated chunks in `--step impute` mode.") + if (params.panel && params.chunks && params.steps.split(',').find { it in ["all", "panelprep"]} ) { + log.warn("Both `--chunks` and `--panel` have been provided. Provided `--chunks` will override `--panel` generated chunks in `--steps impute` mode.") } - if (params.panel && params.posfile && params.step.split(',').find { it in ["all", "panelprep"]} ) { - log.warn("Both `--posfile` and `--panel` have been provided. 
Provided `--posfile` will override `--panel` generated posfile in `--step impute` mode.") + if (params.panel && params.posfile && params.steps.split(',').find { it in ["all", "panelprep"]} ) { + log.warn("Both `--posfile` and `--panel` have been provided. Provided `--posfile` will override `--panel` generated posfile in `--steps impute` mode.") } } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index f4c6bb73..fcbfc2a1 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -88,7 +88,7 @@ workflow PHASEIMPUTE { // // Simulate data if asked // - if (params.step.split(',').contains("simulate") || params.step.split(',').contains("all")) { + if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { // Output channel of simulate process ch_sim_output = Channel.empty() @@ -126,7 +126,7 @@ workflow PHASEIMPUTE { // // Prepare panel // - if (params.step.split(',').contains("panelprep") || params.step.split(',').contains("validate") || params.step.split(',').contains("all")) { + if (params.steps.split(',').contains("panelprep") || params.steps.split(',').contains("validate") || params.steps.split(',').contains("all")) { // Check chr prefix and remove if necessary VCF_CHR_CHECK(ch_panel, ch_fasta) ch_versions = ch_versions.mix(VCF_CHR_CHECK.out.versions) @@ -169,7 +169,7 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) } - if (params.step.split(',').contains("impute") || params.step.split(',').contains("all")) { + if (params.steps.split(',').contains("impute") || params.steps.split(',').contains("all")) { // Output channel of input process ch_impute_output = Channel.empty() @@ -185,7 +185,7 @@ workflow PHASEIMPUTE { // if (params.posfile) { // ch_panel_sites_tsv = ch_posfile - // } else if (params.panel && params.step.split(',').contains("panelprep") && !params.posfile) { + // } else if (params.panel && 
params.steps.split(',').contains("panelprep") && !params.posfile) { // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index // -> [metaPC, sites, tsv] @@ -236,8 +236,8 @@ workflow PHASEIMPUTE { } if (params.tools.split(',').contains("glimpse2")) { - // Use previous chunks if --step panelprep - if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { + // Use previous chunks if --steps panelprep + if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { ch_chunks = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 } else if (params.chunks) { ch_chunks = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks @@ -260,8 +260,8 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("stitch")) { print("Impute with STITCH") - // Get posfile from panelprep step if --posfile not supplied - if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] }) { + // Get posfile from panelprep steps if --posfile not supplied + if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] }) { ch_posfile = PREPARE_POSFILE_TSV.out.posfile } @@ -289,8 +289,8 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("quilt")) { print("Impute with QUILT") - // Use previous chunks if --step panelprep - if (params.panel && params.step.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { + // Use previous chunks if --steps panelprep + if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { ch_chunks_quilt = VCF_CHUNK_GLIMPSE.out.chunks_quilt // Use provided chunks if --chunks } else if (params.chunks) { @@ -313,12 +313,12 @@ workflow PHASEIMPUTE { } } - if (params.step.split(',').contains("validate") || params.step.split(',').contains("all")) { + if (params.steps.split(',').contains("validate") || params.steps.split(',').contains("all")) { // 
if (params.posfile) { // Use channel ch_posfile for validation // ch_panel_sites_tsv = ch_posfile - // } else if (params.panel && params.step.split(',').contains("panelprep") && !params.posfile) { + // } else if (params.panel && params.steps.split(',').contains("panelprep") && !params.posfile) { // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index // -> [metaPC, sites, tsv] @@ -361,8 +361,8 @@ workflow PHASEIMPUTE { ch_versions = ch_versions.mix(VCF_CONCORDANCE_GLIMPSE2.out.versions) } - if (params.step.split(',').contains("refine")) { - error "refine step not yet implemented" + if (params.steps.split(',').contains("refine")) { + error "refine steps not yet implemented" } // From 2dd8941dddb05078ea523c8ebfed5835e74e73f7 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 13 May 2024 16:58:46 +0200 Subject: [PATCH 066/110] Uniformize subworkflows and process --- subworkflows/local/vcf_chr_rename/main.nf | 7 +++ .../vcf_normalize_bcftools.nf | 56 +++++++++++++++++++ .../local/vcf_sites_extract_bcftools/main.nf | 6 +- 3 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index 5a17f3ff..dc4a2a95 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -12,6 +12,13 @@ workflow VCF_CHR_RENAME { ch_versions = Channel.empty() // Generate the chromosome renaming file + GAWK( + ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }, + Channel.of( + 'BEGIN {FS="\\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { sub(/^chr/, "", $1); if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' + ).collectFile(name:"program.txt") + ) + ch_versions = ch_versions.mix(FAITOCHR.out.versions) GAWK(ch_fasta.map{ metaG, fasta, fai -> 
[metaG, fai] }, []) ch_versions = ch_versions.mix(GAWK.out.versions) diff --git a/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf b/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf new file mode 100644 index 00000000..312cca57 --- /dev/null +++ b/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf @@ -0,0 +1,56 @@ +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view/main' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2} from '../../../modules/nf-core/bcftools/index/main' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3} from '../../../modules/nf-core/bcftools/index/main' +include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert/main' +include { BCFTOOLS_VIEW as BCFTOOLS_REMOVE } from '../../../modules/nf-core/bcftools/view/main' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_4} from '../../../modules/nf-core/bcftools/index/main' + + +workflow VCF_NORMALIZE_BCFTOOLS { + take: + ch_vcf // channel: [ [id, chr], vcf, index ] + ch_fasta // channel: [ [genome], fasta, fai ] + + main: + + ch_versions = Channel.empty() + ch_fasta = ch_fasta.map { meta, fasta, fai -> [meta, fasta] } + + // Join duplicated biallelic sites into multiallelic records + BCFTOOLS_NORM(ch_vcf, ch_fasta) + + // Index multiallelic VCF + BCFTOOLS_INDEX_1(BCFTOOLS_NORM.out.vcf) + + // Join multiallelic VCF and TBI + ch_multiallelic_vcf_tbi = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) + + // Remove all multiallelic records: + BCFTOOLS_VIEW(ch_multiallelic_vcf_tbi, [], [], []) + + // Index biallelic VCF + BCFTOOLS_INDEX_2(BCFTOOLS_VIEW.out.vcf) + + // Join biallelic VCF and TBI + ch_biallelic_vcf_tbi = BCFTOOLS_VIEW.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) + + // (Optional) Remove benchmarking samples (e.g. 
NA12878) from the reference panel + if (!(params.remove_samples == null)){ + BCFTOOLS_REMOVE(ch_biallelic_vcf_tbi, [], [], []) + BCFTOOLS_INDEX_4(BCFTOOLS_REMOVE.out.vcf) + ch_biallelic_vcf_tbi = BCFTOOLS_REMOVE.out.vcf.join(BCFTOOLS_INDEX_4.out.tbi) + } + + // Convert VCF to Hap and Legend files + BCFTOOLS_CONVERT(ch_biallelic_vcf_tbi, ch_fasta, []) + + // Output hap and legend files + ch_hap_legend = BCFTOOLS_CONVERT.out.hap.join(BCFTOOLS_CONVERT.out.legend) + + emit: + vcf_tbi = ch_biallelic_vcf_tbi // channel: [ [id, chr], vcf, tbi ] + hap_legend = ch_hap_legend // channel: [ [id, chr] '.hap', '.legend' ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index 68db5f6f..e7807199 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -24,11 +24,11 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { ch_panel_sites = BCFTOOLS_VIEW.out.vcf.combine(BCFTOOLS_INDEX.out.csi, by:0) // Convert to TSV with structure for Glimpse - BCFTOOLS_QUERY(ch_panel_sites, [], [], []) - ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + BCFTOOLS_QUERY_TSV(ch_panel_sites, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY_TSV.out.versions.first()) // Compress TSV - TABIX_BGZIP(BCFTOOLS_QUERY.out.output) + TABIX_BGZIP(BCFTOOLS_QUERY_TSV.out.output) ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) // Index compressed TSV From e06626bbdd51104d6d59cbb5234b1a039b9f9ffe Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 13 May 2024 17:01:47 +0200 Subject: [PATCH 067/110] Uniformize subworkflow and main.nf --- workflows/phaseimpute/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index fcbfc2a1..d97b7129 100644 --- a/workflows/phaseimpute/main.nf +++ 
b/workflows/phaseimpute/main.nf @@ -47,7 +47,7 @@ include { BAM_IMPUTE_QUILT } from '../../subworkflows/ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/local/vcf_concatenate_bcftools' // STITCH subworkflows -include { PREPARE_INPUT_STITCH } from '../../subworkflows/local/prepare_input_stitch' +include { PREPARE_INPUT_STITCH } from '../../subworkflows/local/prepare_input_stitch/prepare_input_stitch' include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' From 854d998944b7a66f5dfb835c8013cd2841893824 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:05:52 +0200 Subject: [PATCH 068/110] Rename to main.nf --- .../vcf_normalize_bcftools.nf | 56 ------------------- 1 file changed, 56 deletions(-) delete mode 100644 subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf diff --git a/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf b/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf deleted file mode 100644 index 312cca57..00000000 --- a/subworkflows/local/vcf_normalize_bcftools/vcf_normalize_bcftools.nf +++ /dev/null @@ -1,56 +0,0 @@ -include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view/main' -include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2} from '../../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_3} from '../../../modules/nf-core/bcftools/index/main' -include { BCFTOOLS_CONVERT } from '../../../modules/nf-core/bcftools/convert/main' -include { BCFTOOLS_VIEW as BCFTOOLS_REMOVE } from '../../../modules/nf-core/bcftools/view/main' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_4} from '../../../modules/nf-core/bcftools/index/main' - - 
-workflow VCF_NORMALIZE_BCFTOOLS { - take: - ch_vcf // channel: [ [id, chr], vcf, index ] - ch_fasta // channel: [ [genome], fasta, fai ] - - main: - - ch_versions = Channel.empty() - ch_fasta = ch_fasta.map { meta, fasta, fai -> [meta, fasta] } - - // Join duplicated biallelic sites into multiallelic records - BCFTOOLS_NORM(ch_vcf, ch_fasta) - - // Index multiallelic VCF - BCFTOOLS_INDEX_1(BCFTOOLS_NORM.out.vcf) - - // Join multiallelic VCF and TBI - ch_multiallelic_vcf_tbi = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_INDEX_1.out.tbi) - - // Remove all multiallelic records: - BCFTOOLS_VIEW(ch_multiallelic_vcf_tbi, [], [], []) - - // Index biallelic VCF - BCFTOOLS_INDEX_2(BCFTOOLS_VIEW.out.vcf) - - // Join biallelic VCF and TBI - ch_biallelic_vcf_tbi = BCFTOOLS_VIEW.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) - - // (Optional) Remove benchmarking samples (e.g. NA12878) from the reference panel - if (!(params.remove_samples == null)){ - BCFTOOLS_REMOVE(ch_biallelic_vcf_tbi, [], [], []) - BCFTOOLS_INDEX_4(BCFTOOLS_REMOVE.out.vcf) - ch_biallelic_vcf_tbi = BCFTOOLS_REMOVE.out.vcf.join(BCFTOOLS_INDEX_4.out.tbi) - } - - // Convert VCF to Hap and Legend files - BCFTOOLS_CONVERT(ch_biallelic_vcf_tbi, ch_fasta, []) - - // Output hap and legend files - ch_hap_legend = BCFTOOLS_CONVERT.out.hap.join(BCFTOOLS_CONVERT.out.legend) - - emit: - vcf_tbi = ch_biallelic_vcf_tbi // channel: [ [id, chr], vcf, tbi ] - hap_legend = ch_hap_legend // channel: [ [id, chr] '.hap', '.legend' ] - versions = ch_versions // channel: [ versions.yml ] -} From 524bc51636b74ec11230eca92a55287c96cad33a Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:11:10 +0200 Subject: [PATCH 069/110] Set all sbwf call without main --- workflows/phaseimpute/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index d97b7129..d634abaa 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -35,7 
+35,6 @@ include { VCF_IMPUTE_GLIMPSE as VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/ include { COMPUTE_GL as GL_TRUTH } from '../../subworkflows/local/compute_gl' include { COMPUTE_GL as GL_INPUT } from '../../subworkflows/local/compute_gl' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' -include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' // GLIMPSE2 subworkflows include { VCF_IMPUTE_GLIMPSE2 } from '../../subworkflows/local/vcf_impute_glimpse2' @@ -47,7 +46,7 @@ include { BAM_IMPUTE_QUILT } from '../../subworkflows/ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/local/vcf_concatenate_bcftools' // STITCH subworkflows -include { PREPARE_INPUT_STITCH } from '../../subworkflows/local/prepare_input_stitch/prepare_input_stitch' +include { PREPARE_INPUT_STITCH } from '../../subworkflows/local/prepare_input_stitch' include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' From b1206d8a75d1fead39cb422b69c83b1bdc14b615 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:19:13 +0200 Subject: [PATCH 070/110] Normalize panel_prep config --- conf/steps/panel_prep.config | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 0f874bf9..f3bccd0b 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -90,6 +90,18 @@ process { ] } + // (Optional) Subworkflow: Remove samples from panel + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_REMOVE' { + ext.args = { "-Oz -s^${params.remove_samples}" } + ext.prefix = { "${meta.id}_${meta.chr}_biallelic_removed_samples" } + publishDir = [ enabled: false ] + } + + withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_4' { + ext.args = "--tbi" + publishDir = [enabled: false] + } + // Subworkflow: VCF_SITES_EXTRACT_BCFTOOLS withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:.*' { publishDir = [ enabled: false ] @@ -112,7 +124,7 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX_2' { ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" } publishDir = [ path: { "${params.outdir}/prep_panel/sites/vcf/" }, From 459c4c9ef01e1506b88716bf4d3493cce7a7b0a9 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Tue, 14 May 2024 12:49:28 +0200 Subject: [PATCH 071/110] Uniformize conf and workflows --- conf/steps/panel_prep.config | 14 +------------- subworkflows/local/vcf_chr_rename/main.nf | 7 ------- .../local/vcf_sites_extract_bcftools/main.nf | 6 +++--- 3 files changed, 4 insertions(+), 23 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index f3bccd0b..0f874bf9 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -90,18 +90,6 @@ process { ] } - // (Optional) Subworkflow: Remove samples from panel - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_REMOVE' { - ext.args = { "-Oz -s^${params.remove_samples}" } - ext.prefix = { "${meta.id}_${meta.chr}_biallelic_removed_samples" } - publishDir = [ enabled: false ] - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:BCFTOOLS_INDEX_4' { - ext.args = "--tbi" - publishDir = [enabled: false] - } - // Subworkflow: VCF_SITES_EXTRACT_BCFTOOLS withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:.*' { publishDir = [ enabled: false ] @@ -124,7 +112,7 @@ process { ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX_2' { + withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:BCFTOOLS_INDEX' { ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites" } publishDir = [ path: { "${params.outdir}/prep_panel/sites/vcf/" }, diff --git a/subworkflows/local/vcf_chr_rename/main.nf b/subworkflows/local/vcf_chr_rename/main.nf index dc4a2a95..5a17f3ff 100644 --- a/subworkflows/local/vcf_chr_rename/main.nf +++ b/subworkflows/local/vcf_chr_rename/main.nf @@ -12,13 +12,6 @@ workflow VCF_CHR_RENAME { ch_versions = Channel.empty() // Generate the chromosome renaming file - GAWK( - ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }, - Channel.of( - 'BEGIN {FS="\\t"} NR==1 { if ($1 ~ /^chr/) { col1=""; col2="chr" } else { col1="chr"; col2="" } } { sub(/^chr/, "", $1); if ($1 ~ /^[0-9]+|[XYMT]$/) print col1$1, col2$1; else print $1, $1 }' - ).collectFile(name:"program.txt") - ) - ch_versions = ch_versions.mix(FAITOCHR.out.versions) GAWK(ch_fasta.map{ metaG, fasta, fai -> [metaG, fai] }, []) ch_versions = ch_versions.mix(GAWK.out.versions) diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index e7807199..68db5f6f 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -24,11 +24,11 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { ch_panel_sites = BCFTOOLS_VIEW.out.vcf.combine(BCFTOOLS_INDEX.out.csi, by:0) // Convert to TSV with structure for Glimpse - BCFTOOLS_QUERY_TSV(ch_panel_sites, [], [], []) - ch_versions = ch_versions.mix(BCFTOOLS_QUERY_TSV.out.versions.first()) + BCFTOOLS_QUERY(ch_panel_sites, [], [], []) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) // Compress TSV - TABIX_BGZIP(BCFTOOLS_QUERY_TSV.out.output) + TABIX_BGZIP(BCFTOOLS_QUERY.out.output) ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) // Index compressed TSV From 1cff148d903cbe7cd7749c2801e2a3291fba909c Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 
15 May 2024 15:13:32 +0200 Subject: [PATCH 072/110] Patch glimpse2 chunk for map --- modules.json | 140 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 105 insertions(+), 35 deletions(-) diff --git a/modules.json b/modules.json index f816fb4c..22ece43a 100644 --- a/modules.json +++ b/modules.json @@ -8,76 +8,106 @@ "bcftools/annotate": { "branch": "master", "git_sha": "2ad29c2aed06d815d9f68ad7ba20b3b1c574ce9c", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/concat/bcftools-concat.diff" }, "bcftools/convert": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/index": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["multiple_impute_glimpse2", "vcf_impute_glimpse", "vcf_phase_shapeit5"] + "installed_by": [ + "multiple_impute_glimpse2", + "vcf_impute_glimpse", + "vcf_phase_shapeit5" + ] }, "bcftools/mpileup": { "branch": "master", "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/query": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": 
"modules/nf-core/bcftools/view/bcftools-view.diff" }, "bedtools/makewindows": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gawk": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "glimpse/chunk": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse/ligate": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse/phase": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["vcf_impute_glimpse"] + "installed_by": [ + "vcf_impute_glimpse" + ] }, "glimpse2/chunk": { "branch": "master", @@ -88,12 +118,16 @@ "glimpse2/concordance": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "glimpse2/ligate": { "branch": "master", "git_sha": "09d793219114004f268b98663b12f8062097a8c5", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ] }, "glimpse2/phase": { "branch": "master", @@ -104,38 +138,52 @@ "glimpse2/splitreference": { "branch": "master", "git_sha": "fa12139827a18b324bd63fce654818586a8e9cc7", - "installed_by": ["multiple_impute_glimpse2"] + "installed_by": [ + "multiple_impute_glimpse2" + ] }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": 
{ "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "quilt/quilt": { "branch": "master", "git_sha": "46265545d61e7f482adf40de941cc9a94e479bbe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/coverage": { "branch": "master", "git_sha": "38afbe42f7db7f19c7a89607c0a71c68f3be3131", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" }, "samtools/faidx": { "branch": "master", "git_sha": "f153f1f10e1083c49935565844cccb7453021682", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", @@ -146,29 +194,39 @@ "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "shapeit5/ligate": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "shapeit5/phasecommon": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["vcf_phase_shapeit5"] + "installed_by": [ + "vcf_phase_shapeit5" + ] }, "stitch": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/stitch/stitch.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", @@ -187,35 +245,47 @@ "multiple_impute_glimpse2": { 
"branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_impute_glimpse": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "vcf_phase_shapeit5": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file From b961cef356351e8dd02e33f0e1578db882fe5827 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 15 May 2024 15:19:42 +0200 Subject: [PATCH 073/110] Fix prettier --- modules.json | 140 +++++++++++++-------------------------------------- 1 file changed, 35 insertions(+), 105 deletions(-) diff --git a/modules.json b/modules.json index 22ece43a..f816fb4c 100644 --- a/modules.json +++ b/modules.json @@ -8,106 +8,76 @@ "bcftools/annotate": { "branch": "master", "git_sha": "2ad29c2aed06d815d9f68ad7ba20b3b1c574ce9c", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" }, "bcftools/concat": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/concat/bcftools-concat.diff" }, 
"bcftools/convert": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/index": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "multiple_impute_glimpse2", - "vcf_impute_glimpse", - "vcf_phase_shapeit5" - ] + "installed_by": ["multiple_impute_glimpse2", "vcf_impute_glimpse", "vcf_phase_shapeit5"] }, "bcftools/mpileup": { "branch": "master", "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/mpileup/bcftools-mpileup.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/query": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/view/bcftools-view.diff" }, "bedtools/makewindows": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gawk": { "branch": "master", "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "glimpse/chunk": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse/ligate": { "branch": "master", 
"git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse/phase": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - "installed_by": [ - "vcf_impute_glimpse" - ] + "installed_by": ["vcf_impute_glimpse"] }, "glimpse2/chunk": { "branch": "master", @@ -118,16 +88,12 @@ "glimpse2/concordance": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "glimpse2/ligate": { "branch": "master", "git_sha": "09d793219114004f268b98663b12f8062097a8c5", - "installed_by": [ - "multiple_impute_glimpse2" - ] + "installed_by": ["multiple_impute_glimpse2"] }, "glimpse2/phase": { "branch": "master", @@ -138,52 +104,38 @@ "glimpse2/splitreference": { "branch": "master", "git_sha": "fa12139827a18b324bd63fce654818586a8e9cc7", - "installed_by": [ - "multiple_impute_glimpse2" - ] + "installed_by": ["multiple_impute_glimpse2"] }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "quilt/quilt": { "branch": "master", "git_sha": "46265545d61e7f482adf40de941cc9a94e479bbe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/coverage": { "branch": "master", "git_sha": "38afbe42f7db7f19c7a89607c0a71c68f3be3131", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/coverage/samtools-coverage.diff" }, "samtools/faidx": { "branch": "master", "git_sha": "f153f1f10e1083c49935565844cccb7453021682", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", 
- "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", @@ -194,39 +146,29 @@ "samtools/view": { "branch": "master", "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "shapeit5/ligate": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "shapeit5/phasecommon": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "vcf_phase_shapeit5" - ] + "installed_by": ["vcf_phase_shapeit5"] }, "stitch": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/stitch/stitch.diff" }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", @@ -245,47 +187,35 @@ "multiple_impute_glimpse2": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_impute_glimpse": { "branch": "master", "git_sha": "7e56daae390ff896b292ddc70823447683a79936", - 
"installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_phase_shapeit5": { "branch": "master", "git_sha": "dcf17cc0ed8fd5ea57e61a13e0147cddb5c1ee30", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From a705f6e3d5d61eb8c868eec0090207c64674f78e Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Mon, 20 May 2024 15:12:33 +0200 Subject: [PATCH 074/110] Change chromosomes usage --- conf/steps/validation.config | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index e8f5f8ce..df2d955d 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -40,23 +40,6 @@ process { ext.args = "--tbi" } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { - ext.prefix = { "${meta.id}_truth_concat" } - publishDir = [ - path: { "${params.outdir}/validation/concat" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { - ext.args = ["--ligate", "--output-type z",].join(' ') - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_INDEX' { - ext.args = "--tbi" - } - // Validation subworkflow withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { publishDir = [ From 9908e1b4ee2b58b374d4228a398289c242fc7398 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 15:02:39 +0200 Subject: [PATCH 075/110] Delete samtools merge from main and add back missing concat truth config --- conf/steps/validation.config | 18 ++++++++++++++++++ workflows/phaseimpute/main.nf | 1 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index df2d955d..a7c4ff17 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -40,6 +40,24 @@ process { ext.args = "--tbi" } + // Concatenate the truth set + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' { + ext.prefix = { "${meta.id}_truth_concat" } + publishDir = [ + path: { "${params.outdir}/validation/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' { + ext.args = ["--ligate", "--output-type z",].join(' ') + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_INDEX' { + ext.args = "--tbi" + } + // Validation subworkflow withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:.*' { publishDir = [ diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 7e298008..47d96a1a 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -21,7 +21,6 @@ include { getAllFilesExtension } from '../../subworkflows/local/utils_nfc // Simulate subworkflows include { BAM_REGION } from '../../subworkflows/local/bam_region' include { BAM_DOWNSAMPLE } from '../../subworkflows/local/bam_downsample' -include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' // Panelprep subworkflows include { VCF_CHR_CHECK } from '../../subworkflows/local/vcf_chr_check' From e65d4f51887f5b6a206388088cb87a5591078686 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 15:05:50 +0200 Subject: [PATCH 076/110] Rename compute gl sbwf to nf-core guidelines --- subworkflows/local/{compute_gl => bam_gl_bcftools}/main.nf | 2 +- workflows/phaseimpute/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename subworkflows/local/{compute_gl => bam_gl_bcftools}/main.nf (98%) diff --git a/subworkflows/local/compute_gl/main.nf b/subworkflows/local/bam_gl_bcftools/main.nf similarity index 98% rename from subworkflows/local/compute_gl/main.nf rename to subworkflows/local/bam_gl_bcftools/main.nf index 8afe1a70..e300b05d 100644 --- a/subworkflows/local/compute_gl/main.nf +++ b/subworkflows/local/bam_gl_bcftools/main.nf @@ -2,7 +2,7 @@ include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mp include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' include { BCFTOOLS_ANNOTATE } from 
'../../../modules/nf-core/bcftools/annotate' -workflow COMPUTE_GL { +workflow BAM_GL_BCFTOOLS { take: ch_input // channel: [ [id], bam, bai ] diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 47d96a1a..9eda568d 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -31,8 +31,8 @@ include { PREPARE_POSFILE_TSV } from '../../subworkflows/ // GLIMPSE1 subworkflows include { VCF_IMPUTE_GLIMPSE as VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/nf-core/vcf_impute_glimpse' -include { COMPUTE_GL as GL_TRUTH } from '../../subworkflows/local/compute_gl' -include { COMPUTE_GL as GL_INPUT } from '../../subworkflows/local/compute_gl' +include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' +include { BAM_GL_BCFTOOLS as GL_INPUT } from '../../subworkflows/local/bam_gl_bcftools' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' From ea7d0bf5aaac7e7e1217d89f2188e5fcf067c404 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 16:36:00 +0200 Subject: [PATCH 077/110] Add error control to ch_chunks --- .../utils_nfcore_phaseimpute_pipeline/main.nf | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 804024b4..9e08cc94 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -210,27 +210,25 @@ workflow PIPELINE_INITIALISATION { // // Create posfile channel // - if (params.posfile) { ch_posfile = Channel .fromSamplesheet("posfile") .map {meta, file -> [ meta, file ]} } else { - ch_posfile = [[[],[]]] + ch_posfile = [[],[]] } // // Create chunks channel // - if (params.chunks) { ch_chunks = 
Channel - .fromSamplesheet("chunks") - .map { - meta, file -> - [ meta, file ] - }} else { - ch_chunks = [[[],[]]] + .fromSamplesheet("chunks") + } else { + ch_chunks = [[],[]] + if (!params.steps.split(',').contains("panelprep") & !params.steps.split(',').contains("all") & params.steps.split(',').contains("impute")) { + error "No --chunks provided for --steps impute and step panel_prep not selected" + } } emit: From b0c3fcd9dd9f90fb4be87f8ac3c903741e79a3f5 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 16:36:50 +0200 Subject: [PATCH 078/110] Fix input no more by chromosomes --- main.nf | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index 2b784c71..6db053c9 100644 --- a/main.nf +++ b/main.nf @@ -55,20 +55,11 @@ workflow NFCORE_PHASEIMPUTE { ch_input_validate = Channel.empty() if (params.steps.split(',').contains("impute")) { - input_impute = ch_input - .combine(ch_regions) - .map { metaI, file, index, metaCR, region -> - [ metaI+metaCR, file, index ] - } + ch_input_impute = ch_input } else if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { - input_simulate = ch_input + ch_input_simulate = ch_input } else if (params.steps.split(',').contains("validate")) { - input_validate = ch_input - .combine(ch_regions) - .map { metaI, file, index, metaCR, region -> - [ metaI+metaCR, file, index ] - } - ch_input_truth = ch_input_truth + ch_input_validate = ch_input } // From c180a75d3ab64c19af7c6233c32229c2d6aa4716 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 16:37:35 +0200 Subject: [PATCH 079/110] Fix panel meta name in VCFLIB --- conf/steps/panel_prep.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 0f874bf9..0469a7b8 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -72,7 +72,7 @@ process { } withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_NORMALIZE_BCFTOOLS:VCFLIB_VCFFIXUP' { - ext.prefix = { "${meta.panel}_${meta.chr}" } + ext.prefix = { "${meta.id}_${meta.chr}" } publishDir = [enabled: false] } From fc82a7db6c919eab43cc92d75eded27b48842d34 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 16:38:16 +0200 Subject: [PATCH 080/110] Add dedicated glimpse subworkflow and fix chromosomes propagation --- conf/steps/imputation_glimpse1.config | 29 +++---- .../local/vcf_impute_glimpse1/main.nf | 82 +++++++++++++++++++ .../local/vcf_sites_extract_bcftools/main.nf | 1 - workflows/phaseimpute/main.nf | 62 ++++---------- 4 files changed, 110 insertions(+), 64 deletions(-) create mode 100644 subworkflows/local/vcf_impute_glimpse1/main.nf diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index fcf4777b..a7107980 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -13,12 +13,17 @@ process { // Configuration for the glimpse1 imputation subworkflow + // Impute with GLIMPSE1 + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' { + publishDir = [ enabled: false ] + } + // Call the variants before imputation - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:.*' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BAM_GL_BCFTOOLS:.*' { publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_MPILEUP' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BAM_GL_BCFTOOLS:BCFTOOLS_MPILEUP' { ext.args = [ "-I", "-E", @@ -31,29 +36,19 @@ process { ext.prefix = { "${meta.id}.call" } } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_ANNOTATE' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BAM_GL_BCFTOOLS:BCFTOOLS_ANNOTATE' { ext.args = "--set-id '%CHROM:%POS:%REF:%ALT' -Oz" ext.prefix = { "${meta.id}.annotate" } } - withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_INPUT:BCFTOOLS_INDEX' { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BAM_GL_BCFTOOLS:BCFTOOLS_INDEX' { ext.args = "--tbi" } - // Impute with GLIMPSE1 - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' { - publishDir = [ enabled: false ] - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_CHUNK' { - ext.args = ["--window-size 200000", "--buffer-size 20000"].join(' ') - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.chunk" } - publishDir = [ enabled: false ] - } - + // Impute the variants withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_PHASE' { ext.args = ["--impute-reference-only-variants"].join(' ') - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.phase" } + ext.prefix = { "${meta.id}_${meta.region.replace(':','_')}.phase" } ext.suffix = "bcf" publishDir = [ enabled: false ] } @@ -63,7 +58,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_LIGATE' { - ext.prefix = { "${meta.id}_R${meta.region.replace(':','_')}.ligate" } + ext.prefix = { "${meta.id}_${meta.chr}.ligate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { diff --git a/subworkflows/local/vcf_impute_glimpse1/main.nf b/subworkflows/local/vcf_impute_glimpse1/main.nf new file mode 100644 index 00000000..49830c50 --- /dev/null +++ b/subworkflows/local/vcf_impute_glimpse1/main.nf @@ -0,0 +1,82 @@ + +include { BAM_GL_BCFTOOLS } from '../bam_gl_bcftools' +include { GLIMPSE_PHASE } from '../../../modules/nf-core/glimpse/phase' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { GLIMPSE_LIGATE } from '../../../modules/nf-core/glimpse/ligate' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' + +workflow VCF_IMPUTE_GLIMPSE1 { + + take: + ch_input // channel (mandatory): [ [id], bam, bai ] + ch_sites_tsv // 
channel (mandatory): [ [panel, chr, region], sites, tsv ] + ch_panel // channel (mandatory): [ [panel, chr, region], vcf, tbi ] + ch_chunks // channel (optional): [ [chr], region1, region2 ] + ch_fasta // channel (mandatory): [ [genome], fa, fai ] + + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + // Glimpse1 subworkflow + BAM_GL_BCFTOOLS( // Compute GL for input data once per panel by chromosome + ch_input, + ch_sites_tsv, + ch_fasta + ) + ch_multiqc_files = ch_multiqc_files.mix(BAM_GL_BCFTOOLS.out.multiqc_files) + ch_versions = ch_versions.mix(BAM_GL_BCFTOOLS.out.versions) + + samples_file = Channel.of([[]]).collect() + gmap_file = Channel.of([[]]).collect() + + ch_phase_input = BAM_GL_BCFTOOLS.out.vcf // [metaIPC, vcf, index] + .map {metaIPC, vcf, index -> [metaIPC.subMap("panel", "chr"), metaIPC, vcf, index] } + .combine(ch_panel + .map{ + metaPC, vcf, index -> + [["panel": metaPC.id, "chr": metaPC.chr], vcf, index] + }, + by: 0 + ) + .combine(samples_file) + .combine(gmap_file) + .map { metaPC, metaIPC, vcf, index, panel, p_index, sample, gmap -> + [metaPC.subMap("chr"), metaIPC, vcf, index, panel, p_index, sample, gmap]} + .combine(ch_chunks + .map {metaCR, regionin, regionout -> [metaCR.subMap("chr"), metaCR, regionin, regionout]}, + by: 0 + ) + .map{ + metaC, metaIPC, vcf, index, panel, p_index, sample, gmap, metaCR, regionin, regionout + -> [metaIPC + ["region": regionin], vcf, index, sample, regionin, regionout, panel, p_index, gmap] + } + + GLIMPSE_PHASE ( ch_phase_input ) // [meta, vcf, index, sample, regionin, regionout, ref, ref_index, map] + ch_versions = ch_versions.mix(GLIMPSE_PHASE.out.versions ) + + BCFTOOLS_INDEX_1 ( GLIMPSE_PHASE.out.phased_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_1.out.versions ) + + // Ligate all phased files in one and index it + ligate_input = GLIMPSE_PHASE.out.phased_variants + .groupTuple() + .join( BCFTOOLS_INDEX_1.out.csi.groupTuple() ) + + GLIMPSE_LIGATE ( 
ligate_input ) + ch_versions = ch_versions.mix(GLIMPSE_LIGATE.out.versions ) + + BCFTOOLS_INDEX_2 ( GLIMPSE_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions ) + + + ch_imputed_vcf_tbi = GLIMPSE_LIGATE.out.merged_variants + .join(BCFTOOLS_INDEX_2.out.csi) + .map{ metaIPCR, vcf, csi -> [metaIPCR + [tools: "Glimpse1"], vcf, csi] } + + emit: + vcf_tbi = ch_imputed_vcf_tbi // channel: [ [id, chr], vcf, tbi ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files // channel: [ multiqc_files.yml ] +} diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index 68db5f6f..f9c82185 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -40,7 +40,6 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { emit: panel_tsv = ch_panel_tsv // channel: [ [id, chr], tsv, tbi ] - vcf_tbi = ch_vcf // channel: [ [id, chr], vcf, tbi ] panel_sites = ch_panel_sites // channel: [ [id, chr], vcf, csi ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 9eda568d..66712a98 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -30,9 +30,8 @@ include { VCF_PHASE_PANEL } from '../../subworkflows/ include { PREPARE_POSFILE_TSV } from '../../subworkflows/local/prepare_posfile_tsv' // GLIMPSE1 subworkflows -include { VCF_IMPUTE_GLIMPSE as VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/nf-core/vcf_impute_glimpse' -include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' -include { BAM_GL_BCFTOOLS as GL_INPUT } from '../../subworkflows/local/bam_gl_bcftools' +include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' +include { VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/local/vcf_impute_glimpse1' include { 
VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' @@ -55,6 +54,7 @@ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from '../../subworkflows/ include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } from '../../subworkflows/local/vcf_concatenate_bcftools' // Concordance subworkflows +include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' include { VCF_CONCORDANCE_GLIMPSE2 } from '../../subworkflows/local/vcf_concordance_glimpse2' @@ -72,7 +72,7 @@ workflow PHASEIMPUTE { ch_input_validate // channel: input file [ [id], file, index ] ch_input_validate_truth // channel: truth file [ [id], file, index ] ch_fasta // channel: fasta file [ [genome], fasta, fai ] - ch_panel // channel: panel file [ [id, chr], chr, vcf, index ] + ch_panel // channel: panel file [ [id, chr], vcf, index ] ch_region // channel: region to use [ [chr, region], region] ch_depth // channel: depth select [ [depth], depth ] ch_map // channel: genetic map [ [chr], map] @@ -144,10 +144,10 @@ workflow PHASEIMPUTE { // If required, phase panel (currently not working, a test should be added) // Phase panel with tool of choice (e.g. 
SHAPEIT5) - VCF_PHASE_PANEL(VCF_SITES_EXTRACT_BCFTOOLS.out.vcf_tbi) + VCF_PHASE_PANEL(VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi) ch_versions = ch_versions.mix(VCF_PHASE_PANEL.out.versions) - ch_panel = VCF_SITES_EXTRACT_BCFTOOLS.out.vcf_tbi + ch_panel = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi .join(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) .join(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv) .join(VCF_PHASE_PANEL.out.vcf_tbi) @@ -166,17 +166,17 @@ workflow PHASEIMPUTE { // Create chunks from reference VCF VCF_CHUNK_GLIMPSE(ch_panel_phased, ch_map) ch_versions = ch_versions.mix(VCF_CHUNK_GLIMPSE.out.versions) + } if (params.steps.split(',').contains("impute") || params.steps.split(',').contains("all")) { - // Output channel of input process - ch_impute_output = Channel.empty() - if (params.tools.split(',').contains("glimpse1")) { println "Impute with Glimpse1" if (params.chunks) { - ch_chunks = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks + ch_chunks_glimpse1 = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks + } else if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { + ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 } //Params posfile should replace part of ch_panel_sites_tsv (specifically, the .txt) @@ -191,42 +191,18 @@ workflow PHASEIMPUTE { // } // } - // Glimpse1 subworkflow - GL_INPUT( // Compute GL for input data once per panel by chromosome + VCF_IMPUTE_GLIMPSE1( ch_input_impute, ch_panel_sites_tsv, + ch_panel_phased, + ch_chunks_glimpse1, ch_fasta ) - ch_multiqc_files = ch_multiqc_files.mix(GL_INPUT.out.multiqc_files) - ch_versions = ch_versions.mix(GL_INPUT.out.versions) - - impute_input = GL_INPUT.out.vcf // [metaIPC, vcf, index] - .map {metaIPC, vcf, index -> [metaIPC.subMap("panel", "chr"), metaIPC, vcf, index] } - .join(ch_panel_phased) - .combine(Channel.of([[]])) - .map { metaPC, metaIPC, vcf, index, panel, p_index, sample -> - [metaPC.subMap("chr"), metaIPC, vcf, index, panel, p_index, 
sample]} - .join(ch_region - .map {metaCR, region -> [metaCR.subMap("chr"), metaCR, region]} - ) - .join(ch_map) - .map{ - metaC, metaIPC, vcf, index, panel, p_index, sample, metaCR, region, map - -> [metaIPC+metaCR.subMap("Region"), vcf, index, sample, region, panel, p_index, map] - } //[ metaIPCR, vcf, csi, sample, region, ref, ref_index, map ] - - VCF_IMPUTE_GLIMPSE1(impute_input) - output_glimpse1 = VCF_IMPUTE_GLIMPSE1.out.merged_variants - .combine(VCF_IMPUTE_GLIMPSE1.out.merged_variants_index, by: 0) - .map{ metaIPCR, vcf, csi -> [metaIPCR + [tools: "Glimpse1"], vcf, csi] } - ch_multiqc_files = ch_multiqc_files.mix(VCF_IMPUTE_GLIMPSE1.out.chunk_chr.map{ [it[1]]}) - ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE1.out.versions) - - // Add to output channel - ch_impute_output = ch_impute_output.mix(output_glimpse1) + ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE1.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(VCF_IMPUTE_GLIMPSE1.out.multiqc_files) // Concatenate by chromosomes - CONCAT_GLIMPSE1(output_glimpse1) + CONCAT_GLIMPSE1(VCF_IMPUTE_GLIMPSE1.out.vcf_tbi) ch_versions = ch_versions.mix(CONCAT_GLIMPSE1.out.versions) // Add results to input validate @@ -273,9 +249,6 @@ workflow PHASEIMPUTE { ch_fasta ) ch_versions = ch_versions.mix(BAM_IMPUTE_STITCH.out.versions) - // Output channel to concat - ch_impute_output = ch_impute_output.mix(BAM_IMPUTE_STITCH.out.vcf_tbi) - // Concatenate by chromosomes CONCAT_STITCH(BAM_IMPUTE_STITCH.out.vcf_tbi) ch_versions = ch_versions.mix(CONCAT_STITCH.out.versions) @@ -299,9 +272,6 @@ workflow PHASEIMPUTE { BAM_IMPUTE_QUILT(ch_input_impute, VCF_NORMALIZE_BCFTOOLS.out.hap_legend, VCF_CHUNK_GLIMPSE.out.chunks_quilt) ch_versions = ch_versions.mix(BAM_IMPUTE_QUILT.out.versions) - // Add to output channel - ch_impute_output = ch_impute_output.mix(BAM_IMPUTE_QUILT.out.vcf_tbi) - // Concatenate by chromosomes CONCAT_QUILT(BAM_IMPUTE_QUILT.out.vcf_tbi) ch_versions = ch_versions.mix(CONCAT_QUILT.out.versions) From 
f6fd72b3c8d1deadf19938d520c2e4e14b24ac17 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 20:52:59 +0200 Subject: [PATCH 081/110] Fix panel id prefix --- conf/steps/panel_prep.config | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 0469a7b8..5ef56918 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -176,7 +176,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { - ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse1" } + ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse1" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse1/" }, mode: params.publish_dir_mode, @@ -185,8 +185,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_CHUNK' { - ext.args = ["--window-mb 2.0"].join(' ') - ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse2" } + ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, mode: params.publish_dir_mode, @@ -195,7 +194,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE2_SPLITREFERENCE' { - ext.prefix = { "${meta.panel}_${meta.chr}_chunks_glimpse2" } + ext.prefix = { "${meta.id}_${meta.chr}_chunks_glimpse2" } publishDir = [ path: { "${params.outdir}/prep_panel/chunks/glimpse2/" }, mode: params.publish_dir_mode, From 2a9fd3af0fe2ee393202142169a119f35b606a3b Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 20:53:34 +0200 Subject: [PATCH 082/110] Fix chunk error for stitch --- .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 9e08cc94..7674b4a4 100644 --- 
a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -226,7 +226,12 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("chunks") } else { ch_chunks = [[],[]] - if (!params.steps.split(',').contains("panelprep") & !params.steps.split(',').contains("all") & params.steps.split(',').contains("impute")) { + if ( + !params.steps.split(',').contains("panelprep") & + !params.steps.split(',').contains("all") & + params.steps.split(',').contains("impute") & + !params.tools.split(',') == ["stitch"] + ) { error "No --chunks provided for --steps impute and step panel_prep not selected" } } From 5e3d4fda0972656c28a102eeda6f4c8090159c98 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 20:54:19 +0200 Subject: [PATCH 083/110] Reorder import and add comment --- workflows/phaseimpute/main.nf | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 66712a98..2f688fc5 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -28,12 +28,11 @@ include { VCF_NORMALIZE_BCFTOOLS } from '../../subworkflows/ include { VCF_SITES_EXTRACT_BCFTOOLS } from '../../subworkflows/local/vcf_sites_extract_bcftools' include { VCF_PHASE_PANEL } from '../../subworkflows/local/vcf_phase_panel' include { PREPARE_POSFILE_TSV } from '../../subworkflows/local/prepare_posfile_tsv' +include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' // GLIMPSE1 subworkflows -include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' include { VCF_IMPUTE_GLIMPSE1 } from '../../subworkflows/local/vcf_impute_glimpse1' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_GLIMPSE1} from '../../subworkflows/local/vcf_concatenate_bcftools' -include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' // GLIMPSE2 subworkflows include { 
VCF_IMPUTE_GLIMPSE2 } from '../../subworkflows/local/vcf_impute_glimpse2' @@ -213,16 +212,18 @@ workflow PHASEIMPUTE { // Use previous chunks if --steps panelprep if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] } && !params.chunks) { - ch_chunks = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 + ch_chunks = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 // Chunks from glimpse2 are wrong } else if (params.chunks) { ch_chunks = CHUNK_PREPARE_CHANNEL(ch_chunks, "glimpse").out.chunks } // Run imputation - VCF_IMPUTE_GLIMPSE2(ch_input_impute, - ch_panel_phased, - ch_chunks, - ch_fasta) + VCF_IMPUTE_GLIMPSE2( + ch_input_impute, + ch_panel_phased, + ch_chunks, + ch_fasta + ) ch_versions = ch_versions.mix(VCF_IMPUTE_GLIMPSE2.out.versions) // Concatenate by chromosomes CONCAT_GLIMPSE2(VCF_IMPUTE_GLIMPSE2.out.vcf_tbi) From fe03516ec6ccd16e12917eaa3fb0e241c1121ae2 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Wed, 22 May 2024 20:56:31 +0200 Subject: [PATCH 084/110] Fix glimpse 1 and 2 and simplify channel concatenation --- conf/steps/imputation_glimpse1.config | 4 +- conf/steps/imputation_glimpse2.config | 10 ++++ conf/test.config | 4 ++ conf/test_glimpse2.config | 4 ++ .../local/vcf_impute_glimpse1/main.nf | 43 +++++++-------- .../local/vcf_impute_glimpse2/main.nf | 55 ++++++++++++------- 6 files changed, 77 insertions(+), 43 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index a7107980..487ffbb0 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -48,12 +48,13 @@ process { // Impute the variants withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_PHASE' { ext.args = ["--impute-reference-only-variants"].join(' ') - ext.prefix = { "${meta.id}_${meta.region.replace(':','_')}.phase" } + ext.prefix = { "${meta.id}_${meta.region.replace(':','_')}_phase" } ext.suffix = "bcf" publishDir = [ enabled: false ] } withName: 
'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { + ext.args = "--tbi" publishDir = [ enabled: false ] } @@ -62,6 +63,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { + ext.args = "--tbi" publishDir = [ enabled: false ] } diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index cf9600b0..c6d3fa5c 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -20,6 +20,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { + ext.prefix = { "${meta.id}_${meta.region.replace(':','_')}_glimpse2" } ext.args = "--keep-monomorphic-ref-sites" ext.suffix = "vcf.gz" publishDir = [ enabled: false ] @@ -30,6 +31,15 @@ process { publishDir = [ enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE' { + ext.prefix = { "${meta.id}_${meta.chr}.ligate" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2' { + ext.args = "--tbi" + publishDir = [ enabled: false ] + } + // Concatenate the imputed chunks withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_GLIMPSE2:.*' { diff --git a/conf/test.config b/conf/test.config index d881b22e..942d5b00 100644 --- a/conf/test.config +++ b/conf/test.config @@ -34,3 +34,7 @@ params { // Impute tools tools = "glimpse1" } + +withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 5000", "--buffer-count 30"].join(' ') +} diff --git a/conf/test_glimpse2.config b/conf/test_glimpse2.config index aa1f3bb8..b1a13f37 100644 --- a/conf/test_glimpse2.config +++ b/conf/test_glimpse2.config @@ -33,3 +33,7 @@ params { // Impute tools tools = "glimpse2" } + +withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CHUNK_GLIMPSE:GLIMPSE_CHUNK' { + ext.args = ["--window-size 10000", "--window-count 400", "--buffer-size 
5000", "--buffer-count 30"].join(' ') +} diff --git a/subworkflows/local/vcf_impute_glimpse1/main.nf b/subworkflows/local/vcf_impute_glimpse1/main.nf index 49830c50..260c0508 100644 --- a/subworkflows/local/vcf_impute_glimpse1/main.nf +++ b/subworkflows/local/vcf_impute_glimpse1/main.nf @@ -31,26 +31,22 @@ workflow VCF_IMPUTE_GLIMPSE1 { samples_file = Channel.of([[]]).collect() gmap_file = Channel.of([[]]).collect() - ch_phase_input = BAM_GL_BCFTOOLS.out.vcf // [metaIPC, vcf, index] - .map {metaIPC, vcf, index -> [metaIPC.subMap("panel", "chr"), metaIPC, vcf, index] } - .combine(ch_panel - .map{ - metaPC, vcf, index -> - [["panel": metaPC.id, "chr": metaPC.chr], vcf, index] - }, - by: 0 - ) + // Combine chunks with panel + ch_chunks_panel = ch_chunks + .combine(ch_panel, by:0) + .map{ metaPC, regionin, regionout, panel, index -> + [["panel": metaPC.id, "chr": metaPC.chr], regionin, regionout, panel, index] + } + + // Join input and chunks reference + ch_phase_input = BAM_GL_BCFTOOLS.out.vcf + .map{ metaIPC, vcf, index -> [metaIPC.subMap("panel", "chr"), metaIPC, vcf, index] } .combine(samples_file) + .combine(ch_chunks_panel, by: 0) .combine(gmap_file) - .map { metaPC, metaIPC, vcf, index, panel, p_index, sample, gmap -> - [metaPC.subMap("chr"), metaIPC, vcf, index, panel, p_index, sample, gmap]} - .combine(ch_chunks - .map {metaCR, regionin, regionout -> [metaCR.subMap("chr"), metaCR, regionin, regionout]}, - by: 0 - ) - .map{ - metaC, metaIPC, vcf, index, panel, p_index, sample, gmap, metaCR, regionin, regionout - -> [metaIPC + ["region": regionin], vcf, index, sample, regionin, regionout, panel, p_index, gmap] + .map{ metaPC, metaIPC, bam, bai, samples, regionin, regionout, panel, panel_index, gmap -> + [metaIPC + ["region": regionin], + bam, bai, samples, regionin, regionout, panel, panel_index, gmap] } GLIMPSE_PHASE ( ch_phase_input ) // [meta, vcf, index, sample, regionin, regionout, ref, ref_index, map] @@ -61,8 +57,9 @@ workflow VCF_IMPUTE_GLIMPSE1 { // 
Ligate all phased files in one and index it ligate_input = GLIMPSE_PHASE.out.phased_variants + .join( BCFTOOLS_INDEX_1.out.csi ) + .map{ metaIPCR, vcf, index -> [metaIPCR.subMap("id", "panel", "chr"), vcf, index] } .groupTuple() - .join( BCFTOOLS_INDEX_1.out.csi.groupTuple() ) GLIMPSE_LIGATE ( ligate_input ) ch_versions = ch_versions.mix(GLIMPSE_LIGATE.out.versions ) @@ -70,13 +67,13 @@ workflow VCF_IMPUTE_GLIMPSE1 { BCFTOOLS_INDEX_2 ( GLIMPSE_LIGATE.out.merged_variants ) ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions ) - + // Join imputed and index files ch_imputed_vcf_tbi = GLIMPSE_LIGATE.out.merged_variants - .join(BCFTOOLS_INDEX_2.out.csi) - .map{ metaIPCR, vcf, csi -> [metaIPCR + [tools: "Glimpse1"], vcf, csi] } + .join(BCFTOOLS_INDEX_2.out.tbi) + .map{ metaIPC, vcf, index -> [metaIPC + [tools: "Glimpse1"], vcf, index] } emit: - vcf_tbi = ch_imputed_vcf_tbi // channel: [ [id, chr], vcf, tbi ] + vcf_tbi = ch_imputed_vcf_tbi // channel: [ [id, panel, chr, tool], vcf, tbi ] versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files // channel: [ multiqc_files.yml ] } diff --git a/subworkflows/local/vcf_impute_glimpse2/main.nf b/subworkflows/local/vcf_impute_glimpse2/main.nf index e36ac002..242ffdaf 100644 --- a/subworkflows/local/vcf_impute_glimpse2/main.nf +++ b/subworkflows/local/vcf_impute_glimpse2/main.nf @@ -1,5 +1,7 @@ -include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase' -include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_1 } from '../../../modules/nf-core/bcftools/index' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' +include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_2 } from '../../../modules/nf-core/bcftools/index' workflow VCF_IMPUTE_GLIMPSE2 { @@ -14,40 +16,55 @@ workflow VCF_IMPUTE_GLIMPSE2 { ch_versions = 
Channel.empty() // Impute with Glimpse2 without using binary files - def samples_file = [[]] - def gmap = [[]] + samples_file = Channel.of([[]]).collect() + gmap_file = Channel.of([[]]).collect() // Create input channel to impute with Glimpse2 - // Add chr as key to input - ch_input = ch_input.map{meta, bam, bai -> return[['chr': meta.chr], meta, bam, bai]} // Join chunks and panel - ch_chunks_panel = ch_chunks.join(ch_panel) - - // Change key:value names - ch_chunks_panel = ch_chunks_panel.map{meta, vcf, csi, region1, region2 -> return[['id': meta.panel, 'chr': meta.chr], vcf, csi, region1, region2]} - - // Add chr as key - ch_chunks_panel = ch_chunks_panel.map{meta, vcf, csi, region1, region2 -> return[['chr': meta.chr], vcf, csi, region1, region2]} + ch_chunks_panel = ch_chunks + .combine(ch_panel, by:0) + .map{ metaPC, regionin, regionout, panel, index -> + [["panel": metaPC.id, "chr": metaPC.chr], regionin, regionout, panel, index] + } // Join input and chunks reference - ch_input_glimpse2 = ch_input.map { it + samples_file }.join(ch_chunks_panel).map { it + gmap } + ch_phase_input = ch_input + .combine(samples_file) + .combine(ch_chunks_panel) + .combine(gmap_file) + .map{ metaI, bam, bai, samples, metaPC, regionin, regionout, panel, panel_index, gmap -> + [metaI + metaPC + ["region": regionin], + bam, bai, samples, regionin, regionout, panel, panel_index, gmap] + } - // Remove chr key - ch_input_glimpse2 = ch_input_glimpse2.map{ it[1..-1] } // Impute with Glimpse2 - GLIMPSE2_PHASE(ch_input_glimpse2, ch_fasta) + GLIMPSE2_PHASE(ch_phase_input, ch_fasta) ch_versions = ch_versions.mix(GLIMPSE2_PHASE.out.versions) // Index phased file BCFTOOLS_INDEX_1(GLIMPSE2_PHASE.out.phased_variants) ch_versions = ch_versions.mix(BCFTOOLS_INDEX_1.out.versions) + // Ligate all phased files in one and index it + ligate_input = GLIMPSE2_PHASE.out.phased_variants + .join( BCFTOOLS_INDEX_1.out.tbi ) + .map{ metaIPCR, vcf, index -> [metaIPCR.subMap("id", "panel", "chr"), vcf, index] 
} + .groupTuple() + + GLIMPSE2_LIGATE ( ligate_input ) + ch_versions = ch_versions.mix(GLIMPSE2_LIGATE.out.versions ) + + BCFTOOLS_INDEX_2 ( GLIMPSE2_LIGATE.out.merged_variants ) + ch_versions = ch_versions.mix( BCFTOOLS_INDEX_2.out.versions ) + // Join imputed and index files - ch_imputed_vcf_tbi = GLIMPSE2_PHASE.out.phased_variants.join(BCFTOOLS_INDEX_1.out.tbi) + ch_imputed_vcf_tbi = GLIMPSE2_LIGATE.out.merged_variants + .join(BCFTOOLS_INDEX_2.out.tbi) + .map{ metaIPC, vcf, index -> [metaIPC + [tools: "Glimpse2"], vcf, index] } emit: - vcf_tbi = ch_imputed_vcf_tbi // [ [id, chr, region], vcf, tbi ] + vcf_tbi = ch_imputed_vcf_tbi // channel: [ [id, panel, chr, tool], vcf, tbi ] versions = ch_versions // channel: [ versions.yml ] } From 760afed0e6dd9314faf77cb518ee4cdba30037eb Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Thu, 23 May 2024 17:24:16 +0200 Subject: [PATCH 085/110] Add posfile support --- conf/steps/imputation_stitch.config | 21 ------- conf/steps/panel_prep.config | 8 ++- subworkflows/local/bam_impute_quilt/main.nf | 4 +- subworkflows/local/bam_impute_stitch/main.nf | 5 +- .../local/prepare_input_stitch/main.nf | 40 ++++++++------ .../local/vcf_sites_extract_bcftools/main.nf | 12 +++- workflows/phaseimpute/main.nf | 55 ++++--------------- 7 files changed, 57 insertions(+), 88 deletions(-) diff --git a/conf/steps/imputation_stitch.config b/conf/steps/imputation_stitch.config index 31bee00e..44fb873e 100644 --- a/conf/steps/imputation_stitch.config +++ b/conf/steps/imputation_stitch.config @@ -11,27 +11,6 @@ */ process { - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:.*' { - publishDir = [ - path: { "${params.outdir}/prep_panel/posfile/" }, - mode: params.publish_dir_mode, - enabled: true - ] - } - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:BCFTOOLS_QUERY' { - ext.args = "-f'%CHROM\t%POS\t%REF\t%ALT\\n'" - ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } - publishDir = [enabled: false] - } - - 
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:PREPARE_POSFILE_TSV:GAWK' { - ext.args = "'{ key = \$1 FS \$2 } !seen[key]++'" // Remove duplicates - ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } - ext.suffix = "txt" - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_STITCH:.*' { publishDir = [ path: { "${params.outdir}/imputation/stitch/" }, diff --git a/conf/steps/panel_prep.config b/conf/steps/panel_prep.config index 5ef56918..730c96ff 100644 --- a/conf/steps/panel_prep.config +++ b/conf/steps/panel_prep.config @@ -127,6 +127,12 @@ process { publishDir = [ enabled: false ] } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:GAWK' { + ext.args = "'{ gsub(\",\", \"\\t\") ; key = \$1 FS \$2 } !seen[key]++'" // Remove duplicates + ext.prefix = { "${meta.id}_${meta.chr}_posfile_stitch" } + ext.suffix = "txt" + } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_SITES_EXTRACT_BCFTOOLS:TABIX_BGZIP' { ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites_tsv" } publishDir = [ @@ -142,7 +148,7 @@ process { "-b2", "-e2" ].join(' ') - ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites_tsv" } + ext.prefix = { "${meta.id}_${meta.chr}_glimpse1_sites_tsv_gz" } publishDir = [ path: { "${params.outdir}/prep_panel/sites/tsv/" }, mode: params.publish_dir_mode, diff --git a/subworkflows/local/bam_impute_quilt/main.nf b/subworkflows/local/bam_impute_quilt/main.nf index 742f4aeb..141115d2 100644 --- a/subworkflows/local/bam_impute_quilt/main.nf +++ b/subworkflows/local/bam_impute_quilt/main.nf @@ -63,7 +63,9 @@ workflow BAM_IMPUTE_QUILT { ch_versions = ch_versions.mix(BCFTOOLS_INDEX_2.out.versions.first()) // Join VCFs and TBIs - ch_vcf_tbi = BCFTOOLS_ANNOTATE.out.vcf.join(BCFTOOLS_INDEX_2.out.tbi) + ch_vcf_tbi = BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_INDEX_2.out.tbi) + .map { metaIPC, vcf, tbi -> [metaIPC + [tools: "Quilt"], vcf, tbi] } emit: vcf_tbi = ch_vcf_tbi // channel: [ [id, panel], vcf, tbi ] diff --git 
a/subworkflows/local/bam_impute_stitch/main.nf b/subworkflows/local/bam_impute_stitch/main.nf index 2bd05078..42e997e7 100644 --- a/subworkflows/local/bam_impute_stitch/main.nf +++ b/subworkflows/local/bam_impute_stitch/main.nf @@ -22,8 +22,9 @@ workflow BAM_IMPUTE_STITCH { ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions) // Join VCFs and TBIs - ch_vcf_tbi = STITCH.out.vcf.join(BCFTOOLS_INDEX.out.tbi) - + ch_vcf_tbi = STITCH.out.vcf + .join(BCFTOOLS_INDEX.out.tbi) + .map { metaI, vcf, tbi -> [ metaI + [tools: "Stitch"], vcf, tbi ] } emit: vcf_tbi = ch_vcf_tbi // channel: [ [id, chr], vcf, tbi ] diff --git a/subworkflows/local/prepare_input_stitch/main.nf b/subworkflows/local/prepare_input_stitch/main.nf index f6d3c8d5..0a352d7d 100644 --- a/subworkflows/local/prepare_input_stitch/main.nf +++ b/subworkflows/local/prepare_input_stitch/main.nf @@ -1,9 +1,9 @@ workflow PREPARE_INPUT_STITCH { take: - ch_posfile // channel: [ [chr], posfile ] - ch_fasta // channel: [ [genome], fa, fai ] ch_input_impute // channel: [ [id, chr, region], bam, bai ] + ch_posfile // channel: [ [panel, chr], sites, tsv ] + ch_region // channel: [ [chr, region], region ] main: @@ -16,36 +16,42 @@ workflow PREPARE_INPUT_STITCH { ngen = params.ngen // Get chromosomes of posfile - ch_posfile = ch_posfile.map{meta, posfile -> return[['chr': meta.chr], posfile]} + ch_posfile = ch_posfile + .map{metaPC, posfile -> [[chr: metaPC.chr], metaPC, posfile]} // Get chromosomes of fasta - ch_chromosomes = ch_fasta.map{it -> it[2]} - .splitCsv(header: ["chr", "size", "offset", "lidebase", "linewidth", "qualoffset"], sep: "\t") - .map{it -> return [[chr: it.chr], it.chr]} + ch_chromosomes = ch_region + .map{metaCR, region -> [[chr: metaCR.chr], metaCR.chr]} // Make final channel with parameters - stitch_parameters = ch_posfile.map { it + input_empty + rdata_empty} - .join(ch_chromosomes) - .map { it + k_val + ngen} + stitch_parameters = ch_posfile + .map { it + input_empty + rdata_empty} + 
.join(ch_chromosomes) + .map { it + k_val + ngen} + .map { metaC, metaPC, posfile, input, rdata, chr, k_val, ngen -> + [metaPC, posfile, input, rdata, chr, k_val, ngen] + } // Prepare sample files for STITCH // Group input by ID - ch_bam_bai = ch_input_impute.map {meta, bam, bai -> [[meta.id], bam, bai]}.unique() + ch_bam_bai = ch_input_impute + .map {metaI, bam, bai -> [metaI.subMap("id"), bam, bai]} + .unique() // Make bamlist from bam input ch_bamlist = ch_bam_bai - .map {it[1].tokenize('/').last()} - .collectFile(name: "bamlist.txt", newLine: true, sort: true) + .map {it[2].toString().tokenize('/').last()} + .collectFile(name: "bamlist.txt", newLine: true, sort: true) // Collect all files - stitch_samples = ch_bam_bai.map {meta, bam, bai -> [["id": "all_samples"], bam, bai]} - .groupTuple() - .combine(ch_bamlist) - .collect() + stitch_samples = ch_bam_bai + .map {meta, bam, bai -> [["id": "all_samples"], bam, bai]} + .groupTuple() + .combine(ch_bamlist) + .collect() emit: stitch_parameters = stitch_parameters // channel: [ [chr], posfile, [], [], chr, k_val, ngen ] stitch_samples = stitch_samples // channel: [ [id], bam, bai, bamlist ] versions = ch_versions // channel: [ versions.yml ] - } diff --git a/subworkflows/local/vcf_sites_extract_bcftools/main.nf b/subworkflows/local/vcf_sites_extract_bcftools/main.nf index f9c82185..ee53e0e9 100644 --- a/subworkflows/local/vcf_sites_extract_bcftools/main.nf +++ b/subworkflows/local/vcf_sites_extract_bcftools/main.nf @@ -3,6 +3,7 @@ include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip' include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix' include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' +include { GAWK } from '../../../modules/nf-core/gawk' workflow VCF_SITES_EXTRACT_BCFTOOLS { take: @@ -27,6 +28,10 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { BCFTOOLS_QUERY(ch_panel_sites, [], [], []) ch_versions 
= ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + // Convert TSC to Stitch format ","" to "\t" + GAWK(BCFTOOLS_QUERY.out.output, []) + ch_versions = ch_versions.mix(GAWK.out.versions) + // Compress TSV TABIX_BGZIP(BCFTOOLS_QUERY.out.output) ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) @@ -39,7 +44,8 @@ workflow VCF_SITES_EXTRACT_BCFTOOLS { ch_panel_tsv = TABIX_BGZIP.out.output.combine(TABIX_TABIX.out.tbi, by: 0) emit: - panel_tsv = ch_panel_tsv // channel: [ [id, chr], tsv, tbi ] - panel_sites = ch_panel_sites // channel: [ [id, chr], vcf, csi ] - versions = ch_versions // channel: [ versions.yml ] + panel_tsv_glimpse = ch_panel_tsv // channel: [ [id, chr], tsv, tbi ] + panel_tsv_stitch = GAWK.out.output // channel: [ [id, chr], txt ] + panel_sites = ch_panel_sites // channel: [ [id, chr], vcf, csi ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 2f688fc5..dbc441a9 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -76,7 +76,7 @@ workflow PHASEIMPUTE { ch_depth // channel: depth select [ [depth], depth ] ch_map // channel: genetic map [ [chr], map] ch_posfile // channel: posfile [ [chr], txt] - ch_chunks // channel: chunks [ [chr], txt] + ch_chunks // channel: chunks [ [chr], txt] ch_versions // channel: versions of software used main: @@ -137,25 +137,18 @@ workflow PHASEIMPUTE { VCF_SITES_EXTRACT_BCFTOOLS(VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi) ch_versions = ch_versions.mix(VCF_SITES_EXTRACT_BCFTOOLS.out.versions) - // Prepare posfile stitch - PREPARE_POSFILE_TSV(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) - ch_versions = ch_versions.mix(PREPARE_POSFILE_TSV.out.versions) - // If required, phase panel (currently not working, a test should be added) // Phase panel with tool of choice (e.g. 
SHAPEIT5) VCF_PHASE_PANEL(VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi) ch_versions = ch_versions.mix(VCF_PHASE_PANEL.out.versions) - ch_panel = VCF_NORMALIZE_BCFTOOLS.out.vcf_tbi - .join(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) - .join(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv) - .join(VCF_PHASE_PANEL.out.vcf_tbi) + // Generate posfile channels + ch_posfile_glimpse = VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites + .join(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv_glimpse) + .map{ metaPC, sites, s_index, tsv, t_index -> [metaPC, sites, tsv]} + + ch_posfile_stitch = VCF_SITES_EXTRACT_BCFTOOLS.out.panel_tsv_stitch - // Generate channels (to be simplified) - ch_panel_sites_tsv = ch_panel - .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - -> [metaPC, sites, tsv] - } CONCAT_PANEL(VCF_SITES_EXTRACT_BCFTOOLS.out.panel_sites) ch_versions = ch_versions.mix(CONCAT_PANEL.out.versions) @@ -178,21 +171,9 @@ workflow PHASEIMPUTE { ch_chunks_glimpse1 = VCF_CHUNK_GLIMPSE.out.chunks_glimpse1 } - //Params posfile should replace part of ch_panel_sites_tsv (specifically, the .txt) - //The VCF with the sites and post-prepared panel should be used as input in --panel. 
- - // if (params.posfile) { - // ch_panel_sites_tsv = ch_posfile - // } else if (params.panel && params.steps.split(',').contains("panelprep") && !params.posfile) { - // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel - // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - // -> [metaPC, sites, tsv] - // } - // } - VCF_IMPUTE_GLIMPSE1( ch_input_impute, - ch_panel_sites_tsv, + ch_posfile_glimpse, ch_panel_phased, ch_chunks_glimpse1, ch_fasta @@ -235,13 +216,12 @@ workflow PHASEIMPUTE { if (params.tools.split(',').contains("stitch")) { print("Impute with STITCH") - // Get posfile from panelprep steps if --posfile not supplied - if (params.panel && params.steps.split(',').find { it in ["all", "panelprep"] }) { - ch_posfile = PREPARE_POSFILE_TSV.out.posfile + if (params.posfile) { + ch_posfile_stitch = ch_posfile } // Prepare inputs - PREPARE_INPUT_STITCH(ch_posfile, ch_fasta, ch_input_impute) + PREPARE_INPUT_STITCH(ch_input_impute, ch_posfile_stitch, ch_region) ch_versions = ch_versions.mix(PREPARE_INPUT_STITCH.out.versions) // Impute with STITCH @@ -283,17 +263,6 @@ workflow PHASEIMPUTE { } if (params.steps.split(',').contains("validate") || params.steps.split(',').contains("all")) { - - // if (params.posfile) { - // Use channel ch_posfile for validation - // ch_panel_sites_tsv = ch_posfile - // } else if (params.panel && params.steps.split(',').contains("panelprep") && !params.posfile) { - // ch_panel_sites_tsv = VCF_PHASE_PANEL.out.panel - // .map{ metaPC, norm, n_index, sites, s_index, tsv, t_index, phased, p_index - // -> [metaPC, sites, tsv] - // } - //} - ch_truth_vcf = Channel.empty() // Get extension of input files truth_ext = getAllFilesExtension(ch_input_validate_truth) @@ -308,7 +277,7 @@ workflow PHASEIMPUTE { GL_TRUTH( ch_truth.bam.map { [it[0], it[1], it[2]] }, - ch_panel_sites_tsv, + ch_posfile_glimpse, ch_fasta ) ch_multiqc_files = ch_multiqc_files.mix(GL_TRUTH.out.multiqc_files) From b6d01602ba092b34ed35922aeb1674260f5d1245 
Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 12:23:14 +0200 Subject: [PATCH 086/110] Fix stitch --- subworkflows/local/prepare_input_stitch/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_input_stitch/main.nf b/subworkflows/local/prepare_input_stitch/main.nf index 0a352d7d..c3e4b0ca 100644 --- a/subworkflows/local/prepare_input_stitch/main.nf +++ b/subworkflows/local/prepare_input_stitch/main.nf @@ -40,7 +40,7 @@ workflow PREPARE_INPUT_STITCH { // Make bamlist from bam input ch_bamlist = ch_bam_bai - .map {it[2].toString().tokenize('/').last()} + .map {it[1].toString().tokenize('/').last()} .collectFile(name: "bamlist.txt", newLine: true, sort: true) // Collect all files From 648aa9d1a1c13a759be520b84b537ea4f767e3f7 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 14:41:03 +0200 Subject: [PATCH 087/110] Fix simulation name for concordance --- conf/steps/simulation.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/steps/simulation.config b/conf/steps/simulation.config index 0666560a..ef7ba0b9 100644 --- a/conf/steps/simulation.config +++ b/conf/steps/simulation.config @@ -40,7 +40,7 @@ process { publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_MERGE' { - ext.prefix = { "${meta.id}_${meta.depth}x" } + ext.prefix = { "${meta.id}" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_DOWNSAMPLE:SAMTOOLS_INDEX_2' { ext.args = "" From 43af30a9f9386d45bb4b8c4cdf9f55cb6e43621d Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 14:41:27 +0200 Subject: [PATCH 088/110] Fix files names for concordance --- conf/steps/validation.config | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/conf/steps/validation.config b/conf/steps/validation.config index a7c4ff17..dcd1f906 100644 --- a/conf/steps/validation.config +++ b/conf/steps/validation.config @@ -33,7 +33,7 @@ process 
{ withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_ANNOTATE' { ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz"].join(' ') - ext.prefix = { "${meta.id}.annotate" } + ext.prefix = { "${meta.id}_${meta.chr}.annotate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_INDEX' { @@ -69,21 +69,17 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GLIMPSE2_CONCORDANCE' { ext.args = "--out-r2-per-site" - ext.prefix = { "${meta.id}.concordance" } + ext.prefix = { "${meta.id}_P${meta.panel}_T${meta.tools}.concordance" } publishDir = [ enabled: false ] } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { - ext.args = "'(NR == 1) || (FNR > 1)'" // Skip header line - ext.suffix = { "txt" } - } - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GUNZIP' { + ext.prefix = { "${meta.id}_P${meta.panel}_T${meta.tools}" } publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:ADD_COLUMNS' { - ext.prefix = { "${meta.id}_P${meta.panel}_SNP" } + ext.prefix = { "${meta.id}_P${meta.panel}_T${meta.tools}_SNP" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_CONCORDANCE_GLIMPSE2:GAWK' { From e0fbaafb78240b6efb789eb7dabe69e728c4bedb Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 14:41:58 +0200 Subject: [PATCH 089/110] Delete region from output concordance analysis --- modules/local/addcolumns/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/local/addcolumns/main.nf b/modules/local/addcolumns/main.nf index 7789f76d..24192d95 100644 --- a/modules/local/addcolumns/main.nf +++ b/modules/local/addcolumns/main.nf @@ -26,7 +26,6 @@ process ADD_COLUMNS { tail -n +\$HEADER_START $input | \\ awk 'NR==1{\$(NF+1)="ID"} NR>1{\$(NF+1)="${meta.id}"}1' | \\ - awk 'NR==1{\$(NF+1)="Region"} NR>1{\$(NF+1)="${meta.region}"}1' | \\ awk 'NR==1{\$(NF+1)="Depth"} NR>1{\$(NF+1)="${meta.depth}"}1' | \\ awk 'NR==1{\$(NF+1)="GPArray"} 
NR>1{\$(NF+1)="${meta.gparray}"}1' | \\ awk 'NR==1{\$(NF+1)="Tools"} NR>1{\$(NF+1)="${meta.tools}"}1' | \\ From d56411b8f2cefe295410b90505d43ae3881885bb Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 14:42:54 +0200 Subject: [PATCH 090/110] Add tools to channel combine --- .../local/vcf_concatenate_bcftools/main.nf | 10 +++++----- .../local/vcf_concordance_glimpse2/main.nf | 14 +++++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/vcf_concatenate_bcftools/main.nf b/subworkflows/local/vcf_concatenate_bcftools/main.nf index 70ea926c..22be31b0 100644 --- a/subworkflows/local/vcf_concatenate_bcftools/main.nf +++ b/subworkflows/local/vcf_concatenate_bcftools/main.nf @@ -4,17 +4,17 @@ include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' workflow VCF_CONCATENATE_BCFTOOLS { take: - ch_vcf_tbi // channel: [ [id, chr], vcf, tbi ] + ch_vcf_tbi // channel: [ [id, panel, chr, tools], vcf, tbi ] main: ch_versions = Channel.empty() // Keep only id from meta - ch_vcf_tbi_grouped = ch_vcf_tbi.map{ metaI, vcf, tbi -> [metaI.subMap("id") + ["chr": "all"], vcf, tbi] } - - // Group by ID - ch_vcf_tbi_grouped = ch_vcf_tbi_grouped.groupTuple( by:0 ) + ch_vcf_tbi_grouped = ch_vcf_tbi + .map{ metaIPTC, vcf, tbi -> [metaIPTC.subMap("id", "tools", "panel"), vcf, tbi] } + .groupTuple( by:0 ) + .map{ metaIPT, vcf, tbi -> [metaIPT + ["chr": "all"], vcf, tbi]} // Ligate and concatenate chunks BCFTOOLS_CONCAT(ch_vcf_tbi_grouped) diff --git a/subworkflows/local/vcf_concordance_glimpse2/main.nf b/subworkflows/local/vcf_concordance_glimpse2/main.nf index 1fb46575..8037fdad 100644 --- a/subworkflows/local/vcf_concordance_glimpse2/main.nf +++ b/subworkflows/local/vcf_concordance_glimpse2/main.nf @@ -6,8 +6,8 @@ include { GUNZIP } from '../../../modules/nf-core/gunzip' workflow VCF_CONCORDANCE_GLIMPSE2 { take: - ch_vcf_emul // VCF file with imputed genotypes [ [id], vcf, csi] - ch_vcf_truth // VCF file with truth 
genotypes [ [id], vcf, csi] + ch_vcf_emul // VCF file with imputed genotypes [ [id, panel, tool, chr], vcf, csi] + ch_vcf_truth // VCF file with truth genotypes [ [id, panel, chr], vcf, csi] ch_vcf_freq // VCF file with panel frequencies [ [panel], vcf, csi] ch_region // Regions to process [ [chr, region], region] @@ -17,11 +17,15 @@ workflow VCF_CONCORDANCE_GLIMPSE2 { ch_multiqc_files = Channel.empty() ch_concordance = ch_vcf_emul - .join(ch_vcf_truth) + .map{metaIPTC, vcf, csi -> [metaIPTC.subMap("id", "panel"), metaIPTC, vcf, csi]} + .combine(ch_vcf_truth + .map{metaIPC, vcf, csi -> [ metaIPC.subMap("id", "panel"), vcf, csi ]} + , by: 0 + ) .combine(ch_vcf_freq) .combine(ch_region.map{[it[1]]}.collect().toList()) - .map{metaI, emul, e_csi, truth, t_csi, metaP, freq, f_csi, regions -> - [metaI + ["panel":metaP.id], emul, e_csi, truth, t_csi, freq, f_csi, [], regions] + .map{metaI, metaIPTC, emul, e_csi, truth, t_csi, metaP, freq, f_csi, regions -> + [metaIPTC, emul, e_csi, truth, t_csi, freq, f_csi, [], regions] } GLIMPSE2_CONCORDANCE ( From 978e6e69997d30fda5085ce8d1ba5fdf621593ff Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 14:43:17 +0200 Subject: [PATCH 091/110] Concat truth --- workflows/phaseimpute/main.nf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index dbc441a9..11197c3d 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -29,6 +29,7 @@ include { VCF_SITES_EXTRACT_BCFTOOLS } from '../../subworkflows/ include { VCF_PHASE_PANEL } from '../../subworkflows/local/vcf_phase_panel' include { PREPARE_POSFILE_TSV } from '../../subworkflows/local/prepare_posfile_tsv' include { CHUNK_PREPARE_CHANNEL } from '../../subworkflows/local/chunk_prepare_channel' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } from '../../subworkflows/local/vcf_concatenate_bcftools' // GLIMPSE1 subworkflows include { VCF_IMPUTE_GLIMPSE1 } 
from '../../subworkflows/local/vcf_impute_glimpse1' @@ -48,12 +49,9 @@ include { PREPARE_INPUT_STITCH } from '../../subworkflows/ include { BAM_IMPUTE_STITCH } from '../../subworkflows/local/bam_impute_stitch' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_STITCH } from '../../subworkflows/local/vcf_concatenate_bcftools' -// CONCAT subworkflows -include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from '../../subworkflows/local/vcf_concatenate_bcftools' -include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } from '../../subworkflows/local/vcf_concatenate_bcftools' - // Concordance subworkflows include { BAM_GL_BCFTOOLS as GL_TRUTH } from '../../subworkflows/local/bam_gl_bcftools' +include { VCF_CONCATENATE_BCFTOOLS as CONCAT_TRUTH } from '../../subworkflows/local/vcf_concatenate_bcftools' include { VCF_CONCORDANCE_GLIMPSE2 } from '../../subworkflows/local/vcf_concordance_glimpse2' @@ -288,10 +286,14 @@ workflow PHASEIMPUTE { .map { [it[0], it[1], it[2]] } .mix(GL_TRUTH.out.vcf) + // Concatenate truth vcf by chromosomes + CONCAT_TRUTH(ch_truth_vcf) + ch_versions = ch_versions.mix(CONCAT_TRUTH.out.versions) + // Compute concordance analysis VCF_CONCORDANCE_GLIMPSE2( ch_input_validate, - ch_truth_vcf, + CONCAT_TRUTH.out.vcf_tbi_join, ch_panel_sites, ch_region ) From 46710fb830bdb9d7f9f146a2128b549d864f35d3 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 14:43:58 +0200 Subject: [PATCH 092/110] Update snapshot --- .../phaseimpute/tests/test_all.nf.test.snap | 145 +++++++++++------- 1 file changed, 86 insertions(+), 59 deletions(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index ffd7e33b..ecd86614 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -35,44 +35,20 @@ "Check test_all": { "content": [ [ - "simulation/NA12878_D1_Rchr21_16570000-16610000.bam", - "simulation/NA12878_D1_Rchr21_16570000-16610000.bam.bai", - 
"simulation/NA12878_D1_Rchr22_16570000-16610000.bam", - "simulation/NA12878_D1_Rchr22_16570000-16610000.bam.bai", - "simulation/NA12878_Rchr21_16570000-16610000.stats.txt", - "simulation/NA12878_Rchr22_16570000-16610000.stats.txt", - "simulation/NA19401_D1_Rchr21_16570000-16610000.bam", - "simulation/NA19401_D1_Rchr21_16570000-16610000.bam.bai", - "simulation/NA19401_D1_Rchr22_16570000-16610000.bam", - "simulation/NA19401_D1_Rchr22_16570000-16610000.bam.bai", - "simulation/NA19401_Rchr21_16570000-16610000.stats.txt", - "simulation/NA19401_Rchr22_16570000-16610000.stats.txt", - "simulation/NA20359_D1_Rchr21_16570000-16610000.bam", - "simulation/NA20359_D1_Rchr21_16570000-16610000.bam.bai", - "simulation/NA20359_D1_Rchr22_16570000-16610000.bam", - "simulation/NA20359_D1_Rchr22_16570000-16610000.bam.bai", - "simulation/NA20359_Rchr21_16570000-16610000.stats.txt", - "simulation/NA20359_Rchr22_16570000-16610000.stats.txt" + "simulation/NA12878.bam", + "simulation/NA12878.bam.bai", + "simulation/NA19401.bam", + "simulation/NA19401.bam.bai", + "simulation/NA20359.bam", + "simulation/NA20359.bam.bai", + "simulation/stats/NA12878_Rchr21_16570000-16610000.stats.txt", + "simulation/stats/NA12878_Rchr22_16570000-16610000.stats.txt", + "simulation/stats/NA19401_Rchr21_16570000-16610000.stats.txt", + "simulation/stats/NA19401_Rchr22_16570000-16610000.stats.txt", + "simulation/stats/NA20359_Rchr21_16570000-16610000.stats.txt", + "simulation/stats/NA20359_Rchr22_16570000-16610000.stats.txt" ], [ - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz.csi", - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz.csi", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.phase.bcf.csi", - 
"imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz.csi", - "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz.csi", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz.csi", - "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz.csi", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.phase.bcf.csi", "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", @@ -80,7 +56,49 @@ "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", "imputation/glimpse1/concat/versions.yml", - "imputation/glimpse1/versions.yml" + "imputation/glimpse2/NA12878_chr21.ligate.vcf.gz", + "imputation/glimpse2/NA12878_chr22.ligate.vcf.gz", + "imputation/glimpse2/NA19401_chr21.ligate.vcf.gz", + "imputation/glimpse2/NA19401_chr22.ligate.vcf.gz", + "imputation/glimpse2/NA20359_chr21.ligate.vcf.gz", + "imputation/glimpse2/NA20359_chr22.ligate.vcf.gz", + "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/versions.yml", + 
"imputation/glimpse2/versions.yml", + "imputation/quilt/concat/NA12878_quilt.vcf.gz", + "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA19401_quilt.vcf.gz", + "imputation/quilt/concat/NA19401_quilt.vcf.gz.tbi", + "imputation/quilt/concat/NA20359_quilt.vcf.gz", + "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", + "imputation/quilt/concat/versions.yml", + "imputation/stitch/RData/EM.all.chr22.RData", + "imputation/stitch/RData/end.chr22.RData", + "imputation/stitch/RData/endEM.chr22.RData", + "imputation/stitch/RData/sampleNames.chr22.RData", + "imputation/stitch/RData/start.chr22.RData", + "imputation/stitch/RData/startEM.chr22.RData", + "imputation/stitch/concat/all_samples_stitch.vcf.gz", + "imputation/stitch/concat/all_samples_stitch.vcf.gz.tbi", + "imputation/stitch/concat/versions.yml", + "imputation/stitch/input/sample.1.input.chr22.RData", + "imputation/stitch/input/sample.2.input.chr22.RData", + "imputation/stitch/input/sample.3.input.chr22.RData", + "imputation/stitch/plots/alphaMat.chr22.all.s.1.png", + "imputation/stitch/plots/alphaMat.chr22.normalized.s.1.png", + "imputation/stitch/plots/hapSum.chr22.s.1.png", + "imputation/stitch/plots/hapSum_log.chr22.s.1.png", + "imputation/stitch/plots/metricsForPostImputationQC.chr22.sample.jpg", + "imputation/stitch/plots/metricsForPostImputationQCChromosomeWide.chr22.sample.jpg", + "imputation/stitch/plots/r2.chr22.goodonly.jpg", + "imputation/stitch/stitch.chr21.vcf.gz", + "imputation/stitch/stitch.chr22.vcf.gz", + "imputation/stitch/versions.yml" ], [ "prep_panel/chunks/glimpse1/1000GP.s.norel_chr21_chunks_glimpse1.txt", @@ -96,9 +114,6 @@ "prep_panel/haplegend/1000GP.s.norel_chr22.legend.gz", "prep_panel/haplegend/1000GP.s.norel_chr22.samples", "prep_panel/haplegend/versions.yml", - "prep_panel/posfile/1000GP.s.norel_chr21_posfile_stitch.txt", - "prep_panel/posfile/1000GP.s.norel_chr22_posfile_stitch.txt", - "prep_panel/posfile/versions.yml", 
"prep_panel/sites/tsv/1000GP.s.norel_chr21_glimpse1_sites_tsv.txt.gz", "prep_panel/sites/tsv/1000GP.s.norel_chr21_glimpse1_sites_tsv.txt.gz.tbi", "prep_panel/sites/tsv/1000GP.s.norel_chr22_glimpse1_sites_tsv.txt.gz", @@ -111,9 +126,15 @@ "prep_panel/sites/vcf/versions.yml" ], [ - "validation/NA12878_Dnull_P1000GP.s.norel_Call_SNP.txt", - "validation/NA19401_Dnull_P1000GP.s.norel_Call_SNP.txt", - "validation/NA20359_Dnull_P1000GP.s.norel_Call_SNP.txt", + "validation/NA12878_P1000GP.s.norel_TGlimpse1_SNP.txt", + "validation/NA12878_P1000GP.s.norel_TGlimpse2_SNP.txt", + "validation/NA12878_P1000GP.s.norel_TQuilt_SNP.txt", + "validation/NA19401_P1000GP.s.norel_TGlimpse1_SNP.txt", + "validation/NA19401_P1000GP.s.norel_TGlimpse2_SNP.txt", + "validation/NA19401_P1000GP.s.norel_TQuilt_SNP.txt", + "validation/NA20359_P1000GP.s.norel_TGlimpse1_SNP.txt", + "validation/NA20359_P1000GP.s.norel_TGlimpse2_SNP.txt", + "validation/NA20359_P1000GP.s.norel_TQuilt_SNP.txt", "validation/TestQuality.txt", "validation/concat/NA12878_truth_concat.vcf.gz", "validation/concat/NA12878_truth_concat.vcf.gz.tbi", @@ -128,7 +149,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-17T21:48:07.027867847" + "timestamp": "2024-05-24T14:39:45.196421216" }, "Check test_validate": { "content": [ @@ -151,26 +172,13 @@ "Check test_quilt": { "content": [ [ - "imputation/quilt/NA12878_Rchr21_16570000-16610000.impute.annotate.vcf.gz", - "imputation/quilt/NA12878_Rchr21_16570000-16610000.impute.annotate.vcf.gz.tbi", - "imputation/quilt/NA12878_Rchr22_16570000-16610000.impute.annotate.vcf.gz", - "imputation/quilt/NA12878_Rchr22_16570000-16610000.impute.annotate.vcf.gz.tbi", - "imputation/quilt/NA19401_Rchr21_16570000-16610000.impute.annotate.vcf.gz", - "imputation/quilt/NA19401_Rchr21_16570000-16610000.impute.annotate.vcf.gz.tbi", - "imputation/quilt/NA19401_Rchr22_16570000-16610000.impute.annotate.vcf.gz", - "imputation/quilt/NA19401_Rchr22_16570000-16610000.impute.annotate.vcf.gz.tbi", 
- "imputation/quilt/NA20359_Rchr21_16570000-16610000.impute.annotate.vcf.gz", - "imputation/quilt/NA20359_Rchr21_16570000-16610000.impute.annotate.vcf.gz.tbi", - "imputation/quilt/NA20359_Rchr22_16570000-16610000.impute.annotate.vcf.gz", - "imputation/quilt/NA20359_Rchr22_16570000-16610000.impute.annotate.vcf.gz.tbi", "imputation/quilt/concat/NA12878_quilt.vcf.gz", "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", "imputation/quilt/concat/NA19401_quilt.vcf.gz", "imputation/quilt/concat/NA19401_quilt.vcf.gz.tbi", "imputation/quilt/concat/NA20359_quilt.vcf.gz", "imputation/quilt/concat/NA20359_quilt.vcf.gz.tbi", - "imputation/quilt/concat/versions.yml", - "imputation/quilt/versions.yml" + "imputation/quilt/concat/versions.yml" ], 1779 ], @@ -178,7 +186,7 @@ "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-17T19:34:26.554486719" + "timestamp": "2024-05-24T14:32:54.985163559" }, "Check test_sim": { "content": [ @@ -246,5 +254,24 @@ "nextflow": "23.10.1" }, "timestamp": "2024-05-17T19:31:46.118712605" + }, + "Check test": { + "content": [ + [ + "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", + "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", + "imputation/glimpse1/concat/versions.yml" + ], + 1779 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-24T14:30:09.449862457" } } \ No newline at end of file From 329468e42992e7f7adb520763dd7c904cfb332a5 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:10:42 +0200 Subject: [PATCH 093/110] Change to _ligate --- conf/steps/imputation_glimpse1.config | 2 +- conf/steps/imputation_glimpse2.config | 2 +- .../phaseimpute/tests/test_all.nf.test.snap | 87 +++++++++---------- 3 files changed, 44 insertions(+), 47 
deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index 487ffbb0..a253c896 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -59,7 +59,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_LIGATE' { - ext.prefix = { "${meta.id}_${meta.chr}.ligate" } + ext.prefix = { "${meta.id}_${meta.chr}_ligate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index c6d3fa5c..fcce9b70 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -32,7 +32,7 @@ process { } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE' { - ext.prefix = { "${meta.id}_${meta.chr}.ligate" } + ext.prefix = { "${meta.id}_${meta.chr}_ligate" } } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2' { diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index ecd86614..54804f8a 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -56,12 +56,12 @@ "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", "imputation/glimpse1/concat/versions.yml", - "imputation/glimpse2/NA12878_chr21.ligate.vcf.gz", - "imputation/glimpse2/NA12878_chr22.ligate.vcf.gz", - "imputation/glimpse2/NA19401_chr21.ligate.vcf.gz", - "imputation/glimpse2/NA19401_chr22.ligate.vcf.gz", - "imputation/glimpse2/NA20359_chr21.ligate.vcf.gz", - "imputation/glimpse2/NA20359_chr22.ligate.vcf.gz", + "imputation/glimpse2/NA12878_chr21_ligate.vcf.gz", + "imputation/glimpse2/NA12878_chr22_ligate.vcf.gz", + "imputation/glimpse2/NA19401_chr21_ligate.vcf.gz", + "imputation/glimpse2/NA19401_chr22_ligate.vcf.gz", + 
"imputation/glimpse2/NA20359_chr21_ligate.vcf.gz", + "imputation/glimpse2/NA20359_chr22_ligate.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", @@ -154,20 +154,23 @@ "Check test_validate": { "content": [ [ - "validation/NA12878_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", - "validation/NA12878_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", - "validation/NA19401_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", - "validation/NA19401_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", - "validation/NA20359_Dnull_P1000GP.s.norel_Cchr21_SNP.txt", - "validation/NA20359_Dnull_P1000GP.s.norel_Cchr22_SNP.txt", - "validation/TestQuality.txt" + "validation/NA12878_Pnull_Tnull_SNP.txt", + "validation/NA19401_Pnull_Tnull_SNP.txt", + "validation/NA20359_Pnull_Tnull_SNP.txt", + "validation/TestQuality.txt", + "validation/concat/NA12878_truth_concat.vcf.gz", + "validation/concat/NA12878_truth_concat.vcf.gz.tbi", + "validation/concat/NA19401_truth_concat.vcf.gz", + "validation/concat/NA19401_truth_concat.vcf.gz.tbi", + "validation/concat/NA20359_truth_concat.vcf.gz", + "validation/concat/NA20359_truth_concat.vcf.gz.tbi" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-17T21:42:10.644445872" + "timestamp": "2024-05-24T15:09:05.11577274" }, "Check test_quilt": { "content": [ @@ -191,52 +194,46 @@ "Check test_sim": { "content": [ [ - "simulation/NA12878_D1_Rchr21_16570000-16610000.bam", - "simulation/NA12878_D1_Rchr21_16570000-16610000.bam.bai", - "simulation/NA12878_D1_Rchr22_16570000-16610000.bam", - "simulation/NA12878_D1_Rchr22_16570000-16610000.bam.bai", - "simulation/NA12878_Rchr21_16570000-16610000.stats.txt", - "simulation/NA12878_Rchr22_16570000-16610000.stats.txt", - "simulation/NA19401_D1_Rchr21_16570000-16610000.bam", - "simulation/NA19401_D1_Rchr21_16570000-16610000.bam.bai", - "simulation/NA19401_D1_Rchr22_16570000-16610000.bam", - 
"simulation/NA19401_D1_Rchr22_16570000-16610000.bam.bai", - "simulation/NA19401_Rchr21_16570000-16610000.stats.txt", - "simulation/NA19401_Rchr22_16570000-16610000.stats.txt", - "simulation/NA20359_D1_Rchr21_16570000-16610000.bam", - "simulation/NA20359_D1_Rchr21_16570000-16610000.bam.bai", - "simulation/NA20359_D1_Rchr22_16570000-16610000.bam", - "simulation/NA20359_D1_Rchr22_16570000-16610000.bam.bai", - "simulation/NA20359_Rchr21_16570000-16610000.stats.txt", - "simulation/NA20359_Rchr22_16570000-16610000.stats.txt" + "simulation/NA12878.bam", + "simulation/NA12878.bam.bai", + "simulation/NA19401.bam", + "simulation/NA19401.bam.bai", + "simulation/NA20359.bam", + "simulation/NA20359.bam.bai", + "simulation/stats/NA12878_Rchr21_16570000-16610000.stats.txt", + "simulation/stats/NA12878_Rchr22_16570000-16610000.stats.txt", + "simulation/stats/NA19401_Rchr21_16570000-16610000.stats.txt", + "simulation/stats/NA19401_Rchr22_16570000-16610000.stats.txt", + "simulation/stats/NA20359_Rchr21_16570000-16610000.stats.txt", + "simulation/stats/NA20359_Rchr22_16570000-16610000.stats.txt" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-05-17T19:36:21.014655792" + "timestamp": "2024-05-24T14:48:45.584042427" }, "Check test_glimpse1": { "content": [ [ - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA12878_Rchr21_16570000-16610000_ligate.vcf.gz", + "imputation/glimpse1/NA12878_Rchr21_16570000-16610000_ligate.vcf.gz.csi", "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000_ligate.vcf.gz", + "imputation/glimpse1/NA12878_Rchr22_16570000-16610000_ligate.vcf.gz.csi", 
"imputation/glimpse1/NA12878_Rchr22_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000_ligate.vcf.gz", + "imputation/glimpse1/NA19401_Rchr21_16570000-16610000_ligate.vcf.gz.csi", "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000_ligate.vcf.gz", + "imputation/glimpse1/NA19401_Rchr22_16570000-16610000_ligate.vcf.gz.csi", "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000_ligate.vcf.gz", + "imputation/glimpse1/NA20359_Rchr21_16570000-16610000_ligate.vcf.gz.csi", "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.ligate.vcf.gz.csi", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000_ligate.vcf.gz", + "imputation/glimpse1/NA20359_Rchr22_16570000-16610000_ligate.vcf.gz.csi", "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.phase.bcf.csi", "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", From f0b79aaa3ed1f7611b8647e6f9807bca76e6563a Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:13:14 +0200 Subject: [PATCH 094/110] Add glimpse2 snapshot and nf-test --- workflows/phaseimpute/tests/test_all.nf.test | 4 +-- .../phaseimpute/tests/test_all.nf.test.snap | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 
deletions(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test b/workflows/phaseimpute/tests/test_all.nf.test index 12394c2b..cfca71c0 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test +++ b/workflows/phaseimpute/tests/test_all.nf.test @@ -34,7 +34,7 @@ nextflow_pipeline { } } - /* + test("Check test_glimpse2") { tag "test_glimpse2" config "../../../conf/test_glimpse2.config" @@ -60,7 +60,7 @@ nextflow_pipeline { } ) } - }*/ + } test("Check test_quilt") { tag "test_quilt" diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 54804f8a..89d06b3f 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -252,6 +252,32 @@ }, "timestamp": "2024-05-17T19:31:46.118712605" }, + "Check test_glimpse2": { + "content": [ + [ + "imputation/glimpse2/NA12878_chr21_ligate.vcf.gz", + "imputation/glimpse2/NA12878_chr22_ligate.vcf.gz", + "imputation/glimpse2/NA19401_chr21_ligate.vcf.gz", + "imputation/glimpse2/NA19401_chr22_ligate.vcf.gz", + "imputation/glimpse2/NA20359_chr21_ligate.vcf.gz", + "imputation/glimpse2/NA20359_chr22_ligate.vcf.gz", + "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", + "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", + "imputation/glimpse2/concat/versions.yml", + "imputation/glimpse2/versions.yml" + ], + 1777 + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-24T15:12:55.355916727" + }, "Check test": { "content": [ [ From 24656cf620bec99a3dc9e16cbed0dd207b1c96dd Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:13:35 +0200 Subject: [PATCH 095/110] Fix tools default value --- nextflow.config | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 19738465..faaf5412 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,7 @@ params { input = null input_region = null map = null - tools = null + tools = "" // Panel preparation panel = null From 08c75afb3156fc15eb5cf83c62948df089d6ec30 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:14:09 +0200 Subject: [PATCH 096/110] Fix nf-test command in md --- docs/development.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/development.md b/docs/development.md index f08fea0a..a00dc1bd 100644 --- a/docs/development.md +++ b/docs/development.md @@ -35,7 +35,7 @@ nextflow run main.nf -profile singularity,test_quilt --outdir results -resume ```bash nf-test test --verbose --profile singularity --tag test_all -nf-test test --verbose --profile singularity --tag test_all --update-snap #To update the snaps of a given test +nf-test test --verbose --profile singularity --tag test_all --update-snapshot #To update the snaps of a given test ``` ## Problematic From f04b89491fe4a07f540148e38edf96904cfc268a Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:14:41 +0200 Subject: [PATCH 097/110] Move to test_all for github action --- .github/workflows/ci.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fc0a2a10..0f03ef5c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,9 +30,8 @@ jobs: - "23.04.0" - "latest-everything" TEST_PROFILE: - - "test" - - "test_sim" - - "test_quilt" + - "test_all" + - "test_validate" - "test_stitch" steps: - name: Check out pipeline code From 6f5171bc2d0a0540d429f70a896ce5e0e6b36539 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:15:19 +0200 Subject: [PATCH 098/110] Fix readme command example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/README.md b/README.md index cb5fb5b7..b73b3314 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ nextflow run nf-core/phaseimpute \ -profile \ --input \ --genome "GRCh38" \ - --panel \ + --panel \ --steps "panelprep,impute" \ --tools "glimpse1" \ --outdir From fbb114fc916ba797d964de0b71ae8d91bb5e6452 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:27:11 +0200 Subject: [PATCH 099/110] Fix snapshot, only output necessary files --- conf/steps/imputation_glimpse1.config | 1 + conf/steps/imputation_glimpse2.config | 6 +-- .../phaseimpute/tests/test_all.nf.test.snap | 50 ------------------- 3 files changed, 3 insertions(+), 54 deletions(-) diff --git a/conf/steps/imputation_glimpse1.config b/conf/steps/imputation_glimpse1.config index a253c896..b631d92c 100644 --- a/conf/steps/imputation_glimpse1.config +++ b/conf/steps/imputation_glimpse1.config @@ -60,6 +60,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_LIGATE' { ext.prefix = { "${meta.id}_${meta.chr}_ligate" } + publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' { diff --git a/conf/steps/imputation_glimpse2.config b/conf/steps/imputation_glimpse2.config index fcce9b70..24af4d99 100644 --- a/conf/steps/imputation_glimpse2.config +++ b/conf/steps/imputation_glimpse2.config @@ -13,10 +13,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:.*' { - publishDir = [ - path: { "${params.outdir}/imputation/glimpse2/" }, - mode: params.publish_dir_mode, - ] + publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { @@ -33,6 +30,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:GLIMPSE2_LIGATE' { ext.prefix = { "${meta.id}_${meta.chr}_ligate" } + publishDir = [ enabled: false ] } withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE2:BCFTOOLS_INDEX_2' { diff --git 
a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 89d06b3f..7d0416c7 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -56,12 +56,6 @@ "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", "imputation/glimpse1/concat/versions.yml", - "imputation/glimpse2/NA12878_chr21_ligate.vcf.gz", - "imputation/glimpse2/NA12878_chr22_ligate.vcf.gz", - "imputation/glimpse2/NA19401_chr21_ligate.vcf.gz", - "imputation/glimpse2/NA19401_chr22_ligate.vcf.gz", - "imputation/glimpse2/NA20359_chr21_ligate.vcf.gz", - "imputation/glimpse2/NA20359_chr22_ligate.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", @@ -214,53 +208,9 @@ }, "timestamp": "2024-05-24T14:48:45.584042427" }, - "Check test_glimpse1": { - "content": [ - [ - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000_ligate.vcf.gz", - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000_ligate.vcf.gz.csi", - "imputation/glimpse1/NA12878_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000_ligate.vcf.gz", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000_ligate.vcf.gz.csi", - "imputation/glimpse1/NA12878_Rchr22_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA19401_Rchr21_16570000-16610000_ligate.vcf.gz", - "imputation/glimpse1/NA19401_Rchr21_16570000-16610000_ligate.vcf.gz.csi", - "imputation/glimpse1/NA19401_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000_ligate.vcf.gz", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000_ligate.vcf.gz.csi", - "imputation/glimpse1/NA19401_Rchr22_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA20359_Rchr21_16570000-16610000_ligate.vcf.gz", - 
"imputation/glimpse1/NA20359_Rchr21_16570000-16610000_ligate.vcf.gz.csi", - "imputation/glimpse1/NA20359_Rchr21_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000_ligate.vcf.gz", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000_ligate.vcf.gz.csi", - "imputation/glimpse1/NA20359_Rchr22_16570000-16610000.phase.bcf.csi", - "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA12878_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA19401_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz", - "imputation/glimpse1/concat/NA20359_glimpse1.vcf.gz.tbi", - "imputation/glimpse1/concat/versions.yml", - "imputation/glimpse1/versions.yml" - ], - 1779 - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-17T19:31:46.118712605" - }, "Check test_glimpse2": { "content": [ [ - "imputation/glimpse2/NA12878_chr21_ligate.vcf.gz", - "imputation/glimpse2/NA12878_chr22_ligate.vcf.gz", - "imputation/glimpse2/NA19401_chr21_ligate.vcf.gz", - "imputation/glimpse2/NA19401_chr22_ligate.vcf.gz", - "imputation/glimpse2/NA20359_chr21_ligate.vcf.gz", - "imputation/glimpse2/NA20359_chr22_ligate.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA12878_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz", From 3a08855b19d1af9dcb6fd47300d7a6d2a64a83ed Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:42:11 +0200 Subject: [PATCH 100/110] Fix combination in BAM quilt --- subworkflows/local/bam_impute_quilt/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_impute_quilt/main.nf b/subworkflows/local/bam_impute_quilt/main.nf index 141115d2..788c0961 100644 --- a/subworkflows/local/bam_impute_quilt/main.nf +++ b/subworkflows/local/bam_impute_quilt/main.nf @@ -27,10 +27,10 @@ 
workflow BAM_IMPUTE_QUILT { buffer = params.buffer if (genetic_map_file.isEmpty()) { - ch_hap_chunks = ch_hap_legend.join(ch_chunks).map { it + ngen + buffer + [[]] } + ch_hap_chunks = ch_hap_legend.combine(ch_chunks, by:0).map { it + ngen + buffer + [[]] } } else { // Add ngen and buffer + genetic map file (untested) - ch_hap_chunks = ch_hap_legend.join(ch_chunks).join(genetic_map_file) + ch_hap_chunks = ch_hap_legend.combine(ch_chunks, by:0).join(genetic_map_file) } ch_quilt = ch_input From 1b5447ab28f8da545ddd5b0d7bc7fedac6cdab4e Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:42:41 +0200 Subject: [PATCH 101/110] Simplify grouping in concatenate --- subworkflows/local/vcf_concatenate_bcftools/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/subworkflows/local/vcf_concatenate_bcftools/main.nf b/subworkflows/local/vcf_concatenate_bcftools/main.nf index 22be31b0..7b9230ac 100644 --- a/subworkflows/local/vcf_concatenate_bcftools/main.nf +++ b/subworkflows/local/vcf_concatenate_bcftools/main.nf @@ -12,9 +12,8 @@ workflow VCF_CONCATENATE_BCFTOOLS { // Keep only id from meta ch_vcf_tbi_grouped = ch_vcf_tbi - .map{ metaIPTC, vcf, tbi -> [metaIPTC.subMap("id", "tools", "panel"), vcf, tbi] } + .map{ metaIPTC, vcf, tbi -> [metaIPTC.subMap("id", "tools", "panel") + ["chr": "all"], vcf, tbi] } .groupTuple( by:0 ) - .map{ metaIPT, vcf, tbi -> [metaIPT + ["chr": "all"], vcf, tbi]} // Ligate and concatenate chunks BCFTOOLS_CONCAT(ch_vcf_tbi_grouped) From 9face9b439cc96f8925dec66d7ecf1375311d61d Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:43:25 +0200 Subject: [PATCH 102/110] Update Snapshot --- workflows/phaseimpute/tests/test_all.nf.test.snap | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 7d0416c7..8647840b 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ 
b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -63,7 +63,6 @@ "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/versions.yml", - "imputation/glimpse2/versions.yml", "imputation/quilt/concat/NA12878_quilt.vcf.gz", "imputation/quilt/concat/NA12878_quilt.vcf.gz.tbi", "imputation/quilt/concat/NA19401_quilt.vcf.gz", @@ -218,7 +217,6 @@ "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/versions.yml", - "imputation/glimpse2/versions.yml" ], 1777 ], From 30dc4610533a8372ef0ab95f1c2184bfec0684f5 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:48:33 +0200 Subject: [PATCH 103/110] Fix snapshot --- workflows/phaseimpute/tests/test_all.nf.test.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 8647840b..3e82bc11 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -216,7 +216,7 @@ "imputation/glimpse2/concat/NA19401_glimpse2.vcf.gz.tbi", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz", "imputation/glimpse2/concat/NA20359_glimpse2.vcf.gz.tbi", - "imputation/glimpse2/concat/versions.yml", + "imputation/glimpse2/concat/versions.yml" ], 1777 ], From 53def15684966f4c92d0e6bb16e667fdcb76e158 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 15:55:17 +0200 Subject: [PATCH 104/110] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54cb5e1e..e2ca3a83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ Initial release of nf-core/phaseimpute, created with the [nf-core](https://nf-co - [#19](https://github.com/nf-core/phaseimpute/pull/19) - Changed reference panel to accept a 
csv, update modules and subworkflows (glimpse1/2 and shapeit5) - [#40](https://github.com/nf-core/phaseimpute/pull/40) - Add STITCH method. Reorganize panelprep subworkflows. - [#51](https://github.com/nf-core/phaseimpute/pull/51) - Update all process and fix linting errors. Remove fastqc added by the template. -- [#56](https://github.com/nf-core/phaseimpute/pull/56) - Move to nf-test to check the output files names generated. Fix validation and concatenation by chromosomes missing. +- [#56](https://github.com/nf-core/phaseimpute/pull/56) - Move to nf-test to check the output files names generated. Fix validation and concatenation by chromosomes missing. Add dedicated GLIMPSE1 subworkflow. Fix posfile generation to be done once for glimpse and stitch. ### `Fixed` From af7c5c57de74963afda6b09a0b9af46ca6373ced Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 16:22:45 +0200 Subject: [PATCH 105/110] Fix order input channels for simulation and imputation --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 6db053c9..8c1a0bd8 100644 --- a/main.nf +++ b/main.nf @@ -54,10 +54,10 @@ workflow NFCORE_PHASEIMPUTE { ch_input_simulate = Channel.empty() ch_input_validate = Channel.empty() - if (params.steps.split(',').contains("impute")) { - ch_input_impute = ch_input - } else if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { + if (params.steps.split(',').contains("simulate") || params.steps.split(',').contains("all")) { ch_input_simulate = ch_input + } else if (params.steps.split(',').contains("impute")) { + ch_input_impute = ch_input } else if (params.steps.split(',').contains("validate")) { ch_input_validate = ch_input } From 89e6cbaaa05468fff90180d17c28f4f9715a58af Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 16:25:58 +0200 Subject: [PATCH 106/110] Delete unecessary workflow --- .../local/prepare_posfile_tsv/main.nf | 27 
------------------- 1 file changed, 27 deletions(-) delete mode 100644 subworkflows/local/prepare_posfile_tsv/main.nf diff --git a/subworkflows/local/prepare_posfile_tsv/main.nf b/subworkflows/local/prepare_posfile_tsv/main.nf deleted file mode 100644 index abd00d7e..00000000 --- a/subworkflows/local/prepare_posfile_tsv/main.nf +++ /dev/null @@ -1,27 +0,0 @@ -include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query' -include { GAWK } from '../../../modules/nf-core/gawk' - - -workflow PREPARE_POSFILE_TSV { - - take: - ch_panel_sites // channel: [ [id, chr], vcf, csi ] - - main: - - ch_versions = Channel.empty() - - // Convert position file to tab-separated file - BCFTOOLS_QUERY(ch_panel_sites, [], [], []) - ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions) - ch_posfile = BCFTOOLS_QUERY.out.output - - // Remove multiallelic positions from tsv - GAWK(ch_posfile, []) - ch_versions = ch_versions.mix(GAWK.out.versions) - - emit: - posfile = GAWK.out.output // channel: [ [id, chr], tsv ] - versions = ch_versions // channel: [ versions.yml ] - -} From ceb6e92d8d6458a51313f73d607b2c86232b5160 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 16:26:36 +0200 Subject: [PATCH 107/110] Delete unecessary workflow --- workflows/phaseimpute/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 11197c3d..8e76b6fa 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -27,7 +27,6 @@ include { VCF_CHR_CHECK } from '../../subworkflows/ include { VCF_NORMALIZE_BCFTOOLS } from '../../subworkflows/local/vcf_normalize_bcftools' include { VCF_SITES_EXTRACT_BCFTOOLS } from '../../subworkflows/local/vcf_sites_extract_bcftools' include { VCF_PHASE_PANEL } from '../../subworkflows/local/vcf_phase_panel' -include { PREPARE_POSFILE_TSV } from '../../subworkflows/local/prepare_posfile_tsv' include { CHUNK_PREPARE_CHANNEL } from 
'../../subworkflows/local/chunk_prepare_channel' include { VCF_CONCATENATE_BCFTOOLS as CONCAT_PANEL } from '../../subworkflows/local/vcf_concatenate_bcftools' @@ -89,6 +88,7 @@ workflow PHASEIMPUTE { ch_sim_output = Channel.empty() // Test if the input are all bam files + ch_input_sim. getAllFilesExtension(ch_input_sim) .map{ if (it != "bam") { error "All input files must be in BAM format to perform simulation" From 7503d93f23ff9b474a883ce9d0918cf4221a38f8 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 17:02:08 +0200 Subject: [PATCH 108/110] Add verbosity to ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f03ef5c..bfb8218f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,4 +52,4 @@ jobs: - name: Run pipeline with test data run: | - nf-test test --tag "${{ matrix.TEST_PROFILE }}" --profile docker + nf-test test --tag "${{ matrix.TEST_PROFILE }}" --profile docker --verbose From 863b0cb7dd2a5f0281dc8a3f9f69e62d79634aec Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 17:02:35 +0200 Subject: [PATCH 109/110] Remove duplicate testing of params --- .../local/utils_nfcore_phaseimpute_pipeline/main.nf | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index 7674b4a4..259cda1b 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -226,14 +226,6 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("chunks") } else { ch_chunks = [[],[]] - if ( - !params.steps.split(',').contains("panelprep") & - !params.steps.split(',').contains("all") & - params.steps.split(',').contains("impute") & - !params.tools.split(',') == ["stitch"] - ) { - error "No --chunks provided for 
--steps impute and step panel_prep not selected" - } } emit: @@ -305,7 +297,7 @@ def validateInputParameters() { assert !(params.genome == null && params.fasta == null), "Only one of --genome or --fasta must be provided" // Check that a steps is provided - assert params.steps, "A steps must be provided" + assert params.steps, "A step must be provided" // Check that at least one tool is provided if (params.steps.split(',').contains("impute")) { @@ -324,7 +316,7 @@ def validateInputParameters() { assert params.posfile, "No --posfile provided for --steps impute" } // Required by all tools except STITCH - if (!params.tools.split(',').contains("stitch")) { + if (params.tools != "stitch") { assert params.chunks, "No --chunks provided for --steps impute" } // Required by GLIMPSE1 and GLIMPSE2 only From 06b77b86a1b7707b9f7e0d81f0471d87c27e9548 Mon Sep 17 00:00:00 2001 From: LouisLeNezet Date: Fri, 24 May 2024 17:04:06 +0200 Subject: [PATCH 110/110] Fix gefileextension usage --- workflows/phaseimpute/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 8e76b6fa..b220b213 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -88,7 +88,6 @@ workflow PHASEIMPUTE { ch_sim_output = Channel.empty() // Test if the input are all bam files - ch_input_sim. getAllFilesExtension(ch_input_sim) .map{ if (it != "bam") { error "All input files must be in BAM format to perform simulation"