Skip to content

Commit

Permalink
Merge branch 'dev' into samtools_index_c
Browse files Browse the repository at this point in the history
  • Loading branch information
atrigila authored Aug 27, 2024
2 parents 7d34115 + 0e95690 commit ff5ace9
Show file tree
Hide file tree
Showing 7 changed files with 290 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#382]](https://github.com/nf-core/smrnaseq/pull/382) - Add [collapse_mirtop.R](https://github.com/nf-core/smrnaseq/issues/174) - Add nf-tests for local modules using custom R scripts.
- [[#383]](https://github.com/nf-core/smrnaseq/pull/383) - Fix [parameter `--skip_fastp` throws an error](https://github.com/nf-core/smrnaseq/issues/263) - Fix parameter --skip_fastp.
- [[#384]](https://github.com/nf-core/smrnaseq/pull/384) - Fix [filter status bug fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix filter stats module and add filter contaminants test profile.
- [[#387]](https://github.com/nf-core/smrnaseq/pull/387) - Add nf-test to local module `blat_mirna` and fix [contaminant filter failure because the Docker image for BLAT cannot be pulled](https://github.com/nf-core/smrnaseq/issues/354). Add a small test profile to test contaminant filter results.
- [[#391]](https://github.com/nf-core/smrnaseq/pull/391) - Change `.bai` index for `.csi` index in `samtools_index` to fix [error because of large chromosomes](https://github.com/nf-core/smrnaseq/issues/132).

## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch
Expand Down
41 changes: 41 additions & 0 deletions conf/test_contamination.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test with the contamination filter enabled.
Use as follows:
nextflow run nf-core/smrnaseq -profile test_contamination,<docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

params {
    // Name and description of this test profile
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset to check pipeline function with contamination filter'

    // Resource ceilings, kept small so that the profile can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'

    // Input data: samplesheet and reference genome FASTA
    input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv'
    fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa'

    // General pipeline options for this test
    mirtrace_species         = 'hsa'
    skip_mirdeep             = true
    save_merged              = false
    save_aligned_mirna_quant = false

    // Contamination filter: switch it on and supply the cDNA, ncRNA and tRNA
    // FASTA files it is run against
    filter_contamination = true
    cdna                 = 'https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.cdna.all.fa'
    ncrna                = 'https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.ncrna.fa'
    trna                 = 'https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/hg19-tRNAs.fa'
}

// Include illumina config to run test without additional profiles

includeConfig 'protocol_illumina.config'
60 changes: 60 additions & 0 deletions modules/local/blat_mirna/blat_mirna.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
 * BLAT_MIRNA
 *
 * Removes from a contaminant FASTA every sequence that has a BLAT hit
 * (blast8 column 11 below 1e-5) against the supplied miRNA FASTA, and writes
 * the remaining sequences to `filtered.fa`.
 *
 * Inputs:
 *   db_type      - contaminant database type. The values "cdna" and "ncrna"
 *                  (matched case-sensitively) enable a biotype pre-filter on
 *                  the FASTA headers; any other value skips the pre-filter.
 *   mirna        - miRNA FASTA used as the BLAT database.
 *   contaminants - contaminant FASTA used as the BLAT query.
 *
 * Outputs:
 *   filtered_set - `filtered.fa`, the contaminants without miRNA hits.
 *   versions     - `versions.yml` with the BLAT version.
 */
process BLAT_MIRNA {
    // `fasta` is not a declared input of this process, so the previous
    // `tag "$fasta"` could not resolve; tag tasks by database type instead.
    tag "$db_type"
    label 'process_medium'

    conda 'bioconda::blat=36'
    container 'community.wave.seqera.io/library/ucsc-blat:445--32730933d3c2c916'

    input:
    val db_type
    path mirna
    path contaminants

    output:
    path 'filtered.fa' , emit: filtered_set
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // awk expression that is true for header lines whose 6th field carries a
    // biotype to drop before BLAT; stays null when no pre-filter applies.
    // Single-quoted Groovy strings keep `$6` literal for awk.
    def biotype_match = null
    if ( db_type == "cdna" ) {
        biotype_match = 'index($6, "transcript_biotype:miRNA")'
    } else if ( db_type == "ncrna" ) {
        biotype_match = '(index($6, "transcript_biotype:rRNA") || index($6, "transcript_biotype:miRNA"))'
    }

    // With a pre-filter, BLAT and the final filter run on `subset.fa`;
    // otherwise they run directly on the contaminant FASTA.
    def subset_cmd = biotype_match ? "awk '/^>/ { x=${biotype_match} } { if(!x) print }' $contaminants > subset.fa" : ''
    def query      = biotype_match ? 'subset.fa' : contaminants
    """
    echo $db_type
    $subset_cmd
    blat -out=blast8 $mirna $query /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt
    awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' $query > filtered.fa
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }')
    END_VERSIONS
    """

}
114 changes: 114 additions & 0 deletions modules/local/blat_mirna/tests/blat_mirna.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// nf-test suite for the local BLAT_MIRNA process: runs it once per contaminant
// database type (cDNA, ncRNA, tRNA) against the miRBase hairpin FASTA and
// checks both the output snapshot and the content of `filtered.fa`.
nextflow_process {

    name "Test Process BLAT_MIRNA"
    script "../blat_mirna.nf"
    process "BLAT_MIRNA"
    tag "modules"
    tag "modules_local"
    tag "blat_mirna"

    test("cDNA BLAT - Human") {

        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] = "cdna"
                input[1] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hairpin.fa", checkIfExists: true)]
                input[2] = [file("https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.cdna.all.fa", checkIfExists: true)]
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()

            // `filtered_set` emits a bare path, so its first element is the file itself
            def filtered_fa = process.out.filtered_set.get(0)
            assert filtered_fa.endsWith("filtered.fa")

            def lines = path(filtered_fa).readLines()

            // Check for AWK filtering of specific biotype
            assert !lines.any { it.contains("transcript_biotype:miRNA") }

            // Check for exclusion of miRNAs hits
            assert !lines.any { it.contains("ENST00000564740.1") }
        }

    }

    test("ncRNA BLAT - Human") {

        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                // BLAT_MIRNA compares db_type against the lowercase literal "ncrna";
                // the previous value "ncRNA" fell through to the generic branch and
                // never exercised the rRNA/miRNA biotype filter.
                // NOTE: the stored snapshot for this test was recorded with the old
                // input and must be regenerated (nf-test test --update-snapshot).
                """
                input[0] = "ncrna"
                input[1] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hairpin.fa", checkIfExists: true)]
                input[2] = [file("https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.ncrna.fa", checkIfExists: true)]
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()

            // `filtered_set` emits a bare path, so its first element is the file itself
            def filtered_fa = process.out.filtered_set.get(0)
            assert filtered_fa.endsWith("filtered.fa")

            def lines = path(filtered_fa).readLines()

            // Check for AWK filtering of specific biotypes
            assert !lines.any { it.contains("transcript_biotype:rRNA") }
            assert !lines.any { it.contains("transcript_biotype:miRNA") }

            // Check for exclusion of miRNAs hits
            assert !lines.any { it.contains("ENST00000564740.1") }
        }

    }

    test("tRNA BLAT - Human") {

        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                // Any value other than "cdna"/"ncrna" selects the branch without
                // a biotype pre-filter.
                """
                input[0] = "tRNA"
                input[1] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hairpin.fa", checkIfExists: true)]
                input[2] = [file("https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/hg19-tRNAs.fa", checkIfExists: true)]
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()

            // `filtered_set` emits a bare path, so its first element is the file itself
            def filtered_fa = process.out.filtered_set.get(0)
            assert filtered_fa.endsWith("filtered.fa")

            // Check for exclusion of miRNAs hits
            // NOTE(review): the tRNA FASTA presumably contains no Ensembl transcript
            // IDs, in which case this assertion can never fail - confirm and consider
            // asserting on an ID that is actually present in hg19-tRNAs.fa.
            def lines = path(filtered_fa).readLines()
            assert !lines.any { it.contains("ENST00000564740.1") }
        }

    }

}
71 changes: 71 additions & 0 deletions modules/local/blat_mirna/tests/blat_mirna.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"ncRNA BLAT - Human": {
"content": [
{
"0": [
"filtered.fa:md5,6bc8a430400e2e78cf7f474981230811"
],
"1": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
],
"filtered_set": [
"filtered.fa:md5,6bc8a430400e2e78cf7f474981230811"
],
"versions": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-23T17:53:35.313580289"
},
"tRNA BLAT - Human": {
"content": [
{
"0": [
"filtered.fa:md5,6b54e95ca5418d7d9c4d331ca3b2c96f"
],
"1": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
],
"filtered_set": [
"filtered.fa:md5,6b54e95ca5418d7d9c4d331ca3b2c96f"
],
"versions": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-23T17:57:58.238216453"
},
"cDNA BLAT - Human": {
"content": [
{
"0": [
"filtered.fa:md5,8fd42894e815999b4278b08297720aae"
],
"1": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
],
"filtered_set": [
"filtered.fa:md5,8fd42894e815999b4278b08297720aae"
],
"versions": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-23T17:53:16.735132971"
}
}
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ profiles {
test_index { includeConfig 'conf/test_index.config' }
test_technical_repeats { includeConfig 'conf/test_technical_repeats.config' }
test_mirgenedb { includeConfig 'conf/test_mirgenedb.config' }
test_skipfastp { includeConfig 'conf/test_skipfastp.config' }
test_contamination { includeConfig 'conf/test_contamination.config' }
test_skipfastp { includeConfig 'conf/test_skipfastp.config' }


//Protocol specific profiles
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/contaminant_filter.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
include { BLAT_MIRNA as BLAT_CDNA
BLAT_MIRNA as BLAT_NCRNA
BLAT_MIRNA as BLAT_PIRNA
BLAT_MIRNA as BLAT_OTHER } from '../../modules/local/blat_mirna'
BLAT_MIRNA as BLAT_OTHER } from '../../modules/local/blat_mirna/blat_mirna'

include { INDEX_CONTAMINANTS as INDEX_RRNA
INDEX_CONTAMINANTS as INDEX_TRNA
Expand Down

0 comments on commit ff5ace9

Please sign in to comment.