-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_mt.nf
147 lines (118 loc) · 3.56 KB
/
main_mt.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
* 13 November 2019 |
*/
// USEFUL HINTS:
// https://hub.docker.com/r/makaho/hisat2-zstd/tags
// reads_ch.view() - to run script that is above this command
// return
// cd work/e7/ - to see content of the channel
/*
* Step 1. Define the default parameters
*/
params.genome = "/home/user1/NXF_training/test-data/reference/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
params.reads = "/home/user1/NXF_training/test-data/reads/*_{1,2}.fq"
params.annotation = "/home/user1/NXF_training/test-data/reference/ggal_1_48850000_49020000.bed.gff"
params.results = "results"
/*
* Step 2. Parse the input parameters
*/
genome_file = file(params.genome) // value channel because if 10 processes ask for this value it will find it 10 times | in is the same out
annot_file = file(params.annotation)
Channel.fromFilePairs(params.reads).into{reads_ch; reads_ch2} // channelFrom is queue channel | beacuse inpt is queued output is queued
/*
* RUN to check: nextflow run main_mt.nf
*/
/*
* PROCESS 2 | Step 3. Check quality with FASTQC
*/
process 'check_Fa_quality_FASTQC' {
container 'biocontainers/fastqc:v0.11.8dfsg-2-deb_cv1'
input:
tuple val(sampleId), path(reads) from reads_ch
output:
file "*_fastqc.{zip,html}" into results_qual_ch
script:
"""
fastqc $reads -o .
"""
}
/*
* PROCESS 3 | Step 3. Trimming
*/
process 'Trimming_cutadapt' {
container 'quay.io/biocontainers/cutadapt:2.6--py36h516909a_0'
input:
tuple val(sampleId), path(reads) from reads_ch2
output:
file("*_trimmed.fq") into results_trim_ch
script:
"""
cutadapt -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT -o ${reads[0]}_trimmed.fq -p ${reads[1]}_trimmed.fq $reads
"""
}
/*
* PROCESS 4 | Step 4. Get INDEX file with HISAT2-build
* docker hub
* hisat2-build [options]* <reference_in> <ht2_base>
* tuple is used as we need Fa and Fai together in future
*/
process 'Get indexed file HISAT2' {
container 'makaho/hisat2-zstd:latest'
input:
file(genome) from genome_file
output:
file("indexed.*.ht2") into(genome_index_ch) //, genome_aligned_ch2)
script:
"""
hisat2-build $genome indexed
"""
}
/*
* PROCESS 5 | Step 5. Aligne with HISAT2
* hisat2-build [options]* <reference_in> <ht2_base>
*/
process 'Align HISAT2' {
container 'makaho/hisat2-zstd:latest'
input:
file(reads) from results_trim_ch
file(indexed) from genome_index_ch
output:
tuple file("OUT_aligned_ggal.sam"), file("stats.log") into sam_channel
script:
"""
hisat2 -p 20 --no-spliced-alignment -x indexed -1 ${reads[0]} -2 ${reads[1]} -S OUT_aligned_ggal.sam &> stats.log
"""
}
/*
* PROCESS 6 | Step 6. Sam to Bam
* samtools view -S -b sample.sam > sample.bam
*/
process 'Convert SAM to BAM' {
container 'jweinstk/samtools:latest'
input:
file(aligned_sam) from sam_channel
output:
file("filtered.bam") into bam_channel
script:
"""
samtools view -S -b $aligned_sam | samtools view -b -f 2 -F 256 - > filtered.bam
"""
}
/*
* PROCESS 7 | Step 7. Count reads
* samtools view -S -b sample.sam > sample.bam
*/
process 'Count reads featureCounts' {
publishDir '/home/user1/NXF_training'
container 'nfcore/rnaseq:1.4.2'
input:
file(filtered_bam) from bam_channel
file(annot) from annot_file
output:
file("raw_counts.txt") into count_channel
script:
"""
featureCounts -p -s 2 --fracOverlap 0.8 -a $annot -o raw_counts.txt $filtered_bam
"""
}
// featureCounts [options] -a <annotation_file> -o <output_file> input_file1 [input_file2] .