-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathsecond_ed_ngs2_ass
80 lines (45 loc) · 3.39 KB
/
second_ed_ngs2_ass
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
##varient Calling for RNA seq Data
## Installing STAR for alignning RNA reads
conda install -c bioconda star
conda install -c bioconda/label/cf201901 star
## Generating genome index
## 1- STAR uses genome index files that must be saved in unique directories. (only ch22 will be used not the whole genome)
genomeDir=/home/ngs-01/workdir/Assignment2/chr22_with_ERCC92
mkdir $genomeDir
STAR --runMode genomeGenerate --genomeDir $genomeDir --genomeFastaFiles /home/ngs-01/workdir/sample_data/chr22_with_ERCC92.fa --runThreadN 1 --sjdbGTFfile /home/ngs-01/workdir/sample_data/chr22_with_ERCC92.gtf
## 2- Alignment jobs were executed as follows:
runDir=/home/ngs-01/workdir/Assignment2/1pass
mkdir $runDir
cd $runDir
STAR --genomeDir $genomeDir --readFilesIn /home/ngs-01/workdir/Assignment2/ngs2-assignment-data/SRR8797509_1.part_001.part_001.fastq /home/ngs-01/workdir/Assignment2/ngs2-assignment-data/SRR8797509_2.part_001.part_001.fastq --runThreadN 4
## 3- For the 2-pass STAR, a new index is then created using splice junction information contained in the file SJ.out.tab from the first pass:
genomeDir=/home/ngs-01/workdir/Assignment2/chr22_with_ERCC92_2pass
mkdir $genomeDir
STAR --runMode genomeGenerate --genomeDir $genomeDir --genomeFastaFiles /home/ngs-01/workdir/sample_data/chr22_with_ERCC92.fa --sjdbFileChrStartEnd /home/ngs-01/workdir/Assignment2/1pass/SJ.out.tab --sjdbOverhang 75 --runThreadN 4
## 4- The resulting index is then used to produce the final alignments as follows:
runDir=/home/ngs-01/workdir/Assignment2/2pass
mkdir $runDir
cd $runDir
STAR --genomeDir $genomeDir --readFilesIn /home/ngs-01/workdir/Assignment2/ngs2-assignment-data/SRR8797509_1.part_001.part_001.fastq /home/ngs-01/workdir/Assignment2/ngs2-assignment-data/SRR8797509_2.part_001.part_001.fastq --runThreadN 4
## Add read groups, sort, mark duplicates, and create index
R1=/home/ngs-01/workdir/Assignment2/SRR8797509_1.part_001.part_001.fastq
R2=/home/ngs-01/workdir/Assignment2/SRR8797509_2.part_001.part_001.fastq
SM="SRR8797509"
LB="SRR8797509_2019329"
PL="Illumina"
PU="HiSeqXTen"
RGID=$(cat /home/ngs-01/workdir/Assignment2/ngs2-assignment-data/SRR8797509_*.part_001.part_001.fastq | head -n1 | sed 's/ /_/g' | cut -d "_" -f1)
picard_path="/home/ngs-01/miniconda3/envs/ngs1/share/picard-2.19.2-0/"
java -jar $picard_path/picard.jar AddOrReplaceReadGroups I=/home/ngs-01/workdir/Assignment2/2pass/Aligned.out.sam O=rg_added_sorted.bam SO=coordinate RGID=$RGID RGLB=$LB RGPL=$PL RGPU=$PU RGSM=$SM
java -jar $picard_path/picard.jar MarkDuplicates I=rg_added_sorted.bam O=dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=output.metrics
# Split'N'Trim and reassign mapping qualities
# conda install -c bioconda gatk4
samtools faidx /home/ngs-01/workdir/sample_data/chr22_with_ERCC92.fa
gatk CreateSequenceDictionary -R /home/ngs-01/workdir/sample_data/chr22_with_ERCC92.fa -O /home/ngs-01/workdir/sample_data/chr22_with_ERCC92.dict
gatk SplitNCigarReads -R /home/ngs-01/workdir/sample_data/chr22_with_ERCC92.fa -I dedupped.bam -O split.bam
#indexing of BAM
picard_path=$picard_path
java -Xmx2g -jar $picard_path/picard.jar BuildBamIndex VALIDATION_STRINGENCY=LENIENT INPUT=split.bam
# Download known varinats
wget ftp://ftp.ensembl.org/pub/grch37/current/variation/vcf/homo_sapiens/homo_sapiens-chr22.vcf.gz -O chr22.vcf.gz```
gunzip chr22.vcf.gz