-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathngsTL.sh
executable file
·131 lines (106 loc) · 4.27 KB
/
ngsTL.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/bin/bash
# causes a script to immediately exit when it encounters an error.
set -e
# keep track of the last executed command
trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# echo an error message before exiting
trap 'echo "\"${last_command}\" command filed with exit code $?."' EXIT
# Parse location of this script so we can reference the helper scripts
repoDirectory=$(dirname $0)
echo "repo directory: $repoDirectory"
# default to the repo resources for gc bend and regions to search
gcBedFile=$repoDirectory/resources/GRCh38_full_analysis_set_plus_decoy_hla.1kb.LTL.GC.filtered.bed.gz
regionsSearch=$repoDirectory/resources/25kb.bins.bed
# Estimate TL from a bam or cram file
options=$(getopt -l "cramFile:,craiFile:,rootOutput:,referenceGenome:,gcBedFile:,regionsSearch:,mosdepthFile:" -o "" -a -- "$@")
eval set -- "$options"
while true
do
case "$1" in
-h|--help)
showHelp
exit 0
;;
# The cram (or bam) file to process
--cramFile)
shift
export cramFile="$1"
;;
# The cram (or bam) file's index
--craiFile)
shift
export craiFile="$1"
;;
# The root output path (e.g. rootOutput=/path/to/example should result in the creation of /path/to/example.ltl.estimate.txt.gz)
--rootOutput)
shift
export rootOutput="$1"
;;
# The reference genome used to align the cram or bam (e.g. GRCh38_full_analysis_set_plus_decoy_hla.fa)
--referenceGenome)
shift
export referenceGenome="$1"
;;
# Stores 1kb bins that have gc content similar to telomeric repeats.
# Unless you want to change it, this can be set to https://github.com/PankratzLab/NGS-TL/blob/main/resources/GRCh38_full_analysis_set_plus_decoy_hla.1kb.LTL.GC.filtered.bed.gz
--gcBedFile)
shift
export gcBedFile="$1"
;;
# A bed file defining the regions to look for telomeric reads (typically 25kb from the ends of each chromosome)
# Unless you want to change it, this can be set to https://github.com/PankratzLab/NGS-TL/blob/main/resources/25kb.bins.bed
--regionsSearch)
shift
export regionsSearch="$1"
;;
# (Optional) If a mosdepth file is provided, mosdepth will not be run.
# If mosdepth file is not provided, mosdepth will be run to produce $rootOutput.regions.bed.gz
--mosdepthFile)
shift
export mosdepthFile="$1"
;;
--)
shift
break;;
esac
shift
done
# Report params
echo "cram: $cramFile"
echo "crai: $craiFile"
echo "root output: $rootOutput"
mkdir -p $(dirname $rootOutput)
echo "referenceGenome: $referenceGenome"
echo "gcBedFile: $gcBedFile"
echo "regionsSearch: $regionsSearch"
echo "mosdepthFile: $mosdepthFile"
# We want to have the option to either run mosdepth, or to use an existing mosdepth file
if [ -z "$mosdepthFile" ]
then
# output file of interest
mosdepthFile=$rootOutput.regions.bed.gz
echo "Running mosdepth task"
[ -f "$mosdepthFile" ] || $repoDirectory/scripts/mosdepth.sh "$cramFile" "$rootOutput" "$referenceGenome"
else echo "mosdepthFile is set to '$mosdepthFile'"
fi
# output file of interest
gcStatsFile="$rootOutput.ltl.gc.stats.txt.gz"
echo "extracting GC regions task"
[ -f "$gcStatsFile" ] || $repoDirectory/scripts/extractMosdepthGC.sh "$mosdepthFile" "$gcBedFile" "$gcStatsFile"
# output file of interest
outTLCram="$rootOutput.ltl.cram"
samtoolsStatsFile="$outTLCram".stats.txt.gz
echo "extracting ends task"
[ -f "$outTLCram" ] || $repoDirectory/scripts/extractEnds.sh "$cramFile" "$craiFile" "$outTLCram" "$referenceGenome" "$regionsSearch"
# output file of interest
tlCountFile="$rootOutput.ltl.counts.txt.gz"
echo "counting tl reads task"
[ -f "$tlCountFile" ] || $repoDirectory/scripts/countTLReads.sh $outTLCram $tlCountFile
# output file of interest
outTLEstimate="$rootOutput.ltl.estimate.txt.gz"
echo "estimating tl task"
$repoDirectory/scripts/estimateTL.sh $tlCountFile $gcStatsFile $samtoolsStatsFile $outTLEstimate
if compgen -G "${rootOutput}*.txt" > /dev/null; then
gzip "$rootOutput"*.txt
fi
# gzip any txt files to save space (e.g those from mosdepth)