forked from mattb112885/clusterDbAnalysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
removeOrganism.sh
executable file
·244 lines (221 loc) · 6.47 KB
/
removeOrganism.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/bin/sh
###############################################################################################
# With no arguments at all, print the usage text and stop.
# The text is emitted from a quoted here-document so it is reproduced
# literally (no expansion); the surrounding blank lines are part of the output.
if [ $# -lt 1 ]; then
  cat <<'EOF'

Usage: removeOrganism.sh [organismId] [DELETEFLAG]

DESCRIPTION: When just called with organismId (no DELETEFLAG), just prints a list of files or parts of files to be deleted
To actually delete: run ./removeOrganism.sh [organismid] TRUE . This action is IRREVERSIBLE.
The TRUE is case sensitive and nothing else will work

NOTE - for this script to work the files MUST follow the standard naming conventions.
If the raw files and genbank files passed the test in checkInputFormat.sh then it should be OK.

EOF
  exit 0
fi
# DELETE selects the mode for the rest of the script:
#   0 - dry run, only report what would be removed (default)
#   1 - actually remove; enabled only when the second argument is exactly TRUE
DELETE=0
case "${2-}" in
  TRUE)
    echo "Deleting and modifying files..."
    DELETE=1
    ;;
  *)
    # Anything else (including a missing second argument) stays a dry run.
    echo ""
    echo "This function will only list all files that will be deleted. Pass TRUE as the DELETEFLAG to actually perform these deletions"
    echo ""
    ;;
esac
# The organism ID is the first positional argument.
ORGANISM=$1
# Accept only an ID of the exact form <digits>.<digits> (e.g. 83333.1).
# The check is done with shell patterns on the whole string, so:
#   - extra text around a valid-looking ID ("x 1.2", "1.2.3") is rejected, and
#   - the result does not depend on grep supporting -P (a grep failure used
#     to be mistaken for "valid").
case "${ORGANISM}" in
  *[!0-9.]* | *.*.* | .* | *.)
    # stray character, more than one dot, or a dot at either end
    _organism_id_ok=0
    ;;
  *.*)
    _organism_id_ok=1
    ;;
  *)
    # empty, or digits with no dot at all
    _organism_id_ok=0
    ;;
esac
if [ "${_organism_id_ok}" -ne 1 ]; then
  echo "ERROR: Specified organism ID ${ORGANISM} is invalid. Stop."
  echo ""
  # NOTE(review): exit status 0 on an error is kept so existing callers see
  # no change; consider a non-zero status once callers have been checked.
  exit 0
fi
# Make sure the ID occurs as a whole word somewhere in the organisms file;
# if it does not, there is nothing to remove and the script stops here.
if ! grep -F -w -q "${ORGANISM}" organisms; then
  echo "Organism ${ORGANISM} Not found in the database. Stop.";
  echo ""
  exit 0
fi
echo "Organism found!"
echo ""
# ORGNAME is the first tab-separated field of the matching line(s).
ORGNAME=$(grep -F -w "${ORGANISM}" organisms | cut -f 1)
# Remove the organism from the flat text tables: organisms, groups and
# (when it exists) ./aliases/aliases.

# _prune_table HEADING FILE PATTERN [GREP_OPTION...]
#   HEADING      line printed above the dry-run listing
#   FILE         table to inspect / rewrite
#   PATTERN      fixed string (grep -F) identifying the lines to drop
#   GREP_OPTION  extra grep options, e.g. -w for whole-word matching
# Reads DELETE from the caller: with DELETE=1 FILE is rewritten without the
# matching lines, otherwise the matching lines are only printed, followed by
# a blank line.
# Safety:
#   - an empty PATTERN is refused, because "grep -v -F ''" matches every line
#     and would empty the table;
#   - FILE is replaced only when grep finished with status 0 or 1 (1 just
#     means nothing is left); on a real grep error the temporary file is
#     discarded and FILE is left untouched.
# The body is a subshell, so its variables do not leak into the script.
_prune_table() (
  heading=$1
  table=$2
  needle=$3
  shift 3
  if [ -z "${needle}" ]; then
    echo "ERROR: empty search pattern for ${table}; leaving it untouched." >&2
    exit 1
  fi
  if [ "${DELETE:-0}" != "1" ]; then
    echo "${heading}"
    grep -F "$@" -e "${needle}" -- "${table}"
    echo ""
    exit 0
  fi
  grep -v -F "$@" -e "${needle}" -- "${table}" > "${table}_mod"
  status=$?
  if [ "${status}" -gt 1 ]; then
    rm -f -- "${table}_mod"
    echo "ERROR: could not filter ${table}; leaving it untouched." >&2
    exit 1
  fi
  mv -- "${table}_mod" "${table}"
)

# Remove organism from organisms file
_prune_table "Lines that would be removed from organisms file:" \
  organisms "${ORGANISM}" -w

# Remove any groups including that organism from the groups file.
# Since the groups file now explicitly lists all the organism names we can do this.
# NOTE(review): this is a plain substring match on the organism name, so a
# name that is contained in another organism's name would also hit that
# organism's groups - confirm the groups file format before tightening it.
_prune_table "Lines that would be removed from GROUPS file:" \
  groups "${ORGNAME}"

# Remove aliases involving that organism from the aliases file.
# Gene IDs look like fig|organism.peg.genenum, hence the "|<id>." pattern.
if [ -f ./aliases/aliases ]; then
  if [ "${DELETE:-0}" = "1" ]; then
    echo "Removing lines from alias file..."
  fi
  _prune_table "Lines that would be removed from the aliases file" \
    ./aliases/aliases "|${ORGANISM}."
fi
# Remove organism-specific blast results from ./blastres/ and ./blastn_res/,
# being careful not to let 83333.1 be the same as 83333.10.

# _purge_blast_hits DIR BANNER HEADING
#   DIR      results directory; nothing happens when it does not exist
#   BANNER   progress line printed once DIR has been found
#   HEADING  line printed above the dry-run listing
# Reads ORGANISM and DELETE from the caller. With DELETE=1 the matching
# entries are removed, otherwise they are only listed, followed by a blank
# line. An entry matches when its name
#   - starts with "<ORGANISM>.txt"  (ORGANISM is the query), or
#   - contains "_<ORGANISM>.txt"    (ORGANISM is the target).
# The ID is matched literally through quoted shell globs, so its "." is not
# a wildcard. The body is a subshell: the cd never affects the main script,
# and when the cd fails nothing is listed or deleted at all.
_purge_blast_hits() (
  [ -d "$1" ] || exit 0
  echo "$2"
  : "${ORGANISM:?organism ID is not set}"
  if ! cd "$1"; then
    echo "ERROR: cannot enter $1; nothing was removed there." >&2
    exit 1
  fi
  if [ "${DELETE:-0}" != "1" ]; then
    echo "$3"
  fi
  # Query matches first, then target matches.
  for entry in "${ORGANISM}".txt* *_"${ORGANISM}".txt*; do
    # Skip an unmatched glob (left as literal text) and any entry already
    # removed because it matched both patterns.
    [ -e "${entry}" ] || [ -L "${entry}" ] || continue
    if [ "${DELETE:-0}" = "1" ]; then
      rm -f -- "${entry}"
    else
      printf '%s\n' "${entry}"
    fi
  done
  if [ "${DELETE:-0}" != "1" ]; then
    echo ""
  fi
)

_purge_blast_hits ./blastres/ "blastres folder" \
  "Files that would be deleted from BLASTRES folder"
_purge_blast_hits ./blastn_res/ "blastnres folder" \
  "Files that would be deleted from BLASTNRES folder"
# Remove the organism's RPS-BLAST output from ./rpsblast_res.

# _purge_rpsblast_hits
# Reads ORGANISM and DELETE from the caller. Entries of ./rpsblast_res whose
# name starts with "<ORGANISM>.txt.faa_rpsout" are removed when DELETE=1 and
# only listed otherwise. Nothing happens when the directory does not exist.
# The ID is matched literally through a quoted shell glob, so its "." is not
# a wildcard. The body is a subshell: the cd never affects the main script,
# and when the cd fails nothing is listed or deleted at all.
_purge_rpsblast_hits() (
  [ -d ./rpsblast_res ] || exit 0
  echo "rpsblast_res folder"
  : "${ORGANISM:?organism ID is not set}"
  if ! cd ./rpsblast_res; then
    echo "ERROR: cannot enter ./rpsblast_res; nothing was removed there." >&2
    exit 1
  fi
  if [ "${DELETE:-0}" != "1" ]; then
    echo "Files that would be deleted from rpsblast_res folder"
  fi
  for entry in "${ORGANISM}".txt.faa_rpsout*; do
    # An unmatched glob is left as literal text; skip it.
    [ -e "${entry}" ] || [ -L "${entry}" ] || continue
    if [ "${DELETE:-0}" = "1" ]; then
      rm -f -- "${entry}"
    else
      printf '%s\n' "${entry}"
    fi
  done
)

_purge_rpsblast_hits
# Clusters, flat clusters and the database are derived from all organisms,
# so they are emptied completely and must be rebuilt by re-running the setup
# scripts.

# _wipe_folder DIR BANNER HEADING1 HEADING2 [RM_OPTION]
#   DIR        folder to empty; nothing happens when it does not exist
#   BANNER     progress line printed once DIR has been found
#   HEADING1/2 two lines printed above the dry-run listing
#   RM_OPTION  optional extra option for rm (e.g. -r to remove sub-folders)
# Reads DELETE from the caller. With DELETE=1 every entry of DIR whose name
# does not contain "README" is removed, otherwise those entries are only
# listed, followed by a blank line. Hidden (dot) entries are not touched.
# The body is a subshell: the cd never affects the main script, and when the
# cd fails the function stops before any rm - it can never end up deleting
# the contents of the directory the script was started from.
_wipe_folder() (
  [ -d "$1" ] || exit 0
  echo "$2"
  if ! cd "$1"; then
    echo "ERROR: cannot enter $1; nothing was removed there." >&2
    exit 1
  fi
  if [ "${DELETE:-0}" != "1" ]; then
    echo "$3"
    echo "$4"
  fi
  for entry in *; do
    # An empty folder leaves the glob as a literal "*"; skip it.
    [ -e "${entry}" ] || [ -L "${entry}" ] || continue
    # Just in case we decide we want to put a README here after all...
    case "${entry}" in
      *README*) continue ;;
    esac
    if [ "${DELETE:-0}" = "1" ]; then
      rm -f ${5:+"$5"} -- "${entry}"
    else
      printf '%s\n' "${entry}"
    fi
  done
  if [ "${DELETE:-0}" != "1" ]; then
    echo ""
  fi
)

_wipe_folder ./clusters/ "Clusters folder" \
  "All clusters would be deleted (re-run setup_step2.sh with the same parameters to get clusters without the deleted organism)" \
  "List of files that would be deleted:"
_wipe_folder ./flatclusters/ "Flat Clusters folder" \
  "All clusters would be deleted (re-run setup_step2.sh with the same parameters to get clusters without the deleted organism)" \
  "List of files to be deleted:"
_wipe_folder ./db/ "Database folder" \
  "All data in the database would be deleted (re-run setup scripts to re-build without the deleted organism)" \
  "List of files to be deleted:" -r
# Remove the organism's own input files: AA fasta, NT fasta, genbank,
# modified table and raw table.

# _purge_organism_file DIR BANNER HEADING EXT
#   DIR      folder to clean; nothing happens when it does not exist
#   BANNER   progress line printed once DIR has been found
#   HEADING  line printed above the dry-run listing
#   EXT      file extension without the dot (txt, gbk)
# Reads ORGANISM and DELETE from the caller. Entries of DIR whose name starts
# with "<ORGANISM>.<EXT>" are removed when DELETE=1 and only listed (followed
# by a blank line) otherwise.
# The ID is matched literally through a quoted shell glob, so its "." is not
# a wildcard and 83333.1 never matches 83333.10. The body is a subshell: the
# cd never affects the main script, and when the cd fails nothing is listed
# or deleted at all.
_purge_organism_file() (
  [ -d "$1" ] || exit 0
  echo "$2"
  : "${ORGANISM:?organism ID is not set}"
  if ! cd "$1"; then
    echo "ERROR: cannot enter $1; nothing was removed there." >&2
    exit 1
  fi
  if [ "${DELETE:-0}" != "1" ]; then
    echo "$3"
  fi
  for entry in "${ORGANISM}.$4"*; do
    # An unmatched glob is left as literal text; skip it.
    [ -e "${entry}" ] || [ -L "${entry}" ] || continue
    if [ "${DELETE:-0}" = "1" ]; then
      rm -f -- "${entry}"
    else
      printf '%s\n' "${entry}"
    fi
  done
  if [ "${DELETE:-0}" != "1" ]; then
    echo ""
  fi
)

_purge_organism_file ./faa/ "AA fasta folder" \
  "List of files that would be deleted from the AA fasta folder:" txt
_purge_organism_file ./fna/ "NT fasta folder" \
  "List of files that would be deleted from the NT fasta folder:" txt
_purge_organism_file ./genbank/ "Genbank folder" \
  "Genbank file that would be deleted:" gbk
_purge_organism_file ./modtable/ "Modified table folder" \
  "Modfiied table file that would be deleted:" txt
_purge_organism_file ./raw/ "RAW data table folder" \
  "RAW data table that would be deleted:" txt