forked from Schlicklab/RAG-IF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_rnafold_ids.pl
130 lines (116 loc) · 3.86 KB
/
get_rnafold_ids.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/local/bin/perl
#ARGV[0] = diretcory where the rnafold run files are, ARGV[1] directory where RNAfold_Rank_Topo.txt should be written
# get the list of RNAfold output files
$command="ls ".$ARGV[0]."*.rnafold > ".$ARGV[0]."temp_list.txt";
system($command);
@ranks=();
@min_ids=();
@cen_ids=();
$count = -1;
$list_file=$ARGV[0]."temp_list.txt";
open(LIST,"<$list_file");
while($line=<LIST>){ # for every rnafold output file
$count++;
$line=~ s/\n//g;
#getting the rank / number of the struct file
@cols=split(/\//,$line);
@cols2=split(/\./,$cols[2]);
print $cols2[0]."\n";
$cols2[0] =~ s/Top//g;
$ranks[$count] = $cols2[0];
#creating bpseq files from RNAfold output
open(OUT,"<$line");
$garbage=<OUT>;
$sequen=<OUT>;
$sequen =~ s/\n//g;
$struct=<OUT>;
@cols_s=split(/\s+/,$struct);
$min_struct=$cols_s[0]; #dot bracket of the minimum energy structure
$garbage=<OUT>;
$struct=<OUT>;
@cols_s=split(/\s+/,$struct);
$cen_struct=$cols_s[0]; #dot bracket of the centroid energy structure
close(OUT);
$min_seq=$ARGV[0]."temp_min.fa";
open(FA,">$min_seq");
print FA ">Temp mfe\n";
print FA $sequen."\n";
close(FA);
$min_txtfile=$ARGV[0]."temp_min.dotbracket";
open(TXT,">$min_txtfile");
print TXT $min_struct;
close(TXT);
$command="python dotfa2bpseq.py ".$ARGV[0]."temp_min.fa ".$ARGV[0]."temp_min.dotbracket"; #bpseq file for the minimum energy structure
system($command);
$min_seq=$ARGV[0]."temp_cen.fa";
open(FA,">$min_seq");
print FA ">Temp cen\n";
print FA $sequen."\n";
close(FA);
$cen_txtfile=$ARGV[0]."temp_cen.dotbracket";
open(TXT,">$cen_txtfile");
print TXT $cen_struct;
close(TXT);
$command="python dotfa2bpseq.py ".$ARGV[0]."temp_cen.fa ".$ARGV[0]."temp_cen.dotbracket"; #bpseq file for the minimum energy structure
system($command);
#runnning treeGraphs to get the RAG IDs
$command="python2.7 modified-treeGraph/treeGraphs.py ".$ARGV[0]."temp_min.bpseq";
$output_text=`$command`;
@outlines=split(/\n/,$output_text);
$topo_min="";
for(my $i=0; $i<scalar(@outlines); $i++){
@cols5=split(/:/,$outlines[$i]);
if($cols5[0] eq "Graph ID"){ # this is the graph id of the bpseq file
$topo_min = $cols5[1];
$topo_min =~ s/\s+//g;
$topo_min =~ s/\n//g;
@min_ids[$count]=$topo_min;
last;
}
}
$command="python2.7 modified-treeGraph/treeGraphs.py ".$ARGV[0]."temp_cen.bpseq";
$output_text=`$command`;
@outlines=split(/\n/,$output_text);
$topo_cen="";
for(my $i=0; $i<scalar(@outlines); $i++){
@cols5=split(/:/,$outlines[$i]);
if($cols5[0] eq "Graph ID"){ # this is the graph id of the bpseq file
$topo_cen = $cols5[1];
$topo_cen =~ s/\s+//g;
$topo_cen =~ s/\n//g;
@cen_ids[$count]=$topo_cen;
last;
}
}
$command="rm -f ".$ARGV[0]."temp_min*";
system($command);
$command="rm -f ".$ARGV[0]."temp_cen*";
system($command);
}
close(LIST);
#clean up
$command="rm -f ".$ARGV[0]."temp*";
system($command);
#sort accoring to numerical rank
for($i=0;$i<=$count;$i++){
for($j=$i+1;$j<=$count;$j++){
if($ranks[$i] > $ranks[$j]){
$temp = $ranks[$i];
$ranks[$i] = $ranks[$j];
$ranks[$j] = $temp;
$temp = $min_ids[$i];
$min_ids[$i] = $min_ids[$j];
$min_ids[$j] = $temp;
$temp = $cen_ids[$i];
$cen_ids[$i] = $cen_ids[$j];
$cen_ids[$j] = $temp;
}
}
}
#writing the output file
$output_file=$ARGV[1]."RNAfold_Rank_Topo.txt";
open(OUTPUT,">$output_file");
for($i=0;$i<=$count;$i++){
print OUTPUT $ranks[$i]."\t".$min_ids[$i]."\t".$cen_ids[$i]."\n";
}
close(OUTPUT);