-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgmt2tab.pl
62 lines (46 loc) · 1.58 KB
/
gmt2tab.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/perl
=pod

This script is mainly used to convert files in GMT format to the tab format
used by Genomica.

=cut
#@files = ("c1.all.v2.5.symbols.gmt.txt", "c2.genmapp.v2.5.symbols.gmt.txt", "c4.all.v2.5.symbols.gmt.txt", "c5.cc.v2.5.symbols.gmt.txt", "c2.all.v2.5.symbols.gmt.txt", "c2.kegg.v2.5.symbols.gmt.txt", "c4.cgn.v2.5.symbols.gmt.txt", "c5.mf.v2.5.symbols.gmt.txt", "c2.biocarta.v2.5.symbols.gmt.txt", "c3.all.v2.5.symbols.gmt.txt", "c4.cm.v2.5.symbols.gmt.txt", "msigdb.v2.5.symbols.gmt.txt", "c2.cgp.v2.5.symbols.gmt.txt", "c3.mir.v2.5.symbols.gmt.txt", "c5.all.v2.5.symbols.gmt.txt", "c2.cp.v2.5.symbols.gmt.txt", "c3.tft.v2.5.symbols.gmt.txt", "c5.bp.v2.5.symbols.gmt.txt");
#@files = ("new.gmt");
use strict;
use warnings;

# Input files to merge and the single matrix file written from them.
# On each tab-separated input line, field 0 is taken as the gene-set name,
# field 1 is ignored, and every field from index 2 onward is taken as a gene.
my @files   = ("c5.all.v3.0.symbols.gmt.txt");
my $outfile = "c5.all.v3.0.symbols.gmt.tab";

my %flag;               # $flag{$geneset}{$gene} = 1 when the gene is in the set
my %seen_geneset;       # gene-set names already recorded
my %seen_gene;          # gene names already recorded
my @unique_genesets;    # gene-set names in first-seen order (output columns)
my @unique_genes;       # gene names in first-seen order (output rows)

foreach my $file (@files) {
    # Fail loudly: an unreadable input would otherwise silently produce a
    # matrix that is missing every gene set from that file.
    open(my $in, '<', $file) or die "Cannot open '$file' for reading: $!";

    while (my $line = <$in>) {
        # Strip LF or CRLF so a trailing "\r" never becomes part of a gene name.
        $line =~ s/\r?\n\z//;

        my ($name, undef, @members) = split /\t/, $line;

        # Blank lines and lines without a gene-set name would otherwise add
        # a column with an empty header.
        next if !defined $name || $name eq '';

        # Names are upper-cased so sets and genes match case-insensitively.
        my $geneset = uc $name;
        push @unique_genesets, $geneset unless $seen_geneset{$geneset}++;

        foreach my $gene (@members) {
            next if $gene eq '';    # empty field between two tabs
            $gene = uc $gene;
            $flag{$geneset}{$gene} = 1;
            # De-duplicate while reading instead of storing every occurrence.
            push @unique_genes, $gene unless $seen_gene{$gene}++;
        }
    }

    close $in;
}

open(my $out, '>', $outfile) or die "Cannot open '$outfile' for writing: $!";

# Header row: "Gene" followed by one column per gene set.
print {$out} join("\t", 'Gene', @unique_genesets), "\n";

# One row per gene: 1 where the gene belongs to the column's set, 0 otherwise.
foreach my $gene (@unique_genes) {
    my @membership = map { $flag{$_}{$gene} ? 1 : 0 } @unique_genesets;
    print {$out} join("\t", $gene, @membership), "\n";
}

# Buffered write errors (e.g. disk full) only surface at close.
close $out or die "Error closing '$outfile': $!";