-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathmake_shrimp_workflow
executable file
·49 lines (36 loc) · 1.1 KB
/
make_shrimp_workflow
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/perl -w
use strict;
use warnings;

# Emit, on STDOUT, a make-style workflow that splits a query file into
# pieces, runs ./rmapper-cs on every piece against the genome, and
# concatenates the per-piece results into a single output file.
# (The "LOCAL" rule prefix suggests the target engine is Makeflow -- confirm.)
#
# Arguments (all four required, in this order):
#   query.csfasta   query file; every line starting with '>' counts as one
#                   sequence
#   genome.fasta    genome file listed as an input of every rmapper-cs rule
#   output.txt      name of the final concatenated result
#   seqs-per-split  positive integer passed to ./split_fasta
#
# Exits with status 1 and a message on STDERR when the arguments are
# unusable; dies when the query file cannot be opened.

if (@ARGV != 4) {
    # Diagnostics go to STDERR so they can never end up inside the workflow
    # file when STDOUT is redirected.
    print STDERR "Use: $0 <query.csfasta> <genome.fasta> <output.txt> <seqs-per-split>\n";
    exit(1);
}

my ($queryfile, $genomefile, $outputfile, $seqspersplit) = @ARGV;

# Zero or a non-numeric value would otherwise crash below with
# "Illegal division by zero"; a negative value would give a nonsensical
# split count. Reject all of them up front.
if ($seqspersplit !~ /\A\d+\z/ || $seqspersplit == 0) {
    print STDERR "$0: <seqs-per-split> must be a positive integer, got \"$seqspersplit\"\n";
    exit(1);
}

# Count the sequences: one per header line (a line beginning with '>').
# Three-argument open keeps characters such as '>' or '|' in the file name
# from being interpreted as an open mode.
my $nseqs = 0;
open(my $query_fh, '<', $queryfile)
    or die "$0: cannot open $queryfile: $!\n";
while (my $line = <$query_fh>) {
    $nseqs++ if $line =~ /^>/;
}
close($query_fh);

# NOTE(review): when $nseqs is an exact multiple of $seqspersplit this yields
# one more piece than ceil($nseqs / $seqspersplit). Whether that matches the
# number of files ./split_fasta really writes cannot be told from this
# script -- confirm against split_fasta before changing the formula.
my $nsplits = int(1 + $nseqs / $seqspersplit);

print "# Workflow generated by \"$0 @ARGV\"\n";
print "# $queryfile has $nseqs sequences\n";
print "# splitting into $nsplits files of $seqspersplit each\n";
print "\n";

# Piece i of the query is "<query>.i"; its mapper result is "output.i".
# $nsplits is always >= 1 here, so both lists contain at least index 0.
my @indices    = (0 .. $nsplits - 1);
my $inputlist  = join ' ', map { "$queryfile.$_" } @indices;
my $outputlist = join ' ', map { "output.$_" } @indices;

print "OPTIONS=-M fast -M 50bp\n";
print "\n";

# One rule produces every query piece from the original query file.
print "$inputlist: $queryfile\n";
print "\tLOCAL ./split_fasta $seqspersplit $queryfile\n\n";

# One mapper rule per piece. \$OPTIONS is escaped so the literal text
# "$OPTIONS" reaches the workflow file and is expanded by the workflow
# engine, not by Perl.
for my $i (@indices) {
    print "output.$i: $queryfile.$i $genomefile rmapper-cs\n";
    print "\t./rmapper-cs \$OPTIONS $queryfile.$i $genomefile > output.$i\n\n";
}

# Final rule: concatenate the per-piece results in index order.
print "$outputfile: $outputlist\n";
print "\tLOCAL cat $outputlist > $outputfile\n";