forked from lh3/fermikit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run-calling
executable file
·67 lines (58 loc) · 2.3 KB
/
run-calling
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Std;
my %opts = (t=>4);
getopts("t:b:o:", \%opts);
die('Usage: run-calling [options] <indexed-ref> <unitigs.mag.gz>
Options:
-o STR prefix of output files [inferred from input]
-b STR prefix for BWA index [same as <indexed-ref>]
-t INT number of threads [4]
') if @ARGV < 2;
# check path
my $exepath = $0 =~/^\S+\/[^\/\s]+/? $0 : &which($0);
my $root = $0 =~/^(\S+)\/[^\/\s]+/? $1 : undef;
$root = $exepath =~/^(\S+)\/[^\/\s]+/? $1 : undef if !defined($root);
die "ERROR: failed to locate the 'fermi.kit' directory\n" if !defined($root);
# check reference sequence
my $ref;
if (-f $ARGV[0]) {
$ref = $ARGV[0];
} elsif (-f "$ARGV[0].gz") {
$ref = "$ARGV[0].gz";
}
die("ERROR: can't find the reference sequences\n") unless defined($ref);
# check BWA index
my $idx = defined($opts{b})? $opts{b} : $ARGV[0];
die("ERROR: failed to locate the BWA index. Please run '$root/bwa index -p $idx ref.fa'.\n")
unless (-f "$idx.bwt" && -f "$idx.pac" && -f "$idx.sa" && -f "$idx.ann" && -f "$idx.amb");
# infer prefix
my $prefix;
if (defined $opts{o}) {
$prefix = $opts{o};
} elsif ($ARGV[1] =~ /\.mag(\.gz?)$/) {
$prefix = $ARGV[1];
$prefix =~ s/\.mag(\.gz?)$//;
}
die("ERROR: failed to identify the prefix for output. Please specify -o.\n") unless defined($prefix);
# construct command lines
my $cmd = '';
$cmd .= "[ ! -f $prefix.unsrt.sam.gz ] && $root/bwa mem -t$opts{t} -x intractg $idx $ARGV[1] 2> $prefix.unsrt.sam.log | gzip -1 > $prefix.unsrt.sam.gz;\n";
$cmd .= "[ ! -f $prefix.srt.bam ] && $root/htsbox samsort -S $prefix.unsrt.sam.gz > $prefix.srt.bam;\n";
$cmd .= "$root/htsbox pileup -cuf $ref $prefix.srt.bam | gzip -1 > $prefix.raw.vcf.gz;\n";
$cmd .= "($root/k8 $root/hapdip.js deovlp $prefix.raw.vcf.gz | $root/k8 $root/hapdip.js anno | gzip -1 > $prefix.tmp.vcf.gz) 2> $prefix.flt.vcf.log;\n";
$cmd .= "($root/k8 $root/hapdip.js filter -q3 $prefix.tmp.vcf.gz | gzip -1 > $prefix.flt.vcf.gz) 2>> $prefix.flt.vcf.log; rm -f $prefix.tmp.vcf.gz;\n";
$cmd .= "$root/htsbox abreak -cuf $ref $prefix.unsrt.sam.gz | gzip -1 > $prefix.sv.vcf.gz;\n";
print $cmd;
sub which
{
my $file = shift;
my $path = (@_)? shift : $ENV{PATH};
return if (!defined($path));
foreach my $x (split(":", $path)) {
$x =~ s/\/$//;
return "$x/$file" if (-x "$x/$file");
}
return;
}