-
Notifications
You must be signed in to change notification settings - Fork 0
/
track.pm
57 lines (49 loc) · 1.73 KB
/
track.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
package track;
# In-memory representation of a BED file, both reasonably compact and
# reasonably idiomatic.
require Exporter ;
@ISA = qw( Exporter ) ;
@EXPORT_OK = qw( read_track contains ) ;
use strict;
# Reads a BED file containing regions. Being a BED file, coordinates
# are zero-based half-open intervals.
sub read_track($) {
my ($targetfile) = @_;
my ($counter, $interval, %target);
open TARGETS, $targetfile or die "could not read $targetfile: $?!\n";
print STDERR "[track.pm] Reading track\n";
while (my $line = <TARGETS>) {
next if $line =~ /^#/;
next if $line =~ /^\s+$/;
$counter++; $interval++; if ($interval == 100_000) {print STDERR "\r$counter"; $interval = 0}
chomp $line;
my ($chromosome, $start, $end) = split /\t/, $line;
die "target end must be after target start: $chromosome $start $end\nForgot -one?" unless $end >= $start;
$target{$chromosome} = [] unless $target{$chromosome} ;
push @{$target{$chromosome}}, (pack "LL", $start, $end);
}
while (my ($k,$v) = each %target) {
my @arr = @$v ;
undef $target{$k} ;
@arr = sort {(unpack "L", $a) <=> (unpack "L", $b)} @arr ;
$target{$k} = \@arr ;
}
print STDERR "\n";
return \%target ;
}
sub contains($$) {
my ($posns, $pos) = @_;
return 0 unless defined $posns ;
my $l1 = 0 ;
my $r1 = scalar @{$posns} ;
while( $l1 != $r1 ) {
my $m = ($l1 + $r1) >> 1;
my ($start,$end) = unpack "LL", $posns->[$m] ;
if( $pos > $end ) { $l1 = $m+1 ; }
else { $r1 = $m ; }
}
return 0 if $l1 == @{$posns} ;
my ($start,$end) = unpack "LL", $posns->[$l1] ;
return ($start <= $pos && $pos < $end)+0 ;
}
1;