-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbigwig_normalize.py
46 lines (37 loc) · 1.33 KB
/
bigwig_normalize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import sys
import numpy as np
import pyBigWig
def bw_write_chrom(bw, chrom, cov):
"""write the coverage of a chromosome into the bw file"""
# note: np.array supports slice/modify by index vector
run_end = np.append(np.where(np.diff(cov) != 0), len(cov)-1) + 1 # 1-based ends
run_start = run_end - np.diff(np.append(0, run_end)) # 0-based starts
run_value = cov[run_start]
# ignore 0 values
non_zero = run_value > 0
run_start = run_start[non_zero]
run_end = run_end[non_zero]
run_value = run_value[non_zero]
if len(run_value) > 0:
bw.addEntries([chrom] * np.sum(non_zero),
run_start, ends = run_end, values = run_value)
return
def main(bw_in_path, bw_out_path):
bw_in = pyBigWig.open(bw_in_path)
bw_out = pyBigWig.open(bw_out_path, 'w')
# write header
bw_header = [(i, bw_in.chroms(i)) for i in bw_in.chroms().keys()]
bw_out.addHeader(bw_header)
# count total reads
total = 0
for i, j in bw_header:
total += np.nansum(bw_in.values(i, 0, j))
# write normalized values
for i, j in bw_header:
new_values = bw_in.values(i, 0, j)
new_values = np.nan_to_num(new_values)/total*1000000
bw_write_chrom(bw_out, i, new_values)
bw_out.close()
return
if __name__ == '__main__':
main(*sys.argv[1:])