-
Notifications
You must be signed in to change notification settings - Fork 1
/
comparison_datasets.py
47 lines (39 loc) · 1.68 KB
/
comparison_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# class from: https://github.com/tanjimin/C.Origami/blob/main/src/corigami/data/data_feature.py
# made for comparison with the c.origami to compare with HiC data
import numpy as np
class HiComparison():
def load(self, path = None):
self.hic = self.load_hic(path)
def get(self, chromosome, start, window = 2097152, res = 10000):
start_bin = int(start / res)
range_bin = int(window / res)
end_bin = start_bin + range_bin
hic_mat = np.log(self.diag_to_mat(self.hic, start_bin, end_bin)+1)
return hic_mat
def load_hic(self, path):
#print(f'Reading Hi-C: {path}')
return dict(np.load(path))
def diag_to_mat(self, ori_load, start, end):
'''
Only accessing 256 x 256 region max, two loops are okay
'''
square_len = end - start
diag_load = {}
for diag_i in range(square_len):
diag_load[str(diag_i)] = ori_load[str(diag_i)][start : start + square_len - diag_i]
diag_load[str(-diag_i)] = ori_load[str(-diag_i)][start : start + square_len - diag_i]
start -= start
end -= start
diag_region = []
for diag_i in range(square_len):
diag_line = []
for line_i in range(-1 * diag_i, -1 * diag_i + square_len):
if line_i < 0:
diag_line.append(diag_load[str(line_i)][start + line_i + diag_i])
else:
diag_line.append(diag_load[str(line_i)][start + diag_i])
diag_region.append(diag_line)
diag_region = np.array(diag_region).reshape(square_len, square_len)
return diag_region
def __len__(self):
return len(self.hic['0'])