-
Notifications
You must be signed in to change notification settings - Fork 0
/
00_sample_generate_data.py
134 lines (113 loc) · 4.15 KB
/
00_sample_generate_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
This is an example of deriving key variables from raw pyspi output.
"""
from pathlib import Path
import numpy as np
from tqdm import tqdm, trange
import scipy.stats as sstats
import h5py
from .utils import *
# Directory layout used throughout this script: raw pyspi output is read from
# data/raw and derived arrays are written to data/derivatives.
data_dir = Path("data")
deriv_dir = data_dir / "derivatives"
pyspi_res_dir = data_dir / "raw"
pyspi_hcp_schaefer100x7_dir = pyspi_res_dir / "pyspi_hcp_schaefer100x7"

# Subject IDs are read as strings because they are interpolated into the
# per-run HDF5 file names below (presumably one ID per line -- TODO confirm).
# ndmin=1 keeps the result a list even when the file holds a single ID;
# without it np.loadtxt returns a 0-d array and .tolist() yields a bare str,
# which the subject loops would then iterate character by character.
hcp_subj_list = np.loadtxt(
    pyspi_res_dir / "subjects_reinder326.txt", dtype=str, ndmin=1
).tolist()
# pyspi_hcp_schaefer100x7_subj_term_profile
# For every (subject, run): take the matrix of each cleaned pyspi term, keep the
# entries selected by schaefer100x7_iu (presumably upper-triangle indices --
# TODO confirm against utils), and store the Spearman correlation between every
# pair of terms computed across those entries.
pyspi_hcp_schaefer100x7_subj_term_profile = np.zeros(
    (hcp_subj_dim, hcp_run_dim, pyspi_clean_dim, pyspi_clean_dim)
)
for subj_it, subj_id in tqdm(
    enumerate(hcp_subj_list), desc="subj_it", total=hcp_subj_dim
):
    for run_it in trange(hcp_run_dim, desc="run_it", leave=False):
        # The file stem and the dataset key inside the file are the same string.
        run_key = f"subj-{subj_id}_run-{run_it+1}"
        # Context manager so the HDF5 handle is released even if indexing fails.
        with h5py.File(pyspi_hcp_schaefer100x7_dir / f"{run_key}.h5", "r") as f:
            curr_pyspi = f[run_key]
            # One row per cleaned term; columns are the selected matrix entries.
            # Materialised as an in-memory array before the file is closed.
            curr_pyspi_iu = np.array(
                [
                    curr_pyspi[pyspi_clean_indices[term_it], :, :][schaefer100x7_iu]
                    for term_it in range(pyspi_clean_dim)
                ]
            )
        # axis=1 makes each row (term) a variable, giving a term-by-term matrix.
        pyspi_hcp_schaefer100x7_subj_term_profile[
            subj_it, run_it, :, :
        ] = sstats.spearmanr(curr_pyspi_iu, axis=1).statistic
np.save(
    deriv_dir / "pyspi_hcp_schaefer100x7_subj_term_profile_updated.npy",
    pyspi_hcp_schaefer100x7_subj_term_profile,
)
# pyspi_hcp_schaefer100x7_term_profile_mean/var
# Merge the leading (subject, run) axes so both statistics are taken over every
# subject-run pair at once; NaN entries are ignored by nanmean/nanvar.
term_profile_stack = pyspi_hcp_schaefer100x7_subj_term_profile.reshape(
    (-1, pyspi_clean_dim, pyspi_clean_dim)
)
pyspi_hcp_schaefer100x7_term_profile_mean = np.nanmean(term_profile_stack, axis=0)
pyspi_hcp_schaefer100x7_term_profile_var = np.nanvar(term_profile_stack, axis=0)

# Write both summaries next to the other derivatives.
for out_name, out_arr in (
    (
        "pyspi_hcp_schaefer100x7_term_profile_mean_updated.npy",
        pyspi_hcp_schaefer100x7_term_profile_mean,
    ),
    (
        "pyspi_hcp_schaefer100x7_term_profile_var_updated.npy",
        pyspi_hcp_schaefer100x7_term_profile_var,
    ),
):
    np.save(deriv_dir / out_name, out_arr)
# resave for efficiency
# Regroup the raw output from one HDF5 file per (subject, run) into one set of
# files per cleaned term, so a later step can load a single term without
# opening every subject/run file.
pyspi_hcp_schaefer100x7_resave_dir = pyspi_res_dir / "schaefer100x7_resave_clean_terms"
pyspi_hcp_schaefer100x7_resave_dir.mkdir(exist_ok=True)
for term_i in trange(pyspi_clean_dim):
    # Full matrices of this term for every subject and run.
    term_i_mats = np.zeros(
        (hcp_subj_dim, hcp_run_dim, schaefer100x7_dim, schaefer100x7_dim)
    )
    for subj_it, subj_id in tqdm(
        enumerate(hcp_subj_list), leave=False, total=hcp_subj_dim
    ):
        for run_it in range(hcp_run_dim):
            # The file stem and the dataset key inside the file are the same string.
            run_key = f"subj-{subj_id}_run-{run_it+1}"
            with h5py.File(pyspi_hcp_schaefer100x7_dir / f"{run_key}.h5", "r") as f:
                term_i_mats[subj_it, run_it, :, :] = f[run_key][
                    pyspi_clean_indices[term_i], :, :
                ]
    # Per subject: NaN-ignoring mean over runs, then keep only the entries
    # selected by schaefer100x7_iu (presumably upper-triangle indices -- TODO
    # confirm against utils).
    term_i_mats_iu = np.array(
        [
            np.nanmean(term_i_mats[subj_idx, :, :, :], axis=0)[schaefer100x7_iu]
            for subj_idx in range(hcp_subj_dim)
        ]
    )
    # Rank the selected entries within each subject (row-wise).
    term_i_mats_iu_ranked = np.apply_along_axis(sstats.rankdata, 1, term_i_mats_iu)
    # Each array goes to its own file; the dataset inside carries the file stem.
    for suffix, term_data in (
        ("", term_i_mats),
        ("_iu", term_i_mats_iu),
        ("_iu_ranked", term_i_mats_iu_ranked),
    ):
        dset_name = f"term_{term_i}{suffix}"
        with h5py.File(
            pyspi_hcp_schaefer100x7_resave_dir / f"{dset_name}.h5", "w"
        ) as f:
            f.create_dataset(dset_name, data=term_data)
# pyspi_hcp_schaefer100x7_term_mean
# For each cleaned term, average its resaved matrices over all subjects and
# runs (axes 0 and 1), ignoring NaNs.
pyspi_hcp_schaefer100x7_term_mean = np.zeros(
    (pyspi_clean_dim, schaefer100x7_dim, schaefer100x7_dim)
)
for term_i in trange(pyspi_clean_dim):
    # Context manager so each per-term file is closed before the next one is
    # opened; the reduction must happen while the dataset is still readable.
    with h5py.File(
        pyspi_hcp_schaefer100x7_resave_dir / f"term_{term_i}.h5", "r"
    ) as f:
        term_i_mats = f[f"term_{term_i}"]
        pyspi_hcp_schaefer100x7_term_mean[term_i, :, :] = np.nanmean(
            term_i_mats, axis=(0, 1)
        )
np.save(
    deriv_dir / "pyspi_hcp_schaefer100x7_term_mean.npy",
    pyspi_hcp_schaefer100x7_term_mean,
)