forked from eric2302/ceballos_peptides
-
Notifications
You must be signed in to change notification settings - Fork 0
/
4-3_s-f_global_coupling_meg.py
168 lines (139 loc) · 7.01 KB
/
4-3_s-f_global_coupling_meg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# %%
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sstats
import seaborn as sns
from sklearn.linear_model import LinearRegression
from utils import index_structure, non_diagonal_elements, communication_measures
from plot_utils import divergent_green_orange
savefig = False
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# LOAD DATA
###############################################################################
# load all MEG bands
delta_mat = np.load("data/annotations/meg_delta_mat_Schaefer400.npy")
theta_mat = np.load("data/annotations/meg_theta_mat_Schaefer400.npy")
alpha_mat = np.load("data/annotations/meg_alpha_mat_Schaefer400.npy")
beta_mat = np.load("data/annotations/meg_beta_mat_Schaefer400.npy")
lgamma_mat = np.load("data/annotations/meg_lgamma_mat_Schaefer400.npy")
hgamma_mat = np.load("data/annotations/meg_hgamma_mat_Schaefer400.npy")
meg_bands = np.stack([delta_mat, theta_mat, alpha_mat, beta_mat, lgamma_mat, hgamma_mat])
band_names = ['delta', 'theta', 'alpha', 'beta', 'lgamma', 'hgamma']
dist_mat = np.load("data/template_parc-Schaefer400_TianS4_desc-distance.npy")[54:,54:]
sc = np.load("data/template_parc-Schaefer400_TianS4_desc-SC.npy")[54:,54:]
sc_neglog = -1 * np.log(sc / (np.max(sc) + 1))
# load peptide-receptor pairs from Zhang et al. 2021 PNAS
pairs_available = pd.read_csv('data/peptide_receptor_ligand_pairs.csv', index_col=0)
# load receptor and peptide from qc
receptor_genes = pd.read_csv('data/receptor_filtered.csv', index_col=0).index.to_list()
precursor_genes = pd.read_csv('data/precursor_qc.csv', index_col=0).index.to_list()
genes_of_interest = receptor_genes + precursor_genes
# keep only pairs that are in the gene library
pairs_available = pairs_available[pairs_available['Peptide'].isin(genes_of_interest)]
pairs_available = pairs_available[pairs_available['Receptor'].isin(genes_of_interest)]
# load genes of interest from all genes in gene library
all_genes = pd.read_csv('data/abagen_gene_expression_Schaefer2018_400_7N_Tian_Subcortex_S4.csv', index_col=0)
# in all_genes column names, select only ones that are in genes_of_interest
genes = all_genes[all_genes.columns.intersection(genes_of_interest)]
genes = index_structure(genes, structure='CTX')
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# ANNOTATE CONNECTOME WITH PEPTIDE-RECEPTOR PAIRS
###############################################################################
# derive new sc based on peptide-receptor pairs matrix
all_mat = []
pair_names = []
pep_comm = []
for index, row in pairs_available.iterrows():
peptide = genes[row['Peptide']].values[:, np.newaxis]
receptor = genes[row['Receptor']].values[:, np.newaxis]
# make sure peptide and receptor have no zero values
# if so, add minimal value to avoid division by zero
if np.any(peptide == 0):
peptide[np.where(peptide == 0)] = 1e-10
if np.any(receptor == 0):
receptor[np.where(receptor == 0)] = 1e-10
mat = peptide @ receptor.T
all_mat.append(mat)
pair_names.append(f'{row["Receptor"]}-{row["Peptide"]}')
s = sc * mat
s_neglog = -1 * np.log(s / (np.max(s) + 1))
pep_comm.append(communication_measures(s, s_neglog, dist_mat))
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# S-F COUPLING WITH ANNOTATED CONNECTOME
###############################################################################
# outer loop is for each band in MEG bands
# inner loop is for each peptide-receptor pair
all_r2s = []
for band in meg_bands:
y = non_diagonal_elements(band)
band_r2s = []
for comm in pep_comm:
reg = LinearRegression(fit_intercept=True, n_jobs=-1)
X = sstats.zscore(comm, ddof=1, axis=1).T
reg_res = reg.fit(X, y)
yhat = reg.predict(X)
SS_Residual = sum((y - yhat) ** 2)
SS_Total = sum((y - np.mean(y)) ** 2)
r_squared = 1 - (float(SS_Residual)) / SS_Total
num = (1 - r_squared) * (len(y) - 1)
denom = len(y) - X.shape[1] - 1
adjusted_r_squared = 1 - num / denom
band_r2s.append(adjusted_r_squared)
all_r2s.append(band_r2s)
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# PUTTING IT ALL TOGETHER
###############################################################################
# create a dataframe with the results
pred_df = pd.DataFrame([], index=pair_names)
# load results from haemodynamic fc predictability
fc_pred = pd.read_csv('results/fc_pred_peptide_ligand_pairs.csv', index_col=0)
# store predictability from haemodynamic connectivity
pred_df['R2_FC'] = pred_df.index.map(fc_pred['R2_FC'])
# locate no annotation predictability from fc_pred and store for later
adjusted_r_squared_fc = fc_pred.loc['no_annotation']
no_annt_r2s = list(adjusted_r_squared_fc)
# derive structural communication from unannotated connectome
sc_comm_mats = communication_measures(sc, sc_neglog, dist_mat)
# for each band in MEG bands, predict FC using no annotation
for name, band, band_r2s in zip(band_names, meg_bands, all_r2s):
# predict band with no annotation
y = non_diagonal_elements(band)
reg = LinearRegression(fit_intercept=True, n_jobs=-1)
X = sstats.zscore(sc_comm_mats, ddof=1, axis=1).T
reg_res = reg.fit(X, y)
yhat = reg.predict(X)
SS_Residual = sum((y - yhat) ** 2)
SS_Total = sum((y - np.mean(y)) ** 2)
r_squared = 1 - (float(SS_Residual)) / SS_Total
num = (1 - r_squared) * (len(y) - 1)
denom = len(y) - X.shape[1] - 1
adjusted_r_squared = 1 - num / denom
no_annt_r2s.append(adjusted_r_squared)
# store previously computed R2s for this band in pred_df
pred_df[f'R2_{name}'] = band_r2s
# pred_df.to_csv('results/comm_pred_peptide_ligand_pairs.csv')
# plot predictability of FC against alpha
fig, ax = plt.subplots(figsize=(6, 5), dpi=200)
sns.scatterplot(data=pred_df, x='R2_alpha', y='R2_FC', ax=ax)
# add names of pairs
for i, txt in enumerate(pred_df.index):
ax.annotate(txt, (pred_df['R2_alpha'][i], pred_df['R2_FC'][i]), fontsize=6)
# contrast line with no annotation predictability
no_annt_fc_r2 = no_annt_r2s[0]
no_annt_alpha_r2 = no_annt_r2s[3]
ax.axvline(no_annt_alpha_r2, color='lightblue')
ax.axhline(no_annt_fc_r2, color='lightblue')
if savefig:
plt.savefig('figs/structure_function_annotated_connectome.pdf')
# %%
# subtract predictability of having no annotation from predictability of each band
diff_df = pd.DataFrame([], index=pair_names)
for i,name in enumerate(['FC'] + band_names):
diff_df[f'R2_{name}'] = pred_df[f'R2_{name}'] - no_annt_r2s[i]
# diff_df.to_csv('results/comm_pred_diff_peptide_ligand_pairs.csv')
# plot relative predictability difference against baseline
fig, ax = plt.subplots(figsize=(6, 5), dpi=200)
pair_names = pred_df.index
sns.heatmap(diff_df, ax=ax, cmap=divergent_green_orange(), xticklabels=True, yticklabels=True, cbar=True, square=True,
cbar_kws={'label': 'Adjusted R$^2$ - No annotation'}, center=0)