-
Notifications
You must be signed in to change notification settings - Fork 10
/
scpt_genes_cog_pls.m
160 lines (126 loc) · 5.58 KB
/
scpt_genes_cog_pls.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
% This script runs PLS on gene expression and neurosynth probability maps.
% Significance of latent variables is assessed using a
% spatial autocorrelation-preserving permutation test. Correlation between
% gene and term scores are cross-validated using a distance-based set
% assignment (see fcn_crossval_pls_brain_obvs.m). Most contributing terms
% are retained and scores are distributed among structural and functional
% networks.
% PLS code (pls_analysis.m) can be downloaded at
% http://pls.rotman-baycrest.on.ca/source/ ("Latest PLS Applications")
%% load
load('gene_expression.mat') % node by gene expression matrix
load('neurosynth.mat') % node by term probability matrix
load('nodes.mat') % relevant node indices
load('genes.mat') % relevant gene indices
load('terms.mat') % relevant term indices and names
load('spins.mat') % spin test indices
load('coords.mat') % (x,y,z) coordinates for brain regions
%% PLS analysis
% shown here for neurosynth - replicated for brainmap
% set up PLS analysis
n = nodes.scale125.lefthem;
g = genes.scale125.stable;
t = terms.all;
X = zscore(expression125(:,g));
Y = zscore(cogact125(n,t));
nnodes = length(n);
nterms = length(t);
ngenes = length(g);
% behav pls
option.method = 3;
option.num_boot = 10000;
option.num_perm = 0; % zero permutations because they will be run manually later to account for spatial autocorrelation
option.stacked_behavdata = Y;
exp{1} = X;
result = pls_analysis(exp, nnodes, 1, option); % this is the PLS result that is used in all other analyses
% save('result.mat','result')
%% spin test
% this code comes from pls_analysis.m and is modified to account for a
% spatial autocorrelation-preserving permutation test
spins = scale125spins; % spatial autocorrelation-preserving permutation assignments
nspins = 10000; % number of permutations ("spins")
s_spins = zeros(nterms,nspins); % singular values
option.method = 3; % set up PLS
option.num_boot = 0;
option.num_perm = 0;
exp{1} = X;
for k = 1:nspins
option.stacked_behavdata = Y(spins(:,k),:); % permute neurosynth matrix
datamatsvd=rri_xcor(option.stacked_behavdata,exp{1},0); % refer to pls_analysis.m
[r,c] = size(datamatsvd);
if r <= c
[pu, sperm, pv] = svd(datamatsvd',0);
else
[pv, sperm, pu] = svd(datamatsvd,0);
end
% rotate pv to align with the original v
rotatemat = rri_bootprocrust(result.v,pv);
% rescale the vectors
pv = pv * sperm * rotatemat;
sperm = sqrt(sum(pv.^2));
s_spins(:,k) = sperm;
end
sprob = zeros(nterms,1); % p-value for each latent variable
for k = 1:nterms % get permuted (via spin test) p-values
sprob(k) = (1+(nnz(find(s_spins(k,:)>=result.s(k)))))/(1+nspins);
end
%% cross validation
% cross-validate the correlation between gene and term scores using
% distance-based set assignment
[rtrain,rtest] = fcn_crossval_pls_brain_obvs(exp,Y,100,0.75,1,coords125(116:226,:));
% visualize boxplots as distributions of the correlations computed using
% the training and testing set
figure;
boxplot([rtrain rtest])
set(gca,'xticklabel',{'train','test'})
ylabel('correlation')
title('cross validation')
%% get terms
[B,I] = sort(result.boot_result.orig_corr(:,lv),'descend'); % sort loadings
npos = length(find(result.boot_result.orig_corr(:,lv) > 0)); % number of positive loadings
nneg = length(find(result.boot_result.orig_corr(:,lv) < 0)); % number of negative loadings
tpos = I(1:floor(0.25*npos)); % get top 25% of positive loadings
tneg = I(end-floor(0.25*nneg):end); % get top 25% of negative loadings
pos_terms = t(tpos); % these are the positive terms contributing most
neg_terms = t(tneg); % these are the negative terms contributing most
%% distribute PLS-derived gene and term scores
% intrinsic (resting-state) networks from Yeo et al., 2011
load('rsn.mat') % load rsn network assignment
load('rsn_names.mat') % load rsn names
rsn = rsn(109:end); % keep left hemisphere only
figure; % distribute scores (as boxplots) in 7 networks
subplot(2,1,1)
boxplot(result.usc(:,1),rsn) % gene scores
xticklabel(rsn_names)
title('gene scores')
subplot(2,1,2)
boxplot(result.vsc(:,1),rsn) % term scores
xticklabel(rsn_names)
title('term scores')
% von economo cytoarchitectonic classes
load('voneconomo.mat') % load von economo network assignment
load('ve_names.mat') % load von economo names
voneconomo = voneconomo(109:end); % keep left hemisphere only
figure; % distribute scores (as boxplots) in 7 networks
subplot(2,1,1)
boxplot(result.usc(:,1),voneconomo) % gene scores
xticklabel(ve_names)
title('gene scores')
subplot(2,1,2)
boxplot(-result.vsc(:,1),voneconomo) % term scores
xticklabel(ve_names)
title('term scores')
% mesulam levels of laminar differentiation
% level names from https://github.com/MICA-MNI/micaopen/tree/master/MPC
mesulam = table2array(readtable('mesulam_mapping.csv')); % get mesulam class assignments
mesulam = mesulam(109:end); % keep left hemisphere only
mesulam_names = {'paralimbic','heteromodal','unimodal','idiotypic'}; % class names
figure; % distribute scores (as boxplots) in 4 classes
subplot(2,1,1)
boxplot(result.usc(:,1),mesulam) % gene scores
xticklabel(mesulam_names)
title('gene scores')
subplot(2,1,2)
boxplot(result.vsc(:,1),mesulam) % term scores
xticklabel(mesulam_names)
title('term scores')