-
Notifications
You must be signed in to change notification settings - Fork 8
/
energy_dist_test_dmat.m
executable file
·75 lines (60 loc) · 2.27 KB
/
energy_dist_test_dmat.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
function [reject, pval, estat, B] = energy_dist_test_dmat(s1, s2, alpha, B)
% Do the two-sample energy statistic test for equality of distributions.
% s1 = d-by-N1 set of N1 samples of d-dimensional variables
% s2 = d-by-N2 set of N2 samples of d-dimensional variables
% alpha = significance level to use. The hypothesis that the distributions
% are equal is rejected if more than 100*(1-alpha) percent of
% the permutation samples from the pooled data are smaller than
% the observed value.
% B = number of permutation trials to use for approximating the
% distribution of the test statistic. If not provided, is
% automatically computed from alpha.
%
% The test compares the data in s1 to the data in s2 to see if the samples
% appear to come from the same distribution.
%
% reject = True if rejected at alpha significance level, False otherwise.
% i.e., if true, the data supports the notion that s1 and s2 are
% from different distributions at significance level alpha.
% If false, it is undetermined.
% pval = ratio of permutation samples with energy distance greater than or
% equal to the observed energy distance (includes the observed).
% estat = vector of energy statistics for the B+1 permutation samples
% (includes the non-permuted s1, s2)
if nargin < 4
B = 5*ceil(1/alpha)-1;
end
if 1/(B+1) >= alpha
disp('Warning: Choice of B may not be appropriate for desired signifcance, alpha');
end
% Note, need the same d for s1 and s2.
[d, n1] = size(s1);
[d, n2] = size(s2);
dmat = dist_matrix(s1, s2);
N = n1+n2;
pool = [s1, s2];
estat = zeros(B+1, 1);
%eobs = EnSt2(s1, s2);
id1 = 1:n1;
id2 = (n1+1):(n1+n2);
eobs = EnSt2DistMat(id1, id2, dmat);
%estat(1) = eobs;
for j = 1:B+1
%idx = randsample(N, N);
idx = randperm(N);
%x = pool(:, idx(1:n1));
%y = pool(:, idx(n1+1:end));
%estat(j) = EnSt2(x, y);
% The distance-matrix-based version used the permuted indices of the pooled
% sample, not the samples themselves.
x = idx(1:n1);
y = idx(n1+1:end);
estat(j) = EnSt2DistMat(x, y, dmat);
end
pval = sum(eobs < estat) / (B+1);
if alpha > pval
reject = true;
else
reject = false;
end
end