forked from klusta-team/klustakwik
-
Notifications
You must be signed in to change notification settings - Fork 0
/
precomputations.cpp
297 lines (263 loc) · 8.97 KB
/
precomputations.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
/*
* precomputations.cpp
*
* Used for certain precomputations designed to speed up the main loops.
*
* Created on: 19 Nov 2011
* Author: dan
*/
#include "klustakwik.h"
#include<algorithm>
using namespace std;
// Handles doing all the Initial precomputations once the data has been loaded into
// the class.
void KK::DoInitialPrecomputations()
{
if(UseDistributional)
{
// Precompute the indices of the unmasked dimensions for each point
ComputeUnmasked();
// Compute the order of points to consider that minimises the number of
// times the mask changes
ComputeSortIndices();
// Now compute the points at which the mask changes in sorted order
ComputeSortedUnmaskedChangePoints();
// Compute the sum of the masks/float masks for each point (used for computing the cluster penalty)
PointMaskDimension();
// Precompute the noise means and variances
ComputeNoiseMeansAndVariances();
ComputeCorrectionTermsAndReplaceData();
}
}
// Handles doing all the precomputations once the data has been loaded into
// the class. Note that this function has to be called after Data or Masks
// has changed. This is called by TrySplits() and Cluster()
void KK::DoPrecomputations()
{
if(UseDistributional)
{
// Precompute the indices of the unmasked dimensions for each point
ComputeUnmasked();
// Compute the order of points to consider that minimises the number of
// times the mask changes
ComputeSortIndices();
// Now compute the points at which the mask changes in sorted order
ComputeSortedUnmaskedChangePoints();
ComputeCorrectionTermsAndReplaceData();
}
}
void KK::ComputeUnmasked()
{
integer i=0;
if(Unmasked.size() || UnmaskedInd.size())
{
Error("Precomputations have already been done, this indicates a bug.\n");
Error("Error occurred in ComputeUnmasked().\n");
abort();
}
for(integer p=0; p<nPoints; p++)
{
UnmaskedInd.push_back(i);
for(integer j=0; j<nDims; j++)
{
if(Masks[p*nDims+j])
{
Unmasked.push_back(j);
i++;
}
}
}
UnmaskedInd.push_back(i);
}
// This function computes the points at which the mask changes if we iterate
// through the points in sorted order defined by ComputeSortIndices(). After
// the function is called, SortedMaskChange[SortedIndices[q]] is true if
// the mask for SortedIndices[q] is different from the mask for
// SortedIndices[q-1]
void KK::ComputeSortedUnmaskedChangePoints()
{
if(SortedMaskChange.size()>0)
{
Error("Precomputations have already been done, this indicates a bug.\n");
Error("Error occurred in ComputeSortedUnmaskedChangePoints().\n");
abort();
}
SortedMaskChange.resize(nPoints);
SafeArray<integer> safeSortedMaskChange(SortedMaskChange, "CSUCP:SMC");
// The first point when we iterate through the points in sorted order is
// SortedIndices[0] and we consider the mask as having 'changed' for this
// first point, because we use the mask having changed to signal that
// we should recompute the matrices that depend on the masks.
safeSortedMaskChange[SortedIndices[0]] = true;
SafeArray<integer> oldmask(Masks, SortedIndices[0]*nDims,
"ComputeSortedUnmaskedChangePoints:oldmask");
integer numchanged = 0;
for(integer q=1; q<nPoints; q++)
{
integer p = SortedIndices[q];
SafeArray<integer> newmask(Masks, p*nDims,
"ComputeSortedUnmaskedChangePoints:newmask");
bool changed = false;
for(integer i=0; i<nDims; i++)
{
if(newmask[i]!=oldmask[i])
{
oldmask = newmask;
changed = true;
numchanged++;
break;
}
}
safeSortedMaskChange[p] = changed;
}
}
///////////////// SORTING /////////////////////////////////////////////////
// Comparison class, the operator()(i, j) function is used to provide the
// comparison i<j passed to stl::sort. Here i and j are the indices of two
// points, and i<j means that mask_i < mask_j in lexicographical order (we
// could change the ordering, as long as different masks are not considered
// equal).
class KKSort
{
public:
KK *kk;
KKSort(KK *kk) : kk(kk) {};
bool operator()(const integer i, const integer j) const;
};
// Less than operator for KK.Masks, it's just a lexicographical comparison
bool KKSort::operator()(const integer i, const integer j) const
{
integer nDims = kk->nDims;
for(integer k=0; k<nDims; k++)
{
integer x = kk->Masks[i*nDims+k];
integer y = kk->Masks[j*nDims+k];
if(x<y) return true;
if(x>y) return false;
}
return false;
}
/*
* This function computes the order in which indices should be considered in
* order to minimise the number of times the mask changes. We do this simply
* by creating an array SortedIndices=[0,1,2,...,nPoints-1] and then sorting
* this array where i<j if mask_i<mask_j in lexicographical order. The sorting
* is performed by stl::sort and the comparison function is provided by the
* KKSort class above.
*
* The optional force flag forces a recomputation of the sorted indices, which
* is necessary only in TrySplits(), but we should probably change this by
* refactoring.
*/
void KK::ComputeSortIndices()
{
KKSort kksorter(this);
if(SortedIndices.size())
{
Error("Precomputations have already been done, this indicates a bug.\n");
Error("Error occurred in ComputeSortIndices().\n");
abort();
}
SortedIndices.resize(nPoints);
for(integer i=0; i<nPoints; i++)
SortedIndices[i] = i;
stable_sort(SortedIndices.begin(), SortedIndices.end(), kksorter);
}
//void KK::ComputeNoiseMeansAndVariances()
//{
//For TrySplits
//maintain noise mean and variance of each channel
// maintain number of masked points in each channel
// Output("ComputeNoiseMeansandVariances ");
// NoiseMean.resize(nDims);
// NoiseVariance.resize(nDims);
// nMasked.resize(nDims);
//}
void KK::ComputeNoiseMeansAndVariances()
{
// compute noise mean and variance of each channel
// compute number of masked points in each channel
Output("Masked EM: Computing Noise Means and Variances \n -----------------------------------------");
NoiseMean.resize(nDims);
NoiseVariance.resize(nDims);
nMasked.resize(nDims);
// for(integer i=0; i<nDims; i++)
// { NoiseMean[i] = 0;
// NoiseVariance[i] = 0;
// nMasked[i] = 0;
// }
for(integer p=0; p<nPoints; p++)
for(integer i=0; i<nDims; i++)
if(!Masks[p*nDims+i])
{
scalar thisdata = Data[p*nDims+i];
NoiseMean[i] += thisdata;
// NoiseVariance[i] += thisdata*thisdata; // sum of squares
nMasked[i]++;
}
for(integer i=0; i<nDims; i++)
{
if(nMasked[i]==0)
{
NoiseMean[i] = 0.0;
NoiseVariance[i] = 0;
// NoiseVariance[i] = 1.0;
} else
{
NoiseMean[i] /= (scalar)nMasked[i];
// NoiseVariance[i] /= (scalar)nMasked[i]; // E[X^2]
// NoiseVariance[i] -= NoiseMean[i]*NoiseMean[i]; // -E[X]^2
}
}
for(integer p=0; p<nPoints; p++)
for(integer i=0; i<nDims; i++)
if(!Masks[p*nDims+i])
{ scalar thisdata = Data[p*nDims+i];
NoiseVariance[i] += (thisdata-NoiseMean[i])*(thisdata-NoiseMean[i]);
}
for(integer i=0; i<nDims; i++)
{
if(nMasked[i]==0)
{ NoiseVariance[i]= 0;
}else {
NoiseVariance[i] /= (scalar)nMasked[i];
}
}
// for(integer i=0; i<nDims; i++)
// { Output(" NoiseMean[%d] = %f",(int)i,NoiseMean[i]);
// Output(" NoiseVariance[%d] = %f",(int)i,NoiseVariance[i]);
// Output(" nMasked[%d] = %d",(int)i,(int)nMasked[i]);
// }
}
void KK::ComputeCorrectionTermsAndReplaceData()
{
for(integer p=0; p<nPoints; p++)
for(integer i=0; i<nDims; i++)
{
scalar x = Data[p*nDims+i];
scalar w = FloatMasks[p*nDims+i];
scalar nu = NoiseMean[i];
scalar sigma2 = NoiseVariance[i];
scalar y = w*x+(1-w)*nu;
scalar z = w*x*x+(1-w)*(nu*nu+sigma2);
CorrectionTerm[p*nDims+i] = z-y*y;
Data[p*nDims+i] = y;
}
}
//SNK PointMaskDimension() computes the sum of the masks/float masks for each point
void KK::PointMaskDimension()
{
integer i,p;
for (p=0; p<nPoints; p++)
{
UnMaskDims[p]=0;
for (i=0;i<nDims;i++)
{
UnMaskDims[p] += FloatMasks[p*nDims+i];
}
if (Debug)
{
Output("UnMaskDims[%d] = %f ", (int)p, UnMaskDims[p]);
}
}
}