Better method for linkage clustering
sarabsethi committed Aug 12, 2016
1 parent c3d2c60 commit ed5bc48
Showing 5 changed files with 331 additions and 355 deletions.
2 changes: 1 addition & 1 deletion SS_Clustering_main.m
@@ -2,7 +2,7 @@
% command on line 150 in TS_local_clear_remove

% Set up default run info file
SS_SetupRunInfo;
SS_SetupRunOptions;

% Load your data matrix then normalise and filter it
SS_NormaliseAndFilter;
68 changes: 14 additions & 54 deletions SS_LinkageClusterOps.m
@@ -11,60 +11,20 @@
% Find best operations
chosenOps = Operations(kmed.CCi);

% Correlation distance between reduced set of operations
reducedOpCorrDists = squareform(pdist(reducedDataMat','correlation'));
reducedOpCorrDists = 1 - abs(1-reducedOpCorrDists);

% Iteratively linkage cluster the operations based on a minimum correlation
% distance within the clusters
keepGoing = true;
nLinkageClusters = kmed.k * 0.75;
shouldIncrement = true;
incAmount = round(max(kmed.k/100,1));
while keepGoing
if shouldIncrement
if nLinkageClusters >= kmed.k; break; end

nLinkageClusters = nLinkageClusters + incAmount;
fprintf('Trying with %i linkage clusters\n',nLinkageClusters);
shouldIncrement = false;
else
break;
end

[distMat_cl,cluster_Groupi,ord] = BF_ClusterDown(reducedOpCorrDists,...
nLinkageClusters,'whatDistance','general');

% Check all clusters generated
group = cluster_Groupi{1};
for i = 1:nLinkageClusters
iGroup = group{i};

% Check all combinations inside each cluster
[A,B] = meshgrid(iGroup,iGroup);
c=cat(2,A',B');
d=reshape(c,[],2);
d = d(d(:,1)~=d(:,2),:);
for j = 1:size(d,1)
% If cluster contains operations which are too different try
% again with more clusters
if reducedOpCorrDists(d(j,1),d(j,2)) > corr_dist_threshold
shouldIncrement = true;
close;
break;
end
end
if shouldIncrement; break; end
end
if shouldIncrement; continue; end

% Successfully clustered
fprintf(['Linkage clustering reduced %i operations to %i groups using '...
'a distance threshold of %f \n'],...
kmed.k, nLinkageClusters,corr_dist_threshold);
keepGoing = false;
colormap(BF_getcmap('redyellowgreen',10));
end
% First calculate number of linkage clusters required to group below threshold
distVec = 1- abs(1 - pdist(reducedDataMat','correlation'));
Z = linkage(distVec,'complete');
T = cluster(Z,'cutoff',corr_dist_threshold,'criterion','distance');
nLinkageClusters = max(T);

% Then use Ben's fancy function to plot it nicely
[distMat_cl,cluster_Groupi,ord] = BF_ClusterDown(distVec,...
nLinkageClusters,'whatDistance','general','linkageMeth','complete');
colormap(BF_getcmap('redyellowgreen',10));

fprintf(['Linkage clustering reduced %i operations to %i groups using '...
'a distance threshold of %f \n'],...
kmed.k, nLinkageClusters,corr_dist_threshold);

opNames = {chosenOps.Name};
orderedNames = opNames(ord);
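For readers following the change: the old version searched for a workable number of clusters by repeatedly re-clustering and brute-force checking every pair inside every cluster, while the new version cuts a complete-linkage dendrogram directly at corr_dist_threshold. A minimal standalone sketch of that idea (toy data and variable names here are illustrative, not taken from the repository):

X = rand(100, 25);                % toy data: 100 time series x 25 operations
corrDistThreshold = 0.2;          % largest allowed 1 - |r| within a cluster

% Absolute-correlation distance between operations (columns of X)
distVec = 1 - abs(1 - pdist(X', 'correlation'));

% Complete linkage bounds the maximum pairwise distance inside each cluster,
% so cutting the tree at the threshold enforces the same constraint the old
% while-loop checked by hand
Z = linkage(distVec, 'complete');
T = cluster(Z, 'cutoff', corrDistThreshold, 'criterion', 'distance');
fprintf('%i operations grouped into %i clusters\n', size(X, 2), max(T));

Because complete linkage merges on the farthest pair, every within-cluster correlation distance is guaranteed to sit below the cutoff in a single pass, which is why the incremental search and the per-pair meshgrid check could be deleted.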
77 changes: 40 additions & 37 deletions SS_SetupRunOptions.m
@@ -1,37 +1,40 @@
function SS_SetupRunInfo( ks , kToUse , op_km_repeats , ts_km_repeats , ...
inMatFileName , outTxtFileName , corr_dist_threshold)

if ~exist('ks','var')
ks = [5,7,10,20:20:100,200,500,1000];
end
if ~exist('kToUse','var')
kToUse = 40;
end
if ~exist('op_km_repeats','var')
op_km_repeats = 50;
end
if ~exist('ts_km_repeats','var')
ts_km_repeats = 500;
end
if ~exist('inMatFileName','var')
inMatFileName = 'HCTSA_new_data';
end
if ~exist('outTxtFileName','var')
outTxtFileName = 'cluster_info.txt';
end
if ~exist('corr_dist_threshold','var')
corr_dist_threshold = 0.2;
end

kIdx = find(ks == kToUse);
if isempty(kIdx)
fprintf('Could not find K = %i in ks - setting kToUse to %i',...
kToUse , ks(length(ks)));
kIdx = length(ks);
end

save('run_options.mat','ks','kIdx','op_km_repeats','ts_km_repeats',...
'inMatFileName','outTxtFileName','corr_dist_threshold');

end

function SS_SetupRunInfo( ks , kToUse , op_km_repeats , ts_km_repeats , ...
inMatFileName , outTxtFileName , corr_dist_threshold , av_ts_cluster_size)

if ~exist('ks','var')
ks = [5,7,10,20:20:100,200,500,1000];
end
if ~exist('kToUse','var')
kToUse = 40;
end
if ~exist('op_km_repeats','var')
op_km_repeats = 50;
end
if ~exist('ts_km_repeats','var')
ts_km_repeats = 500;
end
if ~exist('inMatFileName','var')
inMatFileName = 'HCTSA_new_data';
end
if ~exist('outTxtFileName','var')
outTxtFileName = 'cluster_info.txt';
end
if ~exist('corr_dist_threshold','var')
corr_dist_threshold = 0.2;
end
if ~exist('av_ts_cluster_size','var')
av_ts_cluster_size = 10;
end

kIdx = find(ks == kToUse);
if isempty(kIdx)
fprintf('Could not find K = %i in ks - setting kToUse to %i',...
kToUse , ks(length(ks)));
kIdx = length(ks);
end

save('run_options.mat','ks','kIdx','op_km_repeats','ts_km_repeats',...
'inMatFileName','outTxtFileName','corr_dist_threshold','av_ts_cluster_size');

end
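The options file gains an av_ts_cluster_size argument (used downstream as the target average number of time series per cluster) alongside the existing defaults. A hypothetical call with illustrative argument values; note that MATLAB dispatches on the file name, so the function is invoked as SS_SetupRunOptions even though the declaration inside the file still reads SS_SetupRunInfo:

% All arguments are optional and fall back to the defaults shown above
SS_SetupRunOptions([5, 7, 10, 40, 100], 40, 50, 500, ...
    'HCTSA_new_data', 'cluster_info.txt', 0.2, 10);

% Downstream scripts recover the saved settings with
load('run_options.mat');   % ks, kIdx, op_km_repeats, ts_km_repeats, corr_dist_threshold, av_ts_cluster_size, ...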

9 changes: 5 additions & 4 deletions SS_TestOpsOnTSClusters.m
@@ -1,13 +1,14 @@
function SS_TestOpsOnTSClusters

load('run_options.mat');
load('HCTSA_N.mat');
load('linkage_clustered_ops.mat');
load('resid_variance.mat');

% Cluster the time series using the reduced (K-medoids) operation space
maxIter = 100;
nrepeats = 500;
ts_k = round(size(TS_DataMat,1) / 10);
ts_k = round(size(TS_DataMat,1) / av_ts_cluster_size);

[CCi, Cass, err, Cord] = BF_kmedoids(squareform(S_red), ts_k, maxIter, nrepeats);

@@ -18,11 +19,11 @@
sPlotNum = 1;
plotsPerGroup = 4;
plotColours = get(0,'DefaultAxesColorOrder');

for i = 1:8
numGroups = 10;
for i = 1:numGroups
series = find(Cass == I(i));
for j = 1:min(plotsPerGroup,length(series))
subplot(8,4,sPlotNum);
subplot(numGroups,plotsPerGroup,sPlotNum);
plot(TimeSeries(series(j)).Data,'color',plotColours(max(1,mod(i,8)),:));
set(gca,'XTickLabel','','YTickLabel','')
if j == 1
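The plotting loop now derives both the number of rows and the subplot grid from numGroups and plotsPerGroup instead of hard-coding 8 and 4, and ts_k is computed from the new av_ts_cluster_size option rather than a fixed divisor of 10. A toy sketch of the resulting grid layout (synthetic data; the k-medoids assignment is mocked, this is not the repository's plotting code):

numGroups = 10;
plotsPerGroup = 4;
Cass = randi(numGroups, 200, 1);              % pretend cluster assignment for 200 series
plotColours = get(0, 'DefaultAxesColorOrder');

figure;
for i = 1:numGroups
    series = find(Cass == i);
    for j = 1:min(plotsPerGroup, length(series))
        % one row per cluster, one column per example series
        subplot(numGroups, plotsPerGroup, (i - 1) * plotsPerGroup + j);
        plot(randn(100, 1), 'color', plotColours(max(1, mod(i, 8)), :));
        set(gca, 'XTickLabel', '', 'YTickLabel', '');
    end
end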