diff --git a/io.cpp b/io.cpp index 6fca701..985e17e 100644 --- a/io.cpp +++ b/io.cpp @@ -314,7 +314,7 @@ void KK::SaveOutput() } // write output to .clu file - with 1 added to cluster numbers, and empties removed. -void KK::SaveTempOutput() +void KK::SaveTempOutput(integer Iter) { integer c; uinteger p; @@ -354,7 +354,7 @@ void KK::SaveTempOutput() // print temp.clu file //This is the clu for the current iteration //This fixes the bug of having a trivial temp.clu file if there is only one iteration - sprintf(fname, "%s.temp.clu.%d", FileBase, (int)ElecNo); + sprintf(fname, "%s.temp.%d.clu.%d", FileBase, (int)Iter, (int)ElecNo); fp = fopen_safe(fname, "w"); fprintf(fp, "%d\n", (int)MaxClass); diff --git a/klustakwik.cpp b/klustakwik.cpp index 53dfc99..6a26633 100644 --- a/klustakwik.cpp +++ b/klustakwik.cpp @@ -1112,7 +1112,7 @@ void KK::LoadClu(char *CluFile) integer KK::TrySplits() { integer c, cc, c2, p, p2, DidSplit = 0; - scalar Score, NewScore, UnsplitScore, SplitScore; + CompoundScore Score, NewScore, UnsplitScore, SplitScore; integer UnusedCluster; //KK K2; // second KK structure for sub-clustering //KK K3; // third one for comparison @@ -1208,7 +1208,7 @@ integer KK::TrySplits() // Fix by Michaël Zugaro: replace next line with following two lines // if(SplitScore=2)) { + if((SplitScore.total=2)) { if (AlwaysSplitBimodal) { DidSplit = 1; @@ -1255,9 +1255,9 @@ integer KK::TrySplits() //Output("About to compute K3 class penalties"); if (UseDistributional) K3.ComputeClassPenalties(); //SNK Fixed bug: Need to compute the cluster penalty properly, cluster penalty is only used in UseDistributional mode NewScore = K3.ComputeScore(); - Output("\nSplitting cluster %d changes total score from " SCALARFMT " to " SCALARFMT "\n", (int)c, Score, NewScore); + Output("\nSplitting cluster %d changes total score from " SCALARFMT " to " SCALARFMT "\n", (int)c, Score.total, NewScore.total); - if (NewScore < Score) + if (NewScore.total < Score.total) { DidSplit = 1; Output("\n 
So it's getting split into cluster %d.\n", (int)UnusedCluster); @@ -1277,7 +1277,7 @@ integer KK::TrySplits() } // ComputeScore() - computes total score. Requires M, E, and C steps to have been run -scalar KK::ComputeScore() +CompoundScore KK::ComputeScore() { integer p; // integer debugadd; @@ -1308,7 +1308,8 @@ scalar KK::ComputeScore() } } - return Score; + CompoundScore cscore(Score - penalty, Score, penalty); + return cscore; } // Initialise starting conditions randomly @@ -1449,14 +1450,14 @@ void KK::StartingConditionsFromMasks() // optional start file loads this cluster file to start iteration // if Recurse is 0, it will not try and split. // if InitRand is 0, use cluster assignments already in structure -scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, +CompoundScore KK::CEM(char *CluFile, integer Recurse, integer InitRand, bool allow_assign_to_noise) { integer p; integer nChanged; integer Iter; vector OldClass(nPoints); - scalar Score, OldScore; + CompoundScore Score, OldScore; integer LastStepFull; // stores whether the last step was a full one integer DidSplit; @@ -1482,7 +1483,7 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, // main loop Iter = 0; FullStep = 1; - Score = 0.0; + Score = CompoundScore(0.0, 0.0, 0.0); do { // Store old classifications for(p=0; pChangedThresh*nPoints || nChanged == 0 || Iter%FullStepEvery==0 - || Score > OldScore // SNK: Resurrected - //SNK Score decreases ARE because of quick steps! 
+ || ((Score.raw > OldScore.raw) && (Score.total > OldScore.total)) ) ; if (Iter>MaxIter) { @@ -1571,12 +1571,12 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, } //Save a temporary clu file when not splitting - if ((SaveTempCluEveryIter && Recurse) && (OldScore> Score)) + if ((SaveTempCluEveryIter && Recurse) && (OldScore.raw >= Score.raw)) { - SaveTempOutput(); //SNK Saves a temporary output clu file on each iteration + SaveTempOutput(Iter); //SNK Saves a temporary output clu file on each iteration Output("Writing temp clu file \n"); - Output("Because OldScore, %f, is greater than current (better) Score,%f \n ", OldScore, Score); + Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw); } // try splitting @@ -1585,11 +1585,11 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, //Output("Iter-SplitFirst %d \n",(int)(Iter-SplitFirst)); if ((Recurse && SplitEvery>0) && ( Iter==SplitFirst ||( Iter>=SplitFirst+1 && (Iter-SplitFirst)%SplitEvery==SplitEvery-1 ) || (nChanged==0 && LastStepFull) ) ) { - if (OldScore> Score) //This should be trivially true for the first run of KlustaKwik + if (OldScore.raw >= Score.raw) //This should be trivially true for the first run of KlustaKwik { - SaveTempOutput(); //SNK Saves a temporary output clu file before each split + SaveTempOutput(Iter); //SNK Saves a temporary output clu file before each split Output("Writing temp clu file \n"); - Output("Because OldScore, %f, is greater than current (better) Score,%f \n ", OldScore, Score); + Output("Because OldScore.raw, %f, is greater than current (better) Score.raw,%f \n ", OldScore.raw, Score.raw); } DidSplit = TrySplits(); } else DidSplit = 0; @@ -1606,7 +1606,7 @@ scalar KK::CEM(char *CluFile, integer Recurse, integer InitRand, // then run CEM on this // then use these clusters to do a CEM on the full data // It calls CEM whenever there is no initialization clu file (i.e. 
the most common usage) -scalar KK::Cluster(char *StartCluFile=NULL) +CompoundScore KK::Cluster(char *StartCluFile = NULL) { if (Debug) { @@ -1783,8 +1783,8 @@ KK::~KK() // Main loop int main(int argc, char **argv) { - scalar Score; - scalar BestScore = HugeScore; + CompoundScore Score; + CompoundScore BestScore(HugeScore, HugeScore, 0.0); integer p, i; SetupParams((integer)argc, argv); // This function is defined in parameters.cpp Output("Starting KlustaKwik. Version: %s\n", VERSION); @@ -1827,7 +1827,7 @@ int main(int argc, char **argv) iterationtime = (clock()-iterationtime)/(scalar) CLOCKS_PER_SEC; Output("Time taken for this iteration:" SCALARFMT " seconds.\n", iterationtime); - Output(" %d->%d Clusters: Score " SCALARFMT "\n\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, BestScore); + Output(" %d->%d Clusters: Score " SCALARFMT "\n\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, BestScore.total); for(p=0; p%d Clusters: Score " SCALARFMT ", best is " SCALARFMT "\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, Score, BestScore); - if (Score < BestScore) + Output(" %d->%d Clusters: Score " SCALARFMT ", best is " SCALARFMT "\n", (int)K1.nStartingClusters, (int)K1.nClustersAlive, Score.total, BestScore.total); + if (Score.total < BestScore.total) { Output("THE BEST YET!\n"); // New best classification found BestScore = Score; diff --git a/klustakwik.h b/klustakwik.h index 0a35426..78ab2ff 100644 --- a/klustakwik.h +++ b/klustakwik.h @@ -23,6 +23,16 @@ using namespace std; +class CompoundScore +{ +public: + scalar raw; + scalar total; + scalar penalty; + CompoundScore() : raw(0.0), total(0.0), penalty(0.0) {}; + CompoundScore(scalar _raw, scalar _total, scalar _penalty) : raw(_raw), total(_total), penalty(_penalty) {}; +}; + class KK { public: /////////////// CONSTRUCTORS /////////////////////////////////////////////// @@ -58,7 +68,7 @@ class KK { // Precomputations for cluster masks void ComputeClusterMasks(); // Score and penalty functions - 
scalar ComputeScore(); + CompoundScore ComputeScore(); scalar Penalty(integer n); void ComputeClassPenalties(); // Main algorithm functions @@ -67,13 +77,13 @@ class KK { void CStep(bool allow_assign_to_noise=true); void ConsiderDeletion(); integer TrySplits(); - scalar CEM(char *CluFile, integer recurse, integer InitRand, bool allow_assign_to_noise=true); - scalar Cluster(char *CluFile); + CompoundScore CEM(char *CluFile, integer recurse, integer InitRand, bool allow_assign_to_noise = true); + CompoundScore Cluster(char *CluFile); // IO related functions void LoadData(char *FileBase, integer ElecNo, char *UseFeatures); void LoadClu(char *StartCluFile); void SaveOutput(); - void SaveTempOutput(); + void SaveTempOutput(integer Iter); void SaveSortedData(); void SaveSortedClu(); void SaveCovMeans();