-
Notifications
You must be signed in to change notification settings - Fork 0
/
kmean.cpp
144 lines (125 loc) · 3.79 KB
/
kmean.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include <iostream>
#include <array>
#include <random>
#include <raylib.h>
/*
No banter about the time complexity please!!!!
*/
// 1. Data: generate random numbers how many though? 1M
// 2. apply the algorithm
// 3. display the result
// probably convert into multithreading application
template <int S>
struct Data{
std::array<float, S> Val;
int Cluster;
};
constexpr int NumberOfCluster = 3;
constexpr int MaxNoOfIteration = 40;
constexpr int SizeOfArray = 800;
template <int S>
void KMean(std::array<Data<S>, SizeOfArray>& data){
std::array<std::array<float, S>, NumberOfCluster> Centroids{};
std::random_device r;
std::default_random_engine e1(r());
std::uniform_int_distribution<int> uniform_dist(0, SizeOfArray);
// randomly select centroids
for(int i = 0 ; i < NumberOfCluster ; ++i){
for(int j = 0 ; j < S ; ++j)
Centroids[i][j] = {data[uniform_dist(e1)].Val[0]};
}
int NoOfIterations = 0;
while(NoOfIterations < MaxNoOfIteration) {
for(int i = 0 ; i < SizeOfArray; ++i) {
int SmallestCentroidIndex = 0;
for(int j = 0 ; j < NumberOfCluster ; ++j) {
// calculate the distance between data[i].Val and Centroids[j]
// assign data[i].Cluster to the smallest distance index
float DistSum = 0.0f;
float CentroidDistSum = 0.0f;
for(int k = 0 ; k < S ; ++k){
DistSum += (data[i].Val[k] - Centroids[j][k]) * (data[i].Val[k] - Centroids[j][k]);
CentroidDistSum += (data[i].Val[k] - Centroids[SmallestCentroidIndex][k]) * (data[i].Val[k] - Centroids[SmallestCentroidIndex][k]);
}
if(DistSum < CentroidDistSum){
SmallestCentroidIndex = j;
}
}
data[i].Cluster = SmallestCentroidIndex + 1;
}
for(int i = 0 ; i < NumberOfCluster ; ++i) {
std::array<float, S> Sum{};
Sum.fill(0.0f);
std::array<int, S> Count{};
Count.fill(0);
for(int j = 0 ; j < SizeOfArray ; ++j) {
if(data[j].Cluster == i + 1) {
for(int k = 0 ; k < S ; ++k) {
Sum[k] += data[j].Val[k];
Count[k]++;
}
}
}
// update the centroid
for(int k = 0 ; k < S ; ++k)
Centroids[i][k] = Count[k] > 0 ? Sum[k] / (float)Count[k] : Centroids[i][k];
}
NoOfIterations++;
}
// prints all the centroids
for(const auto& elt: Centroids) {
for(const auto& e : elt)
std::cout << e << " " ;
std::cout << '\n';
}
std::cout << '\n';
}
template<int S>
void plot(const std::array<Data<S>, SizeOfArray>& data){
if(S != 2){
std::cerr << "Only 2d data are supported for plotting";
return;
}
const int screenWidth = 900;
const int screenHeight = 900;
InitWindow(screenWidth, screenHeight, "Plotter");
SetTargetFPS(60);
auto PointColor = RED;
while (!WindowShouldClose())
{
BeginDrawing();
ClearBackground(RAYWHITE);
for(const auto& elt: data){
if(elt.Cluster == 1) PointColor = RED;
else if (elt.Cluster == 2) PointColor = GREEN;
else PointColor = DARKBLUE;
DrawCircle(elt.Val[0] , elt.Val[1], 4, PointColor);
}
EndDrawing();
}
CloseWindow();
}
int main(){
constexpr int S = 2;
std::array<Data<S>, SizeOfArray> data{};
//generate random number here.
// Seed with a real random value, if available
std::random_device r;
std::default_random_engine e1(r());
std::uniform_int_distribution<int> uniform_dist(0, SizeOfArray);
for(int i = 0 ; i < SizeOfArray ; ++i) {
for(int j = 0 ; j < S ; ++j)
data[i].Val[j] = (float)uniform_dist(e1);
data[i].Cluster = 0;
}
KMean(data);
for(auto& elt: data) {
for(auto& e : elt.Val) {
std::cout << e << ' ' ;
}
std::cout << " : " << elt.Cluster << ' ';
std::cout << '\n';
}
plot(data);
return 0;
}