-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cpp
159 lines (143 loc) · 4.1 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "Points/Centroid.hpp"
#include "Points/Points.hpp"
// Cuts a string into the fields that are separated by a delimiter character.
//
// Extraction is done with std::getline, therefore:
//   - an empty input produces an empty vector;
//   - two consecutive delimiters produce an empty field between them;
//   - a delimiter at the very end does NOT produce a trailing empty field.
//
// s:     text to cut into fields.
// delim: character that separates two consecutive fields.
// Returns the fields in their order of appearance.
std::vector<std::string> split(std::string s, const char delim)
{
    std::stringstream stream{s};
    std::vector<std::string> fields;
    for (std::string field; std::getline(stream, field, delim);)
    {
        fields.push_back(field);
    }
    return fields;
}
// Reads a comma-separated data set and turns every record into a Points.
//
// The first line of the stream is treated as the column header and discarded.
// For each following record, the fields at index 3 and 4 (0-based) are parsed
// with std::stof and become the two features handed to the Points constructor.
//
// Blank lines (including a lone '\r' left over from CRLF line endings) are
// skipped instead of aborting the whole load, so a trailing empty line at the
// end of the file is harmless.
//
// inFile: stream positioned at the beginning of the CSV content.
// Returns the points in file order; empty when the stream yields no record
// (for instance when the file could not be opened).
// Throws std::out_of_range when a non-blank record has fewer than five fields,
// and std::invalid_argument / std::out_of_range (from std::stof) when one of
// the two selected fields is not a representable number.
std::vector<Points> loadData(std::ifstream& inFile)
{
    std::vector<Points> ret{};
    std::string line{};

    // Consume the header line; its content is not needed.
    std::getline(inFile, line);

    while (std::getline(inFile, line))
    {
        // Nothing to parse on an empty line ("\r" is what remains of an empty
        // line in a file with Windows line endings).
        if (line.empty() || line == "\r")
        {
            continue;
        }

        const auto rawData = split(line, ',');
        std::vector<float> entry{std::stof(rawData.at(3)), std::stof(rawData.at(4))};
        ret.emplace_back(Points(entry));
    }
    return ret;
}
void calculateDistanceToCentroid(std::vector<Points>& data, std::vector<Centroid>& centroids)
{
for( auto& dt : data )
{
for( auto& centro : centroids )
{
dt.distance(centro);
}
}
}
void findNewCoordonateForCentroid(std::vector<Points>& data, std::vector<Centroid>& centroids)
{
for( auto& centro : centroids )
{
centro.reset();
}
for( auto& dt : data )
{
auto clusterIdx = dt.getNearClusterIndex();
centroids.at(clusterIdx).prepareBarryCenter(dt);
dt.resetDistance();
}
for( auto& centro : centroids )
{
centro.setNewCentroidCoordonnees();
}
}
// Runs one k-means iteration: first computes the distance of every point to
// every centroid, then updates the centroids from the points assigned to them.
//
// data:      points to cluster; their distance state is reset by the update step.
// centroids: centroids, modified in place.
void KmeansIteration(std::vector<Points>& data, std::vector<Centroid>& centroids)
{
// Step 1: distances point -> centroid.
calculateDistanceToCentroid(data, centroids);
// Step 2: move each centroid according to its assigned points.
findNewCoordonateForCentroid(data, centroids);
}
float KmeansAlgorithm(std::vector<Points>& data, std::vector<Centroid>& centroids)
{
for( int i{0} ; i < 300 ; ++i )
{
KmeansIteration(data, centroids);
}
float inertia{0};
for( auto& dt : data )
{
for( auto& centro : centroids )
{
dt.distance(centro);
}
auto clusterIdx = dt.getNearClusterIndex();
inertia+=dt.distance(centroids.at(clusterIdx));
dt.resetDistance();
}
return inertia;
}
// Returns a copy of one element of `data`, selected with std::rand().
//
// The index is `std::rand() % data.size()`. The sequence produced by
// std::rand() depends on the seed given to std::srand(); when std::srand() is
// never called the sequence is the same on every run.
//
// data: candidate points; must not be empty.
// Throws std::invalid_argument when `data` is empty (the modulo by zero that
// would otherwise occur is undefined behaviour).
Points pickRandomPoint(const std::vector<Points>& data)
{
    if (data.empty())
    {
        throw std::invalid_argument{"pickRandomPoint: cannot pick a point from an empty data set"};
    }
    const auto randomIndex = std::rand() % data.size();
    return data.at(randomIndex);
}
std::vector<Centroid> createCentroidRandomInit(int numOfCenter, const std::vector<Points>& data )
{
std::vector<Centroid> ret{};
ret.reserve(numOfCenter);
for( int i{0} ; i < numOfCenter; ++i )
{
ret.push_back(pickRandomPoint(data));
}
return ret;
}
void printDataWhithThereRespectiveCentroid(std::ofstream& outFile, std::vector<Points>& data, std::vector<Centroid>& centroids)
{
for( auto& dt : data )
{
for( auto& centro : centroids )
{
dt.distance(centro);
}
uint8_t idx = dt.getNearClusterIndex();
outFile << "x/y," << dt.getFeatures().at(0) << "," << dt.getFeatures().at(1) << "," << (int)idx <<"\n";
}
}
void printCentroid(std::ofstream& outFile, std::vector<Centroid>& centroids)
{
for( auto& centro : centroids )
{
outFile << "centro: x/y," << centro.getFeatures().at(0) << "," << centro.getFeatures().at(1) << "\n";
}
}
int main()
{
std::ifstream inFile{"Mall_Customers.csv"};
std::ofstream outFilePoint{"point"};
std::ofstream outFileCentroid{"centroid"};
std::ofstream outFileElbow{"elbow"};
std::vector<Points> data{ loadData(inFile) };
//Elbow dtermination
for( int i{1} ; i < 20 ; ++i )
{
std::vector<Centroid> centro{createCentroidRandomInit(i,data)};
auto inertia = KmeansAlgorithm(data, centro);
outFileElbow << i << "," << inertia << "\n";
}
//Example with 5 cluster
std::vector<Centroid> centroids{data.at(3), data.at(64), data.at(101), data.at(104), data.at(149)};
KmeansAlgorithm(data, centroids);
//Print data and centroid result
printDataWhithThereRespectiveCentroid(outFilePoint, data, centroids);
printCentroid(outFileCentroid,centroids);
outFileCentroid.flush();
outFilePoint.flush();
outFileElbow.flush();
return 0;
}