// oneAPI.cpp
// Model being fitted (indices run from 0 to M-1):
// f(x0, x1, ..., x(M-1)) = theta0 * x0 + theta1 * x1 + ... + theta(M-1) * x(M-1)
#include <CL/sycl.hpp>
#include <iostream>
#include <iomanip>
#include <ctime>
#include <cmath>
#include <cstdlib>
// Number of input variables per data point; also the number of theta coefficients.
#define M 10
// Number of data points in the generated data set.
#define N 1000
// Number of update steps the training loop in main performs.
#define MAX_ITERATIONS 1000
// Step size that scales the averaged gradient term in each theta update.
#define ALPHA 0.1
// Threshold compared against the per-coefficient absolute difference in checkThetaAccuracy.
// NOTE(review): the name is misspelled ("TORLERANCE"); a rename must also update its user.
#define ACCURACY_TORLERANCE 0.001
// Number of digits after the decimal point used when printError prints the error.
#define MAX_DECIMALS 4
// Makes the SYCL names used below (queue, buffer, accessor, range, id, ...) available unqualified.
using namespace sycl;
/// @brief Evaluates the linear model whose coefficients are being searched for.
/// @param x     Variables of one data point; the first M values are read.
/// @param theta Model coefficients; the first M values are read.
/// @return The sum of theta[i] * x[i] for i in [0, M), accumulated in index order.
float f(float *x, float *theta)
{
    float sum = 0;
    int idx = 0;
    while (idx < M)
    {
        sum += theta[idx] * x[idx];
        ++idx;
    }
    return sum;
}
/// @brief Seeds rand() from the current time and fills a synthetic data set.
/// @param inputs  Receives N data points of M variables each, every value drawn from rand().
/// @param outputs Receives f(inputs[i], theta) for each data point i.
/// @param theta   Receives the M randomly drawn coefficients used to produce outputs.
void init(float inputs[N][M], float outputs[N], float theta[M])
{
    srand(time(nullptr));

    // One sample from rand(), scaled by RAND_MAX.
    auto nextSample = []() { return (float)rand() / (float)RAND_MAX; };

    // The coefficients are drawn before any input so the rand() sequence is consumed in a fixed order.
    for (int col = 0; col < M; ++col)
        theta[col] = nextSample();

    for (int row = 0; row < N; ++row)
    {
        // row-th data point; each entry is one variable of that point
        float *point = inputs[row];
        for (int col = 0; col < M; ++col)
            point[col] = nextSample();
        outputs[row] = f(point, theta);
    }
}
/// @brief Prints whether every computed coefficient lies within ACCURACY_TORLERANCE of the expected one.
/// @param theta       Computed coefficients; the first M values are read.
/// @param actualTheta Expected coefficients; the first M values are read.
void checkThetaAccuracy(float *theta, float *actualTheta)
{
    bool thetasAreAccurate = true;
    for (int i = 0; i < M; i++)
    {
        // std::fabs keeps the difference in floating point. An unqualified abs() can resolve to
        // the C int overload, which truncates every sub-1.0 difference to 0.
        const float difference = std::fabs(theta[i] - actualTheta[i]);
        // Written as !(d <= tol) so that a NaN coefficient is reported as inaccurate.
        if (!(difference <= ACCURACY_TORLERANCE))
        {
            thetasAreAccurate = false;
            break;
        }
    }
    if (thetasAreAccurate)
        std::cout << "Thetas are accurate" << std::endl;
    else
        std::cout << "Thetas are not accurate" << std::endl;
}
/// @brief Prints the mean absolute difference between the model's predictions and the stored outputs.
/// @param inputs  N data points of M variables each.
/// @param outputs Expected output for each data point.
/// @param theta   Coefficients to evaluate; the first M values are read.
/// @note Leaves std::cout in fixed notation with MAX_DECIMALS precision.
void printError(float inputs[N][M], float outputs[N], float *theta)
{
    float error = 0;
    for (int n = 0; n < N; n++)
    {
        const float prediction = f(inputs[n], theta);
        // std::fabs keeps the residual in floating point. An unqualified abs() can resolve to
        // the C int overload, which truncates every sub-1.0 residual to 0.
        error += std::fabs(prediction - outputs[n]);
    }
    error /= N;
    std::cout << std::fixed << std::setprecision(MAX_DECIMALS) << "error: " << error << std::endl;
}
/// @brief Prints a header line followed by one "expected -> computed" line per coefficient.
/// @param expectedTheta   Expected coefficients; the first M values are read.
/// @param calculatedTheta Computed coefficients; the first M values are read.
void printThetaMapping(float *expectedTheta, float *calculatedTheta)
{
    std::cout << "Expected Thetas vs Computed Thetas" << std::endl;

    // Walk both arrays in lock-step.
    const float *expected = expectedTheta;
    const float *computed = calculatedTheta;
    for (const float *end = expectedTheta + M; expected != end; ++expected, ++computed)
    {
        std::cout << *expected << " -> " << *computed << std::endl;
    }
}
int main()
{
queue q(gpu_selector_v);
std::cout << "Device: " << q.get_device().get_info<info::device::name>() << std::endl;
float inputs[N][M];
float outputs[N];
float actualTheta[M];
init(inputs, outputs, actualTheta);
// theta are the coefficients we are trying to find
float theta[M];
for (int i = 0; i < M; i++)
theta[i] = 0;
{
buffer buf_inputs(*inputs, range(N, M));
buffer buf_outputs(outputs, range(N));
for (int i = 0; i < MAX_ITERATIONS; i++)
{
float newTheta[M];
{
buffer buf_theta(theta, range(M));
buffer buf_newTheta(newTheta, range(M));
q.submit([&](handler &h)
{
accessor a_inputs(buf_inputs, h, read_only);
accessor a_outputs(buf_outputs, h, read_only);
accessor a_theta(buf_theta, h, read_only);
accessor a_newTheta(buf_newTheta, h, write_only);
h.parallel_for(range(M), [=](id<1> k) {
float t = 0;
for (int n = 0; n < N; n++)
{
float h = 0;
for (int i = 0; i < M; i++)
{
h += a_inputs[n][i] * a_theta[i];
}
t += (h - a_outputs[n]) * a_inputs[n][k];
}
t = a_theta[k] - ALPHA * t / N;
a_newTheta[k] = t;
}); });
}
for (int i = 0; i < M; i++)
theta[i] = newTheta[i];
}
}
// check mapping
printThetaMapping(actualTheta, theta);
// check if thetas are accurate
checkThetaAccuracy(theta, actualTheta);
// check error
printError(inputs, outputs, theta);
return 0;
}