Skip to content

Commit

Permalink
[SYCL-MLIR] Manual optimizations
Browse files: browse the repository at this point in the history.
Signed-off-by: Tsang, Whitney <[email protected]>
  • Loading branch information
whitneywhtsang committed Feb 20, 2023
1 parent 2c29b8a commit ff96326
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 75 deletions.
7 changes: 4 additions & 3 deletions polybench/2DConvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ class Polybench_2DConvolution {
const DATA_TYPE c13 = +0.4, c23 = +0.7, c33 = +0.10;

if((i > 0) && (j > 0) && (i < size_ - 1) && (j < size_ - 1)) {
B[item] = c11 * A[{(i - 1), (j - 1)}] + c12 * A[{(i + 0), (j - 1)}] + c13 * A[{(i + 1), (j - 1)}] + c21 * A[{(i - 1), (j + 0)}]
+ c22 * A[{(i + 0), (j + 0)}] + c23 * A[{(i + 1), (j + 0)}] + c31 * A[{(i - 1), (j + 1)}] + c32 * A[{(i + 0), (j + 1)}]
+ c33 * A[{(i + 1), (j + 1)}];
DATA_TYPE B0 = c11 * A[{(i - 1), (j - 1)}] + c21 * A[{(i - 1), (j + 0)}] + c31 * A[{(i - 1), (j + 1)}];
DATA_TYPE B1 = c12 * A[{(i + 0), (j - 1)}] + c22 * A[{(i + 0), (j + 0)}] + c32 * A[{(i + 0), (j + 1)}];
DATA_TYPE B2 = c13 * A[{(i + 1), (j - 1)}] + c23 * A[{(i + 1), (j + 0)}] + c33 * A[{(i + 1), (j + 1)}];
B[item] = B0 + B1 + B2;
}
});
}));
Expand Down
3 changes: 1 addition & 2 deletions polybench/2mm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ class Polybench_2mm {
const auto i = item[0];
const auto j = item[1];

E[item] = 0;
DATA_TYPE E_reduction = E[item];
DATA_TYPE E_reduction = 0;
for(size_t k = 0; k < size_; k++) {
E_reduction += C[{i, k}] * D[{k, j}];
}
Expand Down
19 changes: 4 additions & 15 deletions polybench/3mm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,32 +115,21 @@ class Polybench_3mm {
auto A = A_buffer.get_access<access::mode::read>(cgh);
auto B = B_buffer.get_access<access::mode::read>(cgh);
auto E = E_buffer.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<Polybench_3mm_1>(E_buffer.get_range(), [=, size_ = size](item<2> item) {
const auto i = item[0];
const auto j = item[1];

DATA_TYPE E_reduction = E[item];
for(size_t k = 0; k < size_; k++) {
E_reduction += A[{i, k}] * B[{k, j}];
}
E[item] = E_reduction;
});
}));

events.push_back(args.device_queue.submit([&](handler& cgh) {
auto C = C_buffer.get_access<access::mode::read>(cgh);
auto D = D_buffer.get_access<access::mode::read>(cgh);
auto F = F_buffer.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<Polybench_3mm_2>(F_buffer.get_range(), [=, size_ = size](item<2> item) {
cgh.parallel_for<Polybench_3mm_1>(E_buffer.get_range(), [=, size_ = size](item<2> item) {
const auto i = item[0];
const auto j = item[1];

DATA_TYPE E_reduction = E[item];
DATA_TYPE F_reduction = F[item];
for(size_t k = 0; k < size_; k++) {
E_reduction += A[{i, k}] * B[{k, j}];
F_reduction += C[{i, k}] * D[{k, j}];
}
E[item] = E_reduction;
F[item] = F_reduction;
});
}));
Expand Down
6 changes: 3 additions & 3 deletions polybench/atax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,23 @@ class Polybench_Atax {
cgh.parallel_for<Atax1>(tmp_buffer.get_range(), [=, size_ = size](item<1> item) {
const auto i = item[0];

DATA_TYPE tmp_reduction = tmp[item];
DATA_TYPE tmp_reduction = tmp[item];
for(size_t j = 0; j < size_; j++) {
tmp_reduction += A[{i, j}] * x[j];
}
tmp[item] = tmp_reduction;
});
}));

events.push_back(args.device_queue.submit([&](handler& cgh) {
events.push_back(args.device_queue.submit([&](handler& cgh) {
auto A = A_buffer.get_access<access::mode::read>(cgh);
auto y = y_buffer.get_access<access::mode::read_write>(cgh);
auto tmp = tmp_buffer.get_access<access::mode::read>(cgh);

cgh.parallel_for<Atax2>(y_buffer.get_range(), [=, size_ = size](item<1> item) {
const auto j = item[0];

DATA_TYPE y_reduction = y[item];
DATA_TYPE y_reduction = y[item];
for(size_t i = 0; i < size_; i++) {
y_reduction += A[{i, j}] * tmp[i];
}
Expand Down
19 changes: 4 additions & 15 deletions polybench/bicg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,30 +73,19 @@ class Polybench_Bicg {
auto A = A_buffer.get_access<access::mode::read>(cgh);
auto r = r_buffer.get_access<access::mode::read>(cgh);
auto s = s_buffer.get_access<access::mode::read_write>(cgh);
auto p = p_buffer.get_access<access::mode::read>(cgh);
auto q = q_buffer.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<Bicg1>(s_buffer.get_range(), [=, size_ = size](item<1> item) {
const auto j = item[0];

DATA_TYPE s_reduction = s[item];
DATA_TYPE q_reduction = q[item];
for(size_t i = 0; i < size_; i++) {
s_reduction += A[{i, j}] * r[i];
q_reduction += A[{j, i}] * p[i];
}
s[item] = s_reduction;
});
}));

events.push_back(args.device_queue.submit([&](handler& cgh) {
auto A = A_buffer.get_access<access::mode::read>(cgh);
auto p = p_buffer.get_access<access::mode::read>(cgh);
auto q = q_buffer.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<Bicg2>(q_buffer.get_range(), [=, size_ = size](item<1> item) {
const auto i = item[0];

DATA_TYPE q_reduction = q[item];
for(size_t j = 0; j < size_; j++) {
q_reduction += A[{i, j}] * p[j];
}
q[item] = q_reduction;
});
}));
Expand Down
11 changes: 4 additions & 7 deletions polybench/covariance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,11 @@ class Polybench_Covariance {
cgh.parallel_for<CovarianceMean>(range<1>(size), id<1>(1), [=, N_ = size](item<1> item) {
const auto j = item[0];

mean[item] = 0;
DATA_TYPE mean_reduction = mean[item];
DATA_TYPE mean_reduction = 0;
for(size_t i = 1; i <= N_; i++) {
mean_reduction += data[{i, j}];
}
mean[item] = mean_reduction;
mean[item] /= float_n;
mean[item] = mean_reduction / float_n;
});
}));

Expand All @@ -117,14 +115,13 @@ class Polybench_Covariance {
symmat[{j1, j1}] = 1.0;

for(size_t j2 = j1; j2 <= M_; j2++) {
symmat[{j1, j2}] = 0.0;
DATA_TYPE symmat_reduction = symmat[{j1, j2}];
DATA_TYPE symmat_reduction = 0.0;
for(size_t i = 1; i <= N_; i++) {
symmat_reduction += data[{i, j1}] * data[{i, j2}];
}
symmat[{j1, j2}] = symmat_reduction;

symmat2[{j2, j1}] = symmat[{j1, j2}];
symmat2[{j2, j1}] = symmat_reduction;
}
});
}));
Expand Down
8 changes: 4 additions & 4 deletions polybench/gesummv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ class Polybench_Gesummv {
DATA_TYPE tmp_reduction = tmp[item];
DATA_TYPE y_reduction = y[item];
for(size_t j = 0; j < N_; j++) {
tmp_reduction += A[{i, j}] * x[j];
y_reduction += B[{i, j}] * x[j];
DATA_TYPE xj = x[j];
tmp_reduction += A[{i, j}] * xj;
y_reduction += B[{i, j}] * xj;
}
tmp[item] = tmp_reduction;
y[item] = y_reduction;

y[item] = ALPHA * tmp[item] + BETA * y[item];
y[item] = ALPHA * tmp_reduction + BETA * y_reduction;
});
}));
}
Expand Down
12 changes: 7 additions & 5 deletions polybench/gramschmidt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,13 @@ class Polybench_Gramschmidt {
for(size_t k = 0; k < size; k++) {
events.push_back(args.device_queue.submit([&](handler& cgh) {
auto A = A_buffer.get_access<access::mode::read>(cgh);
auto R = R_buffer.get_access<access::mode::write>(cgh);
auto R = R_buffer.get_access<access::mode::discard_write>(cgh);

cgh.parallel_for<Gramschmidt1>(range<2>(1, 1), [=, M_ = size](item<2> item) {
DATA_TYPE nrm = 0;
for(size_t i = 0; i < M_; i++) {
nrm += A[{i, k}] * A[{i, k}];
DATA_TYPE Aik = A[{i, k}];
nrm += Aik * Aik;
}
R[{k, k}] = cl::sycl::sqrt(nrm);
});
Expand All @@ -89,14 +90,14 @@ class Polybench_Gramschmidt {
events.push_back(args.device_queue.submit([&](handler& cgh) {
auto A = A_buffer.get_access<access::mode::read>(cgh);
auto R = R_buffer.get_access<access::mode::read>(cgh);
auto Q = Q_buffer.get_access<access::mode::write>(cgh);
auto Q = Q_buffer.get_access<access::mode::discard_write>(cgh);

cgh.parallel_for<Gramschmidt2>(range<2>(size, 1), id<2>(0, k), [=](item<2> item) { Q[item] = A[item] / R[{k, k}]; });
}));

events.push_back(args.device_queue.submit([&](handler& cgh) {
auto A = A_buffer.get_access<access::mode::read_write>(cgh);
auto R = R_buffer.get_access<access::mode::write>(cgh);
auto R = R_buffer.get_access<access::mode::discard_write>(cgh);
auto Q = Q_buffer.get_access<access::mode::read>(cgh);

cgh.parallel_for<Gramschmidt3>(range<2>(size, 1), [=, M_ = size, N_ = size](item<2> item) {
Expand All @@ -110,8 +111,9 @@ class Polybench_Gramschmidt {
}
R[item] = R_reduction;

DATA_TYPE R_licm = R_reduction;
for(size_t i = 0; i < M_; i++) {
A[{i, j}] -= Q[{i, k}] * R[item];
A[{i, j}] -= Q[{i, k}] * R_licm;
}
});
}));
Expand Down
24 changes: 7 additions & 17 deletions polybench/mvt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,31 +71,21 @@ class Polybench_Mvt {
auto a = a_buffer.get_access<access::mode::read>(cgh);
auto y1 = y1_buffer.get_access<access::mode::read>(cgh);
auto x1 = x1_buffer.get_access<access::mode::read_write>(cgh);
auto y2 = y2_buffer.get_access<access::mode::read>(cgh);
auto x2 = x2_buffer.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<Mvt1>(x1_buffer.get_range(), [=, N_ = size](item<1> item) {
const auto i = item[0];

DATA_TYPE x1_reduction = x1[i];
DATA_TYPE x2_reduction = x2[i];
for(size_t j = 0; j < N_; j++) {
x1_reduction += a[{i, j}] * y1[j];
DATA_TYPE aij = a[{i, j}];
x1_reduction += aij * y1[j];
x2_reduction += aij * y2[j];
}
x1[i] = x1_reduction;
});
}));

events.push_back(args.device_queue.submit([&](handler& cgh) {
auto a = a_buffer.get_access<access::mode::read>(cgh);
auto y2 = y2_buffer.get_access<access::mode::read>(cgh);
auto x2 = x2_buffer.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<Mvt2>(x1_buffer.get_range(), [=, N_ = size](item<1> item) {
const auto k = item[0];

DATA_TYPE x2_reduction = x2[k];
for(size_t l = 0; l < N_; l++) {
x2_reduction += a[{k, l}] * y2[l];
}
x2[k] = x2_reduction;
x2[i] = x2_reduction;
});
}));
}
Expand Down
4 changes: 2 additions & 2 deletions polybench/syr2k.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ class Polybench_Syr2k {
const auto i = item[0];
const auto j = item[1];

C[item] *= BETA;

DATA_TYPE C_reduction = C[item];
C_reduction *= BETA;

for(size_t k = 0; k < M_; k++) {
C_reduction += ALPHA * A[{i, k}] * B[{j, k}] + ALPHA * B[{i, k}] * A[{j, k}];
}
Expand Down
4 changes: 2 additions & 2 deletions polybench/syrk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ class Polybench_Syrk {
const auto i = item[0];
const auto j = item[1];

C[item] *= beta;

DATA_TYPE C_reduction = C[item];
C_reduction *= beta;

for(size_t k = 0; k < M_; k++) {
C_reduction += alpha * A[{i, k}] * A[{j, k}];
}
Expand Down

0 comments on commit ff96326

Please sign in to comment.