diff --git a/polybench/2DConvolution.cpp b/polybench/2DConvolution.cpp index 3caf371..655304b 100644 --- a/polybench/2DConvolution.cpp +++ b/polybench/2DConvolution.cpp @@ -70,9 +70,10 @@ class Polybench_2DConvolution { const DATA_TYPE c13 = +0.4, c23 = +0.7, c33 = +0.10; if((i > 0) && (j > 0) && (i < size_ - 1) && (j < size_ - 1)) { - B[item] = c11 * A[{(i - 1), (j - 1)}] + c12 * A[{(i + 0), (j - 1)}] + c13 * A[{(i + 1), (j - 1)}] + c21 * A[{(i - 1), (j + 0)}] - + c22 * A[{(i + 0), (j + 0)}] + c23 * A[{(i + 1), (j + 0)}] + c31 * A[{(i - 1), (j + 1)}] + c32 * A[{(i + 0), (j + 1)}] - + c33 * A[{(i + 1), (j + 1)}]; + DATA_TYPE B0 = c11 * A[{(i - 1), (j - 1)}] + c21 * A[{(i - 1), (j + 0)}] + c31 * A[{(i - 1), (j + 1)}]; + DATA_TYPE B1 = c12 * A[{(i + 0), (j - 1)}] + c22 * A[{(i + 0), (j + 0)}] + c32 * A[{(i + 0), (j + 1)}]; + DATA_TYPE B2 = c13 * A[{(i + 1), (j - 1)}] + c23 * A[{(i + 1), (j + 0)}] + c33 * A[{(i + 1), (j + 1)}]; + B[item] = B0 + B1 + B2; } }); })); diff --git a/polybench/2mm.cpp b/polybench/2mm.cpp index ed2d488..f3875a2 100644 --- a/polybench/2mm.cpp +++ b/polybench/2mm.cpp @@ -117,8 +117,7 @@ class Polybench_2mm { const auto i = item[0]; const auto j = item[1]; - E[item] = 0; - DATA_TYPE E_reduction = E[item]; + DATA_TYPE E_reduction = 0; for(size_t k = 0; k < size_; k++) { E_reduction += C[{i, k}] * D[{k, j}]; } diff --git a/polybench/3mm.cpp b/polybench/3mm.cpp index 4c68f42..ad66617 100644 --- a/polybench/3mm.cpp +++ b/polybench/3mm.cpp @@ -115,32 +115,21 @@ class Polybench_3mm { auto A = A_buffer.get_access(cgh); auto B = B_buffer.get_access(cgh); auto E = E_buffer.get_access(cgh); - - cgh.parallel_for(E_buffer.get_range(), [=, size_ = size](item<2> item) { - const auto i = item[0]; - const auto j = item[1]; - - DATA_TYPE E_reduction = E[item]; - for(size_t k = 0; k < size_; k++) { - E_reduction += A[{i, k}] * B[{k, j}]; - } - E[item] = E_reduction; - }); - })); - - events.push_back(args.device_queue.submit([&](handler& cgh) { auto C = C_buffer.get_access(cgh); auto D = D_buffer.get_access(cgh); auto F = F_buffer.get_access(cgh); - cgh.parallel_for(F_buffer.get_range(), [=, size_ = size](item<2> item) { + cgh.parallel_for(E_buffer.get_range(), [=, size_ = size](item<2> item) { const auto i = item[0]; const auto j = item[1]; + DATA_TYPE E_reduction = E[item]; DATA_TYPE F_reduction = F[item]; for(size_t k = 0; k < size_; k++) { + E_reduction += A[{i, k}] * B[{k, j}]; F_reduction += C[{i, k}] * D[{k, j}]; } + E[item] = E_reduction; F[item] = F_reduction; }); })); diff --git a/polybench/atax.cpp b/polybench/atax.cpp index 7fa7036..96257e0 100644 --- a/polybench/atax.cpp +++ b/polybench/atax.cpp @@ -73,7 +73,7 @@ class Polybench_Atax { cgh.parallel_for(tmp_buffer.get_range(), [=, size_ = size](item<1> item) { const auto i = item[0]; - DATA_TYPE tmp_reduction = tmp[item]; + DATA_TYPE tmp_reduction = tmp[item]; for(size_t j = 0; j < size_; j++) { tmp_reduction += A[{i, j}] * x[j]; } @@ -81,7 +81,7 @@ class Polybench_Atax { }); })); - events.push_back(args.device_queue.submit([&](handler& cgh) { + events.push_back(args.device_queue.submit([&](handler& cgh) { auto A = A_buffer.get_access(cgh); auto y = y_buffer.get_access(cgh); auto tmp = tmp_buffer.get_access(cgh); @@ -89,7 +89,7 @@ class Polybench_Atax { cgh.parallel_for(y_buffer.get_range(), [=, size_ = size](item<1> item) { const auto j = item[0]; - DATA_TYPE y_reduction = y[item]; + DATA_TYPE y_reduction = y[item]; for(size_t i = 0; i < size_; i++) { y_reduction += A[{i, j}] * tmp[i]; } diff --git a/polybench/bicg.cpp b/polybench/bicg.cpp index 2e32dea..5a7c70f 100644 --- a/polybench/bicg.cpp +++ b/polybench/bicg.cpp @@ -73,30 +73,19 @@ class Polybench_Bicg { auto A = A_buffer.get_access(cgh); auto r = r_buffer.get_access(cgh); auto s = s_buffer.get_access(cgh); + auto p = p_buffer.get_access(cgh); + auto q = q_buffer.get_access(cgh); cgh.parallel_for(s_buffer.get_range(), [=, size_ = size](item<1> item) { const auto j = item[0]; DATA_TYPE s_reduction = s[item]; + DATA_TYPE q_reduction = q[item]; for(size_t i = 0; i < size_; i++) { s_reduction += A[{i, j}] * r[i]; + q_reduction += A[{j, i}] * p[i]; } s[item] = s_reduction; - }); - })); - - events.push_back(args.device_queue.submit([&](handler& cgh) { - auto A = A_buffer.get_access(cgh); - auto p = p_buffer.get_access(cgh); - auto q = q_buffer.get_access(cgh); - - cgh.parallel_for(q_buffer.get_range(), [=, size_ = size](item<1> item) { - const auto i = item[0]; - - DATA_TYPE q_reduction = q[item]; - for(size_t j = 0; j < size_; j++) { - q_reduction += A[{i, j}] * p[j]; - } q[item] = q_reduction; }); })); diff --git a/polybench/covariance.cpp b/polybench/covariance.cpp index c69cd53..5fce61c 100644 --- a/polybench/covariance.cpp +++ b/polybench/covariance.cpp @@ -86,13 +86,11 @@ class Polybench_Covariance { cgh.parallel_for(range<1>(size), id<1>(1), [=, N_ = size](item<1> item) { const auto j = item[0]; - mean[item] = 0; - DATA_TYPE mean_reduction = mean[item]; + DATA_TYPE mean_reduction = 0; for(size_t i = 1; i <= N_; i++) { mean_reduction += data[{i, j}]; } - mean[item] = mean_reduction; - mean[item] /= float_n; + mean[item] = mean_reduction / float_n; }); })); @@ -117,14 +115,13 @@ class Polybench_Covariance { symmat[{j1, j1}] = 1.0; for(size_t j2 = j1; j2 <= M_; j2++) { - symmat[{j1, j2}] = 0.0; - DATA_TYPE symmat_reduction = symmat[{j1, j2}]; + DATA_TYPE symmat_reduction = 0.0; for(size_t i = 1; i <= N_; i++) { symmat_reduction += data[{i, j1}] * data[{i, j2}]; } symmat[{j1, j2}] = symmat_reduction; - symmat2[{j2, j1}] = symmat[{j1, j2}]; + symmat2[{j2, j1}] = symmat_reduction; } }); })); diff --git a/polybench/gesummv.cpp b/polybench/gesummv.cpp index 5a52114..d7d7e3e 100644 --- a/polybench/gesummv.cpp +++ b/polybench/gesummv.cpp @@ -90,13 +90,13 @@ class Polybench_Gesummv { DATA_TYPE tmp_reduction = tmp[item]; DATA_TYPE y_reduction = y[item]; for(size_t j = 0; j < N_; j++) { - tmp_reduction += A[{i, j}] * x[j]; - y_reduction += B[{i, j}] * x[j]; + DATA_TYPE xj = x[j]; + tmp_reduction += A[{i, j}] * xj; + y_reduction += B[{i, j}] * xj; } tmp[item] = tmp_reduction; - y[item] = y_reduction; - y[item] = ALPHA * tmp[item] + BETA * y[item]; + y[item] = ALPHA * tmp_reduction + BETA * y_reduction; }); })); } diff --git a/polybench/gramschmidt.cpp b/polybench/gramschmidt.cpp index 055643f..d5fb4b2 100644 --- a/polybench/gramschmidt.cpp +++ b/polybench/gramschmidt.cpp @@ -75,12 +75,13 @@ class Polybench_Gramschmidt { for(size_t k = 0; k < size; k++) { events.push_back(args.device_queue.submit([&](handler& cgh) { auto A = A_buffer.get_access(cgh); - auto R = R_buffer.get_access(cgh); + auto R = R_buffer.get_access(cgh); cgh.parallel_for(range<2>(1, 1), [=, M_ = size](item<2> item) { DATA_TYPE nrm = 0; for(size_t i = 0; i < M_; i++) { - nrm += A[{i, k}] * A[{i, k}]; + DATA_TYPE Aik = A[{i, k}]; + nrm += Aik * Aik; } R[{k, k}] = cl::sycl::sqrt(nrm); }); @@ -89,14 +90,14 @@ class Polybench_Gramschmidt { events.push_back(args.device_queue.submit([&](handler& cgh) { auto A = A_buffer.get_access(cgh); auto R = R_buffer.get_access(cgh); - auto Q = Q_buffer.get_access(cgh); + auto Q = Q_buffer.get_access(cgh); cgh.parallel_for(range<2>(size, 1), id<2>(0, k), [=](item<2> item) { Q[item] = A[item] / R[{k, k}]; }); })); events.push_back(args.device_queue.submit([&](handler& cgh) { auto A = A_buffer.get_access(cgh); - auto R = R_buffer.get_access(cgh); + auto R = R_buffer.get_access(cgh); auto Q = Q_buffer.get_access(cgh); cgh.parallel_for(range<2>(size, 1), [=, M_ = size, N_ = size](item<2> item) { @@ -110,8 +111,9 @@ class Polybench_Gramschmidt { } R[item] = R_reduction; + DATA_TYPE R_licm = R_reduction; for(size_t i = 0; i < M_; i++) { - A[{i, j}] -= Q[{i, k}] * R[item]; + A[{i, j}] -= Q[{i, k}] * R_licm; } }); })); diff --git a/polybench/mvt.cpp b/polybench/mvt.cpp index 65c9f96..ec51313 100644 --- a/polybench/mvt.cpp +++ b/polybench/mvt.cpp @@ -71,31 +71,21 @@ class Polybench_Mvt { auto a = a_buffer.get_access(cgh); auto y1 = y1_buffer.get_access(cgh); auto x1 = x1_buffer.get_access(cgh); + auto y2 = y2_buffer.get_access(cgh); + auto x2 = x2_buffer.get_access(cgh); cgh.parallel_for(x1_buffer.get_range(), [=, N_ = size](item<1> item) { const auto i = item[0]; DATA_TYPE x1_reduction = x1[i]; + DATA_TYPE x2_reduction = x2[i]; for(size_t j = 0; j < N_; j++) { - x1_reduction += a[{i, j}] * y1[j]; + DATA_TYPE aij = a[{i, j}]; + x1_reduction += aij * y1[j]; + x2_reduction += aij * y2[j]; } x1[i] = x1_reduction; - }); - })); - - events.push_back(args.device_queue.submit([&](handler& cgh) { - auto a = a_buffer.get_access(cgh); - auto y2 = y2_buffer.get_access(cgh); - auto x2 = x2_buffer.get_access(cgh); - - cgh.parallel_for(x1_buffer.get_range(), [=, N_ = size](item<1> item) { - const auto k = item[0]; - - DATA_TYPE x2_reduction = x2[k]; - for(size_t l = 0; l < N_; l++) { - x2_reduction += a[{k, l}] * y2[l]; - } - x2[k] = x2_reduction; + x2[i] = x2_reduction; }); })); } diff --git a/polybench/syr2k.cpp b/polybench/syr2k.cpp index e115996..7c6f660 100644 --- a/polybench/syr2k.cpp +++ b/polybench/syr2k.cpp @@ -79,9 +79,9 @@ class Polybench_Syr2k { const auto i = item[0]; const auto j = item[1]; - C[item] *= BETA; - DATA_TYPE C_reduction = C[item]; + C_reduction *= BETA; + for(size_t k = 0; k < M_; k++) { C_reduction += ALPHA * A[{i, k}] * B[{j, k}] + ALPHA * B[{i, k}] * A[{j, k}]; } diff --git a/polybench/syrk.cpp b/polybench/syrk.cpp index 7dddd7d..0430321 100644 --- a/polybench/syrk.cpp +++ b/polybench/syrk.cpp @@ -75,9 +75,9 @@ class Polybench_Syrk { const auto i = item[0]; const auto j = item[1]; - C[item] *= beta; - DATA_TYPE C_reduction = C[item]; + C_reduction *= beta; + for(size_t k = 0; k < M_; k++) { C_reduction += alpha * A[{i, k}] * A[{j, k}]; }