From 8121a14b24fd29a1c1bcfe23506e756312d5d027 Mon Sep 17 00:00:00 2001 From: "Tsang, Whitney" Date: Wed, 15 Feb 2023 17:42:01 -0800 Subject: [PATCH] [SYCL-MLIR] Manual optimizations (Scalar replacement) Signed-off-by: Tsang, Whitney --- polybench/2mm.cpp | 8 ++++++-- polybench/3mm.cpp | 12 +++++++++--- polybench/atax.cpp | 8 ++++++-- polybench/bicg.cpp | 8 ++++++-- polybench/covariance.cpp | 12 ++++++++---- polybench/gemm.cpp | 5 +++-- polybench/gesummv.cpp | 8 ++++++-- polybench/gramschmidt.cpp | 5 +++-- polybench/mvt.cpp | 8 ++++++-- polybench/syr2k.cpp | 4 +++- polybench/syrk.cpp | 4 +++- 11 files changed, 59 insertions(+), 23 deletions(-) diff --git a/polybench/2mm.cpp b/polybench/2mm.cpp index d79a9eb..ed2d488 100644 --- a/polybench/2mm.cpp +++ b/polybench/2mm.cpp @@ -100,9 +100,11 @@ class Polybench_2mm { const auto i = item[0]; const auto j = item[1]; + DATA_TYPE C_reduction = C[item]; for(size_t k = 0; k < size_; k++) { - C[item] += A[{i, k}] * B[{k, j}]; + C_reduction += A[{i, k}] * B[{k, j}]; } + C[item] = C_reduction; }); })); @@ -116,9 +118,11 @@ class Polybench_2mm { const auto j = item[1]; E[item] = 0; + DATA_TYPE E_reduction = E[item]; for(size_t k = 0; k < size_; k++) { - E[item] += C[{i, k}] * D[{k, j}]; + E_reduction += C[{i, k}] * D[{k, j}]; } + E[item] = E_reduction; }); })); } diff --git a/polybench/3mm.cpp b/polybench/3mm.cpp index 9b33f40..4c68f42 100644 --- a/polybench/3mm.cpp +++ b/polybench/3mm.cpp @@ -120,9 +120,11 @@ class Polybench_3mm { const auto i = item[0]; const auto j = item[1]; + DATA_TYPE E_reduction = E[item]; for(size_t k = 0; k < size_; k++) { - E[item] += A[{i, k}] * B[{k, j}]; + E_reduction += A[{i, k}] * B[{k, j}]; } + E[item] = E_reduction; }); })); @@ -135,9 +137,11 @@ class Polybench_3mm { const auto i = item[0]; const auto j = item[1]; + DATA_TYPE F_reduction = F[item]; for(size_t k = 0; k < size_; k++) { - F[item] += C[{i, k}] * D[{k, j}]; + F_reduction += C[{i, k}] * D[{k, j}]; } + F[item] = F_reduction; }); })); @@ -150,9 +154,11 @@ class Polybench_3mm { const auto i = item[0]; const auto j = item[1]; + DATA_TYPE G_reduction = G[item]; for(size_t k = 0; k < size_; k++) { - G[item] += E[{i, k}] * F[{k, j}]; + G_reduction += E[{i, k}] * F[{k, j}]; } + G[item] = G_reduction; }); })); } diff --git a/polybench/atax.cpp b/polybench/atax.cpp index cb6a302..7fa7036 100644 --- a/polybench/atax.cpp +++ b/polybench/atax.cpp @@ -73,9 +73,11 @@ class Polybench_Atax { cgh.parallel_for(tmp_buffer.get_range(), [=, size_ = size](item<1> item) { const auto i = item[0]; + DATA_TYPE tmp_reduction = tmp[item]; for(size_t j = 0; j < size_; j++) { - tmp[item] += A[{i, j}] * x[j]; + tmp_reduction += A[{i, j}] * x[j]; } + tmp[item] = tmp_reduction; }); })); @@ -87,9 +89,11 @@ class Polybench_Atax { cgh.parallel_for(y_buffer.get_range(), [=, size_ = size](item<1> item) { const auto j = item[0]; + DATA_TYPE y_reduction = y[item]; for(size_t i = 0; i < size_; i++) { - y[item] += A[{i, j}] * tmp[i]; + y_reduction += A[{i, j}] * tmp[i]; } + y[item] = y_reduction; }); })); } diff --git a/polybench/bicg.cpp b/polybench/bicg.cpp index ed73375..2e32dea 100644 --- a/polybench/bicg.cpp +++ b/polybench/bicg.cpp @@ -77,9 +77,11 @@ class Polybench_Bicg { cgh.parallel_for(s_buffer.get_range(), [=, size_ = size](item<1> item) { const auto j = item[0]; + DATA_TYPE s_reduction = s[item]; for(size_t i = 0; i < size_; i++) { - s[item] += A[{i, j}] * r[i]; + s_reduction += A[{i, j}] * r[i]; } + s[item] = s_reduction; }); })); @@ -91,9 +93,11 @@ class Polybench_Bicg { cgh.parallel_for(q_buffer.get_range(), [=, size_ = size](item<1> item) { const auto i = item[0]; + DATA_TYPE q_reduction = q[item]; for(size_t j = 0; j < size_; j++) { - q[item] += A[{i, j}] * p[j]; + q_reduction += A[{i, j}] * p[j]; } + q[item] = q_reduction; }); })); } diff --git a/polybench/covariance.cpp b/polybench/covariance.cpp index 96d18a9..c69cd53 100644 --- a/polybench/covariance.cpp +++ b/polybench/covariance.cpp @@ -86,10 +86,12 @@ class Polybench_Covariance { cgh.parallel_for(range<1>(size), id<1>(1), [=, N_ = size](item<1> item) { const auto j = item[0]; - mean[item] = 0; + mean[item] = 0; + DATA_TYPE mean_reduction = mean[item]; for(size_t i = 1; i <= N_; i++) { - mean[item] += data[{i, j}]; + mean_reduction += data[{i, j}]; } + mean[item] = mean_reduction; mean[item] /= float_n; }); })); @@ -115,10 +117,12 @@ class Polybench_Covariance { symmat[{j1, j1}] = 1.0; for(size_t j2 = j1; j2 <= M_; j2++) { - symmat[{j1, j2}] = 0.0; + symmat[{j1, j2}] = 0.0; + DATA_TYPE symmat_reduction = symmat[{j1, j2}]; for(size_t i = 1; i <= N_; i++) { - symmat[{j1, j2}] += data[{i, j1}] * data[{i, j2}]; + symmat_reduction += data[{i, j1}] * data[{i, j2}]; } + symmat[{j1, j2}] = symmat_reduction; symmat2[{j2, j1}] = symmat[{j1, j2}]; } diff --git a/polybench/gemm.cpp b/polybench/gemm.cpp index 1015e2a..c46543b 100644 --- a/polybench/gemm.cpp +++ b/polybench/gemm.cpp @@ -84,10 +84,11 @@ class Polybench_Gemm { const auto j = item[1]; C[item] *= BETA; - + DATA_TYPE C_reduction = C[item]; for(size_t k = 0; k < NK_; k++) { - C[item] += ALPHA * A[{i, k}] * B[{k, j}]; + C_reduction += ALPHA * A[{i, k}] * B[{k, j}]; } + C[item] = C_reduction; }); })); } diff --git a/polybench/gesummv.cpp b/polybench/gesummv.cpp index 18f0977..5a52114 100644 --- a/polybench/gesummv.cpp +++ b/polybench/gesummv.cpp @@ -87,10 +87,14 @@ class Polybench_Gesummv { cgh.parallel_for(y.get_range(), [=, N_ = size](item<1> item) { const auto i = item[0]; + DATA_TYPE tmp_reduction = tmp[item]; + DATA_TYPE y_reduction = y[item]; for(size_t j = 0; j < N_; j++) { - tmp[item] += A[{i, j}] * x[j]; - y[item] += B[{i, j}] * x[j]; + tmp_reduction += A[{i, j}] * x[j]; + y_reduction += B[{i, j}] * x[j]; } + tmp[item] = tmp_reduction; + y[item] = y_reduction; y[item] = ALPHA * tmp[item] + BETA * y[item]; }); diff --git a/polybench/gramschmidt.cpp b/polybench/gramschmidt.cpp index 8b0c5a6..055643f 100644 --- a/polybench/gramschmidt.cpp +++ b/polybench/gramschmidt.cpp @@ -104,10 +104,11 @@ class Polybench_Gramschmidt { if(j <= k || j >= N_) return; - R[item] = 0; + DATA_TYPE R_reduction = 0; for(size_t i = 0; i < M_; i++) { - R[item] += Q[{i, k}] * A[{i, j}]; + R_reduction += Q[{i, k}] * A[{i, j}]; } + R[item] = R_reduction; for(size_t i = 0; i < M_; i++) { A[{i, j}] -= Q[{i, k}] * R[item]; diff --git a/polybench/mvt.cpp b/polybench/mvt.cpp index 0df0a1b..65c9f96 100644 --- a/polybench/mvt.cpp +++ b/polybench/mvt.cpp @@ -75,9 +75,11 @@ class Polybench_Mvt { cgh.parallel_for(x1_buffer.get_range(), [=, N_ = size](item<1> item) { const auto i = item[0]; + DATA_TYPE x1_reduction = x1[i]; for(size_t j = 0; j < N_; j++) { - x1[i] += a[{i, j}] * y1[j]; + x1_reduction += a[{i, j}] * y1[j]; } + x1[i] = x1_reduction; }); })); @@ -89,9 +91,11 @@ class Polybench_Mvt { cgh.parallel_for(x1_buffer.get_range(), [=, N_ = size](item<1> item) { const auto k = item[0]; + DATA_TYPE x2_reduction = x2[k]; for(size_t l = 0; l < N_; l++) { - x2[k] += a[{k, l}] * y2[l]; + x2_reduction += a[{k, l}] * y2[l]; } + x2[k] = x2_reduction; }); })); } diff --git a/polybench/syr2k.cpp b/polybench/syr2k.cpp index 14ae3d9..e115996 100644 --- a/polybench/syr2k.cpp +++ b/polybench/syr2k.cpp @@ -81,9 +81,11 @@ class Polybench_Syr2k { C[item] *= BETA; + DATA_TYPE C_reduction = C[item]; for(size_t k = 0; k < M_; k++) { - C[item] += ALPHA * A[{i, k}] * B[{j, k}] + ALPHA * B[{i, k}] * A[{j, k}]; + C_reduction += ALPHA * A[{i, k}] * B[{j, k}] + ALPHA * B[{i, k}] * A[{j, k}]; } + C[item] = C_reduction; }); })); } diff --git a/polybench/syrk.cpp b/polybench/syrk.cpp index 22549cd..7dddd7d 100644 --- a/polybench/syrk.cpp +++ b/polybench/syrk.cpp @@ -77,9 +77,11 @@ class Polybench_Syrk { C[item] *= beta; + DATA_TYPE C_reduction = C[item]; for(size_t k = 0; k < M_; k++) { - C[item] += alpha * A[{i, k}] * A[{j, k}]; + C_reduction += alpha * A[{i, k}] * A[{j, k}]; } + C[item] = C_reduction; }); })); }