From 8c137ca81fb90d932ea3dca2bc4ac3c48862b535 Mon Sep 17 00:00:00 2001
From: RevathiJambunathan
Date: Mon, 22 Apr 2024 12:13:03 -0700
Subject: [PATCH] parallel for outside for cuda

---
Review notes (text between "---" and the diffstat is not part of the commit):
- The weight kernel is moved out of WarpX::BuildBufferMasks into the new
  member function WarpX::SetWeightsInGatherBuffer. The amrex::Print() debug
  output and the already commented-out j/k scans are not carried over, and the
  old inline kernel is deleted instead of being re-added as a comment block.
- NOTE(review): the leading whitespace of context/removed lines and the
  template arguments of static_cast/amrex::Array4 were reconstructed; please
  regenerate the patch against the tree before applying it.

 Source/WarpX.H   | 24 ++++++++++
 Source/WarpX.cpp | 93 +++++++++++++++++-----------------------------
 2 files changed, 58 insertions(+), 59 deletions(-)

diff --git a/Source/WarpX.H b/Source/WarpX.H
index 2ff7ce3fe74..1c99b47562e 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -1127,6 +1127,30 @@ public:
     // for cuda
     void BuildBufferMasksInBox ( amrex::Box tbx, amrex::IArrayBox &buffer_mask,
                                  const amrex::IArrayBox &guard_mask, int ng );
+
+    /**
+     * \brief Set the interpolation weights of the gather buffer in one box
+     *
+     * Every cell of \c tbx first gets a weight of zero. If \c do_interpolate is
+     * true, each cell with bmsk == 0 and gmsk != 0 is then searched along the
+     * first index direction only, towards lower i first and then towards higher
+     * i, at most \c ngbuffer cells away, for a cell with gmsk == 0. For the
+     * first such cell found, at distance d, the weight is set to
+     *   0.5*tanh((d - ngbuffer*tanh_midpoint)/((1 - tanh_midpoint)*ngbuffer/3)) + 0.5
+     *
+     * \param[in]  tbx            box whose cells are updated
+     * \param[out] wtmsk          weights, written for every cell of \c tbx
+     * \param[in]  gmsk           mask whose zero entries the distance is measured to;
+     *                            read up to \c ngbuffer cells beyond \c tbx in i
+     * \param[in]  bmsk           mask; only cells where it is zero can get a non-zero weight
+     * \param[in]  ngbuffer       maximum search distance, in cells
+     * \param[in]  do_interpolate if false, the weights are only zeroed
+     * \param[in]  tanh_midpoint  center of the tanh profile as a fraction of
+     *                            \c ngbuffer; must not be equal to 1
+     */
+    void SetWeightsInGatherBuffer (const amrex::Box tbx, amrex::Array4<amrex::Real> wtmsk,
+        const amrex::Array4<int const> gmsk, const amrex::Array4<int const> bmsk,
+        const int ngbuffer, const bool do_interpolate, amrex::Real tanh_midpoint);
 #ifdef AMREX_USE_EB
     amrex::EBFArrayBoxFactory const& fieldEBFactory (int lev) const noexcept {
         return static_cast<amrex::EBFArrayBoxFactory const&>(*m_field_factory[lev]);
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 09d15e48c5d..9f1bfcf36be 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -3149,71 +3149,46 @@ WarpX::BuildBufferMasks ()
                     auto const& gmsk = tmp[mfi].const_array();
                     auto const& bmsk = (*bmasks)[mfi].array();
                     auto const& wtmsk = (*weight_gbuffer)[mfi].array();
-                    amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
-                        wtmsk(i,j,k) = 0._rt;
-                        if (bmsk(i,j,k) == 0 && do_interpolate) {
-                            if(gmsk(i,j,k)==0) {
-                                wtmsk(i,j,k) = 0.;
-                                return;
-                            }
-                            for (int ii = i-1; ii >= i-ngbuffer; --ii) {
-                                if (gmsk(ii,j,k)==0) {
-                                    amrex::Real arg = (static_cast<amrex::Real>(i-ii)-ngbuffer*tanh_midpoint)
-                                        / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                                    wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                                    amrex::Print() << " i edge wt is " << wtmsk(i,j,k) << "\n";
-                                    return;
-                                }
-                            }
-                            for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
-                                if (gmsk(ii,j,k)==0) {
-                                    amrex::Real arg = (static_cast<amrex::Real>(ii-i)-ngbuffer*tanh_midpoint)
-                                        / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                                    wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                                    amrex::Print() << " wt is " << wtmsk(i,j,k) << "\n";
-                                    return;
-                                }
-                            }
-                            //for (int jj = j-1; jj >= j-ngbuffer; --jj) {
-                            //    if (gmsk(i,jj,k)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(j-jj)-ngbuffer*tanh_midpoint)
-                            //            / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5 + 0.5;
-                            //        return;
-                            //    }
-                            //}
-                            //for (int jj = j+1; jj <= j+ngbuffer; ++jj) {
-                            //    if (gmsk(i,jj,k)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(jj - j)-ngbuffer*tanh_midpoint)
-                            //            / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                            //        return;
-                            //    }
-                            //}
-                            //for (int kk = k-1; kk >= k-ngbuffer; --kk) {
-                            //    if (gmsk(i,j,kk)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(k-kk)-ngbuffer*tanh_midpoint)
-                            //            / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                            //        return;
-                            //    }
-                            //}
-                            //for (int kk = k+1; kk <= k+ngbuffer; ++kk) {
-                            //    if (gmsk(i,j,kk)==0) {
-                            //        amrex::Real arg = (static_cast<amrex::Real>(kk-k)-ngbuffer*tanh_midpoint)
-                            //            / ((1.-tanh_midpoint)*(ngbuffer/3.));
-                            //        wtmsk(i,j,k) = std::tanh(arg)*0.5+0.5;
-                            //        return;
-                            //    }
-                            //}
-                        }
-                    });
+                    SetWeightsInGatherBuffer(tbx, wtmsk, gmsk, bmsk, ngbuffer, do_interpolate, tanh_midpoint);
                 }
             }
         }
     }
 }
 
+void
+WarpX::SetWeightsInGatherBuffer (const amrex::Box tbx, amrex::Array4<amrex::Real> wtmsk,
+    const amrex::Array4<int const> gmsk, const amrex::Array4<int const> bmsk,
+    const int ngbuffer, const bool do_interpolate, amrex::Real tanh_midpoint)
+{
+    // Loop-invariant parts of the tanh argument, computed once outside the kernel.
+    const amrex::Real tanh_center = static_cast<amrex::Real>(ngbuffer)*tanh_midpoint;
+    const amrex::Real tanh_width =
+        (1._rt - tanh_midpoint)*(static_cast<amrex::Real>(ngbuffer)/3._rt);
+
+    amrex::ParallelFor(tbx, [=] AMREX_GPU_DEVICE (int i, int j, int k) {
+        wtmsk(i,j,k) = 0._rt;
+        if (!do_interpolate || bmsk(i,j,k) != 0 || gmsk(i,j,k) == 0) { return; }
+
+        // The lower-i side is scanned first, so it takes precedence when both
+        // sides contain a cell with gmsk == 0.
+        for (int ii = i-1; ii >= i-ngbuffer; --ii) {
+            if (gmsk(ii,j,k) == 0) {
+                const amrex::Real arg = (static_cast<amrex::Real>(i-ii) - tanh_center)/tanh_width;
+                wtmsk(i,j,k) = std::tanh(arg)*0.5_rt + 0.5_rt;
+                return;
+            }
+        }
+        for (int ii = i+1; ii <= i+ngbuffer; ++ii) {
+            if (gmsk(ii,j,k) == 0) {
+                const amrex::Real arg = (static_cast<amrex::Real>(ii-i) - tanh_center)/tanh_width;
+                wtmsk(i,j,k) = std::tanh(arg)*0.5_rt + 0.5_rt;
+                return;
+            }
+        }
+    });
+}
+
 /**
  * \brief Build buffer mask within given FArrayBox
  *