From 22d6158e94282a740459ba4c0829e54d1de759da Mon Sep 17 00:00:00 2001 From: Max Katz Date: Tue, 10 Oct 2023 09:20:39 -0400 Subject: [PATCH] Convert correct_dterm to C++ (#2620) This uses inline metric calculation to support GPU offloading. Note that the vector s was unused and was previously only in the code to enable the call to sphe to fill in re for spherical geometries, which is now handled through edge_center_metric if we're in a spherical geometry. We also do a loop over dimensions to make the code a little more understandable. --- Source/radiation/CastroRad_1d.f90 | 18 ------- Source/radiation/CastroRad_2d.f90 | 30 ----------- Source/radiation/CastroRad_3d.f90 | 18 ------- Source/radiation/Make.package | 1 - Source/radiation/RAD_F.H | 11 ---- Source/radiation/RadSolve.cpp | 86 ++++++++++++++----------------- 6 files changed, 38 insertions(+), 126 deletions(-) delete mode 100644 Source/radiation/CastroRad_1d.f90 delete mode 100644 Source/radiation/CastroRad_2d.f90 delete mode 100644 Source/radiation/CastroRad_3d.f90 diff --git a/Source/radiation/CastroRad_1d.f90 b/Source/radiation/CastroRad_1d.f90 deleted file mode 100644 index 94732d9c25..0000000000 --- a/Source/radiation/CastroRad_1d.f90 +++ /dev/null @@ -1,18 +0,0 @@ -! no tiling -subroutine ca_correct_dterm(dfx, dfx_l1, dfx_h1, & - re, rc) bind(C, name="ca_correct_dterm") - - use amrex_fort_module, only : rt => amrex_real - implicit none - - integer, intent(in) :: dfx_l1, dfx_h1 - real(rt) , intent(inout) :: dfx(dfx_l1:dfx_h1) - real(rt) , intent(in) :: re(dfx_l1:dfx_h1), rc(1) - - integer :: i - - do i=dfx_l1, dfx_h1 - dfx(i) = dfx(i) / (re(i) + 1.e-50_rt) - end do - -end subroutine ca_correct_dterm diff --git a/Source/radiation/CastroRad_2d.f90 b/Source/radiation/CastroRad_2d.f90 deleted file mode 100644 index 892aadac3b..0000000000 --- a/Source/radiation/CastroRad_2d.f90 +++ /dev/null @@ -1,30 +0,0 @@ -! no tiling -subroutine ca_correct_dterm( & - dfx, dfx_l1, dfx_l2, dfx_h1, dfx_h2, & - dfy, dfy_l1, dfy_l2, dfy_h1, dfy_h2, & - re, rc) bind(C, name="ca_correct_dterm") - - use amrex_fort_module, only : rt => amrex_real - implicit none - - integer, intent(in) :: dfx_l1, dfx_l2, dfx_h1, dfx_h2 - integer, intent(in) :: dfy_l1, dfy_l2, dfy_h1, dfy_h2 - real(rt) , intent(inout) :: dfx(dfx_l1:dfx_h1,dfx_l2:dfx_h2) - real(rt) , intent(inout) :: dfy(dfy_l1:dfy_h1,dfy_l2:dfy_h2) - real(rt) , intent(in) :: re(dfx_l1:dfx_h1), rc(dfy_l1:dfy_h1) - - integer :: i, j - - do j=dfx_l2, dfx_h2 - do i=dfx_l1, dfx_h1 - dfx(i,j) = dfx(i,j) / (re(i) + 1.e-50_rt) - end do - end do - - do j=dfy_l2, dfy_h2 - do i=dfy_l1, dfy_h1 - dfy(i,j) = dfy(i,j) / rc(i) - end do - end do - -end subroutine ca_correct_dterm diff --git a/Source/radiation/CastroRad_3d.f90 b/Source/radiation/CastroRad_3d.f90 deleted file mode 100644 index c21d2945ac..0000000000 --- a/Source/radiation/CastroRad_3d.f90 +++ /dev/null @@ -1,18 +0,0 @@ -subroutine ca_correct_dterm( & - dfx, dfx_l1, dfx_l2, dfx_l3, dfx_h1, dfx_h2, dfx_h3, & - dfy, dfy_l1, dfy_l2, dfy_l3, dfy_h1, dfy_h2, dfy_h3, & - dfz, dfz_l1, dfz_l2, dfz_l3, dfz_h1, dfz_h2, dfz_h3, & - re, rc) bind(C, name="ca_correct_dterm") - - use amrex_fort_module, only : rt => amrex_real - implicit none - - integer, intent(in) :: dfx_l1, dfx_l2, dfx_l3, dfx_h1, dfx_h2, dfx_h3 - integer, intent(in) :: dfy_l1, dfy_l2, dfy_l3, dfy_h1, dfy_h2, dfy_h3 - integer, intent(in) :: dfz_l1, dfz_l2, dfz_l3, dfz_h1, dfz_h2, dfz_h3 - real(rt) , intent(inout) :: dfx(dfx_l1:dfx_h1,dfx_l2:dfx_h2,dfx_l3:dfx_h3) - real(rt) , intent(inout) :: dfy(dfy_l1:dfy_h1,dfy_l2:dfy_h2,dfy_l3:dfy_h3) - real(rt) , intent(inout) :: dfz(dfz_l1:dfz_h1,dfz_l2:dfz_h2,dfz_l3:dfz_h3) - real(rt) , intent(in) :: re(1), rc(1) - -end subroutine ca_correct_dterm diff --git a/Source/radiation/Make.package b/Source/radiation/Make.package index 977a82fee5..1def860164 100644 --- a/Source/radiation/Make.package +++ b/Source/radiation/Make.package @@ -36,7 +36,6 @@ ca_F90EXE_sources += RAD_$(DIM)D.F90 ca_F90EXE_sources += HABEC_$(DIM)D.F90 CEXE_sources += trace_ppm_rad.cpp -ca_f90EXE_sources += CastroRad_$(DIM)d.f90 ca_F90EXE_sources += rad_params.F90 ca_F90EXE_sources += Rad_nd.F90 diff --git a/Source/radiation/RAD_F.H b/Source/radiation/RAD_F.H index 079dedba62..3b6e374ba0 100644 --- a/Source/radiation/RAD_F.H +++ b/Source/radiation/RAD_F.H @@ -53,17 +53,6 @@ BL_FORT_PROC_DECL(CA_INITGROUPS3,ca_initgroups3) BL_FORT_PROC_DECL(CA_COMPUTE_KAPKAP, ca_compute_kapkap) (BL_FORT_FAB_ARG(kapkap), const BL_FORT_FAB_ARG(kap_r)); -#ifdef __cplusplus -extern "C" { -#endif - void ca_correct_dterm - (D_DECL(BL_FORT_FAB_ARG(dfx), - BL_FORT_FAB_ARG(dfy), - BL_FORT_FAB_ARG(dfz)), - const amrex::Real* re, const amrex::Real* rc); -#ifdef __cplusplus -} -#endif #ifdef __cplusplus extern "C" { diff --git a/Source/radiation/RadSolve.cpp b/Source/radiation/RadSolve.cpp index e671cf66f1..12c8fd7b5f 100644 --- a/Source/radiation/RadSolve.cpp +++ b/Source/radiation/RadSolve.cpp @@ -813,6 +813,7 @@ void RadSolve::levelDterm(int level, MultiFab& Dterm, MultiFab& Er, int igroup) const BoxArray& grids = parent->boxArray(level); const DistributionMapping& dmap = parent->DistributionMap(level); const Geometry& geom = parent->Geom(level); + const GeometryData& geomdata = geom.data(); auto dx = parent->Geom(level).CellSizeArray(); const Castro *castro = dynamic_cast(&parent->getLevel(level)); @@ -863,75 +864,64 @@ void RadSolve::levelDterm(int level, MultiFab& Dterm, MultiFab& Er, int igroup) // Correct D terms at physical and coarse-fine boundaries. hem->boundaryDterm(level, &Dterm_face[0], Er, igroup); + // Correct for metric terms (only has an effect in non-Cartesian geometries). + for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) { #ifdef _OPENMP #pragma omp parallel #endif - { - Vector rc, re, s; - - if (geom.IsSPHERICAL() || geom.IsRZ()) { - for (MFIter fi(Dterm_face[0]); fi.isValid(); ++fi) { // omp over boxes - int i = fi.index(); - const Box ® = grids[i]; + for (MFIter mfi(Dterm_face[dir], true); mfi.isValid(); ++mfi) { + const Box& box = mfi.tilebox(); + Array4 const d = Dterm_face[dir][mfi].array(); - parent->Geom(level).GetEdgeLoc(re, reg, 0); - parent->Geom(level).GetCellLoc(rc, reg, 0); - - if (geom.IsSPHERICAL()) { - parent->Geom(level).GetCellLoc(s, reg, 0); + amrex::ParallelFor(box, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (dir == 0 && (geomdata.Coord() == CoordSys::SPHERICAL || geomdata.Coord() == CoordSys::RZ)) { + Real r, s; + edge_center_metric(i, j, k, dir, geomdata, r, s); - const Box &dbox = Dterm_face[0][fi].box(); + d(i,j,k) = d(i,j,k) / (r + 1.0e-50_rt); + } + else if (dir == 1 && geomdata.Coord() == CoordSys::RZ) { + Real r, s; + cell_center_metric(i, j, k, geomdata, r, s); - for (int i = dbox.loVect()[0]; i <= dbox.hiVect()[0]; ++i) { - re[i] *= re[i]; - } -#if AMREX_SPACEDIM >= 2 - Real h2 = 0.5e0_rt * dx[1]; - Real d2 = 1.e0_rt / dx[1]; - for (int j = dbox.loVect()[1]; j <= dbox.hiVect()[1]; ++j) { - s[j] = d2 * (std::cos(s[j] - h2) - std::cos(s[j] + h2)); - } -#endif + d(i,j,k) = d(i,j,k) / r; } + }); + } + } - ca_correct_dterm(D_DECL(BL_TO_FORTRAN(Dterm_face[0][fi]), - BL_TO_FORTRAN(Dterm_face[1][fi]), - BL_TO_FORTRAN(Dterm_face[2][fi])), - re.dataPtr(), rc.dataPtr()); - } #ifdef _OPENMP -#pragma omp barrier +#pragma omp parallel #endif - } - - for (MFIter fi(Dterm,true); fi.isValid(); ++fi) { - const Box& bx = fi.tilebox(); + for (MFIter fi(Dterm,true); fi.isValid(); ++fi) { + const Box& bx = fi.tilebox(); - auto Dx = Dterm_face[0][fi].array(); + auto Dx = Dterm_face[0][fi].array(); #if AMREX_SPACEDIM >= 2 - auto Dy = Dterm_face[1][fi].array(); + auto Dy = Dterm_face[1][fi].array(); #endif #if AMREX_SPACEDIM == 3 - auto Dz = Dterm_face[2][fi].array(); + auto Dz = Dterm_face[2][fi].array(); #endif - auto D = Dterm[fi].array(); + auto D = Dterm[fi].array(); - amrex::ParallelFor(bx, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) - { + amrex::ParallelFor(bx, + [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + { #if AMREX_SPACEDIM == 1 - D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k)) * 0.5_rt; + D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k)) * 0.5_rt; #elif AMREX_SPACEDIM == 2 - D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) + - Dy(i,j,k) + Dy(i,j+1,k)) * 0.25_rt; + D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) + + Dy(i,j,k) + Dy(i,j+1,k)) * 0.25_rt; #else - D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) + - Dy(i,j,k) + Dy(i,j+1,k) + - Dz(i,j,k) + Dz(i,j,k+1)) * (1.0_rt / 6.0_rt); + D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) + + Dy(i,j,k) + Dy(i,j+1,k) + + Dz(i,j,k) + Dz(i,j,k+1)) * (1.0_rt / 6.0_rt); #endif - }); - } + }); } }