Convert correct_dterm to C++ (#2620)

This uses inline metric calculation to support GPU offloading. Note that the vector s was unused and was previously only in the code to enable the call to sphe to fill in re for spherical geometries, which is now handled through edge_center_metric if we're in a spherical geometry. We also do a loop over dimensions to make the code a little more understandable.
AMReX-Astro · Oct 10, 2023 · 22d6158 · 22d6158
1 parent 19e6805
commit 22d6158
Show file tree

Hide file tree

Showing 6 changed files with 38 additions and 126 deletions.
diff --git a/Source/radiation/CastroRad_1d.f90 b/Source/radiation/CastroRad_1d.f90
diff --git a/Source/radiation/CastroRad_2d.f90 b/Source/radiation/CastroRad_2d.f90
diff --git a/Source/radiation/CastroRad_3d.f90 b/Source/radiation/CastroRad_3d.f90
diff --git a/Source/radiation/Make.package b/Source/radiation/Make.package
@@ -36,7 +36,6 @@ ca_F90EXE_sources += RAD_$(DIM)D.F90
 ca_F90EXE_sources += HABEC_$(DIM)D.F90
 
 CEXE_sources += trace_ppm_rad.cpp
-ca_f90EXE_sources += CastroRad_$(DIM)d.f90
 
 ca_F90EXE_sources += rad_params.F90
 ca_F90EXE_sources += Rad_nd.F90

diff --git a/Source/radiation/RAD_F.H b/Source/radiation/RAD_F.H
@@ -53,17 +53,6 @@ BL_FORT_PROC_DECL(CA_INITGROUPS3,ca_initgroups3)
 BL_FORT_PROC_DECL(CA_COMPUTE_KAPKAP, ca_compute_kapkap)
      (BL_FORT_FAB_ARG(kapkap), const BL_FORT_FAB_ARG(kap_r)); 
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-  void ca_correct_dterm
-    (D_DECL(BL_FORT_FAB_ARG(dfx),
-            BL_FORT_FAB_ARG(dfy),
-            BL_FORT_FAB_ARG(dfz)),
-     const amrex::Real* re, const amrex::Real* rc);
-#ifdef __cplusplus
-}
-#endif
 
 #ifdef __cplusplus
 extern "C" {

diff --git a/Source/radiation/RadSolve.cpp b/Source/radiation/RadSolve.cpp
@@ -813,6 +813,7 @@ void RadSolve::levelDterm(int level, MultiFab& Dterm, MultiFab& Er, int igroup)
   const BoxArray& grids = parent->boxArray(level);
   const DistributionMapping& dmap = parent->DistributionMap(level);
   const Geometry& geom = parent->Geom(level);
+  const GeometryData& geomdata = geom.data();
   auto dx = parent->Geom(level).CellSizeArray();
   const Castro *castro = dynamic_cast<Castro*>(&parent->getLevel(level));
 
@@ -863,75 +864,64 @@ void RadSolve::levelDterm(int level, MultiFab& Dterm, MultiFab& Er, int igroup)
   // Correct D terms at physical and coarse-fine boundaries.
   hem->boundaryDterm(level, &Dterm_face[0], Er, igroup);
 
+  // Correct for metric terms (only has an effect in non-Cartesian geometries).
+  for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
 #ifdef _OPENMP
 #pragma omp parallel
 #endif
-  {
-      Vector<Real> rc, re, s;
-
-      if (geom.IsSPHERICAL() || geom.IsRZ()) {
-          for (MFIter fi(Dterm_face[0]); fi.isValid(); ++fi) {  // omp over boxes
-              int i = fi.index();
-              const Box &reg = grids[i];
+      for (MFIter mfi(Dterm_face[dir], true); mfi.isValid(); ++mfi) {
+          const Box& box = mfi.tilebox();
+          Array4<Real> const d = Dterm_face[dir][mfi].array();
 
-              parent->Geom(level).GetEdgeLoc(re, reg, 0);
-              parent->Geom(level).GetCellLoc(rc, reg, 0);
-
-              if (geom.IsSPHERICAL()) {
-                  parent->Geom(level).GetCellLoc(s, reg, 0);
+          amrex::ParallelFor(box,
+          [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+          {
+              if (dir == 0 && (geomdata.Coord() == CoordSys::SPHERICAL || geomdata.Coord() == CoordSys::RZ)) {
+                  Real r, s;
+                  edge_center_metric(i, j, k, dir, geomdata, r, s);
 
-                  const Box &dbox = Dterm_face[0][fi].box();
+                  d(i,j,k) = d(i,j,k) / (r + 1.0e-50_rt);
+              }
+              else if (dir == 1 && geomdata.Coord() == CoordSys::RZ) {
+                  Real r, s;
+                  cell_center_metric(i, j, k, geomdata, r, s);
 
-                  for (int i = dbox.loVect()[0]; i <= dbox.hiVect()[0]; ++i) {
-                      re[i] *= re[i];
-                  }
-#if AMREX_SPACEDIM >= 2
-                  Real h2 = 0.5e0_rt * dx[1];
-                  Real d2 = 1.e0_rt / dx[1];
-                  for (int j = dbox.loVect()[1]; j <= dbox.hiVect()[1]; ++j) {
-                      s[j] = d2 * (std::cos(s[j] - h2) - std::cos(s[j] + h2));
-                  }
-#endif
+                  d(i,j,k) = d(i,j,k) / r;
               }
+          });
+      }
+  }
 
-              ca_correct_dterm(D_DECL(BL_TO_FORTRAN(Dterm_face[0][fi]),
-                                      BL_TO_FORTRAN(Dterm_face[1][fi]),
-                                      BL_TO_FORTRAN(Dterm_face[2][fi])),
-                               re.dataPtr(), rc.dataPtr());
-          }
 #ifdef _OPENMP
-#pragma omp barrier
+#pragma omp parallel
 #endif
-      }
-
-      for (MFIter fi(Dterm,true); fi.isValid(); ++fi) {
-          const Box& bx = fi.tilebox();
+  for (MFIter fi(Dterm,true); fi.isValid(); ++fi) {
+      const Box& bx = fi.tilebox();
 
-          auto Dx = Dterm_face[0][fi].array();
+      auto Dx = Dterm_face[0][fi].array();
 #if AMREX_SPACEDIM >= 2
-          auto Dy = Dterm_face[1][fi].array();
+      auto Dy = Dterm_face[1][fi].array();
 #endif
 #if AMREX_SPACEDIM == 3
-          auto Dz = Dterm_face[2][fi].array();
+      auto Dz = Dterm_face[2][fi].array();
 #endif
 
-          auto D = Dterm[fi].array();
+      auto D = Dterm[fi].array();
 
-          amrex::ParallelFor(bx,
-          [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k)
-          {
+      amrex::ParallelFor(bx,
+      [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k)
+      {
 #if AMREX_SPACEDIM == 1
-              D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k)) * 0.5_rt;
+          D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k)) * 0.5_rt;
 #elif AMREX_SPACEDIM == 2
-              D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
-                          Dy(i,j,k) + Dy(i,j+1,k)) * 0.25_rt;
+          D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
+                      Dy(i,j,k) + Dy(i,j+1,k)) * 0.25_rt;
 #else
-              D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
-                          Dy(i,j,k) + Dy(i,j+1,k) +
-                          Dz(i,j,k) + Dz(i,j,k+1)) * (1.0_rt / 6.0_rt);
+          D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
+                      Dy(i,j,k) + Dy(i,j+1,k) +
+                      Dz(i,j,k) + Dz(i,j,k+1)) * (1.0_rt / 6.0_rt);
 #endif
-          });
-      }
+      });
   }
 }