Skip to content

Commit

Permalink
Convert correct_dterm to C++ (#2620)
Browse files Browse the repository at this point in the history
This uses inline metric calculation to support GPU offloading. Note that the vector s was unused and was previously only in the code to enable the call to sphe to fill in re for spherical geometries, which is now handled through edge_center_metric if we're in a spherical geometry. We also do a loop over dimensions to make the code a little more understandable.
  • Loading branch information
maxpkatz authored Oct 10, 2023
1 parent 19e6805 commit 22d6158
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 126 deletions.
18 changes: 0 additions & 18 deletions Source/radiation/CastroRad_1d.f90

This file was deleted.

30 changes: 0 additions & 30 deletions Source/radiation/CastroRad_2d.f90

This file was deleted.

18 changes: 0 additions & 18 deletions Source/radiation/CastroRad_3d.f90

This file was deleted.

1 change: 0 additions & 1 deletion Source/radiation/Make.package
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ ca_F90EXE_sources += RAD_$(DIM)D.F90
ca_F90EXE_sources += HABEC_$(DIM)D.F90

CEXE_sources += trace_ppm_rad.cpp
ca_f90EXE_sources += CastroRad_$(DIM)d.f90

ca_F90EXE_sources += rad_params.F90
ca_F90EXE_sources += Rad_nd.F90
Expand Down
11 changes: 0 additions & 11 deletions Source/radiation/RAD_F.H
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,6 @@ BL_FORT_PROC_DECL(CA_INITGROUPS3,ca_initgroups3)
BL_FORT_PROC_DECL(CA_COMPUTE_KAPKAP, ca_compute_kapkap)
(BL_FORT_FAB_ARG(kapkap), const BL_FORT_FAB_ARG(kap_r));

#ifdef __cplusplus
extern "C" {
#endif
void ca_correct_dterm
(D_DECL(BL_FORT_FAB_ARG(dfx),
BL_FORT_FAB_ARG(dfy),
BL_FORT_FAB_ARG(dfz)),
const amrex::Real* re, const amrex::Real* rc);
#ifdef __cplusplus
}
#endif

#ifdef __cplusplus
extern "C" {
Expand Down
86 changes: 38 additions & 48 deletions Source/radiation/RadSolve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,7 @@ void RadSolve::levelDterm(int level, MultiFab& Dterm, MultiFab& Er, int igroup)
const BoxArray& grids = parent->boxArray(level);
const DistributionMapping& dmap = parent->DistributionMap(level);
const Geometry& geom = parent->Geom(level);
const GeometryData& geomdata = geom.data();
auto dx = parent->Geom(level).CellSizeArray();
const Castro *castro = dynamic_cast<Castro*>(&parent->getLevel(level));

Expand Down Expand Up @@ -863,75 +864,64 @@ void RadSolve::levelDterm(int level, MultiFab& Dterm, MultiFab& Er, int igroup)
// Correct D terms at physical and coarse-fine boundaries.
hem->boundaryDterm(level, &Dterm_face[0], Er, igroup);

// Correct for metric terms (only has an effect in non-Cartesian geometries).
for (int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
#ifdef _OPENMP
#pragma omp parallel
#endif
{
Vector<Real> rc, re, s;

if (geom.IsSPHERICAL() || geom.IsRZ()) {
for (MFIter fi(Dterm_face[0]); fi.isValid(); ++fi) { // omp over boxes
int i = fi.index();
const Box &reg = grids[i];
for (MFIter mfi(Dterm_face[dir], true); mfi.isValid(); ++mfi) {
const Box& box = mfi.tilebox();
Array4<Real> const d = Dterm_face[dir][mfi].array();

parent->Geom(level).GetEdgeLoc(re, reg, 0);
parent->Geom(level).GetCellLoc(rc, reg, 0);

if (geom.IsSPHERICAL()) {
parent->Geom(level).GetCellLoc(s, reg, 0);
amrex::ParallelFor(box,
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
if (dir == 0 && (geomdata.Coord() == CoordSys::SPHERICAL || geomdata.Coord() == CoordSys::RZ)) {
Real r, s;
edge_center_metric(i, j, k, dir, geomdata, r, s);

const Box &dbox = Dterm_face[0][fi].box();
d(i,j,k) = d(i,j,k) / (r + 1.0e-50_rt);
}
else if (dir == 1 && geomdata.Coord() == CoordSys::RZ) {
Real r, s;
cell_center_metric(i, j, k, geomdata, r, s);

for (int i = dbox.loVect()[0]; i <= dbox.hiVect()[0]; ++i) {
re[i] *= re[i];
}
#if AMREX_SPACEDIM >= 2
Real h2 = 0.5e0_rt * dx[1];
Real d2 = 1.e0_rt / dx[1];
for (int j = dbox.loVect()[1]; j <= dbox.hiVect()[1]; ++j) {
s[j] = d2 * (std::cos(s[j] - h2) - std::cos(s[j] + h2));
}
#endif
d(i,j,k) = d(i,j,k) / r;
}
});
}
}

ca_correct_dterm(D_DECL(BL_TO_FORTRAN(Dterm_face[0][fi]),
BL_TO_FORTRAN(Dterm_face[1][fi]),
BL_TO_FORTRAN(Dterm_face[2][fi])),
re.dataPtr(), rc.dataPtr());
}
#ifdef _OPENMP
#pragma omp barrier
#pragma omp parallel
#endif
}

for (MFIter fi(Dterm,true); fi.isValid(); ++fi) {
const Box& bx = fi.tilebox();
for (MFIter fi(Dterm,true); fi.isValid(); ++fi) {
const Box& bx = fi.tilebox();

auto Dx = Dterm_face[0][fi].array();
auto Dx = Dterm_face[0][fi].array();
#if AMREX_SPACEDIM >= 2
auto Dy = Dterm_face[1][fi].array();
auto Dy = Dterm_face[1][fi].array();
#endif
#if AMREX_SPACEDIM == 3
auto Dz = Dterm_face[2][fi].array();
auto Dz = Dterm_face[2][fi].array();
#endif

auto D = Dterm[fi].array();
auto D = Dterm[fi].array();

amrex::ParallelFor(bx,
[=] AMREX_GPU_HOST_DEVICE (int i, int j, int k)
{
amrex::ParallelFor(bx,
[=] AMREX_GPU_HOST_DEVICE (int i, int j, int k)
{
#if AMREX_SPACEDIM == 1
D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k)) * 0.5_rt;
D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k)) * 0.5_rt;
#elif AMREX_SPACEDIM == 2
D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
Dy(i,j,k) + Dy(i,j+1,k)) * 0.25_rt;
D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
Dy(i,j,k) + Dy(i,j+1,k)) * 0.25_rt;
#else
D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
Dy(i,j,k) + Dy(i,j+1,k) +
Dz(i,j,k) + Dz(i,j,k+1)) * (1.0_rt / 6.0_rt);
D(i,j,k) = (Dx(i,j,k) + Dx(i+1,j,k) +
Dy(i,j,k) + Dy(i,j+1,k) +
Dz(i,j,k) + Dz(i,j,k+1)) * (1.0_rt / 6.0_rt);
#endif
});
}
});
}
}

Expand Down

0 comments on commit 22d6158

Please sign in to comment.