diff --git a/Exec/science/flame/Prob.cpp b/Exec/science/flame/Prob.cpp index 3036518537..5c538a4ee3 100644 --- a/Exec/science/flame/Prob.cpp +++ b/Exec/science/flame/Prob.cpp @@ -63,25 +63,20 @@ Castro::flame_speed_properties (Real time, Real& rho_fuel_dot) BL_PROFILE("Castro::flame_speed_properties()"); const auto dx = geom.CellSizeArray(); - std::vector spec_names; - for (int i = 0; i < NumSpec; i++) { - spec_names.push_back(short_spec_names_cxx[i]); - } - std::string name; + std::string name; - for (auto nm : spec_names) { - if (nm == "He4") { - name = "rho_omegadot_He4"; - break; - } + for (const auto & nm : short_spec_names_cxx) { + if (nm == "He4") { + name = "rho_omegadot_He4"; + break; + } - if (nm == "he4") { - name = "rho_omegadot_he4"; - break; + if (nm == "he4") { + name = "rho_omegadot_he4"; + break; + } } - - } auto mf = derive(name, time, 0); BL_ASSERT(mf != nullptr); diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp index ad7a125c91..71fe277e71 100644 --- a/Source/driver/Castro.cpp +++ b/Source/driver/Castro.cpp @@ -3465,12 +3465,12 @@ Castro::errorEst (TagBoxArray& tags, // Apply each of the tagging criteria defined in the inputs. - for (int j = 0; j < error_tags.size(); j++) { + for (const auto & etag : error_tags) { std::unique_ptr mf; - if (!error_tags[j].Field().empty()) { - mf = derive(error_tags[j].Field(), time, error_tags[j].NGrow()); + if (! etag.Field().empty()) { + mf = derive(etag.Field(), time, etag.NGrow()); } - error_tags[j](tags, mf.get(), TagBox::CLEAR, TagBox::SET, time, level, geom); + etag(tags, mf.get(), TagBox::CLEAR, TagBox::SET, time, level, geom); } // Now we'll tag any user-specified zones using the full state array. @@ -4418,10 +4418,10 @@ Castro::build_interior_boundary_mask (int ng) { BL_PROFILE("Castro::build_interior_boundary_mask()"); - for (int i = 0; i < ib_mask.size(); ++i) + for (const auto & ibm : ib_mask) { - if (ib_mask[i]->nGrow() == ng) { - return *ib_mask[i]; + if (ibm->nGrow() == ng) { + return *ibm; } } diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp index e0c9699a10..d350caff83 100644 --- a/Source/driver/Castro_advance_sdc.cpp +++ b/Source/driver/Castro_advance_sdc.cpp @@ -96,7 +96,6 @@ Castro::do_advance_sdc (Real time, #endif if (apply_sources()) { -#ifndef AMREX_USE_GPU if (sdc_order == 4) { // if we are 4th order, convert to cell-center Sborder -> Sborder_cc // we'll use Sburn for this memory buffer at the moment @@ -146,8 +145,6 @@ Castro::do_advance_sdc (Real time, if (sdc_order == 2 && use_pslope == 1) { AmrLevel::FillPatch(*this, old_source, old_source.nGrow(), prev_time, Source_Type, 0, NSRC); } -#endif - } // Now compute the advective term for the current node -- this diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index 79bff263e3..e726930dc5 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ -38,12 +38,16 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) MultiFab& S_new = get_new_data(State_Type); +#if AMREX_SPACEDIM <= 2 int coord = geom.Coord(); +#endif BL_PROFILE_VAR("Castro::advance_hydro_ca_umdrv()", CA_UMDRV); +#if AMREX_SPACEDIM >= 2 GpuArray domain_lo = geom.Domain().loVect3d(); GpuArray domain_hi = geom.Domain().hiVect3d(); +#endif #ifdef HYBRID_MOMENTUM GeometryData geomdata = geom.data(); @@ -230,9 +234,11 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) Elixir elix_qavg = q_avg.elixir(); auto q_avg_arr = q_avg.array(); +#if AMREX_SPACEDIM >= 2 q_fc.resize(nbx, NQ); Elixir elix_qfc = q_fc.elixir(); auto q_fc_arr = q_fc.array(); +#endif f_avg.resize(ibx[idir], NUM_STATE); Elixir elix_favg = f_avg.elixir(); diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp index 18da975358..7297719366 100644 --- a/Source/sdc/Castro_sdc.cpp +++ b/Source/sdc/Castro_sdc.cpp @@ -410,6 +410,7 @@ Castro::construct_old_react_source(MultiFab& U_state, auto const U_state_arr = U_state.array(mfi); auto const R_source_arr = R_source.array(mfi); + // construct the reactive source term amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index c765ad9a16..c08b289d78 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -125,8 +125,6 @@ sdc_update_o2(const int i, const int j, const int k, // Here, dt_m is the timestep between time-nodes m and m+1 - burn_t burn_state; - GpuArray U_old; GpuArray U_new; GpuArray R_full; @@ -163,9 +161,13 @@ sdc_update_o2(const int i, const int j, const int k, sdc_solve(dt_m, U_old, U_new, C_zone, sdc_iteration); - // we solved our system to some tolerance, but let's be sure we are conservative by - // reevaluating the reactions and { doing the full step update - single_zone_react_source(U_new, R_full, burn_state); + // we solved our system to some tolerance, but let's be sure + // we are conservative by reevaluating the reactions and + // doing the full step update + burn_t burn_state; + + copy_cons_to_burn_type(U_new, burn_state); + single_zone_react_source(burn_state, R_full); } for (int n = 0; n < NUM_STATE; ++n) { @@ -212,8 +214,7 @@ instantaneous_react(const int i, const int j, const int k, // (in that case, I am not sure if I can assume UTEMP is defined) if (okay_to_burn(i, j, k, state)) { - burn_t burn_state; - single_zone_react_source(i, j, k, state, R_source, burn_state); + single_zone_react_source(i, j, k, state, R_source); } else { for (int n = 0; n < NUM_STATE; ++n) { R_source(i, j, k, n) = 0.0_rt; diff --git a/Source/sdc/Make.package b/Source/sdc/Make.package index c135c88138..b942621345 100644 --- a/Source/sdc/Make.package +++ b/Source/sdc/Make.package @@ -1,4 +1,5 @@ CEXE_headers += Castro_sdc.H +CEXE_headers += sdc_const_to_burn.H CEXE_sources += sdc_util.cpp diff --git a/Source/sdc/sdc_cons_to_burn.H b/Source/sdc/sdc_cons_to_burn.H new file mode 100644 index 0000000000..d4ef43fd8b --- /dev/null +++ b/Source/sdc/sdc_cons_to_burn.H @@ -0,0 +1,59 @@ +#ifndef SDC_CONS_TO_BURN_H +#define SDC_CONS_TO_BURN_H + +#include + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +copy_cons_to_burn_type(const int i, const int j, const int k, + Array4 const& state, + burn_t& burn_state) { + + burn_state.y[SRHO] = state(i,j,k,URHO); + burn_state.y[SMX] = state(i,j,k,UMX); + burn_state.y[SMY] = state(i,j,k,UMY); + burn_state.y[SMZ] = state(i,j,k,UMZ); + burn_state.y[SEDEN] = state(i,j,k,UEDEN); + burn_state.y[SEINT] = state(i,j,k,UEINT); + for (int n = 0; n < NumSpec; n++) { + burn_state.y[SFS+n] = state(i,j,k,UFS+n); + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; n++) { + burn_state.y[SFX+n] = state(i,j,k,UFX+n); + } +#endif + + burn_state.T = state(i,j,k,UTEMP); + burn_state.rho = state(i,j,k,URHO); + +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +copy_cons_to_burn_type(GpuArray const& state, + burn_t& burn_state) { + + burn_state.y[SRHO] = state[URHO]; + burn_state.y[SMX] = state[UMX]; + burn_state.y[SMY] = state[UMY]; + burn_state.y[SMZ] = state[UMZ]; + burn_state.y[SEDEN] = state[UEDEN]; + burn_state.y[SEINT] = state[UEINT]; + for (int n = 0; n < NumSpec; n++) { + burn_state.y[SFS+n] = state[UFS+n]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; n++) { + burn_state.y[SFX+n] = state[UFX+n]; + } +#endif + + burn_state.T = state[UTEMP]; + burn_state.rho = state[URHO]; + +} + + + +#endif diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index c76637c521..ffa0cac806 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -24,68 +24,63 @@ f_sdc_jac(const Real dt_m, JacNetArray2D& Jac, Array1D& f_source, const Real rho, - GpuArray& mom_source, - const Real T_old, - const Real E_var) { + const Real T_old) { // This is used with the Newton solve and returns f and the Jacobian - GpuArray U_full; GpuArray R_full; - // we are not solving the momentum equations - // create a full state -- we need this for some interfaces + // create a burn_t -- this will hold the full state, reconstructed + // from the current solution U - U_full[URHO] = rho; + burn_t burn_state; + + burn_state.y[SRHO] = rho; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = U(n+1); + burn_state.y[SFS+n] = U(n+1); } - U_full[UEINT] = U(NumSpec+1); - U_full[UEDEN] = E_var; + burn_state.y[SEINT] = U(NumSpec+1); + + // we are not solving or accessing the momentum or total energy, + // so we'll just set those to zero here for (int n = 0; n < 3; ++n) { - U_full[UMX+n] = mom_source[n]; + burn_state.y[SMX+n] = 0.0_rt; } + burn_state.y[SEDEN] = 0.0_rt; + // normalize the species auto sum_rhoX = 0.0_rt; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = amrex::max(network_rp::small_x, U_full[UFS+n]); - sum_rhoX += U_full[UFS+n]; + // TODO: this should have a rho weighting on small_x + burn_state.y[SFS+n] = amrex::max(network_rp::small_x, burn_state.y[SFS+n]); + sum_rhoX += burn_state.y[SFS+n]; } for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] *= U_full[URHO] / sum_rhoX; + burn_state.y[SFS+n] *= burn_state.y[SRHO] / sum_rhoX; } - // compute the temperature and species derivatives -- - // maybe this should be done using the burn_state - // returned by single_zone_react_source, since it is - // more consistent T from e + // compute the temperature -- this can be removed shortly, since + // we already do this in single_zone_react_source - eos_extra_t eos_state; - eos_state.rho = U_full[URHO]; - eos_state.T = T_old; // initial guess + burn_state.rho = burn_state.y[SRHO]; + burn_state.T = T_old; // initial guess for (int n = 0; n < NumSpec; ++n) { - eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; + burn_state.xn[n] = burn_state.y[SFS+n] / burn_state.y[SRHO]; } #if NAUX_NET > 0 amrex::Error("error: aux data not currently supported in true SDC"); #endif - eos_state.e = U_full[UEINT] / U_full[URHO]; + burn_state.e = burn_state.y[SEINT] / burn_state.y[SRHO]; - eos(eos_input_re, eos_state); + eos(eos_input_re, burn_state); - U_full[UTEMP] = eos_state.T; - - // we'll create a burn_state to pass stuff from the RHS to the Jac function - - burn_t burn_state; - - single_zone_react_source(U_full, R_full, burn_state); + single_zone_react_source(burn_state, R_full); // we are solving J dU = -f // where f is Eq. 36 evaluated with the current guess for U @@ -102,7 +97,7 @@ f_sdc_jac(const Real dt_m, // form from the simplified-SDC paper (Zingale et al. 2022). Note: // we are not including density anymore. - single_zone_jac(U_full, burn_state, dt_m, Jac); + single_zone_jac(burn_state, dt_m, Jac); // Our Jacobian has the form: J = I - dt dR/dw dwdU // (Eq. 38), so now we fix that @@ -144,7 +139,6 @@ sdc_newton_solve(const Real dt_m, Array1D U_react; Array1D f_source; - GpuArray mom_source; Array1D dU_react; Array1D f; RArray1D f_rhs; @@ -175,19 +169,10 @@ sdc_newton_solve(const Real dt_m, } f_source(NumSpec+1) = U_old[UEINT] + dt_m * C[UEINT]; - // set the momenta to be U_new - for (int n = 0; n < 3; ++n) { - mom_source[n] = U_new[UMX+n]; - } - // temperature will be used as an initial guess in the EOS Real T_old = U_old[UTEMP]; - // we should be able to do an update for this somehow? - - Real E_var = U_new[UEDEN]; - // we need the density too Real rho_new = U_new[URHO]; @@ -212,7 +197,7 @@ sdc_newton_solve(const Real dt_m, while (!converged && iter < MAX_ITER) { int info = 0; - f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, mom_source, T_old, E_var); + f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, T_old); // solve the linear system: Jac dU_react = -f #ifdef NEW_NETWORK_IMPLEMENTATION diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index 5c06d58908..dffd1dd1f0 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -1,33 +1,36 @@ #ifndef SDC_REACT_UTIL_H #define SDC_REACT_UTIL_H +#include +#include + AMREX_GPU_HOST_DEVICE AMREX_INLINE void -single_zone_react_source(GpuArray const& state, - GpuArray& R, - burn_t& burn_state) { +single_zone_react_source(burn_t& burn_state, + GpuArray& R) { // note: we want this burn_state to come out of here so we can // reuse it elsewhere - auto rhoInv = 1.0_rt / state[URHO]; + // we assume that burn_state.y[] holds the current state, except + // for temperature guess, and we will initialize the pieces needed + // to call the RHS + + auto rhoInv = 1.0_rt / burn_state.y[SRHO]; - burn_state.rho = state[URHO]; - burn_state.T = state[UTEMP]; - burn_state.e = state[UEINT] * rhoInv; + burn_state.rho = burn_state.y[SRHO]; + burn_state.e = burn_state.y[SEINT] * rhoInv; for (int n = 0; n < NumSpec; ++n) { - burn_state.xn[n] = amrex::max(amrex::min(state[UFS+n] * rhoInv, 1.0_rt), small_x); + burn_state.xn[n] = amrex::max(amrex::min(burn_state.y[SFS+n] * rhoInv, 1.0_rt), small_x); } #if NAUX_NET > 0 for (int n = 0; n < NumAux; ++n) { - burn_state.aux[n] = state[UFX+n] * rhoInv; + burn_state.aux[n] = burn_state.y[SFX+n] * rhoInv; } #endif - eos_t eos_state; - // Ensure that the temperature going in is consistent with the internal energy. eos(eos_input_re, burn_state); @@ -45,28 +48,26 @@ single_zone_react_source(GpuArray const& state, // species rates come back in terms of molar fractions for (int n = 0; n < NumSpec; ++n) { - R[UFS+n] = state[URHO] * aion[n] * ydot(1+n); + R[UFS+n] = burn_state.rho * aion[n] * ydot(1+n); } - R[UEDEN] = state[URHO] * ydot(net_ienuc); - R[UEINT] = state[URHO] * ydot(net_ienuc); + R[UEDEN] = burn_state.rho * ydot(net_ienuc); + R[UEINT] = burn_state.rho * ydot(net_ienuc); } AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_react_source(const int i, const int j, const int k, Array4 const& state, - Array4 const& R, - burn_t& burn_state) { + Array4 const& R) { - GpuArray state_arr; - GpuArray R_arr; + GpuArray R_arr; - for (int n = 0; n < NUM_STATE; ++n) { - state_arr[n] = state(i,j,k,n); - } + burn_t burn_state; + + copy_cons_to_burn_type(i, j, k, state, burn_state); - single_zone_react_source(state_arr, R_arr, burn_state); + single_zone_react_source(burn_state, R_arr); for (int n = 0; n < NUM_STATE; ++n) { R(i,j,k,n) = R_arr[n]; @@ -75,8 +76,7 @@ single_zone_react_source(const int i, const int j, const int k, AMREX_GPU_HOST_DEVICE AMREX_INLINE void -single_zone_jac(GpuArray const& state, - burn_t& burn_state, const Real dt, +single_zone_jac(burn_t& burn_state, const Real dt, JacNetArray2D& Jac) { #ifdef SIMPLIFIED_SDC diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index 755412197c..6a53995a12 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -12,7 +12,7 @@ #include #endif #include - +#include #include @@ -55,20 +55,7 @@ sdc_vode_solve(const Real dt_m, // store the state - burn_state.y[SRHO] = U_old[URHO]; - burn_state.y[SMX] = U_old[UMX]; - burn_state.y[SMY] = U_old[UMY]; - burn_state.y[SMZ] = U_old[UMZ]; - burn_state.y[SEDEN] = U_old[UEDEN]; - burn_state.y[SEINT] = U_old[UEINT]; - for (int n = 0; n < NumSpec; n++) { - burn_state.y[SFS+n] = U_old[UFS+n]; - } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; n++) { - burn_state.y[SFX+n] = U_old[UFX+n]; - } -#endif + copy_cons_to_burn_type(U_old, burn_state); // we need an initial T guess for the EOS burn_state.T = U_old[UTEMP];