From f8445867f31da12a2d8afb2f574fedb36b883043 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Thu, 5 Oct 2023 12:54:56 -0400 Subject: [PATCH] refactor SDC Newton solver to use burn_t this cleans up some interfaces and reduces memory requirements. --- Source/sdc/Castro_sdc.cpp | 9 ----- Source/sdc/Castro_sdc_util.H | 3 +- Source/sdc/sdc_newton_solve.H | 70 ++++++++++++++--------------------- Source/sdc/sdc_react_util.H | 58 +++++++++++++++++------------ 4 files changed, 63 insertions(+), 77 deletions(-) diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp index 8da8e9925a..5974e11698 100644 --- a/Source/sdc/Castro_sdc.cpp +++ b/Source/sdc/Castro_sdc.cpp @@ -241,9 +241,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) Elixir elix_R_new = R_new.elixir(); Array4 const& R_new_arr = R_new.array(); - // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1), - // BL_TO_FORTRAN_3D(U_new_center), - // BL_TO_FORTRAN_3D(R_new)); amrex::ParallelFor(bx1, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -359,9 +356,6 @@ Castro::construct_old_react_source(MultiFab& U_state, Elixir elix_r_center = R_center.elixir(); auto const R_center_arr = R_center.array(); - // ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx), - // BL_TO_FORTRAN_3D(U_center), - // BL_TO_FORTRAN_3D(R_center)); amrex::ParallelFor(obx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -399,9 +393,6 @@ Castro::construct_old_react_source(MultiFab& U_state, auto const R_source_arr = R_source.array(mfi); // construct the reactive source term - // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx), - // BL_TO_FORTRAN_3D(U_state[mfi]), - // BL_TO_FORTRAN_3D(R_source[mfi])); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index 164daff206..ee1066a4d8 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -219,8 +219,7 @@ instantaneous_react(const int i, const int j, const int k, // (in that case, I am not sure if I can assume UTEMP is defined) if (okay_to_burn(i, j, k, state)) { - burn_t burn_state; - single_zone_react_source(i, j, k, state, R_source, burn_state); + single_zone_react_source(i, j, k, state, R_source); } else { for (int n = 0; n < NUM_STATE; ++n) { R_source(i, j, k, n) = 0.0_rt; diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index 312837cc68..8df20b38bd 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -20,68 +20,62 @@ f_sdc_jac(const Real dt_m, JacNetArray2D& Jac, Array1D& f_source, const Real rho, - GpuArray& mom_source, - const Real T_old, - const Real E_var) { + const Real T_old) { // This is used with the Newton solve and returns f and the Jacobian - GpuArray U_full; GpuArray R_full; - // we are not solving the momentum equations - // create a full state -- we need this for some interfaces + // create a burn_t -- this will hold the full state, reconstructed + // from the current solution U - U_full[URHO] = rho; + burn_t burn_state; + + burn_state.y[SRHO] = rho; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = U(n+1); + burn_state.y[SFS+n] = U(n+1); } - U_full[UEINT] = U(NumSpec+1); - U_full[UEDEN] = E_var; + burn_state.y[SEINT] = U(NumSpec+1); + + // we are not solving or accessing the momentum or total energy, + // so we'll just set those to zero here for (int n = 0; n < 3; ++n) { - U_full[UMX+n] = mom_source[n]; + burn_state.y[SMX+n] = 0.0_rt; } + burn_state.y[SEDEN] = 0.0_rt; + // normalize the species auto sum_rhoX = 0.0_rt; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = amrex::max(network_rp::small_x, U_full[UFS+n]); - sum_rhoX += U_full[UFS+n]; + burn_state.y[SFS+n] = amrex::max(burn_state.y[SRHO] * network_rp::small_x, burn_state.y[SFS+n]); + sum_rhoX += burn_state.y[SFS+n]; } for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] *= U_full[URHO] / sum_rhoX; + burn_state.y[SFS+n] *= burn_state.y[SRHO] / sum_rhoX; } - // compute the temperature and species derivatives -- - // maybe this should be done using the burn_state - // returned by single_zone_react_source, since it is - // more consistent T from e + // compute the temperature -- this can be removed shortly, since + // we already do this in single_zone_react_source - eos_extra_t eos_state; - eos_state.rho = U_full[URHO]; - eos_state.T = T_old; // initial guess + burn_state.rho = burn_state.y[SRHO]; + burn_state.T = T_old; // initial guess for (int n = 0; n < NumSpec; ++n) { - eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; + burn_state.xn[n] = burn_state.y[SFS+n] / burn_state.y[SRHO]; } #if NAUX_NET > 0 amrex::Error("error: aux data not currently supported in true SDC"); #endif - eos_state.e = U_full[UEINT] / U_full[URHO]; + burn_state.e = burn_state.y[SEINT] / burn_state.y[SRHO]; - eos(eos_input_re, eos_state); + eos(eos_input_re, burn_state); - U_full[UTEMP] = eos_state.T; - - // we'll create a burn_state to pass stuff from the RHS to the Jac function - - burn_t burn_state; - - single_zone_react_source(U_full, R_full, burn_state); + single_zone_react_source(burn_state, R_full); // we are solving J dU = -f // where f is Eq. 36 evaluated with the current guess for U @@ -98,7 +92,7 @@ f_sdc_jac(const Real dt_m, // form from the simplified-SDC paper (Zingale et al. 2022). Note: // we are not including density anymore. - single_zone_jac(U_full, burn_state, dt_m, Jac); + single_zone_jac(burn_state, dt_m, Jac); // Our Jacobian has the form: J = I - dt dR/dw dwdU // (Eq. 38), so now we fix that @@ -140,7 +134,6 @@ sdc_newton_solve(const Real dt_m, Array1D U_react; Array1D f_source; - GpuArray mom_source; Array1D dU_react; Array1D f; RArray1D f_rhs; @@ -171,19 +164,10 @@ sdc_newton_solve(const Real dt_m, } f_source(NumSpec+1) = U_old[UEINT] + dt_m * C[UEINT]; - // set the momenta to be U_new - for (int n = 0; n < 3; ++n) { - mom_source[n] = U_new[UMX+n]; - } - // temperature will be used as an initial guess in the EOS Real T_old = U_old[UTEMP]; - // we should be able to do an update for this somehow? - - Real E_var = U_new[UEDEN]; - // we need the density too Real rho_new = U_new[URHO]; @@ -208,7 +192,7 @@ sdc_newton_solve(const Real dt_m, while (!converged && iter < MAX_ITER) { int info = 0; - f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, mom_source, T_old, E_var); + f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, T_old); // solve the linear system: Jac dU_react = -f #ifdef NEW_NETWORK_IMPLEMENTATION diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index 5c06d58908..3c066135f0 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -1,33 +1,35 @@ #ifndef SDC_REACT_UTIL_H #define SDC_REACT_UTIL_H +#include + AMREX_GPU_HOST_DEVICE AMREX_INLINE void -single_zone_react_source(GpuArray const& state, - GpuArray& R, - burn_t& burn_state) { +single_zone_react_source(burn_t& burn_state, + GpuArray& R) { // note: we want this burn_state to come out of here so we can // reuse it elsewhere - auto rhoInv = 1.0_rt / state[URHO]; + // we assume that burn_state.y[] holds the current state, except + // for temperature guess, and we will initialize the pieces needed + // to call the RHS + + auto rhoInv = 1.0_rt / burn_state.y[SRHO]; - burn_state.rho = state[URHO]; - burn_state.T = state[UTEMP]; - burn_state.e = state[UEINT] * rhoInv; + burn_state.rho = burn_state.y[SRHO]; + burn_state.e = burn_state.y[SEINT] * rhoInv; for (int n = 0; n < NumSpec; ++n) { - burn_state.xn[n] = amrex::max(amrex::min(state[UFS+n] * rhoInv, 1.0_rt), small_x); + burn_state.xn[n] = amrex::max(amrex::min(burn_state.y[SFS+n] * rhoInv, 1.0_rt), small_x); } #if NAUX_NET > 0 for (int n = 0; n < NumAux; ++n) { - burn_state.aux[n] = state[UFX+n] * rhoInv; + burn_state.aux[n] = burn_state.y[SFX+n] * rhoInv; } #endif - eos_t eos_state; - // Ensure that the temperature going in is consistent with the internal energy. eos(eos_input_re, burn_state); @@ -45,28 +47,39 @@ single_zone_react_source(GpuArray const& state, // species rates come back in terms of molar fractions for (int n = 0; n < NumSpec; ++n) { - R[UFS+n] = state[URHO] * aion[n] * ydot(1+n); + R[UFS+n] = burn_state.rho * aion[n] * ydot(1+n); } - R[UEDEN] = state[URHO] * ydot(net_ienuc); - R[UEINT] = state[URHO] * ydot(net_ienuc); + R[UEDEN] = burn_state.rho * ydot(net_ienuc); + R[UEINT] = burn_state.rho * ydot(net_ienuc); } AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_react_source(const int i, const int j, const int k, Array4 const& state, - Array4 const& R, - burn_t& burn_state) { + Array4 const& R) { - GpuArray state_arr; - GpuArray R_arr; + GpuArray R_arr; - for (int n = 0; n < NUM_STATE; ++n) { - state_arr[n] = state(i,j,k,n); + burn_t burn_state; + + burn_state.y[SRHO] = state(i,j,k,URHO); + burn_state.y[SMX] = state(i,j,k,UMX); + burn_state.y[SMY] = state(i,j,k,UMY); + burn_state.y[SMZ] = state(i,j,k,UMZ); + burn_state.y[SEDEN] = state(i,j,k,UEDEN); + burn_state.y[SEINT] = state(i,j,k,UEINT); + for (int n = 0; n < NumSpec; n++) { + burn_state.y[SFS+n] = state(i,j,k,UFS+n); + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; n++) { + burn_state.y[SFX+n] = state(i,j,k,UFX+n); } +#endif - single_zone_react_source(state_arr, R_arr, burn_state); + single_zone_react_source(R_arr, burn_state); for (int n = 0; n < NUM_STATE; ++n) { R(i,j,k,n) = R_arr[n]; @@ -75,8 +88,7 @@ single_zone_react_source(const int i, const int j, const int k, AMREX_GPU_HOST_DEVICE AMREX_INLINE void -single_zone_jac(GpuArray const& state, - burn_t& burn_state, const Real dt, +single_zone_jac(burn_t& burn_state, const Real dt, JacNetArray2D& Jac) { #ifdef SIMPLIFIED_SDC