From f8445867f31da12a2d8afb2f574fedb36b883043 Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Thu, 5 Oct 2023 12:54:56 -0400
Subject: [PATCH] refactor SDC Newton solver to use burn_t this cleans up some
 interfaces and reduces memory requirements.

---
 Source/sdc/Castro_sdc.cpp     |  9 -----
 Source/sdc/Castro_sdc_util.H  |  3 +-
 Source/sdc/sdc_newton_solve.H | 70 ++++++++++++++---------------------
 Source/sdc/sdc_react_util.H   | 58 +++++++++++++++++------------
 4 files changed, 63 insertions(+), 77 deletions(-)
diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index 8da8e9925a..5974e11698 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -241,9 +241,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             Elixir elix_R_new = R_new.elixir();
             Array4<Real> const& R_new_arr = R_new.array();
 
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1),
-            //                        BL_TO_FORTRAN_3D(U_new_center),
-            //                        BL_TO_FORTRAN_3D(R_new));
             amrex::ParallelFor(bx1,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
@@ -359,9 +356,6 @@ Castro::construct_old_react_source(MultiFab& U_state,
             Elixir elix_r_center = R_center.elixir();
             auto const R_center_arr = R_center.array();
 
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx),
-            //                        BL_TO_FORTRAN_3D(U_center),
-            //                        BL_TO_FORTRAN_3D(R_center));
             amrex::ParallelFor(obx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
@@ -399,9 +393,6 @@ Castro::construct_old_react_source(MultiFab& U_state,
             auto const R_source_arr = R_source.array(mfi);
 
             // construct the reactive source term
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx),
-            //                        BL_TO_FORTRAN_3D(U_state[mfi]),
-            //                        BL_TO_FORTRAN_3D(R_source[mfi]));
             amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H
index 164daff206..ee1066a4d8 100644
--- a/Source/sdc/Castro_sdc_util.H
+++ b/Source/sdc/Castro_sdc_util.H
@@ -219,8 +219,7 @@ instantaneous_react(const int i, const int j, const int k,
     // (in that case, I am not sure if I can assume UTEMP is defined)
 
     if (okay_to_burn(i, j, k, state)) {
-        burn_t burn_state;
-        single_zone_react_source(i, j, k, state, R_source, burn_state);
+        single_zone_react_source(i, j, k, state, R_source);
     } else {
         for (int n = 0; n < NUM_STATE; ++n) {
             R_source(i, j, k, n) = 0.0_rt;
diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H
index 312837cc68..8df20b38bd 100644
--- a/Source/sdc/sdc_newton_solve.H
+++ b/Source/sdc/sdc_newton_solve.H
@@ -20,68 +20,62 @@ f_sdc_jac(const Real dt_m,
           JacNetArray2D& Jac,
           Array1D<Real, 1, NumSpec+1>& f_source,
           const Real rho,
-          GpuArray<Real, 3>& mom_source,
-          const Real T_old,
-          const Real E_var) {
+          const Real T_old) {
 
     // This is used with the Newton solve and returns f and the Jacobian
 
-    GpuArray<Real, NUM_STATE> U_full;
     GpuArray<Real, NUM_STATE> R_full;
 
-    // we are not solving the momentum equations
-    // create a full state -- we need this for some interfaces
+    // create a burn_t -- this will hold the full state, reconstructed
+    // from the current solution U
 
-    U_full[URHO] = rho;
+    burn_t burn_state;
+
+    burn_state.y[SRHO] = rho;
 
     for (int n = 0; n < NumSpec; ++n) {
-        U_full[UFS+n] = U(n+1);
+        burn_state.y[SFS+n] = U(n+1);
     }
 
-    U_full[UEINT] = U(NumSpec+1);
-    U_full[UEDEN] = E_var;
+    burn_state.y[SEINT] = U(NumSpec+1);
+
+    // we are not solving or accessing the momentum or total energy,
+    // so we'll just set those to zero here
 
     for (int n = 0; n < 3; ++n) {
-        U_full[UMX+n] = mom_source[n];
+        burn_state.y[SMX+n] = 0.0_rt;
     }
 
+    burn_state.y[SEDEN] = 0.0_rt;
+
     // normalize the species
     auto sum_rhoX = 0.0_rt;
     for (int n = 0; n < NumSpec; ++n) {
-        U_full[UFS+n] = amrex::max(network_rp::small_x, U_full[UFS+n]);
-        sum_rhoX += U_full[UFS+n];
+        burn_state.y[SFS+n] = amrex::max(burn_state.y[SRHO] * network_rp::small_x, burn_state.y[SFS+n]);
+        sum_rhoX += burn_state.y[SFS+n];
     }
 
     for (int n = 0; n < NumSpec; ++n) {
-        U_full[UFS+n] *= U_full[URHO] / sum_rhoX;
+        burn_state.y[SFS+n] *= burn_state.y[SRHO] / sum_rhoX;
     }
 
-    // compute the temperature and species derivatives --
-    // maybe this should be done using the burn_state
-    // returned by single_zone_react_source, since it is
-    // more consistent T from e
+    // compute the temperature -- this can be removed shortly, since
+    // we already do this in single_zone_react_source
 
-    eos_extra_t eos_state;
-    eos_state.rho = U_full[URHO];
-    eos_state.T = T_old;   // initial guess
+    burn_state.rho = burn_state.y[SRHO];
+    burn_state.T = T_old;   // initial guess
     for (int n = 0; n < NumSpec; ++n) {
-        eos_state.xn[n] = U_full[UFS+n] / U_full[URHO];
+        burn_state.xn[n] = burn_state.y[SFS+n] / burn_state.y[SRHO];
     }
 #if NAUX_NET > 0
     amrex::Error("error: aux data not currently supported in true SDC");
 #endif
 
-    eos_state.e = U_full[UEINT] / U_full[URHO];
+    burn_state.e = burn_state.y[SEINT] / burn_state.y[SRHO];
 
-    eos(eos_input_re, eos_state);
+    eos(eos_input_re, burn_state);
 
-    U_full[UTEMP] = eos_state.T;
-
-    // we'll create a burn_state to pass stuff from the RHS to the Jac function
-
-    burn_t burn_state;
-
-    single_zone_react_source(U_full, R_full, burn_state);
+    single_zone_react_source(burn_state, R_full);
 
     // we are solving J dU = -f
     // where f is Eq. 36 evaluated with the current guess for U
@@ -98,7 +92,7 @@ f_sdc_jac(const Real dt_m,
     // form from the simplified-SDC paper (Zingale et al. 2022).  Note:
     // we are not including density anymore.
 
-    single_zone_jac(U_full, burn_state, dt_m, Jac);
+    single_zone_jac(burn_state, dt_m, Jac);
 
     // Our Jacobian has the form:  J = I - dt dR/dw dwdU
     // (Eq. 38), so now we fix that
@@ -140,7 +134,6 @@ sdc_newton_solve(const Real dt_m,
 
     Array1D<Real, 1, NumSpec+1> U_react;
     Array1D<Real, 1, NumSpec+1> f_source;
-    GpuArray<Real, 3> mom_source;
     Array1D<Real, 1, NumSpec+1> dU_react;
     Array1D<Real, 1, NumSpec+1> f;
     RArray1D f_rhs;
@@ -171,19 +164,10 @@ sdc_newton_solve(const Real dt_m,
     }
     f_source(NumSpec+1) = U_old[UEINT] + dt_m * C[UEINT];
 
-    // set the momenta to be U_new
-    for (int n = 0; n < 3; ++n) {
-        mom_source[n] = U_new[UMX+n];
-    }
-
     // temperature will be used as an initial guess in the EOS
 
     Real T_old = U_old[UTEMP];
 
-    // we should be able to do an update for this somehow?
-
-    Real E_var = U_new[UEDEN];
-
     // we need the density too
 
     Real rho_new = U_new[URHO];
@@ -208,7 +192,7 @@ sdc_newton_solve(const Real dt_m,
 
     while (!converged && iter < MAX_ITER) {
         int info = 0;
-        f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, mom_source, T_old, E_var);
+        f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, T_old);
 
         // solve the linear system: Jac dU_react = -f
 #ifdef NEW_NETWORK_IMPLEMENTATION
diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H
index 5c06d58908..3c066135f0 100644
--- a/Source/sdc/sdc_react_util.H
+++ b/Source/sdc/sdc_react_util.H
@@ -1,33 +1,35 @@
 #ifndef SDC_REACT_UTIL_H
 #define SDC_REACT_UTIL_H
 
+#include <burn_type.H>
+
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void
-single_zone_react_source(GpuArray<Real, NUM_STATE> const& state,
-                         GpuArray<Real, NUM_STATE>& R,
-                         burn_t& burn_state) {
+single_zone_react_source(burn_t& burn_state,
+                         GpuArray<Real, NUM_STATE>& R) {
 
     // note: we want this burn_state to come out of here so we can
     // reuse it elsewhere
 
-    auto rhoInv = 1.0_rt / state[URHO];
+    // we assume that burn_state.y[] holds the current state, except
+    // for temperature guess, and we will initialize the pieces needed
+    // to call the RHS
+
+    auto rhoInv = 1.0_rt / burn_state.y[SRHO];
 
-    burn_state.rho = state[URHO];
-    burn_state.T = state[UTEMP];
-    burn_state.e = state[UEINT] * rhoInv;
+    burn_state.rho = burn_state.y[SRHO];
+    burn_state.e = burn_state.y[SEINT] * rhoInv;
 
     for (int n = 0; n < NumSpec; ++n) {
-       burn_state.xn[n] = amrex::max(amrex::min(state[UFS+n] * rhoInv, 1.0_rt), small_x);
+       burn_state.xn[n] = amrex::max(amrex::min(burn_state.y[SFS+n] * rhoInv, 1.0_rt), small_x);
     }
 
 #if NAUX_NET > 0
     for (int n = 0; n < NumAux; ++n) {
-       burn_state.aux[n] = state[UFX+n] * rhoInv;
+       burn_state.aux[n] = burn_state.y[SFX+n] * rhoInv;
     }
 #endif
 
-    eos_t eos_state;
-
     // Ensure that the temperature going in is consistent with the internal energy.
     eos(eos_input_re, burn_state);
 
@@ -45,28 +47,39 @@ single_zone_react_source(GpuArray<Real, NUM_STATE> const& state,
 
     // species rates come back in terms of molar fractions
     for (int n = 0; n < NumSpec; ++n) {
-        R[UFS+n] = state[URHO] * aion[n] * ydot(1+n);
+        R[UFS+n] = burn_state.rho * aion[n] * ydot(1+n);
     }
 
-    R[UEDEN] = state[URHO] * ydot(net_ienuc);
-    R[UEINT] = state[URHO] * ydot(net_ienuc);
+    R[UEDEN] = burn_state.rho * ydot(net_ienuc);
+    R[UEINT] = burn_state.rho * ydot(net_ienuc);
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void
 single_zone_react_source(const int i, const int j, const int k,
                          Array4<const Real> const& state,
-                         Array4<Real> const& R,
-                         burn_t& burn_state) {
+                         Array4<Real> const& R) {
 
-    GpuArray<Real, NUM_STATE> state_arr;
-    GpuArray<Real, NUM_STATE> R_arr; 
+    GpuArray<Real, NUM_STATE> R_arr;
 
-    for (int n = 0; n < NUM_STATE; ++n) {
-        state_arr[n] = state(i,j,k,n);
+    burn_t burn_state;
+
+    burn_state.y[SRHO] = state(i,j,k,URHO);
+    burn_state.y[SMX] = state(i,j,k,UMX);
+    burn_state.y[SMY] = state(i,j,k,UMY);
+    burn_state.y[SMZ] = state(i,j,k,UMZ);
+    burn_state.y[SEDEN] = state(i,j,k,UEDEN);
+    burn_state.y[SEINT] = state(i,j,k,UEINT);
+    for (int n = 0; n < NumSpec; n++) {
+        burn_state.y[SFS+n] = state(i,j,k,UFS+n);
+    }
+#if NAUX_NET > 0
+    for (int n = 0; n < NumAux; n++) {
+        burn_state.y[SFX+n] = state(i,j,k,UFX+n);
     }
+#endif
 
-    single_zone_react_source(state_arr, R_arr, burn_state);
+    single_zone_react_source(R_arr, burn_state);
 
     for (int n = 0; n < NUM_STATE; ++n) {
         R(i,j,k,n) = R_arr[n];
@@ -75,8 +88,7 @@ single_zone_react_source(const int i, const int j, const int k,
 
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void
-single_zone_jac(GpuArray<Real, NUM_STATE> const& state,
-                burn_t& burn_state, const Real dt,
+single_zone_jac(burn_t& burn_state, const Real dt,
                 JacNetArray2D& Jac) {
 
 #ifdef SIMPLIFIED_SDC