diff --git a/.github/workflows/uniform_cube.yml b/.github/workflows/uniform_cube.yml new file mode 100644 index 0000000000..33d7e8b5df --- /dev/null +++ b/.github/workflows/uniform_cube.yml @@ -0,0 +1,39 @@ +name: uniform_cube + +on: [pull_request] +jobs: + uniform_cube: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Get submodules + run: | + git submodule update --init + cd external/Microphysics + git fetch; git checkout development + cd ../amrex + git fetch; git checkout development + cd ../.. + + - name: Install dependencies + run: | + sudo apt-get update -y -qq + sudo apt-get -qq -y install curl g++>=9.3.0 + + - name: Compile uniform_cube + run: | + cd Exec/gravity_tests/uniform_cube + make USE_MPI=FALSE -j 2 + + - name: Run uniform_cube + run: | + cd Exec/gravity_tests/uniform_cube + ./convergence.sh &> cube_convergence.out + + - name: Compare to the benchmark + run: | + cd Exec/gravity_tests/uniform_cube + diff cube_convergence.out ci-benchmarks/cube_convergence.out diff --git a/.github/workflows/uniform_sphere.yml b/.github/workflows/uniform_sphere.yml new file mode 100644 index 0000000000..1dbd01f90b --- /dev/null +++ b/.github/workflows/uniform_sphere.yml @@ -0,0 +1,39 @@ +name: uniform_sphere + +on: [pull_request] +jobs: + uniform_sphere: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Get submodules + run: | + git submodule update --init + cd external/Microphysics + git fetch; git checkout development + cd ../amrex + git fetch; git checkout development + cd ../.. + + - name: Install dependencies + run: | + sudo apt-get update -y -qq + sudo apt-get -qq -y install curl g++>=9.3.0 + + - name: Compile uniform_sphere + run: | + cd Exec/gravity_tests/uniform_sphere + make USE_MPI=FALSE -j 2 + + - name: Run uniform_sphere + run: | + cd Exec/gravity_tests/uniform_sphere + ./convergence.sh &> sphere_convergence.out + + - name: Compare to the benchmark + run: | + cd Exec/gravity_tests/uniform_sphere + diff sphere_convergence.out ci-benchmarks/sphere_convergence.out diff --git a/.github/workflows/wdmerger_collision-compare.yml b/.github/workflows/wdmerger_collision-compare.yml new file mode 100644 index 0000000000..cd1e9c8b5f --- /dev/null +++ b/.github/workflows/wdmerger_collision-compare.yml @@ -0,0 +1,47 @@ +name: wdmerger_collision + +on: [pull_request] +jobs: + wdmerger_collision-2d: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Get submodules + run: | + git submodule update --init + cd external/Microphysics + git fetch; git checkout development + cd ../amrex + git fetch; git checkout development + cd ../.. + + - name: Install dependencies + run: | + sudo apt-get update -y -qq + sudo apt-get -qq -y install curl cmake jq clang g++>=9.3.0 + + - name: Build the fextrema tool + run: | + cd external/amrex/Tools/Plotfile + make programs=fextrema -j4 + + - name: Compile wdmerger 2D + run: | + cd Exec/science/wdmerger + make USE_MPI=FALSE DIM=2 -j4 + + - name: Run wdmerger 2D + run: | + cd Exec/science/wdmerger + cp tests/wdmerger_collision/inputs_2d_collision.test . + cp tests/wdmerger_collision/inputs_2d_collision . 
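+          # run the collision test; plotfile output is forced on so the fextrema check in the next step has a plotfile (plt00095) to compare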
+          ./Castro2d.gnu.ex inputs_2d_collision.test amr.plot_files_output=1 castro.output_at_completion=1 + + - name: Check the extrema + run: | + cd Exec/science/wdmerger + ../../../external/amrex/Tools/Plotfile/fextrema.gnu.ex plt00095 > fextrema.out + diff fextrema.out ci-benchmarks/wdmerger_collision_2D.out
diff --git a/CHANGES.md b/CHANGES.md index 7db160e12f..15ed72d5aa 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,18 @@ +# 23.08 + + * Time evolution without subcycling on the fine levels, which is enabled via + the runtime parameter amr.subcycling_mode = "None", has been significantly + rewritten to improve computational performance for certain cases. When the + fine levels do not subcycle, the evolution can be done by processing all + of the hydro updates on the fine level together and then immediately doing + the flux correction to sync the coarse and fine level fluxes at the + boundary between levels. This is how many AMR codes that do not subcycle + are written. Castro now does this when the user chooses not to subcycle. + The benefit of this approach is most evident for problems with Poisson + gravity that use a multigrid solve, as we can significantly reduce the + number of Poisson solves per step, performing only a single composite + (multi-level) solve at the new time simultaneously for all levels. (#2505) + # 23.07 * The parameter castro.state_nghost, which allowed State_Type to have ghost
diff --git a/Docs/source/EOSNetwork.rst b/Docs/source/EOS.rst similarity index 54% rename from Docs/source/EOSNetwork.rst rename to Docs/source/EOS.rst index 390f194709..6ddc5321f9 100644 --- a/Docs/source/EOSNetwork.rst +++ b/Docs/source/EOS.rst @@ -1,24 +1,15 @@ -************ -Microphysics -************ - +***************** Equation of State -================= - -Standard Castro EOSes ---------------------- +***************** -Castro is written in a modular fashion so that the EOS and network -burning routines can be supplied by the user. However, for the -examples presented later we use several EOS and network routines -that come with the Microphysics distribution. +Castro is written in a modular fashion so that the EOS +can be supplied by the user. No equations of state +are distributed with Castro; instead, they are part +of the separate `Microphysics repository `_. -Castro relies on routines to calculate the equation of state (EOS) -of a fluid, as well as a species network to define the components of -the fluid. The network optionally has the ability to do nuclear burning, -but for this section its main purpose is in defining the species so that -the EOS can calculate fluid properties that depend on composition, such -as electron fraction. +Most equations of state are written to take :math:`(\rho, T, X_k)` as +input and return the needed thermodynamic quantities. For other +inputs, a Newton-Raphson root find is done. Most of the standard problem setups in Castro (such as the Sedov blast wave) use the ``gamma_law`` EOS. This represents a gamma law gas, with equation of state: @@ -28,7 +19,7 @@ use the ``gamma_law`` EOS. This represents a gamma law gas, with equation of sta The gas is currently assumed to be monatomic and ideal. Runtime Parameters ------------------- +================== When inverting the EOS (e.g. by using ``eos_input_re``), an initial guess for the temperature is required. This guess is provided by the runtime parameter @@ -36,7 +27,7 @@ the temperature is required. This guess is provided by the runtime parameter (it will vary depending on which EOS is used).
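As a concrete illustration of this inversion, here is a minimal sketch of an ``eos_input_re`` call. It uses the ``eos_t`` interface documented in the next section and the documented temperature guess; the ``state(i,j,k,...)`` indexing follows the example below and is not literal Castro source:

.. code:: c++

   // sketch: recover T (and the other thermodynamic outputs) from (rho, e, X_k).
   // The temperature set here only seeds the Newton-Raphson iteration.
   eos_t eos_state;
   eos_state.rho = state(i,j,k,URHO);
   eos_state.e   = state(i,j,k,UEINT) / state(i,j,k,URHO);
   eos_state.T   = T_guess;
   for (int n = 0; n < NumSpec; ++n) {
       eos_state.xn[n] = state(i,j,k,UFS+n) / state(i,j,k,URHO);
   }
   eos(eos_input_re, eos_state);
   // on return, eos_state.T is consistent with the input (rho, e)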
EOS Interfaces and Parameters ------------------------------ +============================= .. index:: eos_t @@ -44,9 +35,11 @@ Each EOS should have two main routines by which it interfaces to the rest of Castro. At the beginning of the simulation, ``eos_init`` will perform any initialization steps and save EOS variables (mainly ``smallt``, the temperature floor, and ``smalld``, the -density floor). Then, whenever you want to call the EOS, use:: +density floor). Then, whenever you want to call the EOS, use: + +.. code:: c++ - eos (eos_input, eos_state) + eos (eos_input, eos_state) The first argument specifies the inputs to the EOS. The options that are currently available are stored in Microphysics in @@ -69,19 +62,19 @@ set of thermodynamic variables. When calling the EOS, you should first fill the variables that are the inputs, for example with -:: +.. code:: c++ - eos_t eos_state; - ... - eos_state.rho = state(i,j,k,URHO); - eos_state.T = state(i,j,k,UTEMP); - eos_state.e = state(i,j,k,UEINT) / state(i,j,k,URHO); - for (int n = 0; n < NumSpec; ++n) { - eos_state.xn[n] = state(i,j,k,UFS+n) / state(i,j,k,URHO); - } - for (int n = 0; n < NumAux; ++n) { - eos_state.aux[n] = state(i,j,k,UFX+n) / state(i,j,k,URHO); - } + eos_t eos_state; + ... + eos_state.rho = state(i,j,k,URHO); + eos_state.T = state(i,j,k,UTEMP); + eos_state.e = state(i,j,k,UEINT) / state(i,j,k,URHO); + for (int n = 0; n < NumSpec; ++n) { + eos_state.xn[n] = state(i,j,k,UFS+n) / state(i,j,k,URHO); + } + for (int n = 0; n < NumAux; ++n) { + eos_state.aux[n] = state(i,j,k,UFX+n) / state(i,j,k,URHO); + } Whenever the ``eos_state`` type is initialized, the thermodynamic state variables are filled with unphysical numbers. If you do not @@ -94,15 +87,9 @@ will likely not converge. Usually a prior value of the temperature or density suffices if it’s available, but if not then use ``T_guess`` or ``small_dens``. -The `Microphysics `__ -repository is the collection of microphysics routines that are compatible with the -AMReX-Astro codes. We refer you to the documentation in that repository for how to set it up -and for information on the equations of state provided. That documentation -also goes into more detail about the details of the EOS code, in case you are interested in -how it works (and in case you want to develop your own EOS). Required Thermodynamics Quantities ----------------------------------- +================================== Three input quantities are required of any EOS: @@ -145,7 +132,7 @@ variables that are optional output into the plotfiles. Composition derivatives ------------------------ +======================= .. index:: eos_xderivs_t @@ -188,73 +175,3 @@ A separate type, ``eos_xderivs_t`` provides access to derivatives with respect t \end{align} (see :cite:`maestro:III`, Appendix A). - - -Nuclear Network -=============== - -.. index:: burn_t - -The nuclear network serves two purposes: it defines the fluid components used -in both the equation of state and the hydrodynamics, and it evolves those -components through a nuclear burning step. Castro comes with a ``general_null`` -network (which lives in the ``networks/`` directory). This is a bare interface for a -nuclear reaction network. No reactions are enabled, and no auxiliary variables -are accepted. It contains several sets of isotopes; for example, -``networks/general_null/triple_alpha_plus_o.net`` would describe the -isotopes needed to represent the triple-\ :math:`\alpha` reaction converting -helium into carbon, as well as oxygen and iron. 
- -The main interface file, ``network.f90``, is a wrapper function. The -actual network details are defined in ``actual_network.f90``, a -file which is automatically generated in your work directory when you compile. -It supplies the number and names of species and auxiliary variables, as -well as other initializing data, such as their mass numbers, proton numbers, -and the binding energies. - -The burning front-end interface, ``networks/burner.f90``, accepts a different -derived type called the ``burn_t`` type. Like the ``eos_t``, it has entries -for the basic thermodynamic quantities: - -:: - - use burn_type_module - ... - type (burn_t) :: burn_state - ... - burn_state % rho = state(i,j,k,URHO) - burn_state % T = state(i,j,k,UTEMP) - burn_state % e = state(i,j,k,UEINT) / state(i,j,k,URHO) - burn_state % xn = state(i,j,k,UFS:UFS+nspec-1) / state(i,j,k,URHO) - -It takes in an input ``burn_t`` and returns an output ``burn_t`` after -the burning has completed. The nuclear energy release can be computed by -taking the difference of ``burn_state_out % e`` and -``burn_state_in % e``. The species change can be computed analogously. -In normal operation in Castro the integration occurs over a time interval -of :math:`\Delta t/2`, where :math:`\Delta t` is the hydrodynamics timestep. - -If you are interested in using actual nuclear burning networks, -you should download the `Microphysics `__ -repository. This is a collection of microphysics routines that are compatible with the -AMReX Astro codes. We refer you to the documentation in that repository for how to set it up -and for information on the networks provided. That documentation -also goes into more detail about the details of the network code, in case you are interested in -how it works (and in case you want to develop your own network). - - -Controlling burning -------------------- - -There are a number of reactions-related parameters that can be set at runtime -in the inputs file. Reactions are enabled by setting:: - - castro.do_react = 1 - -(Note: turning reactions off for problems where they're not required can help improve -the efficiency). - -It is possible to set the maximum and minimum temperature and density for allowing -reactions to occur in a zone using the parameters ``castro.react_T_min``, -``castro.react_T_max``, ``castro.react_rho_min`` and ``castro.react_rho_max``. -
diff --git a/Docs/source/FlowChart.rst b/Docs/source/FlowChart.rst index 419c58b877..a8ffa8c525 100644 --- a/Docs/source/FlowChart.rst +++ b/Docs/source/FlowChart.rst @@ -1,3 +1,5 @@ +.. _sec:flowchart: + ********* Flowchart ********* @@ -30,7 +32,7 @@ the different code paths. These fall into two categories: reaction source as inputs and do the final conservative update by integrating the reaction system using an ODE solver with the explicit advective source included in a - piecewise-constant-in-time fastion. + piecewise-constant-in-time fashion. This is described in :cite:`castro_simple_sdc`. - The "true SDC" method. This fully couples the hydro and reactions to either 2nd or 4th order. This approximates the integral in @@ -56,8 +58,7 @@ The time-integration method used is controlled by order integration implemented. At the moment, this does not support multilevel domains. Note: because of differences in the interfaces with the default Strang method, you must compile with ``USE_TRUE_SDC = TRUE`` for this - method to work (in particular, this defines ``EXTRA_THERMO`` which enables some - additional EOS derivatives). + method to work.
* ``time_integration_method = 3``: this is the simplified SDC method described above that uses the CTU hydro advection and an ODE
diff --git a/Docs/source/faq.rst b/Docs/source/faq.rst index 48692795f3..fb144d2a17 100644 --- a/Docs/source/faq.rst +++ b/Docs/source/faq.rst @@ -202,6 +202,9 @@ Managing Runs Runtime Errors ============== +.. index:: castro.limit_fluxes_on_small_dens, castro.state_interp_order, + castro.abundance_failure_tolerance, castro.abundance_failure_rho_cutoff + #. *When running with retries, Castro requests too many substeps and crashes.* @@ -221,7 +224,8 @@ Runtime Errors If the error continues, try to increase the tolerance of determining specie abundance validity check by setting ``castro.abundance_failure_tolerance`` - to a higher value. + to a higher value, or increase the density floor below which this check is + ignored by changing ``castro.abundance_failure_rho_cutoff``. Visualization =============
diff --git a/Docs/source/index.rst b/Docs/source/index.rst index 97937962a6..14b646d378 100644 --- a/Docs/source/index.rst +++ b/Docs/source/index.rst @@ -37,6 +37,8 @@ https://github.com/amrex-astro/Castro debugging Hydrodynamics hse + EOS + reactions mhd gravity diffusion @@ -44,7 +46,6 @@ https://github.com/amrex-astro/Castro sponge radiation Particles - EOSNetwork sdc AMR ConvertCheckpoint
diff --git a/Docs/source/reactions.rst b/Docs/source/reactions.rst new file mode 100644 index 0000000000..21d6c64091 --- /dev/null +++ b/Docs/source/reactions.rst @@ -0,0 +1,299 @@ +********* +Reactions +********* + + +.. index:: burn_t + +The nuclear network serves two purposes: it defines the fluid +components used in both the equation of state and the hydrodynamics, +and it evolves those components through a nuclear burning step. All +of the reaction networks that Castro uses are provided by the +`Microphysics repository `_. + +.. note:: + + An arbitrary reaction network can be created for Castro via the + `pynucastro library `_. + +Microphysics comes with a ``general_null`` +network. This is a bare interface for a +nuclear reaction network. No reactions are enabled, and no auxiliary variables +are accepted. It contains several sets of isotopes; for example, +``Microphysics/networks/general_null/triple_alpha_plus_o.net`` would describe the +isotopes needed to represent the triple-\ :math:`\alpha` reaction converting +helium into carbon, as well as oxygen and iron. + +The main interface for burning is in ``Microphysics/interfaces/burner.H``: + +.. code:: c++ + + void burner (burn_t& state, Real dt) + +Here the ``burn_t`` type contains all of the information needed for the reaction +network. It is similar to the equation of state ``eos_t``. + +.. tip:: + + The equation of state routines can be called directly with a ``burn_t`` in place + of an ``eos_t``. + +Castro has several different modes of coupling reactions and +hydrodynamics, selected through the parameter +``castro.time_integration_method``. See :ref:`sec:flowchart` for the +details. + +Controlling burning +=================== + +.. index:: castro.react_T_min, castro.react_T_max, castro.react_rho_min, castro.react_rho_max + +There are a number of reactions-related parameters that can be set at runtime +in the inputs file. Reactions are enabled by setting:: + + castro.do_react = 1 + +(Note: turning reactions off for problems where they're not required can help improve +the efficiency).
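To make the ``burner()`` interface above concrete, here is a minimal sketch of the Strang-style call pattern that the flowchart below walks through in detail. This is an illustration, not the literal ``Castro_react.cpp`` code: it omits the total-energy and plotfile bookkeeping, and ``dt`` stands for the burn interval:

.. code:: c++

   burn_t burn_state;

   // thermodynamic inputs, taken from the conserved state
   burn_state.rho = state(i,j,k,URHO);
   burn_state.T   = state(i,j,k,UTEMP);
   burn_state.e   = state(i,j,k,UEINT) / state(i,j,k,URHO);
   for (int n = 0; n < NumSpec; ++n) {
       burn_state.xn[n] = state(i,j,k,UFS+n) / state(i,j,k,URHO);
   }

   burn_state.success = true;   // the integrator flips this to false on failure

   burner(burn_state, dt);      // integrate the network over dt

   if (burn_state.success) {
       // store the burned energy and mass fractions back in the conserved state
       state(i,j,k,UEINT) = state(i,j,k,URHO) * burn_state.e;
       for (int n = 0; n < NumSpec; ++n) {
           state(i,j,k,UFS+n) = state(i,j,k,URHO) * burn_state.xn[n];
       }
   }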
+ +It is possible to set the maximum and minimum temperature and density for allowing +reactions to occur in a zone using the parameters ``castro.react_T_min``, +``castro.react_T_max``, ``castro.react_rho_min`` and ``castro.react_rho_max``. + +Reactions Flowchart +=================== + +Here we describe how the ``burn_t`` is set up before the burn and how we update the +Castro state afterwards for both Strang and simplified-SDC. + +Strang +------ + +In ``Castro_react.cpp``, the flow is: + +* create ``burn_t burn_state`` + +* if ``NSE_NET`` is defined, initialize the chemical potentials that + will be used as an initial guess for the NSE solve + + * ``burn_state.mu_p`` $= U(\mu_p)$ + + * ``burn_state.mu_n`` $= U(\mu_n)$ + + * ``burn_state.y_e`` $= 0$ (this will be filled if needed by the NSE routines) + +* initialize ``burn_state.dx`` -- this is used for some NSE conditions. + +* set ``burn_state.success = true`` : we assume that the burn was successful. The + integrator will set this to ``false`` if a problem occurred. + +* fill the thermodynamic quantities for input to the burner: + + * ``burn_state.rho`` $= U(\rho)$ + + * ``burn_state.e`` $= U(\rho e) / U(\rho)$ + + * ``burn_state.T`` $= U(T)$ + + .. note:: + + It is assumed here that the temperature is thermodynamically + consistent with the energy. For most networks, the temperature + passed in will be used to set the thermodynamics in the burner. + + * ``burn_state.xn[]`` $= U(\rho X_k) / U(\rho)$ + + * if ``NAUX_NET > 0``: ``burn_state.aux[]`` $= U(\rho \alpha_k) / U(\rho)$ + +* If we are doing ``castro.drive_initial_convection`` then we set + ``burn_state.T_fixed`` by interpolating from the initial model. + +* Initialize the metadata that is used for diagnostics + +* Call the burner: + + * We check to make sure that $T$ and $\rho$ are within the limits given + by ``castro.react_T_min``, ``castro.react_T_max``, ``castro.react_rho_min``, + and ``castro.react_rho_max``. + + * The burner will set ``burn_state.success = false`` if it failed. This can happen + for a number of reasons and is integrator-dependent. + + .. note:: + + Castro will not abort by default here if the burn failed. + Instead we leave it to the :ref:`ch:retry` mechanism to attempt + the step again with a smaller timestep. + +* Store the burning sources for plotting + + .. index:: Reactions_Type + + We use the ``Reactions_Type`` ``StateData`` to hold the reactive + sources that are output to the plotfile and the ``burn_weights`` + ``MultiFab`` to hold the number of right-hand side evaluations for + diagnostics. + + We fill these as: + + .. index:: castro.store_omega_dot + + * energy generation rate: + + $\mathtt{reactions}(\rho e) = \dfrac{U(\rho) \, \cdot\, \mathtt{burn\_state.e}\, -\, U(\rho e)}{\Delta t}$ + + * species and auxiliary creation rates (only if ``castro.store_omegadot = 1``): + + * $\mathtt{reactions}(\rho X_k) = U(\rho) \dfrac{\mathtt{burn\_state.xn[k]}\, -\, U(\rho X_k) / U(\rho)}{\Delta t}$ + + * $\mathtt{reactions}(\rho \alpha_k) = U(\rho) \dfrac{\mathtt{burn\_state.aux[k]}\, -\, U(\rho \alpha_k) / U(\rho)}{\Delta t}$ + + * NSE flag (only if ``NSE`` is defined). This simply stores the value of ``burn_state.nse``. + +* Update the conserved state: + + .. note:: + + $\rho$ and $\rho \ub$ are unchanged by reactions so those variables are not + updated here. They are already the "new" state.
+ + * $U^\mathrm{new}(\rho e) = U^\mathrm{new}(\rho) \cdot \mathtt{burn\_state.e}$ + + * $U^\mathrm{new}(\rho E) = U^\mathrm{old}(\rho E) + (U^\mathrm{new}(\rho e) - U^\mathrm{old}(\rho e))$ + + * $U^\mathrm{new}(\rho X_k) = U^\mathrm{new}(\rho) \cdot \mathtt{burn\_state.xn[k]}$ + + * if ``NAUX_NET > 0``: $U^\mathrm{new}(\rho \alpha_k) = U^\mathrm{new}(\rho) \cdot \mathtt{burn\_state.aux[k]}$ + + * if ``NSE_NET`` : + + * $U(\mu_p) = \mathtt{burn\_state.mu\_p}$ + + * $U(\mu_n) = \mathtt{burn\_state.mu\_n}$ + + + +Simplified-SDC +-------------- + +In ``Castro_react.cpp``, the flow is: + +* create ``burn_t burn_state`` + +* if ``NSE_NET`` is defined, initialize the chemical potentials that + will be used as an initial guess for the NSE solve + + * ``burn_state.mu_p`` $= U(\mu_p)$ + + * ``burn_state.mu_n`` $= U(\mu_n)$ + + * ``burn_state.y_e`` $= 0$ (this will be filled if needed by the NSE routines) + +* initialize ``burn_state.dx`` -- this is used for some NSE conditions. + +* set ``burn_state.success = true`` : we assume that the burn was successful. The + integrator will set this to ``false`` if a problem occurred. + +* fill the conserved state -- this is stored in the ``burn_t`` only when + we are using simplified-SDC. + + * ``burn_state.y[SRHO]`` $= U(\rho)$ + + * ``burn_state.y[SMX]`` $= U(\rho u)$ + + * ``burn_state.y[SMY]`` $= U(\rho v)$ + + * ``burn_state.y[SMZ]`` $= U(\rho w)$ + + * ``burn_state.y[SEDEN]`` $= U(\rho E)$ + + * ``burn_state.y[SEINT]`` $= U(\rho e)$ + + * ``burn_state.y[SFS+k]`` $= U(\rho X_k)$ for $k = 0 \ldots N_{\mathrm{spec}} - 1$ + + * if ``NAUX_NET > 0`` : ``burn_state.y[SFX+k]`` $= U(\rho \alpha_k)$ for $k = 0 \ldots N_{\mathrm{aux}} - 1$ + + +* fill the thermodynamic quantities in the ``burn_t`` : + + * ``burn_state.rho`` $= U(\rho)$ + + * ``burn_state.T`` $= U(T)$ -- this is mainly going to be used as an initial guess + + .. note:: + + We don't initialize ``burn_state.xn[]`` or ``burn_state.aux[]`` + + * if ``NAUX_NET > 0``: ``burn_state.aux[]`` $= U(\rho \alpha_k) / U(\rho)$ + +* If we are doing ``castro.drive_initial_convection`` then we set + ``burn_state.T_fixed`` by interpolating from the initial model. + +* Store the advective update that will be used during the SDC integration. + +* Compute + +* Initialize the metadata that is used for diagnostics + +* Call the burner: + + * We check to make sure that $T$ and $\rho$ are within the limits given + by ``castro.react_T_min``, ``castro.react_T_max``, ``castro.react_rho_min``, + and ``castro.react_rho_max``. + + * The burner will set ``burn_state.success = false`` if it failed. This can happen + for a number of reasons and is integrator-dependent. + + .. note:: + + Castro will not abort by default here if the burn failed. + Instead we leave it to the :ref:`ch:retry` mechanism to attempt + the step again with a smaller timestep. + +* Store the burning sources for plotting + + .. index:: Reactions_Type + + We use the ``Reactions_Type`` ``StateData`` to hold the reactive + sources that are output to the plotfile and the ``burn_weights`` + ``MultiFab`` to hold the number of right-hand side evaluations for + diagnostics. + + We fill these as: + + ..
index:: castro.store_omega_dot + + * energy generation rate: + + $\mathtt{reactions}(\rho e) = \dfrac{U(\rho) \, \cdot\, \mathtt{burn\_state.e}\, -\, U(\rho e)}{\Delta t}$ + + * species and auxiliary creation rates (only if ``castro.store_omegadot = 1``): + + * $\mathtt{reactions}(\rho X_k) = U(\rho) \dfrac{\mathtt{burn\_state.xn[k]}\, -\, U(\rho X_k) / U(\rho)}{\Delta t}$ + + * $\mathtt{reactions}(\rho \alpha_k) = U(\rho) \dfrac{\mathtt{burn\_state.aux[k]}\, -\, U(\rho \alpha_k) / U(\rho)}{\Delta t}$ + + * NSE flag (only if ``NSE`` is defined). This simply stores the value of ``burn_state.nse``. + +* Update the conserved state: + + .. note:: + + $\rho$ and $\rho \ub$ are unchanged by reactions so those variables are not + updated here. They are already the "new" state. + + * $U^\mathrm{new}(\rho e) = U^\mathrm{new}(\rho) \cdot \mathtt{burn\_state.e}$ + + * $U^\mathrm{new}(\rho E) = U^\mathrm{old}(\rho E) + (U^\mathrm{new}(\rho e) - U^\mathrm{old}(\rho e))$ + + * $U^\mathrm{new}(\rho X_k) = U^\mathrm{new}(\rho) \cdot \mathtt{burn\_state.xn[k]}$ + + * if ``NAUX_NET > 0``: $U^\mathrm{new}(\rho \alpha_k) = U^\mathrm{new}(\rho) \cdot \mathtt{burn\_state.aux[k]}$ + + * if ``NSE_NET`` : + + * $U(\mu_p) = \mathtt{burn\_state.mu\_p}$ + + * $U(\mu_n) = \mathtt{burn\_state.mu\_n}$ + + diff --git a/Docs/source/refs.bib b/Docs/source/refs.bib index b987b8681a..612f332637 100644 --- a/Docs/source/refs.bib +++ b/Docs/source/refs.bib @@ -580,8 +580,9 @@ @book{sedov:1959 @misc{timmes_sedov_code, author = "{Kamm}, J.~R. and {Timmes}, F.~X.", title = "{On Efficient Generation of Numerically Robust Sedov Solutions}", - url = {http://cococubed.asu.edu/papers/la-ur-07-2849.pdf}, - note = {Retrieved from http://cococubed.asu.edu/research\_pages/sedov.shtml} + + url = {http://cococubed.com/papers/la-ur-07-2849.pdf}, + note = {Retrieved from http://cococubed.com/research\_pages/sedov.shtml} } @ARTICLE{omang:2006, @@ -1104,3 +1105,18 @@ @ARTICLE{timmes_he_flames adsurl = {https://ui.adsabs.harvard.edu/abs/2000ApJ...528..913T}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } + +@article{castro_simple_sdc, +doi = {10.3847/1538-4357/ac8478}, +url = {https://dx.doi.org/10.3847/1538-4357/ac8478}, +year = {2022}, +month = {aug}, +publisher = {The American Astronomical Society}, +volume = {936}, +number = {1}, +pages = {6}, +author = {M. Zingale and M. P. Katz and A. Nonaka and M. Rasmussen}, +title = {An Improved Method for Coupling Hydrodynamics with Astrophysical Reaction Networks}, +journal = {The Astrophysical Journal}, +abstract = {Reacting astrophysical flows can be challenging to model, because of the difficulty in accurately coupling hydrodynamics and reactions. This can be particularly acute during explosive burning or at high temperatures where nuclear statistical equilibrium is established. We develop a new approach, based on the ideas of spectral deferred corrections (SDC) coupling of explicit hydrodynamics and stiff reaction sources as an alternative to operator splitting, that is simpler than the more comprehensive SDC approach we demonstrated previously. We apply the new method to a double-detonation problem with a moderately sized astrophysical nuclear reaction network and explore the time step size and reaction network tolerances, to show that the simplified-SDC approach provides improved coupling with decreased computational expense compared to traditional Strang operator splitting. 
This is all done in the framework of the Castro hydrodynamics code, and all algorithm implementations are freely available.} +} \ No newline at end of file diff --git a/Docs/source/software.rst b/Docs/source/software.rst index 34d71d51a5..649820bc6d 100644 --- a/Docs/source/software.rst +++ b/Docs/source/software.rst @@ -218,6 +218,8 @@ interact with in the C++ portions of the code. ``StateData`` ------------- +.. index:: StateData + ``StateData`` is a class that essentially holds a pair of ``MultiFab`` s: one at the old time and one at the new time. AMReX knows how to interpolate in time between these states to diff --git a/Docs/source/timestepping.rst b/Docs/source/timestepping.rst index cc50703154..33bbf5b768 100644 --- a/Docs/source/timestepping.rst +++ b/Docs/source/timestepping.rst @@ -205,7 +205,7 @@ which will subcycle twice at every level (except level 0). Retry Mechanism --------------- -.. index:: castro.use_retry, castro.abundance_failure_tolerance, castro.retry_small_density_cutoff, castro.small_dens +.. index:: castro.use_retry, castro.abundance_failure_tolerance, castro.abundance_failure_rho_cutoff, castro.retry_small_density_cutoff, castro.small_dens Castro's Strang CTU solver has a retry mechanism that can discard a time step on a level and restart with a smaller timestep, subcycling @@ -241,5 +241,5 @@ A retry can be triggered by a number of conditions: By construction, the integration routines in Microphysics will not abort if the integration fails, but instead return control to the - calling function and set ``burn_t burn_state.success=false`. This + calling function and set ``burn_t burn_state.success=false``. This allows Castro to handle the failure. diff --git a/Exec/Make.Castro b/Exec/Make.Castro index 3b0b5cb2ec..4a645154ef 100644 --- a/Exec/Make.Castro +++ b/Exec/Make.Castro @@ -186,18 +186,6 @@ endif ifeq ($(USE_REACT), TRUE) Bdirs += Source/reactions DEFINES += -DREACTIONS - - ifeq ($(USE_TRUE_SDC), TRUE) - # we need the compositional derivatives for SDC - DEFINES += -DEXTRA_THERMO - endif - - ifeq ($(USE_SIMPLIFIED_SDC), TRUE) - # we need the compositional derivatives for SDC - DEFINES += -DEXTRA_THERMO - # we only implement this for C++ reactions - endif - endif ifeq ($(USE_REACT_SPARSE_JACOBIAN), TRUE) diff --git a/Exec/gravity_tests/uniform_cube_sphere/GNUmakefile b/Exec/gravity_tests/uniform_cube/GNUmakefile similarity index 91% rename from Exec/gravity_tests/uniform_cube_sphere/GNUmakefile rename to Exec/gravity_tests/uniform_cube/GNUmakefile index 48b2dcb35f..e5c37c4da5 100644 --- a/Exec/gravity_tests/uniform_cube_sphere/GNUmakefile +++ b/Exec/gravity_tests/uniform_cube/GNUmakefile @@ -6,7 +6,7 @@ CASTRO_HOME := ../../.. # Location of this directory. Useful if # you're trying to compile this from another location. 
-TEST_DIR = $(CASTRO_HOME)/Exec/gravity_tests/uniform_cube_sphere +TEST_DIR = $(CASTRO_HOME)/Exec/gravity_tests/uniform_cube PRECISION ?= DOUBLE PROFILE ?= FALSE diff --git a/Exec/gravity_tests/uniform_cube_sphere/Make.package b/Exec/gravity_tests/uniform_cube/Make.package similarity index 100% rename from Exec/gravity_tests/uniform_cube_sphere/Make.package rename to Exec/gravity_tests/uniform_cube/Make.package diff --git a/Exec/gravity_tests/uniform_cube/Prob.cpp b/Exec/gravity_tests/uniform_cube/Prob.cpp new file mode 100644 index 0000000000..cb7bce3401 --- /dev/null +++ b/Exec/gravity_tests/uniform_cube/Prob.cpp @@ -0,0 +1,160 @@ +#include +#include + +#include + +#include + +#include + +#include + +#include + +using namespace amrex; + +void Castro::problem_post_init() +{ + BL_ASSERT(level == 0); + + gravity->multilevel_solve_for_new_phi(0, parent->finestLevel()); + + const int norm_power = 2; + + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; + + for (int lev = 0; lev <= parent->finestLevel(); lev++) + { + const auto dx = getLevel(lev).geom.CellSizeArray(); + const auto problo = getLevel(lev).geom.ProbLoArray(); + + const Real time = getLevel(lev).state[State_Type].curTime(); + + auto phiGrav = getLevel(lev).derive("phiGrav", time, 0); + + if (lev < parent->finestLevel()) + { + const MultiFab& mask = getLevel(lev+1).build_fine_mask(); + MultiFab::Multiply(*phiGrav, mask, 0, 0, 1, 0); + } + +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(*phiGrav, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& box = mfi.tilebox(); + + auto phi = (*phiGrav)[mfi].array(); + auto vol = getLevel(lev).Volume()[mfi].array(); + + // Compute the norm of the difference between the calculated potential + // and the analytical solution. + + reduce_op.eval(box, reduce_data, + [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple + { + Real radius = 0.5_rt * problem::diameter; + Real mass = (4.0_rt / 3.0_rt) * M_PI * radius * radius * radius * problem::density; + + Real xx = problo[0] + dx[0] * (static_cast(i) + 0.5_rt) - problem::center[0]; + +#if AMREX_SPACEDIM >= 2 + Real yy = problo[1] + dx[1] * (static_cast(j) + 0.5_rt) - problem::center[1]; +#else + Real yy = 0.0_rt; +#endif + +#if AMREX_SPACEDIM == 3 + Real zz = problo[2] + dx[2] * (static_cast(k) + 0.5_rt) - problem::center[2]; +#else + Real zz = 0.0_rt; +#endif + + Real rr = std::sqrt(xx * xx + yy * yy + zz * zz); + + Real phiExact = 0.0_rt; + + Real x[2]; + Real y[2]; + Real z[2]; + + x[0] = radius + xx; + x[1] = radius - xx; + y[0] = radius + yy; + y[1] = radius - yy; + z[0] = radius + zz; + z[1] = radius - zz; + + for (int ii = 0; ii <= 1; ++ii) { + for (int jj = 0; jj <= 1; ++jj) { + for (int kk = 0; kk <= 1; ++kk) { + + Real r = std::sqrt(x[ii] * x[ii] + y[jj] * y[jj] + z[kk] * z[kk]); + + // This is Equation 20 in Katz et al. (2016). There is a special case + // where, e.g., x[ii] = y[jj] = 0, in which case z[kk] = r and the + // atanh will evaluate to infinity, making the product ill-defined. + // Handle this case by only doing the atanh evaluation away from those + // points, since the product should be zero anyway. We also want to + // avoid a divide by zero, so we'll skip all the cases where r = 0. 
+ + if (r / radius > 1.e-6_rt) { + + if (std::abs(x[ii]) / radius > 1.e-6_rt && std::abs(y[jj]) / radius > 1.e-6_rt) { + phiExact -= C::Gconst * problem::density * (x[ii] * y[jj] * std::atanh(z[kk] / r)); + } + + if (std::abs(y[jj]) / radius > 1.e-6_rt && std::abs(z[kk]) / radius > 1.e-6_rt) { + phiExact -= C::Gconst * problem::density * (y[jj] * z[kk] * std::atanh(x[ii] / r)); + } + + if (std::abs(z[kk]) / radius > 1.e-6_rt && std::abs(x[ii]) / radius > 1.e-6_rt) { + phiExact -= C::Gconst * problem::density * (z[kk] * x[ii] * std::atanh(y[jj] / r)); + } + + // Also, avoid a divide-by-zero for the atan terms. + + if (std::abs(x[ii]) / radius > 1.e-6_rt) { + phiExact += C::Gconst * problem::density * (x[ii] * x[ii] / 2.0_rt * std::atan(y[jj] * z[kk] / (x[ii] * r))); + } + + if (std::abs(y[jj]) / radius > 1.e-6_rt) { + phiExact += C::Gconst * problem::density * (y[jj] * y[jj] / 2.0_rt * std::atan(z[kk] * x[ii] / (y[jj] * r))); + } + + if (std::abs(z[kk]) / radius > 1.e-6_rt) { + phiExact += C::Gconst * problem::density * (z[kk] * z[kk] / 2.0_rt * std::atan(x[ii] * y[jj] / (z[kk] * r))); + } + + } + } + } + } + + Real norm_diff = vol(i,j,k) * std::pow(phi(i,j,k) - phiExact, norm_power); + Real norm_exact = vol(i,j,k) * std::pow(phiExact, norm_power); + + return {norm_diff, norm_exact}; + }); + } + } + + ReduceTuple hv = reduce_data.value(); + Real norm_diff = amrex::get<0>(hv); + Real norm_exact = amrex::get<1>(hv); + + ParallelDescriptor::ReduceRealSum(norm_diff); + ParallelDescriptor::ReduceRealSum(norm_exact); + + norm_diff = std::pow(norm_diff, 1.0 / norm_power); + norm_exact = std::pow(norm_exact, 1.0 / norm_power); + + const Real error = norm_diff / norm_exact; + + amrex::Print() << std::endl; + amrex::Print() << "Error = " << error << std::endl; + amrex::Print() << std::endl; +} diff --git a/Exec/gravity_tests/uniform_cube_sphere/Problem.H b/Exec/gravity_tests/uniform_cube/Problem.H similarity index 100% rename from Exec/gravity_tests/uniform_cube_sphere/Problem.H rename to Exec/gravity_tests/uniform_cube/Problem.H diff --git a/Exec/gravity_tests/uniform_cube/README b/Exec/gravity_tests/uniform_cube/README new file mode 100644 index 0000000000..4537997ffa --- /dev/null +++ b/Exec/gravity_tests/uniform_cube/README @@ -0,0 +1,3 @@ +This is a simple test of Poisson gravity. It loads a cube of uniform density +onto the grid. The goal is to determine whether the calculated potential +converges to the analytical potential as resolution increases. 
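For reference, the closed-form potential that ``Prob.cpp`` above compares against (the ``phiExact`` loop, Equation 20 of Katz et al. 2016) can be written, with :math:`R` the half-width of the cube, :math:`x_0 = R + x`, :math:`x_1 = R - x` (and similarly for :math:`y_j` and :math:`z_k`), and :math:`r = \sqrt{x_i^2 + y_j^2 + z_k^2}`, as

.. math::

   \phi(x, y, z) = -G \rho \sum_{i,j,k=0}^{1}
     \left[ x_i y_j \tanh^{-1}\frac{z_k}{r}
          + y_j z_k \tanh^{-1}\frac{x_i}{r}
          + z_k x_i \tanh^{-1}\frac{y_j}{r}
          - \frac{x_i^2}{2} \tan^{-1}\frac{y_j z_k}{x_i r}
          - \frac{y_j^2}{2} \tan^{-1}\frac{z_k x_i}{y_j r}
          - \frac{z_k^2}{2} \tan^{-1}\frac{x_i y_j}{z_k r} \right],

where, exactly as the guards in the code do, individual terms are dropped near their coordinate planes, since each term vanishes there.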
diff --git a/Exec/gravity_tests/uniform_cube_sphere/_prob_params b/Exec/gravity_tests/uniform_cube/_prob_params similarity index 74% rename from Exec/gravity_tests/uniform_cube_sphere/_prob_params rename to Exec/gravity_tests/uniform_cube/_prob_params index 95f7f71ede..6e0c70d602 100644 --- a/Exec/gravity_tests/uniform_cube_sphere/_prob_params +++ b/Exec/gravity_tests/uniform_cube/_prob_params @@ -3,5 +3,3 @@ ambient_dens real 1.e-8_rt y density real 1.0_rt y diameter real 1.0_rt y - -problem integer 1 y diff --git a/Exec/gravity_tests/uniform_cube/ci-benchmarks/cube_convergence.out b/Exec/gravity_tests/uniform_cube/ci-benchmarks/cube_convergence.out new file mode 100644 index 0000000000..7c839879c7 --- /dev/null +++ b/Exec/gravity_tests/uniform_cube/ci-benchmarks/cube_convergence.out @@ -0,0 +1,4 @@ +ncell = 16 error = 0.004276641412 +ncell = 32 error = 0.001071555867 +ncell = 64 error = 0.0002676508753 +Average convergence rate = 1.99932628187254717105 diff --git a/Exec/gravity_tests/uniform_cube/convergence.sh b/Exec/gravity_tests/uniform_cube/convergence.sh new file mode 100755 index 0000000000..c3ef40cb14 --- /dev/null +++ b/Exec/gravity_tests/uniform_cube/convergence.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +EXEC=./Castro3d.gnu.ex + +${EXEC} inputs amr.n_cell=16 16 16 amr.plot_file=cube_plt_16_ &> cube_16.out +error_16=$(grep "Error" cube_16.out | awk '{print $3}') +echo "ncell = 16 error =" $error_16 + +${EXEC} inputs amr.n_cell=32 32 32 amr.plot_file=cube_plt_32_ &> cube_32.out +error_32=$(grep "Error" cube_32.out | awk '{print $3}') +echo "ncell = 32 error =" $error_32 + +${EXEC} inputs amr.n_cell=64 64 64 amr.plot_file=cube_plt_64_ &> cube_64.out +error_64=$(grep "Error" cube_64.out | awk '{print $3}') +echo "ncell = 64 error =" $error_64 + +echo "Average convergence rate =" $(echo "0.5 * (sqrt($error_16 / $error_32) + sqrt($error_32 / $error_64))" | bc -l) diff --git a/Exec/gravity_tests/uniform_cube/inputs b/Exec/gravity_tests/uniform_cube/inputs new file mode 100644 index 0000000000..4717fb4cc1 --- /dev/null +++ b/Exec/gravity_tests/uniform_cube/inputs @@ -0,0 +1,63 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 0 + +# PROBLEM SIZE & GEOMETRY +geometry.coord_sys = 0 +geometry.is_periodic = 0 0 0 +geometry.prob_lo = -1.6 -1.6 -1.6 +geometry.prob_hi = 1.6 1.6 1.6 +amr.n_cell = 16 16 16 + +amr.max_level = 0 +amr.ref_ratio = 2 2 2 2 2 2 2 2 2 2 2 +# we are not doing hydro, so there is no reflux and we don't need an error buffer +amr.n_error_buf = 0 0 0 0 0 0 0 0 0 0 0 +amr.blocking_factor = 8 +amr.max_grid_size = 8 + +amr.refinement_indicators = denerr + +amr.refine.denerr.value_greater = 1.0e0 +amr.refine.denerr.field_name = density + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< + +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 0 +castro.do_grav = 1 + +# GRAVITY +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 0 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-12 # Relative tolerance for multigrid solver +gravity.direct_sum_bcs = 1 # Calculate boundary conditions exactly + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing integrals +amr.data_log = grid_diag.out + +# CHECKPOINT FILES +amr.checkpoint_files_output = 1 +amr.check_file = chk # root name of 
checkpoint file +amr.check_int = 1 # timesteps between checkpoints + +# PLOTFILES +amr.plot_files_output = 1 +amr.plot_file = plt # root name of plotfile +amr.plot_per = 1 # timesteps between plotfiles +amr.derive_plot_vars = ALL + +# PROBLEM PARAMETERS +problem.density = 1.0e3 +problem.diameter = 2.0e0 +problem.ambient_dens = 1.0e-8 + +# EOS +eos.eos_assume_neutral = 1 diff --git a/Exec/gravity_tests/uniform_cube_sphere/problem_initialize.H b/Exec/gravity_tests/uniform_cube/problem_initialize.H similarity index 69% rename from Exec/gravity_tests/uniform_cube_sphere/problem_initialize.H rename to Exec/gravity_tests/uniform_cube/problem_initialize.H index f6e0e77004..cabea0283d 100644 --- a/Exec/gravity_tests/uniform_cube_sphere/problem_initialize.H +++ b/Exec/gravity_tests/uniform_cube/problem_initialize.H @@ -2,10 +2,8 @@ #define problem_initialize_H #include -#include AMREX_INLINE -void problem_initialize () -{ -} +void problem_initialize () {} + #endif diff --git a/Exec/gravity_tests/uniform_cube/problem_initialize_state_data.H b/Exec/gravity_tests/uniform_cube/problem_initialize_state_data.H new file mode 100644 index 0000000000..c98d3e6456 --- /dev/null +++ b/Exec/gravity_tests/uniform_cube/problem_initialize_state_data.H @@ -0,0 +1,39 @@ +#ifndef problem_initialize_state_data_H +#define problem_initialize_state_data_H + +#include + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void problem_initialize_state_data (int i, int j, int k, Array4 const& state, const GeometryData& geomdata) +{ + const Real* dx = geomdata.CellSize(); + const Real* problo = geomdata.ProbLo(); + + Real xx = problo[0] + dx[0] * (static_cast(i) + 0.5_rt) - problem::center[0]; + Real yy = problo[1] + dx[1] * (static_cast(j) + 0.5_rt) - problem::center[1]; + Real zz = problo[2] + dx[2] * (static_cast(k) + 0.5_rt) - problem::center[2]; + + // Establish the cube + + if (std::abs(xx) < problem::diameter/2 && + std::abs(yy) < problem::diameter/2 && + std::abs(zz) < problem::diameter/2) { + state(i,j,k,URHO) = problem::density; + } + else { + state(i,j,k,URHO) = problem::ambient_dens; + } + + // Establish the thermodynamic quantities. They don't have to be + // valid because this test will never do a hydro step. + + state(i,j,k,UTEMP) = 1.0_rt; + state(i,j,k,UEINT) = 1.0_rt; + state(i,j,k,UEDEN) = 1.0_rt; + + for (int n = 0; n < NumSpec; n++) { + state(i,j,k,UFS+n) = state(i,j,k,URHO) / static_cast(NumSpec); + } +} + +#endif diff --git a/Exec/gravity_tests/uniform_cube_sphere/Prob.cpp b/Exec/gravity_tests/uniform_cube_sphere/Prob.cpp deleted file mode 100644 index b76bfb2492..0000000000 --- a/Exec/gravity_tests/uniform_cube_sphere/Prob.cpp +++ /dev/null @@ -1,275 +0,0 @@ -#include -#include - -#include - -#include - -#include - -#include - -#include - -using namespace amrex; - -void Castro::problem_post_init() -{ - BL_ASSERT(level == 0); - - // If we're doing problem 2, the normalized sphere, - // add up the mass on the domain and then update - // the density in the sphere so that it has the 'correct' - // amount of total mass. - - if (problem::problem == 2) { - - Real actual_mass = 0.0; - - bool local_flag = true; - Real time = state[State_Type].curTime(); - - for (int lev = 0; lev <= parent->finestLevel(); ++lev) - actual_mass += getLevel(lev).volWgtSum("density", time, local_flag); - - ParallelDescriptor::ReduceRealSum(actual_mass); - - // The correct amount of mass is the mass of a sphere - // with the given diameter and density. 
- - Real target_mass = problem::density * (1.0e0 / 6.0e0) * M_PI * std::pow(problem::diameter, 3); - - Real update_factor = target_mass / actual_mass; - - // Now update the density given this factor. - - amrex::Print() << "\n"; - amrex::Print() << " Updating density by the factor " << update_factor << " to ensure total mass matches target mass.\n"; - amrex::Print() << "\n"; - - problem::density = problem::density * update_factor; - - for (int lev = 0; lev <= parent->finestLevel(); lev++) - { - MultiFab& state = getLevel(lev).get_new_data(State_Type); - - const auto dx = getLevel(lev).geom.CellSizeArray(); - const auto problo = getLevel(lev).geom.ProbLoArray(); - -#ifdef _OPENMP -#pragma omp parallel -#endif - for (MFIter mfi(state, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - - const Box& box = mfi.tilebox(); - - auto u = state[mfi].array(); - - // Update the density field. This ensures that the sum of the - // mass on the domain is what we intend it to be. - - amrex::ParallelFor(box, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) - { - if (problem::problem != 2) return; - - Real xx = problo[0] + dx[0] * (static_cast(i) + 0.5_rt) - problem::center[0]; - -#if AMREX_SPACEDIM >= 2 - Real yy = problo[1] + dx[1] * (static_cast(j) + 0.5_rt) - problem::center[1]; -#else - Real yy = 0.0_rt; -#endif - -#if AMREX_SPACEDIM == 3 - Real zz = problo[2] + dx[2] * (static_cast(k) + 0.5_rt) - problem::center[2]; -#else - Real zz = 0.0_rt; -#endif - - if (std::sqrt(xx * xx + yy * yy + zz * zz) < problem::diameter / 2) { - u(i,j,k,URHO) *= update_factor; - for (int n = 0; n < NumSpec; ++n) { - u(i,j,k,UFS+n) *= update_factor; - } - } - }); - } - - } - - // Do a final check, for sanity purposes. - - actual_mass = 0.0; - - for (int lev = 0; lev <= parent->finestLevel(); lev++) - actual_mass += getLevel(lev).volWgtSum("density", time, local_flag); - - ParallelDescriptor::ReduceRealSum(actual_mass); - - if (std::abs( (actual_mass - target_mass) / target_mass ) > 1.0e-6) { - amrex::Print() << "\n"; - amrex::Print() << "Actual mass: " << actual_mass << "\n"; - amrex::Print() << "Target mass: " << target_mass << "\n"; - amrex::Print() << "\n"; - amrex::Abort("Sphere does not have the right amount of mass."); - } - - } - - gravity->multilevel_solve_for_new_phi(0, parent->finestLevel()); - - const int norm_power = 2; - - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); - using ReduceTuple = typename decltype(reduce_data)::Type; - - for (int lev = 0; lev <= parent->finestLevel(); lev++) - { - const auto dx = getLevel(lev).geom.CellSizeArray(); - const auto problo = getLevel(lev).geom.ProbLoArray(); - - const Real time = getLevel(lev).state[State_Type].curTime(); - - auto phiGrav = getLevel(lev).derive("phiGrav", time, 0); - - if (lev < parent->finestLevel()) - { - const MultiFab& mask = getLevel(lev+1).build_fine_mask(); - MultiFab::Multiply(*phiGrav, mask, 0, 0, 1, 0); - } - -#ifdef _OPENMP -#pragma omp parallel -#endif - for (MFIter mfi(*phiGrav, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - const Box& box = mfi.tilebox(); - - auto phi = (*phiGrav)[mfi].array(); - auto vol = getLevel(lev).Volume()[mfi].array(); - - // Compute the norm of the difference between the calculated potential - // and the analytical solution. 
- - reduce_op.eval(box, reduce_data, - [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple - { - Real radius = 0.5_rt * problem::diameter; - Real mass = (4.0_rt / 3.0_rt) * M_PI * radius * radius * radius * problem::density; - - Real xx = problo[0] + dx[0] * (static_cast(i) + 0.5_rt) - problem::center[0]; - -#if AMREX_SPACEDIM >= 2 - Real yy = problo[1] + dx[1] * (static_cast(j) + 0.5_rt) - problem::center[1]; -#else - Real yy = 0.0_rt; -#endif - -#if AMREX_SPACEDIM == 3 - Real zz = problo[2] + dx[2] * (static_cast(k) + 0.5_rt) - problem::center[2]; -#else - Real zz = 0.0_rt; -#endif - - Real rr = std::sqrt(xx * xx + yy * yy + zz * zz); - - Real phiExact = 0.0_rt; - - if (problem::problem == 1 || problem::problem == 2) { - - if (rr <= radius) { - phiExact = -C::Gconst * mass * (3 * radius * radius - rr * rr) / (2 * radius * radius * radius); - } else { - phiExact = -C::Gconst * mass / rr; - } - - } - else if (problem::problem == 3) { - - Real x[2]; - Real y[2]; - Real z[2]; - - x[0] = radius + xx; - x[1] = radius - xx; - y[0] = radius + yy; - y[1] = radius - yy; - z[0] = radius + zz; - z[1] = radius - zz; - - for (int ii = 0; ii <= 1; ++ii) { - for (int jj = 0; jj <= 1; ++jj) { - for (int kk = 0; kk <= 1; ++kk) { - - Real r = std::sqrt(x[ii] * x[ii] + y[jj] * y[jj] + z[kk] * z[kk]); - - // This is Equation 20 in Katz et al. (2016). There is a special case - // where, e.g., x[ii] = y[jj] = 0, in which case z[kk] = r and the - // atanh will evaluate to infinity, making the product ill-defined. - // Handle this case by only doing the atanh evaluation away from those - // points, since the product should be zero anyway. We also want to - // avoid a divide by zero, so we'll skip all the cases where r = 0. - - if (r / radius > 1.e-6_rt) { - - if (std::abs(x[ii]) / radius > 1.e-6_rt && std::abs(y[jj]) / radius > 1.e-6_rt) { - phiExact -= C::Gconst * problem::density * (x[ii] * y[jj] * std::atanh(z[kk] / r)); - } - - if (std::abs(y[jj]) / radius > 1.e-6_rt && std::abs(z[kk]) / radius > 1.e-6_rt) { - phiExact -= C::Gconst * problem::density * (y[jj] * z[kk] * std::atanh(x[ii] / r)); - } - - if (std::abs(z[kk]) / radius > 1.e-6_rt && std::abs(x[ii]) / radius > 1.e-6_rt) { - phiExact -= C::Gconst * problem::density * (z[kk] * x[ii] * std::atanh(y[jj] / r)); - } - - // Also, avoid a divide-by-zero for the atan terms. 
- - if (std::abs(x[ii]) / radius > 1.e-6_rt) { - phiExact += C::Gconst * problem::density * (x[ii] * x[ii] / 2.0_rt * std::atan(y[jj] * z[kk] / (x[ii] * r))); - } - - if (std::abs(y[jj]) / radius > 1.e-6_rt) { - phiExact += C::Gconst * problem::density * (y[jj] * y[jj] / 2.0_rt * std::atan(z[kk] * x[ii] / (y[jj] * r))); - } - - if (std::abs(z[kk]) / radius > 1.e-6_rt) { - phiExact += C::Gconst * problem::density * (z[kk] * z[kk] / 2.0_rt * std::atan(x[ii] * y[jj] / (z[kk] * r))); - } - - } - } - } - } - - } - - Real norm_diff = vol(i,j,k) * std::pow(phi(i,j,k) - phiExact, norm_power); - Real norm_exact = vol(i,j,k) * std::pow(phiExact, norm_power); - - return {norm_diff, norm_exact}; - }); - } - - } - - ReduceTuple hv = reduce_data.value(); - Real norm_diff = amrex::get<0>(hv); - Real norm_exact = amrex::get<1>(hv); - - ParallelDescriptor::ReduceRealSum(norm_diff); - ParallelDescriptor::ReduceRealSum(norm_exact); - - norm_diff = std::pow(norm_diff, 1.0 / norm_power); - norm_exact = std::pow(norm_exact, 1.0 / norm_power); - - const Real error = norm_diff / norm_exact; - - amrex::Print() << std::endl; - amrex::Print() << "Error = " << error << std::endl; - amrex::Print() << std::endl; - -} diff --git a/Exec/gravity_tests/uniform_cube_sphere/README b/Exec/gravity_tests/uniform_cube_sphere/README deleted file mode 100644 index 6a99f7e5ae..0000000000 --- a/Exec/gravity_tests/uniform_cube_sphere/README +++ /dev/null @@ -1,9 +0,0 @@ -This is a simple test of Poisson gravity. It loads a sphere (problem = 1, 2) or -cube (problem = 3) of uniform density onto the grid. The goal is to determine -whether the calculated potential converges to the analytical potential -as resolution increases. Problems 1 and 2 are the same setup except for one -detail: in Problem 2, instead of using the density requested, we modify -the density on the grid so that the total amount of mass in the domain -is equal to the mass of a sphere of the requested diameter. Problem 1 -uses the density requested by the user, and so it will not get the right -mass: the object will not be exactly spherical due to Cartesian grid effects. diff --git a/Exec/gravity_tests/uniform_sphere/GNUmakefile b/Exec/gravity_tests/uniform_sphere/GNUmakefile new file mode 100644 index 0000000000..60fb5ac5bf --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/GNUmakefile @@ -0,0 +1,35 @@ +# Define the location of the CASTRO top directory, +# if not already defined by an environment variable. + +CASTRO_HOME := ../../.. + +# Location of this directory. Useful if +# you're trying to compile this from another location. 
+ +TEST_DIR = $(CASTRO_HOME)/Exec/gravity_tests/uniform_sphere + +PRECISION ?= DOUBLE +PROFILE ?= FALSE + +DEBUG ?= FALSE + +DIM ?= 3 + +COMP ?= gcc + +USE_MPI ?= FALSE +USE_OMP ?= FALSE + +USE_GRAV ?= TRUE + +# This sets the EOS directory in $(MICROPHYSICS_HOME)/EOS +EOS_DIR := gamma_law + +# This sets the network directory in $(MICROPHSICS_HOME)/Networks +NETWORK_DIR ?= general_null +NETWORK_INPUTS = gammalaw.net + +Bpack += $(TEST_DIR)/Make.package +Blocs += $(TEST_DIR) + +include $(CASTRO_HOME)/Exec/Make.Castro diff --git a/Exec/gravity_tests/uniform_sphere/Make.package b/Exec/gravity_tests/uniform_sphere/Make.package new file mode 100755 index 0000000000..139597f9cb --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/Make.package @@ -0,0 +1,2 @@ + + diff --git a/Exec/gravity_tests/uniform_sphere/Prob.cpp b/Exec/gravity_tests/uniform_sphere/Prob.cpp new file mode 100644 index 0000000000..fd6cd8af4e --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/Prob.cpp @@ -0,0 +1,204 @@ +#include +#include + +#include + +#include + +#include + +#include + +#include + +using namespace amrex; + +void Castro::problem_post_init() +{ + BL_ASSERT(level == 0); + + // Add up the mass on the domain and then update the density + // in the sphere so that it has the 'correct' amount of total mass. + + Real actual_mass = 0.0; + + bool local_flag = true; + Real time = state[State_Type].curTime(); + + for (int lev = 0; lev <= parent->finestLevel(); ++lev) { + actual_mass += getLevel(lev).volWgtSum("density", time, local_flag); + } + + ParallelDescriptor::ReduceRealSum(actual_mass); + + // The correct amount of mass is the mass of a sphere + // with the given diameter and density. + + Real target_mass = problem::density * (1.0e0 / 6.0e0) * M_PI * std::pow(problem::diameter, 3); + + Real update_factor = target_mass / actual_mass; + + // Now update the density given this factor. + + amrex::Print() << "\n"; + amrex::Print() << " Updating density by the factor " << update_factor << " to ensure total mass matches target mass.\n"; + amrex::Print() << "\n"; + + problem::density = problem::density * update_factor; + + for (int lev = 0; lev <= parent->finestLevel(); lev++) + { + MultiFab& state = getLevel(lev).get_new_data(State_Type); + + const auto dx = getLevel(lev).geom.CellSizeArray(); + const auto problo = getLevel(lev).geom.ProbLoArray(); + +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(state, TilingIfNotGPU()); mfi.isValid(); ++mfi) { + + const Box& box = mfi.tilebox(); + + auto u = state[mfi].array(); + + // Update the density field. This ensures that the sum of the + // mass on the domain is what we intend it to be. + + amrex::ParallelFor(box, + [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) + { + Real xx = problo[0] + dx[0] * (static_cast(i) + 0.5_rt) - problem::center[0]; + +#if AMREX_SPACEDIM >= 2 + Real yy = problo[1] + dx[1] * (static_cast(j) + 0.5_rt) - problem::center[1]; +#else + Real yy = 0.0_rt; +#endif + +#if AMREX_SPACEDIM == 3 + Real zz = problo[2] + dx[2] * (static_cast(k) + 0.5_rt) - problem::center[2]; +#else + Real zz = 0.0_rt; +#endif + + if (std::sqrt(xx * xx + yy * yy + zz * zz) < problem::diameter / 2) { + u(i,j,k,URHO) *= update_factor; + for (int n = 0; n < NumSpec; ++n) { + u(i,j,k,UFS+n) *= update_factor; + } + } + }); + } + + } + + // Do a final check to ensure we got what we intended. 
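+    // After the rescale, re-summing the density over all levels should reproduce
+    // the analytic sphere mass, rho * (pi / 6) * d^3, up to the (tiny) unscaled
+    // ambient contribution, so the relative tolerance checked below is a loose
+    // sanity check.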
+ + actual_mass = 0.0; + + for (int lev = 0; lev <= parent->finestLevel(); lev++) { + actual_mass += getLevel(lev).volWgtSum("density", time, local_flag); + } + + ParallelDescriptor::ReduceRealSum(actual_mass); + + if (std::abs( (actual_mass - target_mass) / target_mass ) > 1.0e-6) { + amrex::Print() << "\n"; + amrex::Print() << "Actual mass: " << actual_mass << "\n"; + amrex::Print() << "Target mass: " << target_mass << "\n"; + amrex::Print() << "\n"; + amrex::Abort("Sphere does not have the right amount of mass."); + } + + gravity->multilevel_solve_for_new_phi(0, parent->finestLevel()); + + const int norm_power = 2; + + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; + + for (int lev = 0; lev <= parent->finestLevel(); lev++) + { + const auto dx = getLevel(lev).geom.CellSizeArray(); + const auto problo = getLevel(lev).geom.ProbLoArray(); + + const Real time = getLevel(lev).state[State_Type].curTime(); + + auto phiGrav = getLevel(lev).derive("phiGrav", time, 0); + + if (lev < parent->finestLevel()) + { + const MultiFab& mask = getLevel(lev+1).build_fine_mask(); + MultiFab::Multiply(*phiGrav, mask, 0, 0, 1, 0); + } + +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(*phiGrav, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& box = mfi.tilebox(); + + auto phi = (*phiGrav)[mfi].array(); + auto vol = getLevel(lev).Volume()[mfi].array(); + + // Compute the norm of the difference between the calculated potential + // and the analytical solution. + + reduce_op.eval(box, reduce_data, + [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) -> ReduceTuple + { + Real radius = 0.5_rt * problem::diameter; + Real mass = (4.0_rt / 3.0_rt) * M_PI * radius * radius * radius * problem::density; + + Real xx = problo[0] + dx[0] * (static_cast(i) + 0.5_rt) - problem::center[0]; + +#if AMREX_SPACEDIM >= 2 + Real yy = problo[1] + dx[1] * (static_cast(j) + 0.5_rt) - problem::center[1]; +#else + Real yy = 0.0_rt; +#endif + +#if AMREX_SPACEDIM == 3 + Real zz = problo[2] + dx[2] * (static_cast(k) + 0.5_rt) - problem::center[2]; +#else + Real zz = 0.0_rt; +#endif + + Real rr = std::sqrt(xx * xx + yy * yy + zz * zz); + + Real phiExact = 0.0_rt; + + if (rr <= radius) { + phiExact = -C::Gconst * mass * (3 * radius * radius - rr * rr) / (2 * radius * radius * radius); + } + else { + phiExact = -C::Gconst * mass / rr; + } + + Real norm_diff = vol(i,j,k) * std::pow(phi(i,j,k) - phiExact, norm_power); + Real norm_exact = vol(i,j,k) * std::pow(phiExact, norm_power); + + return {norm_diff, norm_exact}; + }); + } + } + + ReduceTuple hv = reduce_data.value(); + Real norm_diff = amrex::get<0>(hv); + Real norm_exact = amrex::get<1>(hv); + + ParallelDescriptor::ReduceRealSum(norm_diff); + ParallelDescriptor::ReduceRealSum(norm_exact); + + norm_diff = std::pow(norm_diff, 1.0 / norm_power); + norm_exact = std::pow(norm_exact, 1.0 / norm_power); + + const Real error = norm_diff / norm_exact; + + amrex::Print() << std::endl; + amrex::Print() << "Error = " << error << std::endl; + amrex::Print() << std::endl; +} diff --git a/Exec/gravity_tests/uniform_sphere/Problem.H b/Exec/gravity_tests/uniform_sphere/Problem.H new file mode 100644 index 0000000000..f4bc212035 --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/Problem.H @@ -0,0 +1,7 @@ +// Preprocessor directive for allowing us to do a post-initialization update. 
+ +#ifndef DO_PROBLEM_POST_INIT +#define DO_PROBLEM_POST_INIT +#endif + +void problem_post_init(); diff --git a/Exec/gravity_tests/uniform_sphere/README b/Exec/gravity_tests/uniform_sphere/README new file mode 100644 index 0000000000..566885de8e --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/README @@ -0,0 +1,3 @@ +This is a simple test of Poisson gravity. It loads a sphere of uniform density +onto the grid. The goal is to determine whether the calculated potential +converges to the analytical potential as resolution increases. diff --git a/Exec/gravity_tests/uniform_sphere/_prob_params b/Exec/gravity_tests/uniform_sphere/_prob_params new file mode 100644 index 0000000000..6e0c70d602 --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/_prob_params @@ -0,0 +1,5 @@ +ambient_dens real 1.e-8_rt y + +density real 1.0_rt y + +diameter real 1.0_rt y diff --git a/Exec/gravity_tests/uniform_sphere/ci-benchmarks/sphere_convergence.out b/Exec/gravity_tests/uniform_sphere/ci-benchmarks/sphere_convergence.out new file mode 100644 index 0000000000..585bf5294c --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/ci-benchmarks/sphere_convergence.out @@ -0,0 +1,4 @@ +ncell = 16 error = 0.05274082586 +ncell = 32 error = 0.008316536152 +ncell = 64 error = 0.001270740323 +Average convergence rate = 2.53825936367487233285 diff --git a/Exec/gravity_tests/uniform_sphere/convergence.sh b/Exec/gravity_tests/uniform_sphere/convergence.sh new file mode 100755 index 0000000000..c3ef40cb14 --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/convergence.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +EXEC=./Castro3d.gnu.ex + +${EXEC} inputs amr.n_cell=16 16 16 amr.plot_file=cube_plt_16_ &> cube_16.out +error_16=$(grep "Error" cube_16.out | awk '{print $3}') +echo "ncell = 16 error =" $error_16 + +${EXEC} inputs amr.n_cell=32 32 32 amr.plot_file=cube_plt_32_ &> cube_32.out +error_32=$(grep "Error" cube_32.out | awk '{print $3}') +echo "ncell = 32 error =" $error_32 + +${EXEC} inputs amr.n_cell=64 64 64 amr.plot_file=cube_plt_64_ &> cube_64.out +error_64=$(grep "Error" cube_64.out | awk '{print $3}') +echo "ncell = 64 error =" $error_64 + +echo "Average convergence rate =" $(echo "0.5 * (sqrt($error_16 / $error_32) + sqrt($error_32 / $error_64))" | bc -l) diff --git a/Exec/gravity_tests/uniform_cube_sphere/inputs b/Exec/gravity_tests/uniform_sphere/inputs similarity index 100% rename from Exec/gravity_tests/uniform_cube_sphere/inputs rename to Exec/gravity_tests/uniform_sphere/inputs diff --git a/Exec/gravity_tests/uniform_sphere/problem_initialize.H b/Exec/gravity_tests/uniform_sphere/problem_initialize.H new file mode 100644 index 0000000000..cabea0283d --- /dev/null +++ b/Exec/gravity_tests/uniform_sphere/problem_initialize.H @@ -0,0 +1,9 @@ +#ifndef problem_initialize_H +#define problem_initialize_H + +#include + +AMREX_INLINE +void problem_initialize () {} + +#endif diff --git a/Exec/gravity_tests/uniform_cube_sphere/problem_initialize_state_data.H b/Exec/gravity_tests/uniform_sphere/problem_initialize_state_data.H similarity index 57% rename from Exec/gravity_tests/uniform_cube_sphere/problem_initialize_state_data.H rename to Exec/gravity_tests/uniform_sphere/problem_initialize_state_data.H index 81e475f838..9ae21249f6 100644 --- a/Exec/gravity_tests/uniform_cube_sphere/problem_initialize_state_data.H +++ b/Exec/gravity_tests/uniform_sphere/problem_initialize_state_data.H @@ -14,30 +14,13 @@ void problem_initialize_state_data (int i, int j, int k, Array4 const& sta Real yy = problo[1] + dx[1] * (static_cast(j) + 
0.5_rt) - problem::center[1]; Real zz = problo[2] + dx[2] * (static_cast(k) + 0.5_rt) - problem::center[2]; - // Establish the cube or sphere + // Establish the sphere - if (problem::problem == 1 || problem::problem == 2) { - - if (std::pow(xx * xx + yy * yy + zz * zz, 0.5_rt) < problem::diameter / 2) { - state(i,j,k,URHO) = problem::density; - } else { - state(i,j,k,URHO) = problem::ambient_dens; - } - - } else if (problem::problem == 3) { - - if (std::abs(xx) < problem::diameter/2 && - std::abs(yy) < problem::diameter/2 && - std::abs(zz) < problem::diameter/2) { - state(i,j,k,URHO) = problem::density; - } else { - state(i,j,k,URHO) = problem::ambient_dens; - } - -#ifndef AMREX_USE_GPU - } else { - amrex::Error("Problem not defined."); -#endif + if (std::pow(xx * xx + yy * yy + zz * zz, 0.5_rt) < problem::diameter / 2) { + state(i,j,k,URHO) = problem::density; + } + else { + state(i,j,k,URHO) = problem::ambient_dens; } // Establish the thermodynamic quantities. They don't have to be @@ -51,4 +34,5 @@ void problem_initialize_state_data (int i, int j, int k, Array4 const& sta state(i,j,k,UFS+n) = state(i,j,k,URHO) / static_cast(NumSpec); } } + #endif diff --git a/Exec/science/flame_wave/analysis/vis_3d/vol-xrb-abar.py b/Exec/science/flame_wave/analysis/vis_3d/vol-xrb-abar.py index 078cf554c3..c5faa43ac3 100644 --- a/Exec/science/flame_wave/analysis/vis_3d/vol-xrb-abar.py +++ b/Exec/science/flame_wave/analysis/vis_3d/vol-xrb-abar.py @@ -1,16 +1,19 @@ #!/usr/bin/env python -import matplotlib -matplotlib.use('agg') - import sys +import matplotlib +import numpy as np + import yt from yt.frontends.boxlib.api import CastroDataset -import numpy as np -#from yt.visualization.volume_rendering.render_source import VolumeSource -from yt.visualization.volume_rendering.api import create_volume_source, Scene from yt.units import cm +#from yt.visualization.volume_rendering.render_source import VolumeSource +from yt.visualization.volume_rendering.api import Scene, create_volume_source + +matplotlib.use('agg') + +resolution = (1920, 1080) # this is for the wdconvect problem @@ -56,7 +59,7 @@ def doit(plotfile): sc.get_source(0).transfer_function = tf cam = sc.add_camera(ds, lens_type="perspective") - cam.resolution = (1920, 1280) + cam.resolution = resolution # view 1 @@ -77,14 +80,24 @@ def doit(plotfile): cam.zoom(3.0) sc.camera = cam - sc.save_annotated("{}_abar_annotated_side.png".format(plotfile), - sigma_clip=3.0, + sc.save(f"{plotfile}_abar_noaxes_side.png", sigma_clip=3.0) + + sc.annotate_axes(alpha=0.005, thickness=6) + + sc.save(f"{plotfile}_abar_side.png", sigma_clip=3.0) + + sc.save_annotated(f"{plotfile}_abar_annotated_side.png", + sigma_clip=3.0, label_fmt="%.2f", text_annotate=[[(0.05, 0.05), f"t = {ds.current_time.d:7.5f} s", dict(horizontalalignment="left")]]) # view 2 + # remove the annotation source for now + print(list(sc.sources.keys())) + sc.sources.pop("source_01") + dx = ds.domain_right_edge[0] - ds.domain_left_edge[0] cam.position = [0.5*(ds.domain_left_edge[0] + ds.domain_right_edge[0]) + 0.0001 * dx, 0.5*(ds.domain_left_edge[1] + ds.domain_right_edge[1]), @@ -103,8 +116,13 @@ def doit(plotfile): cam.zoom(0.6) sc.camera = cam + sc.save(f"{plotfile}_abar_noaxes_top.png", sigma_clip=3.0) + + sc.annotate_axes(alpha=0.005, thickness=6) + + sc.save(f"{plotfile}_abar_top.png", sigma_clip=3.0) sc.save_annotated("{}_abar_annotated_top.png".format(plotfile), - sigma_clip=3.0, + sigma_clip=3.0, label_fmt="%.2f", text_annotate=[[(0.05, 0.05), f"t = {ds.current_time.d:7.5f} s", 
dict(horizontalalignment="left")]]) diff --git a/Exec/science/subchandra/GNUmakefile b/Exec/science/subchandra/GNUmakefile index ed57464fb5..bc66f50e7a 100644 --- a/Exec/science/subchandra/GNUmakefile +++ b/Exec/science/subchandra/GNUmakefile @@ -22,7 +22,7 @@ USE_SIMPLIFIED_SDC=TRUE EOS_DIR := helmholtz # This sets the network directory in $(MICROPHYSICS_HOME)/networks -NETWORK_DIR := subch_approx +NETWORK_DIR := subch_simple INTEGRATOR_DIR := VODE diff --git a/Exec/science/subchandra/inputs_2d.N14.coarse b/Exec/science/subchandra/inputs_2d.N14.coarse index 829526dac3..77781e0e3f 100644 --- a/Exec/science/subchandra/inputs_2d.N14.coarse +++ b/Exec/science/subchandra/inputs_2d.N14.coarse @@ -62,7 +62,7 @@ castro.sponge_timescale = 1.e-3 castro.cfl = 0.2 # cfl number for hyperbolic system castro.init_shrink = 0.05 # scale back initial timestep by this factor castro.change_max = 1.025 # factor by which dt is allowed to change each timestep -castro.sum_interval = 0 # timesteps between computing and printing volume averages +castro.sum_interval = 5 # timesteps between computing and printing volume averages castro.update_sources_after_reflux = 0 castro.time_integration_method = 3 @@ -147,12 +147,12 @@ integrator.rtol_spec = 1.e-5 integrator.atol_spec = 1.e-5 integrator.rtol_enuc = 1.e-5 integrator.atol_enuc = 1.e-5 -integrator.jacobian = 2 +integrator.jacobian = 1 integrator.X_reject_buffer = 4.0 # disable jacobian caching in VODE integrator.use_jacobian_caching = 0 -integrator.ode_max_steps = 500000 +integrator.ode_max_steps = 1000000 diff --git a/Exec/science/wdmerger/ci-benchmarks/wdmerger_collision_2D.out b/Exec/science/wdmerger/ci-benchmarks/wdmerger_collision_2D.out new file mode 100644 index 0000000000..a933d102df --- /dev/null +++ b/Exec/science/wdmerger/ci-benchmarks/wdmerger_collision_2D.out @@ -0,0 +1,29 @@ + plotfile = plt00095 + time = 1.25 + variables minimum value maximum value + density 8.7136176158e-05 13348283.786 + xmom -4.4636648292e+14 1.4969028735e+15 + ymom -1.8931343553e+15 1.9807937913e+15 + zmom 0 0 + rho_E 7.7947390767e+11 5.6568077669e+24 + rho_e 7.4321344551e+11 5.6343409475e+24 + Temp 100000 3972527783.9 + rho_He4 8.7136176158e-17 1473.9666386 + rho_C12 3.4854470463e-05 5030539.1488 + rho_O16 5.2281705694e-05 7778301.6377 + rho_Ne20 8.7136176158e-17 1023673.1153 + rho_Mg24 8.7136176158e-17 1040419.2782 + rho_Si28 8.7136176158e-17 4251082.0739 + rho_S32 8.7136176158e-17 2179431.2961 + rho_Ar36 8.7136176158e-17 497747.48798 + rho_Ca40 8.7136176158e-17 382056.037 + rho_Ti44 8.7136176158e-17 1576.0930955 + rho_Cr48 8.7136176158e-17 1467.9139369 + rho_Fe52 8.7136176158e-17 14831.710059 + rho_Ni56 8.7136176158e-17 182702.27304 + phiGrav -4.6147467267e+17 -2.2055818332e+16 + grav_x -461195258.85 -48603.568291 + grav_y -444709392.81 392306861.64 + grav_z 0 0 + rho_enuc -7.6356851771e+21 5.7259582003e+26 + diff --git a/Exec/science/wdmerger/tests/wdmerger_collision/inputs_2d_collision b/Exec/science/wdmerger/tests/wdmerger_collision/inputs_2d_collision index 1371988cba..1fd08b126a 100644 --- a/Exec/science/wdmerger/tests/wdmerger_collision/inputs_2d_collision +++ b/Exec/science/wdmerger/tests/wdmerger_collision/inputs_2d_collision @@ -74,6 +74,9 @@ amr.max_level = 0 # Refinement ratio amr.ref_ratio = 4 4 4 4 4 4 4 4 4 +# Don't do AMR subcycling for this setup +amr.subcycling_mode = None + # How many coarse timesteps between regridding amr.regrid_int = 2 diff --git a/Exec/science/wdmerger/tests/wdmerger_collision/inputs_2d_collision.test 
b/Exec/science/wdmerger/tests/wdmerger_collision/inputs_2d_collision.test new file mode 100644 index 0000000000..d380a7c20e --- /dev/null +++ b/Exec/science/wdmerger/tests/wdmerger_collision/inputs_2d_collision.test @@ -0,0 +1,14 @@ +FILE = inputs_2d_collision + +# Start the problem when the stars are already merging. +# This is an artificially contrived setup but will ensure +# that burning begins immediately. + +problem.collision_separation = 1.25 + +amr.n_cell = 64 128 +stop_time = 1.25 +amr.max_level = 2 +amr.ref_ratio = 2 + +castro.init_shrink = 1.0 diff --git a/Source/diffusion/Castro_diffusion.cpp b/Source/diffusion/Castro_diffusion.cpp index 5fffe05542..61b28bfbd5 100644 --- a/Source/diffusion/Castro_diffusion.cpp +++ b/Source/diffusion/Castro_diffusion.cpp @@ -31,7 +31,7 @@ Castro::construct_old_diff_source(MultiFab& source, MultiFab& state_in, Real tim #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_diff_source() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_diff_source() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -68,7 +68,7 @@ Castro::construct_new_diff_source(MultiFab& source, MultiFab& state_old, MultiFa #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_new_diff_source() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_diff_source() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/driver/Castro.H b/Source/driver/Castro.H index 1f862bc8c1..bccebda7f8 100644 --- a/Source/driver/Castro.H +++ b/Source/driver/Castro.H @@ -1036,8 +1036,9 @@ public: /// /// @param crse_level /// @param fine_level +/// @param in_post_timestep /// - void reflux (int crse_level, int fine_level); + void reflux (int crse_level, int fine_level, bool in_post_timestep); /// @@ -1164,7 +1165,8 @@ public: /// bool oversubscribing() { // NOLINT(readability-convert-member-functions-to-static) #ifdef AMREX_USE_GPU - return (amrex::MultiFab::queryMemUsage("AmrLevel_Level_" + std::to_string(level)) >= amrex::Gpu::Device::totalGlobalMem()); + return (amrex::MultiFab::queryMemUsage("AmrLevel_Level_" + std::to_string(level)) >= + static_cast(amrex::Gpu::Device::totalGlobalMem())); #else return false; #endif @@ -1335,11 +1337,6 @@ protected: #endif -/// -/// Did we trigger a CFL violation? -/// - int cfl_violation; - /// /// State data to hold if we want to do a retry. 
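The Castro.H hunk above adds an in_post_timestep flag to reflux(); the Castro.cpp and Castro_advance_ctu.cpp hunks further down wire it up. Below is a minimal standalone sketch of the intended dispatch, not part of the patch: AmrStandIn and the free reflux() function here are hypothetical stand-ins for the Amr/Castro machinery, and only the branching between the two modes is modeled. When subcycling, each coarse level still refluxes against the next finer level in post_timestep() and follows that with the gravity sync solve and new-time source recomputation; with amr.subcycling_mode = None, the correction is applied immediately after the hydro update on all levels and those extra steps are skipped.

#include <iostream>
#include <string>

struct AmrStandIn {
    std::string subcycling_mode = "Auto";  // amr.subcycling_mode; "None" disables subcycling
    int finest_level = 2;
};

// Stand-in for Castro::reflux(crse_level, fine_level, in_post_timestep).
void reflux (int crse_level, int fine_level, bool in_post_timestep)
{
    std::cout << "reflux " << crse_level << " -> " << fine_level
              << (in_post_timestep
                      ? " in post_timestep (with gravity sync solve and new-time source recomputation)"
                      : " mid-advance (flux correction only)")
              << "\n";
}

int main ()
{
    AmrStandIn parent;
    const bool do_reflux = true;
    const int level = 0;

    if (parent.subcycling_mode != "None") {
        // Subcycling: reflux in post_timestep(), after this level and all
        // finer levels have completed their advances.
        if (do_reflux && level < parent.finest_level) {
            reflux(level, level + 1, true);
        }
    } else {
        // No subcycling: all levels advanced together, so apply the flux
        // correction immediately after the hydro update, across all levels.
        if (do_reflux && level < parent.finest_level) {
            reflux(level, parent.finest_level, false);
        }
    }
    return 0;
}

With subcycling_mode left at "Auto" the first branch runs; setting amr.subcycling_mode = None (as the new wdmerger collision test inputs do) takes the second.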
diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp index e517af72ed..7ef73d48e4 100644 --- a/Source/driver/Castro.cpp +++ b/Source/driver/Castro.cpp @@ -39,7 +39,7 @@ #include #endif -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #include #endif @@ -1047,8 +1047,10 @@ Castro::initData () By_new.setVal(0.0); Bz_new.setVal(0.0); - for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { - +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) { auto geomdata = geom.data(); @@ -1093,9 +1095,12 @@ Castro::initData () #endif //MHD - for (MFIter mfi(S_new); mfi.isValid(); ++mfi) +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& box = mfi.validbox(); + const Box& box = mfi.tilebox(); auto s = S_new[mfi].array(); auto geomdata = geom.data(); @@ -1131,7 +1136,7 @@ Castro::initData () ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) @@ -1185,8 +1190,11 @@ Castro::initData () // Verify that the sum of (rho X)_i = rho at every cell - for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) { + const Box& bx = mfi.tilebox(); auto S_arr = S_new.array(mfi); @@ -1353,17 +1361,11 @@ Castro::initData () #ifdef RADIATION if (do_radiation) { - for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { - int i = mfi.index(); - - if (radiation->verbose > 2) { - std::cout << "Calling RADINIT at level " << level << ", grid " - << i << std::endl; - } - - const Box& box = mfi.validbox(); - const int* lo = box.loVect(); - const int* hi = box.hiVect(); +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) { + const Box& box = mfi.tilebox(); auto r = Rad_new[mfi].array(); auto geomdata = geom.data(); @@ -1388,9 +1390,7 @@ Castro::initData () // by a problem setup (defaults to an empty routine). problem_initialize_rad_data(i, j, k, r, xnu_pass, nugroup_pass, dnugroup_pass, geomdata); - }); - } } #endif // RADIATION @@ -1951,7 +1951,9 @@ Castro::post_timestep (int iteration_local) // will also do the sync solve associated with the reflux. if (do_reflux && level < parent->finestLevel()) { - reflux(level, level+1); + if (parent->subcyclingMode() != "None") { + reflux(level, level+1, true); + } } // Ensure consistency with finer grids. @@ -2537,7 +2539,7 @@ Castro::advance_aux(Real time, Real dt) MultiFab& S_old = get_old_data(State_Type); MultiFab& S_new = get_new_data(State_Type); -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(S_old, TilingIfNotGPU()); mfi.isValid(); ++mfi) @@ -2614,9 +2616,31 @@ Castro::FluxRegFineAdd() { } +// reflux() synchronizes fluxes between levels and has two modes of operation. +// +// When in_post_timestep = true, we are performing the reflux in AmrLevel's +// post_timestep() routine. This takes place after this level has completed +// its advance as well as all fine levels above this level have completed +// their advance. The main activities are: +// +// (1) Limit the flux correction so that it won't cause unphysical densities. +// (2) Update any copies of the full flux arrays with the flux correction. 
+// (3) Perform the flux correction on StateData using FluxRegister's Reflux(). +// (4) Do a sync solve for Poisson gravity now that the density has changed. +// (5) Recompute the new-time sources now that StateData has changed. +// +// When in_post_timestep = false, we are performing the reflux immediately +// after the hydro step has taken place on all levels between crse_level and +// fine_level. This happens when amr.subcycling_mode = None, or in general when +// the fine level does not subcycle relative to the coarse level. In this mode +// we can skip steps (4) and (5), as those steps are only necessary to fix the +// updates on these levels that took place without full information about the +// hydro solve being synchronized. The advance on crse_level and fine_level will +// then be followed by a normal computation of the new-time gravity and new-time +// sources, which will not need to be corrected later. void -Castro::reflux(int crse_level, int fine_level) +Castro::reflux (int crse_level, int fine_level, bool in_post_timestep) { BL_PROFILE("Castro::reflux()"); @@ -2630,7 +2654,7 @@ Castro::reflux(int crse_level, int fine_level) Vector > drho(nlevs); Vector > dphi(nlevs); - if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0) { + if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0 && in_post_timestep) { for (int lev = crse_level; lev <= fine_level; ++lev) { @@ -2714,6 +2738,9 @@ Castro::reflux(int crse_level, int fine_level) // Now zero out any problematic flux corrections. +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif for (MFIter mfi(crse_state, TilingIfNotGPU()); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); @@ -2780,7 +2807,7 @@ Castro::reflux(int crse_level, int fine_level) // Update the coarse fluxes MultiFabs using the reflux data. This should only make // a difference if we re-evaluate the source terms later. 
- if (update_sources_after_reflux) { + if (update_sources_after_reflux || !in_post_timestep) { MultiFab::Add(*crse_lev.fluxes[idir], temp_fluxes[idir], 0, 0, crse_lev.fluxes[idir]->nComp(), 0); @@ -2800,7 +2827,7 @@ Castro::reflux(int crse_level, int fine_level) #ifdef GRAVITY int ilev = lev - crse_level - 1; - if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0) { + if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0 && in_post_timestep) { reg->Reflux(*drho[ilev], crse_lev.volume, 1.0, 0, URHO, 1, crse_lev.geom); amrex::average_down(*drho[ilev + 1], *drho[ilev], 0, 1, getLevel(lev).crse_ratio); } @@ -2823,7 +2850,7 @@ Castro::reflux(int crse_level, int fine_level) reg->Reflux(crse_state, dr, 1.0, 0, UMX, 1, crse_lev.geom); - if (update_sources_after_reflux) { + if (update_sources_after_reflux || !in_post_timestep) { MultiFab tmp_fluxes(crse_lev.P_radial.boxArray(), crse_lev.P_radial.DistributionMap(), @@ -2860,7 +2887,7 @@ Castro::reflux(int crse_level, int fine_level) reg->Reflux(crse_lev.get_new_data(Rad_Type), crse_lev.volume, 1.0, 0, 0, Radiation::nGroups, crse_lev.geom); - if (update_sources_after_reflux) { + if (update_sources_after_reflux || !in_post_timestep) { for (int idir = 0; idir < AMREX_SPACEDIM; ++idir) { @@ -2890,7 +2917,7 @@ Castro::reflux(int crse_level, int fine_level) #endif #ifdef GRAVITY - if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0) { + if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0 && in_post_timestep) { reg = &getLevel(lev).phi_reg; @@ -2919,7 +2946,7 @@ Castro::reflux(int crse_level, int fine_level) // Do the sync solve across all levels. #ifdef GRAVITY - if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0) { + if (do_grav && gravity->get_gravity_type() == "PoissonGrav" && gravity->NoSync() == 0 && in_post_timestep) { gravity->gravity_sync(crse_level, fine_level, amrex::GetVecOfPtrs(drho), amrex::GetVecOfPtrs(dphi)); } #endif @@ -2935,7 +2962,7 @@ Castro::reflux(int crse_level, int fine_level) // ghost zone fills like diffusion depend on the data in the // coarser levels. 
- if (update_sources_after_reflux && + if (update_sources_after_reflux && in_post_timestep && (time_integration_method == CornerTransportUpwind || time_integration_method == SimplifiedSpectralDeferredCorrections)) { @@ -3076,7 +3103,7 @@ Castro::normalize_species (MultiFab& S_new, int ng) ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) @@ -3150,12 +3177,12 @@ Castro::enforce_consistent_e ( BL_PROFILE("Castro::enforce_consistent_e()"); -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(S, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& box = mfi.tilebox(); + const Box& box = mfi.tilebox(); auto S_arr = S.array(mfi); @@ -3214,15 +3241,13 @@ Castro::enforce_min_density (MultiFab& state_in, int ng) } -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(state_in, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); do_enforce_minimum_density(bx, state_in.array(mfi), verbose); - } if (print_update_diagnostics) @@ -3248,7 +3273,7 @@ Castro::check_for_negative_density () ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) { @@ -3335,11 +3360,10 @@ Castro::enforce_speed_limit (MultiFab& state_in, int ng) return; } -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(state_in, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); auto u = state_in[mfi].array(); @@ -3465,13 +3489,13 @@ Castro::apply_problem_tags (TagBoxArray& tags, Real time) int lev = level; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif { - for (MFIter mfi(tags); mfi.isValid(); ++mfi) + for (MFIter mfi(tags, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.validbox(); + const Box& bx = mfi.tilebox(); TagBox& tagfab = tags[mfi]; @@ -3530,10 +3554,10 @@ Castro::apply_tagging_restrictions(TagBoxArray& tags, [[maybe_unused]] Real time blocking_factor[dim] = parent->blockingFactor(lev)[dim]; } -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif - for (MFIter mfi(tags); mfi.isValid(); ++mfi) { + for (MFIter mfi(tags, TilingIfNotGPU()); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); auto tag = tags[mfi].array(); @@ -3574,10 +3598,10 @@ Castro::apply_tagging_restrictions(TagBoxArray& tags, [[maybe_unused]] Real time auto geomdata = geom.data(); // Allow the user to limit tagging outside of some distance from the problem center. 
-#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif - for (MFIter mfi(tags); mfi.isValid(); ++mfi) { + for (MFIter mfi(tags, TilingIfNotGPU()); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); auto tag = tags[mfi].array(); @@ -3753,7 +3777,7 @@ Castro::reset_internal_energy( } // Ensure (rho e) isn't too small or negative -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(State, TilingIfNotGPU()); mfi.isValid(); ++mfi) @@ -3790,12 +3814,12 @@ Castro::add_magnetic_e( MultiFab& Bx, MultiFab& State) { -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(State, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& box = mfi.tilebox(); + const Box& box = mfi.tilebox(); auto S_arr = State.array(mfi); auto Bx_arr = Bx.array(mfi); auto By_arr = By.array(mfi); @@ -3835,12 +3859,12 @@ Castro::check_div_B( MultiFab& Bx, using ReduceTuple = typename decltype(reduce_data)::Type; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(State, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& box = mfi.tilebox(); + const Box& box = mfi.tilebox(); auto Bx_arr = Bx.array(mfi); auto By_arr = By.array(mfi); auto Bz_arr = Bz.array(mfi); @@ -3971,12 +3995,11 @@ Castro::computeTemp( } #endif -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(State, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - int num_ghost = ng; #ifdef TRUE_SDC if (sdc_order == 4) { @@ -4036,7 +4059,6 @@ Castro::computeTemp( } }); } - } #ifdef TRUE_SDC @@ -4356,7 +4378,7 @@ Castro::getCPUTime() { int numCores = ParallelDescriptor::NProcs(); -#ifdef _OPENMP +#ifdef AMREX_USE_OMP numCores = numCores*omp_get_max_threads(); #endif diff --git a/Source/driver/Castro_advance.cpp b/Source/driver/Castro_advance.cpp index 0c674e1c23..aa3e068748 100644 --- a/Source/driver/Castro_advance.cpp +++ b/Source/driver/Castro_advance.cpp @@ -38,6 +38,11 @@ Castro::advance (Real time, // amr_ncycle : the number of subcycles at this level { + if (parent->subcyclingMode() == "None" && level > 0) { + amrex::Print() << "\n Advance at this level has already been completed.\n\n"; + return dt; + } + BL_PROFILE("Castro::advance()"); // Save the wall time when we started the step. @@ -48,7 +53,15 @@ Castro::advance (Real time, Real dt_new = dt; - initialize_advance(time, dt, amr_iteration); + int max_level_to_advance = level; + + if (parent->subcyclingMode() == "None" && level == 0) { + max_level_to_advance = parent->finestLevel(); + } + + for (int lev = level; lev <= max_level_to_advance; ++lev) { + getLevel(lev).initialize_advance(time, dt, amr_iteration); + } // Do the advance. @@ -71,43 +84,35 @@ Castro::advance (Real time, #endif //MHD } - // Optionally kill the job at this point, if we've detected a violation. - - if (cfl_violation == 1 && use_retry != 0) { - amrex::Abort("CFL is too high at this level; go back to a checkpoint and restart with lower CFL number, or set castro.use_retry = 1"); - } - - // If we didn't kill the job, reset the violation counter. - - cfl_violation = 0; - // If the user requests, indicate that we want a regrid at the end of the step. if (use_post_step_regrid == 1) { post_step_regrid = 1; } + for (int lev = level; lev <= max_level_to_advance; ++lev) { #ifdef AUX_UPDATE - advance_aux(time, dt); + getLevel(lev).advance_aux(time, dt); #endif #ifdef GRAVITY - // Update the point mass. - if (use_point_mass == 1) { - pointmass_update(time, dt); - } + // Update the point mass. 
+ if (use_point_mass == 1) { + getLevel(lev).pointmass_update(time, dt); + } #endif #ifdef RADIATION - MultiFab& S_new = get_new_data(State_Type); - final_radiation_call(S_new, amr_iteration, amr_ncycle); + MultiFab& S_new = getLevel(lev).get_new_data(State_Type); + getLevel(lev).final_radiation_call(S_new, amr_iteration, amr_ncycle); #endif #ifdef AMREX_PARTICLES - advance_particles(amr_iteration, time, dt); + getLevel(lev).advance_particles(amr_iteration, time, dt); #endif - finalize_advance(); + getLevel(lev).finalize_advance(); + } return dt_new; } @@ -122,10 +127,6 @@ Castro::initialize_do_advance (Real time, Real dt) advance_status status {}; - // Reset the CFL violation flag. - - cfl_violation = 0; - #ifdef RADIATION // make sure these are filled to avoid check/plot file errors: if (do_radiation) { @@ -417,9 +418,12 @@ Castro::initialize_advance(Real time, Real dt, int amr_iteration) amrex::Print() << "<<<<< drive initial convection reset >>>>" << std::endl; - for (MFIter mfi(S_old); mfi.isValid(); ++mfi) +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(S_old, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& box = mfi.validbox(); + const Box& box = mfi.tilebox(); auto s = S_old[mfi].array(); auto geomdata = geom.data(); @@ -571,7 +575,7 @@ Castro::finalize_advance() { BL_PROFILE("Castro::finalize_advance()"); - if (do_reflux == 1) { + if (do_reflux == 1 && parent->subcyclingMode() != "None") { FluxRegCrseInit(); FluxRegFineAdd(); } @@ -581,7 +585,7 @@ Castro::finalize_advance() // the fluxes from the full timestep (this will be used // later during the reflux operation). - if (do_reflux == 1 && update_sources_after_reflux == 1) { + if (do_reflux == 1 && update_sources_after_reflux == 1 && parent->subcyclingMode() != "None") { for (int idir = 0; idir < AMREX_SPACEDIM; ++idir) { MultiFab::Copy(*mass_fluxes[idir], *fluxes[idir], URHO, 0, 1, 0); } @@ -625,14 +629,34 @@ Castro::finalize_advance() // Record how many zones we have advanced. 
- num_zones_advanced += static_cast(grids.numPts()) / static_cast(getLevel(0).grids.numPts()); + int max_level_to_advance = level; + + if (parent->subcyclingMode() == "None") { + max_level_to_advance = parent->finestLevel(); + } + + long num_pts_advanced = 0; + + for (int lev = level; lev <= max_level_to_advance; ++lev) { + num_pts_advanced += getLevel(lev).grids.numPts(); + } + + num_zones_advanced += static_cast(num_pts_advanced) / static_cast(getLevel(0).grids.numPts()); Real wall_time = ParallelDescriptor::second() - wall_time_start; - Real fom_advance = static_cast(grids.numPts()) / wall_time / 1.e6; + + Real fom_advance = static_cast(num_pts_advanced) / wall_time / 1.e6; if (verbose >= 1) { - amrex::Print() << " Zones advanced per microsecond at this level: " - << fom_advance << std::endl << std::endl; + if (max_level_to_advance > 0) { + if (level == 0) { + amrex::Print() << " Zones advanced per microsecond from level " << level << " to level " + << max_level_to_advance << ": " << fom_advance << std::endl << std::endl; + } + } + else { + amrex::Print() << " Zones advanced per microsecond at this level: " + << fom_advance << std::endl << std::endl; + } } - } diff --git a/Source/driver/Castro_advance_ctu.cpp b/Source/driver/Castro_advance_ctu.cpp index e6eeb2fb34..0fc088abd2 100644 --- a/Source/driver/Castro_advance_ctu.cpp +++ b/Source/driver/Castro_advance_ctu.cpp @@ -26,90 +26,110 @@ Castro::do_advance_ctu (Real time, Real dt) #ifndef TRUE_SDC + // Advance simultaneously on all levels that are not subcycling + // relative to this level. + + int max_level_to_advance = level; + + if (parent->subcyclingMode() == "None" && level == 0) { + max_level_to_advance = parent->finestLevel(); + } + const Real prev_time = state[State_Type].prevTime(); const Real cur_time = state[State_Type].curTime(); - // Perform initialization steps. + for (int lev = level; lev <= max_level_to_advance; ++lev) { + // Perform initialization steps. - status = initialize_do_advance(time, dt); + status = getLevel(lev).initialize_do_advance(time, dt); - if (status.success == false) { - return status; - } + if (status.success == false) { + return status; + } - // Perform all pre-advance operations and then initialize - // the new-time state with the output of those operators. + // Perform all pre-advance operations and then initialize + // the new-time state with the output of those operators. - status = pre_advance_operators(prev_time, dt); + status = getLevel(lev).pre_advance_operators(prev_time, dt); - if (status.success == false) { - return status; - } + if (status.success == false) { + return status; + } - // Construct the old-time sources from Sborder. This will already - // be applied to S_new (with full dt weighting), to be corrected - // later. Note that this does not affect the prediction of the - // interface state; an explicit source will be traced there as - // needed. + // Construct the old-time sources from Sborder. This will already + // be applied to S_new (with full dt weighting), to be corrected + // later. Note that this does not affect the prediction of the + // interface state; an explicit source will be traced there as + // needed. - status = do_old_sources(prev_time, dt); + status = getLevel(lev).do_old_sources(prev_time, dt); - if (status.success == false) { - return status; - } + if (status.success == false) { + return status; + } - // Perform any operations that occur after the sources but before the hydro. + // Perform any operations that occur after the sources but before the hydro. 
- status = pre_hydro_operators(prev_time, dt); + status = getLevel(lev).pre_hydro_operators(prev_time, dt); - if (status.success == false) { - return status; - } + if (status.success == false) { + return status; + } - // Do the hydro update. We build directly off of Sborder, which - // is the state that has already seen the burn. + // Do the hydro update. We build directly off of Sborder, which + // is the state that has already seen the burn. #ifndef MHD - status = construct_ctu_hydro_source(prev_time, dt); + status = getLevel(lev).construct_ctu_hydro_source(prev_time, dt); #else - status = construct_ctu_mhd_source(prev_time, dt); + status = getLevel(lev).construct_ctu_mhd_source(prev_time, dt); #endif - if (status.success == false) { - return status; + if (status.success == false) { + return status; + } } - // Perform any operations that occur after the hydro but before - // the corrector sources. - - status = post_hydro_operators(cur_time, dt); + // We can perform the reflux immediately if there's no subcycling + // above this level. - if (status.success == false) { - return status; + if (do_reflux && level < max_level_to_advance) { + reflux(level, max_level_to_advance, false); } - // Construct and apply new-time source terms. + for (int lev = level; lev <= max_level_to_advance; ++lev) { + // Perform any operations that occur after the hydro but before + // the corrector sources. - status = do_new_sources(cur_time, dt); + status = getLevel(lev).post_hydro_operators(cur_time, dt); - if (status.success == false) { - return status; - } + if (status.success == false) { + return status; + } - // Do the second half of the reactions for Strang, or the full burn for simplified SDC. + // Construct and apply new-time source terms. - status = post_advance_operators(cur_time, dt); + status = getLevel(lev).do_new_sources(cur_time, dt); - if (status.success == false) { - return status; - } + if (status.success == false) { + return status; + } + + // Do the second half of the reactions for Strang, or the full burn for simplified SDC. + + status = getLevel(lev).post_advance_operators(cur_time, dt); - // Perform finalization steps. + if (status.success == false) { + return status; + } + + // Perform finalization steps. - status = finalize_do_advance(cur_time, dt); + status = getLevel(lev).finalize_do_advance(cur_time, dt); - if (status.success == false) { - return status; + if (status.success == false) { + return status; + } } #endif @@ -230,6 +250,12 @@ Castro::subcycle_advance_ctu(const Real time, const Real dt, int amr_iteration, sub_iteration = 0; + int max_level_to_advance = level; + + if (parent->subcyclingMode() == "None" && level == 0) { + max_level_to_advance = parent->finestLevel(); + } + // Subcycle until we've reached the target time. // Compare against a slightly smaller number to // avoid roundoff concerns. @@ -351,7 +377,10 @@ Castro::subcycle_advance_ctu(const Real time, const Real dt, int amr_iteration, for (int n = 0; n < num_sub_iters; ++n) { if (time_integration_method == SimplifiedSpectralDeferredCorrections) { - sdc_iteration = n; + for (int lev = level; lev <= max_level_to_advance; ++lev) { + getLevel(lev).sdc_iteration = n; + } + amrex::Print() << "Beginning SDC iteration " << n + 1 << " of " << num_sub_iters << "." << std::endl << std::endl; } @@ -386,12 +415,6 @@ Castro::subcycle_advance_ctu(const Real time, const Real dt, int amr_iteration, sdc_iters = sdc_iters_old; - // If we have hit a CFL violation during this subcycle, we must abort. 
- - if (cfl_violation && !use_retry) { - amrex::Abort("CFL is too high at this level; go back to a checkpoint and restart with lower CFL number, or set castro.use_retry = 1"); - } - // If we're allowing for retries, check for that here. if (use_retry) { diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp index 54ad501e54..d0c1da2263 100644 --- a/Source/driver/Castro_advance_sdc.cpp +++ b/Source/driver/Castro_advance_sdc.cpp @@ -163,10 +163,6 @@ Castro::do_advance_sdc (Real time, if (do_hydro) { // Check for CFL violations. check_for_cfl_violation(S_old, dt); - - // If we detect one, return immediately. - if (cfl_violation) - return dt; } // construct the update for the current stage -- this fills diff --git a/Source/driver/Castro_io.cpp b/Source/driver/Castro_io.cpp index f6cad5cb5c..e3bb00a5d2 100644 --- a/Source/driver/Castro_io.cpp +++ b/Source/driver/Castro_io.cpp @@ -26,7 +26,7 @@ #include #endif -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #include #endif @@ -280,13 +280,14 @@ Castro::restart (Amr& papa, amrex::Abort(); } - for (MFIter mfi(S_new); mfi.isValid(); ++mfi) +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(S_new, TilingIfNotGPU()); mfi.isValid(); ++mfi) { + const Box& bx = mfi.tilebox(); - const Box& bx = mfi.validbox(); - - if (! orig_domain.contains(bx)) { - + if (!orig_domain.contains(bx)) { auto s = S_new[mfi].array(); auto geomdata = geom.data(); @@ -297,7 +298,6 @@ Castro::restart (Amr& papa, // by a problem setup (defaults to an empty routine). problem_initialize_state_data(i, j, k, s, geomdata); }); - } } } @@ -308,7 +308,7 @@ Castro::restart (Amr& papa, #ifdef GRAVITY if (do_grav && level == 0) { - BL_ASSERT(gravity == 0); + BL_ASSERT(gravity == nullptr); gravity = new Gravity(parent,parent->finestLevel(),&phys_bc, URHO); } #endif @@ -554,7 +554,7 @@ Castro::writeJobInfo (const std::string& dir, const Real io_time) jobInfoFile << "inputs file: " << inputs_name << "\n\n"; jobInfoFile << "number of MPI processes: " << ParallelDescriptor::NProcs() << "\n"; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP jobInfoFile << "number of threads: " << omp_get_max_threads() << "\n"; #endif jobInfoFile << "\n"; diff --git a/Source/driver/sum_utils.cpp b/Source/driver/sum_utils.cpp index 1b8c3635f8..d84e03eb97 100644 --- a/Source/driver/sum_utils.cpp +++ b/Source/driver/sum_utils.cpp @@ -38,7 +38,7 @@ Castro::volWgtSum (const MultiFab& mf, int comp, bool local, bool finemask) ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(mf, TilingIfNotGPU()); mfi.isValid(); ++mfi) @@ -101,7 +101,7 @@ Castro::locWgtSum (const MultiFab& mf, int comp, int idir, bool local) auto dx = geom.CellSizeArray(); auto problo = geom.ProbLoArray(); -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(mf, TilingIfNotGPU()); mfi.isValid(); ++mfi) @@ -195,7 +195,7 @@ Castro::volProductSum (const MultiFab& mf1, ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(mf1, TilingIfNotGPU()); mfi.isValid(); ++mfi) @@ -250,7 +250,7 @@ Castro::locSquaredSum (const std::string& name, auto dx = geom.CellSizeArray(); auto problo = geom.ProbLoArray(); -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(*mf, TilingIfNotGPU()); mfi.isValid(); ++mfi) diff --git 
a/Source/driver/timestep.cpp b/Source/driver/timestep.cpp index 10f0655a1b..3f81ae798a 100644 --- a/Source/driver/timestep.cpp +++ b/Source/driver/timestep.cpp @@ -474,7 +474,7 @@ Castro::estdt_rad (int is_new) ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; -#ifdef _OPENMP +#ifdef AMREX_USE_OMP #pragma omp parallel #endif for (MFIter mfi(stateMF, TilingIfNotGPU()); mfi.isValid(); ++mfi) diff --git a/Source/gravity/Castro_gravity.cpp b/Source/gravity/Castro_gravity.cpp index f0cecf9f44..094727e6a8 100644 --- a/Source/gravity/Castro_gravity.cpp +++ b/Source/gravity/Castro_gravity.cpp @@ -39,8 +39,17 @@ Castro::construct_old_gravity (Real time) // difference between the multilevel and the single level solutions. // Note that we don't need to do this solve for single-level runs, // since the solution at the end of the last timestep won't have changed. + // Similarly, we can skip this if we aren't subcycling on this level + // or all levels above this level, since the new-time composite solve + // at the new time in the last step will still be valid. - if (gravity->get_gravity_type() == "PoissonGrav" && parent->finestLevel() > 0) + bool do_old_solve = true; + + if (parent->subcyclingMode() == "None" || parent->finestLevel() == 0) { + do_old_solve = false; + } + + if (gravity->get_gravity_type() == "PoissonGrav" && do_old_solve) { // Create a copy of the current (composite) data on this level. @@ -122,7 +131,7 @@ Castro::construct_old_gravity (Real time) #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_gravity() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_gravity() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -154,78 +163,94 @@ Castro::construct_new_gravity (Real time) } - // If we're doing Poisson gravity, do the new-time level solve here. + // If we're doing Poisson gravity, do the new-time level or composite solve here. if (gravity->get_gravity_type() == "PoissonGrav") { + if (level == 0 && parent->subcyclingMode() == "None") { + if (castro::verbose > 0) { + amrex::Print() << "\n... new-time composite Poisson gravity solve from level " << level << " to level " << parent->finestLevel() << std::endl << std::endl; + } - // Use the "old" phi from the current time step as a guess for this solve. - - MultiFab& phi_old = get_old_data(PhiGrav_Type); + // Use the "old" phi from the current time step as a guess for this solve. - MultiFab::Copy(phi_new, phi_old, 0, 0, 1, phi_new.nGrow()); + for (int lev = level; lev <= parent->finestLevel(); ++lev) { + MultiFab& lev_phi_old = getLevel(lev).get_old_data(PhiGrav_Type); + MultiFab& lev_phi_new = getLevel(lev).get_new_data(PhiGrav_Type); - // Subtract off the (composite - level) contribution for the purposes - // of the level solve. We'll add it back later. + MultiFab::Copy(lev_phi_new, lev_phi_old, 0, 0, 1, lev_phi_new.nGrow()); + } - if (gravity->DoCompositeCorrection() && level < parent->finestLevel() && level <= gravity->get_max_solve_level()) { - phi_new.minus(comp_minus_level_phi, 0, 1, 0); + gravity->multilevel_solve_for_new_phi(level, parent->finestLevel()); } + else if (parent->subcyclingMode() != "None") { + // Use the "old" phi from the current time step as a guess for this solve. - if (castro::verbose && ParallelDescriptor::IOProcessor()) { - std::cout << "... 
new-time level Poisson gravity solve at level " << level << std::endl << std::endl; - } + MultiFab& phi_old = get_old_data(PhiGrav_Type); - int is_new = 1; + MultiFab::Copy(phi_new, phi_old, 0, 0, 1, phi_new.nGrow()); - gravity->solve_for_phi(level, - phi_new, - amrex::GetVecOfPtrs(gravity->get_grad_phi_curr(level)), - is_new); + // Subtract off the (composite - level) contribution for the purposes + // of the level solve. We'll add it back later. - if (gravity->DoCompositeCorrection() == 1 && level < parent->finestLevel() && level <= gravity->get_max_solve_level()) { + if (gravity->DoCompositeCorrection() && level < parent->finestLevel() && level <= gravity->get_max_solve_level()) { + phi_new.minus(comp_minus_level_phi, 0, 1, 0); + } - if (gravity->test_results_of_solves() == 1) { + if (castro::verbose && ParallelDescriptor::IOProcessor()) { + std::cout << "... new-time level Poisson gravity solve at level " << level << std::endl << std::endl; + } - if (castro::verbose && ParallelDescriptor::IOProcessor()) { - std::cout << " " << '\n'; - std::cout << "... testing grad_phi_curr before adding comp_minus_level_grad_phi " << '\n'; - } + int is_new = 1; - gravity->test_level_grad_phi_curr(level); + gravity->solve_for_phi(level, + phi_new, + amrex::GetVecOfPtrs(gravity->get_grad_phi_curr(level)), + is_new); - } + if (gravity->DoCompositeCorrection() == 1 && level < parent->finestLevel() && level <= gravity->get_max_solve_level()) { - // Add back the (composite - level) contribution. This ensures that - // if we are not doing a sync solve, then we still get the difference - // between the composite and level solves added to the force we - // calculate, so it is slightly more accurate than it would have been. + if (gravity->test_results_of_solves() == 1) { - phi_new.plus(comp_minus_level_phi, 0, 1, 0); - for (int n = 0; n < AMREX_SPACEDIM; ++n) { - gravity->get_grad_phi_curr(level)[n]->plus(*comp_minus_level_grad_phi[n], 0, 1, 0); - } + if (castro::verbose && ParallelDescriptor::IOProcessor()) { + std::cout << " " << '\n'; + std::cout << "... testing grad_phi_curr before adding comp_minus_level_grad_phi " << '\n'; + } - if (gravity->test_results_of_solves() == 1) { + gravity->test_level_grad_phi_curr(level); - if (castro::verbose && ParallelDescriptor::IOProcessor()) { - std::cout << " " << '\n'; - std::cout << "... testing grad_phi_curr after adding comp_minus_level_grad_phi " << '\n'; } - gravity->test_level_grad_phi_curr(level); + // Add back the (composite - level) contribution. This ensures that + // if we are not doing a sync solve, then we still get the difference + // between the composite and level solves added to the force we + // calculate, so it is slightly more accurate than it would have been. - } + phi_new.plus(comp_minus_level_phi, 0, 1, 0); + for (int n = 0; n < AMREX_SPACEDIM; ++n) { + gravity->get_grad_phi_curr(level)[n]->plus(*comp_minus_level_grad_phi[n], 0, 1, 0); + } - } + if (gravity->test_results_of_solves() == 1) { + if (castro::verbose && ParallelDescriptor::IOProcessor()) { + std::cout << " " << '\n'; + std::cout << "... testing grad_phi_curr after adding comp_minus_level_grad_phi " << '\n'; + } + + gravity->test_level_grad_phi_curr(level); + + } + + } + } } // Define new gravity vector. 
gravity->get_new_grav_vector(level, grav_new, time); - if (gravity->get_gravity_type() == "PoissonGrav" && level <= gravity->get_max_solve_level()) { + if (gravity->get_gravity_type() == "PoissonGrav" && level <= gravity->get_max_solve_level() && parent->subcyclingMode() != "None") { if (gravity->DoCompositeCorrection() == 1 && level < parent->finestLevel()) { @@ -262,7 +287,7 @@ Castro::construct_new_gravity (Real time) #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_new_gravity() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_gravity() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -414,7 +439,7 @@ void Castro::construct_old_gravity_source(MultiFab& source, MultiFab& state_in, #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_gravity_source() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_gravity_source() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -651,7 +676,7 @@ void Castro::construct_new_gravity_source(MultiFab& source, MultiFab& state_old, #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_new_gravity_source() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_gravity_source() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/gravity/Gravity.cpp b/Source/gravity/Gravity.cpp index cb7d1c1c95..77d12f68e5 100644 --- a/Source/gravity/Gravity.cpp +++ b/Source/gravity/Gravity.cpp @@ -470,7 +470,7 @@ Gravity::solve_for_phi (int level, Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(end,IOProc); - amrex::Print() << "Gravity::solve_for_phi() time = " << end << std::endl << std::endl; + amrex::Print() << "Gravity::solve_for_phi() time = " << end << " on level " << level << std::endl << std::endl; #ifdef BL_LAZY }); #endif @@ -2420,10 +2420,9 @@ Gravity::fill_direct_sum_BCs(int crse_level, int fine_level, const Vectorlo(dir); - physbc_lo[dir] = phys_bc->hi(dir); + for (int dir = 0; dir < 3; dir++) { + physbc_lo[dir] = phys_bc->lo(dir); + physbc_hi[dir] = phys_bc->hi(dir); } for (int lev = crse_level; lev <= fine_level; ++lev) { diff --git a/Source/gravity/Gravity_util.H b/Source/gravity/Gravity_util.H index 744e6d301b..69860f702b 100644 --- a/Source/gravity/Gravity_util.H +++ b/Source/gravity/Gravity_util.H @@ -202,12 +202,12 @@ void multipole_symmetric_add(Real x, Real y, Real z, const GpuArray& problo, const GpuArray& probhi, Real rho, Real vol, - Array4 const& qU0, - Array4 const& qUC, - Array4 const& qUS, Array4 const& qL0, Array4 const& qLC, Array4 const& qLS, + Array4 const& qU0, + Array4 const& qUC, + Array4 const& qUS, int npts, int nlo, int index, amrex::Gpu::Handler const& handler) { diff --git a/Source/hydro/Castro_ctu_hydro.cpp b/Source/hydro/Castro_ctu_hydro.cpp index 2eeb8298ab..381a49ac45 100644 --- a/Source/hydro/Castro_ctu_hydro.cpp +++ b/Source/hydro/Castro_ctu_hydro.cpp @@ -33,7 +33,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) // divergence) using the CTU framework for unsplit hydrodynamics if (verbose) { - amrex::Print() << "... Entering construct_ctu_hydro_source()" << std::endl << std::endl; + amrex::Print() << "... 
Entering construct_ctu_hydro_source() on level " << level << std::endl << std::endl; } #ifdef HYBRID_MOMENTUM @@ -1504,8 +1504,17 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) } #endif + // Perform reflux (for non-subcycling advances). + + if (parent->subcyclingMode() == "None") { + if (do_reflux == 1) { + FluxRegCrseInit(); + FluxRegFineAdd(); + } + } + if (verbose) { - amrex::Print() << "... Leaving construct_ctu_hydro_source()" << std::endl << std::endl; + amrex::Print() << "... Leaving construct_ctu_hydro_source() on level " << level << std::endl << std::endl; } if (verbose > 0) @@ -1518,7 +1527,7 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_ctu_hydro_source() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_ctu_hydro_source() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/hydro/Castro_hydro.cpp b/Source/hydro/Castro_hydro.cpp index 650910226c..184f227cfb 100644 --- a/Source/hydro/Castro_hydro.cpp +++ b/Source/hydro/Castro_hydro.cpp @@ -233,9 +233,10 @@ Castro::cons_to_prim_fourth(const Real time) void Castro::check_for_cfl_violation(const MultiFab& State, const Real dt) { - BL_PROFILE("Castro::check_for_cfl_violation()"); + int cfl_violation = 0; + auto dx = geom.CellSizeArray(); Real dtdx = dt / dx[0]; @@ -390,4 +391,8 @@ Castro::check_for_cfl_violation(const MultiFab& State, const Real dt) cfl_violation = 1; } + // If we detect a CFL violation, abort. + if (cfl_violation) { + amrex::Abort("CFL is too high at this level; go back to a checkpoint and restart with lower CFL number"); + } } diff --git a/Source/hydro/advection_util.cpp b/Source/hydro/advection_util.cpp index 0002bda56a..c73d1ac106 100644 --- a/Source/hydro/advection_util.cpp +++ b/Source/hydro/advection_util.cpp @@ -638,6 +638,8 @@ Castro::do_enforce_minimum_density(const Box& bx, GeometryData geomdata = geom.data(); #endif + amrex::ignore_unused(verbose); + amrex::ParallelFor(bx, [=] AMREX_GPU_HOST_DEVICE (int i, int j, int k) { diff --git a/Source/hydro/trace_ppm.cpp b/Source/hydro/trace_ppm.cpp index 02c819c9fa..222dc582ca 100644 --- a/Source/hydro/trace_ppm.cpp +++ b/Source/hydro/trace_ppm.cpp @@ -60,8 +60,10 @@ Castro::trace_ppm(const Box& bx, Real hdt = 0.5_rt * dt; Real dtdx = dt / dx[idir]; +#ifndef AMREX_USE_GPU auto lo = bx.loVect3d(); auto hi = bx.hiVect3d(); +#endif auto vlo = vbx.loVect3d(); auto vhi = vbx.hiVect3d(); diff --git a/Source/mhd/Castro_mhd.cpp b/Source/mhd/Castro_mhd.cpp index bc9c939ad4..a77a1b054d 100644 --- a/Source/mhd/Castro_mhd.cpp +++ b/Source/mhd/Castro_mhd.cpp @@ -746,5 +746,14 @@ Castro::construct_ctu_mhd_source(Real time, Real dt) check_for_nan(S_new); + // Perform reflux (for non-subcycling advances). 
+ + if (parent->subcyclingMode() == "None") { + if (do_reflux == 1) { + FluxRegCrseInit(); + FluxRegFineAdd(); + } + } + return status; } diff --git a/Source/problems/hse_fill.cpp b/Source/problems/hse_fill.cpp index 9354b9ef0b..cbff091621 100644 --- a/Source/problems/hse_fill.cpp +++ b/Source/problems/hse_fill.cpp @@ -112,7 +112,7 @@ hse_fill(const Box& bx, Array4 const& adv, } } - bool converged_hse = false; + [[maybe_unused]] bool converged_hse = false; Real p_want; Real drho; @@ -304,7 +304,7 @@ hse_fill(const Box& bx, Array4 const& adv, } } - bool converged_hse = false; + [[maybe_unused]] bool converged_hse = false; Real p_want; Real drho; @@ -501,7 +501,7 @@ hse_fill(const Box& bx, Array4 const& adv, } } - bool converged_hse = false; + [[maybe_unused]] bool converged_hse = false; Real p_want; Real drho; @@ -694,7 +694,7 @@ hse_fill(const Box& bx, Array4 const& adv, } } - bool converged_hse = false; + [[maybe_unused]] bool converged_hse = false; Real p_want; Real drho; @@ -890,7 +890,7 @@ hse_fill(const Box& bx, Array4 const& adv, } } - bool converged_hse = false; + [[maybe_unused]] bool converged_hse = false; Real p_want; Real drho; diff --git a/Source/reactions/Castro_react.cpp b/Source/reactions/Castro_react.cpp index b76fd5956b..8d77908a7f 100644 --- a/Source/reactions/Castro_react.cpp +++ b/Source/reactions/Castro_react.cpp @@ -17,15 +17,25 @@ Castro::do_old_reactions (Real time, Real dt) amrex::ignore_unused(time); amrex::ignore_unused(dt); - bool burn_success = true; + advance_status status {}; #ifndef SIMPLIFIED_SDC + bool burn_success = true; + MultiFab& R_old = get_old_data(Reactions_Type); MultiFab& R_new = get_new_data(Reactions_Type); if (time_integration_method != SimplifiedSpectralDeferredCorrections) { // The result of the reactions is added directly to Sborder. burn_success = react_state(Sborder, R_old, time, 0.5 * dt, 0); + + if (!burn_success) { + status.success = false; + status.reason = "burn unsuccessful"; + + return status; + } + clean_state( #ifdef MHD Bx_old_tmp, By_old_tmp, Bz_old_tmp, @@ -36,13 +46,6 @@ Castro::do_old_reactions (Real time, Real dt) } #endif - advance_status status {}; - - if (!burn_success) { - status.success = false; - status.reason = "burn unsuccessful"; - } - return status; } @@ -52,6 +55,8 @@ Castro::do_new_reactions (Real time, Real dt) amrex::ignore_unused(time); amrex::ignore_unused(dt); + advance_status status {}; + bool burn_success = true; MultiFab& R_new = get_new_data(Reactions_Type); @@ -68,6 +73,13 @@ Castro::do_new_reactions (Real time, Real dt) burn_success = react_state(time, dt); + if (!burn_success) { + status.success = false; + status.reason = "burn unsuccessful"; + + return status; + } + clean_state(S_new, time + dt, S_new.nGrow()); // Check for NaN's. 
@@ -94,6 +106,14 @@ Castro::do_new_reactions (Real time, Real dt) if (time_integration_method != SimplifiedSpectralDeferredCorrections) { burn_success = react_state(S_new, R_new, time - 0.5 * dt, 0.5 * dt, 1); + + if (!burn_success) { + status.success = false; + status.reason = "burn unsuccessful"; + + return status; + } + clean_state( #ifdef MHD Bx_new, By_new, Bz_new, @@ -104,15 +124,7 @@ Castro::do_new_reactions (Real time, Real dt) #endif // SIMPLIFIED_SDC - advance_status status {}; - - if (!burn_success) { - status.success = false; - status.reason = "burn unsuccessful"; - } - return status; - } // Strang version @@ -160,7 +172,7 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra const int ng = s.nGrow(); if (verbose) { - amrex::Print() << "... Entering burner and doing half-timestep of burning." << std::endl << std::endl; + amrex::Print() << "... Entering burner on level " << level << " and doing half-timestep of burning." << std::endl << std::endl; } ReduceOps reduce_op; @@ -395,7 +407,7 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra } if (verbose) { - amrex::Print() << "... Leaving burner after completing half-timestep of burning." << std::endl << std::endl; + amrex::Print() << "... Leaving burner on level " << level << " after completing half-timestep of burning." << std::endl << std::endl; } if (verbose > 0) @@ -408,7 +420,7 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::react_state() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::react_state() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -451,7 +463,7 @@ Castro::react_state(Real time, Real dt) const Real strt_time = ParallelDescriptor::second(); if (verbose) { - amrex::Print() << "... Entering burner and doing full timestep of burning." << std::endl << std::endl; + amrex::Print() << "... Entering burner on level " << level << " and doing full timestep of burning." << std::endl << std::endl; } MultiFab& S_old = get_old_data(State_Type); @@ -778,7 +790,7 @@ Castro::react_state(Real time, Real dt) if (verbose) { - amrex::Print() << "... Leaving burner after completing full timestep of burning." << std::endl << std::endl; + amrex::Print() << "... Leaving burner on level " << level << " after completing full timestep of burning." 
<< std::endl << std::endl; const int IOProc = ParallelDescriptor::IOProcessorNumber(); Real run_time = ParallelDescriptor::second() - strt_time; @@ -788,7 +800,7 @@ Castro::react_state(Real time, Real dt) #endif ParallelDescriptor::ReduceRealMax(run_time, IOProc); - amrex::Print() << "Castro::react_state() time = " << run_time << std::endl << std::endl; + amrex::Print() << "Castro::react_state() time = " << run_time << " on level " << level << std::endl << std::endl; #ifdef BL_LAZY }); #endif diff --git a/Source/rotation/Castro_rotation.cpp b/Source/rotation/Castro_rotation.cpp index e2e37f5b84..ce8b0ccb0b 100644 --- a/Source/rotation/Castro_rotation.cpp +++ b/Source/rotation/Castro_rotation.cpp @@ -42,7 +42,7 @@ Castro::construct_old_rotation_source(MultiFab& source, MultiFab& state_in, Real #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_rotation_source() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_rotation_source() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -96,7 +96,7 @@ Castro::construct_new_rotation_source(MultiFab& source, MultiFab& state_old, Mul #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_new_rotation_source() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_rotation_source() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index 7f84100c57..ef632056b5 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -108,7 +108,7 @@ f_sdc_jac(const Real dt_m, // returned by single_zone_react_source, since it is // more consistent T from e - eos_t eos_state; + eos_extra_t eos_state; eos_state.rho = U_full[URHO]; eos_state.T = T_old; // initial guess for (int n = 0; n < NumSpec; ++n) { diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index 5a1c18e86c..a01b11b9be 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -109,7 +109,7 @@ void jac (const Real time, burn_t& burn_state, DvodeT& vode_state, MatrixType& p // returned by single_zone_react_source, since it is // more consistent T from e - eos_t eos_state; + eos_extra_t eos_state; eos_state.rho = U_full[URHO]; eos_state.T = burn_state.T; // initial guess for (int n = 0; n < NumSpec; n++) { diff --git a/Source/sources/Castro_sources.cpp b/Source/sources/Castro_sources.cpp index ab31fb5513..507a782929 100644 --- a/Source/sources/Castro_sources.cpp +++ b/Source/sources/Castro_sources.cpp @@ -167,7 +167,7 @@ Castro::do_old_sources( #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::do_old_sources() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::do_old_sources() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -250,7 +250,7 @@ Castro::do_new_sources( #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::do_new_sources() time = " << run_time << "\n" << "\n"; + amrex::Print() << "Castro::do_new_sources() time = " << run_time << " on level " << level << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -542,7 +542,7 @@ Castro::pre_advance_operators (Real time, Real dt) #endif #endif - // If we are using gravity, solve for the potential and gravatational field. 
+ // If we are using gravity, solve for the potential and gravitational field. #ifdef GRAVITY construct_old_gravity(time); diff --git a/external/Microphysics b/external/Microphysics index bcf06921ae..37551260a0 160000 --- a/external/Microphysics +++ b/external/Microphysics @@ -1 +1 @@ -Subproject commit bcf06921ae6144e0f1d24ceb8181af43357e9f84 +Subproject commit 37551260a0c17a85dd6d08fc289a64c38dfa46a6 diff --git a/external/amrex b/external/amrex index 02ce32baa5..60b23d9d54 160000 --- a/external/amrex +++ b/external/amrex @@ -1 +1 @@ -Subproject commit 02ce32baa5f243bbf012ba2824cc4c44682c6748 +Subproject commit 60b23d9d54287d4b4616add69783ab19729664af
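Note on the Castro_react.cpp hunks above: the patch moves the advance_status declaration to the top of do_old_reactions() / do_new_reactions() and returns as soon as react_state() reports a failed burn, so clean_state() and the later NaN checks only run on data from a successful burn. The snippet below is a minimal, self-contained sketch of that control flow only; the struct and the stub routines are simplified stand-ins for illustration, not Castro's actual API.

// Sketch of the early-return status pattern adopted in the diff above.
// advance_status, react_state(), and clean_state() here are simplified
// placeholders, not the real Castro declarations.
#include <iostream>
#include <string>

struct advance_status {          // simplified stand-in for Castro's struct
    bool success = true;
    std::string reason;
};

// Stand-ins for the real routines; they only report what they would do.
bool react_state(double time, double dt) {
    std::cout << "burning from t = " << time << " for dt = " << dt << "\n";
    return dt < 1.0;             // pretend that large steps make the burn fail
}

void clean_state() {
    std::cout << "cleaning state after a successful burn\n";
}

advance_status do_reactions(double time, double dt) {
    advance_status status {};    // declared up front, as in the patched code

    bool burn_success = react_state(time, dt);

    if (!burn_success) {
        status.success = false;
        status.reason = "burn unsuccessful";
        return status;           // skip clean_state() and later checks
    }

    clean_state();
    return status;
}

int main() {
    for (double dt : {0.5, 2.0}) {
        advance_status status = do_reactions(0.0, dt);
        if (!status.success) {
            std::cout << "advance failed: " << status.reason << "\n";
        }
    }
}

Returning early keeps the failure handling in one place and avoids spending work (clean_state, NaN checks) on a state produced by an unsuccessful burn, which is the shape of the refactor applied to both the old- and new-time reaction paths.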