From af1683be2b99d66cd3ec933440412197df3f3da9 Mon Sep 17 00:00:00 2001
From: isriva <ishan66@gmail.com>
Date: Thu, 7 Sep 2023 23:50:16 -0700
Subject: [PATCH 01/11] added capability for k-space velocity decomposition
 into solenoidal and dilatational parts. works for staggered velocities. also
 inverse FFT back to real space

---
 exec/tests/FourierDecompVel/GNUmakefile     |  47 +++
 exec/tests/FourierDecompVel/Make.package    |   1 +
 exec/tests/FourierDecompVel/inputs          |   5 +
 exec/tests/FourierDecompVel/main_driver.cpp | 390 ++++++++++++++++++++
 src_analysis/StructFact.H                   |   6 +-
 src_analysis/StructFact.cpp                 | 315 +++++++++++++++-
 6 files changed, 753 insertions(+), 11 deletions(-)
 create mode 100644 exec/tests/FourierDecompVel/GNUmakefile
 create mode 100644 exec/tests/FourierDecompVel/Make.package
 create mode 100644 exec/tests/FourierDecompVel/inputs
 create mode 100644 exec/tests/FourierDecompVel/main_driver.cpp
diff --git a/exec/tests/FourierDecompVel/GNUmakefile b/exec/tests/FourierDecompVel/GNUmakefile
new file mode 100644
index 00000000..4208cc10
--- /dev/null
+++ b/exec/tests/FourierDecompVel/GNUmakefile
@@ -0,0 +1,47 @@
+# AMREX_HOME defines the directory in which we will find all the AMReX code.
+# If you set AMREX_HOME as an environment variable, this line will be ignored
+AMREX_HOME ?= ../../../../amrex/
+
+DEBUG         = FALSE
+USE_MPI       = TRUE
+USE_OMP       = FALSE
+USE_CUDA      = FALSE
+USE_HIP       = FALSE
+COMP          = gnu
+DIM           = 3
+TINY_PROFILE  = FALSE
+MAX_SPEC      = 8
+
+MAXSPECIES := $(strip $(MAX_SPEC))
+DEFINES += -DMAX_SPECIES=$(MAXSPECIES)
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+VPATH_LOCATIONS   += .
+INCLUDE_LOCATIONS += .
+
+include ../../../src_rng/Make.package
+VPATH_LOCATIONS   += ../../../src_rng/
+INCLUDE_LOCATIONS += ../../../src_rng/
+
+include ../../../src_common/Make.package
+VPATH_LOCATIONS   += ../../../src_common/
+INCLUDE_LOCATIONS += ../../../src_common/
+
+include ../../../src_analysis/Make.package
+VPATH_LOCATIONS   += ../../../src_analysis/
+INCLUDE_LOCATIONS += ../../../src_analysis/
+
+include $(AMREX_HOME)/Src/Base/Make.package
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
+
+ifeq ($(USE_CUDA),TRUE)
+  LIBRARIES += -lcufft
+else ifeq ($(USE_HIP),TRUE)
+  # Use rocFFT.  ROC_PATH is defined in amrex
+  INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include
+  LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib
+  LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft
+else
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3
+endif
diff --git a/exec/tests/FourierDecompVel/Make.package b/exec/tests/FourierDecompVel/Make.package
new file mode 100644
index 00000000..49b5c8e9
--- /dev/null
+++ b/exec/tests/FourierDecompVel/Make.package
@@ -0,0 +1 @@
+CEXE_sources += main_driver.cpp
diff --git a/exec/tests/FourierDecompVel/inputs b/exec/tests/FourierDecompVel/inputs
new file mode 100644
index 00000000..55a31d3d
--- /dev/null
+++ b/exec/tests/FourierDecompVel/inputs
@@ -0,0 +1,5 @@
+prob_type = 6
+prob_lo = 0.0 0.0 0.0
+prob_hi = 1.0 1.0 1.0
+n_cells = 8 8 8
+max_grid_size = 64 64 64
diff --git a/exec/tests/FourierDecompVel/main_driver.cpp b/exec/tests/FourierDecompVel/main_driver.cpp
new file mode 100644
index 00000000..2a634f8d
--- /dev/null
+++ b/exec/tests/FourierDecompVel/main_driver.cpp
@@ -0,0 +1,390 @@
+#include "common_functions.H"
+#include "StructFact.H"
+
+#include <AMReX.H>
+#include <AMReX_MultiFab.H>
+#include <AMReX_Vector.H>
+#include <AMReX_VisMF.H>
+
+// These are for FFTW / cuFFT / rocFFT
+
+#ifdef AMREX_USE_CUDA
+#include <cufft.h>
+#elif AMREX_USE_HIP
+#  if __has_include(<rocfft/rocfft.h>)  // ROCm 5.3+
+#    include <rocfft/rocfft.h>
+#  else
+#    include <rocfft.h>
+#  endif
+#else
+#include <fftw3.h>
+#include <fftw3-mpi.h>
+#endif
+
+#include <AMReX_GpuComplex.H>
+
+#include <string>
+
+#include <AMReX_MultiFabUtil.H>
+#include "AMReX_PlotFileUtil.H"
+#include "AMReX_BoxArray.H"
+#include <AMReX_Vector.H>
+#include <AMReX_MPMD.H>
+#include <AMReX_VisMF.H>
+
+#include "rng_functions.H"
+
+#include "chrono"
+
+#define ALIGN 16
+
+using namespace std::chrono;
+using namespace amrex;
+
+void main_driver (const char* argv)
+{
+
+    // **********************************
+    // DECLARE SIMULATION PARAMETERS
+    // **********************************
+
+    BL_PROFILE_VAR("main_driver()",main_driver);
+    
+    // store the current time so we can later compute total run time.
+    Real strt_time = ParallelDescriptor::second();
+
+    std::string inputs_file = argv;
+
+    amrex::AllPrint() << "Compiled with support for maximum species = " << MAX_SPECIES << "\n";
+    
+    // copy contents of F90 modules to C++ namespaces
+    InitializeCommonNamespace();
+
+    // make BoxArray and Geometry
+    BoxArray ba;
+    Geometry geom;
+    DistributionMapping dmap;
+
+    // define lower and upper indices of domain
+    IntVect dom_lo(AMREX_D_DECL(           0,            0,            0));
+    IntVect dom_hi(AMREX_D_DECL(n_cells[0]-1, n_cells[1]-1, n_cells[2]-1));
+
+    // Make a single box that is the entire domain
+    Box domain(dom_lo, dom_hi);
+
+    // Initialize the boxarray "ba" from the single box "domain"
+    ba.define(domain);
+
+    // create IntVect of max_grid_size
+    // IntVect max_grid_size(AMREX_D_DECL(max_grid_size_x,max_grid_size_y,max_grid_size_z));
+
+    // Break up boxarray "ba" into chunks no larger than "max_grid_size" along a direction
+    ba.maxSize(IntVect(max_grid_size));
+
+    // How Boxes are distrubuted among MPI processes
+    dmap.define(ba);
+
+    // This defines the physical box size in each direction
+    RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])},
+                     {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])});
+
+    // periodic in all direction
+    Array<int,AMREX_SPACEDIM> is_periodic{AMREX_D_DECL(1,1,1)};
+
+    // geometry object for real data
+    geom.define(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+
+    // extract dx from the geometry object
+    GpuArray<Real,3> dx = geom.CellSizeArray();
+    GpuArray<Real,AMREX_SPACEDIM> reallo;
+    GpuArray<Real,AMREX_SPACEDIM> realhi;
+    GpuArray<Real,AMREX_SPACEDIM> center;
+
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        reallo[d] = prob_lo[d];
+        realhi[d] = prob_hi[d];
+        center[d] = ( realhi[d] - reallo[d] ) / 2.;
+    }
+
+    // Setup MultiFabs
+    std::array< MultiFab, 3 > vel; // staggered velocity field
+    for (int d=0; d<AMREX_SPACEDIM; d++) {
+        vel[d].define(convert(ba,nodal_flag_dir[d]), dmap, 1, 1);
+        vel[d].setVal(0.0);
+    }
+
+    MultiFab structFactMFTurb; // storing CC velocities
+    structFactMFTurb.define(ba, dmap, 3, 0);
+    structFactMFTurb.setVal(0.0);
+
+    // Fill MultiFab
+    Real omega = M_PI/2.0;;
+    //for (MFIter mfi(structFactMFTurb); mfi.isValid(); ++mfi) {
+
+    //    const Array4<Real>& vel = structFactMFTurb.array(mfi);
+    //    const Box& bx = mfi.tilebox();
+
+    //    amrex::ParallelFor(bx,
+    //    [=] AMREX_GPU_DEVICE (int i, int j, int k)
+    //    {
+    //        if (prob_type == 0) {
+    //            if ((i==3) and (j==3) and (k==3)) {
+    //                vel(i+1,j,k,0) =  1.0;
+    //                vel(i,j,k,0)   = -1.0;
+    //                vel(i,j+1,k,1) =  1.0;
+    //                vel(i,j,k,1)   = -1.0;
+    //                vel(i,j,k+1,2) =  1.0;
+    //                vel(i,j,k,2)   = -1.0;
+    //            }
+    //        }
+    //        if (prob_type == 1) {
+    //            if ((i==3) and (j==3) and (k==3)) {
+    //                vel(i+1,j,k,0)     =  1.0;
+    //                vel(i+1,j+1,k,0)   = -1.0;
+    //                vel(i+1,j+1,k,1)   =  1.0;
+    //                vel(i,j+1,k,1)     = -1.0;
+    //            }
+    //        }
+    //        if (prob_type == 2) {
+    //            if ((i==3) and (j==3) and (k==3)) {
+    //                vel(i,j,k,0)     =   1.0;
+    //                vel(i,j,k,1)     =  -1.0;
+    //                vel(i+1,j,k,0)   =   1.0;
+    //                vel(i+1,j,k,1)   =   1.0;
+    //                vel(i+1,j+1,k,0) =  -1.0;
+    //                vel(i+1,j+1,k,1) =   1.0;
+    //                vel(i,j+1,k,0)   =  -1.0;
+    //                vel(i,j+1,k,1)   =  -1.0;
+    //            }
+    //        }
+    //        
+    //        Real x = (i+0.5)*dx[0] - center[0]; 
+    //        Real y = (j+0.5)*dx[1] - center[1]; 
+    //        Real z = (k+0.5)*dx[2] - center[2]; 
+    //        
+    //        if (prob_type == 3) { // pure solenoidal
+    //            vel(i,j,k,0) = -1.0*y;
+    //            vel(i,j,k,1) =  1.0*x;
+    //        }
+    //        
+    //        if (prob_type == 4) { // pure dilatational
+    //            vel(i,j,k,0) =  1.0*x;
+    //            vel(i,j,k,1) =  1.0*y;
+    //        }
+    //        
+    //        if (prob_type == 5) { // pure Laplacian
+    //            vel(i,j,k,0) =  1.0*x;
+    //            vel(i,j,k,1) = -1.0*y;
+    //        }
+    //        
+    //        if (prob_type == 6) { // both
+    //            vel(i,j,k,0) =  1.0*x - 1.0*y;
+    //            vel(i,j,k,1) =  1.0*x + 1.0*y;
+    //        }
+    //       
+    //    });
+    //}
+    for (MFIter mfi(structFactMFTurb); mfi.isValid(); ++mfi) {
+
+        AMREX_D_TERM(const Array4<Real>& velx = vel[0].array(mfi);,
+                     const Array4<Real>& vely = vel[1].array(mfi);,
+                     const Array4<Real>& velz = vel[2].array(mfi););
+
+        const Box& tbx = mfi.nodaltilebox(0);
+        const Box& tby = mfi.nodaltilebox(1);
+        const Box& tbz = mfi.nodaltilebox(2);
+
+        amrex::ParallelFor(tbx, 
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            Real x = prob_lo[0] + (i) * dx[0]     - center[0];
+            Real y = prob_lo[1] + (j+0.5) * dx[1] - center[1];
+            Real z = prob_lo[2] + (k+0.5) * dx[2] - center[2];
+            if (prob_type == 3) { // pure solenoidal
+                velx(i,j,k) = -1.0*y;
+            }
+            
+            if (prob_type == 4) { // pure dilatational
+                velx(i,j,k) =  1.0*x;
+            }
+            
+            if (prob_type == 5) { // pure Laplacian
+                velx(i,j,k) =  1.0*x;
+            }
+            
+            if (prob_type == 6) { // both
+                velx(i,j,k) =  1.0*x - 1.0*y;
+            }
+        });
+        amrex::ParallelFor(tby, 
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            Real x = prob_lo[0] + (i+0.5) * dx[0] - center[0];
+            Real y = prob_lo[1] + (j) * dx[1]     - center[1];
+            Real z = prob_lo[2] + (k+0.5) * dx[2] - center[2];
+            if (prob_type == 3) { // pure solenoidal
+                vely(i,j,k) =  1.0*x;
+            }
+            
+            if (prob_type == 4) { // pure dilatational
+                vely(i,j,k) =  1.0*y;
+            }
+            
+            if (prob_type == 5) { // pure Laplacian
+                vely(i,j,k) = -1.0*y;
+            }
+            
+            if (prob_type == 6) { // both
+                vely(i,j,k) =  1.0*x + 1.0*y;
+            }
+        });
+        amrex::ParallelFor(tbz, 
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            Real x = prob_lo[0] + (i+0.5) * dx[0] - center[0];
+            Real y = prob_lo[1] + (j+0.5) * dx[1] - center[1];
+            Real z = prob_lo[2] + (k) * dx[2]     - center[2];
+        });
+    }
+
+    // Shift face-MF to CC-MF
+    for(int d=0; d<3; d++) {
+        ShiftFaceToCC(vel[d], 0, structFactMFTurb, d, 1);
+    }
+        
+    Real time = 0.;
+    int step = 0;
+    WriteSingleLevelPlotfile("vel_full", structFactMFTurb, {"u","v","w"}, geom, time, step);
+
+    /*******************************************/
+    /********** Start FFT Calculations *********/
+    StructFact turbStructFact;
+    int structVarsTurb = AMREX_SPACEDIM;
+    Vector< std::string > var_names_turb;
+    var_names_turb.resize(structVarsTurb);
+    int cnt = 0;
+    std::string x;
+    // velx, vely, velz
+    for (int d=0; d<AMREX_SPACEDIM; d++) {
+      x = "vel";
+      x += (120+d);
+      var_names_turb[cnt++] = x;
+    }
+    // need to use dVol for scaling
+    Real dVol = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2];
+    Real dProb = (AMREX_SPACEDIM==2) ? n_cells[0]*n_cells[1] : n_cells[0]*n_cells[1]*n_cells[2];
+    dProb = 1./dProb;
+    Vector<Real> var_scaling_turb(structVarsTurb);
+    for (int d=0; d<var_scaling_turb.size(); ++d) {
+        var_scaling_turb[d] = 1./dVol;
+    }
+    // option to compute only specified pairs
+    amrex::Vector< int > s_pairA_turb(AMREX_SPACEDIM); // vx, vy, vz, rho, P , T
+    amrex::Vector< int > s_pairB_turb(AMREX_SPACEDIM); // vx, vy, vz, rho, P , T
+
+    // Select which variable pairs to include in structure factor:
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        s_pairA_turb[d] = d;
+        s_pairB_turb[d] = d;
+    }    
+    turbStructFact.define(ba,dmap,var_names_turb,var_scaling_turb,s_pairA_turb,s_pairB_turb);
+    
+    // Initialize the boxarray "ba_onegrid" from the single box "domain"
+    long npts;
+    BoxArray ba_onegrid;
+    {
+      Box domain = geom.Domain();
+      ba_onegrid.define(domain);
+      npts = (domain.length(0)*domain.length(1)*domain.length(2));
+    }
+    Real sqrtnpts = std::sqrt(npts);
+    DistributionMapping dmap_onegrid(ba_onegrid);
+
+    MultiFab dft_real, dft_imag;
+    dft_real.define(ba_onegrid, dmap_onegrid, 3, 0);
+    dft_imag.define(ba_onegrid, dmap_onegrid, 3, 0);
+
+    // Forward FFT
+    turbStructFact.ComputeFFT(structFactMFTurb,dft_real,dft_imag,geom,false);
+
+    // Decompose Velocity into Solenoidal and Dilatational Components
+    MultiFab dft_decomp_real, dft_decomp_imag;
+    dft_decomp_real.define(ba_onegrid, dmap_onegrid, 6, 0);
+    dft_decomp_imag.define(ba_onegrid, dmap_onegrid, 6, 0);
+    
+    for (MFIter mfi(dft_decomp_real); mfi.isValid(); ++mfi) {
+        
+        Box bx = mfi.fabbox();
+
+        Array4<const Real> const& real = dft_real.array(mfi);
+        Array4<const Real> const& imag = dft_imag.array(mfi);
+        
+        Array4<      Real> const& real_decomp = dft_decomp_real.array(mfi);
+        Array4<      Real> const& imag_decomp = dft_decomp_imag.array(mfi);
+
+        amrex::ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            int nx = bx.length(0);
+            int ny = bx.length(1);
+            int nz = bx.length(2);
+            
+            if (i <= bx.length(0)/2) { // only need to do for the first half of k-space
+                
+                // Gradient Operators
+                Real GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0];
+                Real GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0];
+                Real GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1];
+                Real GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1];
+                Real GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2];
+                Real GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2];
+
+                // Inverse Laplacian
+                Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC;
+
+                // Divergence of vel
+                Real divR = real(i,j,k,0)*GxR - imag(i,j,k,0)*GxC +
+                            real(i,j,k,1)*GyR - imag(i,j,k,1)*GyC +
+                            real(i,j,k,2)*GzR - imag(i,j,k,2)*GzC ;
+                Real divC = real(i,j,k,0)*GxC + imag(i,j,k,0)*GxR +
+                            real(i,j,k,1)*GyC + imag(i,j,k,1)*GyR +
+                            real(i,j,k,2)*GzC + imag(i,j,k,2)*GzR ;
+
+                if (Lap < 1.0e-12) { // zero mode for no bulk motion
+                    real_decomp(i,j,k,0) = 0.0;
+                    real_decomp(i,j,k,1) = 0.0;
+                    real_decomp(i,j,k,2) = 0.0;
+                    imag_decomp(i,j,k,0) = 0.0;
+                    imag_decomp(i,j,k,1) = 0.0;
+                    imag_decomp(i,j,k,2) = 0.0;
+                }
+                else {
+                    // Dilatational velocity 
+                    real_decomp(i,j,k,0) = (divR*GxR + divC*GxC) / Lap;
+                    real_decomp(i,j,k,1) = (divR*GyR + divC*GyC) / Lap;
+                    real_decomp(i,j,k,2) = (divR*GzR + divC*GzC) / Lap;
+                    imag_decomp(i,j,k,0) = (divC*GxR - divR*GxC) / Lap;
+                    imag_decomp(i,j,k,1) = (divC*GyR - divR*GyC) / Lap;
+                    imag_decomp(i,j,k,2) = (divC*GzR - divR*GzC) / Lap;
+                    
+                    // Solenoidal velocity
+                    real_decomp(i,j,k,3) = real(i,j,k,0) - real_decomp(i,j,k,0);
+                    real_decomp(i,j,k,4) = real(i,j,k,1) - real_decomp(i,j,k,1); 
+                    real_decomp(i,j,k,5) = real(i,j,k,2) - real_decomp(i,j,k,2);
+                    imag_decomp(i,j,k,3) = imag(i,j,k,0) - imag_decomp(i,j,k,0);
+                    imag_decomp(i,j,k,4) = imag(i,j,k,1) - imag_decomp(i,j,k,1);
+                    imag_decomp(i,j,k,5) = imag(i,j,k,2) - imag_decomp(i,j,k,2);
+                }
+            }
+        });
+    }
+
+    // Backward FFT
+    MultiFab vel_decomp;
+    vel_decomp.define(ba, dmap, 6, 0);
+    turbStructFact.InverseFFT(vel_decomp,dft_decomp_real,dft_decomp_imag,geom);
+
+    // Write Output
+    WriteSingleLevelPlotfile("vel_decomp", vel_decomp, {"uD","vD","wD","uS","vS","wS"}, geom, time, step);
+
+}
diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H
index 4a5909bf..154712b1 100644
--- a/src_analysis/StructFact.H
+++ b/src_analysis/StructFact.H
@@ -95,7 +95,11 @@ public:
     void Reset();
     
     void ComputeFFT(const amrex::MultiFab&, amrex::MultiFab&,
-                    amrex::MultiFab&, const amrex::Geometry&);
+                    amrex::MultiFab&, const amrex::Geometry&,
+                    bool unpack=true);
+    
+    void InverseFFT(amrex::MultiFab&, const amrex::MultiFab&,
+                    const amrex::MultiFab&, const amrex::Geometry&);
     
     void WritePlotFile(const int, const amrex::Real, const amrex::Geometry&, 
                        std::string, const int& zero_avg=1);
diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp
index 92dfbae3..d645f1f1 100644
--- a/src_analysis/StructFact.cpp
+++ b/src_analysis/StructFact.cpp
@@ -349,7 +349,8 @@ void StructFact::Reset() {
 void StructFact::ComputeFFT(const MultiFab& variables,
 			    MultiFab& variables_dft_real, 
 			    MultiFab& variables_dft_imag,
-			    const Geometry& geom) 
+			    const Geometry& geom,
+                bool unpack)
 {
 
     BL_PROFILE_VAR("StructFact::ComputeFFT()", ComputeFFT);
@@ -428,7 +429,7 @@ void StructFact::ComputeFFT(const MultiFab& variables,
             }
         }
 
-	if (comp_fft == false) continue;
+	    if (comp_fft == false) continue;
 
         variables_onegrid.ParallelCopy(variables,comp,0,1);
 
@@ -586,12 +587,8 @@ void StructFact::ComputeFFT(const MultiFab& variables,
             result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream());
             assert_rocfft_status("rocfft_execution_info_set_stream", result);
 
-            //result = rocfft_execute(forward_plan[i],
-            //                        (void**)&(variables_onegrid[mfi].dataPtr()), // in
-            //                        (void**)&(reinterpret_cast<FFTcomplex*>(spectral_field[i]->dataPtr())), // out
-            //                        execinfo);
-	    amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr();
-	    FFTcomplex* spectral_field_ptr = reinterpret_cast<FFTcomplex*>(spectral_field[i]->dataPtr());
+	        amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr();
+	        FFTcomplex* spectral_field_ptr = reinterpret_cast<FFTcomplex*>(spectral_field[i]->dataPtr());
             result = rocfft_execute(forward_plan[i],
                                     (void**) &variables_onegrid_ptr, // in
                                     (void**) &spectral_field_ptr, // out
@@ -659,8 +656,14 @@ void StructFact::ComputeFFT(const MultiFab& variables,
 #endif
                     }
 
-                    realpart(i,j,k) =  spectral(iloc,jloc,kloc).real();
-                    imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag();
+                    if (unpack) {
+                        realpart(i,j,k) =  spectral(iloc,jloc,kloc).real();
+                        imagpart(i,j,k) = -spectral(iloc,jloc,kloc).imag();
+                    }
+                    else {
+                        realpart(i,j,k) =  0.0;
+                        imagpart(i,j,k) =  0.0;
+                    }
                 }
 
                 realpart(i,j,k) /= sqrtnpts;
@@ -698,6 +701,298 @@ void StructFact::ComputeFFT(const MultiFab& variables,
 
 }
 
+void StructFact::InverseFFT(MultiFab& variables,
+			    const MultiFab& variables_dft_real, 
+			    const MultiFab& variables_dft_imag,
+			    const Geometry& geom)
+{
+
+    BL_PROFILE_VAR("StructFact::InverseFFT()", InverseFFT);
+
+#ifdef AMREX_USE_CUDA
+    // Print() << "Using cuFFT\n";
+#elif AMREX_USE_HIP
+    // Print() << "Using rocFFT\n";
+#else
+    // Print() << "Using FFTW\n";
+#endif
+
+    bool is_flattened = false;
+
+    long npts;
+
+    // Initialize the boxarray "ba_onegrid" from the single box "domain"
+    BoxArray ba_onegrid;
+    {
+      Box domain = geom.Domain();
+      ba_onegrid.define(domain);
+
+      if (domain.bigEnd(AMREX_SPACEDIM-1) == 0) {
+          is_flattened = true;
+      }
+
+#if (AMREX_SPACEDIM == 2)
+      npts = (domain.length(0)*domain.length(1));
+#elif (AMREX_SPACEDIM == 3)
+      npts = (domain.length(0)*domain.length(1)*domain.length(2));
+#endif
+
+    }
+
+    Real sqrtnpts = std::sqrt(npts);
+
+    DistributionMapping dmap_onegrid(ba_onegrid);
+
+    // we will take one FFT at a time and copy the answer into the
+    // corresponding component
+    MultiFab variables_onegrid;
+    variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
+    
+    MultiFab variables_dft_real_onegrid;
+    MultiFab variables_dft_imag_onegrid;
+    variables_dft_real_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
+    variables_dft_imag_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0);
+
+//    fftw_mpi_init();
+
+#ifdef AMREX_USE_CUDA
+    using FFTplan = cufftHandle;
+    using FFTcomplex = cuDoubleComplex;
+#elif AMREX_USE_HIP
+    using FFTplan = rocfft_plan;
+    using FFTcomplex = double2;
+#else
+    using FFTplan = fftw_plan;
+    using FFTcomplex = fftw_complex;
+#endif
+
+    // contain to store FFT - note it is shrunk by "half" in x
+    Vector<std::unique_ptr<BaseFab<GpuComplex<Real> > > > spectral_field;
+
+    Vector<FFTplan> backward_plan;
+
+    // for CUDA builds we only need to build the plan once; track whether we did
+    bool built_plan = false;
+    
+    for (int comp=0; comp<variables_dft_real.nComp(); comp++) {
+
+        // build spectral field from multifabs
+        variables_dft_real_onegrid.ParallelCopy(variables_dft_real,comp,0,1);
+        variables_dft_imag_onegrid.ParallelCopy(variables_dft_imag,comp,0,1);
+        
+        for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
+
+            // grab a single box including ghost cell range
+            Box realspace_bx = mfi.fabbox();
+
+            // size of box including ghost cell range
+            IntVect fft_size = realspace_bx.length(); // This will be different for hybrid FFT
+
+            // this is the size of the box, except the 0th component is 'halved plus 1'
+            IntVect spectral_bx_size = fft_size;
+            spectral_bx_size[0] = fft_size[0]/2 + 1;
+
+            // spectral box
+            Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1));
+
+            spectral_field.emplace_back(new BaseFab<GpuComplex<Real> >(spectral_bx,1,
+                                                                   The_Device_Arena()));
+            spectral_field.back()->setVal<RunOn::Device>(0.0); // touch the memory
+
+            Array4< GpuComplex<Real> > spectral = (*spectral_field[0]).array();
+            Array4<Real> const& realpart = variables_dft_real_onegrid.array(mfi);
+            Array4<Real> const& imagpart = variables_dft_imag_onegrid.array(mfi);
+
+            Box bx = mfi.fabbox();
+
+            amrex::ParallelFor(bx, 
+            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            {
+                if (i <= bx.length(0)/2) {
+                    GpuComplex<Real> copy(realpart(i,j,k),imagpart(i,j,k));
+                    spectral(i,j,k) = copy;
+                }
+            });
+        }
+
+        // build FFTplan if necessary
+        for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
+            
+            if (!built_plan) {
+
+                Box realspace_bx = mfi.fabbox();
+
+                IntVect fft_size = realspace_bx.length();
+
+                FFTplan bplan;
+
+#ifdef AMREX_USE_CUDA // CUDA
+                if (is_flattened) {
+#if (AMREX_SPACEDIM == 2)
+                    cufftResult result = cufftPlan1d(&bplan, fft_size[0], CUFFT_Z2D, 1);
+                    if (result != CUFFT_SUCCESS) {
+                        amrex::AllPrint() << " cufftplan1d forward failed! Error: "
+                                          << cufftErrorToString(result) << "\n";
+                    }
+#elif (AMREX_SPACEDIM == 3)
+                    cufftResult result = cufftPlan2d(&bplan, fft_size[1], fft_size[0], CUFFT_Z2D);
+                    if (result != CUFFT_SUCCESS) {
+                        amrex::AllPrint() << " cufftplan2d forward failed! Error: "
+                                          << cufftErrorToString(result) << "\n";
+                    }
+#endif
+                } else {
+#if (AMREX_SPACEDIM == 2)
+                    cufftResult result = cufftPlan2d(&bplan, fft_size[1], fft_size[0], CUFFT_Z2D);
+                    if (result != CUFFT_SUCCESS) {
+                        amrex::AllPrint() << " cufftplan2d forward failed! Error: "
+                                          << cufftErrorToString(result) << "\n";
+                    }
+#elif (AMREX_SPACEDIM == 3)
+                    cufftResult result = cufftPlan3d(&bplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D);
+                    if (result != CUFFT_SUCCESS) {
+                        amrex::AllPrint() << " cufftplan3d forward failed! Error: "
+                                          << cufftErrorToString(result) << "\n";
+                    }
+#endif
+                }
+#elif AMREX_USE_HIP // HIP
+                if (is_flattened) {
+#if (AMREX_SPACEDIM == 2)
+                    const std::size_t lengths[] = {std::size_t(fft_size[0])};
+                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
+                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
+                                                              1, lengths, 1, nullptr);
+                    assert_rocfft_status("rocfft_plan_create", result);
+#elif (AMREX_SPACEDIM == 3)
+                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])};
+                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
+                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
+                                                              2, lengths, 1, nullptr);
+                    assert_rocfft_status("rocfft_plan_create", result);
+#endif
+                } else {
+#if (AMREX_SPACEDIM == 2)
+                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1])};
+                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
+                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
+                                                              2, lengths, 1, nullptr);
+                    assert_rocfft_status("rocfft_plan_create", result);
+#elif (AMREX_SPACEDIM == 3)
+                    const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])};
+                    rocfft_status result = rocfft_plan_create(&bplan, rocfft_placement_notinplace, 
+                                                              rocfft_transform_type_real_inverse, rocfft_precision_double,
+                                                              3, lengths, 1, nullptr);
+                    assert_rocfft_status("rocfft_plan_create", result);
+#endif
+                }
+#else // host
+
+                if (is_flattened) {
+#if (AMREX_SPACEDIM == 2)
+                    bplan = fftw_plan_dft_c2r_1d(fft_size[0],
+                                                 reinterpret_cast<FFTcomplex*>
+                                                 (spectral_field.back()->dataPtr()),
+                                                 variables_onegrid[mfi].dataPtr(),
+                                                 FFTW_ESTIMATE);
+#elif (AMREX_SPACEDIM == 3)
+                    bplan = fftw_plan_dft_c2r_2d(fft_size[1], fft_size[0],
+                                                 reinterpret_cast<FFTcomplex*>
+                                                 (spectral_field.back()->dataPtr()),
+                                                 variables_onegrid[mfi].dataPtr(),
+                                                 FFTW_ESTIMATE);
+#endif
+                } else {
+#if (AMREX_SPACEDIM == 2)
+                    bplan = fftw_plan_dft_c2r_2d(fft_size[1], fft_size[0],
+                                                 reinterpret_cast<FFTcomplex*>
+                                                 (spectral_field.back()->dataPtr()),
+                                                 variables_onegrid[mfi].dataPtr(),
+                                                 FFTW_ESTIMATE);
+#elif (AMREX_SPACEDIM == 3)
+                    bplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0],
+                                                 reinterpret_cast<FFTcomplex*>
+                                                 (spectral_field.back()->dataPtr()),
+                                                 variables_onegrid[mfi].dataPtr(),
+                                                 FFTW_ESTIMATE);
+#endif
+                }
+#endif
+
+                backward_plan.push_back(bplan);
+            }
+	        
+            built_plan = true;
+        
+        } // end MFITer
+
+        ParallelDescriptor::Barrier();
+
+        // InverseTransform
+        for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) {
+            int i = mfi.LocalIndex();
+#ifdef AMREX_USE_CUDA
+            cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream());
+            cufftResult result = cufftExecZ2D(backward_plan[i],
+                                              reinterpret_cast<FFTcomplex*>
+                                                  (spectral_field[i]->dataPtr())
+                                              variables_onegrid[mfi].dataPtr());
+            if (result != CUFFT_SUCCESS) {
+                amrex::AllPrint() << " forward transform using cufftExec failed! Error: "
+                                  << cufftErrorToString(result) << "\n";
+	        }
+#elif AMREX_USE_HIP
+            rocfft_execution_info execinfo = nullptr;
+            rocfft_status result = rocfft_execution_info_create(&execinfo);
+            assert_rocfft_status("rocfft_execution_info_create", result);
+
+            std::size_t buffersize = 0;
+            result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize);
+            assert_rocfft_status("rocfft_plan_get_work_buffer_size", result);
+
+            void* buffer = amrex::The_Arena()->alloc(buffersize);
+            result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize);
+            assert_rocfft_status("rocfft_execution_info_set_work_buffer", result);
+
+            result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream());
+            assert_rocfft_status("rocfft_execution_info_set_stream", result);
+
+	        amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr();
+	        FFTcomplex* spectral_field_ptr = reinterpret_cast<FFTcomplex*>(spectral_field[i]->dataPtr());
+            result = rocfft_execute(backward_plan[i],
+                                    (void**) &spectral_field_ptr, // in
+                                    (void**) &variables_onegrid_ptr, // out
+                                    execinfo);
+            assert_rocfft_status("rocfft_execute", result);
+            amrex::Gpu::streamSynchronize();
+            amrex::The_Arena()->free(buffer);
+            result = rocfft_execution_info_destroy(execinfo);
+            assert_rocfft_status("rocfft_execution_info_destroy", result);
+#else
+            fftw_execute(backward_plan[i]);
+#endif
+            variables_onegrid[mfi] /= sqrtnpts;
+        }
+
+        variables.ParallelCopy(variables_onegrid,0,comp,1);
+    }
+
+    // destroy fft plan
+    for (int i = 0; i < backward_plan.size(); ++i) {
+#ifdef AMREX_USE_CUDA
+        cufftDestroy(backward_plan[i]);
+#elif AMREX_USE_HIP
+        rocfft_plan_destroy(backward_plan[i]);
+#else
+        fftw_destroy_plan(backward_plan[i]);
+#endif
+    }
+
+//    fftw_mpi_cleanup();
+
+}
+
+
 void StructFact::WritePlotFile(const int step, const Real time, const Geometry& geom,
                                std::string plotfile_base,
                                const int& zero_avg) {

From ce81d297f40fbdabc255099679e85400a9ad1e14 Mon Sep 17 00:00:00 2001
From: isriva <ishan66@gmail.com>
Date: Tue, 12 Sep 2023 15:09:38 -0700
Subject: [PATCH 02/11] remove more unecessary random number creations

---
 src_compressible_stag/timeStepStag.cpp | 108 ++++++++++++-------------
 1 file changed, 54 insertions(+), 54 deletions(-)

diff --git a/src_compressible_stag/timeStepStag.cpp b/src_compressible_stag/timeStepStag.cpp
index a020af18..20f58635 100644
--- a/src_compressible_stag/timeStepStag.cpp
+++ b/src_compressible_stag/timeStepStag.cpp
@@ -269,33 +269,33 @@ void RK3stepStag(MultiFab& cu,
     // fill stochastic face fluxes
     if (do_1D) { // 1D need only for x- face
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
     }
     else if (do_2D) { // 2D need only for x- and y- faces
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[1], 
-            stoch_weights[0], stochface_A[1], 1, 
-            stoch_weights[1], stochface_B[1], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[1], 4, 
+            stoch_weights[1], stochface_B[1], 4,
+            4, nvars-4, 0);
     }
     else { // 3D
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[1], 
-            stoch_weights[0], stochface_A[1], 1, 
-            stoch_weights[1], stochface_B[1], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[1], 4, 
+            stoch_weights[1], stochface_B[1], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[2], 
-            stoch_weights[0], stochface_A[2], 1, 
-            stoch_weights[1], stochface_B[2], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[2], 4, 
+            stoch_weights[1], stochface_B[2], 4,
+            4, nvars-4, 0);
     }
     
     // fill stochastic edge fluxes
@@ -592,33 +592,33 @@ void RK3stepStag(MultiFab& cu,
     // fill stochastic face fluxes
     if (do_1D) { // 1D need only for x- face
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
     }
     else if (do_2D) { // 2D need only for x- and y- faces
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[1], 
-            stoch_weights[0], stochface_A[1], 1, 
-            stoch_weights[1], stochface_B[1], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[1], 4, 
+            stoch_weights[1], stochface_B[1], 4,
+            4, nvars-4, 0);
     }
     else { // 3D
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[1], 
-            stoch_weights[0], stochface_A[1], 1, 
-            stoch_weights[1], stochface_B[1], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[1], 4, 
+            stoch_weights[1], stochface_B[1], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[2], 
-            stoch_weights[0], stochface_A[2], 1, 
-            stoch_weights[1], stochface_B[2], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[2], 4, 
+            stoch_weights[1], stochface_B[2], 4,
+            4, nvars-4, 0);
     }
     
     // fill stochastic edge fluxes
@@ -920,33 +920,33 @@ void RK3stepStag(MultiFab& cu,
     // fill stochastic face fluxes
     if (do_1D) { // 1D need only for x- face
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
     }
     else if (do_2D) { // 2D need only for x- and y- faces
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[1], 
-            stoch_weights[0], stochface_A[1], 1, 
-            stoch_weights[1], stochface_B[1], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[1], 4, 
+            stoch_weights[1], stochface_B[1], 4,
+            4, nvars-4, 0);
     }
     else { // 3D
 	    MultiFab::LinComb(stochface[0], 
-            stoch_weights[0], stochface_A[0], 1, 
-            stoch_weights[1], stochface_B[0], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[0], 4, 
+            stoch_weights[1], stochface_B[0], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[1], 
-            stoch_weights[0], stochface_A[1], 1, 
-            stoch_weights[1], stochface_B[1], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[1], 4, 
+            stoch_weights[1], stochface_B[1], 4,
+            4, nvars-4, 0);
 	    MultiFab::LinComb(stochface[2], 
-            stoch_weights[0], stochface_A[2], 1, 
-            stoch_weights[1], stochface_B[2], 1,
-            1, nvars-1, 0);
+            stoch_weights[0], stochface_A[2], 4, 
+            stoch_weights[1], stochface_B[2], 4,
+            4, nvars-4, 0);
     }
     
     // fill stochastic edge fluxes

From 9d9b204d088e2616eccd8d89966af16f989f6920 Mon Sep 17 00:00:00 2001
From: isriva <ishan66@gmail.com>
Date: Tue, 12 Sep 2023 16:23:10 -0700
Subject: [PATCH 03/11] velocity decomposition in Fourier space implemented
 pending testing

---
 src_analysis/StructFact.H             |  32 +-
 src_analysis/StructFact.cpp           | 454 +++++++++++++++++++++++++-
 src_compressible_stag/main_driver.cpp | 241 +++++++-------
 3 files changed, 597 insertions(+), 130 deletions(-)

diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H
index 154712b1..b4e70d04 100644
--- a/src_analysis/StructFact.H
+++ b/src_analysis/StructFact.H
@@ -42,6 +42,9 @@ class StructFact {
     // Total number of states to average over, updated by FortStructure()
     int nsamples = 0;
 
+    // decompose velocity field
+    bool decompose = false;
+
     // Vector containing covariance scaling
     Vector< Real > scaling;
 
@@ -65,6 +68,12 @@ public:
     // Vector of MultiFabs containing final magnitude of covariances
     MultiFab cov_mag;
 
+    // MultiFabs of real/imag for solenoidal/dilatational
+    MultiFab vel_sol_real;
+    MultiFab vel_sol_imag;
+    MultiFab vel_dil_real;
+    MultiFab vel_dil_imag;
+
     StructFact();
 
     StructFact(const amrex::BoxArray&, const amrex::DistributionMapping&, 
@@ -89,9 +98,23 @@ public:
                 const amrex::Vector< int >&, const amrex::Vector< int >&,
                 const int& verbosity=0);
 
+    void defineDecomp(const amrex::BoxArray&, const amrex::DistributionMapping&, 
+                      const amrex::Vector< std::string >&,
+                      const amrex::Vector< amrex::Real >&,
+                      const amrex::Vector< int >&,
+                      const amrex::Vector< int >&,
+                      const amrex::Geometry&);
+
     void FortStructure(const amrex::MultiFab&, const amrex::Geometry&,
                        const int& reset=0);
 
+    void FortStructureDecomp(const amrex::MultiFab& vel, const amrex::Geometry& geom,
+                             const int& reset=0);
+
+    void DecomposeVelFourier(const amrex::MultiFab& vel_dft_real, 
+                             const amrex::MultiFab& vel_dft_imag, 
+                             const amrex::Geometry& geom);
+
     void Reset();
     
     void ComputeFFT(const amrex::MultiFab&, amrex::MultiFab&,
@@ -100,6 +123,8 @@ public:
     
     void InverseFFT(amrex::MultiFab&, const amrex::MultiFab&,
                     const amrex::MultiFab&, const amrex::Geometry&);
+
+    void GetDecompVel(amrex::MultiFab&, const amrex::Geometry&); 
     
     void WritePlotFile(const int, const amrex::Real, const amrex::Geometry&, 
                        std::string, const int& zero_avg=1);
@@ -112,9 +137,12 @@ public:
     void ShiftFFT(amrex::MultiFab&,  const Geometry& geom,
                   const int& zero_avg=1);
 
-    void IntegratekShells(const int& step, const amrex::Geometry& geom);
+    void IntegratekShells(const int& step, const amrex::Geometry& geom, const std::string& name="");
+
+    void IntegratekShellsScalar(const int& step, const amrex::Geometry& geom, const amrex::Vector< std::string >& names);
 
-    void IntegratekShellsMisc(const int& step, const amrex::Geometry& geom);
+    void IntegratekShellsDecomp(const int& step, const amrex::Geometry& geom, 
+                                const std::string& name_sol="vel_sol", const std::string& name_dil="vel_dil");
 
     void AddToExternal(amrex::MultiFab& x_mag, amrex::MultiFab& x_realimag, const amrex::Geometry&, const int& zero_avg=1);
 
diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp
index d645f1f1..8a136caa 100644
--- a/src_analysis/StructFact.cpp
+++ b/src_analysis/StructFact.cpp
@@ -251,6 +251,73 @@ void StructFact::define(const BoxArray& ba_in, const DistributionMapping& dmap_i
   }
 }
 
+void StructFact::defineDecomp(const amrex::BoxArray& ba_in,
+                              const amrex::DistributionMapping& dmap_in,
+                              const Vector< std::string >& /*var_names*/,
+                              const amrex::Vector< amrex::Real >& var_scaling_in,
+                              const Vector< int >& s_pairA_in,
+                              const Vector< int >& s_pairB_in,
+                              const amrex::Geometry& geom) 
+{
+
+  BL_PROFILE_VAR("StructFact::defineDecomp()",StructFactDefineDecomp);
+
+  decompose = true;
+  
+  if (s_pairA_in.size() != s_pairB_in.size())
+        amrex::Error("StructFact::define() - Must have an equal number of components");
+
+  NVAR = 3;
+  NCOV = 6;
+  scaling.resize(NCOV);
+  for (int n=0; n<NCOV; n++) {
+      scaling[n] = 1.0/var_scaling_in[n];
+  }
+  
+  s_pairA.resize(3);
+  s_pairB.resize(3);
+  
+  // Set vectors identifying covariance pairs
+  for (int n=0; n<3; n++) {
+    s_pairA[n] = s_pairA_in[n];
+    s_pairB[n] = s_pairB_in[n];
+  }
+  
+  // Create vector of unique variable indices to select which to take the FFT
+  NVARU = NVAR;    // temporary before selecting unique variables
+  var_u.resize(NVARU);
+  for (int n=0; n<NVARU; n++) {
+    var_u[n] = s_pairA[n];
+  }
+
+  //BoxArray ba_onegrid;
+  //{
+  //  Box domain = geom.Domain();
+  //  ba_onegrid.define(domain);
+  //}
+  //DistributionMapping dmap_onegrid(ba_onegrid);
+
+  vel_sol_real.define(ba_in, dmap_in, 3, 0);
+  vel_sol_imag.define(ba_in, dmap_in, 3, 0);
+  vel_dil_real.define(ba_in, dmap_in, 3, 0);
+  vel_dil_imag.define(ba_in, dmap_in, 3, 0);
+
+  cov_real.define(ba_in, dmap_in, 6, 0);
+  cov_imag.define(ba_in, dmap_in, 6, 0);
+  cov_mag.define( ba_in, dmap_in, 6, 0);
+  cov_real.setVal(0.0);
+  cov_imag.setVal(0.0);
+  cov_mag.setVal (0.0);
+  
+  NCOV = 6;
+  NVAR = 3;
+  scaling.resize(NCOV);
+  for (int n=0; n<NCOV; n++) {
+      scaling[n] = 1.0/var_scaling_in[n];
+  }
+
+}
+
 void StructFact::FortStructure(const MultiFab& variables, const Geometry& geom,
                                const int& reset) {
 
@@ -336,6 +403,117 @@ void StructFact::FortStructure(const MultiFab& variables, const Geometry& geom,
   
 }
 
+void StructFact::FortStructureDecomp(const MultiFab& vel, const Geometry& geom,
+                                     const int& reset) 
+{
+
+
+  BL_PROFILE_VAR("StructFact::FortStructureDecomp()",FortStructureDecomp);
+
+  if (!decompose) amrex::Error("StructFact::FortStructureDecomp() is specific for vel decomposition in turbulence");
+
+  const BoxArray& ba = vel.boxArray();
+  const DistributionMapping& dm = vel.DistributionMap();
+  
+  MultiFab vel_dft_real, vel_dft_imag;
+  //BoxArray ba_onegrid;
+  //{
+  //  Box domain = geom.Domain();
+  //  ba_onegrid.define(domain);
+  //}
+  //DistributionMapping dmap_onegrid(ba_onegrid);
+  vel_dft_real.define(ba, dm, 3, 0);
+  vel_dft_imag.define(ba, dm, 3, 0);
+
+  ComputeFFT(vel, vel_dft_real, vel_dft_imag, geom);
+  
+  DecomposeVelFourier(vel_dft_real, vel_dft_imag, geom);
+
+  // temporary storage built on BoxArray and DistributionMapping of "variables"
+  // One case where "variables" and "cov_real/imag/mag" may have different DistributionMappings
+  // is for flattened MFs with one grid newly built flattened MFs may be on a different
+  // processor than the flattened MF used to build cov_real/imag/mag
+  // or in general, problems that are not perfectly load balanced
+  MultiFab cov_temp;
+  cov_temp.define(ba, dm, 1, 0);
+
+  // temporary storage built on BoxArray and DistributionMapping of "cov_real/imag/mag"
+  MultiFab cov_temp2;
+  cov_temp2.define(cov_real.boxArray(), cov_real.DistributionMap(), 1, 0);
+ 
+  // solenoidal
+  int index = 0;
+  for (int n=0; n<3; n++) {
+    cov_temp.setVal(0.0);
+    MultiFab::AddProduct(cov_temp,vel_sol_real,n,vel_sol_real,n,0,1,0);
+    MultiFab::AddProduct(cov_temp,vel_sol_imag,n,vel_sol_imag,n,0,1,0);
+
+    // copy into a MF with same ba and dm as cov_real/imag/mag
+    cov_temp2.ParallelCopy(cov_temp,0,0,1);
+        
+    if (reset == 1) {
+        MultiFab::Copy(cov_real,cov_temp2,0,index,1,0);
+    } else {        
+        MultiFab::Add(cov_real,cov_temp2,0,index,1,0);
+    }
+
+    // Imaginary component of covariance
+    cov_temp.setVal(0.0);
+    MultiFab::AddProduct(cov_temp,vel_sol_imag,n,vel_sol_real,n,0,1,0);
+    cov_temp.mult(-1.0,0);
+    MultiFab::AddProduct(cov_temp,vel_sol_real,n,vel_sol_imag,n,0,1,0);
+
+    // copy into a MF with same ba and dm as cov_real/imag/mag
+    cov_temp2.ParallelCopy(cov_temp,0,0,1);
+    
+    if (reset == 1) {
+        MultiFab::Copy(cov_imag,cov_temp2,0,index,1,0);
+    } else {
+        MultiFab::Add(cov_imag,cov_temp2,0,index,1,0);
+    }
+    index++;
+  }
+  
+  // dilatational
+  for (int n=0; n<3; n++) {
+    cov_temp.setVal(0.0);
+    MultiFab::AddProduct(cov_temp,vel_dil_real,n,vel_dil_real,n,0,1,0);
+    MultiFab::AddProduct(cov_temp,vel_dil_imag,n,vel_dil_imag,n,0,1,0);
+
+    // copy into a MF with same ba and dm as cov_real/imag/mag
+    cov_temp2.ParallelCopy(cov_temp,0,0,1);
+        
+    if (reset == 1) {
+        MultiFab::Copy(cov_real,cov_temp2,0,index,1,0);
+    } else {        
+        MultiFab::Add(cov_real,cov_temp2,0,index,1,0);
+    }
+
+    // Imaginary component of covariance
+    cov_temp.setVal(0.0);
+    MultiFab::AddProduct(cov_temp,vel_dil_imag,n,vel_dil_real,n,0,1,0);
+    cov_temp.mult(-1.0,0);
+    MultiFab::AddProduct(cov_temp,vel_dil_real,n,vel_dil_imag,n,0,1,0);
+
+    // copy into a MF with same ba and dm as cov_real/imag/mag
+    cov_temp2.ParallelCopy(cov_temp,0,0,1);
+    
+    if (reset == 1) {
+        MultiFab::Copy(cov_imag,cov_temp2,0,index,1,0);
+    } else {
+        MultiFab::Add(cov_imag,cov_temp2,0,index,1,0);
+    }
+    index++;
+  }
+
+  if (reset == 1) {
+      nsamples = 1;
+  } else {
+      nsamples++;
+  }
+
+}
+
 void StructFact::Reset() {
 
     BL_PROFILE_VAR("StructFact::Reset()", StructFactReset);
@@ -696,9 +874,7 @@ void StructFact::ComputeFFT(const MultiFab& variables,
         fftw_destroy_plan(forward_plan[i]);
 #endif
     }
-
 //    fftw_mpi_cleanup();
-
 }
 
 void StructFact::InverseFFT(MultiFab& variables,
@@ -935,7 +1111,7 @@ void StructFact::InverseFFT(MultiFab& variables,
             cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream());
             cufftResult result = cufftExecZ2D(backward_plan[i],
                                               reinterpret_cast<FFTcomplex*>
-                                                  (spectral_field[i]->dataPtr())
+                                                  (spectral_field[i]->dataPtr()),
                                               variables_onegrid[mfi].dataPtr());
             if (result != CUFFT_SUCCESS) {
                 amrex::AllPrint() << " forward transform using cufftExec failed! Error: "
@@ -971,9 +1147,9 @@ void StructFact::InverseFFT(MultiFab& variables,
 #else
             fftw_execute(backward_plan[i]);
 #endif
-            variables_onegrid[mfi] /= sqrtnpts;
         }
 
+        variables_onegrid.mult(1.0/sqrtnpts);
         variables.ParallelCopy(variables_onegrid,0,comp,1);
     }
 
@@ -1215,7 +1391,7 @@ void StructFact::ShiftFFT(MultiFab& dft_out, const Geometry& geom, const int& ze
 }
 
 // integrate cov_mag over k shells
-void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/) {
+void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/, const std::string& name) {
 
     BL_PROFILE_VAR("StructFact::IntegratekShells",IntegratekShells);
 
@@ -1349,7 +1525,8 @@ void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/) {
     
     if (ParallelDescriptor::IOProcessor()) {
         std::ofstream turb;
-        std::string turbBaseName = "turb";
+        std::string turbBaseName = "turb_";
+        turbBaseName += name;
         //turbName += std::to_string(step);
         std::string turbName = Concatenate(turbBaseName,step,7);
         turbName += ".txt";
@@ -1360,13 +1537,137 @@ void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/) {
         }
     }
 }
+    
+void StructFact::IntegratekShellsDecomp(const int& step, 
+                                        const amrex::Geometry& /*geom*/, 
+                                        const std::string& name_sol, 
+                                        const std::string& name_dil)
+{
+    BL_PROFILE_VAR("StructFact::IntegratekShellsDecomp",IntegratekShellsDecomp);
+
+    GpuArray<int,AMREX_SPACEDIM> center;
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        center[d] = n_cells[d]/2;
+    }
+
+    int npts = n_cells[0]/2-1;
+    //int npts_sq = npts*npts;
+
+    Gpu::DeviceVector<Real> phisum_sol_device(npts);
+    Gpu::DeviceVector<Real> phisum_dil_device(npts);
+    Gpu::DeviceVector<int>  phicnt_device(npts);
+
+    Gpu::HostVector<Real> phisum_sol_host(npts);
+    Gpu::HostVector<Real> phisum_dil_host(npts);
+    
+    Real* phisum_sol_ptr = phisum_sol_device.dataPtr();  // pointer to data
+    Real* phisum_dil_ptr = phisum_dil_device.dataPtr();  // pointer to data
+    int*  phicnt_ptr = phicnt_device.dataPtr();  // pointer to data
+
+    amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept
+    {
+      phisum_sol_ptr[d] = 0.;
+      phisum_dil_ptr[d] = 0.;
+      phicnt_ptr[d] = 0;
+    });
+    
+    // only consider cells that are within 15k of the center point
+    
+    for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        
+        const Box& bx = mfi.tilebox();
+
+        const Array4<Real> & cov = cov_mag.array(mfi);
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            int ilen = amrex::Math::abs(i-center[0]);
+            int jlen = amrex::Math::abs(j-center[1]);
+            int klen = (AMREX_SPACEDIM == 3) ? amrex::Math::abs(k-center[2]) : 0;
+
+            Real dist = (ilen*ilen + jlen*jlen + klen*klen);
+            dist = std::sqrt(dist);
+            
+            if ( dist <= center[0]-0.5) {
+	            dist = dist+0.5;
+                int cell = int(dist);
+                for (int d=0; d<AMREX_SPACEDIM; ++d) {
+		            amrex::HostDevice::Atomic::Add(&(phisum_sol_ptr[cell]), cov(i,j,k,d));
+		            amrex::HostDevice::Atomic::Add(&(phisum_dil_ptr[cell]), cov(i,j,k,d+3));
+                }
+		        amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1);
+            }
+        });
+    }
+        
+    for (int d=1; d<npts; ++d) {
+        ParallelDescriptor::ReduceRealSum(phisum_sol_device[d]);
+        ParallelDescriptor::ReduceRealSum(phisum_dil_device[d]);
+        ParallelDescriptor::ReduceIntSum(phicnt_device[d]);
+    }
+
+    Real dk = 1.;
+    
+#if (AMREX_SPACEDIM == 2)
+    amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept
+    {
+        if (d != 0) {
+	        phisum_sol_ptr[d] *= 2.*M_PI*(d*dk+.5*dk*dk)/phicnt_ptr[d];
+	        phisum_dil_ptr[d] *= 2.*M_PI*(d*dk+.5*dk*dk)/phicnt_ptr[d];
+        }
+    });
+#else
+    amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept
+    {
+        if (d != 0) {
+	        phisum_sol_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d];
+	        phisum_dil_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d];
+	    }
+    });
+#endif
+
+    Gpu::copy(Gpu::deviceToHost, phisum_sol_device.begin(), 
+              phisum_sol_device.end(), phisum_sol_host.begin());
+
+    Gpu::copy(Gpu::deviceToHost, phisum_dil_device.begin(), 
+              phisum_dil_device.end(), phisum_dil_host.begin());
+    
+    if (ParallelDescriptor::IOProcessor()) {
+        {
+            std::ofstream turb;
+            std::string turbBaseName = "turb_";
+            turbBaseName += name_sol;
+            std::string turbName = Concatenate(turbBaseName,step,7);
+            turbName += ".txt";
+            
+            turb.open(turbName);
+            for (int d=1; d<npts; ++d) {
+                turb << d << " " << phisum_sol_host[d] << std::endl;
+            }
+        }
+        {
+            std::ofstream turb;
+            std::string turbBaseName = "turb_";
+            turbBaseName += name_dil;
+            std::string turbName = Concatenate(turbBaseName,step,7);
+            turbName += ".txt";
+            
+            turb.open(turbName);
+            for (int d=1; d<npts; ++d) {
+                turb << d << " " << phisum_dil_host[d] << std::endl;
+            }
+        }
+    }
+}
 
-// integrate cov_mag over k shells for miscellaneous qtys (after energy spectrum)
-void StructFact::IntegratekShellsMisc(const int& step, const Geometry& /*geom*/) {
+// integrate cov_mag over k shells for scalar qtys
+void StructFact::IntegratekShellsScalar(const int& step, const Geometry& /*geom*/, const amrex::Vector< std::string >& names) {
 
     BL_PROFILE_VAR("StructFact::IntegratekShellsMisc",IntegratekShellsMisc);
 
-    int turbvars = NVAR - AMREX_SPACEDIM;
+    int turbvars = NVAR;
+
+    if (names.size() != turbvars) amrex::Error("StructFact::IntegratekShellsMisc() requires names to be of the same length as NVAR");
 
     GpuArray<int,AMREX_SPACEDIM> center;
     for (int d=0; d<AMREX_SPACEDIM; ++d) {
@@ -1411,7 +1712,7 @@ void StructFact::IntegratekShellsMisc(const int& step, const Geometry& /*geom*/)
                 if ( dist <= center[0]-0.5) {
                     dist = dist+0.5;
                     int cell = int(dist);
-                    amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,AMREX_SPACEDIM+var_ind));
+                    amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,var_ind));
                     amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1);
                 }
             });
@@ -1459,7 +1760,7 @@ void StructFact::IntegratekShellsMisc(const int& step, const Geometry& /*geom*/)
         
         if (ParallelDescriptor::IOProcessor()) {
             std::ofstream turb;
-            std::string turbBaseName = "turb"+cov_names[AMREX_SPACEDIM+var_ind];
+            std::string turbBaseName = "turb_"+names[var_ind];
             std::string turbName = Concatenate(turbBaseName,step,7);
             turbName += ".txt";
             
@@ -1502,3 +1803,134 @@ void StructFact::AddToExternal(MultiFab& x_mag, MultiFab& x_realimag, const Geom
     MultiFab::Add(x_realimag,plotfile,0,0,2*NCOV,0);
 
 }
+
+
+void StructFact::DecomposeVelFourier(const amrex::MultiFab& vel_dft_real, 
+                                     const amrex::MultiFab& vel_dft_imag, 
+                                     const amrex::Geometry& geom)
+{
+    BL_PROFILE_VAR("StructFact::DecomposeVelFourier",DecomposeVelFourier);
+
+    const BoxArray& ba = vel_sol_real.boxArray();
+    const DistributionMapping& dm = vel_sol_real.DistributionMap();
+    MultiFab dft_real, dft_imag;
+    dft_real.define(ba, dm, 3, 0);
+    dft_imag.define(ba, dm, 3, 0);
+    dft_real.ParallelCopy(vel_dft_real,0,0,3);
+    dft_imag.ParallelCopy(vel_dft_imag,0,0,3);
+    
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for (MFIter mfi(dft_real); mfi.isValid(); ++mfi) {
+        
+        Box bx = mfi.fabbox();
+
+        Array4<const Real> const& real = dft_real.array(mfi);
+        Array4<const Real> const& imag = dft_imag.array(mfi);
+        
+        Array4<      Real> const& real_sol = vel_sol_real.array(mfi);
+        Array4<      Real> const& imag_sol = vel_sol_imag.array(mfi);
+        
+        Array4<      Real> const& real_dil = vel_dil_real.array(mfi);
+        Array4<      Real> const& imag_dil = vel_dil_imag.array(mfi);
+
+        amrex::ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            int nx = bx.length(0);
+            int ny = bx.length(1);
+            int nz = bx.length(2);
+
+            Real GxR, GxC, GyR, GyC, GzR, GzC;
+            
+            if (i <= bx.length(0)/2) { 
+                // Gradient Operators
+                GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0];
+                GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0];
+                GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1];
+                GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1];
+                GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2];
+                GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2];
+            }
+            else { // conjugate
+                // Gradient Operators
+                GxR = (cos(2.0*M_PI*(nx-i)/nx)-1.0)/dx[0];
+                GxC = (sin(2.0*M_PI*(nx-i)/nx)-0.0)/dx[0];
+                GyR = (cos(2.0*M_PI*(ny-j)/ny)-1.0)/dx[1];
+                GyC = (sin(2.0*M_PI*(ny-j)/ny)-0.0)/dx[1];
+                GzR = (cos(2.0*M_PI*(nz-k)/nz)-1.0)/dx[2];
+                GzC = (sin(2.0*M_PI*(nz-k)/nz)-0.0)/dx[2];
+            }
+
+            // Inverse Laplacian
+            Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC;
+
+            // Divergence of vel
+            Real divR = real(i,j,k,0)*GxR - imag(i,j,k,0)*GxC +
+                        real(i,j,k,1)*GyR - imag(i,j,k,1)*GyC +
+                        real(i,j,k,2)*GzR - imag(i,j,k,2)*GzC ;
+            Real divC = real(i,j,k,0)*GxC + imag(i,j,k,0)*GxR +
+                        real(i,j,k,1)*GyC + imag(i,j,k,1)*GyR +
+                        real(i,j,k,2)*GzC + imag(i,j,k,2)*GzR ;
+
+            if (Lap < 1.0e-12) { // zero mode for no bulk motion
+                real_sol(i,j,k,0) = 0.0;
+                real_sol(i,j,k,1) = 0.0;
+                real_sol(i,j,k,2) = 0.0;
+                imag_sol(i,j,k,0) = 0.0;
+                imag_sol(i,j,k,1) = 0.0;
+                imag_sol(i,j,k,2) = 0.0;
+            }
+            else {
+                // Solenoidal velocity 
+                real_sol(i,j,k,0) = (divR*GxR + divC*GxC) / Lap;
+                real_sol(i,j,k,1) = (divR*GyR + divC*GyC) / Lap;
+                real_sol(i,j,k,2) = (divR*GzR + divC*GzC) / Lap;
+                imag_sol(i,j,k,0) = (divC*GxR - divR*GxC) / Lap;
+                imag_sol(i,j,k,1) = (divC*GyR - divR*GyC) / Lap;
+                imag_sol(i,j,k,2) = (divC*GzR - divR*GzC) / Lap;
+                
+                // Solenoidal velocity
+                real_dil(i,j,k,0) = real(i,j,k,0) - real_sol(i,j,k,0);
+                real_dil(i,j,k,1) = real(i,j,k,1) - real_sol(i,j,k,1); 
+                real_dil(i,j,k,2) = real(i,j,k,2) - real_sol(i,j,k,2);
+                imag_dil(i,j,k,0) = imag(i,j,k,0) - imag_sol(i,j,k,0);
+                imag_dil(i,j,k,1) = imag(i,j,k,1) - imag_sol(i,j,k,1);
+                imag_dil(i,j,k,2) = imag(i,j,k,2) - imag_sol(i,j,k,2);
+            }
+        });
+    }
+}
+
+void StructFact::GetDecompVel(MultiFab& vel_decomp, const Geometry& geom)
+{
+    BL_PROFILE_VAR("StructFact::GetDecompVel()", GetDecompVel);
+    
+    if (!decompose) 
+        amrex::Error("StructFact::GetDecompVel() is specific for vel decomposition in turbulence");
+
+    const BoxArray& ba_in = vel_decomp.boxArray();
+    const DistributionMapping& dmap_in = vel_decomp.DistributionMap();
+
+    MultiFab vel;
+    vel.define(ba_in, dmap_in, 3, 0);
+
+    const BoxArray& ba = vel_sol_real.boxArray();
+    const DistributionMapping& dm = vel_sol_real.DistributionMap();
+    MultiFab dft_real, dft_imag;
+    dft_real.define(ba, dm, 3, 0);
+    dft_imag.define(ba, dm, 3, 0);
+    
+    dft_real.ParallelCopy(vel_sol_real,0,0,3);
+    dft_imag.ParallelCopy(vel_sol_imag,0,0,3);
+
+    InverseFFT(vel, dft_real, dft_imag, geom);
+    vel_decomp.ParallelCopy(vel,0,0,3);
+
+    dft_real.ParallelCopy(vel_dil_real,0,0,3);
+    dft_imag.ParallelCopy(vel_dil_imag,0,0,3);
+
+    InverseFFT(vel, dft_real, dft_imag, geom);
+    vel_decomp.ParallelCopy(vel,0,3,3);
+
+}
diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp
index f4358421..e7c2c289 100644
--- a/src_compressible_stag/main_driver.cpp
+++ b/src_compressible_stag/main_driver.cpp
@@ -302,7 +302,9 @@ void main_driver(const char* argv)
 
 #if defined(TURB)
     // Structure factor for compressible turbulence
-    StructFact turbStructFact;
+    StructFact turbStructFactVelTotal; // total velocity
+    StructFact turbStructFactVelDecomp; // decomposed velocity
+    StructFact turbStructFactScalar; // scalars 
 #endif
     
     Geometry geom_flat;
@@ -431,51 +433,41 @@ void main_driver(const char* argv)
         var_scaling_cons[d] = 1./(dx[0]*dx[1]*dx[2]);
     }
 
+#if defined(TURB)
     //////////////////////////////////////////////////////////////
     // structure factor variables names and scaling for turbulence
     // variables are velocities, density, pressure and temperature
     //////////////////////////////////////////////////////////////
-#if defined(TURB)
-    int structVarsTurb = AMREX_SPACEDIM+3;
-    
-    Vector< std::string > var_names_turb;
-    var_names_turb.resize(structVarsTurb);
-    
-    cnt = 0;
-
-    // velx, vely, velz
-    for (int d=0; d<AMREX_SPACEDIM; d++) {
-      x = "vel";
-      x += (120+d);
-      var_names_turb[cnt++] = x;
-    }
-    var_names_turb[cnt++] = "rho";
-    var_names_turb[cnt++] = "temp";
-    var_names_turb[cnt++] = "press";
-
-    MultiFab structFactMFTurb;
-
     // need to use dVol for scaling
     Real dVol = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2];
-    Real dProb = (AMREX_SPACEDIM==2) ? n_cells[0]*n_cells[1] : n_cells[0]*n_cells[1]*n_cells[2];
+    Real dVolinv = 1.0/dVol;
+    Real dProb = (AMREX_SPACEDIM==2) ? n_cells[0]*n_cells[1] : 
+                                       n_cells[0]*n_cells[1]*n_cells[2];
     dProb = 1./dProb;
     
-    Vector<Real> var_scaling_turb(structVarsTurb);
-    for (int d=0; d<var_scaling_turb.size(); ++d) {
-        var_scaling_turb[d] = 1./dVol;
+    MultiFab structFactMFTurbVel;
+    MultiFab structFactMFTurbScalar;
+    MultiFab vel_decomp;
+
+    Vector< std::string > var_names_turbVelTotal{"ux","uy","uz"};
+    Vector<Real> var_scaling_turbVelTotal(3, dVolinv);
+    amrex::Vector< int > s_pairA_turbVelTotal(3);
+    amrex::Vector< int > s_pairB_turbVelTotal(3);
+    for (int d=0; d<3; ++d) {
+        s_pairA_turbVelTotal[d] = d;
+        s_pairB_turbVelTotal[d] = d;
+    }
+    
+    Vector<Real> var_scaling_turbVelDecomp(6, dVolinv);
+    
+    Vector< std::string > var_names_turbScalar{"rho","tenp","press"};
+    Vector<Real> var_scaling_turbScalar(3, dVolinv);
+    amrex::Vector< int > s_pairA_turbScalar(3);
+    amrex::Vector< int > s_pairB_turbScalar(3);
+    for (int d=0; d<3; ++d) {
+        s_pairA_turbScalar[d] = d;
+        s_pairB_turbScalar[d] = d;
     }
-
-    // option to compute only specified pairs
-    amrex::Vector< int > s_pairA_turb(AMREX_SPACEDIM+3); // vx, vy, vz, rho, P , T
-    amrex::Vector< int > s_pairB_turb(AMREX_SPACEDIM+3); // vx, vy, vz, rho, P , T
-
-    // Select which variable pairs to include in structure factor:
-    for (int d=0; d<AMREX_SPACEDIM+3; ++d) {
-        s_pairA_turb[d] = d;
-        s_pairB_turb[d] = d;
-    }    
-
-    //////////////////////////////////////////////////////////////
 #endif
     
     // object for turbulence forcing
@@ -956,8 +948,20 @@ void main_driver(const char* argv)
     
 #if defined(TURB)
     if (turbForcing >= 1) {
-        structFactMFTurb.define(ba, dmap, structVarsTurb, 0);
-        turbStructFact.define(ba,dmap,var_names_turb,var_scaling_turb,s_pairA_turb,s_pairB_turb);
+        
+        structFactMFTurbVel.define(ba, dmap, 3, 0);
+        structFactMFTurbScalar.define(ba, dmap, 6, 0);
+        vel_decomp.define(ba, dmap, 6, 0);
+
+        turbStructFactVelTotal.define(ba,dmap,
+                var_names_turbVelTotal,var_scaling_turbVelTotal,
+                s_pairA_turbVelTotal,s_pairB_turbVelTotal);
+        turbStructFactScalar.define(ba,dmap,
+                var_names_turbScalar,var_scaling_turbScalar,
+                s_pairA_turbScalar,s_pairB_turbScalar);
+        turbStructFactVelDecomp.defineDecomp(ba,dmap,
+                var_names_turbVelTotal,var_scaling_turbVelDecomp,
+                s_pairA_turbVelTotal,s_pairB_turbVelTotal,geom);
     }
 #endif
 
@@ -1179,9 +1183,87 @@ void main_driver(const char* argv)
                            << "\n";
         }
 
+
+        // write a plotfile
+        bool writePlt = false;
+        if (plot_int > 0) {
+            if (n_steps_skip >= 0) { // for positive n_steps_skip, write out at plot_int
+                writePlt = (step%plot_int == 0);
+            }
+            else if (n_steps_skip < 0) { // for negative n_steps_skip, write out at plot_int-1
+                writePlt = ((step+1)%plot_int == 0);
+            }
+        }
+        
+        if (writePlt) {
+            //yzAverage(cuMeans, cuVars, primMeans, primVars, spatialCross,
+            //          cuMeansAv, cuVarsAv, primMeansAv, primVarsAv, spatialCrossAv);
+            WritePlotFileStag(step, time, geom, cu, cuMeans, cuVars, cumom, cumomMeans, cumomVars,
+                              prim, primMeans, primVars, vel, velMeans, velVars, coVars, surfcov, surfcovMeans, surfcovVars, eta, kappa);
+            
+#if defined(TURB)
+            if (turbForcing > 0) {
+                EvaluateWritePlotFileVelGrad(step, time, geom, vel);
+            }
+#endif
+
+            if (plot_cross) {
+                if (do_1D) {
+                    WriteSpatialCross1D(spatialCross1D, step, geom, ncross);
+                }
+                else if (do_2D) {
+                //    WriteSpatialCross2D(spatialCross2D, step, geom, ncross); // (do later)
+                }
+                else {
+                    WriteSpatialCross3D(spatialCross3D, step, geom, ncross);
+                    if (ParallelDescriptor::IOProcessor()) {
+                        outfile << step << " ";
+                        for (auto l=0; l<2*nvars+8+2*nspecies; ++l) {
+                            outfile << dataSliceMeans_xcross[l] << " ";
+                        }
+                        outfile << std::endl;
+                    }
+                }
+            }
+
+#if defined(TURB)
+            // compressible turbulence structure factor snapshot
+            if (turbForcing >= 1) {
+
+                // copy velocities into structFactMFTurb
+                for(int d=0; d<AMREX_SPACEDIM; d++) {
+                    ShiftFaceToCC(vel[d], 0, structFactMFTurbVel, d, 1);
+                }
+                MultiFab::Copy(structFactMFTurbScalar, prim, 0, 0, 1, 0);
+                MultiFab::Copy(structFactMFTurbScalar, prim, 4, 1, 1, 0);
+                MultiFab::Copy(structFactMFTurbScalar, prim, 5, 2, 1, 0);
+                
+                 // decomposed velocities
+                    turbStructFactVelDecomp.FortStructureDecomp(structFactMFTurbVel,geom,1);
+                    turbStructFactVelDecomp.GetDecompVel(vel_decomp,geom);
+                    turbStructFactVelDecomp.CallFinalize(geom);
+                    turbStructFactVelDecomp.IntegratekShellsDecomp(step,geom,"vel_solenoid","vel_dilation");
+                
+                 // total velocity
+                    turbStructFactVelTotal.FortStructure(structFactMFTurbVel,geom,1);
+                    turbStructFactVelTotal.CallFinalize(geom);
+                    turbStructFactVelTotal.IntegratekShells(step,geom,"vel_total");
+                
+                 // scalars
+                    turbStructFactScalar.FortStructure(structFactMFTurbScalar,geom,1);
+                    turbStructFactScalar.CallFinalize(geom);
+                    turbStructFactScalar.IntegratekShellsScalar(step,geom,var_names_turbScalar);
+                
+
+            }
+#endif
+
+        }
+
+
 #if defined(TURB)
         // turbulence outputs
-        if ((turbForcing >= 1) and (step%1000 == 0)) {
+        if ((turbForcing >= 1) and (writePlt)) {
 
             for (int i=0; i<AMREX_SPACEDIM; ++i) {
                 vel[i].FillBoundary(geom.periodicity());
@@ -1229,7 +1311,7 @@ void main_driver(const char* argv)
             ccTemp.setVal(0.0);
             for (int d=0; d<AMREX_SPACEDIM; ++d) {
                 CCMoments(gradU,d,ccTempA,2,gradU2[d]);
-                MultiFab::Add(ccTemp,ccTempA,0,d,1,0);
+                MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
                 gradU2[d] *= dProb; // <(du_i/dx_i)^2> each component
             }
             Real avg_mom2 = ComputeSpatialMean(ccTemp, 0); // <\sum_i (du_i/dx_i)^2>
@@ -1307,82 +1389,7 @@ void main_driver(const char* argv)
             turboutfile << kolm_s << " " << kolm_t << std::endl;
         }
 #endif
-
-        // write a plotfile
-        bool writePlt = false;
-        if (plot_int > 0) {
-            if (n_steps_skip >= 0) { // for positive n_steps_skip, write out at plot_int
-                writePlt = (step%plot_int == 0);
-            }
-            else if (n_steps_skip < 0) { // for negative n_steps_skip, write out at plot_int-1
-                writePlt = ((step+1)%plot_int == 0);
-            }
-        }
-
-        if (writePlt) {
-            //yzAverage(cuMeans, cuVars, primMeans, primVars, spatialCross,
-            //          cuMeansAv, cuVarsAv, primMeansAv, primVarsAv, spatialCrossAv);
-            WritePlotFileStag(step, time, geom, cu, cuMeans, cuVars, cumom, cumomMeans, cumomVars,
-                              prim, primMeans, primVars, vel, velMeans, velVars, coVars, surfcov, surfcovMeans, surfcovVars, eta, kappa);
-            
-#if defined(TURB)
-            if (turbForcing > 0) {
-                EvaluateWritePlotFileVelGrad(step, time, geom, vel);
-            }
-#endif
-
-            if (plot_cross) {
-                if (do_1D) {
-                    WriteSpatialCross1D(spatialCross1D, step, geom, ncross);
-                }
-                else if (do_2D) {
-                //    WriteSpatialCross2D(spatialCross2D, step, geom, ncross); // (do later)
-                }
-                else {
-                    WriteSpatialCross3D(spatialCross3D, step, geom, ncross);
-                    if (ParallelDescriptor::IOProcessor()) {
-                        outfile << step << " ";
-                        for (auto l=0; l<2*nvars+8+2*nspecies; ++l) {
-                            outfile << dataSliceMeans_xcross[l] << " ";
-                        }
-                        outfile << std::endl;
-                    }
-                }
-            }
-
-#if defined(TURB)
-            // compressible turbulence structure factor snapshot
-            if (turbForcing >= 1) {
-
-                cnt = 0;
-
-                // copy velocities into structFactMFTurb
-                for(int d=0; d<AMREX_SPACEDIM; d++) {
-                    ShiftFaceToCC(vel[d], 0, structFactMFTurb, d, 1);
-                    cnt++;
-                }
-                // copy density, pressure and temperature into structFactMFTurb
-                MultiFab::Copy(structFactMFTurb, prim, 0, cnt, 1, 0);
-                cnt++;
-                MultiFab::Copy(structFactMFTurb, prim, 4, cnt, 1, 0);
-                cnt++;
-                MultiFab::Copy(structFactMFTurb, prim, 5, cnt, 1, 0);
-
-                // reset and compute structure factor
-                turbStructFact.FortStructure(structFactMFTurb,geom,1);
-                turbStructFact.CallFinalize(geom);
-
-                // integrate cov_mag over shells in k and write to file (energy spectrum)
-                turbStructFact.IntegratekShells(step,geom);
-
-                // integrate cov_mag over shells in k and write to file (for rho, press, temp)
-                turbStructFact.IntegratekShellsMisc(step,geom);
-
-            }
-#endif
-
-        }
-
+        
         // collect a snapshot for structure factor
         if (step > amrex::Math::abs(n_steps_skip) && 
             struct_fact_int > 0 && 

From 71236d0cc4db051296c7c61f9c75c635fda99058 Mon Sep 17 00:00:00 2001
From: isriva <ishan66@gmail.com>
Date: Tue, 12 Sep 2023 16:24:51 -0700
Subject: [PATCH 04/11] test inputs file for turb structure factor

---
 inputs_SF_decomp_test | 113 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 inputs_SF_decomp_test

diff --git a/inputs_SF_decomp_test b/inputs_SF_decomp_test
new file mode 100644
index 00000000..52726e69
--- /dev/null
+++ b/inputs_SF_decomp_test
@@ -0,0 +1,113 @@
+  # amrex.fpe_trap_invalid=1
+  # amrex.fpe_trap_overflow=1
+  # amrex.fpe_trap_zero=1
+
+
+  # Problem specification
+  # mean free path (Ar): lambda = 6.26e-6 cm
+  # dx = dy = dz = 2*lambda = 1.252e-5
+  prob_lo = 0.0 0.0 0.0      # physical lo coordinate
+  prob_hi = 0.005008 0.005008 0.005008 # physical hi coordinate
+  prob_hi = 0.001252 0.001252 0.001252 # physical hi coordinate
+
+  # Number of ghost cells, conserved, and primitive variables
+  # ---------------------
+  ngc = 2 2 2
+  nspecies = 2
+  nvars = 7
+  nprimvars = 10
+
+  # number of cells in domain
+  n_cells = 64 64 64
+  n_cells = 16 16 16
+  # max number of cells in a box
+  max_grid_size = 64 64 64
+
+  # Time-step control
+  fixed_dt = 6.25e-10
+  stoch_stress_form = 1
+
+  # Controls for number of steps between actions
+  max_step = 1000000
+  plot_int = 10000
+  chk_int  = 50000
+  max_step = 100
+  plot_int = 10
+
+  # Multispecies toggle
+  # if algorithm_type = 1, single component
+  # if algorithm_type = 2, multispecies
+  algorithm_type = 2
+
+  # Viscous tensor form
+  # if visc_type = 1, L = not-symmetric (bulk viscosity = 0)
+  # if visc_type = 2, L = symmetric (bulk viscosity = 0)
+  # if visc_type = 3, L = symmetric + bulk viscosity
+  visc_type = 2
+
+  # Advection method
+  # if advection_type = 1, interpolate primitive quantities
+  # if advection_type = 2, interpolate conserved quantities
+  advection_type = 1
+
+  # Problem specification
+  # if prob_type = 1, constant species concentration
+  # if prob_type = 2, Rayleigh-Taylor instability
+  # if prob_type = 3, diffusion barrier
+  prob_type = 1
+
+  # Initial parameters
+  k_B = 1.38064852e-16 # [units: cm2*g*s-2*K-1]
+  Runiv = 8.314462175e7
+  T_init = 273
+  rho0 = 0.0017775409151219938
+  rhobar = 0.5 0.5
+  #mach0 = 0.5
+  turbForcing = 2
+  turb_a      = 6486.000000000001 # forcing for solenoidal 1/T_L
+  turb_b      = 4823117954.373371 # forcing for solenoidal sigma/sqrt{T_L}
+  turb_c      = 6486.000000000001  # forcing for compressional 1/T_L
+  turb_d      = 4823117954.373371  # forcing for compressional sigma/sqrt{T_L}
+  turb_alpha  = 1.0  # fraction of solenoidal forcing
+
+  struct_fact_int = -10
+  n_steps_skip = 0
+
+  # Boundary conditions:
+  # NOTE: setting bc_vel to periodic sets all the other bc's to periodic)
+  # bc_vel:   -1 = periodic
+  #            1 = slip
+  #            2 = no-slip
+  # bc_mass:  -1 = periodic
+  #            1 = wall
+  #            2 = reservoir (set bc_Yk or bc_Xk in compressible namelist)
+  # bc_therm: -1 = periodic
+  #            1 = adiabatic
+  #            2 = isothermal (set with t_lo/hi in common namelist)
+  bc_vel_lo = -1 -1 -1
+  bc_vel_hi = -1 -1 -1
+  bc_mass_lo  = -1 -1 -1
+  bc_mass_hi  = -1 -1 -1
+  bc_therm_lo = -1 -1 -1
+  bc_therm_hi = -1 -1 -1
+
+  # Temperature if thermal BC specified
+  t_lo = 273 273 273
+  t_hi = 273 273 273
+
+  #Kinetic species info
+  #--------------
+  molmass  = 39.9480 39.9480
+  diameter = 3.66e-8 3.66e-8
+
+  # Enter negative dof to use hcv & hcp values
+  dof =  3 3
+  hcv = -1 -1
+  hcp = -1 -1
+
+  # write out means and variances to plotfile
+  transport_type = 1
+  plot_means = 0
+  plot_vars = 0
+  plot_covars = 0
+

From d13ce7f4c278579b0e0869329470049f0fdd870b Mon Sep 17 00:00:00 2001
From: isriva <ishan66@gmail.com>
Date: Tue, 12 Sep 2023 21:32:41 -0700
Subject: [PATCH 05/11] minor notational change

---
 src_analysis/StructFact.cpp | 38 ++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp
index 8a136caa..f4773445 100644
--- a/src_analysis/StructFact.cpp
+++ b/src_analysis/StructFact.cpp
@@ -1874,29 +1874,29 @@ void StructFact::DecomposeVelFourier(const amrex::MultiFab& vel_dft_real,
                         real(i,j,k,2)*GzC + imag(i,j,k,2)*GzR ;
 
             if (Lap < 1.0e-12) { // zero mode for no bulk motion
-                real_sol(i,j,k,0) = 0.0;
-                real_sol(i,j,k,1) = 0.0;
-                real_sol(i,j,k,2) = 0.0;
-                imag_sol(i,j,k,0) = 0.0;
-                imag_sol(i,j,k,1) = 0.0;
-                imag_sol(i,j,k,2) = 0.0;
+                real_dil(i,j,k,0) = 0.0;
+                real_dil(i,j,k,1) = 0.0;
+                real_dil(i,j,k,2) = 0.0;
+                imag_dil(i,j,k,0) = 0.0;
+                imag_dil(i,j,k,1) = 0.0;
+                imag_dil(i,j,k,2) = 0.0;
             }
             else {
-                // Solenoidal velocity 
-                real_sol(i,j,k,0) = (divR*GxR + divC*GxC) / Lap;
-                real_sol(i,j,k,1) = (divR*GyR + divC*GyC) / Lap;
-                real_sol(i,j,k,2) = (divR*GzR + divC*GzC) / Lap;
-                imag_sol(i,j,k,0) = (divC*GxR - divR*GxC) / Lap;
-                imag_sol(i,j,k,1) = (divC*GyR - divR*GyC) / Lap;
-                imag_sol(i,j,k,2) = (divC*GzR - divR*GzC) / Lap;
+                // Dilatational velocity 
+                real_dil(i,j,k,0) = (divR*GxR + divC*GxC) / Lap;
+                real_dil(i,j,k,1) = (divR*GyR + divC*GyC) / Lap;
+                real_dil(i,j,k,2) = (divR*GzR + divC*GzC) / Lap;
+                imag_dil(i,j,k,0) = (divC*GxR - divR*GxC) / Lap;
+                imag_dil(i,j,k,1) = (divC*GyR - divR*GyC) / Lap;
+                imag_dil(i,j,k,2) = (divC*GzR - divR*GzC) / Lap;
                 
                 // Solenoidal velocity
-                real_dil(i,j,k,0) = real(i,j,k,0) - real_sol(i,j,k,0);
-                real_dil(i,j,k,1) = real(i,j,k,1) - real_sol(i,j,k,1); 
-                real_dil(i,j,k,2) = real(i,j,k,2) - real_sol(i,j,k,2);
-                imag_dil(i,j,k,0) = imag(i,j,k,0) - imag_sol(i,j,k,0);
-                imag_dil(i,j,k,1) = imag(i,j,k,1) - imag_sol(i,j,k,1);
-                imag_dil(i,j,k,2) = imag(i,j,k,2) - imag_sol(i,j,k,2);
+                real_sol(i,j,k,0) = real(i,j,k,0) - real_dil(i,j,k,0);
+                real_sol(i,j,k,1) = real(i,j,k,1) - real_dil(i,j,k,1); 
+                real_sol(i,j,k,2) = real(i,j,k,2) - real_dil(i,j,k,2);
+                imag_sol(i,j,k,0) = imag(i,j,k,0) - imag_dil(i,j,k,0);
+                imag_sol(i,j,k,1) = imag(i,j,k,1) - imag_dil(i,j,k,1);
+                imag_sol(i,j,k,2) = imag(i,j,k,2) - imag_dil(i,j,k,2);
             }
         });
     }

From aafdd1bd54d6d23bfc722299def81fffdf2348f4 Mon Sep 17 00:00:00 2001
From: Andy Nonaka <AJNonaka@lbl.gov>
Date: Wed, 13 Sep 2023 11:09:34 -0700
Subject: [PATCH 06/11] move file

---
 .../compressible_stag/inputs_SF_decomp_test                       | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename inputs_SF_decomp_test => exec/compressible_stag/inputs_SF_decomp_test (100%)

diff --git a/inputs_SF_decomp_test b/exec/compressible_stag/inputs_SF_decomp_test
similarity index 100%
rename from inputs_SF_decomp_test
rename to exec/compressible_stag/inputs_SF_decomp_test

From 2b679da7542c4adcf533c24d36daf6a72ffea261 Mon Sep 17 00:00:00 2001
From: isriva <ishan66@gmail.com>
Date: Wed, 13 Sep 2023 18:53:06 -0700
Subject: [PATCH 07/11] decompose velocity and output several velocity gradient
 properties

---
 src_analysis/StructFact.H                     |   3 +-
 src_analysis/StructFact.cpp                   |  12 +-
 src_common/NormInnerProduct.cpp               |  22 +
 src_common/common_functions.H                 |   6 +
 src_compressible_stag/DeriveVelProp.cpp       | 557 ++++++++++++++++++
 src_compressible_stag/Make.package            |   1 +
 .../compressible_functions_stag.H             |  31 +
 src_compressible_stag/main_driver.cpp         | 265 +++------
 src_compressible_stag/writePlotFileStag.cpp   | 120 ----
 9 files changed, 698 insertions(+), 319 deletions(-)
 create mode 100644 src_compressible_stag/DeriveVelProp.cpp

diff --git a/src_analysis/StructFact.H b/src_analysis/StructFact.H
index b4e70d04..2ae91097 100644
--- a/src_analysis/StructFact.H
+++ b/src_analysis/StructFact.H
@@ -102,8 +102,7 @@ public:
                       const amrex::Vector< std::string >&,
                       const amrex::Vector< amrex::Real >&,
                       const amrex::Vector< int >&,
-                      const amrex::Vector< int >&,
-                      const amrex::Geometry&);
+                      const amrex::Vector< int >&);
 
     void FortStructure(const amrex::MultiFab&, const amrex::Geometry&,
                        const int& reset=0);
diff --git a/src_analysis/StructFact.cpp b/src_analysis/StructFact.cpp
index f4773445..44d47556 100644
--- a/src_analysis/StructFact.cpp
+++ b/src_analysis/StructFact.cpp
@@ -256,8 +256,7 @@ void StructFact::defineDecomp(const amrex::BoxArray& ba_in,
                               const Vector< std::string >& /*var_names*/,
                               const amrex::Vector< amrex::Real >& var_scaling_in,
                               const Vector< int >& s_pairA_in,
-                              const Vector< int >& s_pairB_in,
-                              const amrex::Geometry& geom) 
+                              const Vector< int >& s_pairB_in)
 {
 
   BL_PROFILE_VAR("StructFact::defineDecomp()",StructFactDefineDecomp);
@@ -1400,7 +1399,8 @@ void StructFact::IntegratekShells(const int& step, const Geometry& /*geom*/, con
         center[d] = n_cells[d]/2;
     }
 
-    int npts = n_cells[0]/2-1;
+    //int npts = n_cells[0]/2-1;
+    int npts = n_cells[0]/2;
     //int npts_sq = npts*npts;
 
     Gpu::DeviceVector<Real> phisum_device(npts);
@@ -1550,7 +1550,8 @@ void StructFact::IntegratekShellsDecomp(const int& step,
         center[d] = n_cells[d]/2;
     }
 
-    int npts = n_cells[0]/2-1;
+    //int npts = n_cells[0]/2-1;
+    int npts = n_cells[0]/2;
     //int npts_sq = npts*npts;
 
     Gpu::DeviceVector<Real> phisum_sol_device(npts);
@@ -1674,7 +1675,8 @@ void StructFact::IntegratekShellsScalar(const int& step, const Geometry& /*geom*
         center[d] = n_cells[d]/2;
     }
 
-    int npts = n_cells[0]/2-1;
+    //int npts = n_cells[0]/2-1;
+    int npts = n_cells[0]/2;
 
     Gpu::DeviceVector<Real> phisum_device(npts);
     Gpu::DeviceVector<int>  phicnt_device(npts);
diff --git a/src_common/NormInnerProduct.cpp b/src_common/NormInnerProduct.cpp
index ab1683ff..1f2bfeb0 100644
--- a/src_common/NormInnerProduct.cpp
+++ b/src_common/NormInnerProduct.cpp
@@ -324,6 +324,28 @@ void CCMoments(const amrex::MultiFab& m1,
   SumCC(mscr,0,prod_val,false);
 }
 
+void FCMoments(const std::array<MultiFab, AMREX_SPACEDIM>& m1,
+		       const amrex::Vector<int>& comps,
+               std::array<MultiFab, AMREX_SPACEDIM>&  mscr,
+		       const int& power,
+		       amrex::Vector<amrex::Real>& prod_val)
+{
+
+  BL_PROFILE_VAR("FCMoments()",FCMoments);
+
+  if (comps.size() != AMREX_SPACEDIM) amrex::Abort("FCMoments:: Vector of comps needs to same size as AMREX_SPACEDIM");
+  if (prod_val.size() != AMREX_SPACEDIM) amrex::Abort("FCMoments:: Vector of prod_val needs to same size as AMREX_SPACEDIM");
+
+  for (int d=0; d<AMREX_SPACEDIM; ++d) {
+    MultiFab::Copy(mscr[d],m1[d],comps[d],0,1,0);
+    for(int i=1; i<power; i++){
+      MultiFab::Multiply(mscr[d],m1[d],comps[d],0,1,0);
+    }
+  }
+  SumStag(mscr,prod_val,false);
+}
+
+
 void StagL2Norm(const std::array<MultiFab, AMREX_SPACEDIM>& m1,
 		const int& comp,
                 std::array<MultiFab, AMREX_SPACEDIM>& mscr,
diff --git a/src_common/common_functions.H b/src_common/common_functions.H
index a3d1df05..901eb7b3 100644
--- a/src_common/common_functions.H
+++ b/src_common/common_functions.H
@@ -220,6 +220,12 @@ void CCMoments(const MultiFab & m1,
 		 const int & power,
 		 Real & prod_val);
 
+void FCMoments(const std::array<MultiFab, AMREX_SPACEDIM>& m1,
+		       const amrex::Vector<int>& comps,
+               std::array<MultiFab, AMREX_SPACEDIM>&  mscr,
+		       const int& power,
+		       amrex::Vector<amrex::Real>& prod_val);
+
 void StagL2Norm(const std::array<MultiFab, AMREX_SPACEDIM> & m1,
 		const int & comp,
                 std::array<MultiFab, AMREX_SPACEDIM>& mscr,
diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp
new file mode 100644
index 00000000..770dcc71
--- /dev/null
+++ b/src_compressible_stag/DeriveVelProp.cpp
@@ -0,0 +1,557 @@
+#include "AMReX_PlotFileUtil.H"
+#include "compressible_functions.H"
+#include "compressible_functions_stag.H"
+#include "common_functions.H"
+
+#if defined(TURB)
+void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
+                std::array< MultiFab, AMREX_SPACEDIM >& cumom,
+                MultiFab& prim,
+                MultiFab& eta,
+                const amrex::Geometry& geom,
+                Real& turbKE, Real& c_speed,
+                Real& u_rms,
+                Real& taylor_len, Real& taylor_Re, Real& taylor_Ma,
+                Real& skew, Real& kurt,
+                Real& eps_s, Real& eps_d, Real& eps_ratio,
+                Real& kolm_s, Real& kolm_d, Real& kolm_t)
+
+{
+    BL_PROFILE_VAR("GetTurbQty()",GetTurbQty);
+
+    Real dProb = (AMREX_SPACEDIM==2) ? n_cells[0]*n_cells[1] : 
+                                       n_cells[0]*n_cells[1]*n_cells[2];
+    dProb = 1./dProb;
+
+    // Setup temp MultiFabs
+    std::array< MultiFab, AMREX_SPACEDIM > macTemp;
+    MultiFab gradU;
+    MultiFab sound_speed;
+    MultiFab ccTemp;
+    MultiFab ccTempA;
+    MultiFab ccTempDiv;
+    std::array< MultiFab, NUM_EDGE > curlU;
+    std::array< MultiFab, NUM_EDGE > eta_edge;
+    std::array< MultiFab, NUM_EDGE > curlUtemp;
+    AMREX_D_TERM(macTemp[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 1, 1);,
+                 macTemp[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 1, 1);,
+                 macTemp[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 1, 1););   
+    gradU.define(prim.boxArray(),prim.DistributionMap(),AMREX_SPACEDIM,0);
+    sound_speed.define(prim.boxArray(),prim.DistributionMap(),1,0);
+    ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
+    ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0);
+    ccTempDiv.define(prim.boxArray(),prim.DistributionMap(),1,0);
+#if (AMREX_SPACEDIM == 3)
+    curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
+    curlU[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0);
+    curlU[2].define(convert(prim.boxArray(),nodal_flag_yz), prim.DistributionMap(), 1, 0);
+    eta_edge[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
+    eta_edge[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0);
+    eta_edge[2].define(convert(prim.boxArray(),nodal_flag_yz), prim.DistributionMap(), 1, 0);
+#elif (AMREX_SPACEDIM == 2)
+    curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
+    eta_edge[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
+#endif
+    
+#if (AMREX_SPACEDIM == 3)
+    curlUtemp[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
+    curlUtemp[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0);
+    curlUtemp[2].define(convert(prim.boxArray(),nodal_flag_yz), prim.DistributionMap(), 1, 0);
+#elif (AMREX_SPACEDIM == 2)
+    curlUtemp[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
+#endif
+
+    // Setup temp variables
+    Real temp;
+    Vector<Real> tempvec(3);
+    Vector<Real> rhouu(3);
+    Vector<Real> uu(3);
+    Vector<Real> gradU2(3);
+    Vector<Real> gradU3(3);
+    Vector<Real> gradU4(3);
+    Vector<Real> eps_s_vec(3); // solenoidal dissipation
+
+    // turbulent kinetic energy
+    StagInnerProd(cumom,0,vel,0,macTemp,rhouu);
+    rhouu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
+    rhouu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
+    rhouu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
+    turbKE = 0.5*( rhouu[0] + rhouu[1] + rhouu[2] );
+
+    // RMS velocity
+    StagInnerProd(vel,0,vel,0,macTemp,uu);
+    uu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
+    uu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
+    uu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
+    u_rms = sqrt((uu[0] + uu[1] + uu[2])/3.0);
+
+    // Compute sound speed
+    ComputeSoundSpeed(sound_speed,prim);
+    c_speed = ComputeSpatialMean(sound_speed, 0);
+
+    // compute gradU = [du/dx dv/dy dw/dz] at cell-centers
+    ComputeCentredGradFC(vel,gradU,geom);
+    ccTemp.setVal(0.0);
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        MultiFab::Add(ccTemp,gradU,d,0,1,0);
+    }
+    CCInnerProd(ccTemp,0,ccTemp,0,ccTempDiv,temp); // store (\sum_i du_i/dx_i)^2 MFab
+
+    // Compute Velocity gradient moment sum
+    // 2nd moment
+    ccTemp.setVal(0.0);
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        CCMoments(gradU,d,ccTempA,2,gradU2[d]);
+        MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+        gradU2[d] *= dProb; // <(du_i/dx_i)^2> each component
+    }
+    Real avg_mom2 = ComputeSpatialMean(ccTemp, 0); // <\sum_i (du_i/dx_i)^2>
+    
+    // 3rd moment
+    ccTemp.setVal(0.0);
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        CCMoments(gradU,d,ccTempA,3,gradU3[d]);
+        MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+        gradU3[d] *= dProb; // <(du_i/dx_i)^3> each component
+    }
+    Real avg_mom3 = ComputeSpatialMean(ccTemp, 0); //  <\sum_i (du_i/dx_i)^3>
+    
+    // 4th moment
+    ccTemp.setVal(0.0);
+    for (int d=0; d<AMREX_SPACEDIM; ++d) {
+        CCMoments(gradU,d,ccTempA,4,gradU4[d]);
+        MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+        gradU4[d] *= dProb; // <(du_i/dx_i)^4> each component
+    }
+    Real avg_mom4 = ComputeSpatialMean(ccTemp, 0); //  <\sum_i (du_i/dx_i)^4>
+            
+    // Taylor Microscale
+    taylor_len = u_rms/avg_mom2;
+
+    // Taylor Reynolds Number & Turbulent Mach number
+    Real rho_avg = ComputeSpatialMean(prim, 0);
+    Real eta_avg = ComputeSpatialMean(eta, 0);
+    taylor_Re = rho_avg*taylor_len*u_rms/eta_avg;
+    taylor_Ma = sqrt(3.0)*u_rms/c_speed;
+
+    // Skewness
+    //Real skew1 = gradU3[0]/pow(gradU2[0],1.5); // <(du_1/dx_1)^3>/<(du_1/dx_1)^2>^1.5
+    //Real skew2 = gradU3[1]/pow(gradU2[1],1.5); // <(du_2/dx_2)^3>/<(du_2/dx_2)^2>^1.5
+    //Real skew3 = gradU3[2]/pow(gradU2[2],1.5); // <(du_3/dx_3)^3>/<(du_3/dx_3)^2>^1.5
+    // <\sum_i (du_i/dx_i)^3> / (\sum_i <(du_i/dx_i)^2>^1.5)
+    skew = avg_mom3/(pow(gradU2[0],1.5) + pow(gradU2[1],1.5) + pow(gradU2[2],1.5)); 
+            
+    // Kurtosis
+    //Real kurt1 = gradU4[0]/pow(gradU2[0],2); // <(du_1/dx_1)^4>/<(du_1/dx_1)^2>^2
+    //Real kurt2 = gradU4[1]/pow(gradU2[1],2); // <(du_2/dx_2)^4>/<(du_2/dx_2)^2>^2
+    //Real kurt3 = gradU4[2]/pow(gradU2[2],2); // <(du_3/dx_3)^4>/<(du_3/dx_3)^2>^2
+    // <\sum_i (du_i/dx_i)^4> / (\sum_i <(du_i/dx_i)^2>^2)
+    kurt =  avg_mom4/(pow(gradU2[0],2) + pow(gradU2[1],2) + pow(gradU2[2],2)); 
+
+    // Compute \omega (curl)
+    ComputeCurlFaceToEdge(vel,curlU,geom);
+    
+    // Solenoidal dissipation: <eta \omega_i \omega_i>/<rho>
+    AverageCCToEdge(eta,eta_edge,0,1,SPEC_BC_COMP,geom);
+    EdgeInnerProd(curlU,0,curlU,0,curlUtemp,tempvec);
+    EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec);
+    eps_s_vec[0] /= (n_cells[0]+1)*(n_cells[1]+1)*n_cells[2];
+    eps_s_vec[1] /= (n_cells[0]+1)*(n_cells[2]+1)*n_cells[1];
+    eps_s_vec[2] /= (n_cells[1]+1)*(n_cells[2]+1)*n_cells[0];
+    eps_s = (eps_s_vec[0] + eps_s_vec[1] + eps_s_vec[2])/rho_avg;
+
+    // Dilational dissipation (4/3)*<eta \sum_i (du_i/dx_i)^2>/<rho>
+    CCInnerProd(ccTempDiv,0,eta,0,ccTemp,eps_d);
+    eps_d *= (dProb*(4.0/3.0)/rho_avg);
+
+    // Ratio of Dilational to Solenoidal dissipation
+    eps_ratio = eps_d/eps_s;
+    Real eps_t = eps_s + eps_d;
+
+    // Kolmogorov scales
+    kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_s)),0.25);
+    kolm_d = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_d)),0.25);
+    kolm_t = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_t)),0.25);
+
+}
+#endif
+
+#if defined(TURB)
+void GetTurbQtyDecomp(const MultiFab& vel_decomp_in, // contains 6 components for solenoidal and dilataional velocities
+                      const MultiFab& prim,
+                      const amrex::Geometry& geom,
+                      Real& turbKE_s, Real& turbKE_d, Real& delta_turbKE,
+                      Real& u_rms_s, Real& u_rms_d, Real& delta_u_rms,
+                      Real& taylor_Ma_d,
+                      Real& skew_s, Real& kurt_s,
+                      Real& skew_d, Real& kurt_d)
+
+{
+    BL_PROFILE_VAR("GetTurbQtyDecomp()",GetTurbQtyDecomp);
+
+    MultiFab vel_decomp;
+    vel_decomp.define(prim.boxArray(),prim.DistributionMap(),6,1); // need a ghost cell for gradients
+    vel_decomp.ParallelCopy(vel_decomp_in,0,0,6);
+    vel_decomp.FillBoundary(geom.periodicity());
+    
+    Vector<Real> dProb(3);
+    dProb[0] = 1.0/((n_cells[0]+1)*n_cells[1]*n_cells[2]);
+    dProb[1] = 1.0/((n_cells[1]+1)*n_cells[2]*n_cells[0]);
+    dProb[2] = 1.0/((n_cells[2]+1)*n_cells[0]*n_cells[1]);
+
+    // Setup temp MultiFabs
+    std::array< MultiFab, AMREX_SPACEDIM > gradU;
+    std::array< MultiFab, AMREX_SPACEDIM > faceTemp;
+    MultiFab sound_speed;
+    MultiFab ccTemp;
+    AMREX_D_TERM(gradU[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 6, 0);,
+                 gradU[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 6, 0);,
+                 gradU[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 6, 0););   
+    AMREX_D_TERM(faceTemp[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 1, 0);,
+                 faceTemp[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 1, 0);,
+                 faceTemp[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 1, 0););   
+    sound_speed.define(prim.boxArray(),prim.DistributionMap(),1,0);
+    ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
+
+    // Setup temp variables
+    Vector<Real> gradU2_temp(3);
+    Vector<Real> gradU2_s(3);
+    Vector<Real> gradU3_s(3);
+    Vector<Real> gradU4_s(3);
+    Vector<Real> gradU2_d(3);
+    Vector<Real> gradU3_d(3);
+    Vector<Real> gradU4_d(3);
+    
+    Vector<int> comps_s{0,1,2};
+    Vector<int> comps_d{3,4,5};
+
+    // turbulent kinetic energy (solenoidal)
+    ccTemp.setVal(0.0);
+    MultiFab::AddProduct(ccTemp,vel_decomp,0,vel_decomp,0,0,1,0); //uu
+    MultiFab::AddProduct(ccTemp,vel_decomp,1,vel_decomp,1,0,1,0); //vv
+    MultiFab::AddProduct(ccTemp,vel_decomp,2,vel_decomp,2,0,1,0); //ww
+    u_rms_s = ComputeSpatialMean(ccTemp, 0);
+    u_rms_s = sqrt(u_rms_s/3.0);
+    MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww)
+    turbKE_s = ComputeSpatialMean(ccTemp,0);
+    turbKE_s = 0.5*turbKE_s;
+
+    // turbulent kinetic energy (dilatational)
+    ccTemp.setVal(0.0);
+    MultiFab::AddProduct(ccTemp,vel_decomp,3,vel_decomp,3,0,1,0); //uu
+    MultiFab::AddProduct(ccTemp,vel_decomp,4,vel_decomp,4,0,1,0); //vv
+    MultiFab::AddProduct(ccTemp,vel_decomp,5,vel_decomp,5,0,1,0); //ww
+    u_rms_d = ComputeSpatialMean(ccTemp, 0);
+    u_rms_d = sqrt(u_rms_d/3.0);
+    MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww)
+    turbKE_d = ComputeSpatialMean(ccTemp,0);
+    turbKE_d = 0.5*turbKE_d;
+
+    // ratio of turbulent kinetic energies
+    delta_u_rms  = u_rms_d/u_rms_s;
+    delta_turbKE = turbKE_d/turbKE_s;
+
+    // Taylor Mach (dilatational)
+    ComputeSoundSpeed(sound_speed,prim);
+    Real c_speed = ComputeSpatialMean(sound_speed, 0);
+    taylor_Ma_d = sqrt(3.0)*u_rms_d/c_speed;
+
+    // compute gradU = [du/dx dv/dy dw/dz] at cell-centers
+    ComputeGrad(vel_decomp,gradU,0,0,6,-1,geom,0);
+
+    // Compute Velocity gradient moment sum
+    // 2nd moment (solenoidal)
+    FCMoments(gradU,comps_s,faceTemp,2,gradU2_temp);
+    gradU2_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+    gradU2_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+    gradU2_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+    // 2nd moment (dilatational)
+    FCMoments(gradU,comps_d,faceTemp,2,gradU2_temp);
+    gradU2_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+    gradU2_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+    gradU2_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+    // Compute Velocity gradient moment sum
+    // 3rd moment (solenoidal)
+    FCMoments(gradU,comps_s,faceTemp,3,gradU2_temp);
+    gradU3_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+    gradU3_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+    gradU3_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+    // 3rd moment (dilatational)
+    FCMoments(gradU,comps_d,faceTemp,3,gradU2_temp);
+    gradU3_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+    gradU3_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+    gradU3_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+    // Compute Velocity gradient moment sum
+    // 4th moment (solenoidal)
+    FCMoments(gradU,comps_s,faceTemp,4,gradU2_temp);
+    gradU4_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+    gradU4_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+    gradU4_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+    // 4th moment (dilatational)
+    FCMoments(gradU,comps_d,faceTemp,4,gradU2_temp);
+    gradU4_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+    gradU4_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+    gradU4_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+            
+    // Skewness
+    // <\sum_i (du_i/dx_i)^3> / (\sum_i <(du_i/dx_i)^2>^1.5)
+    skew_s = (gradU3_s[0] + gradU3_s[1] + gradU3_s[2])/
+             (pow(gradU2_s[0],1.5) + pow(gradU2_s[1],1.5) + pow(gradU2_s[2],1.5)); 
+    skew_d = (gradU3_d[0] + gradU3_d[1] + gradU3_d[2])/
+             (pow(gradU2_d[0],1.5) + pow(gradU2_d[1],1.5) + pow(gradU2_d[2],1.5)); 
+            
+    // Kurtosis
+    // <\sum_i (du_i/dx_i)^4> / (\sum_i <(du_i/dx_i)^2>^2)
+    kurt_s = (gradU4_s[0] + gradU4_s[1] + gradU4_s[2])/
+             (pow(gradU2_s[0],2.0) + pow(gradU2_s[1],2.0) + pow(gradU2_s[2],2.0)); 
+    kurt_d = (gradU4_d[0] + gradU4_d[1] + gradU4_d[2])/
+             (pow(gradU2_d[0],2.0) + pow(gradU2_d[1],2.0) + pow(gradU2_d[2],2.0)); 
+
+}
+#endif
+
+
+void EvaluateWritePlotFileVelGrad(int step,
+                                  const amrex::Real time,
+                                  const amrex::Geometry& geom,
+                                  const std::array<MultiFab, AMREX_SPACEDIM>& vel)
+{
+    BL_PROFILE_VAR("EvaluateWritePlotFileVelGrad()",EvaluateWritePlotFileVelGrad);
+
+    // Evaluate velocity gradient components and divergence and vorticity
+    MultiFab vel_grad;
+    
+    // Cell-Centered Velocity Gradient Stats (1,2,3 are directions)
+    // 0: u_1,1
+    // 1: u_2,2
+    // 2: u_3,3
+    // 3: u_1,2
+    // 4: u_1,3
+    // 5: u_2,3
+    // 6: divergence = u_1,1 + u_2,2 + u_3,3
+    // 7: vorticity = sqrt(wx + wy + wz)
+    vel_grad.define(convert(vel[0].boxArray(),IntVect(AMREX_D_DECL(0,0,0))), vel[0].DistributionMap(), 8, 0);
+    vel_grad.setVal(0.0);
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(vel_grad,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        
+        const Box& bx = mfi.tilebox();
+        
+        const Array4<Real> & vgrad = vel_grad.array(mfi);
+        
+        AMREX_D_TERM(Array4<Real const> const& velx = vel[0].array(mfi);,
+                     Array4<Real const> const& vely = vel[1].array(mfi);,
+                     Array4<Real const> const& velz = vel[2].array(mfi););
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            // u_1,1
+            vgrad(i,j,k,0) = (velx(i+1,j,k) - velx(i,j,k))/dx[0];
+            
+            // u_2,2
+            vgrad(i,j,k,1) = (vely(i,j+1,k) - vely(i,j,k))/dx[1];
+            
+            // u_3,3
+            vgrad(i,j,k,2) = (velz(i,j,k+1) - velz(i,j,k))/dx[2];
+
+            // divergence
+            vgrad(i,j,k,6) = vgrad(i,j,k,0) + vgrad(i,j,k,1) + vgrad(i,j,k,2);
+
+            // on edges: u_1,2 and u_2,1 and curl w1 = u_2,1 - u_1,2
+            Real u12_mm = (velx(i,j,k) - velx(i,j-1,k))/dx[1];
+            Real u21_mm = (vely(i,j,k) - vely(i-1,j,k))/dx[0];
+            Real w1_mm  = u21_mm - u12_mm;
+            Real u12_mp = (velx(i,j+1,k) - velx(i,j,k))/dx[1];
+            Real u21_mp = (vely(i,j+1,k) - vely(i-1,j+1,k))/dx[0];
+            Real w1_mp  = u21_mp - u12_mp;
+            Real u12_pm = (velx(i+1,j,k) - velx(i+1,j-1,k))/dx[1];
+            Real u21_pm = (vely(i+1,j,k) - vely(i,j,k))/dx[0];
+            Real w1_pm  = u21_pm - u12_pm;
+            Real u12_pp = (velx(i+1,j+1,k) - velx(i+1,j,k))/dx[1];
+            Real u21_pp = (vely(i+1,j+1,k) - vely(i,j+1,k))/dx[0];
+            Real w1_pp  = u21_pp - u12_pp;
+
+            // u_1,2
+            vgrad(i,j,k,3) = 0.25*(u12_mm + u12_mp + u12_pm + u12_pp);
+
+            // on edges: u_1,3 and u_3,1 and curl w2 = u_1,3 - u_3,1
+            Real u13_mm = (velx(i,j,k) - velx(i,j,k-1))/dx[2];
+            Real u31_mm = (velz(i,j,k) - velz(i-1,j,k))/dx[0];
+            Real w2_mm  = u13_mm - u31_mm;
+            Real u13_mp = (velx(i,j,k+1) - velx(i,j,k))/dx[2];
+            Real u31_mp = (velz(i,j,k+1) - velz(i-1,j,k+1))/dx[0];
+            Real w2_mp  = u13_mp - u31_mp;
+            Real u13_pm = (velx(i+1,j,k) - velx(i+1,j,k-1))/dx[2];
+            Real u31_pm = (velz(i+1,j,k) - velz(i,j,k))/dx[0];
+            Real w2_pm  = u13_pm - u31_pm;
+            Real u13_pp = (velx(i+1,j,k+1) - velx(i+1,j,k))/dx[2];
+            Real u31_pp = (velz(i+1,j,k+1) - velz(i,j,k+1))/dx[0];
+            Real w2_pp  = u13_pp - u31_pp;
+
+            // u_1,3
+            vgrad(i,j,k,4) = 0.25*(u13_mm + u13_mp + u13_pm + u13_pp);
+
+            // on edges: u_2,3 and u_3,2 and curl w2 = u_3,2 - u_2,3
+            Real u23_mm = (vely(i,j,k) - vely(i,j,k-1))/dx[2];
+            Real u32_mm = (velz(i,j,k) - velz(i,j-1,k))/dx[1];
+            Real w3_mm  = u32_mm - u23_mm;
+            Real u23_mp = (vely(i,j,k+1) - vely(i,j,k))/dx[2];
+            Real u32_mp = (velz(i,j,k+1) - velz(i,j-1,k+1))/dx[1];
+            Real w3_mp  = u32_mp - u23_mp;
+            Real u23_pm = (vely(i,j+1,k) - vely(i,j+1,k-1))/dx[2];
+            Real u32_pm = (velz(i,j+1,k) - velz(i,j,k))/dx[1];
+            Real w3_pm  = u32_pm - u23_pm;
+            Real u23_pp = (vely(i,j+1,k+1) - vely(i,j+1,k))/dx[2];
+            Real u32_pp = (velz(i,j+1,k+1) - velz(i,j,k+1))/dx[1];
+            Real w3_pp  = u32_pp - u23_pp;
+
+            // u_2,3
+            vgrad(i,j,k,5) = 0.25*(u23_mm + u23_mp + u23_pm + u23_pp);
+
+            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
+            vgrad(i,j,k,7) = sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
+                                        w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
+                                        w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
+        });
+    }
+
+    // Write on a plotfile
+    std::string plotfilename = amrex::Concatenate("vel_grad",step,9);
+    amrex::Vector<std::string> varNames(8);
+    varNames[0] = "ux_x";
+    varNames[1] = "uy_y";
+    varNames[2] = "uz_z";
+    varNames[3] = "ux_y";
+    varNames[4] = "ux_z";
+    varNames[5] = "uy_z";
+    varNames[6] = "div";
+    varNames[7] = "vort";
+    WriteSingleLevelPlotfile(plotfilename,vel_grad,varNames,geom,time,step);
+}
+
+#if defined(TURB)
+void EvaluateWritePlotFileVelGrad(int step,
+                                  const amrex::Real time,
+                                  const amrex::Geometry& geom,
+                                  const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                  const amrex::MultiFab& vel_decomp_in)
+{
+    BL_PROFILE_VAR("EvaluateWritePlotFileVelGrad()",EvaluateWritePlotFileVelGrad);
+
+    MultiFab output;
+    
+    // Cell-Centered Velocity Gradient Stats (1,2,3 are directions)
+    // 0: ux_s
+    // 1: uy_s
+    // 2: uz_s
+    // 3: ux_d
+    // 4: uy_d
+    // 5: uz_d
+    // 6: umag_s
+    // 7: umag_d
+    // 8: divergence = u_1,1 + u_2,2 + u_3,3
+    // 9: vorticity = sqrt(wx + wy + wz)
+    output.define(convert(vel[0].boxArray(),IntVect(AMREX_D_DECL(0,0,0))), vel[0].DistributionMap(), 10, 0);
+    output.setVal(0.0);
+
+    MultiFab vel_decomp;
+    vel_decomp.define(convert(vel[0].boxArray(),IntVect(AMREX_D_DECL(0,0,0))), vel[0].DistributionMap(), 6, 0);
+    vel_decomp.ParallelCopy(vel_decomp_in,0,0,6);
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        
+        const Box& bx = mfi.tilebox();
+        
+        const Array4<Real>             out = output.array(mfi);
+
+        const Array4<const Real>  v_decomp = vel_decomp.array(mfi);
+        
+        AMREX_D_TERM(Array4<Real const> const& velx = vel[0].array(mfi);,
+                     Array4<Real const> const& vely = vel[1].array(mfi);,
+                     Array4<Real const> const& velz = vel[2].array(mfi););
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            for (int n=0;n<6;++n) {
+                out(i,j,k,n) = v_decomp(i,j,k,n);
+            }
+            out(i,j,k,6) = sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag solenoidal
+            out(i,j,k,7) = sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag dilatational
+
+            // divergence
+            out(i,j,k,8) = (velx(i+1,j,k) - velx(i,j,k))/dx[0] +
+                           (vely(i,j+1,k) - vely(i,j,k))/dx[1] + 
+                           (velz(i,j,k+1) - velz(i,j,k))/dx[2] ;
+
+            // on edges: u_1,2 and u_2,1 and curl w1 = u_2,1 - u_1,2
+            Real u12_mm = (velx(i,j,k) - velx(i,j-1,k))/dx[1];
+            Real u21_mm = (vely(i,j,k) - vely(i-1,j,k))/dx[0];
+            Real w1_mm  = u21_mm - u12_mm;
+            Real u12_mp = (velx(i,j+1,k) - velx(i,j,k))/dx[1];
+            Real u21_mp = (vely(i,j+1,k) - vely(i-1,j+1,k))/dx[0];
+            Real w1_mp  = u21_mp - u12_mp;
+            Real u12_pm = (velx(i+1,j,k) - velx(i+1,j-1,k))/dx[1];
+            Real u21_pm = (vely(i+1,j,k) - vely(i,j,k))/dx[0];
+            Real w1_pm  = u21_pm - u12_pm;
+            Real u12_pp = (velx(i+1,j+1,k) - velx(i+1,j,k))/dx[1];
+            Real u21_pp = (vely(i+1,j+1,k) - vely(i,j+1,k))/dx[0];
+            Real w1_pp  = u21_pp - u12_pp;
+
+            // on edges: u_1,3 and u_3,1 and curl w2 = u_1,3 - u_3,1
+            Real u13_mm = (velx(i,j,k) - velx(i,j,k-1))/dx[2];
+            Real u31_mm = (velz(i,j,k) - velz(i-1,j,k))/dx[0];
+            Real w2_mm  = u13_mm - u31_mm;
+            Real u13_mp = (velx(i,j,k+1) - velx(i,j,k))/dx[2];
+            Real u31_mp = (velz(i,j,k+1) - velz(i-1,j,k+1))/dx[0];
+            Real w2_mp  = u13_mp - u31_mp;
+            Real u13_pm = (velx(i+1,j,k) - velx(i+1,j,k-1))/dx[2];
+            Real u31_pm = (velz(i+1,j,k) - velz(i,j,k))/dx[0];
+            Real w2_pm  = u13_pm - u31_pm;
+            Real u13_pp = (velx(i+1,j,k+1) - velx(i+1,j,k))/dx[2];
+            Real u31_pp = (velz(i+1,j,k+1) - velz(i,j,k+1))/dx[0];
+            Real w2_pp  = u13_pp - u31_pp;
+
+            // on edges: u_2,3 and u_3,2 and curl w2 = u_3,2 - u_2,3
+            Real u23_mm = (vely(i,j,k) - vely(i,j,k-1))/dx[2];
+            Real u32_mm = (velz(i,j,k) - velz(i,j-1,k))/dx[1];
+            Real w3_mm  = u32_mm - u23_mm;
+            Real u23_mp = (vely(i,j,k+1) - vely(i,j,k))/dx[2];
+            Real u32_mp = (velz(i,j,k+1) - velz(i,j-1,k+1))/dx[1];
+            Real w3_mp  = u32_mp - u23_mp;
+            Real u23_pm = (vely(i,j+1,k) - vely(i,j+1,k-1))/dx[2];
+            Real u32_pm = (velz(i,j+1,k) - velz(i,j,k))/dx[1];
+            Real w3_pm  = u32_pm - u23_pm;
+            Real u23_pp = (vely(i,j+1,k+1) - vely(i,j+1,k))/dx[2];
+            Real u32_pp = (velz(i,j+1,k+1) - velz(i,j,k+1))/dx[1];
+            Real w3_pp  = u32_pp - u23_pp;
+
+            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
+            out(i,j,k,9) = sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
+                                      w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
+                                      w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
+        });
+    }
+
+    // Write on a plotfile
+    std::string plotfilename = amrex::Concatenate("vel_grad_decomp",step,9);
+    amrex::Vector<std::string> varNames(10);
+    varNames[0] = "ux_s";
+    varNames[1] = "uy_s";
+    varNames[2] = "uz_s";
+    varNames[3] = "ux_d";
+    varNames[4] = "ux_d";
+    varNames[5] = "uy_d";
+    varNames[6] = "umag_s";
+    varNames[7] = "umag_d";
+    varNames[8] = "div";
+    varNames[9] = "vort";
+    WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,time,step);
+}
+#endif
+
diff --git a/src_compressible_stag/Make.package b/src_compressible_stag/Make.package
index f18abed0..442f6316 100644
--- a/src_compressible_stag/Make.package
+++ b/src_compressible_stag/Make.package
@@ -7,6 +7,7 @@ CEXE_sources += membraneStag.cpp
 CEXE_sources += writePlotFileStag.cpp
 CEXE_sources += Checkpoint.cpp
 CEXE_sources += TurbForcingComp.cpp
+CEXE_sources += DeriveVelProp.cpp
 
 CEXE_sources += compressible_functions_stag.cpp
 CEXE_headers += compressible_functions_stag.H
diff --git a/src_compressible_stag/compressible_functions_stag.H b/src_compressible_stag/compressible_functions_stag.H
index dcdce16f..2e98ce30 100644
--- a/src_compressible_stag/compressible_functions_stag.H
+++ b/src_compressible_stag/compressible_functions_stag.H
@@ -46,6 +46,14 @@ void EvaluateWritePlotFileVelGrad(int step,
                                   const amrex::Geometry& geom,
                                   const std::array<MultiFab, AMREX_SPACEDIM>& vel);
 
+#if defined(TURB)
+void EvaluateWritePlotFileVelGrad(int step,
+                                  const amrex::Real time,
+                                  const amrex::Geometry& geom,
+                                  const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                  const amrex::MultiFab& vel_decomp);
+#endif
+
 void conservedToPrimitiveStag(MultiFab& prim_in, std::array<MultiFab, AMREX_SPACEDIM>& velStag_in,
                               MultiFab& cons_in, const std::array<MultiFab, AMREX_SPACEDIM>& momStag_in);
 
@@ -388,4 +396,27 @@ void ComputeSoundSpeed(MultiFab& sound_speed_in, const MultiFab& prim_in);
 
 amrex::Real GetMaxAcousticCFL(const MultiFab& prim_in, const std::array<MultiFab, AMREX_SPACEDIM>& vel_in, const Real& dt, const Geometry& geom);
 
+#if defined(TURB)
+void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
+                std::array< MultiFab, AMREX_SPACEDIM >& cumom,
+                MultiFab& prim,
+                MultiFab& eta,
+                const amrex::Geometry& geom,
+                Real& turbKE, Real& c_speed,
+                Real& u_rms,
+                Real& taylor_len, Real& taylor_Re, Real& taylor_Ma,
+                Real& skew, Real& kurt,
+                Real& eps_s, Real& eps_d, Real& eps_ratio,
+                Real& kolm_s, Real& kolm_d, Real& kolm_t);
+
+void GetTurbQtyDecomp(const MultiFab& vel_decomp,
+                      const MultiFab& prim,
+                      const amrex::Geometry& geom,
+                      Real& turbKE_s, Real& turbKE_d, Real& delta_turbKE,
+                      Real& u_rms_s, Real& u_rms_d, Real& delta_u_rms,
+                      Real& taylor_Ma_d,
+                      Real& skew_s, Real& kurt_s,
+                      Real& skew_d, Real& kurt_d);
+#endif
+
 #endif
diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp
index e7c2c289..a4cbde6b 100644
--- a/src_compressible_stag/main_driver.cpp
+++ b/src_compressible_stag/main_driver.cpp
@@ -261,15 +261,8 @@ void main_driver(const char* argv)
     // data structure for turbulence diagnostics
     std::string turbfilename = "turbstats";
     std::ofstream turboutfile;
-    std::array< MultiFab, AMREX_SPACEDIM > macTemp;
-    MultiFab gradU;
-    MultiFab sound_speed;
-    MultiFab ccTemp;
-    MultiFab ccTempA;
-    MultiFab ccTempDiv;
-    std::array< MultiFab, NUM_EDGE > curlU;
-    std::array< MultiFab, NUM_EDGE > eta_edge;
-    std::array< MultiFab, NUM_EDGE > curlUtemp;
+    std::string turbfilenamedecomp = "turbstatsdecomp";
+    std::ofstream turboutfiledecomp;
 #endif
 
     /////////////////////////////////////////////
@@ -441,9 +434,6 @@ void main_driver(const char* argv)
     // need to use dVol for scaling
     Real dVol = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2];
     Real dVolinv = 1.0/dVol;
-    Real dProb = (AMREX_SPACEDIM==2) ? n_cells[0]*n_cells[1] : 
-                                       n_cells[0]*n_cells[1]*n_cells[2];
-    dProb = 1./dProb;
     
     MultiFab structFactMFTurbVel;
     MultiFab structFactMFTurbScalar;
@@ -523,35 +513,8 @@ void main_driver(const char* argv)
         if (turbForcing >= 1) { // temporary fab for turbulent
             if (ParallelDescriptor::IOProcessor()) {
                 turboutfile.open(turbfilename, std::ios::app);
+                turboutfiledecomp.open(turbfilenamedecomp, std::ios::app);
             }
-            AMREX_D_TERM(macTemp[0].define(convert(ba,nodal_flag_x), dmap, 1, 1);,
-                         macTemp[1].define(convert(ba,nodal_flag_y), dmap, 1, 1);,
-                         macTemp[2].define(convert(ba,nodal_flag_z), dmap, 1, 1););   
-            gradU.define(ba,dmap,AMREX_SPACEDIM,0);
-            sound_speed.define(ba,dmap,1,0);
-            ccTemp.define(ba,dmap,1,0);
-            ccTempA.define(ba,dmap,1,0);
-            ccTempDiv.define(ba,dmap,1,0);
-#if (AMREX_SPACEDIM == 3)
-            curlU[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            curlU[1].define(convert(ba,nodal_flag_xz), dmap, 1, 0);
-            curlU[2].define(convert(ba,nodal_flag_yz), dmap, 1, 0);
-            eta_edge[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            eta_edge[1].define(convert(ba,nodal_flag_xz), dmap, 1, 0);
-            eta_edge[2].define(convert(ba,nodal_flag_yz), dmap, 1, 0);
-#elif (AMREX_SPACEDIM == 2)
-            curlU[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            eta_edge[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-#endif
-    
-#if (AMREX_SPACEDIM == 3)
-            curlUtemp[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            curlUtemp[1].define(convert(ba,nodal_flag_xz), dmap, 1, 0);
-            curlUtemp[2].define(convert(ba,nodal_flag_yz), dmap, 1, 0);
-#elif (AMREX_SPACEDIM == 2)
-            curlUtemp[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-#endif
-
         }
 #endif
     }
@@ -694,39 +657,21 @@ void main_driver(const char* argv)
                 turboutfile.open(turbfilename);
                 turboutfile << "step " << "time " << "turbKE " << "RMSu " 
                             << "<c> " << "TaylorLen " << "TaylorRe " << "TaylorMa "
-                            << "skew1 " << "skew2 " << "skew3 " << "skew "
-                            << "kurt1 " << "kurt2 " << "kurt3 " << "kurt "
+                            << "skew " << "kurt "
                             << "eps_s " << "eps_d " << "eps_d/eps_s "
-                            << "kolm_s " << "kolm_t"  
+                            << "kolm_s " << "kolm_s" << "kolm_t"
                             << std::endl;
+
+                turboutfiledecomp.open(turbfilenamedecomp);
+                turboutfiledecomp << "step " << "time " 
+                                  << "turbKE_s " << "turbKE_d " << "delta_turbKE "
+                                  << "u_rms_s " << "u_rms_d " << "delta_u_rms " 
+                                  << "TaylorMa_d "
+                                  << "skew_s " << "kurt_s "
+                                  << "skew_d " << "kurt_d "
+                                  << std::endl;
+
             }
-            AMREX_D_TERM(macTemp[0].define(convert(ba,nodal_flag_x), dmap, 1, 1);,
-                         macTemp[1].define(convert(ba,nodal_flag_y), dmap, 1, 1);,
-                         macTemp[2].define(convert(ba,nodal_flag_z), dmap, 1, 1););   
-            gradU.define(ba,dmap,AMREX_SPACEDIM,0);
-            sound_speed.define(ba,dmap,1,0);
-            ccTemp.define(ba,dmap,1,0);
-            ccTempA.define(ba,dmap,1,0);
-            ccTempDiv.define(ba,dmap,1,0);
-#if (AMREX_SPACEDIM == 3)
-            curlU[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            curlU[1].define(convert(ba,nodal_flag_xz), dmap, 1, 0);
-            curlU[2].define(convert(ba,nodal_flag_yz), dmap, 1, 0);
-            eta_edge[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            eta_edge[1].define(convert(ba,nodal_flag_xz), dmap, 1, 0);
-            eta_edge[2].define(convert(ba,nodal_flag_yz), dmap, 1, 0);
-#elif (AMREX_SPACEDIM == 2)
-            curlU[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            eta_edge[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-#endif
-    
-#if (AMREX_SPACEDIM == 3)
-            curlUtemp[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-            curlUtemp[1].define(convert(ba,nodal_flag_xz), dmap, 1, 0);
-            curlUtemp[2].define(convert(ba,nodal_flag_yz), dmap, 1, 0);
-#elif (AMREX_SPACEDIM == 2)
-            curlUtemp[0].define(convert(ba,nodal_flag_xy), dmap, 1, 0);
-#endif
         }
 #endif
 
@@ -775,7 +720,7 @@ void main_driver(const char* argv)
                           prim, primMeans, primVars, vel, velMeans, velVars, coVars, surfcov, surfcovMeans, surfcovVars, eta, kappa);
 #if defined(TURB)
             if (turbForcing > 0) {
-                EvaluateWritePlotFileVelGrad(0, 0.0, geom, vel);
+                EvaluateWritePlotFileVelGrad(0, 0.0, geom, vel, vel_decomp);
             }
 #endif
 
@@ -952,6 +897,7 @@ void main_driver(const char* argv)
         structFactMFTurbVel.define(ba, dmap, 3, 0);
         structFactMFTurbScalar.define(ba, dmap, 6, 0);
         vel_decomp.define(ba, dmap, 6, 0);
+        vel_decomp.setVal(0.0);
 
         turbStructFactVelTotal.define(ba,dmap,
                 var_names_turbVelTotal,var_scaling_turbVelTotal,
@@ -961,7 +907,7 @@ void main_driver(const char* argv)
                 s_pairA_turbScalar,s_pairB_turbScalar);
         turbStructFactVelDecomp.defineDecomp(ba,dmap,
                 var_names_turbVelTotal,var_scaling_turbVelDecomp,
-                s_pairA_turbVelTotal,s_pairB_turbVelTotal,geom);
+                s_pairA_turbVelTotal,s_pairB_turbVelTotal);
     }
 #endif
 
@@ -1203,7 +1149,7 @@ void main_driver(const char* argv)
             
 #if defined(TURB)
             if (turbForcing > 0) {
-                EvaluateWritePlotFileVelGrad(step, time, geom, vel);
+                EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp);
             }
 #endif
 
@@ -1253,140 +1199,74 @@ void main_driver(const char* argv)
                     turbStructFactScalar.FortStructure(structFactMFTurbScalar,geom,1);
                     turbStructFactScalar.CallFinalize(geom);
                     turbStructFactScalar.IntegratekShellsScalar(step,geom,var_names_turbScalar);
-                
-
             }
 #endif
-
         }
 
 
 #if defined(TURB)
         // turbulence outputs
-        if ((turbForcing >= 1) and (writePlt)) {
+        if ((turbForcing >= 1) and (step%1000 == 0)) {
 
+            Real turbKE, c_speed, u_rms, taylor_len, taylor_Re, taylor_Ma,
+            skew, kurt, eps_s, eps_d, eps_ratio, kolm_s, kolm_d, kolm_t;
             for (int i=0; i<AMREX_SPACEDIM; ++i) {
                 vel[i].FillBoundary(geom.periodicity());
                 cumom[i].FillBoundary(geom.periodicity());
             }
+            GetTurbQty(vel, cumom, prim, eta, geom,
+                       turbKE, c_speed, u_rms,
+                       taylor_len, taylor_Re, taylor_Ma,
+                       skew, kurt,
+                       eps_s, eps_d, eps_ratio,
+                       kolm_s, kolm_d, kolm_t);
             
-            turboutfile << step << " " << time << " ";
-
-            Real temp;
-            Vector<Real> tempvec(3);
-            
-            Vector<Real> rhouu(3);
-            Vector<Real> uu(3);
-            Vector<Real> gradU2(3);
-            Vector<Real> gradU3(3);
-            Vector<Real> gradU4(3);
-            Vector<Real> eps_s_vec(3); // solenoidal dissipation
-            Real eps_d; // dilational dissipation
-            
-            // turbulent kinetic energy
-            StagInnerProd(cumom,0,vel,0,macTemp,rhouu);
-            rhouu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
-            rhouu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
-            rhouu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
-            turboutfile << 0.5*( rhouu[0] + rhouu[1] + rhouu[2] ) << " ";
-
-            // RMS velocity
-            StagInnerProd(vel,0,vel,0,macTemp,uu);
-            uu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
-            uu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
-            uu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
-            Real u_rms = sqrt((uu[0] + uu[1] + uu[2])/3.0);
+            turboutfile << step << " ";
+            turboutfile << time << " ";
+            turboutfile << turbKE << " ";
             turboutfile << u_rms << " ";
-
-            // compute gradU = [du/dx dv/dy dw/dz] at cell-centers
-            ComputeCentredGradFC(vel,gradU,geom);
-            ccTemp.setVal(0.0);
-            for (int d=0; d<AMREX_SPACEDIM; ++d) {
-                MultiFab::Add(ccTemp,gradU,d,0,1,0);
-            }
-            CCInnerProd(ccTemp,0,ccTemp,0,ccTempDiv,temp); // store (\sum_i du_i/dx_i)^2 MFab
-
-            // Compute Velocity gradient moment sum
-            // 2nd moment
-            ccTemp.setVal(0.0);
-            for (int d=0; d<AMREX_SPACEDIM; ++d) {
-                CCMoments(gradU,d,ccTempA,2,gradU2[d]);
-                MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
-                gradU2[d] *= dProb; // <(du_i/dx_i)^2> each component
-            }
-            Real avg_mom2 = ComputeSpatialMean(ccTemp, 0); // <\sum_i (du_i/dx_i)^2>
-            
-            // 3rd moment
-            ccTemp.setVal(0.0);
-            for (int d=0; d<AMREX_SPACEDIM; ++d) {
-                CCMoments(gradU,d,ccTempA,3,gradU3[d]);
-                MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
-                gradU3[d] *= dProb; // <(du_i/dx_i)^3> each component
-            }
-            Real avg_mom3 = ComputeSpatialMean(ccTemp, 0); //  <\sum_i (du_i/dx_i)^3>
-            
-            // 4th moment
-            ccTemp.setVal(0.0);
-            for (int d=0; d<AMREX_SPACEDIM; ++d) {
-                CCMoments(gradU,d,ccTempA,4,gradU4[d]);
-                MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
-                gradU4[d] *= dProb; // <(du_i/dx_i)^4> each component
-            }
-            Real avg_mom4 = ComputeSpatialMean(ccTemp, 0); //  <\sum_i (du_i/dx_i)^4>
-
-            // Compute sound speed
-            ComputeSoundSpeed(sound_speed,prim);
-            Real c_avg = ComputeSpatialMean(sound_speed, 0);
-            turboutfile << c_avg << " ";
-            
-            // Taylor Microscale
-            Real taylor_lamda = u_rms/avg_mom2;
-            turboutfile << taylor_lamda << " " ;
-
-            // Taylor Reynolds Number & Turbulent Mach number
-            Real rho_avg = ComputeSpatialMean(cu, 0);
-            Real eta_avg = ComputeSpatialMean(eta, 0);
-            Real taylor_Re = rho_avg*taylor_lamda*u_rms/eta_avg;
-            Real taylor_Ma = sqrt(3.0)*u_rms/c_avg;
-            turboutfile << taylor_Re << " " << taylor_Ma << " ";
-
-            // Skewness
-            Real skew1 = gradU3[0]/pow(gradU2[0],1.5); // <(du_1/dx_1)^3>/<(du_1/dx_1)^2>^1.5
-            Real skew2 = gradU3[1]/pow(gradU2[1],1.5); // <(du_2/dx_2)^3>/<(du_2/dx_2)^2>^1.5
-            Real skew3 = gradU3[2]/pow(gradU2[2],1.5); // <(du_3/dx_3)^3>/<(du_3/dx_3)^2>^1.5
-            Real skew = avg_mom3/(pow(gradU2[0],1.5) + pow(gradU2[1],1.5) + pow(gradU2[2],1.5)); // <\sum_i (du_i/dx_i)^3> / (\sum_i <(du_i/dx_i)^2>^1.5)
-            turboutfile << skew1 << " " << skew2 << " " << skew3 << " " << skew << " ";
-            
-            // Kurtosis
-            Real kurt1 = gradU4[0]/pow(gradU2[0],2); // <(du_1/dx_1)^4>/<(du_1/dx_1)^2>^2
-            Real kurt2 = gradU4[1]/pow(gradU2[1],2); // <(du_2/dx_2)^4>/<(du_2/dx_2)^2>^2
-            Real kurt3 = gradU4[2]/pow(gradU2[2],2); // <(du_3/dx_3)^4>/<(du_3/dx_3)^2>^2
-            Real kurt =  avg_mom4/(pow(gradU2[0],2) + pow(gradU2[1],2) + pow(gradU2[2],2)); // <\sum_i (du_i/dx_i)^4> / (\sum_i <(du_i/dx_i)^2>^2)
-            turboutfile << kurt1 << " " << kurt2 << " " << kurt3 << " " << kurt << " ";
-
-            // Compute \omega (curl)
-            ComputeCurlFaceToEdge(vel,curlU,geom);
-            
-            // Solenoidal dissipation: <eta \omega_i \omega_i>/<rho>
-            AverageCCToEdge(eta,eta_edge,0,1,SPEC_BC_COMP,geom);
-            EdgeInnerProd(curlU,0,curlU,0,curlUtemp,tempvec);
-            EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec);
-            eps_s_vec[0] /= (n_cells[0]+1)*(n_cells[1]+1)*n_cells[2];
-            eps_s_vec[1] /= (n_cells[0]+1)*(n_cells[2]+1)*n_cells[1];
-            eps_s_vec[2] /= (n_cells[1]+1)*(n_cells[2]+1)*n_cells[0];
-            Real eps_s = (eps_s_vec[0] + eps_s_vec[1] + eps_s_vec[2])/rho_avg;
+            turboutfile << c_speed << " ";
+            turboutfile << taylor_len << " " ;
+            turboutfile << taylor_Re << " "; 
+            turboutfile << taylor_Ma << " ";
+            turboutfile << skew << " ";
+            turboutfile << kurt << " ";
             turboutfile << eps_s << " ";
-
-            // Dilational dissipation (4/3)*<eta \sum_i (du_i/dx_i)^2>/<rho>
-            CCInnerProd(ccTempDiv,0,eta,0,ccTemp,eps_d);
-            eps_d *= (dProb*(4.0/3.0)/rho_avg);
-            turboutfile << eps_d << " " << eps_d/eps_s << " ";
-
-            // Kolmogorov scales
-            Real kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_s)),0.25);
-            Real eps_t = eps_s + eps_d;
-            Real kolm_t = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_t)),0.25);
-            turboutfile << kolm_s << " " << kolm_t << std::endl;
+            turboutfile << eps_d << " "; 
+            turboutfile << eps_ratio << " ";
+            turboutfile << kolm_s << " ";
+            turboutfile << kolm_d << " ";
+            turboutfile << kolm_t;
+            turboutfile << std::endl;
+        }
+        
+        if ((turbForcing >= 1) and (writePlt)) {
+            Real turbKE_s, turbKE_d, delta_turbKE;
+            Real u_rms_s, u_rms_d, delta_u_rms;
+            Real taylor_Ma_d;
+            Real skew_s, kurt_s;
+            Real skew_d, kurt_d;
+            GetTurbQtyDecomp(vel_decomp, prim, geom,
+                             turbKE_s, turbKE_d, delta_turbKE,
+                             u_rms_s, u_rms_d, delta_u_rms,
+                             taylor_Ma_d,
+                             skew_s, kurt_s,
+                             skew_d, kurt_d);
+            
+            turboutfiledecomp << step << " ";
+            turboutfiledecomp << time << " ";
+            turboutfiledecomp << turbKE_s << " ";
+            turboutfiledecomp << turbKE_d << " ";
+            turboutfiledecomp << delta_turbKE << " ";
+            turboutfiledecomp << u_rms_s << " ";
+            turboutfiledecomp << u_rms_d << " ";
+            turboutfiledecomp << delta_u_rms << " ";
+            turboutfiledecomp << taylor_Ma_d << " ";
+            turboutfiledecomp << skew_s << " ";
+            turboutfiledecomp << kurt_s << " ";
+            turboutfiledecomp << skew_d << " ";
+            turboutfiledecomp << kurt_d;
+            turboutfiledecomp << std::endl;
         }
 #endif
         
@@ -1653,6 +1533,7 @@ void main_driver(const char* argv)
 #if defined(TURB)
     if (turbForcing >= 1) {
         if (ParallelDescriptor::IOProcessor()) turboutfile.close();
+        if (ParallelDescriptor::IOProcessor()) turboutfiledecomp.close();
     }
 #endif
 
diff --git a/src_compressible_stag/writePlotFileStag.cpp b/src_compressible_stag/writePlotFileStag.cpp
index 5a0abb53..7bf35679 100644
--- a/src_compressible_stag/writePlotFileStag.cpp
+++ b/src_compressible_stag/writePlotFileStag.cpp
@@ -516,124 +516,4 @@ void WritePlotFilesSF_2D(const amrex::MultiFab& mag, const amrex::MultiFab& real
       WriteSingleLevelPlotfile(plotfilename2,realimag,varNames,geom2,time,step);
 }
 
-void EvaluateWritePlotFileVelGrad(int step,
-                                  const amrex::Real time,
-                                  const amrex::Geometry& geom,
-                                  const std::array<MultiFab, AMREX_SPACEDIM>& vel)
-{
-    BL_PROFILE_VAR("EvaluateWritePlotFileVelGrad()",EvaluateWritePlotFileVelGrad);
-
-    // Evaluate velocity gradient components and divergence and vorticity
-    MultiFab vel_grad;
-    
-    // Cell-Centered Velocity Gradient Stats (1,2,3 are directions)
-    // 0: u_1,1
-    // 1: u_2,2
-    // 2: u_3,3
-    // 3: u_1,2
-    // 4: u_1,3
-    // 5: u_2,3
-    // 6: divergence = u_1,1 + u_2,2 + u_3,3
-    // 7: vorticity = sqrt(wx + wy + wz)
-    vel_grad.define(convert(vel[0].boxArray(),IntVect(AMREX_D_DECL(0,0,0))), vel[0].DistributionMap(), 8, 0);
-    vel_grad.setVal(0.0);
-
-    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
-
-    for ( MFIter mfi(vel_grad,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
-        
-        const Box& bx = mfi.tilebox();
-        
-        const Array4<Real> & vgrad = vel_grad.array(mfi);
-        
-        AMREX_D_TERM(Array4<Real const> const& velx = vel[0].array(mfi);,
-                     Array4<Real const> const& vely = vel[1].array(mfi);,
-                     Array4<Real const> const& velz = vel[2].array(mfi););
-
-        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-        {
-            // u_1,1
-            vgrad(i,j,k,0) = (velx(i+1,j,k) - velx(i,j,k))/dx[0];
-            
-            // u_2,2
-            vgrad(i,j,k,1) = (vely(i,j+1,k) - vely(i,j,k))/dx[1];
-            
-            // u_3,3
-            vgrad(i,j,k,2) = (velz(i,j,k+1) - velz(i,j,k))/dx[2];
-
-            // divergence
-            vgrad(i,j,k,6) = vgrad(i,j,k,0) + vgrad(i,j,k,1) + vgrad(i,j,k,2);
-
-            // on edges: u_1,2 and u_2,1 and curl w1 = u_2,1 - u_1,2
-            Real u12_mm = (velx(i,j,k) - velx(i,j-1,k))/dx[1];
-            Real u21_mm = (vely(i,j,k) - vely(i-1,j,k))/dx[0];
-            Real w1_mm  = u21_mm - u12_mm;
-            Real u12_mp = (velx(i,j+1,k) - velx(i,j,k))/dx[1];
-            Real u21_mp = (vely(i,j+1,k) - vely(i-1,j+1,k))/dx[0];
-            Real w1_mp  = u21_mp - u12_mp;
-            Real u12_pm = (velx(i+1,j,k) - velx(i+1,j-1,k))/dx[1];
-            Real u21_pm = (vely(i+1,j,k) - vely(i,j,k))/dx[0];
-            Real w1_pm  = u21_pm - u12_pm;
-            Real u12_pp = (velx(i+1,j+1,k) - velx(i+1,j,k))/dx[1];
-            Real u21_pp = (vely(i+1,j+1,k) - vely(i,j+1,k))/dx[0];
-            Real w1_pp  = u21_pp - u12_pp;
-
-            // u_1,2
-            vgrad(i,j,k,3) = 0.25*(u12_mm + u12_mp + u12_pm + u12_pp);
-
-            // on edges: u_1,3 and u_3,1 and curl w2 = u_1,3 - u_3,1
-            Real u13_mm = (velx(i,j,k) - velx(i,j,k-1))/dx[2];
-            Real u31_mm = (velz(i,j,k) - velz(i-1,j,k))/dx[0];
-            Real w2_mm  = u13_mm - u31_mm;
-            Real u13_mp = (velx(i,j,k+1) - velx(i,j,k))/dx[2];
-            Real u31_mp = (velz(i,j,k+1) - velz(i-1,j,k+1))/dx[0];
-            Real w2_mp  = u13_mp - u31_mp;
-            Real u13_pm = (velx(i+1,j,k) - velx(i+1,j,k-1))/dx[2];
-            Real u31_pm = (velz(i+1,j,k) - velz(i,j,k))/dx[0];
-            Real w2_pm  = u13_pm - u31_pm;
-            Real u13_pp = (velx(i+1,j,k+1) - velx(i+1,j,k))/dx[2];
-            Real u31_pp = (velz(i+1,j,k+1) - velz(i,j,k+1))/dx[0];
-            Real w2_pp  = u13_pp - u31_pp;
-
-            // u_1,3
-            vgrad(i,j,k,4) = 0.25*(u13_mm + u13_mp + u13_pm + u13_pp);
-
-            // on edges: u_2,3 and u_3,2 and curl w2 = u_3,2 - u_2,3
-            Real u23_mm = (vely(i,j,k) - vely(i,j,k-1))/dx[2];
-            Real u32_mm = (velz(i,j,k) - velz(i,j-1,k))/dx[1];
-            Real w3_mm  = u32_mm - u23_mm;
-            Real u23_mp = (vely(i,j,k+1) - vely(i,j,k))/dx[2];
-            Real u32_mp = (velz(i,j,k+1) - velz(i,j-1,k+1))/dx[1];
-            Real w3_mp  = u32_mp - u23_mp;
-            Real u23_pm = (vely(i,j+1,k) - vely(i,j+1,k-1))/dx[2];
-            Real u32_pm = (velz(i,j+1,k) - velz(i,j,k))/dx[1];
-            Real w3_pm  = u32_pm - u23_pm;
-            Real u23_pp = (vely(i,j+1,k+1) - vely(i,j+1,k))/dx[2];
-            Real u32_pp = (velz(i,j+1,k+1) - velz(i,j,k+1))/dx[1];
-            Real w3_pp  = u32_pp - u23_pp;
-
-            // u_2,3
-            vgrad(i,j,k,5) = 0.25*(u23_mm + u23_mp + u23_pm + u23_pp);
-
-            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
-            vgrad(i,j,k,7) = sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
-                                        w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
-                                        w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
-        });
-    }
-
-    // Write on a plotfile
-    std::string plotfilename = amrex::Concatenate("vel_grad",step,9);
-    amrex::Vector<std::string> varNames(8);
-    varNames[0] = "ux_x";
-    varNames[1] = "uy_y";
-    varNames[2] = "uz_z";
-    varNames[3] = "ux_y";
-    varNames[4] = "ux_z";
-    varNames[5] = "uy_z";
-    varNames[6] = "div";
-    varNames[7] = "vort";
-    WriteSingleLevelPlotfile(plotfilename,vel_grad,varNames,geom,time,step);
-}
-
 

From 1d4cba917b12a17bb78f0f57164d54398a2bdd92 Mon Sep 17 00:00:00 2001
From: isriva <ishan66@gmail.com>
Date: Fri, 15 Sep 2023 16:02:39 -0700
Subject: [PATCH 08/11] minor changes to turbulence analysis

---
 src_compressible_stag/DeriveVelProp.cpp | 40 +++++++++++++++++--------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp
index 770dcc71..cbbcc5ae 100644
--- a/src_compressible_stag/DeriveVelProp.cpp
+++ b/src_compressible_stag/DeriveVelProp.cpp
@@ -30,6 +30,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     MultiFab ccTemp;
     MultiFab ccTempA;
     MultiFab ccTempDiv;
+    MultiFab eta_kin; // kinematic viscosity
     std::array< MultiFab, NUM_EDGE > curlU;
     std::array< MultiFab, NUM_EDGE > eta_edge;
     std::array< MultiFab, NUM_EDGE > curlUtemp;
@@ -41,6 +42,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempDiv.define(prim.boxArray(),prim.DistributionMap(),1,0);
+    eta_kin.define(prim.boxArray(),prim.DistributionMap(),1,0);
 #if (AMREX_SPACEDIM == 3)
     curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
     curlU[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0);
@@ -61,6 +63,18 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     curlUtemp[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
 #endif
 
+    // Get Kinematic Viscosity
+    for ( MFIter mfi(eta,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+        const Box& bx = mfi.tilebox();
+        const Array4<Real> & eta_kin_fab = eta_kin.array(mfi);
+        const Array4<const Real>& eta_fab = eta.array(mfi);
+        const Array4<const Real>& prim_fab = prim.array(mfi);
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            eta_kin_fab(i,j,k) = eta_fab(i,j,k) / prim_fab(i,j,k,0);
+        });
+    }
+
     // Setup temp variables
     Real temp;
     Vector<Real> tempvec(3);
@@ -100,6 +114,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     // Compute Velocity gradient moment sum
     // 2nd moment
     ccTemp.setVal(0.0);
+    ccTempA.setVal(0.0);
     for (int d=0; d<AMREX_SPACEDIM; ++d) {
         CCMoments(gradU,d,ccTempA,2,gradU2[d]);
         MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
@@ -126,13 +141,13 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     Real avg_mom4 = ComputeSpatialMean(ccTemp, 0); //  <\sum_i (du_i/dx_i)^4>
             
     // Taylor Microscale
-    taylor_len = u_rms/avg_mom2;
+    taylor_len = sqrt(3.0)*u_rms/sqrt(avg_mom2); // from Wang et al., JFM, 2012
 
     // Taylor Reynolds Number & Turbulent Mach number
     Real rho_avg = ComputeSpatialMean(prim, 0);
     Real eta_avg = ComputeSpatialMean(eta, 0);
-    taylor_Re = rho_avg*taylor_len*u_rms/eta_avg;
-    taylor_Ma = sqrt(3.0)*u_rms/c_speed;
+    taylor_Re = rho_avg*taylor_len*u_rms/eta_avg; // from from John, Donzis, Sreenivasan, PRL 2019
+    taylor_Ma = sqrt(3.0)*u_rms/c_speed; // from John, Donzis, Sreenivasan, PRL 2019
 
     // Skewness
     //Real skew1 = gradU3[0]/pow(gradU2[0],1.5); // <(du_1/dx_1)^3>/<(du_1/dx_1)^2>^1.5
@@ -151,27 +166,28 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     // Compute \omega (curl)
     ComputeCurlFaceToEdge(vel,curlU,geom);
     
-    // Solenoidal dissipation: <eta \omega_i \omega_i>/<rho>
-    AverageCCToEdge(eta,eta_edge,0,1,SPEC_BC_COMP,geom);
+    // Solenoidal dissipation: <eta_kin \omega_i \omega_i>/<rho>
+    AverageCCToEdge(eta_kin,eta_edge,0,1,SPEC_BC_COMP,geom);
     EdgeInnerProd(curlU,0,curlU,0,curlUtemp,tempvec);
     EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec);
     eps_s_vec[0] /= (n_cells[0]+1)*(n_cells[1]+1)*n_cells[2];
     eps_s_vec[1] /= (n_cells[0]+1)*(n_cells[2]+1)*n_cells[1];
     eps_s_vec[2] /= (n_cells[1]+1)*(n_cells[2]+1)*n_cells[0];
-    eps_s = (eps_s_vec[0] + eps_s_vec[1] + eps_s_vec[2])/rho_avg;
+    eps_s = (eps_s_vec[0] + eps_s_vec[1] + eps_s_vec[2]);
 
-    // Dilational dissipation (4/3)*<eta \sum_i (du_i/dx_i)^2>/<rho>
-    CCInnerProd(ccTempDiv,0,eta,0,ccTemp,eps_d);
-    eps_d *= (dProb*(4.0/3.0)/rho_avg);
+    // Dilational dissipation (4/3)*<eta_kin (\sum_i du_i/dx_i)^2>/<rho>
+    CCInnerProd(ccTempDiv,0,eta_kin,0,ccTemp,eps_d);
+    eps_d *= dProb*(4.0/3.0);
 
     // Ratio of Dilational to Solenoidal dissipation
     eps_ratio = eps_d/eps_s;
     Real eps_t = eps_s + eps_d;
 
     // Kolmogorov scales
-    kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_s)),0.25);
-    kolm_d = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_d)),0.25);
-    kolm_t = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*rho_avg*eps_t)),0.25);
+    Real eta_kin_avg = ComputeSpatialMean(eta_kin, 0);
+    kolm_s = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_s),0.25);
+    kolm_d = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_d),0.25);
+    kolm_t = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_t),0.25);
 
 }
 #endif

From 036995f64fbbab82ce15c95898e8b1f18ff32048 Mon Sep 17 00:00:00 2001
From: Ishan Srivastava <isriva@login15.frontier.olcf.ornl.gov>
Date: Fri, 15 Sep 2023 19:04:26 -0400
Subject: [PATCH 09/11] change default build parameters for OLCF

---
 exec/compressible_stag/build_frontier.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/exec/compressible_stag/build_frontier.sh b/exec/compressible_stag/build_frontier.sh
index 49fda8a1..d89d820b 100755
--- a/exec/compressible_stag/build_frontier.sh
+++ b/exec/compressible_stag/build_frontier.sh
@@ -21,4 +21,4 @@ export CFLAGS="-I${ROCM_PATH}/include"
 export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
 export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa"
 
-make -j18 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE
+make -j18 USE_CUDA=FALSE USE_HIP=TRUE DO_TURB=TRUE MAX_SPEC=2

From 778af2076c313e8be99028570a1308f155867f7e Mon Sep 17 00:00:00 2001
From: Ishan Srivastava <ishan66@gmail.com>
Date: Sat, 16 Sep 2023 11:06:29 -0700
Subject: [PATCH 10/11] fixed a small bug

---
 src_compressible_stag/DeriveVelProp.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp
index cbbcc5ae..87f3232d 100644
--- a/src_compressible_stag/DeriveVelProp.cpp
+++ b/src_compressible_stag/DeriveVelProp.cpp
@@ -42,7 +42,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempDiv.define(prim.boxArray(),prim.DistributionMap(),1,0);
-    eta_kin.define(prim.boxArray(),prim.DistributionMap(),1,0);
+    eta_kin.define(prim.boxArray(),prim.DistributionMap(),1,ngc);
 #if (AMREX_SPACEDIM == 3)
     curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
     curlU[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0);
@@ -65,7 +65,8 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
 
     // Get Kinematic Viscosity
     for ( MFIter mfi(eta,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
-        const Box& bx = mfi.tilebox();
+        // grow the box by ngc
+        const Box& bx = amrex::grow(mfi.tilebox(), ngc);
         const Array4<Real> & eta_kin_fab = eta_kin.array(mfi);
         const Array4<const Real>& eta_fab = eta.array(mfi);
         const Array4<const Real>& prim_fab = prim.array(mfi);

From 7ed8ece9c0e77ac83e0b672e45fb383420e6d35e Mon Sep 17 00:00:00 2001
From: jbb <jbbell@lbl.gov>
Date: Mon, 18 Sep 2023 13:42:19 -0700
Subject: [PATCH 11/11] fixed bug and added option to put in perturbation to
 initial profile

---
 exec/thinFilm/main_driver.cpp        | 34 +++++++++++++++++++++++++---
 exec/thinFilm/thinfilm_functions.cpp |  3 +++
 exec/thinFilm/thinfilm_namespace.H   |  1 +
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/exec/thinFilm/main_driver.cpp b/exec/thinFilm/main_driver.cpp
index ce876e2c..cce75b6b 100644
--- a/exec/thinFilm/main_driver.cpp
+++ b/exec/thinFilm/main_driver.cpp
@@ -196,6 +196,29 @@ void main_driver(const char* argv)
 
     // initialize height
     height.setVal(thinfilm_h0);
+
+    Real time = 0.;
+
+    // update height using forward Euler
+    if(thinfilm_pertamp > 0.){
+       for ( MFIter mfi(height,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+          const Box& bx = mfi.tilebox();
+            
+          const Array4<Real> & h = height.array(mfi);
+
+          amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+          {
+                h(i,j,k) += thinfilm_pertamp * thinfilm_h0*std::cos(2.*3.14159265358979323846264338327950288*(i+.5)*dx[0]/(prob_hi[0]-prob_lo[0]));
+                if(i==0 && j ==0){
+                     amrex::Print() << " HEIGHT " << time << " " << h(i,j,k)-thinfilm_h0 << std::endl;
+                }
+          });
+
+       }
+     }
+
+    height.FillBoundary(geom.periodicity());
     
     // Physical time constant for dimensional time
     Real t0 = 3.0*visc_coef*thinfilm_h0/thinfilm_gamma;
@@ -204,8 +227,7 @@ void main_driver(const char* argv)
     Real ConstNoise = 2.*k_B*T_init[0] / (3.*visc_coef);
     Real Const3dx = thinfilm_gamma / (3.*visc_coef);
     
-    Real time = 0.;
-    Real dt = 0.1 * (t0/std::pow(thinfilm_h0,4)) * std::pow(dx[0],4) / 16.;
+    Real dt = 0.25 * (t0/std::pow(thinfilm_h0,4)) * std::pow(dx[0],4) / 16.;
 
     int stats_count = 0;
     
@@ -248,11 +270,13 @@ void main_driver(const char* argv)
                                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 hfacex(i,j,k) = 0.5*( h(i-1,j,k) + h(i,j,k) );
+                hfacex(i,j,k) = thinfilm_h0;
                 gradhx(i,j,k) = ( h(i,j,k) - h(i-1,j,k) ) / dx[0];
             },
                                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 hfacey(i,j,k) = 0.5*( h(i,j-1,k) + h(i,j,k) );
+                hfacey(i,j,k) = thinfilm_h0;
                 gradhy(i,j,k) = ( h(i,j,k) - h(i,j-1,k) ) / dx[1];
             });
 
@@ -432,7 +456,7 @@ void main_driver(const char* argv)
             amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
                 h(i,j,k) -= dt * ( (fluxx(i+1,j,k) - fluxx(i,j,k)) / dx[0]
-                                  +(fluxy(i,j+1,k) - fluxy(i,j,k)) / dx[1] );
+                                  +(fluxy(i,j+1,k) - fluxy(i,j,k)) / dx[1]);
             });
 
         }
@@ -495,6 +519,10 @@ void main_driver(const char* argv)
                         amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                         {
                             dhstar(i,j,k) += ( h(thinfilm_icorr,j,k)-thinfilm_h0 ) * ( h(i,j,k)-thinfilm_h0 );
+                            // HACK
+                            if(i==0 && j ==0 && thinfilm_pertamp > 0.){
+                                amrex::Print() << " HEIGHT " << time << " " << h(i,j,k)-thinfilm_h0 << std::endl;
+                            }
                         });
                     } else {
                         amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
diff --git a/exec/thinFilm/thinfilm_functions.cpp b/exec/thinFilm/thinfilm_functions.cpp
index 01627c36..b60c2004 100644
--- a/exec/thinFilm/thinfilm_functions.cpp
+++ b/exec/thinFilm/thinfilm_functions.cpp
@@ -4,6 +4,7 @@
 
 AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_h0;
 AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_gamma;
+AMREX_GPU_MANAGED amrex::Real thinfilm::thinfilm_pertamp;
 
 AMREX_GPU_MANAGED int thinfilm::thinfilm_icorr;
 AMREX_GPU_MANAGED int thinfilm::thinfilm_jcorr;
@@ -15,11 +16,13 @@ void InitializeThinfilmNamespace() {
     // defaults
     thinfilm_icorr = 0;
     thinfilm_jcorr = 0;
+    thinfilm_pertamp = 0;
     
     ParmParse pp;
     
     pp.get("thinfilm_h0",thinfilm_h0);
     pp.get("thinfilm_gamma",thinfilm_gamma);
+    pp.get("thinfilm_pertamp",thinfilm_pertamp);
 
     pp.query("thinfilm_icorr",thinfilm_icorr);
     pp.query("thinfilm_icorr",thinfilm_jcorr);
diff --git a/exec/thinFilm/thinfilm_namespace.H b/exec/thinFilm/thinfilm_namespace.H
index 8ed079b8..5d5feac3 100644
--- a/exec/thinFilm/thinfilm_namespace.H
+++ b/exec/thinFilm/thinfilm_namespace.H
@@ -2,6 +2,7 @@ namespace thinfilm {
 
     extern AMREX_GPU_MANAGED amrex::Real thinfilm_h0;
     extern AMREX_GPU_MANAGED amrex::Real thinfilm_gamma;
+    extern AMREX_GPU_MANAGED amrex::Real thinfilm_pertamp;
 
     extern AMREX_GPU_MANAGED int thinfilm_icorr;
     extern AMREX_GPU_MANAGED int thinfilm_jcorr;