diff --git a/exec/compressible/GNUmakefile b/exec/compressible/GNUmakefile
index cd7ee153..5cee2876 100644
--- a/exec/compressible/GNUmakefile
+++ b/exec/compressible/GNUmakefile
@@ -11,6 +11,7 @@ DIM = 3
 TINY_PROFILE = FALSE
 MAX_SPEC = 8
 MAX_REAC = 5
+USE_FFT = TRUE
 
 USE_PARTICLES = FALSE
 
diff --git a/exec/compressible_stag/GNUmakefile b/exec/compressible_stag/GNUmakefile
index 24dfa949..1c7de9fa 100644
--- a/exec/compressible_stag/GNUmakefile
+++ b/exec/compressible_stag/GNUmakefile
@@ -12,9 +12,23 @@ DIM = 3
 TINY_PROFILE = FALSE
 MAX_SPEC = 8
 MAX_REAC = 5
+USE_FFT = TRUE
 
 USE_PARTICLES = FALSE
-DO_TURB = FALSE
+DO_TURB = FALSE
+
+USE_HEFFTE_FFTW   = FALSE
+USE_HEFFTE_CUFFT  = FALSE
+USE_HEFFTE_ROCFFT = FALSE
+USE_DISTRIBUTED_FFT = TRUE
+
+ifeq ($(USE_HEFFTE_FFTW),TRUE)
+  HEFFTE_HOME ?= ../../../heffte/
+else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+  HEFFTE_HOME ?= ../../../heffte-org/build_aware/
+else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+  HEFFTE_HOME ?= ../../../heffte-org/build_noaware/
+endif
 
 include $(AMREX_HOME)/Tools/GNUMake/Make.defs
 
@@ -41,23 +55,35 @@ include ../../src_rng/Make.package
 VPATH_LOCATIONS   += ../../src_rng/
 INCLUDE_LOCATIONS += ../../src_rng/
 
-
 include ../../src_common/Make.package
 VPATH_LOCATIONS   += ../../src_common/
 INCLUDE_LOCATIONS += ../../src_common/
 
+#ifeq ($(USE_HEFFTE_FFTW),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#endif
+
+ifeq ($(USE_HEFFTE_FFTW),TRUE)
+  DEFINES += -DHEFFTE_FFTW
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f
+else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+  DEFINES += -DHEFFTE_CUFFT
+else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+  DEFINES += -DHEFFTE_ROCFFT
+endif
+
 include $(AMREX_HOME)/Src/Base/Make.package
 
 include ../../src_analysis/Make.package
 VPATH_LOCATIONS   += ../../src_analysis/
 INCLUDE_LOCATIONS += ../../src_analysis/
-
+
 include $(AMREX_HOME)/Tools/GNUMake/Make.rules
 
-ifeq ($(findstring cgpu, $(HOST)), cgpu)
-  CXXFLAGS += $(FFTW)
-endif
-
 ifeq ($(USE_CUDA),TRUE)
   LIBRARIES += -lcufft
 else ifeq ($(USE_HIP),TRUE)
@@ -66,7 +92,7 @@ else ifeq ($(USE_HIP),TRUE)
   LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib
   LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft
 else
-  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f
 endif
 
 ifeq ($(DO_TURB), TRUE)
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile
new file mode 100644
index 00000000..052b3851
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/GNUmakefile
@@ -0,0 +1,69 @@
+AMREX_HOME ?= ../../../../amrex/
+
+DEBUG        = FALSE
+USE_MPI      = TRUE
+USE_OMP      = FALSE
+USE_CUDA     = FALSE
+USE_HIP      = FALSE
+COMP         = gnu
+DIM          = 3
+TINY_PROFILE = FALSE
+
+USE_HEFFTE_FFTW   = FALSE
+USE_HEFFTE_CUFFT  = FALSE
+USE_HEFFTE_ROCFFT = FALSE
+
+ifeq ($(USE_HEFFTE_FFTW),TRUE)
+  HEFFTE_HOME ?= ../../../../heffte/
+else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+  HEFFTE_HOME ?= ../../../../heffte-org/build_aware/
+else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+  HEFFTE_HOME ?= ../../../../heffte/
+endif
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+VPATH_LOCATIONS   += .
+INCLUDE_LOCATIONS += .
+
+#ifeq ($(USE_HEFFTE_FFTW),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+#  include $(HEFFTE_HOME)/src/Make.package
+#endif
+
+include ./Make.package
+ifeq ($(USE_HEFFTE_FFTW),TRUE)
+  DEFINES += -DHEFFTE_FFTW
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f
+else ifeq ($(USE_HEFFTE_CUFFT),TRUE)
+  DEFINES += -DHEFFTE_CUFFT
+  VPATH_LOCATIONS   += $(HEFFTE_HOME)/include
+  INCLUDE_LOCATIONS += $(HEFFTE_HOME)/include
+  LIBRARY_LOCATIONS += $(HEFFTE_HOME)/lib
+  LIBRARIES += -lheffte
+else ifeq ($(USE_HEFFTE_ROCFFT),TRUE)
+  DEFINES += -DHEFFTE_ROCFFT
+endif
+
+include $(AMREX_HOME)/Src/Base/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
+
+ifeq ($(USE_CUDA),TRUE)
+  LIBRARIES += -lcufft
+else ifeq ($(USE_HIP),TRUE)
+  # Use rocFFT. ROC_PATH is defined in amrex
+  INCLUDE_LOCATIONS += $(ROC_PATH)/rocfft/include
+  LIBRARY_LOCATIONS += $(ROC_PATH)/rocfft/lib
+  LIBRARIES += -L$(ROC_PATH)/rocfft/lib -lrocfft
+else
+  LIBRARIES += -L$(FFTW_DIR) -lfftw3_mpi -lfftw3 -lfftw3f
+endif
+
+ifeq ($(DO_TURB), TRUE)
+  DEFINES += -DTURB
+endif
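Note on the heFFTe flags above: each USE_HEFFTE_* option becomes a preprocessor define (-DHEFFTE_FFTW, -DHEFFTE_CUFFT, -DHEFFTE_ROCFFT), and spectral_functions.cpp branches on those defines around every transform. A minimal sketch of that compile-time dispatch, assuming heffte.h is available; the alias name fft_backend is illustrative and not part of this patch:

    // Sketch: map the make-level DEFINES onto a heFFTe backend tag.
    // "fft_backend" is a hypothetical alias used only in this example.
    #include <heffte.h>

    #if defined(HEFFTE_CUFFT)
    using fft_backend = heffte::backend::cufft;   // NVIDIA GPUs (cuFFT)
    #elif defined(HEFFTE_ROCFFT)
    using fft_backend = heffte::backend::rocfft;  // AMD GPUs (rocFFT)
    #else
    using fft_backend = heffte::backend::fftw;    // CPUs (FFTW)
    #endif

    // heffte::fft3d_r2c<fft_backend> could then replace the per-backend
    // #if blocks that are repeated around each transform in this patch.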
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/Make.package b/exec/compressible_stag/SPECTRAL_FILTER/Make.package
new file mode 100644
index 00000000..e0391922
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/Make.package
@@ -0,0 +1,5 @@
+CEXE_sources += main.cpp
+CEXE_sources += main_driver.cpp
+CEXE_sources += spectral_functions.cpp
+
+CEXE_headers += spectral_functions.H
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh b/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh
new file mode 100755
index 00000000..dcb05e97
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/build_frontier.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/bash
+
+## load necessary modules
+module load craype-accel-amd-gfx90a
+module load amd-mixed
+#module load rocm/5.2.0  # waiting for 5.6 for next bump
+module load cray-mpich/8.1.23
+module load cce/15.0.0   # must be loaded after rocm
+
+# GPU-aware MPI
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+# optimize HIP/ROCm compilation for MI250X
+export AMREX_AMD_ARCH=gfx90a
+
+# compiler environment hints
+##export CC=$(which hipcc)
+##export CXX=$(which hipcc)
+##export FC=$(which ftn)
+##export CFLAGS="-I${ROCM_PATH}/include"
+##export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
+##export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64 ${PE_MPICH_GTL_DIR_amd_gfx90a} -lmpi_gtl_hsa"
+export LDFLAGS="-L${MPICH_DIR}/lib -lmpi ${CRAY_XPMEM_POST_LINK_OPTS} -lxpmem ${PE_MPICH_GTL_DIR_amd_gfx90a} ${PE_MPICH_GTL_LIBS_amd_gfx90a}"
+export CXXFLAGS="-I${MPICH_DIR}/include"
+export HIPFLAGS="--amdgpu-target=gfx90a"
+
+make -j10 USE_HIP=TRUE USE_HEFFTE_ROCFFT=TRUE USE_ASSERTION=TRUE
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh
new file mode 100755
index 00000000..e3bd5aac
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/build_perlmutter.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/bash
+
+# required dependencies
+module load cray-fftw
+module load cmake
+module load cudatoolkit
+
+module list
+
+# necessary to use CUDA-Aware MPI and run a job
+export CRAY_ACCEL_TARGET=nvidia80
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+
+# optimize CUDA compilation for A100
+export AMREX_CUDA_ARCH=8.0
+
+# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3)
+# note: the cc/CC/ftn wrappers below add those
+export CXXFLAGS="-march=znver3"
+export CFLAGS="-march=znver3"
+
+# compiler environment hints
+export CC=cc
+export CXX=CC
+export FC=ftn
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=CC
+
+make -j10 USE_CUDA=TRUE USE_HEFFTE_CUFFT=TRUE USE_ASSERTION=TRUE MAX_SPEC=2
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main.cpp
new file mode 100644
index 00000000..95c149e5
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/main.cpp
@@ -0,0 +1,28 @@
+#include <AMReX.H>
+//#include <AMReX_ParmParse.H>
+
+// function declaration
+void main_driver (const char* argv);
+
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+
+//  this specific part has been moved to Flagellum/main_driver.cpp
+//    {
+//        amrex::ParmParse pp("particles");
+//#ifdef AMREX_USE_GPU
+//        bool particles_do_tiling = true;
+//#else
+//        bool particles_do_tiling = false;
+//#endif
+//        pp.queryAdd("do_tiling", particles_do_tiling);
+//    }
+
+    // argv[1] contains the name of the inputs file entered at the command line
+    main_driver(argv[1]);
+
+    amrex::Finalize();
+
+    return 0;
+}
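main_driver below pulls all of its controls from the ParmParse inputs file passed as argv[1]. A hypothetical inputs file with the keys queried in main_driver.cpp; the values are illustrative only and not from this patch:

    n_cells = 64 64 64          # domain size in cells
    max_grid_size = 32 32 32    # heFFTe path requires exactly one box per MPI rank
    prob_lo = 0.0 0.0 0.0       # physical lower corner
    prob_hi = 1.0 1.0 1.0       # physical upper corner
    restart = 1000000           # checkpoint step to read (chk#########)
    nprimvars = 7               # number of components in the prim MultiFab
    ngc = 2 2 2                 # ghost cells (defaults to 1 1 1 if omitted)
    kmin = 4.0                  # band-pass filter: keep modes with kmin <= |k| <= kmax
    kmax = 16.0
    plot_filter = 1             # nonzero: write the filtered fields to a plotfile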
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp
new file mode 100644
index 00000000..7db4b16d
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/main_driver.cpp
@@ -0,0 +1,450 @@
+#include "spectral_functions.H"
+#include
+#include
+#include "AMReX_ParmParse.H"
+
+#include "chrono"
+
+using namespace std::chrono;
+using namespace amrex;
+
+// argv contains the name of the inputs file entered at the command line
+void main_driver(const char* argv)
+{
+    BL_PROFILE_VAR("main_driver()",main_driver);
+
+    amrex::Vector<amrex::IntVect> nodal_flag_dir;
+    amrex::IntVect nodal_flag_x;
+    amrex::IntVect nodal_flag_y;
+    amrex::IntVect nodal_flag_z;
+    nodal_flag_dir.resize(3);
+
+    for (int i=0; i<3; ++i) {
+        nodal_flag_x[i] = int(i==0);
+        nodal_flag_y[i] = int(i==1);
+        nodal_flag_z[i] = int(i==2);
+        AMREX_D_TERM(nodal_flag_dir[0][i] = nodal_flag_x[i];,
+                     nodal_flag_dir[1][i] = nodal_flag_y[i];,
+                     nodal_flag_dir[2][i] = nodal_flag_z[i];);
+    }
+
+    // timer
+    Real ts1 = ParallelDescriptor::second();
+
+    std::string inputs_file = argv;
+
+    ParmParse pp;
+    amrex::Vector<amrex::Real> temp_real(3,0.);
+    amrex::Vector<int>         temp_int (3,0 );
+
+    amrex::Vector<int>         max_grid_size(3,1 );
+    amrex::Vector<int>         n_cells(3,0 );
+    amrex::Vector<amrex::Real> prob_lo(3,0 );
+    amrex::Vector<amrex::Real> prob_hi(3,0 );
+
+    if (pp.queryarr("n_cells",temp_int,0,3)) {
+        for (int i=0; i<3; ++i) {
+            n_cells[i] = temp_int[i];
+        }
+    }
+    int npts = n_cells[0]*n_cells[1]*n_cells[2];
+    if (pp.queryarr("prob_lo",temp_real,0,3)) {
+        for (int i=0; i<3; ++i) {
+            prob_lo[i] = temp_real[i];
+        }
+    }
+    if (pp.queryarr("prob_hi",temp_real,0,3)) {
+        for (int i=0; i<3; ++i) {
+            prob_hi[i] = temp_real[i];
+        }
+    }
+    pp.queryarr("max_grid_size",max_grid_size,0,3);
+
+    int restart;
+    pp.query("restart",restart);
+
+    int nprimvars;
+    pp.query("nprimvars",nprimvars);
+
+    int plot_filter = 0;
+    pp.query("plot_filter",plot_filter);
+
+    amrex::IntVect ngc;
+    for (int i=0; i<3; ++i) {
+        ngc[i] = 1; // number of ghost cells
+    }
+    if (pp.queryarr("ngc",temp_int,0,3)) {
+        for (int i=0; i<3; ++i) {
+            ngc[i] = temp_int[i];
+        }
+    }
+
+    amrex::Real kmin;
+    pp.query("kmin",kmin);
+
+    amrex::Real kmax;
+    pp.query("kmax",kmax);
+
+    std::array< MultiFab, 3 > vel;
+    MultiFab prim;
+
+    // make BoxArray and Geometry
+    BoxArray ba;
+    Geometry geom;
+    DistributionMapping dmap;
+
+    IntVect dom_lo(AMREX_D_DECL(           0,            0,            0));
+    IntVect dom_hi(AMREX_D_DECL(n_cells[0]-1, n_cells[1]-1, n_cells[2]-1));
+    Box domain(dom_lo, dom_hi);
+
+    // This defines the physical box from prob_lo to prob_hi in each direction.
+    RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])},
+                     {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])});
+
+    // This defines a Geometry object
+    Vector<int> is_periodic(3,1);  // force to be periodic -- can change later
+    geom.define(domain,&real_box,CoordSys::cartesian,is_periodic.data());
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+    const RealBox& realDomain = geom.ProbDomain();
+
+    SpectralReadCheckPoint(geom, domain, prim, vel, ba, dmap, n_cells, nprimvars, max_grid_size, ngc, restart);
+
+    MultiFab MFTurbScalar;
+    MultiFab MFTurbVel;
+    MultiFab vel_decomp_filter_heffte;
+    MultiFab scalar_filter_heffte;
+    MFTurbVel.define(ba, dmap, 3, 0);
+    MFTurbScalar.define(ba, dmap, 1, 0);
+    vel_decomp_filter_heffte.define(ba, dmap, 9, 0);
+    scalar_filter_heffte.define(ba, dmap, 1, 0);
+    vel_decomp_filter_heffte.setVal(0.0);
+    scalar_filter_heffte.setVal(0.0);
+
+    // Set BC: 1) fill boundary 2) physical
+    for (int d=0; d<3; d++) {
+        vel[d].FillBoundary(geom.periodicity());
+    }
+    prim.FillBoundary(geom.periodicity());
+
+    for(int d=0; d<3; d++) {
+        ShiftFaceToCC(vel[d], 0, MFTurbVel, d, 1);
+    }
+    MultiFab::Copy(MFTurbScalar, prim, 0, 0, 1, 0);
+
+    SpectralVelDecomp(MFTurbVel, vel_decomp_filter_heffte, kmin, kmax, geom, n_cells);
+    SpectralScalarDecomp(MFTurbScalar, scalar_filter_heffte, kmin, kmax, geom, n_cells);
+
+    MultiFab vel_decomp_filter;
+    MultiFab scalar_filter;
+    vel_decomp_filter.define(ba, dmap, 9, 2);
+    scalar_filter.define(ba, dmap, 1, 2);
+    MultiFab::Copy(vel_decomp_filter,vel_decomp_filter_heffte,0,0,9,0);
+    MultiFab::Copy(scalar_filter,scalar_filter_heffte,0,0,1,0);
+    vel_decomp_filter.FillBoundary(geom.periodicity());
+    scalar_filter.FillBoundary(geom.periodicity());
+
+    if (plot_filter) SpectralWritePlotFile(restart, kmin, kmax, geom, vel_decomp_filter, scalar_filter, MFTurbVel, MFTurbScalar);
+
+    // Turbulence Diagnostics
+    Real u_rms, u_rms_s, u_rms_d, delta_u_rms;
+    Real taylor_len, taylor_Re_eta;
+    Real skew, skew_s, skew_d, kurt, kurt_s, kurt_d;
+    Vector<Real> var(9, 0.0);
+    Real skew_vort, kurt_vort, skew_div, kurt_div;
+    {
+        Vector<Real> dProb(3);
+        dProb[0] = 1.0/((n_cells[0]+1)*n_cells[1]*n_cells[2]);
+        dProb[1] = 1.0/((n_cells[1]+1)*n_cells[2]*n_cells[0]);
+        dProb[2] = 1.0/((n_cells[2]+1)*n_cells[0]*n_cells[1]);
+
+        // Setup temp MultiFabs
+        std::array< MultiFab, 3 > gradU;
+        std::array< MultiFab, 3 > faceTemp;
+        MultiFab sound_speed;
+        MultiFab ccTemp;
+        MultiFab ccTempA;
+        AMREX_D_TERM(gradU[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 9, 0);,
+                     gradU[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 9, 0);,
+                     gradU[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 9, 0););
+        AMREX_D_TERM(faceTemp[0].define(convert(prim.boxArray(),nodal_flag_x), prim.DistributionMap(), 1, 0);,
+                     faceTemp[1].define(convert(prim.boxArray(),nodal_flag_y), prim.DistributionMap(), 1, 0);,
+                     faceTemp[2].define(convert(prim.boxArray(),nodal_flag_z), prim.DistributionMap(), 1, 0););
+        sound_speed.define(prim.boxArray(),prim.DistributionMap(),1,0);
+        ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
+        ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0);
+
+        // Setup temp variables
+        Vector<Real> gradU2_temp(3);
+        Vector<Real> gradU2(3);
+        Vector<Real> gradU3(3);
+        Vector<Real> gradU4(3);
+        Vector<Real> gradU2_s(3);
+        Vector<Real> gradU3_s(3);
+        Vector<Real> gradU4_s(3);
+        Vector<Real> gradU2_d(3);
+        Vector<Real> gradU3_d(3);
+        Vector<Real> gradU4_d(3);
+
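For reference, the kinetic-energy diagnostics computed next are as follows, with N = npts cells and the nine components of vel_decomp_filter grouped as total (0-2), solenoidal (3-5), and dilatational (6-8) triplets; the same expression is evaluated once per group:

    u_{rms} = \sqrt{ \frac{1}{3N} \sum_{\mathrm{cells}} \left( u^2 + v^2 + w^2 \right) },
    \qquad
    \delta_{u_{rms}} = u_{rms}^{(d)} / u_{rms}^{(s)} .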
+        Vector<int> comps  {0,1,2};
+        Vector<int> comps_s{3,4,5};
+        Vector<int> comps_d{6,7,8};
+
+        // turbulent kinetic energy (total)
+        ccTemp.setVal(0.0);
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,0,vel_decomp_filter,0,0,1,0); //uu
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,1,vel_decomp_filter,1,0,1,0); //vv
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,2,vel_decomp_filter,2,0,1,0); //ww
+        u_rms = ccTemp.sum(0)/npts;
+        u_rms = sqrt(u_rms/3.0);
+        MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww)
+
+        // turbulent kinetic energy (solenoidal)
+        ccTemp.setVal(0.0);
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,3,vel_decomp_filter,3,0,1,0); //uu
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,4,vel_decomp_filter,4,0,1,0); //vv
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,5,vel_decomp_filter,5,0,1,0); //ww
+        u_rms_s = ccTemp.sum(0)/npts;
+        u_rms_s = sqrt(u_rms_s/3.0);
+        MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww)
+
+        // turbulent kinetic energy (dilatational)
+        ccTemp.setVal(0.0);
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,6,vel_decomp_filter,6,0,1,0); //uu
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,7,vel_decomp_filter,7,0,1,0); //vv
+        MultiFab::AddProduct(ccTemp,vel_decomp_filter,8,vel_decomp_filter,8,0,1,0); //ww
+        u_rms_d = ccTemp.sum(0)/npts;
+        u_rms_d = sqrt(u_rms_d/3.0);
+        MultiFab::Multiply(ccTemp,prim,0,0,1,0); // rho*(uu+vv+ww)
+
+        // ratio of turbulent kinetic energies
+        delta_u_rms = u_rms_d/u_rms_s;
+
+        // compute gradU = [du/dx dv/dy dw/dz] at cell-centers
+        ComputeGrad(vel_decomp_filter,gradU,0,0,9,-1,geom,0);
+
+        // Compute Velocity gradient moment sum
+        // 2nd moment (total)
+        FCMoments(gradU,comps,faceTemp,2,gradU2_temp);
+        gradU2[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU2[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU2[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+        ccTemp.setVal(0.0);
+        ccTempA.setVal(0.0);
+        ShiftFaceToCC(faceTemp[0],0,ccTempA,0,1);
+        MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+        ShiftFaceToCC(faceTemp[1],0,ccTempA,0,1);
+        MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+        ShiftFaceToCC(faceTemp[2],0,ccTempA,0,1);
+        MultiFab::Add(ccTemp,ccTempA,0,0,1,0);
+        Real avg_mom2 = ccTemp.sum(0)/npts;
+        // 2nd moment (solenoidal)
+        FCMoments(gradU,comps_s,faceTemp,2,gradU2_temp);
+        gradU2_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU2_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU2_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+        // 2nd moment (dilatational)
+        FCMoments(gradU,comps_d,faceTemp,2,gradU2_temp);
+        gradU2_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU2_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU2_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+        // Taylor Mach
+        //ComputeSoundSpeed(sound_speed,prim,2);
+        //Real c_speed = sound_speed.sum(0)/npts;
+        Real rho_avg = prim.sum(0)/npts;
+        // Taylor Ma
+        //taylor_Ma = sqrt(3.0)*u_rms/c_speed;
+        // Taylor Microscale
+        taylor_len = sqrt(3.0)*u_rms/sqrt(avg_mom2); // from Wang et al., JFM, 2012
+        taylor_Re_eta = rho_avg*taylor_len*u_rms; // from John, Donzis, Sreenivasan, PRL 2019
+
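The Taylor microscale just computed follows Wang et al. (JFM, 2012), with the denominator being the mean squared longitudinal velocity gradients accumulated in avg_mom2; taylor_Re_eta stores the product that the code labels with a reference to John, Donzis & Sreenivasan (PRL, 2019):

    \lambda = \frac{\sqrt{3}\, u_{rms}}
                   {\sqrt{ \left\langle \sum_{i} (\partial u_i / \partial x_i)^2 \right\rangle }},
    \qquad
    \mathrm{taylor\_Re\_eta} = \bar{\rho}\, \lambda\, u_{rms} .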
+        // Compute Velocity gradient moment sum
+        // 3rd moment (total)
+        FCMoments(gradU,comps,faceTemp,3,gradU2_temp);
+        gradU3[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU3[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU3[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+        // 3rd moment (solenoidal)
+        FCMoments(gradU,comps_s,faceTemp,3,gradU2_temp);
+        gradU3_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU3_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU3_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+        // 3rd moment (dilatational)
+        FCMoments(gradU,comps_d,faceTemp,3,gradU2_temp);
+        gradU3_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU3_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU3_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+        // Compute Velocity gradient moment sum
+        // 4th moment (total)
+        FCMoments(gradU,comps,faceTemp,4,gradU2_temp);
+        gradU4[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU4[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU4[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+        // 4th moment (solenoidal)
+        FCMoments(gradU,comps_s,faceTemp,4,gradU2_temp);
+        gradU4_s[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU4_s[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU4_s[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+        // 4th moment (dilatational)
+        FCMoments(gradU,comps_d,faceTemp,4,gradU2_temp);
+        gradU4_d[0] = dProb[0]*(faceTemp[0].sum_unique(0,false,geom.periodicity()));
+        gradU4_d[1] = dProb[1]*(faceTemp[1].sum_unique(0,false,geom.periodicity()));
+        gradU4_d[2] = dProb[2]*(faceTemp[2].sum_unique(0,false,geom.periodicity()));
+
+        // Skewness
+        // <\sum_i (du_i/dx_i)^3> / (\sum_i <(du_i/dx_i)^2>^1.5)
+        skew   = (gradU3[0] + gradU3[1] + gradU3[2])/
+                 (pow(gradU2[0],1.5) + pow(gradU2[1],1.5) + pow(gradU2[2],1.5));
+        skew_s = (gradU3_s[0] + gradU3_s[1] + gradU3_s[2])/
+                 (pow(gradU2_s[0],1.5) + pow(gradU2_s[1],1.5) + pow(gradU2_s[2],1.5));
+        skew_d = (gradU3_d[0] + gradU3_d[1] + gradU3_d[2])/
+                 (pow(gradU2_d[0],1.5) + pow(gradU2_d[1],1.5) + pow(gradU2_d[2],1.5));
+
+        // Kurtosis
+        // <\sum_i (du_i/dx_i)^4> / (\sum_i <(du_i/dx_i)^2>^2)
+        kurt   = (gradU4[0] + gradU4[1] + gradU4[2])/
+                 (pow(gradU2[0],2.0) + pow(gradU2[1],2.0) + pow(gradU2[2],2.0));
+        kurt_s = (gradU4_s[0] + gradU4_s[1] + gradU4_s[2])/
+                 (pow(gradU2_s[0],2.0) + pow(gradU2_s[1],2.0) + pow(gradU2_s[2],2.0));
+        kurt_d = (gradU4_d[0] + gradU4_d[1] + gradU4_d[2])/
+                 (pow(gradU2_d[0],2.0) + pow(gradU2_d[1],2.0) + pow(gradU2_d[2],2.0));
+
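In the comments above, angle brackets denote spatial means; written out, the velocity-gradient skewness and kurtosis being computed are:

    S = \frac{ \sum_i \langle (\partial u_i/\partial x_i)^3 \rangle }
             { \sum_i \langle (\partial u_i/\partial x_i)^2 \rangle^{3/2} },
    \qquad
    K = \frac{ \sum_i \langle (\partial u_i/\partial x_i)^4 \rangle }
             { \sum_i \langle (\partial u_i/\partial x_i)^2 \rangle^{2} } .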
+        // velocity variances
+        for (int i=0;i<9;++i) {
+            ccTemp.setVal(0.0);
+            MultiFab::AddProduct(ccTemp,vel_decomp_filter,i,vel_decomp_filter,i,0,1,0);
+            Real mean  = vel_decomp_filter.sum(i)/npts;
+            Real mean2 = ccTemp.sum(0)/npts;
+            var[i] = mean2 - mean*mean;
+        }
+
+        // skewness and kurtosis of velocity vorticity and divergence
+        MultiFab vel_stats;
+        vel_stats.define(prim.boxArray(),prim.DistributionMap(),4,0); // div, w1, w2, w3
+        for ( MFIter mfi(vel_stats,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+            const Box& bx = mfi.tilebox();
+            const Array4<const Real>& v_decomp = vel_decomp_filter.array(mfi);
+            const Array4<      Real>& v_stats  = vel_stats.array(mfi);
+            amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            {
+                // divergence
+                v_stats(i,j,k,0) = 0.5*( (v_decomp(i+1,j,k,0) - v_decomp(i-1,j,k,0))/dx[0] +
+                                         (v_decomp(i,j+1,k,1) - v_decomp(i,j-1,k,1))/dx[1] +
+                                         (v_decomp(i,j,k+1,2) - v_decomp(i,j,k-1,2))/dx[2] );
+
+                // curl w1 = u_2,1 - u_1,2
+                v_stats(i,j,k,1) = 0.5*( (v_decomp(i+1,j,k,1) - v_decomp(i-1,j,k,1))/dx[0] -
+                                         (v_decomp(i,j+1,k,0) - v_decomp(i,j-1,k,0))/dx[1] );
+
+                // curl w2 = u_1,3 - u_3,1
+                v_stats(i,j,k,2) = 0.5*( (v_decomp(i,j,k+1,0) - v_decomp(i,j,k-1,0))/dx[2] -
+                                         (v_decomp(i+1,j,k,2) - v_decomp(i-1,j,k,2))/dx[0] );
+
+                // curl w3 = u_3,2 - u_2,3
+                v_stats(i,j,k,3) = 0.5*( (v_decomp(i,j+1,k,2) - v_decomp(i,j-1,k,2))/dx[1] -
+                                         (v_decomp(i,j,k+1,1) - v_decomp(i,j,k-1,1))/dx[2] );
+
+            });
+        }
+        // compute spatial mean
+        Real mean_div = vel_stats.sum(0) / (npts);
+        Real mean_w1  = vel_stats.sum(1) / (npts);
+        Real mean_w2  = vel_stats.sum(2) / (npts);
+        Real mean_w3  = vel_stats.sum(3) / (npts);
+        vel_stats.plus(-1.0*mean_div, 0, 1);
+        vel_stats.plus(-1.0*mean_w1,  1, 1);
+        vel_stats.plus(-1.0*mean_w2,  2, 1);
+        vel_stats.plus(-1.0*mean_w3,  3, 1);
+
+        Vector<Real> U2(4);
+        Vector<Real> U3(4);
+        Vector<Real> U4(4);
+        for (int i=0;i<4;++i) {
+            CCMoments(vel_stats,i,ccTempA,2,U2[i]);
+            CCMoments(vel_stats,i,ccTempA,3,U3[i]);
+            CCMoments(vel_stats,i,ccTempA,4,U4[i]);
+        }
+        skew_div = U3[0]/pow(U2[0],1.5);
+        kurt_div = U4[0]/pow(U2[0],2.0);
+        skew_vort = (U3[1] + U3[2] + U3[3])/
+                    (pow(U2[1],1.5) + pow(U2[2],1.5) + pow(U2[3],1.5));
+        kurt_vort = (U4[1] + U4[2] + U4[3])/
+                    (pow(U2[1],2.0) + pow(U2[2],2.0) + pow(U2[3],2.0));
+
+    }
+    std::string turbfilename = amrex::Concatenate("turbstats_filtered_",restart,9);
+    std::ostringstream os;
+    os << std::setprecision(3) << kmin;
+    turbfilename += os.str();
+    turbfilename += "_";
+    std::ostringstream oss;
+    oss << std::setprecision(3) << kmax;
+    turbfilename += oss.str();
+
+    std::ofstream turboutfile;
+    if (ParallelDescriptor::IOProcessor()) {
+        turboutfile.open(turbfilename, std::ios::app);
+    }
+    if (ParallelDescriptor::IOProcessor()) {
+        turboutfile << "u_rms " << "u_rms_s " << "u_rms_d " << "delta_u_rms "
+                    << "TaylorLen " << "TaylorRe*Eta "
+                    << "skew " << "skew_s " << "skew_d "
+                    << "kurt " << "kurt_s " << "kurt_d "
+                    << "var_ux " << "var_uy " << "var_uz "
+                    << "var_uxs " << "var_uys " << "var_uzs "
+                    << "var_uxd " << "var_uyd " << "var_uzd "
+                    << "skew_div " << "kurt_div "
+                    << "skew_vort " << "kurt_vort "
+                    << std::endl;
+
+        turboutfile << u_rms << " ";
+        turboutfile << u_rms_s << " ";
+        turboutfile << u_rms_d << " ";
+        turboutfile << delta_u_rms << " ";
+        turboutfile << taylor_len << " ";
+        turboutfile << taylor_Re_eta << " ";
+        turboutfile << skew << " ";
+        turboutfile << skew_s << " ";
+        turboutfile << skew_d << " ";
+        turboutfile << kurt << " ";
+        turboutfile << kurt_s << " ";
+        turboutfile << kurt_d << " ";
+        for (int i=0;i<9;++i) {
+            turboutfile << var[i] << " ";
+        }
+        turboutfile << skew_div << " ";
+        turboutfile << kurt_div << " ";
+        turboutfile << skew_vort << " ";
+        turboutfile << kurt_vort << " ";
+        turboutfile << std::endl;
+    }
+    // timer
+    Real ts2 = ParallelDescriptor::second() - ts1;
+    ParallelDescriptor::ReduceRealMax(ts2, ParallelDescriptor::IOProcessorNumber());
+    amrex::Print() << "Time (spectral filtering) " << ts2 << " seconds\n";
+
+    // MultiFab memory usage
+    const int IOProc = ParallelDescriptor::IOProcessorNumber();
+
+    amrex::Long min_fab_megabytes = amrex::TotalBytesAllocatedInFabsHWM()/1048576;
+    amrex::Long max_fab_megabytes = min_fab_megabytes;
+
+    ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc);
+    ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc);
+
+    amrex::Print() << "High-water FAB megabyte spread across MPI nodes: ["
+                   << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n";
+
+    min_fab_megabytes = amrex::TotalBytesAllocatedInFabs()/1048576;
+    max_fab_megabytes = min_fab_megabytes;
+
+    ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc);
+    ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc);
+
+    amrex::Print() << "Current FAB megabyte spread across MPI nodes: ["
+                   << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n";
+
+    if (ParallelDescriptor::IOProcessor()) turboutfile.close();
+}
" << max_fab_megabytes << "]\n"; + + min_fab_megabytes = amrex::TotalBytesAllocatedInFabs()/1048576; + max_fab_megabytes = min_fab_megabytes; + + ParallelDescriptor::ReduceLongMin(min_fab_megabytes, IOProc); + ParallelDescriptor::ReduceLongMax(max_fab_megabytes, IOProc); + + amrex::Print() << "Curent FAB megabyte spread across MPI nodes: [" + << min_fab_megabytes << " ... " << max_fab_megabytes << "]\n"; + + if (ParallelDescriptor::IOProcessor()) turboutfile.close(); +} diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H new file mode 100644 index 00000000..4f99f51a --- /dev/null +++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.H @@ -0,0 +1,113 @@ +#ifndef _spectral_functions_stag_H_ +#define _spectral_functions_stag_H_ + +#include +#include +#include +#include + +#include +#include + + +#ifdef AMREX_USE_CUDA +#include +#elif AMREX_USE_HIP +# if __has_include() // ROCm 5.3+ +# include +# else +# include +# endif +#else +#include +#include +#endif + +#include + +#include + +#define ALIGN 16 + +using namespace amrex; + +#if !defined(HEFFTE_FFTW) && !defined(HEFFTE_CUFFT) && !defined(HEFFTE_ROCFFT) +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err); +#endif +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err); +void Assert_rocfft_status (std::string const& name, rocfft_status status); +#endif +#endif + +void SpectralReadCheckPoint(amrex::Geometry& geom, + const amrex::Box& domain, + amrex::MultiFab& prim, + std::array& vel, + BoxArray& ba, DistributionMapping& dmap, + const amrex::Vector n_cells, + const int nprimvars, + const amrex::Vector max_grid_size, + const amrex::IntVect ngc, + const int restart); + +void SpectralVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp_filter, + const amrex::Real kmin, + const amrex::Real kmax, + const amrex::Geometry& geom, + const amrex::Vector n_cells); + +void SpectralScalarDecomp(const MultiFab& scalar, + MultiFab& scalar_filter, + const amrex::Real kmin, + const amrex::Real kmax, + const amrex::Geometry& geom, + const amrex::Vector n_cells); + +void SpectralWritePlotFile(const int step, + const amrex::Real& kmin, + const amrex::Real& kmax, + const amrex::Geometry& geom, + const amrex::MultiFab& vel_decomp_in, + const amrex::MultiFab& scalar_in, + const amrex::MultiFab& vel_total, + const amrex::MultiFab& scalar_total); + +void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, + const std::string& checkpointname, + BoxArray& ba_old, DistributionMapping& dmap_old, + int NVARS, int NGC, const amrex::IntVect ngc, + int nodal_flag=-1); + +void ShiftFaceToCC(const MultiFab& face_in, int face_in_comp, + MultiFab& cc_in, int cc_in_comp, + int ncomp); + +void ComputeGrad(const MultiFab & phi_in, std::array & gphi, + int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom, + int increment); + +void SumStag(const std::array& m1, + amrex::Vector& sum); + +void FCMoments(const std::array& m1, + const amrex::Vector& comps, + std::array& mscr, + const int& power, + amrex::Vector& prod_val); + +void SumCC(const amrex::MultiFab& m1, + const int& comp, + amrex::Real& sum, + const bool& divide_by_ncells); + +void CCMoments(const amrex::MultiFab& m1, + const int& comp1, + amrex::MultiFab& mscr, + const int& power, + amrex::Real& prod_val); + +#endif + diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp new 
diff --git a/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp
new file mode 100644
index 00000000..b36e18b8
--- /dev/null
+++ b/exec/compressible_stag/SPECTRAL_FILTER/spectral_functions.cpp
@@ -0,0 +1,1077 @@
+#include "spectral_functions.H"
+#include "AMReX_PlotFileUtil.H"
+#include "AMReX_PlotFileDataImpl.H"
+
+#include
+
+#include "chrono"
+#include
+#include "AMReX_PlotFileUtil.H"
+#include "AMReX_BoxArray.H"
+
+using namespace std::chrono;
+
+namespace {
+    void GotoNextLine (std::istream& is)
+    {
+        constexpr std::streamsize bl_ignore_max { 100000 };
+        is.ignore(bl_ignore_max, '\n');
+    }
+}
+
+void SpectralReadCheckPoint(amrex::Geometry& geom,
+                            const amrex::Box& domain,
+                            amrex::MultiFab& prim,
+                            std::array<MultiFab, 3>& vel,
+                            BoxArray& ba, DistributionMapping& dmap,
+                            const amrex::Vector<int> n_cells,
+                            const int nprimvars,
+                            const amrex::Vector<int> max_grid_size,
+                            const amrex::IntVect ngc,
+                            const int restart)
+{
+    // timer for profiling
+    BL_PROFILE_VAR("SpectralReadCheckPoint()",SpectralReadCheckPoint);
+
+    // checkpoint file name, e.g., chk0000010
+    const std::string& checkpointname = amrex::Concatenate("chk",restart,9);
+
+    amrex::Print() << "Restart from checkpoint " << checkpointname << "\n";
+
+    VisMF::IO_Buffer io_buffer(VisMF::GetIOBufferSize());
+
+    std::string line, word;
+
+    // read in old boxarray, and create old distribution map (this is to read in MFabs)
+    BoxArray ba_old;
+    DistributionMapping dmap_old;
+
+    // initialize new boxarray
+    ba.define(domain);
+    ba.maxSize(IntVect(max_grid_size));
+    dmap.define(ba, ParallelDescriptor::NProcs());
+
+    amrex::Vector<amrex::IntVect> nodal_flag_dir;
+    amrex::IntVect nodal_flag_x;
+    amrex::IntVect nodal_flag_y;
+    amrex::IntVect nodal_flag_z;
+    nodal_flag_dir.resize(3);
+
+    for (int i=0; i<3; ++i) {
+        nodal_flag_x[i] = int(i==0);
+        nodal_flag_y[i] = int(i==1);
+        nodal_flag_z[i] = int(i==2);
+        AMREX_D_TERM(nodal_flag_dir[0][i] = nodal_flag_x[i];,
+                     nodal_flag_dir[1][i] = nodal_flag_y[i];,
+                     nodal_flag_dir[2][i] = nodal_flag_z[i];);
+    }
+
+    // Header
+    {
+        std::string File(checkpointname + "/Header");
+        Vector<char> fileCharPtr;
+        ParallelDescriptor::ReadAndBcastFile(File, fileCharPtr);
+        std::string fileCharPtrString(fileCharPtr.dataPtr());
+        std::istringstream is(fileCharPtrString, std::istringstream::in);
+
+        // read in title line
+        std::getline(is, line);
+
+        // read in time step number
+        int step;
+        is >> step;
+        GotoNextLine(is);
+
+        // read in time
+        Real time;
+        is >> time;
+        GotoNextLine(is);
+
+        // read in statsCount
+        int statsCount;
+        is >> statsCount;
+        GotoNextLine(is);
+
+        // read in BoxArray (fluid) from Header
+        ba_old.readFrom(is);
+        GotoNextLine(is);
+
+        // create old distribution mapping
+        dmap_old.define(ba_old, ParallelDescriptor::NProcs());
+
+        prim.define(ba,dmap,nprimvars,ngc);
+        // velocity and momentum (instantaneous, means, variances)
+        for (int d=0; d<3; d++) {
+            vel[d].define(convert(ba,nodal_flag_dir[d]), dmap, 1, ngc);
+        }
+    }
+
+    // C++ random number engine
+    // each MPI process reads in its own file
+    int comm_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
+
+    int n_ranks;
+    MPI_Comm_size(MPI_COMM_WORLD, &n_ranks);
+
+    // read in the MultiFab data
+    Read_Copy_MF_Checkpoint(prim,"prim",checkpointname,ba_old,dmap_old,nprimvars,1,ngc);
+
+    Read_Copy_MF_Checkpoint(vel[0],"velx",checkpointname,ba_old,dmap_old,1,1,ngc,0);
+    Read_Copy_MF_Checkpoint(vel[1],"vely",checkpointname,ba_old,dmap_old,1,1,ngc,1);
+    Read_Copy_MF_Checkpoint(vel[2],"velz",checkpointname,ba_old,dmap_old,1,1,ngc,2);
+
+    // FillBoundaries
+    prim.FillBoundary(geom.periodicity());
+    vel[0].FillBoundary(geom.periodicity());
+    vel[1].FillBoundary(geom.periodicity());
+    vel[2].FillBoundary(geom.periodicity());
+}
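SpectralVelDecomp below allocates complex boxes of size (Nx/2+1) x Ny x Nz rather than Nx x Ny x Nz. For a real input field the Fourier coefficients obey Hermitian symmetry, so only the non-negative x-wavenumbers are independent:

    \hat{u}(-k_x, -k_y, -k_z) = \overline{ \hat{u}(k_x, k_y, k_z) }
    \quad \Longrightarrow \quad
    k_x = 0, 1, \dots, N_x/2 \ \text{suffices} .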
+
+void SpectralVelDecomp(const MultiFab& vel,
+                       MultiFab& vel_decomp_filter,
+                       const amrex::Real kmin,
+                       const amrex::Real kmax,
+                       const amrex::Geometry& geom,
+                       const amrex::Vector<int> n_cells)
+{
+    BL_PROFILE_VAR("SpectralVelDecomp()",SpectralVelDecomp);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3,
+        "SpectralVelDecomp: must have 3 components of input vel MultiFab");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1,
+        "SpectralVelDecomp: Must have one Box per MPI process when using heFFTe");
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    long npts;
+    Box domain = geom.Domain();
+    npts = (domain.length(0)*domain.length(1)*domain.length(2));
+    Real sqrtnpts = std::sqrt(npts);
+
+    // get box array and distribution map of vel
+    DistributionMapping dm = vel.DistributionMap();
+    BoxArray ba = vel.boxArray();
+
+    // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid
+    Box local_box;
+    int local_boxid;
+    {
+        for (int i = 0; i < ba.size(); ++i) {
+            Box b = ba[i];
+            // each MPI rank has its own local_box Box and local_boxid ID
+            if (ParallelDescriptor::MyProc() == dm[i]) {
+                local_box = b;
+                local_boxid = i;
+            }
+        }
+    }
+
+    // now each MPI rank works on its own box
+    // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset
+
+    // start by coarsening each box by 2 in the x-direction
+    Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1)));
+
+    // if the coarsened box's high-x index is even, we shrink the size in 1 in x
+    // this avoids overlap between coarsened boxes
+    if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) {
+        c_local_box.setBig(0,c_local_box.bigEnd(0)-1);
+    }
+    // for any boxes that touch the hi-x domain we
+    // increase the size of boxes by 1 in x
+    // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz)
+    if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) {
+        c_local_box.growHi(0,1);
+    }
+
+    // each MPI rank gets storage for its piece of the fft
+    BaseFab<GpuComplex<Real> > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx
+    BaseFab<GpuComplex<Real> > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly
+    BaseFab<GpuComplex<Real> > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz
+    BaseFab<GpuComplex<Real> > spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx
+    BaseFab<GpuComplex<Real> > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly
+    BaseFab<GpuComplex<Real> > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz
+    BaseFab<GpuComplex<Real> > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx
+    BaseFab<GpuComplex<Real> > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly
+    BaseFab<GpuComplex<Real> > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz
+    MultiFab vel_single(ba, dm, 1, 0);
+
+    int r2c_direction = 0;
+
+    // ForwardTransform
+    // X
+    using heffte_complex = typename heffte::fft_output<Real>::type;
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        vel_single.ParallelCopy(vel, 0, 0, 1);
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr();
+        fft.forward(vel_single[local_boxid].dataPtr(),spectral_data);
+    }
+    // Y
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        vel_single.ParallelCopy(vel, 1, 0, 1);
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr();
+        fft.forward(vel_single[local_boxid].dataPtr(),spectral_data);
+    }
+    // Z
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        vel_single.ParallelCopy(vel, 2, 0, 1);
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr();
+        fft.forward(vel_single[local_boxid].dataPtr(),spectral_data);
+    }
+
+    Gpu::streamSynchronize();
+
+    int nx = n_cells[0];
+    int ny = n_cells[1];
+    int nz = n_cells[2];
+
+    // Decompose velocity field into solenoidal and dilatational
+    Array4< GpuComplex<Real> > spectral_tx = spectral_field_Tx.array();
+    Array4< GpuComplex<Real> > spectral_ty = spectral_field_Ty.array();
+    Array4< GpuComplex<Real> > spectral_tz = spectral_field_Tz.array();
+    Array4< GpuComplex<Real> > spectral_sx = spectral_field_Sx.array();
+    Array4< GpuComplex<Real> > spectral_sy = spectral_field_Sy.array();
+    Array4< GpuComplex<Real> > spectral_sz = spectral_field_Sz.array();
+    Array4< GpuComplex<Real> > spectral_dx = spectral_field_Dx.array();
+    Array4< GpuComplex<Real> > spectral_dy = spectral_field_Dy.array();
+    Array4< GpuComplex<Real> > spectral_dz = spectral_field_Dz.array();
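The kernel that follows performs the Helmholtz decomposition mode by mode. GxR/GxC and their y/z counterparts are the real and imaginary parts of a discrete gradient operator G_x = (e^{2\pi i k_x / N_x} - 1)/\Delta x, divR/divC form the complex divergence G . u_hat, and the dilatational and solenoidal parts are obtained by projecting onto G:

    \hat{u}_d = \frac{ (G \cdot \hat{u})\, \overline{G} }{ |G|^2 },
    \qquad
    \hat{u}_s = \hat{u} - \hat{u}_d ,

with the |G|^2 \to 0 mean mode zeroed out to remove bulk motion.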
+    ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k)
+    {
+
+       Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0;
+
+       if (i <= nx/2) {
+
+           // Get the wavevector
+           int ki = i;
+           int kj = j;
+           if (j >= ny/2) kj = ny - j;
+           int kk = k;
+           if (k >= nz/2) kk = nz - k;
+
+           // Gradient Operators
+           GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0];
+           GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0];
+           GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1];
+           GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1];
+           GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2];
+           GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2];
+       }
+       else { // conjugate
+           amrex::Abort("check the code; i should not go beyond bx.length(0)/2");
+       }
+
+       // Get the wavenumber
+       int ki = i;
+       int kj = j;
+       if (j >= ny/2) kj = ny - j;
+       int kk = k;
+       if (k >= nz/2) kk = nz - k;
+       Real knum = (ki*ki + kj*kj + kk*kk);
+       knum = std::sqrt(knum);
+
+       // Scale Total velocity FFT components with Filtering
+       if ((knum >= kmin) and (knum <= kmax)) {
+
+           spectral_tx(i,j,k) *= (1.0/sqrtnpts);
+           spectral_ty(i,j,k) *= (1.0/sqrtnpts);
+           spectral_tz(i,j,k) *= (1.0/sqrtnpts);
+
+           // Inverse Laplacian
+           Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC;
+
+           // Divergence of vel
+           Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC +
+                       spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC +
+                       spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ;
+           Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR +
+                       spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR +
+                       spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ;
+
+           if (Lap < 1.0e-12) { // zero mode for no bulk motion
+               spectral_dx(i,j,k) *= 0.0;
+               spectral_dy(i,j,k) *= 0.0;
+               spectral_dz(i,j,k) *= 0.0;
+           }
+           else {
+
+               // Dilatational velocity
+               GpuComplex<Real> copy_dx((divR*GxR + divC*GxC) / Lap,
+                                        (divC*GxR - divR*GxC) / Lap);
+               spectral_dx(i,j,k) = copy_dx;
+
+               GpuComplex<Real> copy_dy((divR*GyR + divC*GyC) / Lap,
+                                        (divC*GyR - divR*GyC) / Lap);
+               spectral_dy(i,j,k) = copy_dy;
+
+               GpuComplex<Real> copy_dz((divR*GzR + divC*GzC) / Lap,
+                                        (divC*GzR - divR*GzC) / Lap);
+               spectral_dz(i,j,k) = copy_dz;
+           }
+
+           // Solenoidal velocity
+           spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k);
+           spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k);
+           spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k);
+       }
+       else {
+           spectral_tx(i,j,k) = 0.0;
+           spectral_ty(i,j,k) = 0.0;
+           spectral_tz(i,j,k) = 0.0;
+           spectral_sx(i,j,k) = 0.0;
+           spectral_sy(i,j,k) = 0.0;
+           spectral_sz(i,j,k) = 0.0;
+           spectral_dx(i,j,k) = 0.0;
+           spectral_dy(i,j,k) = 0.0;
+           spectral_dz(i,j,k) = 0.0;
+       }
+
+    });
+
+    Gpu::streamSynchronize();
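heFFTe's transforms are unnormalized: a forward plus backward pass multiplies the data by N = Nx*Ny*Nz. The compensating factor is split symmetrically, 1/sqrt(N) applied to the kept modes in the kernel above and another 1/sqrt(N) applied via mult(1.0/sqrtnpts) after the backward transforms below:

    \mathcal{F}^{-1}\!\left[ \mathcal{F}[u] \right] = N\, u,
    \qquad
    \frac{1}{\sqrt{N}} \cdot \frac{1}{\sqrt{N}} = \frac{1}{N} .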
+
+    MultiFab vel_decomp_filter_single(ba, dm, 1, 0);
+    // inverse Fourier transform filtered total velocity
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 0, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 1, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 2, 1);
+    }
+    // inverse Fourier transform filtered solenoidal and dilatational components
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 3, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 4, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 5, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 6, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 7, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr();
+        fft.backward(spectral_data, vel_decomp_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp_filter.ParallelCopy(vel_decomp_filter_single, 0, 8, 1);
+    }
+
+
+    vel_decomp_filter.mult(1.0/sqrtnpts);
+
+}
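The nine components written into vel_decomp_filter follow a fixed layout that main_driver.cpp and SpectralWritePlotFile rely on; a hypothetical enum documenting that layout (not part of the patch):

    // Hypothetical component labels for the 9-component vel_decomp_filter.
    enum VelDecompComp : int {
        UX_TOT = 0, UY_TOT = 1, UZ_TOT = 2,  // filtered total velocity
        UX_SOL = 3, UY_SOL = 4, UZ_SOL = 5,  // solenoidal (divergence-free) part
        UX_DIL = 6, UY_DIL = 7, UZ_DIL = 8   // dilatational (curl-free) part
    };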
+
+
+void SpectralScalarDecomp(const MultiFab& scalar,
+                          MultiFab& scalar_filter,
+                          const amrex::Real kmin,
+                          const amrex::Real kmax,
+                          const amrex::Geometry& geom,
+                          const amrex::Vector<int> n_cells)
+{
+    BL_PROFILE_VAR("SpectralScalarDecomp()",SpectralScalarDecomp);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(scalar.nComp() == 1,
+        "SpectralScalarDecomp: must have 1 component of input scalar MultiFab");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(scalar.local_size() == 1,
+        "SpectralScalarDecomp: Must have one Box per MPI process when using heFFTe");
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    long npts;
+    Box domain = geom.Domain();
+    npts = (domain.length(0)*domain.length(1)*domain.length(2));
+    Real sqrtnpts = std::sqrt(npts);
+
+    // get box array and distribution map of scalar
+    DistributionMapping dm = scalar.DistributionMap();
+    BoxArray ba = scalar.boxArray();
+
+    // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid
+    Box local_box;
+    int local_boxid;
+    {
+        for (int i = 0; i < ba.size(); ++i) {
+            Box b = ba[i];
+            // each MPI rank has its own local_box Box and local_boxid ID
+            if (ParallelDescriptor::MyProc() == dm[i]) {
+                local_box = b;
+                local_boxid = i;
+            }
+        }
+    }
+
+    // now each MPI rank works on its own box
+    // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset
+
+    // start by coarsening each box by 2 in the x-direction
+    Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1)));
+
+    // if the coarsened box's high-x index is even, we shrink the size in 1 in x
+    // this avoids overlap between coarsened boxes
+    if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) {
+        c_local_box.setBig(0,c_local_box.bigEnd(0)-1);
+    }
+    // for any boxes that touch the hi-x domain we
+    // increase the size of boxes by 1 in x
+    // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz)
+    if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) {
+        c_local_box.growHi(0,1);
+    }
+
+    // each MPI rank gets storage for its piece of the fft
+    BaseFab<GpuComplex<Real> > spectral_field(c_local_box, 1, The_Device_Arena());
+    MultiFab scalar_single(ba, dm, 1, 0);
+
+    int r2c_direction = 0;
+
+    // ForwardTransform
+    using heffte_complex = typename heffte::fft_output<Real>::type;
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        scalar_single.ParallelCopy(scalar, 0, 0, 1);
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr();
+        fft.forward(scalar_single[local_boxid].dataPtr(),spectral_data);
+    }
+
+    Gpu::streamSynchronize();
+
+    // filtering
+    Array4< GpuComplex<Real> > spectral = spectral_field.array();
+    int nx = n_cells[0];
+    int ny = n_cells[1];
+    int nz = n_cells[2];
+    ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k)
+    {
+
+        if (i <= nx/2) {
+        }
+        else { // conjugate
+            amrex::Abort("check the code; i should not go beyond bx.length(0)/2");
+        }
+
+        // Get the wavenumber
+        int ki = i;
+        int kj = j;
+        if (j >= ny/2) kj = ny - j;
+        int kk = k;
+        if (k >= nz/2) kk = nz - k;
+        Real knum = (ki*ki + kj*kj + kk*kk);
+        knum = std::sqrt(knum);
+
+        // Scale Scalar FFT components with Filtering
+        if ((knum >= kmin) and (knum <= kmax)) {
+            spectral(i,j,k) *= (1.0/sqrtnpts);
+        }
+        else {
+            spectral(i,j,k) *= 0.0;
+        }
+    });
+
+    Gpu::streamSynchronize();
+
+    MultiFab scalar_filter_single(ba, dm, 1, 0);
+    // inverse Fourier transform filtered scalar
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+            ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+              {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+             {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+              {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+             r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr();
+        fft.backward(spectral_data, scalar_filter_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        scalar_filter.ParallelCopy(scalar_filter_single, 0, 0, 1);
+    }
+
+    scalar_filter.mult(1.0/sqrtnpts);
+
+}
+
+void SpectralWritePlotFile(const int step,
+                           const amrex::Real& kmin,
+                           const amrex::Real& kmax,
+                           const amrex::Geometry& geom,
+                           const amrex::MultiFab& vel_decomp_in,
+                           const amrex::MultiFab& scalar_in,
+                           const amrex::MultiFab& vel_total,
+                           const amrex::MultiFab& scalar_total)
+{
+
+    MultiFab output;
+
+    // Cell-Centered Velocity Gradient Stats (1,2,3 are directions)
+    // 0: ux
+    // 1: uy
+    // 2: uz
+    // 3: ux_s
+    // 4: uy_s
+    // 5: uz_s
+    // 6: ux_d
+    // 7: uy_d
+    // 8: uz_d
+    // 9: umag
+    // 10: umag_s
+    // 11: umag_d
+    // 12: scalar
+    // 13: divergence = u_1,1 + u_2,2 + u_3,3
+    // 14: vorticity w1
+    // 15: vorticity w2
+    // 16: vorticity w3
+    // 17: vorticity mag: sqrt(w1**2 + w2**2 + w3**2)
+    // 18: ux_org
+    // 19: scalar_org
+    output.define(vel_decomp_in.boxArray(), vel_decomp_in.DistributionMap(), 20, 0);
+    output.setVal(0.0);
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Box& bx = mfi.tilebox();
+
+        const Array4<      Real>& out      = output.array(mfi);
+
+        const Array4<const Real>& v_decomp = vel_decomp_in.array(mfi);
+
+        const Array4<const Real>& sca      = scalar_in.array(mfi);
+
+        const Array4<const Real>& v_tot    = vel_total.array(mfi);
+
+        const Array4<const Real>& sca_tot  = scalar_total.array(mfi);
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+
+            out(i,j,k,0) = v_decomp(i,j,k,0);
+            out(i,j,k,1) = v_decomp(i,j,k,1);
+            out(i,j,k,2) = v_decomp(i,j,k,2);
+            out(i,j,k,3) = v_decomp(i,j,k,3);
+            out(i,j,k,4) = v_decomp(i,j,k,4);
+            out(i,j,k,5) = v_decomp(i,j,k,5);
+            out(i,j,k,6) = v_decomp(i,j,k,6);
+            out(i,j,k,7) = v_decomp(i,j,k,7);
+            out(i,j,k,8) = v_decomp(i,j,k,8);
+
+            out(i,j,k,9)  = std::sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag
+            out(i,j,k,10) = std::sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag solenoidal
+            out(i,j,k,11) = std::sqrt(out(i,j,k,6)*out(i,j,k,6) + out(i,j,k,7)*out(i,j,k,7) + out(i,j,k,8)*out(i,j,k,8)); // mag dilatational
+
+            out(i,j,k,12) = sca(i,j,k,0);
+
+            // divergence
+            out(i,j,k,13) = 0.5*( (v_decomp(i+1,j,k,0) - v_decomp(i-1,j,k,0))/dx[0] +
+                                  (v_decomp(i,j+1,k,1) - v_decomp(i,j-1,k,1))/dx[1] +
+                                  (v_decomp(i,j,k+1,2) - v_decomp(i,j,k-1,2))/dx[2] );
+
+            // curl w1 = u_2,1 - u_1,2
+            out(i,j,k,14) = 0.5*( (v_decomp(i+1,j,k,1) - v_decomp(i-1,j,k,1))/dx[0] -
+                                  (v_decomp(i,j+1,k,0) - v_decomp(i,j-1,k,0))/dx[1] );
+
+            // curl w2 = u_1,3 - u_3,1
+            out(i,j,k,15) = 0.5*( (v_decomp(i,j,k+1,0) - v_decomp(i,j,k-1,0))/dx[2] -
+                                  (v_decomp(i+1,j,k,2) - v_decomp(i-1,j,k,2))/dx[0] );
+
+            // curl w3 = u_3,2 - u_2,3
+            out(i,j,k,16) = 0.5*( (v_decomp(i,j+1,k,2) - v_decomp(i,j-1,k,2))/dx[1] -
+                                  (v_decomp(i,j,k+1,1) - v_decomp(i,j,k-1,1))/dx[2] );
+
+            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
+            out(i,j,k,17) = std::sqrt( out(i,j,k,14)*out(i,j,k,14) + out(i,j,k,15)*out(i,j,k,15) + out(i,j,k,16)*out(i,j,k,16) );
+
+            // original velx
+            out(i,j,k,18) = v_tot(i,j,k,0);
+
+            // original scalar
+            out(i,j,k,19) = sca_tot(i,j,k,0);
+        });
+    }
+
+    // Write on a plotfile
+    std::string plotfilename = amrex::Concatenate("filtered_",step,9);
+    std::ostringstream os;
+    os << std::setprecision(3) << kmin;
+    plotfilename += os.str();
+    plotfilename += "_";
+    std::ostringstream oss;
+    oss << std::setprecision(3) << kmax;
+    plotfilename += oss.str();
+
+    amrex::Vector<std::string> varNames(20);
+    varNames[0]  = "ux";
+    varNames[1]  = "uy";
+    varNames[2]  = "uz";
+    varNames[3]  = "ux_s";
+    varNames[4]  = "uy_s";
+    varNames[5]  = "uz_s";
+    varNames[6]  = "ux_d";
+    varNames[7]  = "uy_d";
+    varNames[8]  = "uz_d";
+    varNames[9]  = "umag";
+    varNames[10] = "umag_s";
+    varNames[11] = "umag_d";
+    varNames[12] = "rho";
+    varNames[13] = "div";
+    varNames[14] = "w1";
+    varNames[15] = "w2";
+    varNames[16] = "w3";
+    varNames[17] = "vort";
+    varNames[18] = "ux_org";
+    varNames[19] = "rho_org";
+    WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,0.0,step);
+}
+
+void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, const std::string& checkpointname,
+                             BoxArray& ba_old, DistributionMapping& dmap_old,
+                             int NVARS, int ghost, const amrex::IntVect ngc,
+                             int nodal_flag)
+{
+    // Read into temporary MF from file
+    MultiFab mf_temp;
+    VisMF::Read(mf_temp,amrex::MultiFabFileFullPrefix(0, checkpointname, "Level_", mf_name));
+
+    // Copy temporary MF into the new MF
+    if (ghost) {
+        mf.ParallelCopy(mf_temp, 0, 0, NVARS, ngc, ngc);
+    }
+    else {
+        mf.ParallelCopy(mf_temp, 0, 0, NVARS, 0, 0);
+    }
+}
+
+void ShiftFaceToCC(const MultiFab& face_in, int face_comp,
+                   MultiFab& cc_in, int cc_comp, int ncomp)
+{
+
+    BL_PROFILE_VAR("ShiftFaceToCC()",ShiftFaceToCC);
+
+    if (!face_in.is_nodal(0) && !face_in.is_nodal(1) && !face_in.is_nodal(2)) {
+        Abort("ShiftFaceToCC requires a face-centered MultiFab");
+    }
+
+    // Loop over boxes (note that mfi takes a cell-centered multifab as an argument)
+    for (MFIter mfi(cc_in,TilingIfNotGPU()); mfi.isValid(); ++mfi) {
+
+        const Box& bx = mfi.tilebox();
+
+        Array4<Real const> const& face = face_in.array(mfi);
+
+        Array4<Real> const& cc = cc_in.array(mfi);
+
+        amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            cc(i,j,k,cc_comp+n) = face(i,j,k,face_comp+n);
+        });
+    }
+}
+
+void ComputeGrad(const MultiFab & phi_in, std::array<MultiFab, 3> & gphi,
+                 int start_incomp, int start_outcomp, int ncomp, int bccomp, const Geometry & geom,
+                 int increment)
+{
+    BL_PROFILE_VAR("ComputeGrad()",ComputeGrad);
+
+    // Physical Domain
+    Box dom(geom.Domain());
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    // if not incrementing, initialize data to zero
+    if (increment == 0) {
+        for (int dir=0; dir<3; ++dir) {
+            gphi[dir].setVal(0.,start_outcomp,ncomp,0);
+        }
+    }
+
+    // Loop over boxes (note that mfi takes a cell-centered multifab as an argument)
+    for ( MFIter mfi(phi_in,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Array4<const Real> & phi = phi_in.array(mfi);
+
+        AMREX_D_TERM(const Array4<Real> & gphix = gphi[0].array(mfi);,
+                     const Array4<Real> & gphiy = gphi[1].array(mfi);,
+                     const Array4<Real> & gphiz = gphi[2].array(mfi););
+
+        AMREX_D_TERM(const Box & bx_x = mfi.nodaltilebox(0);,
+                     const Box & bx_y = mfi.nodaltilebox(1);,
+                     const Box & bx_z = mfi.nodaltilebox(2););
+
+        amrex::ParallelFor(bx_x, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            gphix(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i-1,j,k,start_incomp+n)) / dx[0];
+        },
+        bx_y, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            gphiy(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i,j-1,k,start_incomp+n)) / dx[1];
+        }
+        , bx_z, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        {
+            gphiz(i,j,k,start_outcomp+n) += (phi(i,j,k,start_incomp+n)-phi(i,j,k-1,start_incomp+n)) / dx[2];
+        }
+        );
+
+    } // end MFIter
+}
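ComputeGrad above differences cell-centered data onto faces, so each gradient component lives on the corresponding face grid (the same staggering consumed by FCMoments and SumStag below), e.g. in x:

    (\partial_x \phi)_{i-1/2,\,j,\,k} = \frac{ \phi_{i,j,k} - \phi_{i-1,j,k} }{ \Delta x } .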
i(reduce_datax.value()); + ParallelDescriptor::ReduceRealSum(sum[0]); + + //////// y-faces + + ReduceData reduce_datay(reduce_op); + + for (MFIter mfi(m1[1],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const Box& bx_grid = mfi.validbox(); + + auto const& fab = m1[1].array(mfi); + + int ylo = bx_grid.smallEnd(1); + int yhi = bx_grid.bigEnd(1); + + reduce_op.eval(bx, reduce_datay, + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple + { + Real weight = (j>ylo && j(reduce_datay.value()); + ParallelDescriptor::ReduceRealSum(sum[1]); + + //////// z-faces + + ReduceData reduce_dataz(reduce_op); + + for (MFIter mfi(m1[2],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const Box& bx_grid = mfi.validbox(); + + auto const& fab = m1[2].array(mfi); + + int zlo = bx_grid.smallEnd(2); + int zhi = bx_grid.bigEnd(2); + + reduce_op.eval(bx, reduce_dataz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple + { + Real weight = (k>zlo && k(reduce_dataz.value()); + ParallelDescriptor::ReduceRealSum(sum[2]); +} + +void CCMoments(const amrex::MultiFab& m1, + const int& comp1, + amrex::MultiFab& mscr, + const int& power, + amrex::Real& prod_val) +{ + + BL_PROFILE_VAR("CCMoments()",CCMoments); + + MultiFab::Copy(mscr,m1,comp1,0,1,0); + for(int i=1; i +#include + +#include +#include + +using namespace amrex; +using namespace std; + +static +void +PrintUsage (const char* progName) +{ + Print() << std::endl + << "This utility computes PDF of scalars, and various powers of Laplacian of velocity field," << std::endl; + + Print() << "Usage:" << '\n'; + Print() << progName << " " << std::endl + << "OR" << std::endl + << progName << std::endl + << " step=" << std::endl + << " nbins= " << std::endl + << " range= " << std::endl + << std::endl; + + exit(1); +} + + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + { + + if (argc == 1) { + PrintUsage(argv[0]); + } + + ParmParse pp; + + int step; + pp.query("step",step); + + std::string iFile = amrex::Concatenate("plt",step,9); + + Vector scalar_out(3); + scalar_out[0] = amrex::Concatenate("rho_pdf",step,9); + scalar_out[1] = amrex::Concatenate("press_pdf",step,9); + scalar_out[2] = amrex::Concatenate("temp_pdf",step,9); + Vector Lap_out(5); + Lap_out[0] = amrex::Concatenate("L0_pdf",step,9); + Lap_out[1] = amrex::Concatenate("L1_pdf",step,9); + Lap_out[2] = amrex::Concatenate("L2_pdf",step,9); + Lap_out[3] = amrex::Concatenate("L3_pdf",step,9); + Lap_out[4] = amrex::Concatenate("L4_pdf",step,9); + + int nbins; + pp.get("nbins", nbins); + + Real range; + pp.get("range",range); + + amrex::Print() << "Reading from plotfile " << iFile << "\n"; + + // for the Header + std::string iFile2 = iFile; + iFile2 += "/Header"; + + // open header + ifstream x; + x.open(iFile2.c_str(), ios::in); + + // read in first line of header (typically "HyperCLaw-V1.1" or similar) + std::string str; + x >> str; + + // read in number of components from header + int ncomp; + x >> ncomp; + + // read in variable names from header + int flag = 0; + int rho_ind, press_ind, temp_ind, velx_ind; + for (int n=0; n> str; + if (str == "rhoInstant") rho_ind = flag; + if (str == "pInstant") press_ind = flag; + if (str == "tInstant") temp_ind = flag; + if (str == "uxInstantFACE") velx_ind = flag; + flag ++; + } + + // read in dimensionality from header + int dim; + x >> dim; + + // read in time + Real time; + x >> time; + + // read in finest level + int finest_level; + x >> finest_level; + + // read in prob_lo and 
prob_hi + amrex::GpuArray prob_lo, prob_hi; + for (int i=0; i<3; ++i) { + x >> prob_lo[i]; + } + for (int i=0; i<3; ++i) { + x >> prob_hi[i]; + } + + // now read in the plotfile data + // check to see whether the user pointed to the plotfile base directory + // or the data itself + if (amrex::FileExists(iFile+"/Level_0/Cell_H")) { + iFile += "/Level_0/Cell"; + } + if (amrex::FileExists(iFile+"/Level_00/Cell_H")) { + iFile += "/Level_00/Cell"; + } + + // storage for the input coarse and fine MultiFabs + MultiFab mf; + + // read in plotfile mf to MultiFab + VisMF::Read(mf, iFile); + + // get BoxArray and DistributionMapping + BoxArray ba = mf.boxArray(); + DistributionMapping dmap = mf.DistributionMap(); + + // physical dimensions of problem + RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])}, + {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])}); + + // single box with the enire domain + Box domain = ba.minimalBox().enclosedCells(); + + Real ncells = (double) domain.numPts(); + + // set to 1 (periodic) + Vector is_periodic(3,1); + + Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + + const Real* dx = geom.CellSize(); + + //////////////////////////////////////////////////////////////////////// + ////////////// velocity Laplacian PDFs ///////////////////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab vel_grown(ba,dmap,3,1); + MultiFab laplacian(ba,dmap,3,1); + + // copy shifted velocity components from mf into vel_grown + Copy(vel_grown,mf,velx_ind,0,3,0); + Copy(laplacian,mf,velx_ind,0,3,0); + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + laplacian.FillBoundary(geom.periodicity()); + + for (int m=0; m<5; ++m) { + + Vector L2(3,0.); + for (int i=0; i<3; i++) + L2[i]=0.; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + L2[n] += lap(i,j,k,n)*lap(i,j,k,n); + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(L2.dataPtr(),3); + amrex::Long totpts = domain.numPts(); + L2[0] = sqrt(L2[0]/totpts); + L2[1] = sqrt(L2[1]/totpts); + L2[2] = sqrt(L2[2]/totpts); + Print() << "L2 norm of Laplacian to power " << m << " is " << L2[0] + << " " << L2[1] << " " << L2[2] << " " << std::endl; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + lap(i,j,k,n) = lap(i,j,k,n)/L2[n]; + + } + } + } + } + + } // end MFIter + + Vector bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for 
(auto j = lo.y; j <= hi.y; ++j) {
+                for (auto i = lo.x; i <= hi.x; ++i) {
+
+                    int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth);
+                    index += halfbin;
+
+                    if( index >=0 && index <= nbins) {
+                        bins[index] += 1;
+                        totbin++;
+                    }
+
+                    count++;
+
+                }
+                }
+            }
+        }
+
+        } // end MFIter
+
+        ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+        ParallelDescriptor::ReduceLongSum(count);
+        ParallelDescriptor::ReduceLongSum(totbin);
+        Print() << "Points outside of range "<< count - totbin << " " <<
+            (double)(count-totbin)/count << std::endl;
+
+        // print out contents of bins to the screen
+        for (int i=0; i& vel = vel_grown.array(mfi);
+            const Array4<Real>& lap = laplacian.array(mfi);
+
+            for (auto n=0; n<3; ++n) {
+            for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                    for (auto i = lo.x; i <= hi.x; ++i) {
+
+                        lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0])
+                                       -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1])
+                                       -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k-1,n)) / (dx[2]*dx[2]);
+                    }
+                }
+            }
+            }
+
+        } // end MFIter
+
+        // copy lap into vel_grown
+        Copy(vel_grown,laplacian,0,0,3,0);
+
+        // fill ghost cells of vel_grown
+        vel_grown.FillBoundary(geom.periodicity());
+
+    } // end loop
+    ////////////////////////////////////////////////////////////////////////
+    ////////////// velocity Laplacian PDFs /////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////
+    ///////////////////////// scalar PDFs //////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+    MultiFab scalar(ba,dmap,3,0);
+    Copy(scalar,mf,rho_ind,0,1,0);
+    Copy(scalar,mf,press_ind,1,1,0);
+    Copy(scalar,mf,temp_ind,2,1,0);
+
+    // compute spatial mean
+    Real mean_rho   = scalar.sum(0) / (ncells);
+    Real mean_press = scalar.sum(1) / (ncells);
+    Real mean_temp  = scalar.sum(2) / (ncells);
+
+    // get fluctuations
+    scalar.plus(-1.0*mean_rho,   0, 1);
+    scalar.plus(-1.0*mean_press, 1, 1);
+    scalar.plus(-1.0*mean_temp,  2, 1);
+
+    // get rms
+    Real rms_rho   = scalar.norm2(0) / sqrt(ncells);
+    Real rms_press = scalar.norm2(1) / sqrt(ncells);
+    Real rms_temp  = scalar.norm2(2) / sqrt(ncells);
+
+    // scale by rms
+    scalar.mult(1.0/rms_rho,   0, 1);
+    scalar.mult(1.0/rms_press, 1, 1);
+    scalar.mult(1.0/rms_temp,  2, 1);
+
+    // now compute pdfs
+    for (int m = 0; m < 3; ++m) {
+
+        Vector<Real> bins(nbins+1,0.);
+
+        int halfbin = nbins/2;
+        Real hbinwidth = range/nbins;
+        Real binwidth  = 2.*range/nbins;
+        amrex::Long count=0;
+        amrex::Long totbin=0;
+        for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0;
+
+        for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+            const Box& bx = mfi.validbox();
+            const auto lo = amrex::lbound(bx);
+            const auto hi = amrex::ubound(bx);
+
+            const Array4<Real>& sca = scalar.array(mfi);
+
+            for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                    for (auto i = lo.x; i <= hi.x; ++i) {
+
+                        int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth);
+                        index += halfbin;
+
+                        if( index >=0 && index <= nbins) {
+                            bins[index] += 1;
+                            totbin++;
+                        }
+
+                        count++;
+
+                    }
+                }
+            }
+
+        } // end MFIter
+
+        ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+        ParallelDescriptor::ReduceLongSum(count);
+        ParallelDescriptor::ReduceLongSum(totbin);
+        Print() << "Points outside of range "<< count - totbin << " " <<
+            (double)(count-totbin)/count << std::endl;
+
+        // print out contents of 
bins to the screen + for (int i=0; i +#include + +#include +#include +#include + +using namespace amrex; +using namespace std; + +static +void +PrintUsage (const char* progName) +{ + Print() << std::endl + << "This utility computes PDF of vorticity and divergence, and various powers of Laplacian of solenoidal and dilatational velocity field," << std::endl; + + Print() << "Usage:" << '\n'; + Print() << progName << " " << std::endl + << "OR" << std::endl + << progName << std::endl + << " step=" << std::endl + << " nbins= " << std::endl + << " range= " << std::endl + << std::endl; + + exit(1); +} + + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + { + + if (argc == 1) { + PrintUsage(argv[0]); + } + + ParmParse pp; + + int step; + pp.query("step",step); + + std::string iFile = amrex::Concatenate("vel_grad_decomp",step,9); + + Vector scalar_out(5); + scalar_out[0] = amrex::Concatenate("div_pdf",step,9); + scalar_out[1] = amrex::Concatenate("vortx_pdf",step,9); + scalar_out[2] = amrex::Concatenate("vorty_pdf",step,9); + scalar_out[3] = amrex::Concatenate("vortz_pdf",step,9); + scalar_out[4] = amrex::Concatenate("vort_pdf",step,9); + Vector Lap_out_sol(5); + Lap_out_sol[0] = amrex::Concatenate("L0_pdf_sol",step,9); + Lap_out_sol[1] = amrex::Concatenate("L1_pdf_sol",step,9); + Lap_out_sol[2] = amrex::Concatenate("L2_pdf_sol",step,9); + Lap_out_sol[3] = amrex::Concatenate("L3_pdf_sol",step,9); + Lap_out_sol[4] = amrex::Concatenate("L4_pdf_sol",step,9); + Vector Lap_out_dil(5); + Lap_out_dil[0] = amrex::Concatenate("L0_pdf_dil",step,9); + Lap_out_dil[1] = amrex::Concatenate("L1_pdf_dil",step,9); + Lap_out_dil[2] = amrex::Concatenate("L2_pdf_dil",step,9); + Lap_out_dil[3] = amrex::Concatenate("L3_pdf_dil",step,9); + Lap_out_dil[4] = amrex::Concatenate("L4_pdf_dil",step,9); + + int nbins; + pp.get("nbins", nbins); + + Real range; + pp.get("range",range); + + amrex::Print() << "Reading from vel_grad_decomp plotfile " << iFile << "\n"; + + // for the Header + std::string iFile2 = iFile; + iFile2 += "/Header"; + + // open header + ifstream x; + x.open(iFile2.c_str(), ios::in); + + // read in first line of header (typically "HyperCLaw-V1.1" or similar) + std::string str; + x >> str; + + // read in number of components from header + int ncomp; + x >> ncomp; + + // read in variable names from header + int flag = 0; + int vort_ind, div_ind, velx_sol_ind, vely_sol_ind, velz_sol_ind, velx_dil_ind, vely_dil_ind, velz_dil_ind; + for (int n=0; n> str; + if (str == "vort") vort_ind = flag; + if (str == "div") div_ind = flag; + if (str == "ux_s") velx_sol_ind = flag; + if (str == "uy_s") vely_sol_ind = flag; + if (str == "uz_s") velz_sol_ind = flag; + if (str == "ux_d") velx_dil_ind = flag; + if (str == "uy_d") vely_dil_ind = flag; + if (str == "uz_d") velz_dil_ind = flag; + flag ++; + } + + // read in dimensionality from header + int dim; + x >> dim; + + // read in time + Real time; + x >> time; + + // read in finest level + int finest_level; + x >> finest_level; + + // read in prob_lo and prob_hi + amrex::GpuArray prob_lo, prob_hi; + for (int i=0; i<3; ++i) { + x >> prob_lo[i]; + } + for (int i=0; i<3; ++i) { + x >> prob_hi[i]; + } + + // now read in the plotfile data + // check to see whether the user pointed to the plotfile base directory + // or the data itself + if (amrex::FileExists(iFile+"/Level_0/Cell_H")) { + iFile += "/Level_0/Cell"; + } + if (amrex::FileExists(iFile+"/Level_00/Cell_H")) { + iFile += "/Level_00/Cell"; + } + + // storage for the input coarse and fine MultiFabs + 
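+ // [Editor's note: an illustrative aside, not part of the original patch.]
+ // VisMF::Read below pulls an entire plotfile level into a MultiFab whose
+ // BoxArray and DistributionMapping come from the file itself; only the
+ // Header parse above is needed for variable indices and the problem domain.
+ // A minimal standalone read, with a hypothetical plotfile name, would be:
+ //
+ //     #include <AMReX_MultiFab.H>
+ //     #include <AMReX_VisMF.H>
+ //
+ //     amrex::MultiFab data;
+ //     // "plt000001000" is a placeholder; Level_0/Cell holds the level-0 data
+ //     amrex::VisMF::Read(data, "plt000001000/Level_0/Cell");
+ //     amrex::Print() << "ncomp = " << data.nComp() << "\n";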
MultiFab mf; + + // read in plotfile mf to MultiFab + VisMF::Read(mf, iFile); + + // get BoxArray and DistributionMapping + BoxArray ba = mf.boxArray(); + DistributionMapping dmap = mf.DistributionMap(); + + // physical dimensions of problem + RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])}, + {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])}); + + // single box with the enire domain + Box domain = ba.minimalBox().enclosedCells(); + + Real ncells = (double) domain.numPts(); + + // set to 1 (periodic) + Vector is_periodic(3,1); + + Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + + const Real* dx = geom.CellSize(); + + //////////////////////////////////////////////////////////////////////// + ////////////// velocity Laplacian PDFs///////////// //////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab vel_grown(ba,dmap,6,1); + MultiFab vel_sol (ba,dmap,3,1); + MultiFab laplacian(ba,dmap,6,1); + + // copy shifted velocity components from mf into vel_grown + Copy(vel_grown,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_grown,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_grown,mf,velz_sol_ind,2,1,0); // sol + + Copy(laplacian,mf,velx_sol_ind,0,1,0); // sol + Copy(laplacian,mf,vely_sol_ind,1,1,0); // sol + Copy(laplacian,mf,velz_sol_ind,2,1,0); // sol + + Copy(vel_grown,mf,velx_dil_ind,3,1,0); // dil + Copy(vel_grown,mf,vely_dil_ind,4,1,0); // dil + Copy(vel_grown,mf,velz_dil_ind,5,1,0); // dil + + Copy(laplacian,mf,velx_dil_ind,3,1,0); // dil + Copy(laplacian,mf,vely_dil_ind,4,1,0); // dil + Copy(laplacian,mf,velz_dil_ind,5,1,0); // dil + + Copy(vel_sol,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_sol,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_sol,mf,velz_sol_ind,2,1,0); // sol + + // fill ghost cells of vel_grown + vel_grown.FillBoundary(geom.periodicity()); + laplacian.FillBoundary(geom.periodicity()); + vel_sol .FillBoundary(geom.periodicity()); + + for (int m=0; m<5; ++m) { + + Vector L2(6,0.); + for (int i=0; i<6; i++) + L2[i]=0.; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + L2[n] += lap(i,j,k,n)*lap(i,j,k,n); + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(L2.dataPtr(),6); + amrex::Long totpts = domain.numPts(); + L2[0] = sqrt(L2[0]/totpts); + L2[1] = sqrt(L2[1]/totpts); + L2[2] = sqrt(L2[2]/totpts); + L2[3] = sqrt(L2[3]/totpts); + L2[4] = sqrt(L2[4]/totpts); + L2[5] = sqrt(L2[5]/totpts); + Print() << "L2 norm of Laplacian (solenoidal) to power " << m << " is " << L2[0] + << " " << L2[1] << " " << L2[2] << " " << std::endl; + Print() << "L2 norm of Laplacian (dilational) to power " << m << " is " << L2[3] + << " " << L2[4] << " " << L2[5] << " " << std::endl; + + for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& lap = laplacian.array(mfi); + + for (auto n=0; n<6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + lap(i,j,k,n) = lap(i,j,k,n)/L2[n]; + + } + } + } + } + + } // end MFIter + + Vector bins_sol(nbins+1,0.); + Vector bins_dil(nbins+1,0.); 
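+         // [Editor's note: illustrative sketch, not part of the original patch.]
+         // The histogramming below maps a normalized sample x in [-range, range]
+         // to one of nbins+1 bins centered on zero:
+         //
+         //     int bin_of(amrex::Real x, amrex::Real range, int nbins) {
+         //         amrex::Real binwidth  = 2.*range/nbins;
+         //         amrex::Real hbinwidth = range/nbins;   // half a bin width
+         //         // shift by half a bin so the bins are centered on multiples of binwidth
+         //         return static_cast<int>(std::floor((x + hbinwidth)/binwidth)) + nbins/2;
+         //     }
+         //
+         // e.g. with range = 5 and nbins = 100, binwidth = 0.1, so x = 0.04 lands in
+         // floor(0.09/0.1) + 50 = 50, the central bin; samples outside [-range, range]
+         // fall outside [0, nbins] and show up in the "points outside of range" count.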
+
+        int halfbin = nbins/2;
+        Real hbinwidth = range/nbins;
+        Real binwidth  = 2.*range/nbins;
+        amrex::Long count_sol=0;
+        amrex::Long totbin_sol=0;
+        amrex::Long count_dil=0;
+        amrex::Long totbin_dil=0;
+        for (int ind=0 ; ind < nbins+1; ind++) bins_sol[ind]=0;
+        for (int ind=0 ; ind < nbins+1; ind++) bins_dil[ind]=0;
+
+        for ( MFIter mfi(laplacian,false); mfi.isValid(); ++mfi ) {
+
+            const Box& bx = mfi.validbox();
+            const auto lo = amrex::lbound(bx);
+            const auto hi = amrex::ubound(bx);
+
+            const Array4<Real>& lap = laplacian.array(mfi);
+
+            for (auto n=0; n<3; ++n) {
+            for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                    for (auto i = lo.x; i <= hi.x; ++i) {
+
+                        int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth);
+                        index += halfbin;
+
+                        if( index >=0 && index <= nbins) {
+                            bins_sol[index] += 1;
+                            totbin_sol++;
+                        }
+
+                        count_sol++;
+
+                    }
+                }
+            }
+            }
+
+            for (auto n=3; n<6; ++n) {
+            for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                    for (auto i = lo.x; i <= hi.x; ++i) {
+
+                        int index = floor((lap(i,j,k,n) + hbinwidth)/binwidth);
+                        index += halfbin;
+
+                        if( index >=0 && index <= nbins) {
+                            bins_dil[index] += 1;
+                            totbin_dil++;
+                        }
+
+                        count_dil++;
+
+                    }
+                }
+            }
+            }
+
+        } // end MFIter
+
+        ParallelDescriptor::ReduceRealSum(bins_sol.dataPtr(),nbins+1);
+        ParallelDescriptor::ReduceLongSum(count_sol);
+        ParallelDescriptor::ReduceLongSum(totbin_sol);
+        ParallelDescriptor::ReduceRealSum(bins_dil.dataPtr(),nbins+1);
+        ParallelDescriptor::ReduceLongSum(count_dil);
+        ParallelDescriptor::ReduceLongSum(totbin_dil);
+        Print() << "Points outside of range (solenoidal) "<< count_sol - totbin_sol << " " <<
+            (double)(count_sol-totbin_sol)/count_sol << std::endl;
+        Print() << "Points outside of range (dilatational) "<< count_dil - totbin_dil << " " <<
+            (double)(count_dil-totbin_dil)/count_dil << std::endl;
+
+        // print out contents of bins to the screen
+        for (int i=0; i& vel = vel_grown.array(mfi);
+            const Array4<Real>& lap = laplacian.array(mfi);
+
+            for (auto n=0; n<6; ++n) {
+            for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                    for (auto i = lo.x; i <= hi.x; ++i) {
+
+                        lap(i,j,k,n) = -(vel(i+1,j,k,n) - 2.*vel(i,j,k,n) + vel(i-1,j,k,n)) / (dx[0]*dx[0])
+                                       -(vel(i,j+1,k,n) - 2.*vel(i,j,k,n) + vel(i,j-1,k,n)) / (dx[1]*dx[1])
+                                       -(vel(i,j,k+1,n) - 2.*vel(i,j,k,n) + vel(i,j,k-1,n)) / (dx[2]*dx[2]);
+                    }
+                }
+            }
+            }
+
+        } // end MFIter
+
+        // copy lap into vel_grown
+        Copy(vel_grown,laplacian,0,0,6,0);
+
+        // fill ghost cells of vel_grown
+        vel_grown.FillBoundary(geom.periodicity());
+
+    } // end loop
+    ////////////////////////////////////////////////////////////////////////
+    ////////////// velocity Laplacian PDFs //////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////
+    ///////////////////////// scalar PDFs ///////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+    MultiFab scalar(ba,dmap,4,0); // div, vort_x, vort_y, vort_z
+    scalar.setVal(0.0);
+    Copy(scalar,mf,div_ind,0,1,0);
+
+    // Compute vorticity components and store in scalar
+    // (vel_sol holds the solenoidal velocity in components 0,1,2)
+    for ( MFIter mfi(vel_sol,false); mfi.isValid(); ++mfi ) {
+
+        const Box& bx = mfi.validbox();
+        const auto lo = amrex::lbound(bx);
+        const auto hi = amrex::ubound(bx);
+
+        Array4<Real> const& sol = vel_sol.array(mfi);
+        Array4<Real> const& sca = scalar .array(mfi);
+
+        for (auto k = lo.z; k <= hi.z; ++k) {
+        for (auto j = lo.y; j <= hi.y; ++j) {
+        for (auto i = lo.x; i <= hi.x; ++i) {
+            // dw/dy - dv/dz
+            sca(i,j,k,1) =
+                (sol(i,j+1,k,2) - sol(i,j-1,k,2)) / (2.*dx[1]) -
+                (sol(i,j,k+1,1) - sol(i,j,k-1,1)) / (2.*dx[2]);
+
+            // dv/dx - du/dy
+            sca(i,j,k,2) =
+                (sol(i+1,j,k,1) - sol(i-1,j,k,1)) / (2.*dx[0]) -
+                (sol(i,j+1,k,0) - sol(i,j-1,k,0)) / (2.*dx[1]);
+
+            // du/dz - dw/dx
+            sca(i,j,k,3) =
+                (sol(i,j,k+1,0) - sol(i,j,k-1,0)) / (2.*dx[2]) -
+                (sol(i+1,j,k,2) - sol(i-1,j,k,2)) / (2.*dx[0]);
+
+        }
+        }
+        }
+    }
+
+    // compute spatial mean
+    Real mean_div   = scalar.sum(0) / (ncells);
+    Real mean_vortx = scalar.sum(1) / (ncells);
+    Real mean_vorty = scalar.sum(2) / (ncells);
+    Real mean_vortz = scalar.sum(3) / (ncells);
+
+    // get fluctuations
+    scalar.plus(-1.0*mean_div,   0, 1);
+    scalar.plus(-1.0*mean_vortx, 1, 1);
+    scalar.plus(-1.0*mean_vorty, 2, 1);
+    scalar.plus(-1.0*mean_vortz, 3, 1);
+
+    // get rms
+    Real rms_div   = scalar.norm2(0) / sqrt(ncells);
+    Real rms_vortx = scalar.norm2(1) / sqrt(ncells);
+    Real rms_vorty = scalar.norm2(2) / sqrt(ncells);
+    Real rms_vortz = scalar.norm2(3) / sqrt(ncells);
+
+    // scale by rms
+    scalar.mult(1.0/rms_div,   0, 1);
+    scalar.mult(1.0/rms_vortx, 1, 1);
+    scalar.mult(1.0/rms_vorty, 2, 1);
+    scalar.mult(1.0/rms_vortz, 3, 1);
+
+    // now compute pdfs
+    for (int m = 0; m < 4; ++m) {
+
+        Vector<Real> bins(nbins+1,0.);
+
+        int halfbin = nbins/2;
+        Real hbinwidth = range/nbins;
+        Real binwidth  = 2.*range/nbins;
+        amrex::Long count=0;
+        amrex::Long totbin=0;
+        for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0;
+
+        for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+            const Box& bx = mfi.validbox();
+            const auto lo = amrex::lbound(bx);
+            const auto hi = amrex::ubound(bx);
+
+            const Array4<Real>& sca = scalar.array(mfi);
+
+            for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                    for (auto i = lo.x; i <= hi.x; ++i) {
+
+                        int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth);
+                        index += halfbin;
+
+                        if( index >=0 && index <= nbins) {
+                            bins[index] += 1;
+                            totbin++;
+                        }
+
+                        count++;
+
+                    }
+                }
+            }
+
+        } // end MFIter
+
+        ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+        ParallelDescriptor::ReduceLongSum(count);
+        ParallelDescriptor::ReduceLongSum(totbin);
+        Print() << "Points outside of range "<< count - totbin << " " <<
+            (double)(count-totbin)/count << std::endl;
+
+        // print out contents of bins to the screen
+        for (int i=0; i bins(nbins+1,0.);
+
+        int halfbin = nbins/2;
+        Real hbinwidth = range/nbins;
+        Real binwidth  = 2.*range/nbins;
+        amrex::Long count=0;
+        amrex::Long totbin=0;
+        for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0;
+
+        for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+            const Box& bx = mfi.validbox();
+            const auto lo = amrex::lbound(bx);
+            const auto hi = amrex::ubound(bx);
+
+            const Array4<Real>& sca = scalar.array(mfi);
+
+            for (auto n = 1; n < 4; ++n) {
+            for (auto k = lo.z; k <= hi.z; ++k) {
+                for (auto j = lo.y; j <= hi.y; ++j) {
+                    for (auto i = lo.x; i <= hi.x; ++i) {
+
+                        int index = floor((sca(i,j,k,n) + hbinwidth)/binwidth);
+                        index += halfbin;
+
+                        if( index >=0 && index <= nbins) {
+                            bins[index] += 1;
+                            totbin++;
+                        }
+
+                        count++;
+
+                    }
+                }
+            }
+            }
+
+        } // end MFIter
+
+        ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1);
+        ParallelDescriptor::ReduceLongSum(count);
+        ParallelDescriptor::ReduceLongSum(totbin);
+        Print() << "Points outside of range "<< count - totbin << " " <<
+            
(double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& vel = vel_decomp.array(mfi); + + for (auto n = 0; n < 3; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i bins(nbins+1,0.); + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + amrex::Long count=0; + amrex::Long totbin=0; + for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; + + for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.validbox(); + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + + const Array4& vel = vel_decomp.array(mfi); + + for (auto n = 3; n < 6; ++n) { + for (auto k = lo.z; k <= hi.z; ++k) { + for (auto j = lo.y; j <= hi.y; ++j) { + for (auto i = lo.x; i <= hi.x; ++i) { + + int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); + index += halfbin; + + if( index >=0 && index <= nbins) { + bins[index] += 1; + totbin++; + } + + count++; + + } + } + } + } + + } // end MFIter + + ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count); + ParallelDescriptor::ReduceLongSum(totbin); + Print() << "Points outside of range "<< count - totbin << " " << + (double)(count-totbin)/count << std::endl; + + // print out contents of bins to the screen + for (int i=0; i +#include + +#include +#include +#include + +using namespace amrex; +using namespace std; + +static +void +PrintUsage (const char* progName) +{ + Print() << std::endl + << "This utility computes PDF of vorticity and divergence, and various thermodynamic scalars," << std::endl; + + Print() << "Usage:" << '\n'; + Print() << progName << " " << std::endl + << "OR" << std::endl + << progName << std::endl + << " steps=" << std::endl + << " nbins= " << std::endl + << " range= " << std::endl + << std::endl; + + exit(1); +} + + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + { + + if (argc == 1) { + PrintUsage(argv[0]); + } + + ParmParse pp; + + std::vector steps; + pp.queryarr("steps",steps); + int nsteps = steps.size(); + Print() << "number of steps to process: " << nsteps << std::endl; + + Vector scalar_out(5); + scalar_out[0] = amrex::Concatenate("div_pdf_",steps[0],9); + scalar_out[0] = scalar_out[0] + "_"; + scalar_out[0] = amrex::Concatenate(scalar_out[0],steps[nsteps-1],9); + scalar_out[1] = amrex::Concatenate("vortx_pdf_",steps[0],9); + scalar_out[1] = scalar_out[1] + "_"; + scalar_out[1] = amrex::Concatenate(scalar_out[1],steps[nsteps-1],9); + 
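+     // [Editor's note: illustrative aside, not part of the original patch.]
+     // amrex::Concatenate(root, num, mindigits) appends num to root, zero-padded
+     // to mindigits digits, so the two calls per name bracket the processed
+     // step range in the output filename:
+     //
+     //     std::string s = amrex::Concatenate("div_pdf_", 1000, 9); // "div_pdf_000001000"
+     //     s += "_";
+     //     s = amrex::Concatenate(s, 5000, 9);                      // "div_pdf_000001000_000005000"
+     //
+     // i.e. one output file per scalar covering steps[0] through steps[nsteps-1].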
scalar_out[2] = amrex::Concatenate("vorty_pdf_",steps[0],9); + scalar_out[2] = scalar_out[2] + "_"; + scalar_out[2] = amrex::Concatenate(scalar_out[2],steps[nsteps-1],9); + scalar_out[3] = amrex::Concatenate("vortz_pdf_",steps[0],9); + scalar_out[3] = scalar_out[3] + "_"; + scalar_out[3] = amrex::Concatenate(scalar_out[3],steps[nsteps-1],9); + scalar_out[4] = amrex::Concatenate("vort_pdf_",steps[0],9); + scalar_out[4] = scalar_out[4] + "_"; + scalar_out[4] = amrex::Concatenate(scalar_out[4],steps[nsteps-1],9); + + int nbins; + pp.get("nbins", nbins); + + Real range; + pp.get("range",range); + + Vector > bins; + Vector count(5,0); + Vector totbin(5,0); + for (int i=0; i<5; ++i) { + bins.push_back(Vector (nbins+1,0.)); + } + + int halfbin = nbins/2; + Real hbinwidth = range/nbins; + Real binwidth = 2.*range/nbins; + + for (int step=0; step> str; + + // read in number of components from header + int ncomp; + x >> ncomp; + + // read in variable names from header + int flag = 0; + int vort_ind, div_ind, velx_sol_ind, vely_sol_ind, velz_sol_ind, velx_dil_ind, vely_dil_ind, velz_dil_ind; + for (int n=0; n> str; + if (str == "vort") vort_ind = flag; + if (str == "div") div_ind = flag; + if (str == "ux_s") velx_sol_ind = flag; + if (str == "uy_s") vely_sol_ind = flag; + if (str == "uz_s") velz_sol_ind = flag; + flag ++; + } + + // read in dimensionality from header + int dim; + x >> dim; + + // read in time + Real time; + x >> time; + + // read in finest level + int finest_level; + x >> finest_level; + + // read in prob_lo and prob_hi + amrex::GpuArray prob_lo, prob_hi; + for (int i=0; i<3; ++i) { + x >> prob_lo[i]; + } + for (int i=0; i<3; ++i) { + x >> prob_hi[i]; + } + + // now read in the plotfile data + // check to see whether the user pointed to the plotfile base directory + // or the data itself + if (amrex::FileExists(iFile+"/Level_0/Cell_H")) { + iFile += "/Level_0/Cell"; + } + if (amrex::FileExists(iFile+"/Level_00/Cell_H")) { + iFile += "/Level_00/Cell"; + } + + // storage for the input coarse and fine MultiFabs + MultiFab mf; + + // read in plotfile mf to MultiFab + VisMF::Read(mf, iFile); + + // get BoxArray and DistributionMapping + BoxArray ba = mf.boxArray(); + DistributionMapping dmap = mf.DistributionMap(); + + // physical dimensions of problem + RealBox real_box({AMREX_D_DECL(prob_lo[0],prob_lo[1],prob_lo[2])}, + {AMREX_D_DECL(prob_hi[0],prob_hi[1],prob_hi[2])}); + + // single box with the enire domain + Box domain = ba.minimalBox().enclosedCells(); + + Real ncells = (double) domain.numPts(); + + // set to 1 (periodic) + Vector is_periodic(3,1); + + Geometry geom(domain,&real_box,CoordSys::cartesian,is_periodic.data()); + + const Real* dx = geom.CellSize(); + + //////////////////////////////////////////////////////////////////////// + ////////////// velocity Laplacian PDFs///////////// //////////////////// + //////////////////////////////////////////////////////////////////////// + MultiFab vel_grown(ba,dmap,6,1); + MultiFab vel_sol (ba,dmap,3,1); + + // copy shifted velocity components from mf into vel_grown + Copy(vel_grown,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_grown,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_grown,mf,velz_sol_ind,2,1,0); // sol + + Copy(vel_grown,mf,velx_dil_ind,3,1,0); // dil + Copy(vel_grown,mf,vely_dil_ind,4,1,0); // dil + Copy(vel_grown,mf,velz_dil_ind,5,1,0); // dil + + Copy(vel_sol,mf,velx_sol_ind,0,1,0); // sol + Copy(vel_sol,mf,vely_sol_ind,1,1,0); // sol + Copy(vel_sol,mf,velz_sol_ind,2,1,0); // sol + + // fill ghost cells of vel_grown + 
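+         // [Editor's note: illustrative aside, not part of the original patch.]
+         // The centered differences in the vorticity loop below reach i+-1, j+-1
+         // and k+-1, which is why vel_grown and vel_sol were built with one ghost
+         // cell and are filled here across the periodic boundaries. A minimal
+         // sketch of the pattern, using the same geom/ba/dmap:
+         //
+         //     amrex::MultiFab u(ba, dmap, 1, 1);   // 1 component, 1 ghost cell
+         //     u.setVal(1.0);
+         //     u.FillBoundary(geom.periodicity());  // ghost cells get periodic images
+         //     // stencils like u(i+1,j,k) - u(i-1,j,k) are now valid on the validbox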
+         vel_grown.FillBoundary(geom.periodicity());
+         vel_sol  .FillBoundary(geom.periodicity());
+
+         ////////////////////////////////////////////////////////////////////////
+         ///////////////////////// scalar PDFs ///////////////////////////////////
+         ////////////////////////////////////////////////////////////////////////
+         MultiFab scalar(ba,dmap,4,0); // div, vort_x, vort_y, vort_z
+         scalar.setVal(0.0);
+         Copy(scalar,mf,div_ind,0,1,0);
+
+         // Compute vorticity components and store in scalar
+         // (vel_sol holds the solenoidal velocity in components 0,1,2)
+         for ( MFIter mfi(vel_sol,false); mfi.isValid(); ++mfi ) {
+
+             const Box& bx = mfi.validbox();
+             const auto lo = amrex::lbound(bx);
+             const auto hi = amrex::ubound(bx);
+
+             Array4<Real> const& sol = vel_sol.array(mfi);
+             Array4<Real> const& sca = scalar .array(mfi);
+
+             for (auto k = lo.z; k <= hi.z; ++k) {
+             for (auto j = lo.y; j <= hi.y; ++j) {
+             for (auto i = lo.x; i <= hi.x; ++i) {
+                 // dw/dy - dv/dz
+                 sca(i,j,k,1) =
+                     (sol(i,j+1,k,2) - sol(i,j-1,k,2)) / (2.*dx[1]) -
+                     (sol(i,j,k+1,1) - sol(i,j,k-1,1)) / (2.*dx[2]);
+
+                 // dv/dx - du/dy
+                 sca(i,j,k,2) =
+                     (sol(i+1,j,k,1) - sol(i-1,j,k,1)) / (2.*dx[0]) -
+                     (sol(i,j+1,k,0) - sol(i,j-1,k,0)) / (2.*dx[1]);
+
+                 // du/dz - dw/dx
+                 sca(i,j,k,3) =
+                     (sol(i,j,k+1,0) - sol(i,j,k-1,0)) / (2.*dx[2]) -
+                     (sol(i+1,j,k,2) - sol(i-1,j,k,2)) / (2.*dx[0]);
+
+             }
+             }
+             }
+         }
+
+         // compute spatial mean
+         Real mean_div   = scalar.sum(0) / (ncells);
+         Real mean_vortx = scalar.sum(1) / (ncells);
+         Real mean_vorty = scalar.sum(2) / (ncells);
+         Real mean_vortz = scalar.sum(3) / (ncells);
+
+         // get fluctuations
+         scalar.plus(-1.0*mean_div,   0, 1);
+         scalar.plus(-1.0*mean_vortx, 1, 1);
+         scalar.plus(-1.0*mean_vorty, 2, 1);
+         scalar.plus(-1.0*mean_vortz, 3, 1);
+
+         // get rms
+         Real rms_div   = scalar.norm2(0) / sqrt(ncells);
+         Real rms_vortx = scalar.norm2(1) / sqrt(ncells);
+         Real rms_vorty = scalar.norm2(2) / sqrt(ncells);
+         Real rms_vortz = scalar.norm2(3) / sqrt(ncells);
+
+         // scale by rms
+         scalar.mult(1.0/rms_div,   0, 1);
+         scalar.mult(1.0/rms_vortx, 1, 1);
+         scalar.mult(1.0/rms_vorty, 2, 1);
+         scalar.mult(1.0/rms_vortz, 3, 1);
+
+         // compute pdfs
+         for (int m = 0; m < 4; ++m) {
+
+             for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+                 const Box& bx = mfi.validbox();
+                 const auto lo = amrex::lbound(bx);
+                 const auto hi = amrex::ubound(bx);
+
+                 const Array4<Real>& sca = scalar.array(mfi);
+
+                 for (auto k = lo.z; k <= hi.z; ++k) {
+                     for (auto j = lo.y; j <= hi.y; ++j) {
+                         for (auto i = lo.x; i <= hi.x; ++i) {
+
+                             int index = floor((sca(i,j,k,m) + hbinwidth)/binwidth);
+                             index += halfbin;
+
+                             if( index >=0 && index <= nbins) {
+                                 bins[m][index] += 1;
+                                 totbin[m]++;
+                             }
+
+                             count[m]++;
+
+                         }
+                     }
+                 }
+
+             } // end MFIter
+             ParallelDescriptor::ReduceRealSum(bins[m].dataPtr(),nbins+1);
+             ParallelDescriptor::ReduceLongSum(count[m]);
+             ParallelDescriptor::ReduceLongSum(totbin[m]);
+
+             Print() << "Points outside of range "<< count[m] - totbin[m] << " " <<
+                 (double)(count[m]-totbin[m])/count[m] << std::endl;
+         }
+
+         // compute vorticity pdf (components 1-3 combined)
+         for ( MFIter mfi(scalar,false); mfi.isValid(); ++mfi ) {
+
+             const Box& bx = mfi.validbox();
+             const auto lo = amrex::lbound(bx);
+             const auto hi = amrex::ubound(bx);
+
+             const Array4<Real>& sca = scalar.array(mfi);
+
+             for (auto n = 1; n < 4; ++n) {
+                 for (auto k = lo.z; k <= hi.z; ++k) {
+                     for (auto j = lo.y; j <= hi.y; ++j) {
+                         for (auto i = lo.x; i <= hi.x; ++i) {
+
+                             int index = floor((sca(i,j,k,n) + hbinwidth)/binwidth);
+                             
index += halfbin; + + if( index >=0 && index <= nbins) { + bins[4][index] += 1; + totbin[4]++; + } + + count[4]++; + + } + } + } + } + + } // end MFIter + ParallelDescriptor::ReduceRealSum(bins[4].dataPtr(),nbins+1); + ParallelDescriptor::ReduceLongSum(count[4]); + ParallelDescriptor::ReduceLongSum(totbin[4]); + + Print() << "Points outside of range "<< count[4] - totbin[4] << " " << + (double)(count[4]-totbin[4])/count[4] << std::endl; + + } // end nsteps + + // print out contents of bins to the screen + for (int m=0; m<5; ++m) { + for (int i=0; i bins(nbins+1,0.); +// +// int halfbin = nbins/2; +// Real hbinwidth = range/nbins; +// Real binwidth = 2.*range/nbins; +// amrex::Long count=0; +// amrex::Long totbin=0; +// for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; +// +// for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { +// +// const Box& bx = mfi.validbox(); +// const auto lo = amrex::lbound(bx); +// const auto hi = amrex::ubound(bx); +// +// const Array4& vel = vel_decomp.array(mfi); +// +// for (auto n = 0; n < 3; ++n) { +// for (auto k = lo.z; k <= hi.z; ++k) { +// for (auto j = lo.y; j <= hi.y; ++j) { +// for (auto i = lo.x; i <= hi.x; ++i) { +// +// int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); +// index += halfbin; +// +// if( index >=0 && index <= nbins) { +// bins[index] += 1; +// totbin++; +// } +// +// count++; +// +// } +// } +// } +// } +// +// } // end MFIter +// +// ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); +// ParallelDescriptor::ReduceLongSum(count); +// ParallelDescriptor::ReduceLongSum(totbin); +// Print() << "Points outside of range "<< count - totbin << " " << +// (double)(count-totbin)/count << std::endl; +// +// // print out contents of bins to the screen +// for (int i=0; i bins(nbins+1,0.); +// +// int halfbin = nbins/2; +// Real hbinwidth = range/nbins; +// Real binwidth = 2.*range/nbins; +// amrex::Long count=0; +// amrex::Long totbin=0; +// for (int ind=0 ; ind < nbins+1; ind++) bins[ind]=0; +// +// for ( MFIter mfi(vel_decomp,false); mfi.isValid(); ++mfi ) { +// +// const Box& bx = mfi.validbox(); +// const auto lo = amrex::lbound(bx); +// const auto hi = amrex::ubound(bx); +// +// const Array4& vel = vel_decomp.array(mfi); +// +// for (auto n = 3; n < 6; ++n) { +// for (auto k = lo.z; k <= hi.z; ++k) { +// for (auto j = lo.y; j <= hi.y; ++j) { +// for (auto i = lo.x; i <= hi.x; ++i) { +// +// int index = floor((vel(i,j,k,n) + hbinwidth)/binwidth); +// index += halfbin; +// +// if( index >=0 && index <= nbins) { +// bins[index] += 1; +// totbin++; +// } +// +// count++; +// +// } +// } +// } +// } +// +// } // end MFIter +// +// ParallelDescriptor::ReduceRealSum(bins.dataPtr(),nbins+1); +// ParallelDescriptor::ReduceLongSum(count); +// ParallelDescriptor::ReduceLongSum(totbin); +// Print() << "Points outside of range "<< count - totbin << " " << +// (double)(count-totbin)/count << std::endl; +// +// // print out contents of bins to the screen +// for (int i=0; i +//#if defined(HEFFTE_FFTW) || defined(HEFFTE_CUFFT) || defined(HEFFTE_ROCFFT) // use heFFTe +//#include +//#elif defined(USE_DISTRIBUTED_FFT) // use single grid FFT +//#include +//#else // use single grid FFT +//#include +//#endif +#endif diff --git a/src_analysis/TurbSpectra_distributed.H b/src_analysis/TurbSpectra_distributed.H new file mode 100644 index 00000000..41975655 --- /dev/null +++ b/src_analysis/TurbSpectra_distributed.H @@ -0,0 +1,52 @@ +#ifndef _TurbSpectraDistributed_H_ +#define _TurbSpectraDistributed_H_ + +#include +#include +#include 
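+// [Editor's note: illustrative sketch, not part of the original patch.]
+// The distributed implementation in TurbSpectra_distributed.cpp is built on
+// AMReX's native parallel FFT. The core pattern, for a periodic cell-centered
+// MultiFab `phi` on `geom`, is roughly:
+//
+//     amrex::FFT::R2C<amrex::Real> r2c(geom.Domain());
+//     auto const& [cba, cdm] = r2c.getSpectralDataLayout();
+//     cMultiFab phi_hat(cba, cdm, 1, 0);   // complex data, (Nx/2+1) x Ny x Nz
+//     r2c.forward(phi, phi_hat);           // real -> complex
+//     r2c.backward(phi_hat, phi);          // complex -> real, unnormalized
+//
+// The transforms are unnormalized, which is why the .cpp scales the spectra by
+// sqrt(npts) and divides the decomposed velocities by sqrt(npts) after inverting.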
+#include + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const Real& sqrtnpts, +// const int& step); +void IntegrateKVelocity(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, +// const BaseFab >& spectral_fieldy, +// const BaseFab >& spectral_fieldz, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const int& step); +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); + + +#endif diff --git a/src_analysis/TurbSpectra_distributed.cpp b/src_analysis/TurbSpectra_distributed.cpp new file mode 100644 index 00000000..e628c5ab --- /dev/null +++ b/src_analysis/TurbSpectra_distributed.cpp @@ -0,0 +1,452 @@ +#include + +#include "TurbSpectra.H" +#include "common_functions.H" + +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), + "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), + "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, + "TurbSpectrumScalar: Must have one Box per MPI process"); + + int ncomp = variables.nComp(); + + Box domain = geom.Domain(); + auto npts = domain.numPts(); + Real sqrtnpts = std::sqrt(npts); + + amrex::FFT::R2C r2c(geom.Domain()); + + auto const& [cba, cdm] = r2c.getSpectralDataLayout(); + + MultiFab cov(cba, cdm, ncomp, 0); + + for (int comp=0; comp const& data = cov.array(mfi); + Array4 > spectral = cmf.const_array(mfi); + const Box& bx = mfi.validbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re = spectral(i,j,k).real(); + Real im = spectral(i,j,k).imag(); + data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); + }); + } + + // Integrate spectra over k-shells + IntegrateKScalar(cov,name_gpu,step,comp_gpu); + } +} + + +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, 
dilatational"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, + "TurbSpectrumVelDecomp: Must have one Box per MPI process"); + + const GpuArray dx = geom.CellSizeArray(); + + Box domain = geom.Domain(); + auto npts = domain.numPts(); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of vel + DistributionMapping dm = vel.DistributionMap(); + BoxArray ba = vel.boxArray(); + + amrex::FFT::R2C r2c(geom.Domain()); + + // box array and dmap for FFT + auto const& [cba, cdm] = r2c.getSpectralDataLayout(); + + // each MPI rank gets storage for its piece of the fft + cMultiFab spectral_field_Tx(cba,cdm,1,0); // totalx + cMultiFab spectral_field_Ty(cba,cdm,1,0); // totaly + cMultiFab spectral_field_Tz(cba,cdm,1,0); // totalz + cMultiFab spectral_field_Sx(cba,cdm,1,0); // solenoidalx + cMultiFab spectral_field_Sy(cba,cdm,1,0); // solenoidaly + cMultiFab spectral_field_Sz(cba,cdm,1,0); // solenoidalz + cMultiFab spectral_field_Dx(cba,cdm,1,0); // dilatationalx + cMultiFab spectral_field_Dy(cba,cdm,1,0); // dilatationaly + cMultiFab spectral_field_Dz(cba,cdm,1,0); // dilatationalz + + // ForwardTransform + // X + { + MultiFab vel_single(vel, amrex::make_alias, 0, 1); + r2c.forward(vel_single,spectral_field_Tx); + } + // Y + { + MultiFab vel_single(vel, amrex::make_alias, 1, 1); + r2c.forward(vel_single,spectral_field_Ty); + } + // Z + { + MultiFab vel_single(vel, amrex::make_alias, 2, 1); + r2c.forward(vel_single,spectral_field_Tz); + } + + // Decompose velocity field into solenoidal and dilatational + for (MFIter mfi(spectral_field_Tx); mfi.isValid(); ++mfi) { + Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(mfi); + Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(mfi); + Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(mfi); + Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(mfi); + Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(mfi); + Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(mfi); + Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(mfi); + Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(mfi); + Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(mfi); + const Box& bx = mfi.validbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + { + + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; + + if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + // Gradient Operators + GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + 
spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + }); + } + + MultiFab cov(cba, cdm, 3, 0); // total, solenoidal, dilatational + + // Fill in the covariance multifab + Real scaling_gpu = scaling; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spec_tx = spectral_field_Tx.const_array(mfi); + Array4 > spec_ty = spectral_field_Ty.const_array(mfi); + Array4 > spec_tz = spectral_field_Tz.const_array(mfi); + Array4 > spec_sx = spectral_field_Sx.const_array(mfi); + Array4 > spec_sy = spectral_field_Sy.const_array(mfi); + Array4 > spec_sz = spectral_field_Sz.const_array(mfi); + Array4 > spec_dx = spectral_field_Dx.const_array(mfi); + Array4 > spec_dy = spectral_field_Dy.const_array(mfi); + Array4 > spec_dz = spectral_field_Dz.const_array(mfi); + const Box& bx = mfi.validbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= n_cells[0]/2) { + Real re_x, re_y, re_z, im_x, im_y, im_z; + + re_x = spec_tx(i,j,k).real(); + im_x = spec_tx(i,j,k).imag(); + re_y = spec_ty(i,j,k).real(); + im_y = spec_ty(i,j,k).imag(); + re_z = spec_tz(i,j,k).real(); + im_z = spec_tz(i,j,k).imag(); + data(i,j,k,0) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_sx(i,j,k).real(); + im_x = spec_sx(i,j,k).imag(); + re_y = spec_sy(i,j,k).real(); + im_y = spec_sy(i,j,k).imag(); + re_z = spec_sz(i,j,k).real(); + im_z = spec_sz(i,j,k).imag(); + data(i,j,k,1) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_dx(i,j,k).real(); + im_x = spec_dx(i,j,k).imag(); + re_y = spec_dy(i,j,k).real(); + im_y = spec_dy(i,j,k).imag(); + re_z = spec_dz(i,j,k).real(); + im_z = spec_dz(i,j,k).imag(); + data(i,j,k,2) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + } + else { + amrex::Abort("check the code; i should not go beyond n_cells[0]/2"); + } + }); + } + + // Integrate K spectrum for velocities + IntegrateKVelocity(cov,"vel_total" ,step,0); + IntegrateKVelocity(cov,"vel_solenoidal",step,1); + IntegrateKVelocity(cov,"vel_dilational",step,2); + + // inverse Fourier transform solenoidal and dilatational components + { + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 0, 1); + r2c.backward(spectral_field_Sx,vel_decomp_single); + } + { + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 1, 1); + r2c.backward(spectral_field_Sy,vel_decomp_single); + } + { + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 2, 1); + r2c.backward(spectral_field_Sz,vel_decomp_single); + } + { + MultiFab vel_decomp_single(vel_decomp, 
amrex::make_alias, 3, 1); + r2c.backward(spectral_field_Dx,vel_decomp_single); + } + { + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 4, 1); + r2c.backward(spectral_field_Dy,vel_decomp_single); + } + { + MultiFab vel_decomp_single(vel_decomp, amrex::make_alias, 5, 1); + r2c.backward(spectral_field_Dz,vel_decomp_single); + } + + vel_decomp.mult(1.0/sqrtnpts); + +} + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts, 0); + Gpu::DeviceVector phicnt_device(npts, 0); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + int comp_gpu = comp; + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4 & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= n_cells[0]/2) { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + else { + amrex::Abort("check the code; i should not go beyond n_cells[0]/2"); + } + }); + } + + Gpu::HostVector phisum_host(npts); + Gpu::HostVector phicnt_host(npts); + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::copyAsync(Gpu::deviceToHost, phicnt_device.begin(), phicnt_device.end(), phicnt_host.begin()); + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); + + Real dk = 1.; + for (int d = 1; d < npts; ++d) { + phisum_host[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_host[d]; + } + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d phisum_device(npts, 0); + Gpu::DeviceVector phicnt_device(npts, 0); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + int comp_gpu = comp; + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4 & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= n_cells[0]/2) { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + else { + amrex::Abort("check the code; i should not go beyond n_cells[0]/2"); + } + }); + } + + Gpu::HostVector phisum_host(npts); + Gpu::HostVector phicnt_host(npts); + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), 
phisum_device.end(), phisum_host.begin()); + Gpu::copyAsync(Gpu::deviceToHost, phicnt_device.begin(), phicnt_device.end(), phicnt_host.begin()); + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_host.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_host.dataPtr(),npts); + + Real dk = 1.; + for (int d = 1; d < npts; ++d) { + phisum_host[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_host[d]; + } + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d + +#include +#include +#include +#include + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKScalarHeffte(const BaseFab >& spectral_field, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const Real& sqrtnpts, +// const int& step); +void IntegrateKVelocity(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp); +//void IntegrateKVelocityHeffte(const BaseFab >& spectral_fieldx, +// const BaseFab >& spectral_fieldy, +// const BaseFab >& spectral_fieldz, +// const std::string& name, const Real& scaling, +// const Box& c_local_box, +// const int& step); +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); + + +#endif diff --git a/src_analysis/TurbSpectra_heffte.cpp b/src_analysis/TurbSpectra_heffte.cpp new file mode 100644 index 00000000..90cc2615 --- /dev/null +++ b/src_analysis/TurbSpectra_heffte.cpp @@ -0,0 +1,749 @@ +#include "TurbSpectra.H" +#include "common_functions.H" + +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), + "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), + "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.local_size() == 1, + "TurbSpectrumScalar: Must have one Box per MPI process when using heFFTe"); + + int ncomp = variables.nComp(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of variables + DistributionMapping dm = variables.DistributionMap(); + BoxArray ba = variables.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if 
(ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB + // create a BoxArray containing the fft boxes + // by construction, these boxes correlate to the associated spectral_data + // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank + BoxArray fft_ba; + { + BoxList bl; + bl.reserve(ba.size()); + + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + + Box r_box = b; + Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); + + // this avoids overlap for the cases when one or more r_box's + // have an even cell index in the hi-x cell + if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { + c_box.setBig(0,c_box.bigEnd(0)-1); + } + + // increase the size of boxes touching the hi-x domain by 1 in x + // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing + if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_box.growHi(0,1); + } + bl.push_back(c_box); + + } + fft_ba.define(std::move(bl)); + } + MultiFab cov(fft_ba, dm, ncomp, 0); + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field(c_local_box, 1, The_Device_Arena()); + MultiFab variables_single(ba, dm, 1, 0); + using heffte_complex = typename heffte::fft_output::type; + + int r2c_direction = 0; + for (int comp=0; comp fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field.dataPtr(); + variables_single.ParallelCopy(variables,comp,0,1); + fft.forward(variables_single[local_boxid].dataPtr(),spectral_data); + Gpu::streamSynchronize(); + + // Fill in the covariance multifab + int comp_gpu = comp; + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_i_gpu = scaling[comp]; + std::string name_gpu = var_names[comp]; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spectral = spectral_field.const_array(); + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re = spectral(i,j,k).real(); + Real im = spectral(i,j,k).imag(); + data(i,j,k,comp_gpu) = (re*re + im*im)/(sqrtnpts_gpu*sqrtnpts_gpu*scaling_i_gpu); + }); + } + + // Integrate spectra over k-shells + IntegrateKScalar(cov,name_gpu,step,comp_gpu); + } +} + +void TurbSpectrumVelDecomp(const 
MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.local_size() == 1, + "TurbSpectrumVelDecomp: Must have one Box per MPI process when using heFFTe"); + + const GpuArray dx = geom.CellSizeArray(); + + long npts; + Box domain = geom.Domain(); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + Real sqrtnpts = std::sqrt(npts); + + // get box array and distribution map of vel + DistributionMapping dm = vel.DistributionMap(); + BoxArray ba = vel.boxArray(); + + // since there is 1 MPI rank per box, each MPI rank obtains its local box and the associated boxid + Box local_box; + int local_boxid; + { + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + // each MPI rank has its own local_box Box and local_boxid ID + if (ParallelDescriptor::MyProc() == dm[i]) { + local_box = b; + local_boxid = i; + } + } + } + + // now each MPI rank works on its own box + // for real->complex fft's, the fft is stored in an (nx/2+1) x ny x nz dataset + + // start by coarsening each box by 2 in the x-direction + Box c_local_box = amrex::coarsen(local_box, IntVect(AMREX_D_DECL(2,1,1))); + + // if the coarsened box's high-x index is even, we shrink the size in 1 in x + // this avoids overlap between coarsened boxes + if (c_local_box.bigEnd(0) * 2 == local_box.bigEnd(0)) { + c_local_box.setBig(0,c_local_box.bigEnd(0)-1); + } + // for any boxes that touch the hi-x domain we + // increase the size of boxes by 1 in x + // this makes the overall fft dataset have size (Nx/2+1 x Ny x Nz) + if (local_box.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_local_box.growHi(0,1); + } + + // each MPI rank gets storage for its piece of the fft + BaseFab > spectral_field_Tx(c_local_box, 1, The_Device_Arena()); // totalx + BaseFab > spectral_field_Ty(c_local_box, 1, The_Device_Arena()); // totaly + BaseFab > spectral_field_Tz(c_local_box, 1, The_Device_Arena()); // totalz + BaseFab > spectral_field_Sx(c_local_box, 1, The_Device_Arena()); // solenoidalx + BaseFab > spectral_field_Sy(c_local_box, 1, The_Device_Arena()); // solenoidaly + BaseFab > spectral_field_Sz(c_local_box, 1, The_Device_Arena()); // solenoidalz + BaseFab > spectral_field_Dx(c_local_box, 1, The_Device_Arena()); // dilatationalx + BaseFab > spectral_field_Dy(c_local_box, 1, The_Device_Arena()); // dilatationaly + BaseFab > spectral_field_Dz(c_local_box, 1, The_Device_Arena()); // dilatationalz + MultiFab vel_single(ba, dm, 1, 0); + + int r2c_direction = 0; + + // ForwardTransform + // X + using heffte_complex = typename heffte::fft_output::type; + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); 
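// A standalone sketch of the heFFTe r2c API exercised above, assuming the
// fftw backend and a domain owned entirely by the calling rank (bounds and
// variable names here are illustrative, not from this file):
//
//     heffte::box3d<> inbox ({0,0,0}, {nx-1, ny-1, nz-1});   // real data
//     heffte::box3d<> outbox({0,0,0}, {nx/2, ny-1, nz-1});   // nx/2+1 in x
//     heffte::fft3d_r2c<heffte::backend::fftw> fft(inbox, outbox,
//                                                  0 /*r2c_direction*/,
//                                                  MPI_COMM_WORLD);
//     std::vector<double>               real_in (fft.size_inbox());
//     std::vector<std::complex<double>> cplx_out(fft.size_outbox());
//     fft.forward(real_in.data(), cplx_out.data());
//
// size_inbox()/size_outbox() report the local element counts implied by
// the in/out boxes handed to the constructor.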
+ + vel_single.ParallelCopy(vel, 0, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tx.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + // Y + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 1, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Ty.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + // Z + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + vel_single.ParallelCopy(vel, 2, 0, 1); + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Tz.dataPtr(); + fft.forward(vel_single[local_boxid].dataPtr(),spectral_data); + } + + Gpu::streamSynchronize(); + + // Decompose velocity field into solenoidal and dilatational + Array4< GpuComplex > spectral_tx = spectral_field_Tx.array(); + Array4< GpuComplex > spectral_ty = spectral_field_Ty.array(); + Array4< GpuComplex > spectral_tz = spectral_field_Tz.array(); + Array4< GpuComplex > spectral_sx = spectral_field_Sx.array(); + Array4< GpuComplex > spectral_sy = spectral_field_Sy.array(); + Array4< GpuComplex > spectral_sz = spectral_field_Sz.array(); + Array4< GpuComplex > spectral_dx = spectral_field_Dx.array(); + Array4< GpuComplex > spectral_dy = spectral_field_Dy.array(); + Array4< GpuComplex > spectral_dz = spectral_field_Dz.array(); + ParallelFor(c_local_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) + { + + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; + + if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + // Gradient Operators + GxR = (cos(2.0*M_PI*ki/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*ki/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*kj/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*kj/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*kk/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*kk/nz)-0.0)/dx[2]; + } + else { // conjugate + amrex::Abort("check the code; i should not go beyond bx.length(0)/2"); + } + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = 
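// How to read the operators built above: with the one-sided discrete
// gradient symbol G_d(k) = (exp(2*pi*i*k_d/n_d) - 1)/dx_d, the pairs
// (GxR,GxC), (GyR,GyC), (GzR,GzC) are the real/imaginary parts of
// G_x, G_y, G_z, and divR/divC accumulate Re/Im of
// div(u)^ = sum_d G_d u_d^. The block that follows is then the discrete
// Helmholtz projection
//
//     u_dil,d^ = conj(G_d) * div(u)^ / |G|^2 ,   with |G|^2 = Lap ,
//
// and the solenoidal part is recovered as the remainder u^ - u_dil^.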
spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + + }); + + Gpu::streamSynchronize(); + + // BOX ARRAY TO STORE COVARIANCE MATRIX IN A MFAB + // create a BoxArray containing the fft boxes + // by construction, these boxes correlate to the associated spectral_data + // this we can copy the spectral data into this multifab since we know they are owned by the same MPI rank + BoxArray fft_ba; + { + BoxList bl; + bl.reserve(ba.size()); + + for (int i = 0; i < ba.size(); ++i) { + Box b = ba[i]; + + Box r_box = b; + Box c_box = amrex::coarsen(r_box, IntVect(AMREX_D_DECL(2,1,1))); + + // this avoids overlap for the cases when one or more r_box's + // have an even cell index in the hi-x cell + if (c_box.bigEnd(0) * 2 == r_box.bigEnd(0)) { + c_box.setBig(0,c_box.bigEnd(0)-1); + } + + // increase the size of boxes touching the hi-x domain by 1 in x + // this is an (Nx x Ny x Nz) -> (Nx/2+1 x Ny x Nz) real-to-complex sizing + if (b.bigEnd(0) == geom.Domain().bigEnd(0)) { + c_box.growHi(0,1); + } + bl.push_back(c_box); + + } + fft_ba.define(std::move(bl)); + } + MultiFab cov(fft_ba, dm, 3, 0); // total, solenoidal, dilatational + + // Fill in the covariance multifab + Real sqrtnpts_gpu = sqrtnpts; + Real scaling_gpu = scaling; + for (MFIter mfi(cov); mfi.isValid(); ++mfi) { + Array4 const& data = cov.array(mfi); + Array4 > spec_tx = spectral_field_Tx.const_array(); + Array4 > spec_ty = spectral_field_Ty.const_array(); + Array4 > spec_tz = spectral_field_Tz.const_array(); + Array4 > spec_sx = spectral_field_Sx.const_array(); + Array4 > spec_sy = spectral_field_Sy.const_array(); + Array4 > spec_sz = spectral_field_Sz.const_array(); + Array4 > spec_dx = spectral_field_Dx.const_array(); + Array4 > spec_dy = spectral_field_Dy.const_array(); + Array4 > spec_dz = spectral_field_Dz.const_array(); + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + Real re_x, re_y, re_z, im_x, im_y, im_z; + + re_x = spec_tx(i,j,k).real(); + im_x = spec_tx(i,j,k).imag(); + re_y = spec_ty(i,j,k).real(); + im_y = spec_ty(i,j,k).imag(); + re_z = spec_tz(i,j,k).real(); + im_z = spec_tz(i,j,k).imag(); + data(i,j,k,0) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_sx(i,j,k).real(); + im_x = spec_sx(i,j,k).imag(); + re_y = spec_sy(i,j,k).real(); + im_y = spec_sy(i,j,k).imag(); + re_z = spec_sz(i,j,k).real(); + im_z = spec_sz(i,j,k).imag(); + data(i,j,k,1) = (re_x*re_x + im_x*im_x + + re_y*re_y + im_y*im_y + + re_z*re_z + im_z*im_z)/(scaling_gpu); + re_x = spec_dx(i,j,k).real(); + im_x = spec_dx(i,j,k).imag(); + re_y = 
spec_dy(i,j,k).real();
+            im_y = spec_dy(i,j,k).imag();
+            re_z = spec_dz(i,j,k).real();
+            im_z = spec_dz(i,j,k).imag();
+            data(i,j,k,2) = (re_x*re_x + im_x*im_x +
+                             re_y*re_y + im_y*im_y +
+                             re_z*re_z + im_z*im_z)/(scaling_gpu);
+        });
+    }
+
+    // Integrate K spectrum for velocities
+    IntegrateKVelocity(cov,"vel_total"       ,step,0);
+    IntegrateKVelocity(cov,"vel_solenoidal"  ,step,1);
+    IntegrateKVelocity(cov,"vel_dilatational",step,2);
+
+    MultiFab vel_decomp_single(ba, dm, 1, 0);
+    // inverse Fourier transform solenoidal and dilatational components
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+        ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+          {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+         {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+          {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+         r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sx.dataPtr();
+        fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp.ParallelCopy(vel_decomp_single, 0, 0, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+        ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+          {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+         {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+          {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+         r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sy.dataPtr();
+        fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp.ParallelCopy(vel_decomp_single, 0, 1, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+        ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+          {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+         {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+          {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+         r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Sz.dataPtr();
+        fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
+        vel_decomp.ParallelCopy(vel_decomp_single, 0, 2, 1);
+    }
+    {
+#if defined(HEFFTE_CUFFT)
+        heffte::fft3d_r2c<heffte::backend::cufft> fft
+#elif defined(HEFFTE_ROCFFT)
+        heffte::fft3d_r2c<heffte::backend::rocfft> fft
+#elif defined(HEFFTE_FFTW)
+        heffte::fft3d_r2c<heffte::backend::fftw> fft
+#endif
+        ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)},
+          {local_box.bigEnd(0)  ,local_box.bigEnd(1)  ,local_box.bigEnd(2)}},
+         {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)},
+          {c_local_box.bigEnd(0)  ,c_local_box.bigEnd(1)  ,c_local_box.bigEnd(2)}},
+         r2c_direction, ParallelDescriptor::Communicator());
+
+        heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dx.dataPtr();
+        fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr());
+
+        Gpu::streamSynchronize();
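// Normalization bookkeeping, as the scaling reads in this file: heFFTe
// transforms are unnormalized by default, so a forward+backward round trip
// gains a factor N = nx*ny*nz. The decomposition kernel above already
// applied 1/sqrt(N) to the spectra, and vel_decomp.mult(1.0/sqrtnpts) at
// the end of this routine supplies the second 1/sqrt(N), returning the
// fields at their original amplitude. The same convention keeps the
// covariance fill consistent with Parseval's identity,
//
//     sum_k |u^(k)|^2 / N = sum_x |u(x)|^2 ,
//
// which is why |u^|^2 is divided by sqrtnpts^2 there.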
vel_decomp.ParallelCopy(vel_decomp_single, 0, 3, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dy.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 4, 1); + } + { +#if defined(HEFFTE_CUFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_ROCFFT) + heffte::fft3d_r2c fft +#elif defined(HEFFTE_FFTW) + heffte::fft3d_r2c fft +#endif + ({{local_box.smallEnd(0),local_box.smallEnd(1),local_box.smallEnd(2)}, + {local_box.bigEnd(0) ,local_box.bigEnd(1) ,local_box.bigEnd(2)}}, + {{c_local_box.smallEnd(0),c_local_box.smallEnd(1),c_local_box.smallEnd(2)}, + {c_local_box.bigEnd(0) ,c_local_box.bigEnd(1) ,c_local_box.bigEnd(2)}}, + r2c_direction, ParallelDescriptor::Communicator()); + + heffte_complex* spectral_data = (heffte_complex*) spectral_field_Dz.dataPtr(); + fft.backward(spectral_data, vel_decomp_single[local_boxid].dataPtr()); + + Gpu::streamSynchronize(); + vel_decomp.ParallelCopy(vel_decomp_single, 0, 5, 1); + } + + + vel_decomp.mult(1.0/sqrtnpts); + +} + +void IntegrateKScalar(const MultiFab& cov_mag, + const std::string& name, + const int& step, + const int& comp) + +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); +// Gpu::HostVector phisum_host(npts); +// Gpu::HostVector phicnt_host(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); +// for (int d=0; d & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); + } + + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + + Real dk = 1.; + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + if (d != 0) { + phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; + } + }); + + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + + Gpu::HostVector phisum_host(npts); + + 
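// Origin of the 4.*M_PI*(d*d*dk + dk*dk*dk/12.) factor applied below:
// bin d collects the modes in a unit-width spherical shell centered on
// radius d, whose exact volume (with dk = 1) is
//
//     (4*pi/3) * ((d + 1/2)^3 - (d - 1/2)^3) = 4*pi*(d^2 + 1/12) ,
//
// so dividing by the mode count phicnt and multiplying by this volume
// converts the per-mode average into a shell-integrated spectrum.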
Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + int comp_gpu = comp; + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(cov_mag,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + + const Array4 & cov = cov_mag.const_array(mfi); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + amrex::Gpu::Atomic::Add(&(phisum_ptr[cell]), cov(i,j,k,comp_gpu)); + amrex::Gpu::Atomic::Add(&(phicnt_ptr[cell]),1); + } + }); + } + + Gpu::streamSynchronize(); + + ParallelDescriptor::ReduceRealSum(phisum_device.dataPtr(),npts); + ParallelDescriptor::ReduceIntSum(phicnt_device.dataPtr(),npts); + + Real dk = 1.; + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + if (d != 0) { + phisum_ptr[d] *= 4.*M_PI*(d*d*dk+dk*dk*dk/12.)/phicnt_ptr[d]; + } + }); + + Gpu::copyAsync(Gpu::deviceToHost, phisum_device.begin(), phisum_device.end(), phisum_host.begin()); + Gpu::streamSynchronize(); + + if (ParallelDescriptor::IOProcessor()) { + std::ofstream turb; + std::string turbBaseName = "turb_"+name; + std::string turbName = Concatenate(turbBaseName,step,7); + turbName += ".txt"; + + turb.open(turbName); + for (int d=1; d +#elif AMREX_USE_HIP +# if __has_include() // ROCm 5.3+ +# include +# else +# include +# endif +#else +#include +#include +#endif + +#include +#include +#include +#include + +#include + +#include + +#include "common_functions.H" + +#define ALIGN 16 + +using namespace amrex; + +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err); +#endif +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err); +void Assert_rocfft_status (std::string const& name, rocfft_status status); +#endif + +void IntegrateKScalar(const Vector > > >& spectral_field, + const MultiFab& variables_onegrid, + const std::string& name, + const Real& scaling, + const Real& sqrtnpts, + const int& step); +void IntegrateKVelocity(const Vector > > >& spectral_fieldx, + const Vector > > >& spectral_fieldy, + const Vector > > >& spectral_fieldz, + const MultiFab& vel_onegrid, + const std::string& name, + const Real& scaling, + const int& step); +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& var_scaling, + const amrex::Vector< std::string >& var_names); +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& var_scaling, + const amrex::Vector< std::string >& var_names); +void InverseFFTVel(Vector > > >& spectral_field, + MultiFab& vel_decomp_onegrid, const IntVect& fft_size); + + +#endif diff --git a/src_analysis/TurbSpectra_single.cpp b/src_analysis/TurbSpectra_single.cpp new file mode 100644 index 00000000..b6bccc02 --- /dev/null +++ b/src_analysis/TurbSpectra_single.cpp @@ -0,0 +1,1043 @@ +#include "TurbSpectra.H" +#include "common_functions.H" + +#include +#include "AMReX_PlotFileUtil.H" +#include "AMReX_BoxArray.H" + +#ifdef AMREX_USE_CUDA +std::string cufftError (const cufftResult& err) +{ + 
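// Background for the hand-rolled error mapping below: the cuFFT API,
// unlike cuBLAS or cuSOLVER, ships no error-to-string helper, so the
// switch enumerates the cufftResult codes explicitly and falls back to a
// numeric string for anything unrecognized.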
switch (err) { + case CUFFT_SUCCESS: return "CUFFT_SUCCESS"; + case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN"; + case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED"; + case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE"; + case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE"; + case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR"; + case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED"; + case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED"; + case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE"; + case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; + default: return std::to_string(err) + " (unknown error code)"; + } +} +#endif + +#ifdef AMREX_USE_HIP +std::string rocfftError (const rocfft_status err) +{ + if (err == rocfft_status_success) { + return std::string("rocfft_status_success"); + } else if (err == rocfft_status_failure) { + return std::string("rocfft_status_failure"); + } else if (err == rocfft_status_invalid_arg_value) { + return std::string("rocfft_status_invalid_arg_value"); + } else if (err == rocfft_status_invalid_dimensions) { + return std::string("rocfft_status_invalid_dimensions"); + } else if (err == rocfft_status_invalid_array_type) { + return std::string("rocfft_status_invalid_array_type"); + } else if (err == rocfft_status_invalid_strides) { + return std::string("rocfft_status_invalid_strides"); + } else if (err == rocfft_status_invalid_distance) { + return std::string("rocfft_status_invalid_distance"); + } else if (err == rocfft_status_invalid_offset) { + return std::string("rocfft_status_invalid_offset"); + } else { + return std::to_string(err) + " (unknown error code)"; + } +} + +void Assert_rocfft_status (std::string const& name, rocfft_status status) +{ + if (status != rocfft_status_success) { + amrex::AllPrint() << name + " failed! 
Error: " + rocfftError(status) << "\n";; + } +} +#endif + +void TurbSpectrumScalar(const MultiFab& variables, + const amrex::Geometry& geom, + const int& step, + const amrex::Vector& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumScalar()",TurbSpectrumScalar); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == var_names.size(), "TurbSpectrumScalar: must have same number variable names as components of input MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(variables.nComp() == scaling.size(), "TurbSpectrumScalar: must have same number variable scaling as components of input MultiFab"); + int ncomp = variables.nComp(); + + long npts; + + // Initialize the boxarray "ba_onegrid" from the single box "domain" + BoxArray ba_onegrid; + { + Box domain = geom.Domain(); + ba_onegrid.define(domain); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + } + Real sqrtnpts = std::sqrt(npts); + DistributionMapping dmap_onegrid(ba_onegrid); + MultiFab variables_onegrid; + variables_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + // size of box including ghost cell range + IntVect fft_size; + + // contain to store FFT - note it is shrunk by "half" in x + Vector > > > spectral_field; + Vector forward_plan; + bool built_plan = false; + + // for CUDA builds we only need to build the plan once; track whether we did + for (int comp=0; comp >(spectral_bx,1, + The_Device_Arena())); + spectral_field.back()->setVal(0.0); // touch the memory + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + variables_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_field.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + built_plan = true; + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(variables_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + variables_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_field[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* variables_onegrid_ptr = variables_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &variables_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // Integrate spectra over k-shells + IntegrateKScalar(spectral_field,variables_onegrid,var_names[comp],scaling[comp],sqrtnpts,step); + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } +} + +void TurbSpectrumVelDecomp(const MultiFab& vel, + MultiFab& vel_decomp, + const amrex::Geometry& geom, + const int& step, + const amrex::Real& scaling, + const amrex::Vector< std::string >& var_names) +{ + BL_PROFILE_VAR("TurbSpectrumVelDecomp()",TurbSpectrumVelDecomp); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(vel.nComp() == 3, + "TurbSpectrumVelDecomp: must have 3 components of input vel MultiFab"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(var_names.size() == 3, + "TurbSpectrumVelDecomp: must have 3 names for output vel spectra (total, solenoidal, dilatational"); + const GpuArray dx = geom.CellSizeArray(); + + long npts; + + // Initialize the boxarray "ba_onegrid" from the single box "domain" + BoxArray ba_onegrid; + { + Box domain = geom.Domain(); + ba_onegrid.define(domain); + npts = (domain.length(0)*domain.length(1)*domain.length(2)); + } + Real sqrtnpts = std::sqrt(npts); + DistributionMapping dmap_onegrid(ba_onegrid); + MultiFab vel_onegrid; + vel_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + // size of box including ghost cell range + IntVect fft_size; + + // contain to store FFT - note it is shrunk by "half" in x + Vector > > > spectral_fieldx; + Vector > > > spectral_fieldy; + Vector > > > spectral_fieldz; + Vector > > > spectral_field_Sx; + Vector > > > spectral_field_Sy; + Vector > > > spectral_field_Sz; + Vector > > > spectral_field_Dx; + Vector > > > spectral_field_Dy; + Vector > > > spectral_field_Dz; + + // x-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,0,0,1); + for (MFIter 
mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_fieldx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldx.back()->setVal(0.0); // touch the memory + + spectral_field_Sx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Sx.back()->setVal(0.0); // touch the memory + + spectral_field_Dx.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dx.back()->setVal(0.0); // touch the memory + + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldx.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldx[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldx[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + + } // end x-vel + + // y-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,1,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_fieldy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldy.back()->setVal(0.0); // touch the memory + + spectral_field_Sy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Sy.back()->setVal(0.0); // touch the memory + + spectral_field_Dy.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dy.back()->setVal(0.0); // touch the memory + + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldy.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldy[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldy[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + + } // end y-vel + + // z-velocity + { + Vector forward_plan; + vel_onegrid.ParallelCopy(vel,2,0,1); + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + + // grab a single box including ghost cell range + Box realspace_bx = mfi.fabbox(); + + // size of box including ghost cell range + fft_size = realspace_bx.length(); // This will be different for hybrid FFT + + // this is the size of the box, except the 0th component is 'halved plus 1' + IntVect spectral_bx_size = fft_size; + spectral_bx_size[0] = fft_size[0]/2 + 1; + + // spectral box + Box spectral_bx = Box(IntVect(0), spectral_bx_size - IntVect(1)); + + spectral_fieldz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_fieldz.back()->setVal(0.0); // touch the memory + + spectral_field_Sz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + 
spectral_field_Sz.back()->setVal(0.0); // touch the memory + + spectral_field_Dz.emplace_back(new BaseFab >(spectral_bx,1, + The_Device_Arena())); + spectral_field_Dz.back()->setVal(0.0); // touch the memory + + FFTplan fplan; + +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_D2Z); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_forward, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_r2c_3d(fft_size[2], fft_size[1], fft_size[0], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldz.back()->dataPtr()), + FFTW_ESTIMATE); +#endif + forward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // ForwardTransform + for (MFIter mfi(vel_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(forward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecD2Z(forward_plan[i], + vel_onegrid[mfi].dataPtr(), + reinterpret_cast + (spectral_fieldz[i]->dataPtr())); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(forward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_fieldz[i]->dataPtr()); + result = rocfft_execute(forward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(forward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < forward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(forward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(forward_plan[i]); +#else + fftw_destroy_plan(forward_plan[i]); +#endif + } + + } // end x-vel + + + // Decompose velocity field into solenoidal and dilatational + for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.tilebox(); + Array4< GpuComplex > spectral_tx = (*spectral_fieldx[0]) .array(); + Array4< GpuComplex > spectral_ty = (*spectral_fieldy[0]) .array(); + Array4< GpuComplex > spectral_tz = (*spectral_fieldz[0]) .array(); + Array4< 
GpuComplex > spectral_sx = (*spectral_field_Sx[0]).array(); + Array4< GpuComplex > spectral_sy = (*spectral_field_Sy[0]).array(); + Array4< GpuComplex > spectral_sz = (*spectral_field_Sz[0]).array(); + Array4< GpuComplex > spectral_dx = (*spectral_field_Dx[0]).array(); + Array4< GpuComplex > spectral_dy = (*spectral_field_Dy[0]).array(); + Array4< GpuComplex > spectral_dz = (*spectral_field_Dz[0]).array(); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + + Real GxR = 0.0, GxC = 0.0, GyR = 0.0, GyC = 0.0, GzR = 0.0, GzC = 0.0; + + if (i <= nx/2) { + + // Get the wavevector + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + // Gradient Operators + GxR = (cos(2.0*M_PI*i/nx)-1.0)/dx[0]; + GxC = (sin(2.0*M_PI*i/nx)-0.0)/dx[0]; + GyR = (cos(2.0*M_PI*j/ny)-1.0)/dx[1]; + GyC = (sin(2.0*M_PI*j/ny)-0.0)/dx[1]; + GzR = (cos(2.0*M_PI*k/nz)-1.0)/dx[2]; + GzC = (sin(2.0*M_PI*k/nz)-0.0)/dx[2]; + + // Scale Total velocity FFT components + spectral_tx(i,j,k) *= (1.0/sqrtnpts); + spectral_ty(i,j,k) *= (1.0/sqrtnpts); + spectral_tz(i,j,k) *= (1.0/sqrtnpts); + + // Inverse Laplacian + Real Lap = GxR*GxR + GxC*GxC + GyR*GyR + GyC*GyC + GzR*GzR + GzC*GzC; + + // Divergence of vel + Real divR = spectral_tx(i,j,k).real()*GxR - spectral_tx(i,j,k).imag()*GxC + + spectral_ty(i,j,k).real()*GyR - spectral_ty(i,j,k).imag()*GyC + + spectral_tz(i,j,k).real()*GzR - spectral_tz(i,j,k).imag()*GzC ; + Real divC = spectral_tx(i,j,k).real()*GxC + spectral_tx(i,j,k).imag()*GxR + + spectral_ty(i,j,k).real()*GyC + spectral_ty(i,j,k).imag()*GyR + + spectral_tz(i,j,k).real()*GzC + spectral_tz(i,j,k).imag()*GzR ; + + if (Lap < 1.0e-12) { // zero mode for no bulk motion + spectral_dx(i,j,k) *= 0.0; + spectral_dy(i,j,k) *= 0.0; + spectral_dz(i,j,k) *= 0.0; + } + else { + // Dilatational velocity + GpuComplex copy_dx((divR*GxR + divC*GxC) / Lap, + (divC*GxR - divR*GxC) / Lap); + spectral_dx(i,j,k) = copy_dx; + + GpuComplex copy_dy((divR*GyR + divC*GyC) / Lap, + (divC*GyR - divR*GyC) / Lap); + spectral_dy(i,j,k) = copy_dy; + + GpuComplex copy_dz((divR*GzR + divC*GzC) / Lap, + (divC*GzR - divR*GzC) / Lap); + spectral_dz(i,j,k) = copy_dz; + } + + // Solenoidal velocity + spectral_sx(i,j,k) = spectral_tx(i,j,k) - spectral_dx(i,j,k); + spectral_sy(i,j,k) = spectral_ty(i,j,k) - spectral_dy(i,j,k); + spectral_sz(i,j,k) = spectral_tz(i,j,k) - spectral_dz(i,j,k); + } + }); + } + + ParallelDescriptor::Barrier(); + + // Integrate K spectrum for velocities + IntegrateKVelocity(spectral_fieldx, spectral_fieldy, spectral_fieldz, vel_onegrid, "vel_total" ,scaling,step); + IntegrateKVelocity(spectral_field_Sx, spectral_field_Sy, spectral_field_Sz, vel_onegrid, "vel_solenoidal",scaling,step); + IntegrateKVelocity(spectral_field_Dx, spectral_field_Dy, spectral_field_Dz, vel_onegrid, "vel_dilatational",scaling,step); + + + // Inverse Solenoidal and Dilatational Velocity Components + { // solenoidal x + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sx, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,0,1); + } + { // solenoidal y + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sy, vel_decomp_onegrid,fft_size); + // copy 
into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,1,1); + } + { // solenoidal z + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Sz, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,2,1); + } + { // dilatational x + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dx, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,3,1); + } + { // dilatational y + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dy, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,4,1); + } + { // dilatational z + MultiFab vel_decomp_onegrid; + vel_decomp_onegrid.define(ba_onegrid, dmap_onegrid, 1, 0); + vel_decomp_onegrid.setVal(0.0); + InverseFFTVel(spectral_field_Dz, vel_decomp_onegrid,fft_size); + // copy into external multifab + vel_decomp.ParallelCopy(vel_decomp_onegrid,0,5,1); + } + vel_decomp.mult(1.0/sqrtnpts); +} + +void IntegrateKScalar(const Vector > > >& spectral_field, + const MultiFab& variables_onegrid, + const std::string& name, + const Real& scaling, + const Real& sqrtnpts, + const int& step) + +{ + int npts = n_cells[0]/2; + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + + Gpu::HostVector phisum_host(npts); + + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + // Integrate spectra over k-shells + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + int nx = n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(variables_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.fabbox(); + + const Array4 > spectral = (*spectral_field[0]).const_array(); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { // only half of kx-domain + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real = spectral(i,j,k).real(); + Real imag = spectral(i,j,k).imag(); + Real cov = (1.0/(scaling*sqrtnpts*sqrtnpts))*(real*real + imag*imag); + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + } + + for (int d=1; d > > >& spectral_fieldx, + const Vector > > >& spectral_fieldy, + const Vector > > >& spectral_fieldz, + const MultiFab& vel_onegrid, + const std::string& name, + const Real& scaling, + const int& step) +{ + int npts = n_cells[0]/2; + + Gpu::DeviceVector phisum_device(npts); + Gpu::DeviceVector phicnt_device(npts); + Gpu::HostVector phisum_host(npts); + Real* phisum_ptr = phisum_device.dataPtr(); // pointer to data + int* phicnt_ptr = phicnt_device.dataPtr(); // pointer to data + + // Integrate spectra over k-shells + amrex::ParallelFor(npts, [=] AMREX_GPU_DEVICE (int d) noexcept + { + phisum_ptr[d] = 0.; + phicnt_ptr[d] = 0; + }); + + int nx = 
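// Shell-binning convention used in the reductions below: a mode
// (ki,kj,kk) is assigned to shell int(|k| + 0.5), the nearest integer
// radius; for example k = (3,4,0) has |k| = 5 and lands in shell 5.
// Modes with |k| > n_cells[0]/2 - 0.5 are simply discarded rather than
// aliased into a lower shell.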
n_cells[0]; + int ny = n_cells[1]; + int nz = n_cells[2]; + for ( MFIter mfi(vel_onegrid,TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + const Box& bx = mfi.fabbox(); + + const Array4 > spectralx = (*spectral_fieldx[0]).const_array(); + const Array4 > spectraly = (*spectral_fieldy[0]).const_array(); + const Array4 > spectralz = (*spectral_fieldz[0]).const_array(); + + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + if (i <= bx.length(0)/2) { // only half of kx-domain + int ki = i; + int kj = j; + if (j >= ny/2) kj = ny - j; + int kk = k; + if (k >= nz/2) kk = nz - k; + + Real dist = (ki*ki + kj*kj + kk*kk); + dist = std::sqrt(dist); + + if ( dist <= n_cells[0]/2-0.5) { + dist = dist+0.5; + int cell = int(dist); + Real real, imag, cov_x, cov_y, cov_z, cov; + real = spectralx(i,j,k).real(); + imag = spectralx(i,j,k).imag(); + cov_x = (1.0/scaling)*(real*real + imag*imag); + real = spectraly(i,j,k).real(); + imag = spectraly(i,j,k).imag(); + cov_y = (1.0/scaling)*(real*real + imag*imag); + real = spectralz(i,j,k).real(); + imag = spectralz(i,j,k).imag(); + cov_z = (1.0/scaling)*(real*real + imag*imag); + cov = cov_x + cov_y + cov_z; + amrex::HostDevice::Atomic::Add(&(phisum_ptr[cell]), cov); + amrex::HostDevice::Atomic::Add(&(phicnt_ptr[cell]),1); + } + } + }); + } + + for (int d=1; d > > >& spectral_field, + MultiFab& vel_decomp_onegrid, const IntVect& fft_size) +{ + +#ifdef AMREX_USE_CUDA + using FFTplan = cufftHandle; + using FFTcomplex = cuDoubleComplex; +#elif AMREX_USE_HIP + using FFTplan = rocfft_plan; + using FFTcomplex = double2; +#else + using FFTplan = fftw_plan; + using FFTcomplex = fftw_complex; +#endif + + Vector backward_plan; + + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + FFTplan fplan; +#ifdef AMREX_USE_CUDA // CUDA + cufftResult result = cufftPlan3d(&fplan, fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " cufftplan3d forward failed! Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP // HIP + const std::size_t lengths[] = {std::size_t(fft_size[0]),std::size_t(fft_size[1]),std::size_t(fft_size[2])}; + rocfft_status result = rocfft_plan_create(&fplan, rocfft_placement_notinplace, + rocfft_transform_type_real_inverse, rocfft_precision_double, + 3, lengths, 1, nullptr); + Assert_rocfft_status("rocfft_plan_create", result); +#else // host + fplan = fftw_plan_dft_c2r_3d(fft_size[2], fft_size[1], fft_size[0], + reinterpret_cast + (spectral_field.back()->dataPtr()), + vel_decomp_onegrid[mfi].dataPtr(), + FFTW_ESTIMATE); +#endif + backward_plan.push_back(fplan); + } + + ParallelDescriptor::Barrier(); + + // Backward Transform + for (MFIter mfi(vel_decomp_onegrid); mfi.isValid(); ++mfi) { + int i = mfi.LocalIndex(); +#ifdef AMREX_USE_CUDA + cufftSetStream(backward_plan[i], amrex::Gpu::gpuStream()); + cufftResult result = cufftExecZ2D(backward_plan[i], + reinterpret_cast + (spectral_field[i]->dataPtr()), + vel_decomp_onegrid[mfi].dataPtr()); + if (result != CUFFT_SUCCESS) { + amrex::AllPrint() << " forward transform using cufftExec failed! 
Error: " + << cufftError(result) << "\n"; + } +#elif AMREX_USE_HIP + rocfft_execution_info execinfo = nullptr; + rocfft_status result = rocfft_execution_info_create(&execinfo); + Assert_rocfft_status("rocfft_execution_info_create", result); + + std::size_t buffersize = 0; + result = rocfft_plan_get_work_buffer_size(backward_plan[i], &buffersize); + Assert_rocfft_status("rocfft_plan_get_work_buffer_size", result); + + void* buffer = amrex::The_Arena()->alloc(buffersize); + result = rocfft_execution_info_set_work_buffer(execinfo, buffer, buffersize); + Assert_rocfft_status("rocfft_execution_info_set_work_buffer", result); + + result = rocfft_execution_info_set_stream(execinfo, amrex::Gpu::gpuStream()); + Assert_rocfft_status("rocfft_execution_info_set_stream", result); + + amrex::Real* vel_onegrid_ptr = vel_decomp_onegrid[mfi].dataPtr(); + FFTcomplex* spectral_field_ptr = reinterpret_cast(spectral_field[i]->dataPtr()); + result = rocfft_execute(backward_plan[i], + (void**) &vel_onegrid_ptr, // in + (void**) &spectral_field_ptr, // out + execinfo); + Assert_rocfft_status("rocfft_execute", result); + amrex::Gpu::streamSynchronize(); + amrex::The_Arena()->free(buffer); + result = rocfft_execution_info_destroy(execinfo); + Assert_rocfft_status("rocfft_execution_info_destroy", result); +#else + fftw_execute(backward_plan[i]); +#endif + } + + // destroy fft plan + for (int i = 0; i < backward_plan.size(); ++i) { +#ifdef AMREX_USE_CUDA + cufftDestroy(backward_plan[i]); +#elif AMREX_USE_HIP + rocfft_plan_destroy(backward_plan[i]); +#else + fftw_destroy_plan(backward_plan[i]); +#endif + } + +} + diff --git a/src_compressible/compressible_functions.cpp b/src_compressible/compressible_functions.cpp index 96bb6f50..3d818cfe 100644 --- a/src_compressible/compressible_functions.cpp +++ b/src_compressible/compressible_functions.cpp @@ -8,6 +8,7 @@ AMREX_GPU_MANAGED int compressible::do_1D; AMREX_GPU_MANAGED int compressible::do_2D; AMREX_GPU_MANAGED int compressible::all_correl; AMREX_GPU_MANAGED int compressible::nspec_surfcov = 0; +AMREX_GPU_MANAGED int compressible::turbRestartRun = 1; AMREX_GPU_MANAGED bool compressible::do_reservoir = false; AMREX_GPU_MANAGED amrex::Real compressible::zeta_ratio = -1.0; @@ -64,7 +65,14 @@ void InitializeCompressibleNamespace() all_correl = 0; pp.query("all_correl",all_correl); - + // restart for turbulence + // if 1: will advance time, if 0: only stats no advance time + pp.query("turbRestartRun",turbRestartRun); + if (turbRestartRun == 0) { + if (restart <= 0) amrex::Abort("turbRestartRun requires restarting from a checkpoint, restart > 0"); + if (max_step != restart+1) amrex::Abort("this is a single step run; max_step should be equal to restart+1"); + } + // do reservoir? 
if ((bc_mass_lo[0] == 4) or (bc_mass_lo[1] == 4) or (bc_mass_lo[2] == 4) or (bc_mass_hi[0] == 4) or (bc_mass_hi[1] == 4) or (bc_mass_hi[2] == 4)) { diff --git a/src_compressible/compressible_namespace.H b/src_compressible/compressible_namespace.H index e75c9935..d3195911 100644 --- a/src_compressible/compressible_namespace.H +++ b/src_compressible/compressible_namespace.H @@ -7,6 +7,7 @@ namespace compressible { extern AMREX_GPU_MANAGED int do_2D; extern AMREX_GPU_MANAGED int all_correl; extern AMREX_GPU_MANAGED int nspec_surfcov; + extern AMREX_GPU_MANAGED int turbRestartRun; extern AMREX_GPU_MANAGED bool do_reservoir; extern AMREX_GPU_MANAGED amrex::Real zeta_ratio; diff --git a/src_compressible_stag/Checkpoint.cpp b/src_compressible_stag/Checkpoint.cpp index caf1a722..e0b0201c 100644 --- a/src_compressible_stag/Checkpoint.cpp +++ b/src_compressible_stag/Checkpoint.cpp @@ -643,7 +643,7 @@ void ReadCheckPoint3D(int& step, dmap.define(ba, ParallelDescriptor::NProcs()); #if defined(TURB) - if (turbForcing > 1) { + if ((turbForcing > 1) and (turbRestartRun)) { turbforce.define(ba,dmap,turb_a,turb_b,turb_c,turb_d,turb_alpha); } #endif @@ -694,7 +694,7 @@ void ReadCheckPoint3D(int& step, #if defined(TURB) // Read in turbulent forcing - if (turbForcing > 1) { + if ((turbForcing > 1) and (turbRestartRun)) { Real fs_temp; Real fc_temp; for (int i=0; i<132; ++i) { @@ -1391,28 +1391,29 @@ void Read_Copy_MF_Checkpoint(amrex::MultiFab& mf, std::string mf_name, const std BoxArray& ba_old, DistributionMapping& dmap_old, int NVARS, int ghost, int nodal_flag) { - // define temporary MF - MultiFab mf_temp; - if (nodal_flag < 0) { - if (ghost) { - mf_temp.define(ba_old,dmap_old,NVARS,ngc); - } - else { - mf_temp.define(ba_old,dmap_old,NVARS,0); - } - - } - else { - if (ghost) { - mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,ngc); - } - else { - mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,0); - } - - } + //// define temporary MF + //MultiFab mf_temp; + //if (nodal_flag < 0) { + // if (ghost) { + // mf_temp.define(ba_old,dmap_old,NVARS,ngc); + // } + // else { + // mf_temp.define(ba_old,dmap_old,NVARS,0); + // } + + //} + //else { + // if (ghost) { + // mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,ngc); + // } + // else { + // mf_temp.define(convert(ba_old,nodal_flag_dir[nodal_flag]),dmap_old,NVARS,0); + // } + + //} // Read into temporary MF from file + MultiFab mf_temp; VisMF::Read(mf_temp,amrex::MultiFabFileFullPrefix(0, checkpointname, "Level_", mf_name)); // Copy temporary MF into the new MF diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp index 87f3232d..c437cd38 100644 --- a/src_compressible_stag/DeriveVelProp.cpp +++ b/src_compressible_stag/DeriveVelProp.cpp @@ -8,6 +8,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, std::array< MultiFab, AMREX_SPACEDIM >& cumom, MultiFab& prim, MultiFab& eta, + MultiFab& zeta, const amrex::Geometry& geom, Real& turbKE, Real& c_speed, Real& u_rms, @@ -26,11 +27,11 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel, // Setup temp MultiFabs std::array< MultiFab, AMREX_SPACEDIM > macTemp; MultiFab gradU; + MultiFab eta_bulk_diss; MultiFab sound_speed; MultiFab ccTemp; MultiFab ccTempA; MultiFab ccTempDiv; - MultiFab eta_kin; // kinematic viscosity std::array< MultiFab, NUM_EDGE > curlU; std::array< MultiFab, NUM_EDGE > eta_edge; std::array< MultiFab, NUM_EDGE > curlUtemp; @@ -42,7 +43,7 @@ void 
diff --git a/src_compressible_stag/DeriveVelProp.cpp b/src_compressible_stag/DeriveVelProp.cpp
index 87f3232d..c437cd38 100644
--- a/src_compressible_stag/DeriveVelProp.cpp
+++ b/src_compressible_stag/DeriveVelProp.cpp
@@ -8,6 +8,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
                 std::array< MultiFab, AMREX_SPACEDIM >& cumom,
                 MultiFab& prim,
                 MultiFab& eta,
+                MultiFab& zeta,
                 const amrex::Geometry& geom,
                 Real& turbKE, Real& c_speed,
                 Real& u_rms,
@@ -26,11 +27,11 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     // Setup temp MultiFabs
     std::array< MultiFab, AMREX_SPACEDIM > macTemp;
     MultiFab gradU;
+    MultiFab eta_bulk_diss;
     MultiFab sound_speed;
     MultiFab ccTemp;
     MultiFab ccTempA;
     MultiFab ccTempDiv;
-    MultiFab eta_kin; // kinematic viscosity
     std::array< MultiFab, NUM_EDGE > curlU;
     std::array< MultiFab, NUM_EDGE > eta_edge;
     std::array< MultiFab, NUM_EDGE > curlUtemp;
@@ -42,7 +43,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     ccTemp.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempA.define(prim.boxArray(),prim.DistributionMap(),1,0);
     ccTempDiv.define(prim.boxArray(),prim.DistributionMap(),1,0);
-    eta_kin.define(prim.boxArray(),prim.DistributionMap(),1,ngc);
+    if (visc_type == 3) eta_bulk_diss.define(prim.boxArray(),prim.DistributionMap(),1,0);
 #if (AMREX_SPACEDIM == 3)
     curlU[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
     curlU[1].define(convert(prim.boxArray(),nodal_flag_xz), prim.DistributionMap(), 1, 0);
@@ -63,19 +64,6 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     curlUtemp[0].define(convert(prim.boxArray(),nodal_flag_xy), prim.DistributionMap(), 1, 0);
 #endif

-    // Get Kinematic Viscosity
-    for ( MFIter mfi(eta,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
-        // grow the box by ngc
-        const Box& bx = amrex::grow(mfi.tilebox(), ngc);
-        const Array4<Real> & eta_kin_fab = eta_kin.array(mfi);
-        const Array4<const Real>& eta_fab = eta.array(mfi);
-        const Array4<const Real>& prim_fab = prim.array(mfi);
-        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-        {
-            eta_kin_fab(i,j,k) = eta_fab(i,j,k) / prim_fab(i,j,k,0);
-        });
-    }
-
     // Setup temp variables
     Real temp;
     Vector<Real> tempvec(3);
@@ -87,14 +75,38 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     Vector<Real> eps_s_vec(3); // solenoidal dissipation

     // turbulent kinetic energy
-    StagInnerProd(cumom,0,vel,0,macTemp,rhouu);
+//  StagInnerProd(cumom,0,vel,0,macTemp,rhouu);
+    {
+        auto mask = cumom[0].OwnerMask(geom.periodicity());
+        rhouu[0] = MultiFab::Dot(*mask,cumom[0],0,vel[0],0,1,0);
+    }
+    {
+        auto mask = cumom[1].OwnerMask(geom.periodicity());
+        rhouu[1] = MultiFab::Dot(*mask,cumom[1],0,vel[1],0,1,0);
+    }
+    {
+        auto mask = cumom[2].OwnerMask(geom.periodicity());
+        rhouu[2] = MultiFab::Dot(*mask,cumom[2],0,vel[2],0,1,0);
+    }
     rhouu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
     rhouu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
     rhouu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
     turbKE = 0.5*( rhouu[0] + rhouu[1] + rhouu[2] );

     // RMS velocity
-    StagInnerProd(vel,0,vel,0,macTemp,uu);
+//  StagInnerProd(vel,0,vel,0,macTemp,uu);
+    {
+        auto mask = vel[0].OwnerMask(geom.periodicity());
+        uu[0] = MultiFab::Dot(*mask,vel[0],0,vel[0],0,1,0);
+    }
+    {
+        auto mask = vel[1].OwnerMask(geom.periodicity());
+        uu[1] = MultiFab::Dot(*mask,vel[1],0,vel[1],0,1,0);
+    }
+    {
+        auto mask = vel[2].OwnerMask(geom.periodicity());
+        uu[2] = MultiFab::Dot(*mask,vel[2],0,vel[2],0,1,0);
+    }
     uu[0] /= (n_cells[0]+1)*n_cells[1]*n_cells[2];
     uu[1] /= (n_cells[1]+1)*n_cells[2]*n_cells[0];
     uu[2] /= (n_cells[2]+1)*n_cells[0]*n_cells[1];
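The StagInnerProd calls are replaced because face-centered MultiFabs duplicate values where grids touch: `OwnerMask` returns an iMultiFab that assigns every shared point to exactly one owner, and the masked `MultiFab::Dot` overload then counts each point once. Condensed into a hypothetical helper (a sketch of the idiom in the hunk above, not code from the patch):

```cpp
#include <AMReX_MultiFab.H>
#include <AMReX_iMultiFab.H>
#include <AMReX_Geometry.H>

// Unduplicated inner product of two face- (or edge-) centered MultiFabs.
// Shared boundary points are counted once, on the owning rank/box.
amrex::Real staggered_dot (const amrex::MultiFab& a,
                           const amrex::MultiFab& b,
                           const amrex::Geometry& geom)
{
    auto mask = a.OwnerMask(geom.periodicity()); // std::unique_ptr<iMultiFab>
    return amrex::MultiFab::Dot(*mask, a, 0, b, 0, 1, 0);
}
```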
@@ -167,28 +179,53 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
     // Compute \omega (curl)
     ComputeCurlFaceToEdge(vel,curlU,geom);

-    // Solenoidal dissipation: <eta_kin * w . w>
-    AverageCCToEdge(eta_kin,eta_edge,0,1,SPEC_BC_COMP,geom);
+    // Solenoidal dissipation: <eta * w . w>
+    AverageCCToEdge(eta,eta_edge,0,1,SPEC_BC_COMP,geom);
     EdgeInnerProd(curlU,0,curlU,0,curlUtemp,tempvec);
-    EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec);
+//  EdgeInnerProd(curlUtemp,0,eta_edge,0,curlU,eps_s_vec);
+    {
+        auto mask = curlUtemp[0].OwnerMask(geom.periodicity());
+        eps_s_vec[0] = MultiFab::Dot(*mask,curlUtemp[0],0,eta_edge[0],0,1,0);
+    }
+    {
+        auto mask = curlUtemp[1].OwnerMask(geom.periodicity());
+        eps_s_vec[1] = MultiFab::Dot(*mask,curlUtemp[1],0,eta_edge[1],0,1,0);
+    }
+    {
+        auto mask = curlUtemp[2].OwnerMask(geom.periodicity());
+        eps_s_vec[2] = MultiFab::Dot(*mask,curlUtemp[2],0,eta_edge[2],0,1,0);
+    }
     eps_s_vec[0] /= (n_cells[0]+1)*(n_cells[1]+1)*n_cells[2];
     eps_s_vec[1] /= (n_cells[0]+1)*(n_cells[2]+1)*n_cells[1];
     eps_s_vec[2] /= (n_cells[1]+1)*(n_cells[2]+1)*n_cells[0];
     eps_s = (eps_s_vec[0] + eps_s_vec[1] + eps_s_vec[2]);

-    // Dilational dissipation (4/3)*<eta_kin * div . div>
-    CCInnerProd(ccTempDiv,0,eta_kin,0,ccTemp,eps_d);
-    eps_d *= dProb*(4.0/3.0);
+    // Dilational dissipation (4/3)*<eta * div . div>
+//  CCInnerProd(ccTempDiv,0,eta,0,ccTemp,eps_d);
+    if (visc_type == 3) {
+        // get eta_bulk_diss = zeta + (4/3)*eta
+        MultiFab::LinComb(eta_bulk_diss, 1.0, zeta, 0,
+                          1.3333333333, eta, 0,
+                          0, 1, 0);
+        eps_d = MultiFab::Dot(eta_bulk_diss, 0, ccTempDiv, 0, 1, 0);
+        eps_d *= dProb;
+    }
+    else {
+        eps_d = MultiFab::Dot(eta, 0, ccTempDiv, 0, 1, 0);
+        eps_d *= dProb*(4.0/3.0);
+    }

     // Ratio of Dilational to Solenoidal dissipation
     eps_ratio = eps_d/eps_s;
     Real eps_t = eps_s + eps_d;

     // Kolmogorov scales
-    Real eta_kin_avg = ComputeSpatialMean(eta_kin, 0);
-    kolm_s = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_s),0.25);
-    kolm_d = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_d),0.25);
-    kolm_t = pow((eta_kin_avg*eta_kin_avg*eta_kin_avg/eps_t),0.25);
+    kolm_s = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_s)),0.25);
+    kolm_d = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_d)),0.25);
+    kolm_t = pow((eta_avg*eta_avg*eta_avg/(rho_avg*rho_avg*eps_t)),0.25);
+//  kolm_s = pow((eta_avg*eta_avg*eta_avg/eps_s),0.25);
+//  kolm_d = pow((eta_avg*eta_avg*eta_avg/eps_d),0.25);
+//  kolm_t = pow((eta_avg*eta_avg*eta_avg/eps_t),0.25);
 }
 #endif

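The new Kolmogorov expressions follow from building the kinematic viscosity out of the mean dynamic viscosity and mean density, together with the fact that eps_s/eps_d/eps_t above are dynamic (per-volume) dissipation rates, so one factor of the mean density cancels. Assuming eta_avg and rho_avg are the spatial means of eta and density computed elsewhere in this routine:

```latex
\eta_K \;=\; \left(\frac{\nu^{3}}{\varepsilon_{\nu}}\right)^{1/4},
\qquad
\nu = \frac{\bar{\eta}}{\bar{\rho}},
\qquad
\varepsilon_{\nu} = \frac{\varepsilon}{\bar{\rho}}
\quad\Longrightarrow\quad
\eta_K = \left(\frac{\bar{\eta}^{\,3}}{\bar{\rho}^{\,2}\,\varepsilon}\right)^{1/4},
```

where ε stands for any of eps_s, eps_d, or eps_t, matching the eta_avg³/(rho_avg²·eps) form in the hunk above.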
@@ -485,9 +522,9 @@ void EvaluateWritePlotFileVelGrad(int step,

         const Box& bx = mfi.tilebox();

-        const Array4<Real> out = output.array(mfi);
+        const Array4< Real>& out = output.array(mfi);

-        const Array4<const Real> v_decomp = vel_decomp.array(mfi);
+        const Array4<const Real>& v_decomp = vel_decomp.array(mfi);

         AMREX_D_TERM(Array4<const Real> const& velx = vel[0].array(mfi);,
                      Array4<const Real> const& vely = vel[1].array(mfi);,
                      Array4<const Real> const& velz = vel[2].array(mfi););
@@ -495,11 +532,16 @@ void EvaluateWritePlotFileVelGrad(int step,

         amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
         {
-            for (int n=0;n<6;++n) {
-                out(i,j,k,n) = v_decomp(i,j,k,n);
-            }
-            out(i,j,k,6) = sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag solenoidal
-            out(i,j,k,7) = sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag dilatational
+
+            out(i,j,k,0) = v_decomp(i,j,k,0);
+            out(i,j,k,1) = v_decomp(i,j,k,1);
+            out(i,j,k,2) = v_decomp(i,j,k,2);
+            out(i,j,k,3) = v_decomp(i,j,k,3);
+            out(i,j,k,4) = v_decomp(i,j,k,4);
+            out(i,j,k,5) = v_decomp(i,j,k,5);
+
+            out(i,j,k,6) = std::sqrt(out(i,j,k,0)*out(i,j,k,0) + out(i,j,k,1)*out(i,j,k,1) + out(i,j,k,2)*out(i,j,k,2)); // mag solenoidal
+            out(i,j,k,7) = std::sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) + out(i,j,k,5)*out(i,j,k,5)); // mag dilatational

             // divergence
             out(i,j,k,8) = (velx(i+1,j,k) - velx(i,j,k))/dx[0] +
                            (vely(i,j+1,k) - vely(i,j,k))/dx[1] +
                            (velz(i,j,k+1) - velz(i,j,k))/dx[2] ;
@@ -549,7 +591,7 @@ void EvaluateWritePlotFileVelGrad(int step,
             Real w3_pp = u32_pp - u23_pp;

             // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
-            out(i,j,k,9) = sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
+            out(i,j,k,9) = std::sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
                                       w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
                                       w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
         });
@@ -562,8 +604,8 @@ void EvaluateWritePlotFileVelGrad(int step,
     varNames[1] = "uy_s";
     varNames[2] = "uz_s";
     varNames[3] = "ux_d";
-    varNames[4] = "ux_d";
-    varNames[5] = "uy_d";
+    varNames[4] = "uy_d";
+    varNames[5] = "uz_d";
     varNames[6] = "umag_s";
     varNames[7] = "umag_d";
     varNames[8] = "div";
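The function added in the next hunk samples each vorticity component on the four cell edges surrounding a cell center and records both a single-edge ("shifted") value and the four-edge average. A minimal sketch of one such edge sample on the staggered grid; `w3_at_edge` is an illustrative helper, not part of the patch:

```cpp
#include <AMReX_REAL.H>
#include <AMReX_Array4.H>
#include <AMReX_Array.H>
#include <AMReX_GpuQualifiers.H>

// z-vorticity w3 = dv/dx - du/dy lives naturally on x-y edges when
// velx sits on x-faces and vely on y-faces. Sample at the edge at
// (i-1/2, j-1/2) of cell (i,j,k); dx is the cell size.
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
amrex::Real w3_at_edge (amrex::Array4<const amrex::Real> const& velx,
                        amrex::Array4<const amrex::Real> const& vely,
                        int i, int j, int k,
                        amrex::GpuArray<amrex::Real,3> const& dx)
{
    amrex::Real du_dy = (velx(i,j,k) - velx(i,j-1,k)) / dx[1]; // du/dy at the edge
    amrex::Real dv_dx = (vely(i,j,k) - vely(i-1,j,k)) / dx[0]; // dv/dx at the edge
    return dv_dx - du_dy;
}
```

Averaging the four surrounding edge samples recenters the component at the cell center; squaring before or after that average produces the different magnitudes written in components 6-8 below.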
@@ -572,3 +614,123 @@ void EvaluateWritePlotFileVelGrad(int step,
 }
 #endif

+#if defined(TURB)
+void EvaluateWritePlotFileVelGradTiny(int step,
+                                      const amrex::Real time,
+                                      const amrex::Geometry& geom,
+                                      const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                      const amrex::MultiFab& vel_decomp_in)
+{
+    BL_PROFILE_VAR("EvaluateWritePlotFileVelGradTiny()",EvaluateWritePlotFileVelGradTiny);
+
+    MultiFab output;
+
+    // 0: vorticity wx_shifted
+    // 1: vorticity wy_shifted
+    // 2: vorticity wz_shifted
+    // 3: vorticity wx_avg
+    // 4: vorticity wy_avg
+    // 5: vorticity wz_avg
+    // 6: vorticity_mag_shft_then_sq = sqrt(wx^2 + wy^2 + wz^2)
+    // 7: vorticity_mag_avg_then_sq  = sqrt(wx^2 + wy^2 + wz^2)
+    // 8: vorticity_mag_sq_then_avg  = sqrt(wx^2 + wy^2 + wz^2)
+    // 9: divergence = u_1,1 + u_2,2 + u_3,3
+    output.define(convert(vel[0].boxArray(),IntVect(AMREX_D_DECL(0,0,0))), vel[0].DistributionMap(), 10, 0);
+    output.setVal(0.0);
+
+    const GpuArray<Real, AMREX_SPACEDIM> dx = geom.CellSizeArray();
+
+    for ( MFIter mfi(output,TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
+
+        const Box& bx = mfi.tilebox();
+
+        const Array4< Real>& out = output.array(mfi);
+
+        AMREX_D_TERM(Array4<const Real> const& velx = vel[0].array(mfi);,
+                     Array4<const Real> const& vely = vel[1].array(mfi);,
+                     Array4<const Real> const& velz = vel[2].array(mfi););
+
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+
+            // divergence
+            out(i,j,k,9) = (velx(i+1,j,k) - velx(i,j,k))/dx[0] +
+                           (vely(i,j+1,k) - vely(i,j,k))/dx[1] +
+                           (velz(i,j,k+1) - velz(i,j,k))/dx[2] ;
+
+            // on edges: u_1,2 and u_2,1 and curl w1 = u_2,1 - u_1,2
+            Real u12_mm = (velx(i,j,k) - velx(i,j-1,k))/dx[1];
+            Real u21_mm = (vely(i,j,k) - vely(i-1,j,k))/dx[0];
+            Real w1_mm = u21_mm - u12_mm;
+            Real u12_mp = (velx(i,j+1,k) - velx(i,j,k))/dx[1];
+            Real u21_mp = (vely(i,j+1,k) - vely(i-1,j+1,k))/dx[0];
+            Real w1_mp = u21_mp - u12_mp;
+            Real u12_pm = (velx(i+1,j,k) - velx(i+1,j-1,k))/dx[1];
+            Real u21_pm = (vely(i+1,j,k) - vely(i,j,k))/dx[0];
+            Real w1_pm = u21_pm - u12_pm;
+            Real u12_pp = (velx(i+1,j+1,k) - velx(i+1,j,k))/dx[1];
+            Real u21_pp = (vely(i+1,j+1,k) - vely(i,j+1,k))/dx[0];
+            Real w1_pp = u21_pp - u12_pp;
+            out(i,j,k,0) = w1_mm;
+            out(i,j,k,3) = 0.25*(w1_mm+w1_mp+w1_pm+w1_pp);
+
+            // on edges: u_1,3 and u_3,1 and curl w2 = u_1,3 - u_3,1
+            Real u13_mm = (velx(i,j,k) - velx(i,j,k-1))/dx[2];
+            Real u31_mm = (velz(i,j,k) - velz(i-1,j,k))/dx[0];
+            Real w2_mm = u13_mm - u31_mm;
+            Real u13_mp = (velx(i,j,k+1) - velx(i,j,k))/dx[2];
+            Real u31_mp = (velz(i,j,k+1) - velz(i-1,j,k+1))/dx[0];
+            Real w2_mp = u13_mp - u31_mp;
+            Real u13_pm = (velx(i+1,j,k) - velx(i+1,j,k-1))/dx[2];
+            Real u31_pm = (velz(i+1,j,k) - velz(i,j,k))/dx[0];
+            Real w2_pm = u13_pm - u31_pm;
+            Real u13_pp = (velx(i+1,j,k+1) - velx(i+1,j,k))/dx[2];
+            Real u31_pp = (velz(i+1,j,k+1) - velz(i,j,k+1))/dx[0];
+            Real w2_pp = u13_pp - u31_pp;
+            out(i,j,k,1) = w2_mm;
+            out(i,j,k,4) = 0.25*(w2_mm+w2_mp+w2_pm+w2_pp);
+
+            // on edges: u_2,3 and u_3,2 and curl w3 = u_3,2 - u_2,3
+            Real u23_mm = (vely(i,j,k) - vely(i,j,k-1))/dx[2];
+            Real u32_mm = (velz(i,j,k) - velz(i,j-1,k))/dx[1];
+            Real w3_mm = u32_mm - u23_mm;
+            Real u23_mp = (vely(i,j,k+1) - vely(i,j,k))/dx[2];
+            Real u32_mp = (velz(i,j,k+1) - velz(i,j-1,k+1))/dx[1];
+            Real w3_mp = u32_mp - u23_mp;
+            Real u23_pm = (vely(i,j+1,k) - vely(i,j+1,k-1))/dx[2];
+            Real u32_pm = (velz(i,j+1,k) - velz(i,j,k))/dx[1];
+            Real w3_pm = u32_pm - u23_pm;
+            Real u23_pp = (vely(i,j+1,k+1) - vely(i,j+1,k))/dx[2];
+            Real u32_pp = (velz(i,j+1,k+1) - velz(i,j,k+1))/dx[1];
+            Real w3_pp = u32_pp - u23_pp;
+            out(i,j,k,2) = w3_mm;
+            out(i,j,k,5) = 0.25*(w3_mm+w3_mp+w3_pm+w3_pp);
+
+            // vorticity magnitude: sqrt(w1*w1 + w2*w2 + w3*w3)
+            out(i,j,k,6) = std::sqrt(w1_mm*w1_mm + w2_mm*w2_mm + w3_mm*w3_mm);
+            out(i,j,k,7) = std::sqrt(out(i,j,k,3)*out(i,j,k,3) + out(i,j,k,4)*out(i,j,k,4) +
+                                     out(i,j,k,5)*out(i,j,k,5));
+            out(i,j,k,8) = std::sqrt(0.25*(w1_mm*w1_mm + w1_mp*w1_mp + w1_pm*w1_pm + w1_pp*w1_pp +
+                                           w2_mm*w2_mm + w2_mp*w2_mp + w2_pm*w2_pm + w2_pp*w2_pp +
+                                           w3_mm*w3_mm + w3_mp*w3_mp + w3_pm*w3_pm + w3_pp*w3_pp));
+        });
+    }
+
+    // Write to a plotfile
+    std::string plotfilename = amrex::Concatenate("vort_div",step,9);
+    amrex::Vector<std::string> varNames(10);
+    varNames[0] = "w1_shift";
+    varNames[1] = "w2_shift";
+    varNames[2] = "w3_shift";
+    varNames[3] = "w1_avg";
+    varNames[4] = "w2_avg";
+    varNames[5] = "w3_avg";
+    varNames[6] = "vort_mag_shft";
+    varNames[7] = "vort_mag_shft_avg";
+    varNames[8] = "vort_mag_avg";
+    varNames[9] = "div";
+    WriteSingleLevelPlotfile(plotfilename,output,varNames,geom,time,step);
+}
+#endif
+
+
diff --git a/src_compressible_stag/compressible_functions_stag.H b/src_compressible_stag/compressible_functions_stag.H
index 10143477..f0b628ae 100644
--- a/src_compressible_stag/compressible_functions_stag.H
+++ b/src_compressible_stag/compressible_functions_stag.H
@@ -54,6 +54,11 @@ void EvaluateWritePlotFileVelGrad(int step,
                                   const amrex::Geometry& geom,
                                   const std::array<MultiFab, AMREX_SPACEDIM>& vel,
                                   const amrex::MultiFab& vel_decomp);
+void EvaluateWritePlotFileVelGradTiny(int step,
+                                      const amrex::Real time,
+                                      const amrex::Geometry& geom,
+                                      const std::array<MultiFab, AMREX_SPACEDIM>& vel,
+                                      const amrex::MultiFab& vel_decomp);
 #endif

 void conservedToPrimitiveStag(MultiFab& prim_in, std::array<MultiFab, AMREX_SPACEDIM>& velStag_in,
@@ -434,6 +439,7 @@ void GetTurbQty(std::array< MultiFab, AMREX_SPACEDIM >& vel,
                 std::array< MultiFab, AMREX_SPACEDIM >& cumom,
                 MultiFab& prim,
                 MultiFab& eta,
+                MultiFab& zeta,
                 const amrex::Geometry& geom,
                 Real& turbKE, Real& c_speed,
                 Real& u_rms,
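The three vorticity magnitudes written by EvaluateWritePlotFileVelGradTiny differ only in where the four-edge averaging enters; since averaging and squaring do not commute, comparing them indicates how strongly the recentering smooths the field. With w_k a single-edge sample and bars denoting averages over the four edges around a cell center (components 6, 7, and 8 respectively):

```latex
|\omega|_{\mathrm{shift}} = \sqrt{w_1^{2}+w_2^{2}+w_3^{2}},\qquad
|\omega|_{\mathrm{avg}}   = \sqrt{\bar{w}_1^{\,2}+\bar{w}_2^{\,2}+\bar{w}_3^{\,2}},\qquad
|\omega|_{\mathrm{rms}}   = \sqrt{\overline{w_1^{2}}+\overline{w_2^{2}}+\overline{w_3^{2}}}.
```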
diff --git a/src_compressible_stag/main_driver.cpp b/src_compressible_stag/main_driver.cpp
index da87869e..a21e222f 100644
--- a/src_compressible_stag/main_driver.cpp
+++ b/src_compressible_stag/main_driver.cpp
@@ -1,3 +1,4 @@
+#include "TurbSpectra.H"
 #include "common_functions.H"
 #include "compressible_functions.H"
 #include "compressible_functions_stag.H"
@@ -265,10 +266,16 @@ void main_driver(const char* argv)

 #if defined(TURB)
     // data structure for turbulence diagnostics
+    MultiFab MFTurbScalar;
+    MultiFab MFTurbVel;
+    MultiFab vel_decomp;
     std::string turbfilename = "turbstats";
     std::ofstream turboutfile;
     std::string turbfilenamedecomp = "turbstatsdecomp";
     std::ofstream turboutfiledecomp;
+    // need to use dVol for scaling
+    Real dVol = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2];
+    Real dVolinv = 1.0/dVol;
 #endif

     /////////////////////////////////////////////
@@ -439,40 +446,6 @@
         var_scaling_cons[d] = 1./(dx[0]*dx[1]*dx[2]);
     }

-#if defined(TURB)
-    //////////////////////////////////////////////////////////////
-    // structure factor variables names and scaling for turbulence
-    // variables are velocities, density, pressure and temperature
-    //////////////////////////////////////////////////////////////
-    // need to use dVol for scaling
-    Real dVol = (AMREX_SPACEDIM==2) ? dx[0]*dx[1]*cell_depth : dx[0]*dx[1]*dx[2];
-    Real dVolinv = 1.0/dVol;
-
-    MultiFab structFactMFTurbVel;
-    MultiFab structFactMFTurbScalar;
-    MultiFab vel_decomp;
-
-    Vector< std::string > var_names_turbVelTotal{"ux","uy","uz"};
-    Vector<Real> var_scaling_turbVelTotal(3, dVolinv);
-    amrex::Vector< int > s_pairA_turbVelTotal(3);
-    amrex::Vector< int > s_pairB_turbVelTotal(3);
-    for (int d=0; d<3; ++d) {
-        s_pairA_turbVelTotal[d] = d;
-        s_pairB_turbVelTotal[d] = d;
-    }
-
-    Vector<Real> var_scaling_turbVelDecomp(6, dVolinv);
-
-    Vector< std::string > var_names_turbScalar{"rho","tenp","press"};
-    Vector<Real> var_scaling_turbScalar(3, dVolinv);
-    amrex::Vector< int > s_pairA_turbScalar(3);
-    amrex::Vector< int > s_pairB_turbScalar(3);
-    for (int d=0; d<3; ++d) {
-        s_pairA_turbScalar[d] = d;
-        s_pairB_turbScalar[d] = d;
-    }
-#endif
-
     // object for turbulence forcing
     TurbForcingComp turbforce;
@@ -736,6 +709,7 @@ void main_driver(const char* argv)
 #if defined(TURB)
     if (turbForcing > 0) {
         EvaluateWritePlotFileVelGrad(0, 0.0, geom, vel, vel_decomp);
+        EvaluateWritePlotFileVelGradTiny(0, 0.0, geom, vel, vel_decomp);
     }
 #endif

@@ -769,6 +743,15 @@ void main_driver(const char* argv)

     } // end t=0 setup

+
+#if defined(TURB)
+    if (turbForcing >= 1) {
+        MFTurbVel.define(ba, dmap, 3, 0);
+        MFTurbScalar.define(ba, dmap, 3, 0);
+        vel_decomp.define(ba, dmap, 6, 0);
+        vel_decomp.setVal(0.0);
+    }
+#endif

     ///////////////////////////////////////////
     // Setup Structure factor
@@ -909,26 +892,6 @@ void main_driver(const char* argv)
         }
     }

-
-#if defined(TURB)
-    if (turbForcing >= 1) {
-
-        structFactMFTurbVel.define(ba, dmap, 3, 0);
-        structFactMFTurbScalar.define(ba, dmap, 6, 0);
-        vel_decomp.define(ba, dmap, 6, 0);
-        vel_decomp.setVal(0.0);
-
-        turbStructFactVelTotal.define(ba,dmap,
-                                      var_names_turbVelTotal,var_scaling_turbVelTotal,
-                                      s_pairA_turbVelTotal,s_pairB_turbVelTotal);
-        turbStructFactScalar.define(ba,dmap,
-                                    var_names_turbScalar,var_scaling_turbScalar,
-                                    s_pairA_turbScalar,s_pairB_turbScalar);
-        turbStructFactVelDecomp.defineDecomp(ba,dmap,
-                                             var_names_turbVelTotal,var_scaling_turbVelDecomp,
-                                             s_pairA_turbVelTotal,s_pairB_turbVelTotal);
-    }
-#endif

     if (n_ads_spec>0) {
@@ -1089,7 +1052,7 @@

 #if defined(TURB)
     // Initialize Turbulence Forcing Object
-    if (turbForcing > 1) {
+    if ((turbForcing > 1) and (turbRestartRun)) {
         turbforce.Initialize(geom);
     }
 #endif
@@ -1136,8 +1099,13 @@
         }

         // FHD
-        RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, kappa, chi, D,
-                    faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce);
+        if (turbRestartRun) {
+            RK3stepStag(cu, cumom, prim, vel, source, eta, zeta, kappa, chi, D,
+                        faceflux, edgeflux_x, edgeflux_y, edgeflux_z, cenflux, ranchem, geom, dt, step, turbforce);
+        }
+        else {
+            calculateTransportCoeffs(prim, eta, zeta, kappa, chi, D);
+        }

         if (n_ads_spec>0 && splitting_MFsurfchem == 1) sample_MFsurfchem(cu, prim, surfcov, dNadsdes, geom, dt/2.0);
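With turbRestartRun = 0, the main loop above degenerates into a single post-processing pass over a checkpoint: the state is never advanced, and only the transport coefficients needed by the turbulence diagnostics are refreshed. A schematic sketch of that control flow; `advance_state`, `refresh_transport`, and `write_turbulence_diagnostics` are hypothetical stand-ins for RK3stepStag, calculateTransportCoeffs, and the diagnostic writers:

```cpp
// Hypothetical stand-ins for the calls made in main_driver.cpp.
void advance_state (int step);               // RK3stepStag
void refresh_transport ();                   // calculateTransportCoeffs
void write_turbulence_diagnostics (int step);

// Sketch: with turbRestartRun = 0, input parsing enforces
// max_step = restart+1, so this loop body runs exactly once and the
// checkpointed state is analyzed but left untouched.
void run_loop (int restart, int max_step, int turbRestartRun)
{
    for (int step = restart + 1; step <= max_step; ++step) {
        if (turbRestartRun) {
            advance_state(step);   // normal run: full FHD update
        } else {
            refresh_transport();   // stats-only: eta/zeta/kappa for diagnostics
        }
        write_turbulence_diagnostics(step);
    }
}
```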
@@ -1286,6 +1254,9 @@
                 writePlt = ((step+1)%plot_int == 0);
             }
         }
+#if defined(TURB)
+        if ((turbRestartRun == 0) and (turbForcing >= 1)) writePlt = true;
+#endif

         if (writePlt) {
             //yzAverage(cuMeans, cuVars, primMeans, primVars, spatialCross,
             WritePlotFileStag(step, time, geom, cu, cuMeans, cuVars, cumom, cumomMeans,
                               cumomVars, prim, primMeans, primVars, vel, velMeans, velVars,
                               coVars, surfcov, surfcovMeans, surfcovVars, surfcovcoVars, eta, kappa, zeta);

-#if defined(TURB)
-            if (turbForcing > 0) {
-                EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp);
-            }
-#endif
-
             if (plot_cross) {
                 if (do_1D) {
                     WriteSpatialCross1D(spatialCross1D, step, geom, ncross);
                 }
@@ -1324,27 +1289,24 @@
             // copy velocities into structFactMFTurb
-            for(int d=0; d<3; ++d) {
-                ShiftFaceToCC(vel[d], 0, structFactMFTurbVel, d, 1);
-            }
-
-            // total velocity
-            turbStructFactVelTotal.FortStructure(structFactMFTurbVel,geom,1);
-            turbStructFactVelTotal.CallFinalize(geom);
-            turbStructFactVelTotal.IntegratekShells(step,geom);
+            for(int d=0; d<3; ++d) {
+                ShiftFaceToCC(vel[d], 0, MFTurbVel, d, 1);
+            }
+
+            // total and decomposed velocities
+            Vector< std::string > var_names_turbVel{"vel_total","vel_solenoidal","vel_dilation"};
+            Real scaling_turb_veldecomp = dVolinv;
+            TurbSpectrumVelDecomp(MFTurbVel, vel_decomp, geom, step, scaling_turb_veldecomp, var_names_turbVel);

-            // scalars
-            turbStructFactScalar.FortStructure(structFactMFTurbScalar,geom,1);
-            turbStructFactScalar.CallFinalize(geom);
-            turbStructFactScalar.IntegratekShellsScalar(step,geom,var_names_turbScalar);
+            // scalars
+            Vector< std::string > var_names_turbScalar{"rho","temp","press"};
+            Vector<Real> scaling_turb_scalar(3, dVolinv);
+            TurbSpectrumScalar(MFTurbScalar, geom, step, scaling_turb_scalar, var_names_turbScalar);
+
+            EvaluateWritePlotFileVelGrad(step, time, geom, vel, vel_decomp);
+            EvaluateWritePlotFileVelGradTiny(step, time, geom, vel, vel_decomp);
         }
 #endif
     }
@@ -1352,7 +1314,8 @@

 #if defined(TURB)
         // turbulence outputs
-        if ((turbForcing >= 1) and (step%1000 == 0)) {
+        if (((turbForcing >= 1) and (step%1000 == 0)) or
+            ((turbForcing >= 1) and (turbRestartRun == 0))) {

             Real turbKE, c_speed, u_rms,
                  taylor_len, taylor_Re, taylor_Ma,
                  skew, kurt,
                  eps_s, eps_d, eps_ratio,
                  kolm_s, kolm_d, kolm_t;
@@ -1360,7 +1323,7 @@
                 vel[i].FillBoundary(geom.periodicity());
                 cumom[i].FillBoundary(geom.periodicity());
             }
-            GetTurbQty(vel, cumom, prim, eta, geom,
+            GetTurbQty(vel, cumom, prim, eta, zeta, geom,
                        turbKE, c_speed, u_rms,
                        taylor_len, taylor_Re, taylor_Ma,
                        skew, kurt,
@@ -1386,7 +1349,9 @@
             turboutfile << std::endl;
         }

-        if ((turbForcing >= 1) and (writePlt)) {
+        if (((turbForcing >= 1) and (writePlt)) or
+            ((turbForcing >= 1) and (turbRestartRun == 0))) {
+
             Real turbKE_s, turbKE_d, delta_turbKE;
             Real u_rms_s, u_rms_d, delta_u_rms;
             Real taylor_Ma_d;