From bcc67e19b44b6a7b11d2302c9487fd19ec722662 Mon Sep 17 00:00:00 2001 From: Luca Fedeli Date: Thu, 1 Feb 2024 19:07:28 +0100 Subject: [PATCH 01/13] Docs: update instructions for Adastra supercomputer (CINES, France) (#4655) * update instructions for Adastra supercomputer * remove empty line * fix bug * fix bug * fix bug --- Docs/source/install/hpc/adastra.rst | 30 ++++++---- .../adastra_warpx.profile.example | 28 ++++----- .../adastra-cines/install_dependencies.sh | 60 +++++++++---------- Tools/machines/adastra-cines/submit.sh | 17 ++++-- 4 files changed, 74 insertions(+), 61 deletions(-) diff --git a/Docs/source/install/hpc/adastra.rst b/Docs/source/install/hpc/adastra.rst index 44b07985670..0b984d5e2be 100644 --- a/Docs/source/install/hpc/adastra.rst +++ b/Docs/source/install/hpc/adastra.rst @@ -31,18 +31,26 @@ If you are new to this system, **please see the following resources**: Preparation ----------- +The following instructions will install WarpX in the ``$SHAREDHOMEDIR`` directory, +which is shared among all the members of a given project. Due to the inode +quota enforced for this machine, a shared installation of WarpX is advised. + Use the following commands to download the WarpX source code: .. code-block:: bash - git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx + # If you have multiple projects, activate the project that you want to use with: + # + # myproject -a YOUR_PROJECT_NAME + # + git clone https://github.com/ECP-WarpX/WarpX.git $SHAREDHOMEDIR/src/warpx -We use system software modules, add environment hints and further dependencies via the file ``$HOME/adastra_warpx.profile``. +We use system software modules, add environment hints and further dependencies via the file ``$SHAREDHOMEDIR/adastra_warpx.profile``. Create it now: .. code-block:: bash - cp $HOME/src/warpx/Tools/machines/adastra-cines/adastra_warpx.profile.example $HOME/adastra_warpx.profile + cp $SHAREDHOMEDIR/src/warpx/Tools/machines/adastra-cines/adastra_warpx.profile.example $SHAREDHOMEDIR/adastra_warpx.profile .. dropdown:: Script Details :color: light @@ -53,8 +61,8 @@ Create it now: :language: bash Edit the 2nd line of this script, which sets the ``export proj=""`` variable using a text editor -such as ``nano``, ``emacs``, or ``vim`` (all available by default on -Adastra login nodes). +such as ``nano``, ``emacs``, or ``vim`` (all available by default on Adastra login nodes) and +uncomment the 3rd line (which sets ``$proj`` as the active project). .. important:: @@ -62,14 +70,14 @@ Adastra login nodes). .. code-block:: bash - source $HOME/adastra_warpx.profile + source $SHAREDHOMEDIR/adastra_warpx.profile Finally, since Adastra does not yet provide software modules for some of our dependencies, install them once: .. code-block:: bash - bash $HOME/src/warpx/Tools/machines/adastra-cines/install_dependencies.sh - source $HOME/sw/adastra/gpu/venvs/warpx-adastra/bin/activate + bash $SHAREDHOMEDIR/src/warpx/Tools/machines/adastra-cines/install_dependencies.sh + source $SHAREDHOMEDIR/sw/adastra/gpu/venvs/warpx-adastra/bin/activate .. dropdown:: Script Details :color: light @@ -89,13 +97,13 @@ Use the following :ref:`cmake commands ` to compile the applicat .. code-block:: bash - cd $HOME/src/warpx + cd $SHAREDHOMEDIR/src/warpx rm -rf build_adastra cmake -S . -B build_adastra -DWarpX_COMPUTE=HIP -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3" cmake --build build_adastra -j 16 -The WarpX application executables are now in ``$HOME/src/warpx/build_adastra/bin/``. 
+The WarpX application executables are now in ``$SHAREDHOMEDIR/src/warpx/build_adastra/bin/``. Additionally, the following commands will install WarpX as a Python module: .. code-block:: bash @@ -119,7 +127,7 @@ If you already installed WarpX in the past and want to update it, start by getti .. code-block:: bash - cd $HOME/src/warpx + cd $SHAREDHOMEDIR/src/warpx # read the output of this command - does it look ok? git status diff --git a/Tools/machines/adastra-cines/adastra_warpx.profile.example b/Tools/machines/adastra-cines/adastra_warpx.profile.example index 23441638893..0d55e869d6a 100644 --- a/Tools/machines/adastra-cines/adastra_warpx.profile.example +++ b/Tools/machines/adastra-cines/adastra_warpx.profile.example @@ -1,30 +1,33 @@ -# please set your project account +# please set your project account and uncomment the following two lines #export proj=your_project_id +#myproject -a $proj # required dependencies +module purge +module load cpe/23.12 module load craype-accel-amd-gfx90a craype-x86-trento module load PrgEnv-cray +module load CCE-GPU-3.0.0 module load amd-mixed/5.2.3 -module load CPE-23.02-cce-15.0.1-GPU-softs # optional: for PSATD in RZ geometry support -export CMAKE_PREFIX_PATH=${HOME}/sw/adastra/gpu/blaspp-master:$CMAKE_PREFIX_PATH -export CMAKE_PREFIX_PATH=${HOME}/sw/adastra/gpu/lapackpp-master:$CMAKE_PREFIX_PATH -export LD_LIBRARY_PATH=${HOME}/sw/adastra/gpu/blaspp-master/lib64:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH=${HOME}/sw/adastra/gpu/lapackpp-master/lib64:$LD_LIBRARY_PATH +export CMAKE_PREFIX_PATH=${SHAREDHOMEDIR}/sw/adastra/gpu/blaspp-master:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SHAREDHOMEDIR}/sw/adastra/gpu/lapackpp-master:$CMAKE_PREFIX_PATH +export LD_LIBRARY_PATH=${SHAREDHOMEDIR}/sw/adastra/gpu/blaspp-master/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SHAREDHOMEDIR}/sw/adastra/gpu/lapackpp-master/lib64:$LD_LIBRARY_PATH # optional: for QED lookup table generation support -module load boost/1.81.0-mpi-python3 +module load boost/1.83.0-mpi-python3 # optional: for openPMD support module load cray-hdf5-parallel -export CMAKE_PREFIX_PATH=${HOME}/sw/adastra/gpu/c-blosc-1.21.1:$CMAKE_PREFIX_PATH -export CMAKE_PREFIX_PATH=${HOME}/sw/adastra/gpu/adios2-2.8.3:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SHAREDHOMEDIR}/sw/adastra/gpu/c-blosc-1.21.1:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SHAREDHOMEDIR}/sw/adastra/gpu/adios2-2.8.3:$CMAKE_PREFIX_PATH export PATH=${HOME}/sw/adastra/gpu/adios2-2.8.3/bin:${PATH} # optional: for Python bindings or libEnsemble -module load cray-python/3.9.13.1 +module load cray-python/3.11.5 # fix system defaults: do not escape $ with a \ on tab completion shopt -s direxpand @@ -49,7 +52,4 @@ export AMREX_AMD_ARCH=gfx90a # compiler environment hints export CC=$(which cc) export CXX=$(which CC) -export FC=$(which ftn) -export CFLAGS="-I${ROCM_PATH}/include" -export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed" -export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64" +export FC=$(which amdflang) diff --git a/Tools/machines/adastra-cines/install_dependencies.sh b/Tools/machines/adastra-cines/install_dependencies.sh index 8a4cef4a2ec..b48bf144c2a 100755 --- a/Tools/machines/adastra-cines/install_dependencies.sh +++ b/Tools/machines/adastra-cines/install_dependencies.sh @@ -20,7 +20,7 @@ if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in yo # Remove old dependencies ##################################################### # -SW_DIR="${HOME}/sw/adastra/gpu" +SW_DIR="${SHAREDHOMEDIR}/sw/adastra/gpu" rm -rf 
${SW_DIR} mkdir -p ${SW_DIR} @@ -34,62 +34,62 @@ python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true # # BLAS++ (for PSATD+RZ) -if [ -d $HOME/src/blaspp ] +if [ -d $SHAREDHOMEDIR/src/blaspp ] then - cd $HOME/src/blaspp + cd $SHAREDHOMEDIR/src/blaspp git fetch --prune git checkout master git pull cd - else - git clone https://github.com/icl-utk-edu/blaspp.git $HOME/src/blaspp + git clone https://github.com/icl-utk-edu/blaspp.git $SHAREDHOMEDIR/src/blaspp fi -rm -rf $HOME/src/blaspp-adastra-gpu-build -CXX=$(which CC) cmake -S $HOME/src/blaspp -B $HOME/src/blaspp-adastra-gpu-build -Duse_openmp=OFF -Dgpu_backend=hip -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-master -cmake --build $HOME/src/blaspp-adastra-gpu-build --target install --parallel 16 -rm -rf $HOME/src/blaspp-adastra-gpu-build +rm -rf $SHAREDHOMEDIR/src/blaspp-adastra-gpu-build +CXX=$(which CC) cmake -S $SHAREDHOMEDIR/src/blaspp -B $SHAREDHOMEDIR/src/blaspp-adastra-gpu-build -Duse_openmp=OFF -Dgpu_backend=hip -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-master +cmake --build $SHAREDHOMEDIR/src/blaspp-adastra-gpu-build --target install --parallel 16 +rm -rf $SHAREDHOMEDIR/src/blaspp-adastra-gpu-build # LAPACK++ (for PSATD+RZ) -if [ -d $HOME/src/lapackpp ] +if [ -d $SHAREDHOMEDIR/src/lapackpp ] then - cd $HOME/src/lapackpp + cd $SHAREDHOMEDIR/src/lapackpp git fetch --prune git checkout master git pull cd - else - git clone https://github.com/icl-utk-edu/lapackpp.git $HOME/src/lapackpp + git clone https://github.com/icl-utk-edu/lapackpp.git $SHAREDHOMEDIR/src/lapackpp fi -rm -rf $HOME/src/lapackpp-adastra-gpu-build -CXX=$(which CC) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B $HOME/src/lapackpp-adastra-gpu-build -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-master -cmake --build $HOME/src/lapackpp-adastra-gpu-build --target install --parallel 16 -rm -rf $HOME/src/lapackpp-adastra-gpu-build +rm -rf $SHAREDHOMEDIR/src/lapackpp-adastra-gpu-build +CXX=$(which CC) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $SHAREDHOMEDIR/src/lapackpp -B $SHAREDHOMEDIR/src/lapackpp-adastra-gpu-build -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-master +cmake --build $SHAREDHOMEDIR/src/lapackpp-adastra-gpu-build --target install --parallel 16 +rm -rf $SHAREDHOMEDIR/src/lapackpp-adastra-gpu-build # c-blosc (I/O compression, for OpenPMD) -if [ -d $HOME/src/c-blosc ] +if [ -d $SHAREDHOMEDIR/src/c-blosc ] then # git repository is already there : else - git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git $HOME/src/c-blosc + git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git $SHAREDHOMEDIR/src/c-blosc fi -rm -rf $HOME/src/c-blosc-ad-build -cmake -S $HOME/src/c-blosc -B $HOME/src/c-blosc-ad-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${HOME}/sw/adastra/gpu/c-blosc-1.21.1 -cmake --build $HOME/src/c-blosc-ad-build --target install --parallel 16 -rm -rf $HOME/src/c-blosc-ad-build +rm -rf $SHAREDHOMEDIR/src/c-blosc-ad-build +cmake -S $SHAREDHOMEDIR/src/c-blosc -B $SHAREDHOMEDIR/src/c-blosc-ad-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1 +cmake --build $SHAREDHOMEDIR/src/c-blosc-ad-build --target install --parallel 16 +rm -rf $SHAREDHOMEDIR/src/c-blosc-ad-build # ADIOS2 v. 
2.8.3 (for OpenPMD) -if [ -d $HOME/src/adios2 ] +if [ -d $SHAREDHOMEDIR/src/adios2 ] then # git repository is already there : else - git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git $HOME/src/adios2 + git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git $SHAREDHOMEDIR/src/adios2 fi -rm -rf $HOME/src/adios2-ad-build -cmake -S $HOME/src/adios2 -B $HOME/src/adios2-ad-build -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${HOME}/sw/adastra/gpu/adios2-2.8.3 -cmake --build $HOME/src/adios2-ad-build --target install -j 16 -rm -rf $HOME/src/adios2-ad-build +rm -rf $SHAREDHOMEDIR/src/adios2-ad-build +cmake -S $SHAREDHOMEDIR/src/adios2 -B $SHAREDHOMEDIR/src/adios2-ad-build -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3 +cmake --build $SHAREDHOMEDIR/src/adios2-ad-build --target install -j 16 +rm -rf $SHAREDHOMEDIR/src/adios2-ad-build # Python ###################################################################### @@ -114,9 +114,9 @@ python3 -m pip install --upgrade openpmd-api python3 -m pip install --upgrade matplotlib python3 -m pip install --upgrade yt # install or update WarpX dependencies such as picmistandard -python3 -m pip install --upgrade -r $HOME/src/warpx/requirements.txt +python3 -m pip install --upgrade -r $SHAREDHOMEDIR/src/warpx/requirements.txt # optional: for libEnsemble -python3 -m pip install -r $HOME/src/warpx/Tools/LibEnsemble/requirements.txt +python3 -m pip install -r $SHAREDHOMEDIR/src/warpx/Tools/LibEnsemble/requirements.txt # optional: for optimas (based on libEnsemble & ax->botorch->gpytorch->pytorch) #python3 -m pip install --upgrade torch --index-url https://download.pytorch.org/whl/rocm5.4.2 -#python3 -m pip install -r $HOME/src/warpx/Tools/optimas/requirements.txt +#python3 -m pip install -r $SHAREDHOMEDIR/src/warpx/Tools/optimas/requirements.txt diff --git a/Tools/machines/adastra-cines/submit.sh b/Tools/machines/adastra-cines/submit.sh index 0cb75e86e69..15a2b292b58 100644 --- a/Tools/machines/adastra-cines/submit.sh +++ b/Tools/machines/adastra-cines/submit.sh @@ -1,22 +1,26 @@ #!/bin/bash -#SBATCH --job-name=warpx #SBATCH --account= +#SBATCH --job-name=warpx #SBATCH --constraint=MI250 -#SBATCH --ntasks-per-node=8 --cpus-per-task=8 --gpus-per-node=8 -#SBATCH --threads-per-core=1 # --hint=nomultithread +#SBATCH --nodes=2 #SBATCH --exclusive #SBATCH --output=%x-%j.out #SBATCH --time=00:10:00 -#SBATCH --nodes=2 module purge -# Architecture +# A CrayPE environment version +module load cpe/23.12 +# An architecture module load craype-accel-amd-gfx90a craype-x86-trento # A compiler to target the architecture module load PrgEnv-cray # Some architecture related libraries and tools -module load amd-mixed +module load CCE-GPU-3.0.0 +module load amd-mixed/5.2.3 + +date +module list export MPICH_GPU_SUPPORT_ENABLED=1 @@ -36,4 +40,5 @@ export OMP_NUM_THREADS=1 export WARPX_NMPI_PER_NODE=8 export TOTAL_NMPI=$(( ${SLURM_JOB_NUM_NODES} * ${WARPX_NMPI_PER_NODE} )) srun -N${SLURM_JOB_NUM_NODES} -n${TOTAL_NMPI} --ntasks-per-node=${WARPX_NMPI_PER_NODE} \ + --cpus-per-task=8 --threads-per-core=1 --gpu-bind=closest \ ./warpx inputs > output.txt From d8df8f60f0dc584202165d62ac37d0947cb8eb3d Mon Sep 17 00:00:00 2001 From: David Grote Date: Thu, 1 Feb 2024 11:37:59 -0800 Subject: [PATCH 02/13] Fix doc for doChargeConservingDepositionShapeNImplicit (#4658) --- Source/Particles/Deposition/CurrentDeposition.H | 4 
+++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index efe6efcc788..5d1055278b2 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -1142,9 +1142,11 @@ void doEsirkepovDepositionShapeN (const GetParticlePosition& GetPosition,
  * particles positions are determined and in how the particle gamma is calculated.
  *
  * \tparam depos_order deposition order
+ * \param xp_n,yp_n,zp_n Pointer to arrays of particle position at time level n.
  * \param GetPosition A functor for returning the particle position.
  * \param wp Pointer to array of particle weights.
- * \param uxp,uyp,uzp Pointer to arrays of particle momentum.
+ * \param uxp_n,uyp_n,uzp_n Pointer to arrays of particle momentum at time level n.
+ * \param uxp_nph,uyp_nph,uzp_nph Pointer to arrays of particle momentum at time level n + 1/2.
  * \param ion_lev Pointer to array of particle ionization level. This is required to have
          the charge of each macroparticle since q is a scalar. For non-ionizable species,

From 282ae836de82ce38e31a79eda06564aaa92beb81 Mon Sep 17 00:00:00 2001
From: Harmen Stoppels
Date: Thu, 1 Feb 2024 21:44:20 +0100
Subject: [PATCH 03/13] Add WarpX_CCACHE Option (#4637)

WarpX autodetects `ccache` and uses it, and there's nothing you can do
about it. This PR turns that into an explicit option, letting developers
control it through `-DWarpX_CCACHE:BOOL=ON/OFF` (it defaults to off when
WarpX is built as a subproject in a superbuild).

The reason for this is mostly related to compiler wrappers, of which
there are many... If `g++` is a compiler wrapper, then `ccache g++`
cannot see the effective flags passed to the underlying real `g++`.
That eventually leads to false positive cache hits, which are a pain to
debug. To fix that, for compiler wrappers you want the `g++` wrapper
itself to invoke `ccache`, which is for example how Spack handles it.

Further, if you use Spack with ccache enabled, the defaults of WarpX
cause `ccache` to be invoked twice (inner & outer), doubling the cache
requirements.

Finally, compiler wrappers that handle ccache themselves may set further
ccache options/flags that WarpX does not set, such as disabling hashing
of the build dir -- without that option the cache may be useless.

* Update Order and Docs

---
 CMakeLists.txt                | 11 ++++++++++-
 Docs/source/install/cmake.rst |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a0b28c9f86..76a5ecdd3f3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,7 +43,15 @@ set_cxx17_superbuild()
 # this is an optional tool that stores compiled object files; allows fast
 # re-builds even with "make clean" in between. Mainly used to store AMReX
 # objects
-set_ccache()
+if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
+    set(WarpX_CCACHE_DEFAULT ON)
+else()
+    set(WarpX_CCACHE_DEFAULT OFF)  # we are a subproject in a superbuild
+endif()
+option(WarpX_CCACHE "Enable ccache for faster rebuilds" ${WarpX_CCACHE_DEFAULT})
+if(WarpX_CCACHE)
+    set_ccache()
+endif()

 # Output Directories ##########################################################
@@ -149,6 +157,7 @@ endif()
 # this defines the variable BUILD_TESTING which is ON by default
 #include(CTest)
+
 # Dependencies ################################################################
 #
diff --git a/Docs/source/install/cmake.rst b/Docs/source/install/cmake.rst
index d1dc25cf095..0882efd7fe2 100644
--- a/Docs/source/install/cmake.rst
+++ b/Docs/source/install/cmake.rst
@@ -116,7 +116,7 @@ By default, the most important dependencies of WarpX are automatically downloade
 CMake Option                  Default & Values                               Description
 ============================= ============================================== ===========================================================
 ``BUILD_SHARED_LIBS``         ON/**OFF**                                     `Build shared libraries for dependencies `__
-``CCACHE_PROGRAM``            First found ``ccache`` executable.             Set to ``-DCCACHE_PROGRAM=NO`` to disable CCache.
+``WarpX_CCACHE``              **ON**/OFF                                     Search and use CCache to speed up rebuilds.
 ``AMReX_CUDA_PTX_VERBOSE``    ON/**OFF**                                     Print CUDA code generation statistics from ``ptxas``.
 ``WarpX_amrex_src``           *None*                                         Path to AMReX source directory (preferred if set)
 ``WarpX_amrex_repo``          ``https://github.com/AMReX-Codes/amrex.git``   Repository URI to pull and build AMReX from

From 7a7c704ec0d34c46634452a49c07dcd34c1e3c13 Mon Sep 17 00:00:00 2001
From: Luca Fedeli
Date: Fri, 2 Feb 2024 00:17:36 +0100
Subject: [PATCH 04/13] Update profile and job script for LUMI supercomputer
 (#4634)

* update LUMI profile and LUMI job script
* add advice to run on dev-g
* update job script and profile

---
 .../lumi-csc/lumi_warpx.profile.example | 20 +++++++++-------
 Tools/machines/lumi-csc/submit.sh       | 24 +++++++++++++++----
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/Tools/machines/lumi-csc/lumi_warpx.profile.example b/Tools/machines/lumi-csc/lumi_warpx.profile.example
index 74b8aa8df17..2cb44035ce4 100644
--- a/Tools/machines/lumi-csc/lumi_warpx.profile.example
+++ b/Tools/machines/lumi-csc/lumi_warpx.profile.example
@@ -2,9 +2,9 @@
 #export proj=

 # required dependencies
-module load LUMI/23.03 partition/G
+module load LUMI/23.09 partition/G
 module load rocm/5.2.3 # waiting for 5.5 for next bump
-module load buildtools/23.03
+module load buildtools/23.09

 # optional: just an additional text editor
 module load nano
@@ -16,16 +16,16 @@ export LD_LIBRARY_PATH=${HOME}/sw/lumi/gpu/blaspp-master/lib64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=${HOME}/sw/lumi/gpu/lapackpp-master/lib64:$LD_LIBRARY_PATH

 # optional: for QED lookup table generation support
-module load Boost/1.81.0-cpeCray-23.03
+module load Boost/1.82.0-cpeCray-23.09

 # optional: for openPMD support
-module load cray-hdf5/1.12.2.3
+module load cray-hdf5/1.12.2.7
 export CMAKE_PREFIX_PATH=${HOME}/sw/lumi/gpu/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
 export CMAKE_PREFIX_PATH=${HOME}/sw/lumi/gpu/adios2-2.8.3:$CMAKE_PREFIX_PATH
 export PATH=${HOME}/sw/lumi/gpu/adios2-2.8.3/bin:${PATH}

 # optional: for Python bindings or libEnsemble
-module load cray-python/3.9.13.1
+module load cray-python/3.10.10

 # an alias to request an interactive batch node for one hour
 # for parallel execution, start on the batch node: srun
@@ -41,9 +41,13 @@ export MPICH_GPU_SUPPORT_ENABLED=1
 export AMREX_AMD_ARCH=gfx90a

 # compiler environment hints
-export CC=$(which cc)
-export CXX=$(which CC)
-export FC=$(which ftn)
+# Warning: using the compiler wrappers cc and CC
+#          instead of amdclang and amdclang++
+#          currently results in a significant
+#          loss of performance
+export CC=$(which amdclang)
+export CXX=$(which amdclang++)
+export FC=$(which amdflang)
 export CFLAGS="-I${ROCM_PATH}/include"
 export CXXFLAGS="-I${ROCM_PATH}/include -Wno-pass-failed"
 export LDFLAGS="-L${ROCM_PATH}/lib -lamdhip64"
diff --git a/Tools/machines/lumi-csc/submit.sh b/Tools/machines/lumi-csc/submit.sh
index f6be702300a..d784471acd5 100644
--- a/Tools/machines/lumi-csc/submit.sh
+++ b/Tools/machines/lumi-csc/submit.sh
@@ -10,7 +10,7 @@
 #SBATCH --gpus-per-node=8
 #SBATCH --time=00:10:00

-export MPICH_GPU_SUPPORT_ENABLED=1
+date

 # note (12-12-22)
 # this environment setting is currently needed on LUMI to work-around a
@@ -30,7 +30,11 @@ export FI_MR_CACHE_MONITOR=memhooks  # alternative cache monitor
 # the home directory, which does not scale.
 export ROCFFT_RTC_CACHE_PATH=/dev/null

-export OMP_NUM_THREADS=1
+# Seen since August 2023
+# OLCFDEV-1597: OFI Poll Failed UNDELIVERABLE Errors
+# https://docs.olcf.ornl.gov/systems/frontier_user_guide.html#olcfdev-1597-ofi-poll-failed-undeliverable-errors
+export MPICH_SMP_SINGLE_COPY_MODE=NONE
+export FI_CXI_RX_MATCH_MODE=software

 # LUMI documentation suggests using the following wrapper script
 # to set the ROCR_VISIBLE_DEVICES to the value of SLURM_LOCALID
@@ -47,9 +51,21 @@ chmod +x ./select_gpu
 sleep 1

 # LUMI documentation suggests using the following CPU bind
-# so that the node local rank and GPU ID match
+# in order to have 6 threads per GPU (blosc compression in adios2 uses threads)
 # see https://docs.lumi-supercomputer.eu/runjobs/scheduled-jobs/lumig-job/
-CPU_BIND="map_cpu:48,56,16,24,1,8,32,40"
+#
+# WARNING: the following CPU_BIND options don't work on the dev-g partition.
+#          If you want to run your simulation on dev-g, please comment them
+#          out and replace them with CPU_BIND="map_cpu:49,57,17,25,1,9,33,41"
+#
+CPU_BIND="mask_cpu:7e000000000000,7e00000000000000"
+CPU_BIND="${CPU_BIND},7e0000,7e000000"
+CPU_BIND="${CPU_BIND},7e,7e00"
+CPU_BIND="${CPU_BIND},7e00000000,7e0000000000"
+
+export OMP_NUM_THREADS=6
+
+export MPICH_GPU_SUPPORT_ENABLED=1

 srun --cpu-bind=${CPU_BIND} ./select_gpu ./warpx inputs | tee outputs.txt
 rm -rf ./select_gpu

From 1093f618d9efb5d0971fa609ed24758364a17caa Mon Sep 17 00:00:00 2001
From: Axel Huebl
Date: Thu, 1 Feb 2024 16:58:27 -0800
Subject: [PATCH 05/13] Fix BTD/Scrape Flush Count with Filters (#4657)

* Fix BTD/Scrape Flush Count with Filters

Move the counting of already-flushed particles for writers that call the
I/O backends multiple times per data set, e.g., BTD and boundary
scraping, into the I/O backend. Filtering is done as the first step in
the I/O backends, and thus the previous count, taken outside of the I/O
backends, was over-counting particles that might still get filtered out.

The offset should be a `long`: the overflow risk is very high for a pure
`int`. Also, the counter is `unsigned`, so `unsigned long` for now.

* Simplify: Remove `m_totalParticles_flushed_already`

Less state we can forget in checkpoint-restart and that we have to
transfer across API boundaries.
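
In short, the backends now recover the offset from the file itself
instead of carrying it as diagnostics state. A condensed sketch of the
logic this patch adds (the full code is in `OpenPMDHelpFunction.cpp`
and `WarpXOpenPMD.cpp` in the diff below):

```cpp
#include <openPMD/openPMD.hpp>

// The number of particles already flushed for a species is recovered from
// the declared extent of its "id" record, so no running counter needs to
// be kept (or checkpointed) in the diagnostics classes.
unsigned long
num_already_flushed (openPMD::ParticleSpecies & currSpecies)
{
    const auto *const scalar = openPMD::RecordComponent::SCALAR;
    unsigned long ParticleFlushOffset = 0;
    if (currSpecies.contains("id") &&
        currSpecies["id"].contains(scalar) &&
        !currSpecies["id"][scalar].empty())
    {
        ParticleFlushOffset = currSpecies["id"][scalar].getExtent().at(0);
    }
    return ParticleFlushOffset;
}

// At the write site: only BTD flushes the same iteration multiple times,
// so every other writer starts at offset zero.
//   unsigned long ParticleFlushOffset =
//       isBTD ? num_already_flushed(currSpecies) : 0;
```
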
--- Source/Diagnostics/BTDiagnostics.H | 2 - Source/Diagnostics/BTDiagnostics.cpp | 31 ++++-------- .../BoundaryScrapingDiagnostics.cpp | 17 ++----- Source/Diagnostics/Diagnostics.H | 5 -- Source/Diagnostics/FlushFormats/FlushFormat.H | 3 +- .../FlushFormats/FlushFormatAscent.H | 3 +- .../FlushFormats/FlushFormatAscent.cpp | 2 +- .../FlushFormats/FlushFormatCheckpoint.H | 3 +- .../FlushFormats/FlushFormatCheckpoint.cpp | 2 +- .../FlushFormats/FlushFormatOpenPMD.H | 3 +- .../FlushFormats/FlushFormatOpenPMD.cpp | 4 +- .../FlushFormats/FlushFormatPlotfile.H | 3 +- .../FlushFormats/FlushFormatPlotfile.cpp | 6 +-- .../FlushFormats/FlushFormatSensei.H | 3 +- .../FlushFormats/FlushFormatSensei.cpp | 7 ++- Source/Diagnostics/FullDiagnostics.cpp | 5 +- Source/Diagnostics/OpenPMDHelpFunction.H | 18 +++++++ Source/Diagnostics/OpenPMDHelpFunction.cpp | 20 ++++++++ Source/Diagnostics/WarpXOpenPMD.H | 8 ++- Source/Diagnostics/WarpXOpenPMD.cpp | 50 ++++++++----------- 20 files changed, 94 insertions(+), 101 deletions(-) diff --git a/Source/Diagnostics/BTDiagnostics.H b/Source/Diagnostics/BTDiagnostics.H index ab894da69c2..f6c44c777ea 100644 --- a/Source/Diagnostics/BTDiagnostics.H +++ b/Source/Diagnostics/BTDiagnostics.H @@ -399,8 +399,6 @@ private: lab-frame data. */ void InitializeParticleFunctors () override; - /** Update total number of particles flushed for all species for ith snapshot */ - void UpdateTotalParticlesFlushed(int i_buffer); /** Reset total number of particles in the particle buffer to 0 for ith snapshot */ void ResetTotalParticlesInBuffer(int i_buffer); /** Clear particle data stored in the particle buffer */ diff --git a/Source/Diagnostics/BTDiagnostics.cpp b/Source/Diagnostics/BTDiagnostics.cpp index 0e517e8190c..f7965cd2688 100644 --- a/Source/Diagnostics/BTDiagnostics.cpp +++ b/Source/Diagnostics/BTDiagnostics.cpp @@ -129,7 +129,6 @@ void BTDiagnostics::DerivedInitData () } } m_particles_buffer.resize(m_num_buffers); - m_totalParticles_flushed_already.resize(m_num_buffers); m_totalParticles_in_buffer.resize(m_num_buffers); // check that simulation can fill all BTD snapshots @@ -1065,12 +1064,12 @@ BTDiagnostics::Flush (int i_buffer, bool force_flush) } } m_flush_format->WriteToFile( - m_varnames, m_mf_output[i_buffer], m_geom_output[i_buffer], warpx.getistep(), - labtime, m_output_species[i_buffer], nlev_output, file_name, m_file_min_digits, + m_varnames, m_mf_output.at(i_buffer), m_geom_output.at(i_buffer), warpx.getistep(), + labtime, + m_output_species.at(i_buffer), nlev_output, file_name, m_file_min_digits, m_plot_raw_fields, m_plot_raw_fields_guards, - use_pinned_pc, isBTD, i_buffer, m_buffer_flush_counter[i_buffer], - m_max_buffer_multifabs[i_buffer], m_geom_snapshot[i_buffer][0], isLastBTDFlush, - m_totalParticles_flushed_already[i_buffer]); + use_pinned_pc, isBTD, i_buffer, m_buffer_flush_counter.at(i_buffer), + m_max_buffer_multifabs.at(i_buffer), m_geom_snapshot.at(i_buffer).at(0), isLastBTDFlush); // Rescaling the box for plotfile after WriteToFile. This is because, for plotfiles, when writing particles, amrex checks if the particles are within the bounds defined by the box. However, in BTD, particles can be (at max) 1 cell outside the bounds of the geometry. So we keep a one-cell bigger box for plotfile when writing out the particle data and rescale after. 
if (m_format == "plotfile") { @@ -1104,7 +1103,6 @@ BTDiagnostics::Flush (int i_buffer, bool force_flush) NullifyFirstFlush(i_buffer); // if particles are selected for output then update and reset counters if (!m_output_species_names.empty()) { - UpdateTotalParticlesFlushed(i_buffer); ResetTotalParticlesInBuffer(i_buffer); ClearParticleBuffer(i_buffer); } @@ -1271,10 +1269,10 @@ void BTDiagnostics::MergeBuffersForPlotfile (int i_snapshot) InterleaveSpeciesHeader(recent_species_Header,snapshot_species_Header, m_output_species_names[i], m_buffer_flush_counter[i_snapshot]); if (BufferSpeciesHeader.m_total_particles == 0) { continue; } - if (m_totalParticles_flushed_already[i_snapshot][i]==0) { - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - std::rename(recent_ParticleHdrFilename.c_str(), snapshot_ParticleHdrFilename.c_str()) == 0, - std::string("Renaming ").append(recent_ParticleHdrFilename).append(" to ").append(snapshot_ParticleHdrFilename).append(" has failed")); + if (!amrex::FileExists(snapshot_ParticleHdrFilename)) { + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( + std::rename(recent_ParticleHdrFilename.c_str(), snapshot_ParticleHdrFilename.c_str()) == 0, + std::string("Renaming ").append(recent_ParticleHdrFilename).append(" to ").append(snapshot_ParticleHdrFilename).append(" has failed")); } else { InterleaveParticleDataHeader(recent_ParticleHdrFilename, snapshot_ParticleHdrFilename); @@ -1435,10 +1433,8 @@ BTDiagnostics::InitializeParticleBuffer () const MultiParticleContainer& mpc = warpx.GetPartContainer(); for (int i = 0; i < m_num_buffers; ++i) { m_particles_buffer[i].resize(m_output_species_names.size()); - m_totalParticles_flushed_already[i].resize(m_output_species_names.size()); m_totalParticles_in_buffer[i].resize(m_output_species_names.size()); for (int isp = 0; isp < m_particles_buffer[i].size(); ++isp) { - m_totalParticles_flushed_already[i][isp] = 0; m_totalParticles_in_buffer[i][isp] = 0; m_particles_buffer[i][isp] = std::make_unique(WarpX::GetInstance().GetParGDB()); const int idx = mpc.getSpeciesID(m_output_species_names[isp]); @@ -1489,15 +1485,6 @@ BTDiagnostics::PrepareParticleDataForOutput() } } -void -BTDiagnostics::UpdateTotalParticlesFlushed(int i_buffer) -{ - for (int isp = 0; isp < m_totalParticles_flushed_already[i_buffer].size(); ++isp) { - m_totalParticles_flushed_already[i_buffer][isp] += static_cast( - m_particles_buffer[i_buffer][isp]->TotalNumberOfParticles()); - } -} - void BTDiagnostics::ResetTotalParticlesInBuffer(int i_buffer) { diff --git a/Source/Diagnostics/BoundaryScrapingDiagnostics.cpp b/Source/Diagnostics/BoundaryScrapingDiagnostics.cpp index c85dbd6b226..11ffce02f09 100644 --- a/Source/Diagnostics/BoundaryScrapingDiagnostics.cpp +++ b/Source/Diagnostics/BoundaryScrapingDiagnostics.cpp @@ -102,15 +102,6 @@ BoundaryScrapingDiagnostics::InitializeParticleBuffer () m_output_species[i_buffer].push_back(ParticleDiag(m_diag_name, species_name, pc, bnd_buffer)); } } - // Initialize total number of particles flushed - m_totalParticles_flushed_already.resize(m_num_buffers); - for (int i_buffer = 0; i_buffer < m_num_buffers; ++i_buffer) { - int const n_species = static_cast(m_output_species_names.size()); - m_totalParticles_flushed_already[i_buffer].resize(n_species); - for (int i_species=0; i_speciesWriteToFile( - m_varnames, m_mf_output[i_buffer], m_geom_output[i_buffer], warpx.getistep(), - warpx.gett_new(0), m_output_species[i_buffer], nlev_output, file_prefix, + m_varnames, m_mf_output.at(i_buffer), m_geom_output.at(i_buffer), warpx.getistep(), + warpx.gett_new(0), + 
m_output_species.at(i_buffer), + nlev_output, file_prefix, m_file_min_digits, false, false, use_pinned_pc, isBTD, warpx.getistep(0), bufferID, numBTDBuffers, geom, - isLastBTD, m_totalParticles_flushed_already[i_buffer]); + isLastBTD); // Now that the data has been written out, clear out the buffer particle_buffer.clearParticles(i_buffer); diff --git a/Source/Diagnostics/Diagnostics.H b/Source/Diagnostics/Diagnostics.H index 53ce319d747..c0d2a9f0d53 100644 --- a/Source/Diagnostics/Diagnostics.H +++ b/Source/Diagnostics/Diagnostics.H @@ -309,11 +309,6 @@ protected: /** Vector of pointers to functors to compute particle output per species*/ amrex::Vector< std::unique_ptr > m_all_particle_functors; - /** Vector of total number of particles previously flushed, per species, per snapshot. - * The first vector is for total number of snapshots and second vector loops - * over the total number of species selected for diagnostics. - */ - amrex::Vector< amrex::Vector > m_totalParticles_flushed_already; /** Vector of total number of particles in the buffer, per species, per snapshot. * The first vector is for total number of snapshots and second vector loops * over the total number of species selected for diagnostics. diff --git a/Source/Diagnostics/FlushFormats/FlushFormat.H b/Source/Diagnostics/FlushFormats/FlushFormat.H index 403e9df7857..65741e4ff20 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormat.H +++ b/Source/Diagnostics/FlushFormats/FlushFormat.H @@ -24,8 +24,7 @@ public: bool isBTD = false, int snapshotID = -1, int bufferID = 1, int numBuffers = 1, const amrex::Geometry& full_BTD_snapshot = amrex::Geometry(), - bool isLastBTDFlush = false, - const amrex::Vector& totalParticlesFlushedAlready = amrex::Vector() ) const = 0; + bool isLastBTDFlush = false) const = 0; FlushFormat () = default; virtual ~FlushFormat() = default; diff --git a/Source/Diagnostics/FlushFormats/FlushFormatAscent.H b/Source/Diagnostics/FlushFormats/FlushFormatAscent.H index 228e4bc5cf6..9d8d3fcd7d2 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatAscent.H +++ b/Source/Diagnostics/FlushFormats/FlushFormatAscent.H @@ -41,8 +41,7 @@ public: bool isBTD = false, int snapshotID = -1, int bufferID = 1, int numBuffers = 1, const amrex::Geometry& full_BTD_snapshot = amrex::Geometry(), - bool isLastBTDFlush = false, - const amrex::Vector& totalParticlesFlushedAlready = amrex::Vector() ) const override; + bool isLastBTDFlush = false ) const override; #ifdef AMREX_USE_ASCENT /** \brief Do in-situ visualization for particle data. 
diff --git a/Source/Diagnostics/FlushFormats/FlushFormatAscent.cpp b/Source/Diagnostics/FlushFormats/FlushFormatAscent.cpp index 980047e3b46..abfba37cd15 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatAscent.cpp +++ b/Source/Diagnostics/FlushFormats/FlushFormatAscent.cpp @@ -21,7 +21,7 @@ FlushFormatAscent::WriteToFile ( const bool /*use_pinned_pc*/, bool isBTD, int /*snapshotID*/, int /*bufferID*/, int /*numBuffers*/, const amrex::Geometry& /*full_BTD_snapshot*/, - bool /*isLastBTDFlush*/, const amrex::Vector& /* totalParticlesFlushedAlready*/) const + bool /*isLastBTDFlush*/) const { #ifdef AMREX_USE_ASCENT WARPX_PROFILE("FlushFormatAscent::WriteToFile()"); diff --git a/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.H b/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.H index f6aad226d75..5c26ac97f61 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.H +++ b/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.H @@ -28,8 +28,7 @@ class FlushFormatCheckpoint final : public FlushFormatPlotfile bool isBTD = false, int snapshotID = -1, int bufferID = 1, int numBuffers = 1, const amrex::Geometry& full_BTD_snapshot = amrex::Geometry(), - bool isLastBTDFlush = false, - const amrex::Vector& totalParticlesFlushedAlready = amrex::Vector() ) const final; + bool isLastBTDFlush = false) const final; void CheckpointParticles (const std::string& dir, const amrex::Vector& particle_diags) const; diff --git a/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp b/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp index 5f59cd723da..d77437fb931 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp +++ b/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp @@ -39,7 +39,7 @@ FlushFormatCheckpoint::WriteToFile ( bool /*isBTD*/, int /*snapshotID*/, int /*bufferID*/, int /*numBuffers*/, const amrex::Geometry& /*full_BTD_snapshot*/, - bool /*isLastBTDFlush*/, const amrex::Vector& /* totalParticlesFlushedAlready*/) const + bool /*isLastBTDFlush*/) const { WARPX_PROFILE("FlushFormatCheckpoint::WriteToFile()"); diff --git a/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.H b/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.H index 88380407f5e..141760ac2a3 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.H +++ b/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.H @@ -40,8 +40,7 @@ public: bool isBTD = false, int snapshotID = -1, int bufferID = 1, int numBuffers = 1, const amrex::Geometry& full_BTD_snapshot = amrex::Geometry(), - bool isLastBTDFlush = false, - const amrex::Vector& totalParticlesFlushedAlready = amrex::Vector() ) const override; + bool isLastBTDFlush = false ) const override; ~FlushFormatOpenPMD () override = default; diff --git a/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.cpp b/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.cpp index 3b7006243e7..e0c8c4ef2d6 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.cpp +++ b/Source/Diagnostics/FlushFormats/FlushFormatOpenPMD.cpp @@ -126,7 +126,7 @@ FlushFormatOpenPMD::WriteToFile ( const bool use_pinned_pc, bool isBTD, int snapshotID, int bufferID, int numBuffers, const amrex::Geometry& full_BTD_snapshot, - bool isLastBTDFlush, const amrex::Vector& totalParticlesFlushedAlready) const + bool isLastBTDFlush) const { WARPX_PROFILE("FlushFormatOpenPMD::WriteToFile()"); const std::string& filename = amrex::Concatenate(prefix, iteration[0], file_min_digits); @@ -164,7 +164,7 @@ FlushFormatOpenPMD::WriteToFile ( // particles: all (reside only on 
locally finest level) m_OpenPMDPlotWriter->WriteOpenPMDParticles( - particle_diags, static_cast(time), use_pinned_pc, isBTD, isLastBTDFlush, totalParticlesFlushedAlready); + particle_diags, static_cast(time), use_pinned_pc, isBTD, isLastBTDFlush); // signal that no further updates will be written to this iteration m_OpenPMDPlotWriter->CloseStep(isBTD, isLastBTDFlush); diff --git a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.H b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.H index 486dcc3b5ee..c62056b8907 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.H +++ b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.H @@ -35,8 +35,7 @@ public: bool isBTD = false, int snapshotID = -1, int bufferID = 1, int numBuffers = 1, const amrex::Geometry& full_BTD_snapshot = amrex::Geometry(), - bool isLastBTDFlush = false, - const amrex::Vector& totalParticlesFlushedAlready = amrex::Vector() ) const override; + bool isLastBTDFlush = false) const override; /** Write general info of the run into the plotfile */ void WriteJobInfo(const std::string& dir) const; diff --git a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp index df73ed34c94..970d9a504d2 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp +++ b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp @@ -65,7 +65,7 @@ FlushFormatPlotfile::WriteToFile ( const bool /*use_pinned_pc*/, bool isBTD, int snapshotID, int bufferID, int numBuffers, const amrex::Geometry& /*full_BTD_snapshot*/, - bool isLastBTDFlush, const amrex::Vector& /* totalParticlesFlushedAlready*/) const + bool isLastBTDFlush) const { WARPX_PROFILE("FlushFormatPlotfile::WriteToFile()"); auto & warpx = WarpX::GetInstance(); @@ -340,9 +340,9 @@ FlushFormatPlotfile::WriteWarpXHeader( void FlushFormatPlotfile::WriteParticles(const std::string& dir, const amrex::Vector& particle_diags, - const amrex::Real time, bool isBTD) const + const amrex::Real time, + bool isBTD) const { - for (const auto& part_diag : particle_diags) { WarpXParticleContainer* pc = part_diag.getParticleContainer(); PinnedMemoryParticleContainer* pinned_pc = part_diag.getPinnedParticleContainer(); diff --git a/Source/Diagnostics/FlushFormats/FlushFormatSensei.H b/Source/Diagnostics/FlushFormats/FlushFormatSensei.H index 54eb7099ba4..d2ec9a5a4e0 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatSensei.H +++ b/Source/Diagnostics/FlushFormats/FlushFormatSensei.H @@ -61,8 +61,7 @@ public: bool isBTD = false, int snapshotID = -1, int bufferID = 1, int numBuffers = 1, const amrex::Geometry& full_BTD_snapshot = amrex::Geometry(), - bool isLastBTDFlush = false, - const amrex::Vector& totalParticlesFlushedAlready = amrex::Vector() ) const override; + bool isLastBTDFlush = false) const override; /** \brief Do in-situ visualization for particle data. * \param[in] particle_diags Each element of this vector handles output of 1 species. 
diff --git a/Source/Diagnostics/FlushFormats/FlushFormatSensei.cpp b/Source/Diagnostics/FlushFormats/FlushFormatSensei.cpp
index e162b8b3121..348e1da4a00 100644
--- a/Source/Diagnostics/FlushFormats/FlushFormatSensei.cpp
+++ b/Source/Diagnostics/FlushFormats/FlushFormatSensei.cpp
@@ -53,14 +53,13 @@ FlushFormatSensei::WriteToFile (
     bool plot_raw_fields,
     bool plot_raw_fields_guards, const bool use_pinned_pc,
     bool isBTD, int /*snapshotID*/, int /*bufferID*/, int /*numBuffers*/,
-    const amrex::Geometry& /*full_BTD_snapshot*/, bool /*isLastBTDFlush*/,
-    const amrex::Vector<int>& totalParticlesFlushedAlready) const
+    const amrex::Geometry& /*full_BTD_snapshot*/, bool /*isLastBTDFlush*/) const
 {
     amrex::ignore_unused(
         geom, nlev, prefix, file_min_digits,
         plot_raw_fields, plot_raw_fields_guards,
-        use_pinned_pc,
-        totalParticlesFlushedAlready);
+        use_pinned_pc
+    );

 #ifndef AMREX_USE_SENSEI_INSITU
     amrex::ignore_unused(varnames, mf, iteration, time, particle_diags,
diff --git a/Source/Diagnostics/FullDiagnostics.cpp b/Source/Diagnostics/FullDiagnostics.cpp
index 4f1e47a2a52..fd329a38220 100644
--- a/Source/Diagnostics/FullDiagnostics.cpp
+++ b/Source/Diagnostics/FullDiagnostics.cpp
@@ -133,8 +133,9 @@ FullDiagnostics::Flush ( int i_buffer, bool /* force_flush */ )
     auto & warpx = WarpX::GetInstance();

     m_flush_format->WriteToFile(
-        m_varnames, m_mf_output[i_buffer], m_geom_output[i_buffer], warpx.getistep(),
-        warpx.gett_new(0), m_output_species[i_buffer], nlev_output, m_file_prefix,
+        m_varnames, m_mf_output.at(i_buffer), m_geom_output.at(i_buffer), warpx.getistep(),
+        warpx.gett_new(0),
+        m_output_species.at(i_buffer), nlev_output, m_file_prefix,
         m_file_min_digits, m_plot_raw_fields, m_plot_raw_fields_guards);

     FlushRaw();
diff --git a/Source/Diagnostics/OpenPMDHelpFunction.H b/Source/Diagnostics/OpenPMDHelpFunction.H
index 9db4b9fb194..d2f2c4f9f9d 100644
--- a/Source/Diagnostics/OpenPMDHelpFunction.H
+++ b/Source/Diagnostics/OpenPMDHelpFunction.H
@@ -14,7 +14,25 @@

 #include <string>

+/** Determine the preferred file ending if unspecified
+ *
+ * @return file ending without the "."
+ */
 std::string
 WarpXOpenPMDFileType ();

+#ifdef WARPX_USE_OPENPMD
+/** Determine how many particles were already written in this species and step
+ *
+ * For a particle species, this checks the current size of the id attribute, if it
+ * exists; if it does, its extent is taken as the number of particles already on disk.
+ *
+ * Note that this checks the declared size, not necessarily the written size.
+ *
+ * @return existing extent of the "id" attribute or zero.
+ */ +unsigned long +num_already_flushed (openPMD::ParticleSpecies & currSpecies); +#endif + #endif // WARPX_OPENPMDHELPFUNCTION_H_ diff --git a/Source/Diagnostics/OpenPMDHelpFunction.cpp b/Source/Diagnostics/OpenPMDHelpFunction.cpp index a898c97b6b4..6170249b52b 100644 --- a/Source/Diagnostics/OpenPMDHelpFunction.cpp +++ b/Source/Diagnostics/OpenPMDHelpFunction.cpp @@ -27,3 +27,23 @@ WarpXOpenPMDFileType () #endif // WARPX_USE_OPENPMD return openPMDFileType; } + +#ifdef WARPX_USE_OPENPMD +unsigned long +num_already_flushed (openPMD::ParticleSpecies & currSpecies) +{ + const auto *const scalar = openPMD::RecordComponent::SCALAR; + + unsigned long ParticleFlushOffset = 0; + + if (currSpecies.contains("id")) { + if (currSpecies["id"].contains(scalar)) { + if (!currSpecies["id"][scalar].empty()) { + ParticleFlushOffset = currSpecies["id"][scalar].getExtent().at(0); + } + } + } + + return ParticleFlushOffset; +} +#endif diff --git a/Source/Diagnostics/WarpXOpenPMD.H b/Source/Diagnostics/WarpXOpenPMD.H index e3b7b893d0a..4597dacd9ae 100644 --- a/Source/Diagnostics/WarpXOpenPMD.H +++ b/Source/Diagnostics/WarpXOpenPMD.H @@ -125,8 +125,7 @@ public: amrex::Real time, bool use_pinned_pc = false, bool isBTD = false, - bool isLastBTDFlush = false, - const amrex::Vector& totalParticlesFlushedAlready = amrex::Vector()); + bool isLastBTDFlush = false); /** Write out all openPMD fields for all active MR levels * @@ -290,9 +289,9 @@ private: * @param[in] int_comp_names The int attribute names, from WarpX * @param[in] charge Charge of the particles (note: fix for ions) * @param[in] mass Mass of the particles + * @param[inout] ParticleFlushOffset previously flushed number of particles in BTD * @param[in] isBTD is this a backtransformed diagnostics (BTD) write? * @param[in] isLastBTDFlush is this the last time we will flush this BTD station? - * @param[in] ParticleFlushOffset previously flushed number of particles in BTD */ void DumpToFile (ParticleContainer* pc, const std::string& name, @@ -304,8 +303,7 @@ private: amrex::ParticleReal charge, amrex::ParticleReal mass, bool isBTD = false, - bool isLastBTDFlush = false, - int ParticleFlushOffset = 0); + bool isLastBTDFlush = false); /** Get the openPMD-api filename for openPMD::Series * diff --git a/Source/Diagnostics/WarpXOpenPMD.cpp b/Source/Diagnostics/WarpXOpenPMD.cpp index 71d96a47927..64411ecf6e4 100644 --- a/Source/Diagnostics/WarpXOpenPMD.cpp +++ b/Source/Diagnostics/WarpXOpenPMD.cpp @@ -519,9 +519,11 @@ WarpXOpenPMDPlot::Init (openPMD::Access access, bool isBTD) void WarpXOpenPMDPlot::WriteOpenPMDParticles (const amrex::Vector& particle_diags, - const amrex::Real time, const bool use_pinned_pc, - const bool isBTD, const bool isLastBTDFlush, - const amrex::Vector& totalParticlesFlushedAlready) + const amrex::Real time, + const bool use_pinned_pc, + const bool isBTD, + const bool isLastBTDFlush +) { WARPX_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDParticles()"); @@ -618,31 +620,15 @@ for (unsigned i = 0, n = particle_diags.size(); i < n; ++i) { // real_names contains a list of all real particle attributes. // real_flags is 1 or 0, whether quantity is dumped or not. 
- { - if (isBTD) { - DumpToFile(&tmp, - particle_diags[i].getSpeciesName(), - m_CurrentStep, - real_flags, - int_flags, - real_names, int_names, - pc->getCharge(), pc->getMass(), - isBTD, isLastBTDFlush, - totalParticlesFlushedAlready[i] - ); - } else { - DumpToFile(&tmp, - particle_diags[i].getSpeciesName(), - m_CurrentStep, - real_flags, - int_flags, - real_names, int_names, - pc->getCharge(), pc->getMass(), - isBTD, isLastBTDFlush, - 0 - ); - } - } + DumpToFile(&tmp, + particle_diags.at(i).getSpeciesName(), + m_CurrentStep, + real_flags, + int_flags, + real_names, int_names, + pc->getCharge(), pc->getMass(), + isBTD, isLastBTDFlush + ); } } @@ -657,8 +643,9 @@ WarpXOpenPMDPlot::DumpToFile (ParticleContainer* pc, amrex::ParticleReal const charge, amrex::ParticleReal const mass, const bool isBTD, - const bool isLastBTDFlush, - int ParticleFlushOffset) { + const bool isLastBTDFlush +) +{ WARPX_ALWAYS_ASSERT_WITH_MESSAGE(m_Series != nullptr, "openPMD: series must be initialized"); AMREX_ALWAYS_ASSERT(write_real_comp.size() == pc->NumRealComps()); @@ -672,6 +659,9 @@ WarpXOpenPMDPlot::DumpToFile (ParticleContainer* pc, openPMD::Iteration currIteration = GetIteration(iteration, isBTD); openPMD::ParticleSpecies currSpecies = currIteration.particles[name]; + // only BTD writes multiple times into the same step, zero for other methods + unsigned long ParticleFlushOffset = isBTD ? num_already_flushed(currSpecies) : 0; + // prepare data structures the first time BTD has non-zero particles // we set some of them to zero extent, so we need to time that well bool const is_first_flush_with_particles = num_dump_particles > 0 && ParticleFlushOffset == 0; From 206b0815a060aa9ccbf0f71a46f1142137f5b8a8 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 1 Feb 2024 20:06:59 -0800 Subject: [PATCH 06/13] Release 24.02 (#4660) * AMReX: 24.02 * pyAMReX: 24.02 * WarpX: 24.02 --- .github/workflows/cuda.yml | 2 +- CMakeLists.txt | 2 +- Docs/source/conf.py | 4 ++-- Python/setup.py | 2 +- Regression/WarpX-GPU-tests.ini | 2 +- Regression/WarpX-tests.ini | 2 +- cmake/dependencies/AMReX.cmake | 4 ++-- cmake/dependencies/pyAMReX.cmake | 4 ++-- run_test.sh | 2 +- setup.py | 2 +- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 9960cdfbb29..79916c455d1 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -115,7 +115,7 @@ jobs: which nvcc || echo "nvcc not in PATH!" git clone https://github.com/AMReX-Codes/amrex.git ../amrex - cd ../amrex && git checkout --detach 689144d157a0106faf3d0ae89f8d90b0250cf975 && cd - + cd ../amrex && git checkout --detach 24.02 && cd - make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 2 ccache -s diff --git a/CMakeLists.txt b/CMakeLists.txt index 76a5ecdd3f3..3a947b01dcd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ # Preamble #################################################################### # cmake_minimum_required(VERSION 3.20.0) -project(WarpX VERSION 24.01) +project(WarpX VERSION 24.02) include(${WarpX_SOURCE_DIR}/cmake/WarpXFunctions.cmake) diff --git a/Docs/source/conf.py b/Docs/source/conf.py index 48a02c5d216..b34c437b829 100644 --- a/Docs/source/conf.py +++ b/Docs/source/conf.py @@ -103,9 +103,9 @@ def __init__(self, *args, **kwargs): # built documents. # # The short X.Y version. -version = u'24.01' +version = u'24.02' # The full version, including alpha/beta/rc tags. 
-release = u'24.01' +release = u'24.02' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/Python/setup.py b/Python/setup.py index f82e6563a31..3c6c0f605c0 100644 --- a/Python/setup.py +++ b/Python/setup.py @@ -54,7 +54,7 @@ package_data = {} setup(name = 'pywarpx', - version = '24.01', + version = '24.02', packages = ['pywarpx'], package_dir = {'pywarpx': 'pywarpx'}, description = """Wrapper of WarpX""", diff --git a/Regression/WarpX-GPU-tests.ini b/Regression/WarpX-GPU-tests.ini index 70c62b190fb..0659d530cf5 100644 --- a/Regression/WarpX-GPU-tests.ini +++ b/Regression/WarpX-GPU-tests.ini @@ -60,7 +60,7 @@ emailBody = Check https://ccse.lbl.gov/pub/GpuRegressionTesting/WarpX/ for more [AMReX] dir = /home/regtester/git/amrex/ -branch = 689144d157a0106faf3d0ae89f8d90b0250cf975 +branch = 24.02 [source] dir = /home/regtester/git/WarpX diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini index ab11d70dfcc..ae22eba499d 100644 --- a/Regression/WarpX-tests.ini +++ b/Regression/WarpX-tests.ini @@ -59,7 +59,7 @@ emailBody = Check https://ccse.lbl.gov/pub/RegressionTesting/WarpX/ for more det [AMReX] dir = /home/regtester/AMReX_RegTesting/amrex/ -branch = 689144d157a0106faf3d0ae89f8d90b0250cf975 +branch = 24.02 [source] dir = /home/regtester/AMReX_RegTesting/warpx diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake index 9b74c8db7fd..0f6a15a5ff4 100644 --- a/cmake/dependencies/AMReX.cmake +++ b/cmake/dependencies/AMReX.cmake @@ -250,7 +250,7 @@ macro(find_amrex) endif() set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION}) - find_package(AMReX 24.01 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) + find_package(AMReX 24.02 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS) # note: TINYP skipped because user-configured and optional # AMReX CMake helper scripts @@ -269,7 +269,7 @@ set(WarpX_amrex_src "" set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git" CACHE STRING "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)") -set(WarpX_amrex_branch "689144d157a0106faf3d0ae89f8d90b0250cf975" +set(WarpX_amrex_branch "24.02" CACHE STRING "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)") diff --git a/cmake/dependencies/pyAMReX.cmake b/cmake/dependencies/pyAMReX.cmake index 3c043f60226..b4cf9f3f9c1 100644 --- a/cmake/dependencies/pyAMReX.cmake +++ b/cmake/dependencies/pyAMReX.cmake @@ -64,7 +64,7 @@ function(find_pyamrex) endif() elseif(NOT WarpX_pyamrex_internal) # TODO: MPI control - find_package(pyAMReX 24.01 CONFIG REQUIRED) + find_package(pyAMReX 24.02 CONFIG REQUIRED) message(STATUS "pyAMReX: Found version '${pyAMReX_VERSION}'") endif() endfunction() @@ -79,7 +79,7 @@ option(WarpX_pyamrex_internal "Download & build pyAMReX" ON) set(WarpX_pyamrex_repo "https://github.com/AMReX-Codes/pyamrex.git" CACHE STRING "Repository URI to pull and build pyamrex from if(WarpX_pyamrex_internal)") -set(WarpX_pyamrex_branch "cdf03496f6809527b97950e077508ca4b201fa9b" +set(WarpX_pyamrex_branch "24.02" CACHE STRING "Repository branch for WarpX_pyamrex_repo if(WarpX_pyamrex_internal)") diff --git a/run_test.sh b/run_test.sh index 48857d264cb..e1b45ab7c28 100755 --- a/run_test.sh +++ b/run_test.sh @@ -68,7 +68,7 @@ python3 -m pip 
install --upgrade -r warpx/Regression/requirements.txt # Clone AMReX and warpx-data git clone https://github.com/AMReX-Codes/amrex.git -cd amrex && git checkout --detach 689144d157a0106faf3d0ae89f8d90b0250cf975 && cd - +cd amrex && git checkout --detach 24.02 && cd - # warpx-data contains various required data sets git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git # openPMD-example-datasets contains various required data sets diff --git a/setup.py b/setup.py index d3efeaaacd5..197a39ce23f 100644 --- a/setup.py +++ b/setup.py @@ -278,7 +278,7 @@ def build_extension(self, ext): setup( name='pywarpx', # note PEP-440 syntax: x.y.zaN but x.y.z.devN - version = '24.01', + version = '24.02', packages = ['pywarpx'], package_dir = {'pywarpx': 'Python/pywarpx'}, author='Jean-Luc Vay, David P. Grote, Maxence Thévenet, Rémi Lehe, Andrew Myers, Weiqun Zhang, Axel Huebl, et al.', From 9d8ecf93df7c8713df08eac96c1f88f31b7fcd0d Mon Sep 17 00:00:00 2001 From: Roelof Groenewald <40245517+roelof-groenewald@users.noreply.github.com> Date: Thu, 1 Feb 2024 20:08:18 -0800 Subject: [PATCH 07/13] Add install instructions for ALCF's Polaris (#4636) * add polaris machine files * add doc page for Polaris --- Docs/source/install/hpc.rst | 1 + Docs/source/install/hpc/polaris.rst | 187 ++++++++++++++++++ .../polaris-alcf/install_gpu_dependencies.sh | 123 ++++++++++++ Tools/machines/polaris-alcf/polaris_gpu.pbs | 36 ++++ .../polaris_gpu_warpx.profile.example | 51 +++++ 5 files changed, 398 insertions(+) create mode 100644 Docs/source/install/hpc/polaris.rst create mode 100755 Tools/machines/polaris-alcf/install_gpu_dependencies.sh create mode 100644 Tools/machines/polaris-alcf/polaris_gpu.pbs create mode 100644 Tools/machines/polaris-alcf/polaris_gpu_warpx.profile.example diff --git a/Docs/source/install/hpc.rst b/Docs/source/install/hpc.rst index 9617f2a7fd6..a7b0f636b56 100644 --- a/Docs/source/install/hpc.rst +++ b/Docs/source/install/hpc.rst @@ -46,6 +46,7 @@ This section documents quick-start guides for a selection of supercomputers that hpc/lxplus hpc/ookami hpc/perlmutter + hpc/polaris hpc/quartz hpc/spock hpc/summit diff --git a/Docs/source/install/hpc/polaris.rst b/Docs/source/install/hpc/polaris.rst new file mode 100644 index 00000000000..d20ecccee32 --- /dev/null +++ b/Docs/source/install/hpc/polaris.rst @@ -0,0 +1,187 @@ +.. _building-polaris: + +Polaris (ALCF) +============== + +The `Polaris cluster `__ is located at ALCF. + + +Introduction +------------ + +If you are new to this system, **please see the following resources**: + +* `ALCF user guide `__ +* Batch system: `PBS `__ +* `Filesystems `__ + +.. _building-polaris-preparation: + +Preparation +----------- + +Use the following commands to download the WarpX source code: + +.. code-block:: bash + + git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx + +On Polaris, you can run either on GPU nodes with fast A100 GPUs (recommended) or CPU nodes. + +.. tab-set:: + + .. tab-item:: A100 GPUs + + We use system software modules, add environment hints and further dependencies via the file ``$HOME/polaris_gpu_warpx.profile``. + Create it now: + + .. code-block:: bash + + cp $HOME/src/warpx/Tools/machines/polaris-alcf/polaris_gpu_warpx.profile.example $HOME/polaris_gpu_warpx.profile + + .. dropdown:: Script Details + :color: light + :icon: info + :animate: fade-in-slide-down + + .. 
literalinclude:: ../../../../Tools/machines/polaris-alcf/polaris_gpu_warpx.profile.example
+            :language: bash
+
+      Edit the 2nd line of this script, which sets the ``export proj=""`` variable.
+      For example, if you are a member of the project ``proj_name``, then run ``nano $HOME/polaris_gpu_warpx.profile`` and edit line 2 to read:
+
+      .. code-block:: bash
+
+         export proj="proj_name"
+
+      Exit the ``nano`` editor with ``Ctrl`` + ``O`` (save) and then ``Ctrl`` + ``X`` (exit).
+
+      .. important::
+
+         Now, and as the first step on future logins to Polaris, activate these environment settings:
+
+         .. code-block:: bash
+
+            source $HOME/polaris_gpu_warpx.profile
+
+      Finally, since Polaris does not yet provide software modules for some of our dependencies, install them once:
+
+      .. code-block:: bash
+
+         bash $HOME/src/warpx/Tools/machines/polaris-alcf/install_gpu_dependencies.sh
+         source /home/${USER}/sw/polaris/gpu/venvs/warpx/bin/activate
+
+      .. dropdown:: Script Details
+         :color: light
+         :icon: info
+         :animate: fade-in-slide-down
+
+         .. literalinclude:: ../../../../Tools/machines/polaris-alcf/install_gpu_dependencies.sh
+            :language: bash
+
+
+   .. tab-item:: CPU Nodes
+
+      *Under construction*
+
+
+.. _building-polaris-compilation:
+
+Compilation
+-----------
+
+Use the following :ref:`cmake commands ` to compile the application executable:
+
+.. tab-set::
+
+   .. tab-item:: A100 GPUs
+
+      .. code-block:: bash
+
+         cd $HOME/src/warpx
+         rm -rf build_pm_gpu
+
+         cmake -S . -B build_pm_gpu -DWarpX_COMPUTE=CUDA -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake --build build_pm_gpu -j 16
+
+      The WarpX application executables are now in ``$HOME/src/warpx/build_pm_gpu/bin/``.
+      Additionally, the following commands will install WarpX as a Python module:
+
+      .. code-block:: bash
+
+         cd $HOME/src/warpx
+         rm -rf build_pm_gpu_py
+
+         cmake -S . -B build_pm_gpu_py -DWarpX_COMPUTE=CUDA -DWarpX_PSATD=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_APP=OFF -DWarpX_PYTHON=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake --build build_pm_gpu_py -j 16 --target pip_install
+
+   .. tab-item:: CPU Nodes
+
+      *Under construction*
+
+Now, you can :ref:`submit Polaris compute jobs ` for WarpX :ref:`Python (PICMI) scripts ` (:ref:`example scripts `).
+Or, you can use the WarpX executables to submit Polaris jobs (:ref:`example inputs `).
+For executables, you can reference their location in your :ref:`job script ` or copy them to a location in your scratch space.
+
+
+.. _building-polaris-update:
+
+Update WarpX & Dependencies
+---------------------------
+
+If you already installed WarpX in the past and want to update it, start by getting the latest source code:
+
+.. code-block:: bash
+
+   cd $HOME/src/warpx
+
+   # read the output of this command - does it look ok?
+   git status
+
+   # get the latest WarpX source code
+   git fetch
+   git pull
+
+   # read the output of these commands - do they look ok?
+   git status
+   git log # press q to exit
+
+And, if needed,
+
+- :ref:`update the polaris_gpu_warpx.profile or polaris_cpu_warpx files `,
+- log out and into the system, activate the now updated environment profile as usual,
+- :ref:`execute the dependency install scripts `.
+
+As a last step, clean the build directory ``rm -rf $HOME/src/warpx/build_pm_*`` and rebuild WarpX.
+
+
+.. _running-cpp-polaris:
+
+Running
+-------
+
+.. tab-set::
+
+   .. tab-item:: A100 (40GB) GPUs
+
+      The batch script below can be used to run a WarpX simulation on multiple nodes (change ```` accordingly) on the supercomputer Polaris at ALCF.
+
+      Replace descriptions between chevrons ``<>`` by relevant values, for instance ``<input_file>`` could be ``plasma_mirror_inputs``.
+      Note that we run one MPI rank per GPU, i.e., four ranks per Polaris node.
+
+      .. literalinclude:: ../../../../Tools/machines/polaris-alcf/polaris_gpu.pbs
+         :language: bash
+         :caption: You can copy this file from ``$HOME/src/warpx/Tools/machines/polaris-alcf/polaris_gpu.pbs``.
+
+      To run a simulation, copy the lines above to a file ``polaris_gpu.pbs`` and run
+
+      .. code-block:: bash
+
+         qsub polaris_gpu.pbs
+
+      to submit the job.
+
+
+   .. tab-item:: CPU Nodes
+
+      *Under construction*
diff --git a/Tools/machines/polaris-alcf/install_gpu_dependencies.sh b/Tools/machines/polaris-alcf/install_gpu_dependencies.sh
new file mode 100755
index 00000000000..e2cdca86fbc
--- /dev/null
+++ b/Tools/machines/polaris-alcf/install_gpu_dependencies.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+#
+# Copyright 2024 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Author: Axel Huebl (edited by Roelof Groenewald for Polaris)
+# License: BSD-3-Clause-LBNL
+
+# Exit on first error encountered #############################################
+#
+set -eu -o pipefail
+
+# Check: ######################################################################
+#
+# Was polaris_gpu_warpx.profile sourced and configured correctly?
+if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your polaris_gpu_warpx.profile file! Please edit its line 2 to continue!"; exit 1; fi
+
+# Remove old dependencies #####################################################
+#
+SW_DIR="/home/${USER}/sw/polaris/gpu"
+rm -rf ${SW_DIR}
+mkdir -p ${SW_DIR}
+
+# remove common user mistakes in python, located in .local instead of a venv
+python3 -m pip uninstall -qq -y pywarpx
+python3 -m pip uninstall -qq -y warpx
+python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true
+
+# General extra dependencies ##################################################
+#
+
+# c-blosc (I/O compression)
+if [ -d $HOME/src/c-blosc ]
+then
+    cd $HOME/src/c-blosc
+    git fetch --prune
+    git checkout v1.21.1
+    cd -
+else
+    git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git $HOME/src/c-blosc
+fi
+rm -rf $HOME/src/c-blosc-pm-gpu-build
+cmake -S $HOME/src/c-blosc -B $HOME/src/c-blosc-pm-gpu-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1
+cmake --build $HOME/src/c-blosc-pm-gpu-build --target install --parallel 16
+rm -rf $HOME/src/c-blosc-pm-gpu-build
+
+# ADIOS2
+if [ -d $HOME/src/adios2 ]
+then
+    cd $HOME/src/adios2
+    git fetch --prune
+    git checkout v2.8.3
+    cd -
+else
+    git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git $HOME/src/adios2
+fi
+rm -rf $HOME/src/adios2-pm-gpu-build
+cmake -S $HOME/src/adios2 -B $HOME/src/adios2-pm-gpu-build -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3
+cmake --build $HOME/src/adios2-pm-gpu-build --target install -j 16
+rm -rf $HOME/src/adios2-pm-gpu-build
+
+# BLAS++ (for PSATD+RZ)
+if [ -d $HOME/src/blaspp ]
+then
+    cd $HOME/src/blaspp
+    git fetch --prune
+    git checkout master
+    git pull
+    cd -
+else
+    git clone https://github.com/icl-utk-edu/blaspp.git $HOME/src/blaspp
+fi
+rm -rf $HOME/src/blaspp-pm-gpu-build
+CXX=$(which CC) cmake -S $HOME/src/blaspp -B $HOME/src/blaspp-pm-gpu-build -Duse_openmp=OFF -Dgpu_backend=cuda -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-master
+cmake --build $HOME/src/blaspp-pm-gpu-build --target install --parallel 16
+rm -rf $HOME/src/blaspp-pm-gpu-build
+
+# LAPACK++ (for PSATD+RZ)
+if [ -d $HOME/src/lapackpp ]
+then
+    cd $HOME/src/lapackpp
+    git fetch --prune
+    git checkout master
+    git pull
+    cd -
+else
+    git clone https://github.com/icl-utk-edu/lapackpp.git $HOME/src/lapackpp
+fi
+rm -rf $HOME/src/lapackpp-pm-gpu-build
+CXX=$(which CC) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B $HOME/src/lapackpp-pm-gpu-build -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-master
+cmake --build $HOME/src/lapackpp-pm-gpu-build --target install --parallel 16
+rm -rf $HOME/src/lapackpp-pm-gpu-build
+
+# Python ######################################################################
+#
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade virtualenv
+python3 -m pip cache purge
+rm -rf ${SW_DIR}/venvs/warpx
+python3 -m venv --system-site-packages ${SW_DIR}/venvs/warpx
+source ${SW_DIR}/venvs/warpx/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade build
+python3 -m pip install --upgrade packaging
+python3 -m pip install --upgrade wheel
+python3 -m pip install --upgrade setuptools
+python3 -m pip install --upgrade cython
+python3 -m pip install --upgrade numpy
+python3 -m pip install --upgrade pandas
+python3 -m pip install --upgrade scipy
+# MPICC="cc -target-accel=nvidia80 -shared" python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
+python3 -m pip install --upgrade openpmd-api
+python3 -m pip install --upgrade matplotlib
+python3 -m pip install --upgrade yt
+# install or update WarpX dependencies such as picmistandard
+python3 -m pip install --upgrade -r $HOME/src/warpx/requirements.txt
+python3 -m pip install cupy-cuda11x # CUDA 11.7 compatible wheel
+# optional: for libEnsemble
+python3 -m pip install -r $HOME/src/warpx/Tools/LibEnsemble/requirements.txt
+# optional: for optimas (based on libEnsemble & ax->botorch->gpytorch->pytorch)
+python3 -m pip install --upgrade torch # CUDA 11.7 compatible wheel
+python3 -m pip install -r $HOME/src/warpx/Tools/optimas/requirements.txt
diff --git a/Tools/machines/polaris-alcf/polaris_gpu.pbs b/Tools/machines/polaris-alcf/polaris_gpu.pbs
new file mode 100644
index 00000000000..178db6ad6a2
--- /dev/null
+++ b/Tools/machines/polaris-alcf/polaris_gpu.pbs
@@ -0,0 +1,36 @@
+#!/bin/bash -l
+
+#PBS -A <proj>
+#PBS -l select=<NODES>:system=polaris
+#PBS -l place=scatter
+#PBS -l walltime=0:10:00
+#PBS -l filesystems=home:eagle
+#PBS -q debug
+#PBS -N test_warpx
+
+# Set required environment variables
+# support gpu-aware-mpi
+# export MPICH_GPU_SUPPORT_ENABLED=1
+
+# Change to working directory
+echo Working directory is $PBS_O_WORKDIR
+cd ${PBS_O_WORKDIR}
+
+echo Jobid: $PBS_JOBID
+echo Running on host `hostname`
+echo Running on nodes `cat $PBS_NODEFILE`
+
+# executable & inputs file or python interpreter & PICMI script here
+EXE=./warpx
+INPUTS=input1d
+
+# MPI and OpenMP settings
+NNODES=`wc -l < $PBS_NODEFILE`
+NRANKS_PER_NODE=4
+NDEPTH=1
+NTHREADS=1
+
+NTOTRANKS=$(( NNODES * NRANKS_PER_NODE ))
+echo "NUM_OF_NODES= ${NNODES} TOTAL_NUM_RANKS= ${NTOTRANKS} RANKS_PER_NODE= ${NRANKS_PER_NODE} THREADS_PER_RANK= ${NTHREADS}"
+
+mpiexec -np ${NTOTRANKS} ${EXE} ${INPUTS} > output.txt
diff --git a/Tools/machines/polaris-alcf/polaris_gpu_warpx.profile.example b/Tools/machines/polaris-alcf/polaris_gpu_warpx.profile.example
new file mode 100644
index 00000000000..d7a68bf16bb
--- /dev/null
+++ b/Tools/machines/polaris-alcf/polaris_gpu_warpx.profile.example
@@ -0,0 +1,51 @@
+# Set the project name
+export proj="" # change me!
+
+# swap to the Milan cray package
+# module swap craype-x86-rome craype-x86-milan
+
+# required dependencies
+module load cmake/3.23.2
+module load cudatoolkit-standalone
+
+# optional: for QED support with detailed tables
+# module load boost/1.81.0
+
+# optional: for openPMD and PSATD+RZ support
+module load cray-hdf5-parallel/1.12.2.3
+export CMAKE_PREFIX_PATH=/home/${USER}/sw/polaris/gpu/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=/home/${USER}/sw/polaris/gpu/adios2-2.8.3:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=/home/${USER}/sw/polaris/gpu/blaspp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=/home/${USER}/sw/polaris/gpu/lapackpp-master:$CMAKE_PREFIX_PATH
+
+export LD_LIBRARY_PATH=/home/${USER}/sw/polaris/gpu/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=/home/${USER}/sw/polaris/gpu/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=/home/${USER}/sw/polaris/gpu/blaspp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=/home/${USER}/sw/polaris/gpu/lapackpp-master/lib64:$LD_LIBRARY_PATH
+
+export PATH=/home/${USER}/sw/polaris/gpu/adios2-2.8.3/bin:${PATH}
+
+# optional: for Python bindings or libEnsemble
+module load cray-python/3.9.13.1
+
+if [ -d "/home/${USER}/sw/polaris/gpu/venvs/warpx" ]
+then
+    source /home/${USER}/sw/polaris/gpu/venvs/warpx/bin/activate
+fi
+
+# necessary to use CUDA-Aware MPI and run a job
+export CRAY_ACCEL_TARGET=nvidia80
+
+# optimize CUDA compilation for A100
+export AMREX_CUDA_ARCH=8.0
+
+# optimize CPU microarchitecture for AMD EPYC 3rd Gen (Milan/Zen3)
+# note: the cc/CC/ftn wrappers below add those
+# export CXXFLAGS="-march=znver3"
+# export CFLAGS="-march=znver3"
+
+# compiler environment hints
+export CC=nvc
+export CXX=nvc++
+export CUDACXX=nvcc
+export CUDAHOSTCXX=nvc++

From 60bf00039522925870b86f3db6c4ac7d12c3a04b Mon Sep 17 00:00:00 2001
From: Justin Ray Angus
Date: Thu, 1 Feb 2024 20:16:19 -0800
Subject: [PATCH 08/13] Implemented Villasenor and Buneman deposition routine for implicit solver (#4623)

* added infrastructure for doing the VillasenorAndBuneman current deposition scheme. Set algo.current_deposition = 3 to use. Right now, the actual algorithm is identical to doChargeConsevingCurrentDepositionImplicit().
* small modification to message when asserting that the evolve scheme is implicit when using villasenor-and-buneman current deposition.
* added new interpolation.angus_scheme as intermediate step for adding the CC1 deposit/gather routine from PICNIC. Seems to pass numerical energy conservation test in 2D right now, but the gather/deposit are actually just CIC.
* Created doGatherPicnicShapeN function. Does CIC right now, same as the VandB current deposition.
* vandb current deposition works in 2D.
* fixed bug in vandb deposition. Exact charge conservation is now obtained. Made same fix to corresponding gather routine.
* streamlined vandb deposition and corresponding gather routine.
* using dxp/dt in place of vp when doing current deposition for vandb scheme gives better charge conservation.
* added a new templated function to ShapeFactors.H that is used to compute the deposition weights transverse to the current direction for a segment in the villasenor and buneman current deposition.
* significant streamlining of new vandb current deposition.
* applied same streamlining to vandb deposition from previous commit to the corresponding gather routine.
* working on generalization of vandb deposit and corresponding gather routine to work for 1D, 3D, and for depos_order = 1. WIP
* removed interpolation.angus_scheme flag that was used to do the Picnic gather when doing Implicit. Now, the gather routine is chosen based on the CurrentDepositionAlgo enum value.
* galerkin_interpolation flag is no longer used with the Implicit solver. Removed some assertions related to this flag and the implicit solver.
* VillasenorAndBuneman ==> Villasenor
* added ability to use shape_factor 3 with villasenor deposit. However, initial tests do not give exact energy conservation. It is good, but not exact. Don't know why, so for now we still assert that shape_factor < 3 when using villasenor deposit.
* villasenor deposition now works with shape_factor = 3.
* villasenor deposition, and the corresponding gather, now work in 1D.
* intermediate checkin. Working on getting villasenor working in 3D. WIP.
* added a new function to ShapeFactors.H that is similar to the average function, but instead of returning the average weights using old and new positions it returns both old and new weights along with the shared left index. This is needed for the transverse interpolation for villasenor in 3D.
* added code to compute the cell-crossings in 3D. It runs in 3D, but charge is not conserved. WIP.
* fixed bug. villasenor deposition and corresponding gather work in 3D for shape factor = 2. Cleaned up logic for cell crossings setting in 3D.
* cleaning things up.
* villasenor deposition and corresponding gather work in 3D with shape_factor = 1, 2, and 3.
* code compiles in RZ.
* small tune up.
* fixed bug in how Xcell is set in villasenor deposition/gather. Previous implementation only worked when the left domain boundary was zero.
* refactoring to avoid roundoff issue in charge conservation for shape factor 1 with villasenor deposition. Roundoff issue solved in 1D. Still need to do 2D and 3D.
* fixed roundoff issue with shape = 1 when using villasenor for 2D.
* fixed roundoff issue with shape = 3 when using villasenor in 3D.
* added proper briefs to the villasenor deposition and the picnic-like gather.
* added a few comments.
* removed duplicate query for galerkin_scheme in WarpX.cpp. Why didn't merge catch this?
* minor cleanup.
* added 4th order shape factor routines to ShapeFactors.H as needed for using shape factor = 4 with villasenor deposition.
* generalized villasenor deposition routine and the picnic gather to work for shape factor orders higher than 3.
* added ability to parse in shape factor 4 when using villasenor deposition and added the ability to call shape factor 4 for the current density and charge density deposits.
* fixed a few small bugs that prevented compile in 3D.
* villasenor deposition now uses villasenor for out-of-plane J as well. Same for out-of-plane E in corresponding Picnic gather.
* slight refactoring and cleaning up some comments.
* cleaning up some comments.
* added regression tests.
* removed blank spaces.
* fixed bug
* cleaning up tabs.
* commented out std::cout lines.
* initializing potentially uninitialized variables.
* fixed indentation issue.
* changed some types to avoid narrowing conversion issue.
* removed cout comment lines.
* ions renamed to protons in new implicit example input deck, consistent with what is assumed in the benchmark data in the benchmarks_json/ folder. Updated the benchmark data using a simulation run on quartz LC.
* had to static_cast some quantities in new gather routine.
* more static casting.
* clang-tidy told me to use auto.
* fixed indentation.
* changed tolerance for new 2D CI test. max delta energy error on Azure is 2X higher than on LC.
* added particle y-position to the json file for new 2D implicit ci test.
* updated values in json file for new 2D implicit ci test to have the sum of the absolute value of the fields rather than just the sum.
* if ==> else if. This should help avoid a potential bug in the future.
* adjusted some lines that had an incorrect number of white spaces.
* significant simplification of code to determine segment values at cell crossings in 2D and 3D. By way of Dave Grote.
* slight refactoring for style and performance.
* more refactoring. No longer pre-defining slopes. Added logical check for zero dxp values before division.
* made list of params in brief for doVillasenorDepositionShapeNImplicit consistent with the input parameters.
* all variables used in computing the shape factor for the VB deposition, and corresponding gather, now have type double.
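In outline, the segment splitting described above works as in the following
simplified 1D sketch (illustrative only, not the WarpX implementation; the
real routine additionally handles the even/odd shape-order shift, the
transverse weights, and 2D/3D/RZ):

    # split a particle move, in grid coordinates, at cell-edge crossings;
    # each (start, end) pair is then deposited as its own segment
    import math

    def villasenor_segments_1d(z_old, z_new):
        k_old, k_new = math.floor(z_old), math.floor(z_new)
        crossings = abs(k_new - k_old)
        if crossings == 0:
            return [(z_old, z_new)]
        step = 1.0 if z_new > z_old else -1.0
        # first cell edge reached when moving from z_old toward z_new
        edge = k_old + (1.0 if step > 0.0 else 0.0)
        points = [z_old] + [edge + i*step for i in range(crossings)] + [z_new]
        return list(zip(points[:-1], points[1:]))

    # e.g., villasenor_segments_1d(0.4, 2.3)
    # -> [(0.4, 1.0), (1.0, 2.0), (2.0, 2.3)]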
---
 Examples/Tests/Implicit/analysis_vandb_2d.py  |  68 ++
 Examples/Tests/Implicit/inputs_vandb_2d       |  92 +++
 .../ImplicitPicard_VandB_2d.json              |  31 +
 Regression/WarpX-tests.ini                    |  17 +
 Source/Diagnostics/WarpXOpenPMD.cpp           |   2 +
 Source/Initialization/WarpXInitData.cpp       |   3 +
 .../Particles/Deposition/CurrentDeposition.H  | 664 ++++++++++++++++
 Source/Particles/Gather/FieldGather.H         | 710 +++++++++++++++++-
 .../Particles/PhysicalParticleContainer.cpp   |   6 +-
 Source/Particles/ShapeFactors.H               | 105 +++
 Source/Particles/WarpXParticleContainer.cpp   |  74 +-
 Source/Utils/WarpXAlgorithmSelection.H        |   3 +-
 Source/Utils/WarpXAlgorithmSelection.cpp      |   9 +-
 Source/WarpX.H                                |   2 +-
 Source/WarpX.cpp                              |  57 +-
 Source/ablastr/particles/DepositCharge.H      |   5 +
 16 files changed, 1810 insertions(+), 38 deletions(-)
 create mode 100755 Examples/Tests/Implicit/analysis_vandb_2d.py
 create mode 100644 Examples/Tests/Implicit/inputs_vandb_2d
 create mode 100644 Regression/Checksum/benchmarks_json/ImplicitPicard_VandB_2d.json

diff --git a/Examples/Tests/Implicit/analysis_vandb_2d.py b/Examples/Tests/Implicit/analysis_vandb_2d.py
new file mode 100755
index 00000000000..fa3299925a8
--- /dev/null
+++ b/Examples/Tests/Implicit/analysis_vandb_2d.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+# Copyright 2024 Justin Angus
+#
+#
+# This file is part of WarpX.
+#
+# License: BSD-3-Clause-LBNL
+#
+# This is a script that analyses the simulation results from the script `inputs_vandb_2d`.
+# This simulates a 2D periodic plasma using the implicit solver
+# with the Villasenor deposition using shape factor 2.
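+#
+# Two checks are made below, with the tolerances set in this script: the
+# relative change of the total (field + particle) energy over the run, and
+# the RMS error in Gauss's law, (rho - epsilon_0*divE)/(e*n0), on the grid.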
+import os
+import sys
+
+import numpy as np
+from scipy.constants import e, epsilon_0
+import yt
+
+sys.path.insert(1, '../../../../warpx/Regression/Checksum/')
+import checksumAPI
+
+# this will be the name of the plot file
+fn = sys.argv[1]
+
+field_energy = np.loadtxt('diags/reducedfiles/field_energy.txt', skiprows=1)
+particle_energy = np.loadtxt('diags/reducedfiles/particle_energy.txt', skiprows=1)
+
+total_energy = field_energy[:,2] + particle_energy[:,2]
+
+delta_E = (total_energy - total_energy[0])/total_energy[0]
+max_delta_E = np.abs(delta_E).max()
+
+# This case should have near machine precision conservation of energy
+tolerance_rel_energy = 2.e-14
+tolerance_rel_charge = 2.e-15
+
+print(f"max change in energy: {max_delta_E}")
+print(f"tolerance: {tolerance_rel_energy}")
+
+assert( max_delta_E < tolerance_rel_energy )
+
+# check for machine precision conservation of charge density
+n0 = 1.e30
+
+pltdir = sys.argv[1]
+ds = yt.load(pltdir)
+data = ds.covering_grid(level = 0, left_edge = ds.domain_left_edge, dims = ds.domain_dimensions)
+
+divE = data['boxlib', 'divE'].value
+rho = data['boxlib', 'rho'].value
+
+# compute local error in Gauss's law
+drho = (rho - epsilon_0*divE)/e/n0
+
+# compute RMS of the error on the grid
+nX = drho.shape[0]
+nZ = drho.shape[1]
+drho2_avg = (drho**2).sum()/(nX*nZ)
+drho_rms = np.sqrt(drho2_avg)
+
+print(f"rms error in charge conservation: {drho_rms}")
+print(f"tolerance: {tolerance_rel_charge}")
+
+assert( drho_rms < tolerance_rel_charge )
+
+test_name = os.path.split(os.getcwd())[1]
+checksumAPI.evaluate_checksum(test_name, fn)
diff --git a/Examples/Tests/Implicit/inputs_vandb_2d b/Examples/Tests/Implicit/inputs_vandb_2d
new file mode 100644
index 00000000000..2dc57323efe
--- /dev/null
+++ b/Examples/Tests/Implicit/inputs_vandb_2d
@@ -0,0 +1,92 @@
+#################################
+########## CONSTANTS ############
+#################################
+
+my_constants.n0 = 1.e30 # m^-3
+my_constants.Ti = 100. # eV
+my_constants.Te = 100. 
# eV +my_constants.wpe = q_e*sqrt(n0/(m_e*epsilon0)) +my_constants.de0 = clight/wpe +my_constants.nppcz = 10 # number of particles/cell in z +my_constants.dt = 0.1/wpe # s + +################################# +####### GENERAL PARAMETERS ###### +################################# +max_step = 20 +amr.n_cell = 40 40 +amr.max_grid_size = 8 +amr.blocking_factor = 8 +amr.max_level = 0 +geometry.dims = 2 +geometry.prob_lo = 0.0 0.0 # physical domain +geometry.prob_hi = 10.0*de0 10.0*de0 + +################################# +####### Boundary condition ###### +################################# +boundary.field_lo = periodic periodic +boundary.field_hi = periodic periodic + +################################# +############ NUMERICS ########### +################################# +warpx.serialize_initial_conditions = 1 +warpx.verbose = 1 +warpx.const_dt = dt +#warpx.cfl = 0.5656 +warpx.use_filter = 0 + +algo.maxwell_solver = Yee +algo.evolve_scheme = "implicit_picard" +algo.require_picard_convergence = 0 +algo.max_picard_iterations = 25 +algo.picard_iteration_tolerance = 0.0 #1.0e-12 +algo.particle_pusher = "boris" +#algo.particle_pusher = "higuera" + +algo.particle_shape = 2 +#algo.current_deposition = "direct" +#algo.current_deposition = "esirkepov" +algo.current_deposition = "villasenor" + +################################# +############ PLASMA ############# +################################# +particles.species_names = electrons protons + +electrons.charge = -q_e +electrons.mass = m_e +electrons.injection_style = "NUniformPerCell" +electrons.num_particles_per_cell_each_dim = nppcz nppcz +electrons.profile = constant +electrons.density = 1.e30 # number per m^3 +electrons.momentum_distribution_type = "gaussian" +electrons.ux_th = sqrt(Te*q_e/m_e)/clight +electrons.uy_th = sqrt(Te*q_e/m_e)/clight +electrons.uz_th = sqrt(Te*q_e/m_e)/clight + +protons.charge = q_e +protons.mass = m_p +protons.injection_style = "NUniformPerCell" +protons.num_particles_per_cell_each_dim = nppcz nppcz +protons.profile = constant +protons.density = 1.e30 # number per m^3 +protons.momentum_distribution_type = "gaussian" +protons.ux_th = sqrt(Ti*q_e/m_p)/clight +protons.uy_th = sqrt(Ti*q_e/m_p)/clight +protons.uz_th = sqrt(Ti*q_e/m_p)/clight + +# Diagnostics +diagnostics.diags_names = diag1 +diag1.intervals = 20 +diag1.diag_type = Full +diag1.fields_to_plot = Ex Ey Ez Bx By Bz jx jy jz rho divE +diag1.electrons.variables = w ux uy uz +diag1.protons.variables = w ux uy uz + +warpx.reduced_diags_names = particle_energy field_energy +particle_energy.type = ParticleEnergy +particle_energy.intervals = 1 +field_energy.type = FieldEnergy +field_energy.intervals = 1 diff --git a/Regression/Checksum/benchmarks_json/ImplicitPicard_VandB_2d.json b/Regression/Checksum/benchmarks_json/ImplicitPicard_VandB_2d.json new file mode 100644 index 00000000000..d97eb04883f --- /dev/null +++ b/Regression/Checksum/benchmarks_json/ImplicitPicard_VandB_2d.json @@ -0,0 +1,31 @@ +{ + "lev=0": { + "Bx": 72730.70321925254, + "By": 89276.6097395453, + "Bz": 66911.00019634314, + "Ex": 92036838733000.64, + "Ey": 15583500940725.84, + "Ez": 89163420502164.97, + "divE": 8.998871921763322e+22, + "jx": 2.7748639888523993e+19, + "jy": 2.9501400595579277e+19, + "jz": 2.6976140199337787e+19, + "rho": 796777020986.2787 + }, + "protons": { + "particle_momentum_x": 2.0873315539608036e-17, + "particle_momentum_y": 2.0858882907322405e-17, + "particle_momentum_z": 2.0877345477243595e-17, + "particle_position_x": 0.004251275869323399, + "particle_position_y": 
0.0042512738905209615, + "particle_weight": 2823958719279159.5 + }, + "electrons": { + "particle_momentum_x": 4.882673707817137e-19, + "particle_momentum_y": 4.879672470952739e-19, + "particle_momentum_z": 4.872329687213274e-19, + "particle_position_x": 0.004251641684258687, + "particle_position_y": 0.004251751978637919, + "particle_weight": 2823958719279159.5 + } +} diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini index ae22eba499d..3310e642dd3 100644 --- a/Regression/WarpX-tests.ini +++ b/Regression/WarpX-tests.ini @@ -4515,6 +4515,23 @@ doVis = 0 compareParticles = 1 analysisRoutine = Examples/Tests/Implicit/analysis_1d.py +[ImplicitPicard_VandB_2d] +buildDir = . +inputFile = Examples/Tests/Implicit/inputs_vandb_2d +runtime_params = warpx.abort_on_warning_threshold=high +dim = 2 +addToCompileString = +cmakeSetupOpts = -DWarpX_DIMS=2 +restartTest = 0 +useMPI = 1 +numprocs = 2 +useOMP = 0 +numthreads = 1 +compileTest = 0 +doVis = 0 +compareParticles = 1 +analysisRoutine = Examples/Tests/Implicit/analysis_vandb_2d.py + [SemiImplicitPicard_1d] buildDir = . inputFile = Examples/Tests/Implicit/inputs_1d_semiimplicit diff --git a/Source/Diagnostics/WarpXOpenPMD.cpp b/Source/Diagnostics/WarpXOpenPMD.cpp index 64411ecf6e4..7cc9f571a4a 100644 --- a/Source/Diagnostics/WarpXOpenPMD.cpp +++ b/Source/Diagnostics/WarpXOpenPMD.cpp @@ -1130,6 +1130,8 @@ WarpXOpenPMDPlot::SetConstParticleRecordsEDPIC ( return "Esirkepov"; case CurrentDepositionAlgo::Vay : return "Vay"; + case CurrentDepositionAlgo::Villasenor : + return "Villasenor"; default: return "directMorseNielson"; } diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index 169453a6e99..eee3012ab4d 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -229,6 +229,9 @@ WarpX::PrintMainPICparameters () else if (current_deposition_algo == CurrentDepositionAlgo::Esirkepov){ amrex::Print() << "Current Deposition: | Esirkepov \n"; } + else if (current_deposition_algo == CurrentDepositionAlgo::Villasenor){ + amrex::Print() << "Current Deposition: | Villasenor \n"; + } // Print type of particle pusher if (particle_pusher_algo == ParticlePusherAlgo::Vay){ amrex::Print() << "Particle Pusher: | Vay \n"; diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 5d1055278b2..18df09c3b43 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -1535,6 +1535,670 @@ void doChargeConservingDepositionShapeNImplicit (const amrex::ParticleReal * con #endif } +/** + * \brief Villasenor and Buneman Current Deposition for thread thread_num for implicit scheme. + * The specifics for the implicit scheme are in how gamma is determined. This is a charge- + * conserving deposition. The difference from Esirkepov is that the deposit is done segment + * by segment, where the segments are determined by cell crossings. In general, this results + * in a tighter stencil. The implementation is valid for an arbitrary number of cell crossings. + * + * \param depos_order deposition order + * \param xp_n,yp_n,zp_n Pointer to arrays of particle position at time level n. + * \param GetPosition A functor for returning the particle position. + * \param wp Pointer to array of particle weights. + * \param uxp_n,uyp_n,uzp_n Pointer to arrays of particle momentum at time level n. 
+ * \param uxp_nph,uyp_nph,uzp_nph Pointer to arrays of particle momentum at time level n + 1/2. + * \param ion_lev Pointer to array of particle ionization level. This is + required to have the charge of each macroparticle + since q is a scalar. For non-ionizable species, + ion_lev is a null pointer. + * \param Jx_arr,Jy_arr,Jz_arr Array4 of current density, either full array or tile. + * \param np_to_deposit Number of particles for which current is deposited. + * \param dt Time step for particle level + * \param dx 3D cell size + * \param xyzmin Physical lower bounds of domain. + * \param lo Index lower bounds of domain. + * \param q species charge. + * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry. + * \param cost Pointer to (load balancing) cost corresponding to box where present particles deposit current. + * \param load_balance_costs_update_algo Selected method for updating load balance costs. + */ +template +void doVillasenorDepositionShapeNImplicit (const amrex::ParticleReal * const xp_n, + const amrex::ParticleReal * const yp_n, + const amrex::ParticleReal * const zp_n, + const GetParticlePosition& GetPosition, + const amrex::ParticleReal * const wp, + [[maybe_unused]]const amrex::ParticleReal * const uxp_n, + [[maybe_unused]]const amrex::ParticleReal * const uyp_n, + [[maybe_unused]]const amrex::ParticleReal * const uzp_n, + [[maybe_unused]]const amrex::ParticleReal * const uxp_nph, + [[maybe_unused]]const amrex::ParticleReal * const uyp_nph, + [[maybe_unused]]const amrex::ParticleReal * const uzp_nph, + const int * const ion_lev, + const amrex::Array4& Jx_arr, + const amrex::Array4& Jy_arr, + const amrex::Array4& Jz_arr, + const long np_to_deposit, + const amrex::Real dt, + const std::array& dx, + const std::array xyzmin, + const amrex::Dim3 lo, + const amrex::Real q, + const int n_rz_azimuthal_modes, + amrex::Real * const cost, + const long load_balance_costs_update_algo) +{ + using namespace amrex; +#if !defined(WARPX_DIM_RZ) + ignore_unused(n_rz_azimuthal_modes); +#endif + +#if !defined(AMREX_USE_GPU) + amrex::ignore_unused(cost, load_balance_costs_update_algo); +#endif + + // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer + // (do_ionization=1) + bool const do_ionization = ion_lev; +#if !defined(WARPX_DIM_1D_Z) + Real const dxi = 1.0_rt / dx[0]; +#endif +#if !defined(WARPX_DIM_1D_Z) + Real const xmin = xyzmin[0]; +#endif +#if defined(WARPX_DIM_3D) + Real const dyi = 1.0_rt / dx[1]; + Real const ymin = xyzmin[1]; +#endif + Real const dzi = 1.0_rt / dx[2]; + Real const zmin = xyzmin[2]; + +#if defined(WARPX_DIM_3D) + Real const invvol = 1.0_rt / (dx[0]*dx[1]*dx[2]); +#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) + Real const invvol = 1.0_rt / (dx[0]*dx[2]); +#elif defined(WARPX_DIM_1D_Z) + Real const invvol = 1.0_rt / (dx[2]); +#endif + +#if !defined(WARPX_DIM_1D_Z) + Real constexpr one_third = 1.0_rt / 3.0_rt; + Real constexpr one_sixth = 1.0_rt / 6.0_rt; +#endif + + // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr +#if defined(WARPX_USE_GPUCLOCK) + amrex::Real* cost_real = nullptr; + if( load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::GpuClock) { + cost_real = (amrex::Real *) amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)); + *cost_real = 0._rt; + } +#endif + amrex::ParallelFor( + np_to_deposit, + [=] AMREX_GPU_DEVICE (long const ip) { +#if defined(WARPX_USE_GPUCLOCK) + const auto KernelTimer = ablastr::parallelization::KernelTimer( + cost && (load_balance_costs_update_algo == 
LoadBalanceCostsUpdateAlgo::GpuClock),
+                cost_real);
+#endif
+
+#if !defined(WARPX_DIM_3D)
+            constexpr amrex::ParticleReal inv_c2 = 1._prt/(PhysConst::c*PhysConst::c);
+
+            // Compute inverse Lorentz factor, the average of gamma at time levels n and n+1
+            // The uxp,uyp,uzp are the velocities at time level n+1/2
+            const amrex::ParticleReal uxp_np1 = 2._prt*uxp_nph[ip] - uxp_n[ip];
+            const amrex::ParticleReal uyp_np1 = 2._prt*uyp_nph[ip] - uyp_n[ip];
+            const amrex::ParticleReal uzp_np1 = 2._prt*uzp_nph[ip] - uzp_n[ip];
+            const amrex::ParticleReal gamma_n = std::sqrt(1._prt + (uxp_n[ip]*uxp_n[ip] + uyp_n[ip]*uyp_n[ip] + uzp_n[ip]*uzp_n[ip])*inv_c2);
+            const amrex::ParticleReal gamma_np1 = std::sqrt(1._prt + (uxp_np1*uxp_np1 + uyp_np1*uyp_np1 + uzp_np1*uzp_np1)*inv_c2);
+            const amrex::ParticleReal gaminv = 2.0_prt/(gamma_n + gamma_np1);
+#endif
+
+            // wqx, wqy, wqz are particle current in each direction
+            Real wq = q*wp[ip];
+            if (do_ionization){
+                wq *= ion_lev[ip];
+            }
+
+            ParticleReal xp_nph, yp_nph, zp_nph;
+            GetPosition(ip, xp_nph, yp_nph, zp_nph);
+
+#if !defined(WARPX_DIM_1D_Z)
+            ParticleReal const xp_np1 = 2._prt*xp_nph - xp_n[ip];
+#else
+            ignore_unused(xp_n);
+#endif
+#if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
+            ParticleReal const yp_np1 = 2._prt*yp_nph - yp_n[ip];
+#else
+            ignore_unused(yp_n);
+#endif
+            ParticleReal const zp_np1 = 2._prt*zp_nph - zp_n[ip];
+
+            // computes current and old position in grid units
+#if defined(WARPX_DIM_RZ)
+            amrex::Real const xp_new = xp_np1;
+            amrex::Real const yp_new = yp_np1;
+            amrex::Real const xp_mid = xp_nph;
+            amrex::Real const yp_mid = yp_nph;
+            amrex::Real const xp_old = xp_n[ip];
+            amrex::Real const yp_old = yp_n[ip];
+            amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
+            amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
+            amrex::Real const rp_mid = (rp_new + rp_old)/2._rt;
+            amrex::Real costheta_mid, sintheta_mid;
+            if (rp_mid > 0._rt) {
+                costheta_mid = xp_mid/rp_mid;
+                sintheta_mid = yp_mid/rp_mid;
+            } else {
+                costheta_mid = 1._rt;
+                sintheta_mid = 0._rt;
+            }
+            const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid};
+
+            // Keep these double to avoid bug in single precision
+            double const x_new = (rp_new - xmin)*dxi;
+            double const x_old = (rp_old - xmin)*dxi;
+            amrex::Real const vx = (rp_new - rp_old)/dt;
+            amrex::Real const vy = (-uxp_nph[ip]*sintheta_mid + uyp_nph[ip]*costheta_mid)*gaminv;
+#elif defined(WARPX_DIM_XZ)
+            // Keep these double to avoid bug in single precision
+            double const x_new = (xp_np1 - xmin)*dxi;
+            double const x_old = (xp_n[ip] - xmin)*dxi;
+            amrex::Real const vx = (xp_np1 - xp_n[ip])/dt;
+            amrex::Real const vy = uyp_nph[ip]*gaminv;
+#elif defined(WARPX_DIM_1D_Z)
+            amrex::Real const vx = uxp_nph[ip]*gaminv;
+            amrex::Real const vy = uyp_nph[ip]*gaminv;
+#elif defined(WARPX_DIM_3D)
+            // Keep these double to avoid bug in single precision
+            double const x_new = (xp_np1 - xmin)*dxi;
+            double const x_old = (xp_n[ip] - xmin)*dxi;
+            double const y_new = (yp_np1 - ymin)*dyi;
+            double const y_old = (yp_n[ip] - ymin)*dyi;
+            amrex::Real const vx = (xp_np1 - xp_n[ip])/dt;
+            amrex::Real const vy = (yp_np1 - yp_n[ip])/dt;
+#endif
+
+            // Keep these double to avoid bug in single precision
+            double const z_new = (zp_np1 - zmin)*dzi;
+            double const z_old = (zp_n[ip] - zmin)*dzi;
+            amrex::Real const vz = (zp_np1 - zp_n[ip])/dt;
+
+            // Define velocity kernels to deposit
+            amrex::Real const wqx = wq*vx*invvol;
+            amrex::Real const wqy = wq*vy*invvol;
+            amrex::Real const wqz = wq*vz*invvol; 
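+
+            // Note: each segment below deposits the fraction of wqx, wqy, wqz given
+            // by its share of the total displacement (the seg_factor_* weights),
+            // so the sum over all segments recovers the full particle current.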
+ + // 1) Determine the number of segments. + // 2) Loop over segments and deposit current. + + // cell crossings are defined at cell edges if depos_order is odd + // cell crossings are defined at cell centers if depos_order is even + + int num_segments = 1; + double shift = 0.0; + if ( (depos_order % 2) == 0 ) { shift = 0.5; } + +#if defined(WARPX_DIM_3D) + + // compute cell crossings in X-direction + const auto i_old = static_cast(x_old-shift); + const auto i_new = static_cast(x_new-shift); + int cell_crossings_x = std::abs(i_new-i_old); + num_segments += cell_crossings_x; + + // compute cell crossings in Y-direction + const auto j_old = static_cast(y_old-shift); + const auto j_new = static_cast(y_new-shift); + int cell_crossings_y = std::abs(j_new-j_old); + num_segments += cell_crossings_y; + + // compute cell crossings in Z-direction + const auto k_old = static_cast(z_old-shift); + const auto k_new = static_cast(z_new-shift); + int cell_crossings_z = std::abs(k_new-k_old); + num_segments += cell_crossings_z; + + // need to assert that the number of cell crossings in each direction + // is within the range permitted by the number of guard cells + // e.g., if (num_segments > 7) ... + + // compute total change in particle position and the initial cell + // locations in each direction used to find the position at cell crossings. + const double dxp = x_new - x_old; + const double dyp = y_new - y_old; + const double dzp = z_new - z_old; + const auto dirX_sign = static_cast(dxp < 0. ? -1. : 1.); + const auto dirY_sign = static_cast(dyp < 0. ? -1. : 1.); + const auto dirZ_sign = static_cast(dzp < 0. ? -1. : 1.); + double Xcell = 0., Ycell = 0., Zcell = 0.; + if (num_segments > 1) { + Xcell = static_cast(i_old) + shift + 0.5*(1.-dirX_sign); + Ycell = static_cast(j_old) + shift + 0.5*(1.-dirY_sign); + Zcell = static_cast(k_old) + shift + 0.5*(1.-dirZ_sign); + } + + // loop over the number of segments and deposit + Compute_shape_factor< depos_order-1 > compute_shape_factor_cell; + Compute_shape_factor_pair< depos_order > compute_shape_factors_node; + double dxp_seg, dyp_seg, dzp_seg; + double x0_new, y0_new, z0_new; + double x0_old = x_old; + double y0_old = y_old; + double z0_old = z_old; + + for (int ns=0; ns(dxp == 0. ? 1. : dxp_seg/dxp); + const auto seg_factor_y = static_cast(dyp == 0. ? 1. : dyp_seg/dyp); + const auto seg_factor_z = static_cast(dzp == 0. ? 1. 
: dzp_seg/dzp); + + // compute cell-based weights using the average segment position + double sx_cell[depos_order] = {0.}; + double sy_cell[depos_order] = {0.}; + double sz_cell[depos_order] = {0.}; + double const x0_bar = (x0_new + x0_old)/2.0; + double const y0_bar = (y0_new + y0_old)/2.0; + double const z0_bar = (z0_new + z0_old)/2.0; + const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 ); + const int j0_cell = compute_shape_factor_cell( sy_cell, y0_bar-0.5 ); + const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 ); + + if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights + Compute_shape_factor_pair compute_shape_factors_cell; + double sx_old_cell[depos_order] = {0.}; + double sx_new_cell[depos_order] = {0.}; + double sy_old_cell[depos_order] = {0.}; + double sy_new_cell[depos_order] = {0.}; + double sz_old_cell[depos_order] = {0.}; + double sz_new_cell[depos_order] = {0.}; + const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 ); + const int j0_cell_2 = compute_shape_factors_cell( sy_old_cell, sy_new_cell, y0_old-0.5, y0_new-0.5 ); + const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 ); + ignore_unused(i0_cell_2, j0_cell_2, k0_cell_2); + for (int m=0; m(x_old-shift); + const auto i_new = static_cast(x_new-shift); + int cell_crossings_x = std::abs(i_new-i_old); + num_segments += cell_crossings_x; + + // compute cell crossings in Z-direction + const auto k_old = static_cast(z_old-shift); + const auto k_new = static_cast(z_new-shift); + int cell_crossings_z = std::abs(k_new-k_old); + num_segments += cell_crossings_z; + + // need to assert that the number of cell crossings in each direction + // is within the range permitted by the number of guard cells + // e.g., if (num_segments > 5) ... + + // compute total change in particle position and the initial cell + // locations in each direction used to find the position at cell crossings. + const double dxp = x_new - x_old; + const double dzp = z_new - z_old; + const auto dirX_sign = static_cast(dxp < 0. ? -1. : 1.); + const auto dirZ_sign = static_cast(dzp < 0. ? -1. : 1.); + double Xcell = 0., Zcell = 0.; + if (num_segments > 1) { + Xcell = static_cast(i_old) + shift + 0.5*(1.-dirX_sign); + Zcell = static_cast(k_old) + shift + 0.5*(1.-dirZ_sign); + } + + // loop over the number of segments and deposit + Compute_shape_factor< depos_order-1 > compute_shape_factor_cell; + Compute_shape_factor_pair< depos_order > compute_shape_factors_node; + double dxp_seg, dzp_seg; + double x0_new, z0_new; + double x0_old = x_old; + double z0_old = z_old; + + for (int ns=0; ns(dxp == 0. ? 1. : dxp_seg/dxp); + const auto seg_factor_z = static_cast(dzp == 0. ? 1. 
: dzp_seg/dzp); + + // compute cell-based weights using the average segment position + double sx_cell[depos_order] = {0.}; + double sz_cell[depos_order] = {0.}; + double const x0_bar = (x0_new + x0_old)/2.0; + double const z0_bar = (z0_new + z0_old)/2.0; + const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 ); + const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 ); + + if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights + Compute_shape_factor_pair compute_shape_factors_cell; + double sx_old_cell[depos_order] = {0.}; + double sx_new_cell[depos_order] = {0.}; + double sz_old_cell[depos_order] = {0.}; + double sz_new_cell[depos_order] = {0.}; + const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 ); + const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 ); + ignore_unused(i0_cell_2, k0_cell_2); + for (int m=0; m(z_old-shift); + const auto k_new = static_cast(z_new-shift); + int cell_crossings_z = std::abs(k_new-k_old); + num_segments += cell_crossings_z; + + // need to assert that the number of cell crossings in each direction + // is within the range permitted by the number of guard cells + // e.g., if (num_segments > 3) ... + + // compute dzp and the initial cell location used to find the cell crossings. + double const dzp = z_new - z_old; + const auto dirZ_sign = static_cast(dzp < 0. ? -1. : 1.); + double Zcell = static_cast(k_old) + shift + 0.5*(1.-dirZ_sign); + + // loop over the number of segments and deposit + Compute_shape_factor< depos_order-1 > compute_shape_factor_cell; + Compute_shape_factor_pair< depos_order > compute_shape_factors_node; + double dzp_seg; + double z0_new; + double z0_old = z_old; + + for (int ns=0; ns(dzp == 0. ? 1. : dzp_seg/dzp); + + // compute cell-based weights using the average segment position + double sz_cell[depos_order] = {0.}; + double const z0_bar = (z0_new + z0_old)/2.0; + const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 ); + + if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights + Compute_shape_factor_pair compute_shape_factors_cell; + double sz_old_cell[depos_order] = {0.}; + double sz_new_cell[depos_order] = {0.}; + const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 ); + ignore_unused(k0_cell_2); + for (int m=0; mfree(cost_real); + } +#endif +} + /** * \brief Vay current deposition * ( Vay et al, 2013) diff --git a/Source/Particles/Gather/FieldGather.H b/Source/Particles/Gather/FieldGather.H index dd6b7276681..670d95014a0 100644 --- a/Source/Particles/Gather/FieldGather.H +++ b/Source/Particles/Gather/FieldGather.H @@ -880,6 +880,681 @@ void doGatherShapeNEsirkepovStencilImplicit ( #endif } +/** + * \brief Energy conserving field gather for thread thread_num for the implicit scheme + * This uses the same stencil for the gather that is used for Villasenor current deposition. + * The magnetic field is deposited using direct deposition. + * + * \tparam depos_order Particle shape order + * \param xp_n,yp_n,zp_n Particle position coordinates at start of step + * \param xp_nph,yp_nph,zp_nph Particle position coordinates at half step + * \param Exp,Eyp,Ezp Electric field on particles. + * \param Bxp,Byp,Bzp Magnetic field on particles. + * \param Ex_arr,Ey_arr,Ez_arr Array4 of the electric field, either full array or tile. 
+ * \param Bx_arr,By_arr,Bz_arr Array4 of the magnetic field, either full array or tile. + * \param Ex_type,Ey_type,Ez_type IndexType of the electric field + * \param Bx_type,By_type,Bz_type IndexType of the magnetic field + * \param dx 3D cell spacing + * \param xyzmin Physical lower bounds of domain in x, y, z. + * \param lo Index lower bounds of domain. + * \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry + */ +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void doGatherPicnicShapeN ( + [[maybe_unused]] const amrex::ParticleReal xp_n, + [[maybe_unused]] const amrex::ParticleReal yp_n, + const amrex::ParticleReal zp_n, + [[maybe_unused]] const amrex::ParticleReal xp_nph, + [[maybe_unused]] const amrex::ParticleReal yp_nph, + const amrex::ParticleReal zp_nph, + amrex::ParticleReal& Exp, + amrex::ParticleReal& Eyp, + amrex::ParticleReal& Ezp, + amrex::ParticleReal& Bxp, + amrex::ParticleReal& Byp, + amrex::ParticleReal& Bzp, + amrex::Array4 const& Ex_arr, + amrex::Array4 const& Ey_arr, + amrex::Array4 const& Ez_arr, + amrex::Array4 const& Bx_arr, + amrex::Array4 const& By_arr, + amrex::Array4 const& Bz_arr, + [[maybe_unused]] const amrex::IndexType Ex_type, + [[maybe_unused]] const amrex::IndexType Ey_type, + [[maybe_unused]] const amrex::IndexType Ez_type, + [[maybe_unused]] const amrex::IndexType Bx_type, + [[maybe_unused]] const amrex::IndexType By_type, + [[maybe_unused]] const amrex::IndexType Bz_type, + const amrex::GpuArray& dx, + const amrex::GpuArray& xyzmin, + const amrex::Dim3& lo, + const int n_rz_azimuthal_modes) +{ + using namespace amrex; +#if !defined(WARPX_DIM_RZ) + ignore_unused(n_rz_azimuthal_modes); +#endif + +#if !defined(WARPX_DIM_1D_Z) + Real const dxi = 1.0_rt / dx[0]; +#endif +#if !defined(WARPX_DIM_1D_Z) + Real const xmin = xyzmin[0]; +#endif +#if defined(WARPX_DIM_3D) + Real const dyi = 1.0_rt / dx[1]; + Real const ymin = xyzmin[1]; +#endif + Real const dzi = 1.0_rt / dx[2]; + Real const zmin = xyzmin[2]; + +#if !defined(WARPX_DIM_1D_Z) + ParticleReal xp_np1 = 2._prt*xp_nph - xp_n; +#endif +#if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ) + ParticleReal yp_np1 = 2._prt*yp_nph - yp_n; +#endif + ParticleReal zp_np1 = 2._prt*zp_nph - zp_n; + +#if !defined(WARPX_DIM_1D_Z) + Real constexpr one_third = 1.0_rt / 3.0_rt; + Real constexpr one_sixth = 1.0_rt / 6.0_rt; +#endif + + // computes current and old position in grid units +#if defined(WARPX_DIM_RZ) + amrex::Real const xp_new = xp_np1; + amrex::Real const yp_new = yp_np1; + amrex::Real const xp_mid = xp_nph; + amrex::Real const yp_mid = yp_nph; + amrex::Real const xp_old = xp_n; + amrex::Real const yp_old = yp_n; + amrex::Real const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new); + amrex::Real const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old); + amrex::Real const rp_mid = (rp_new + rp_old)/2._rt; + amrex::Real costheta_mid, sintheta_mid; + if (rp_mid > 0._rt) { + costheta_mid = xp_mid/rp_mid; + sintheta_mid = yp_mid/rp_mid; + } else { + costheta_mid = 1._rt; + sintheta_mid = 0._rt; + } + const Complex xy_mid0 = Complex{costheta_mid, sintheta_mid}; + // Keep these double to avoid bug in single precision + double const x_new = (rp_new - xmin)*dxi; + double const x_old = (rp_old - xmin)*dxi; + double const x_bar = (rp_mid - xmin)*dxi; +#elif !defined(WARPX_DIM_1D_Z) + // Keep these double to avoid bug in single precision + double const x_new = (xp_np1 - xmin)*dxi; + double const x_old = (xp_n - xmin)*dxi; + double const x_bar = (xp_nph - xmin)*dxi; +#endif +#if 
defined(WARPX_DIM_3D) + // Keep these double to avoid bug in single precision + double const y_new = (yp_np1 - ymin)*dyi; + double const y_old = (yp_n - ymin)*dyi; + double const y_bar = (yp_nph - ymin)*dyi; +#endif + // Keep these double to avoid bug in single precision + double const z_new = (zp_np1 - zmin)*dzi; + double const z_old = (zp_n - zmin)*dzi; + double const z_bar = (zp_nph - zmin)*dzi; + + // 1) Determine the number of segments. + // 2) Loop over segments and gather electric field. + // 3) Gather magnetic field. + + // cell crossings are defined at cell edges if depos_order is odd + // cell crossings are defined at cell centers if depos_order is even + + int num_segments = 1; + double shift = 0.0; + if ( (depos_order % 2) == 0 ) { shift = 0.5; } + +#if defined(WARPX_DIM_3D) + + // compute cell crossings in X-direction + const auto i_old = static_cast(x_old-shift); + const auto i_new = static_cast(x_new-shift); + int cell_crossings_x = std::abs(i_new-i_old); + num_segments += cell_crossings_x; + + // compute cell crossings in Y-direction + const auto j_old = static_cast(y_old-shift); + const auto j_new = static_cast(y_new-shift); + int cell_crossings_y = std::abs(j_new-j_old); + num_segments += cell_crossings_y; + + // compute cell crossings in Z-direction + const auto k_old = static_cast(z_old-shift); + const auto k_new = static_cast(z_new-shift); + int cell_crossings_z = std::abs(k_new-k_old); + num_segments += cell_crossings_z; + + // need to assert that the number of cell crossings in each direction + // is within the range permitted by the number of guard cells + // e.g., if (num_segments > 7) ... + + // compute total change in particle position and the initial cell + // locations in each direction used to find the position at cell crossings. + const double dxp = x_new - x_old; + const double dyp = y_new - y_old; + const double dzp = z_new - z_old; + const auto dirX_sign = static_cast(dxp < 0. ? -1. : 1.); + const auto dirY_sign = static_cast(dyp < 0. ? -1. : 1.); + const auto dirZ_sign = static_cast(dzp < 0. ? -1. : 1.); + double Xcell = 0., Ycell = 0., Zcell = 0.; + if (num_segments > 1) { + Xcell = static_cast(i_old) + shift + 0.5*(1.-dirX_sign); + Ycell = static_cast(j_old) + shift + 0.5*(1.-dirY_sign); + Zcell = static_cast(k_old) + shift + 0.5*(1.-dirZ_sign); + } + + // loop over the number of segments and deposit + Compute_shape_factor< depos_order-1 > compute_shape_factor_cell; + Compute_shape_factor_pair< depos_order > compute_shape_factors_node; + double dxp_seg, dyp_seg, dzp_seg; + double x0_new, y0_new, z0_new; + double x0_old = x_old; + double y0_old = y_old; + double z0_old = z_old; + + for (int ns=0; ns(dxp == 0. ? 1. : dxp_seg/dxp); + const auto seg_factor_y = static_cast(dyp == 0. ? 1. : dyp_seg/dyp); + const auto seg_factor_z = static_cast(dzp == 0. ? 1. 
: dzp_seg/dzp); + + // compute cell-based weights using the average segment position + double sx_cell[depos_order] = {0.}; + double sy_cell[depos_order] = {0.}; + double sz_cell[depos_order] = {0.}; + double const x0_bar = (x0_new + x0_old)/2.0; + double const y0_bar = (y0_new + y0_old)/2.0; + double const z0_bar = (z0_new + z0_old)/2.0; + const int i0_cell = compute_shape_factor_cell( sx_cell, x0_bar-0.5 ); + const int j0_cell = compute_shape_factor_cell( sy_cell, y0_bar-0.5 ); + const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 ); + + if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights + Compute_shape_factor_pair compute_shape_factors_cell; + double sx_old_cell[depos_order] = {0.}; + double sx_new_cell[depos_order] = {0.}; + double sy_old_cell[depos_order] = {0.}; + double sy_new_cell[depos_order] = {0.}; + double sz_old_cell[depos_order] = {0.}; + double sz_new_cell[depos_order] = {0.}; + const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 ); + const int j0_cell_2 = compute_shape_factors_cell( sy_old_cell, sy_new_cell, y0_old-0.5, y0_new-0.5 ); + const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 ); + ignore_unused(i0_cell_2, j0_cell_2, k0_cell_2); + for (int m=0; m compute_shape_factor_B; + double sz_bar_node[depos_order_B+1] = {0.}; + double sz_bar_cell[depos_order_B+1] = {0.}; + const int k_bar_node = compute_shape_factor_B(sz_bar_node, z_bar); + const int k_bar_cell = compute_shape_factor_B(sz_bar_cell, z_bar-0.5); + double sy_bar_node[depos_order_B+1] = {0.}; + double sy_bar_cell[depos_order_B+1] = {0.}; + const int j_bar_node = compute_shape_factor_B(sy_bar_node, y_bar); + const int j_bar_cell = compute_shape_factor_B(sy_bar_cell, y_bar-0.5); + double sx_bar_node[depos_order_B+1] = {0.}; + double sx_bar_cell[depos_order_B+1] = {0.}; + const int i_bar_node = compute_shape_factor_B(sx_bar_node, x_bar); + const int i_bar_cell = compute_shape_factor_B(sx_bar_cell, x_bar-0.5); + + amrex::Real weight; + for (int i=0; i<=depos_order_B; i++) { + for (int j=0; j<=depos_order_B; j++) { + for (int k=0; k<=depos_order_B; k++) { + weight = static_cast(sx_bar_node[i]*sy_bar_cell[j]*sz_bar_cell[k]); + Bxp += Bx_arr(lo.x+i_bar_node+i, lo.y+j_bar_cell+j, lo.z+k_bar_cell+k)*weight; + // + weight = static_cast(sx_bar_cell[i]*sy_bar_node[j]*sz_bar_cell[k]); + Byp += By_arr(lo.x+i_bar_cell+i, lo.y+j_bar_node+j, lo.z+k_bar_cell+k)*weight; + // + weight = static_cast(sx_bar_cell[i]*sy_bar_cell[j]*sz_bar_node[k]); + Bzp += Bz_arr(lo.x+i_bar_cell+i, lo.y+j_bar_cell+j, lo.z+k_bar_node+k)*weight; + } + } + } + +#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) + + // compute cell crossings in X-direction + const auto i_old = static_cast(x_old-shift); + const auto i_new = static_cast(x_new-shift); + int cell_crossings_x = std::abs(i_new-i_old); + num_segments += cell_crossings_x; + + // compute cell crossings in Z-direction + const auto k_old = static_cast(z_old-shift); + const auto k_new = static_cast(z_new-shift); + int cell_crossings_z = std::abs(k_new-k_old); + num_segments += cell_crossings_z; + + // need to assert that the number of cell crossings in each direction + // is within the range permitted by the number of guard cells + // e.g., if (num_segments > 5) ... + + // compute total change in particle position and the initial cell + // locations in each direction used to find the position at cell crossings. 
+ const double dxp = x_new - x_old; + const double dzp = z_new - z_old; + const auto dirX_sign = static_cast(dxp < 0. ? -1. : 1.); + const auto dirZ_sign = static_cast(dzp < 0. ? -1. : 1.); + double Xcell = 0., Zcell = 0.; + if (num_segments > 1) { + Xcell = static_cast(i_old) + shift + 0.5*(1.-dirX_sign); + Zcell = static_cast(k_old) + shift + 0.5*(1.-dirZ_sign); + } + + // loop over the number of segments and deposit + Compute_shape_factor< depos_order-1 > compute_shape_factor_cell; + Compute_shape_factor_pair< depos_order > compute_shape_factors_node; + double dxp_seg, dzp_seg; + double x0_new, z0_new; + double x0_old = x_old; + double z0_old = z_old; + + for (int ns=0; ns(dxp == 0. ? 1. : dxp_seg/dxp); + const auto seg_factor_z = static_cast(dzp == 0. ? 1. : dzp_seg/dzp); + + // compute cell-based weights using the average segment position + double sx_cell[depos_order] = {0.}; + double sz_cell[depos_order] = {0.}; + double const x0_bar = (x0_new + x0_old)/2.0; + double const z0_bar = (z0_new + z0_old)/2.0; + const int i0_cell = compute_shape_factor_cell(sx_cell, x0_bar-0.5); + const int k0_cell = compute_shape_factor_cell(sz_cell, z0_bar-0.5); + + if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights + Compute_shape_factor_pair compute_shape_factors_cell; + double sx_old_cell[depos_order] = {0.}; + double sx_new_cell[depos_order] = {0.}; + double sz_old_cell[depos_order] = {0.}; + double sz_new_cell[depos_order] = {0.}; + const int i0_cell_2 = compute_shape_factors_cell( sx_old_cell, sx_new_cell, x0_old-0.5, x0_new-0.5 ); + const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 ); + ignore_unused(i0_cell_2, k0_cell_2); + for (int m=0; m compute_shape_factor_B; + double sz_bar_node[depos_order_B+1] = {0.}; + double sz_bar_cell[depos_order_B+1] = {0.}; + const int k_bar_node = compute_shape_factor_B(sz_bar_node, z_bar); + const int k_bar_cell = compute_shape_factor_B(sz_bar_cell, z_bar-0.5); + double sx_bar_node[depos_order_B+1] = {0.}; + double sx_bar_cell[depos_order_B+1] = {0.}; + const int i_bar_node = compute_shape_factor_B(sx_bar_node, x_bar); + const int i_bar_cell = compute_shape_factor_B(sx_bar_cell, x_bar-0.5); + + for (int i=0; i<=depos_order_B; i++) { + for (int k=0; k<=depos_order_B; k++) { + const auto weight_Bz = static_cast(sx_bar_cell[i]*sz_bar_node[k]); + Bzp += Bz_arr(lo.x+i_bar_cell+i, lo.y+k_bar_node+k, 0, 0)*weight_Bz; + // + const auto weight_Bx = static_cast(sx_bar_node[i]*sz_bar_cell[k]); + Bxp += Bx_arr(lo.x+i_bar_node+i, lo.y+k_bar_cell+k, 0, 0)*weight_Bx; + // + const auto weight_By = static_cast(sx_bar_cell[i]*sz_bar_cell[k]); + Byp += By_arr(lo.x+i_bar_cell+i, lo.y+k_bar_cell+k, 0, 0)*weight_By; +#if defined(WARPX_DIM_RZ) + Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta} + for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) { + const auto dBx = (+ Bx_arr(lo.x+i_bar_node+i, lo.y+k_bar_cell+k, 0, 2*imode-1)*xy_mid.real() + - Bx_arr(lo.x+i_bar_node+i, lo.y+k_bar_cell+k, 0, 2*imode)*xy_mid.imag()); + const auto dBy = (+ By_arr(lo.x+i_bar_cell+i, lo.y+k_bar_cell+k, 0, 2*imode-1)*xy_mid.real() + - By_arr(lo.x+i_bar_cell+i, lo.y+k_bar_cell+k, 0, 2*imode)*xy_mid.imag()); + const auto dBz = (+ Bz_arr(lo.x+i_bar_cell+i, lo.y+k_bar_node+k, 0, 2*imode-1)*xy_mid.real() + - Bz_arr(lo.x+i_bar_cell+i, lo.y+k_bar_node+k, 0, 2*imode)*xy_mid.imag()); + Bxp += weight_Bx*dBx; + Byp += weight_By*dBy; + Bzp += weight_Bz*dBz; + xy_mid = 
xy_mid*xy_mid0; + } +#endif + } + } + +#ifdef WARPX_DIM_RZ + + // Convert Exp and Eyp (which are actually Er and Etheta) to Ex and Ey + const amrex::Real Exp_save = Exp; + Exp = costheta_mid*Exp - sintheta_mid*Eyp; + Eyp = costheta_mid*Eyp + sintheta_mid*Exp_save; + const amrex::Real Bxp_save = Bxp; + Bxp = costheta_mid*Bxp - sintheta_mid*Byp; + Byp = costheta_mid*Byp + sintheta_mid*Bxp_save; + +#endif + +#elif defined(WARPX_DIM_1D_Z) + + // compute cell crossings in Z-direction + const auto k_old = static_cast(z_old-shift); + const auto k_new = static_cast(z_new-shift); + int cell_crossings_z = std::abs(k_new-k_old); + num_segments += cell_crossings_z; + + // need to assert that the number of cell crossings in each direction + // is within the range permitted by the number of guard cells + // e.g., if (num_segments > 3) ... + + // compute dzp and the initial cell location used to find the cell crossings. + double const dzp = z_new - z_old; + const auto dirZ_sign = static_cast(dzp < 0. ? -1. : 1.); + double Zcell = static_cast(k_old) + shift + 0.5*(1.-dirZ_sign); + + // loop over the number of segments and deposit + Compute_shape_factor< depos_order-1 > compute_shape_factor_cell; + Compute_shape_factor_pair< depos_order > compute_shape_factors_node; + double dzp_seg; + double z0_new; + double z0_old = z_old; + + for (int ns=0; ns(dzp == 0. ? 1. : dzp_seg/dzp); + + // compute cell-based weights using the average segment position + double sz_cell[depos_order] = {0.}; + double const z0_bar = (z0_new + z0_old)/2.0; + const int k0_cell = compute_shape_factor_cell( sz_cell, z0_bar-0.5 ); + + if constexpr (depos_order >= 3) { // higher-order correction to the cell-based weights + Compute_shape_factor_pair compute_shape_factors_cell; + double sz_old_cell[depos_order] = {0.}; + double sz_new_cell[depos_order] = {0.}; + const int k0_cell_2 = compute_shape_factors_cell( sz_old_cell, sz_new_cell, z0_old-0.5, z0_new-0.5 ); + ignore_unused(k0_cell_2); + for (int m=0; m compute_shape_factor_B; + double sz_bar_node[depos_order_B+1] = {0.}; + double sz_bar_cell[depos_order_B+1] = {0.}; + const int k_bar_node = compute_shape_factor_B(sz_bar_node, z_bar); + const int k_bar_cell = compute_shape_factor_B(sz_bar_cell, z_bar-0.5_rt); + + amrex::Real weight; + for (int k=0; k<=depos_order_B; k++) { + weight = static_cast(sz_bar_node[k]); + Bzp += Bz_arr(lo.x+k_bar_node+k, 0, 0)*weight; + // + weight = static_cast(sz_bar_cell[k]); + Bxp += Bx_arr(lo.x+k_bar_cell+k, 0, 0)*weight; + Byp += By_arr(lo.x+k_bar_cell+k, 0, 0)*weight; + } + +#endif +} + /** * \brief Field gather for particles * @@ -1052,6 +1727,7 @@ void doGatherShapeN (const amrex::ParticleReal xp, * \param lo Index lower bounds of domain. 
* \param n_rz_azimuthal_modes Number of azimuthal modes when using RZ geometry * \param nox order of the particle shape function + * \param depos_type integer identifier for which algorithm to use * \param galerkin_interpolation whether to use lower order in v */ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -1085,9 +1761,9 @@ void doGatherShapeNImplicit ( const amrex::Dim3& lo, const int n_rz_azimuthal_modes, const int nox, - const bool galerkin_interpolation) + const int depos_type ) { - if (galerkin_interpolation) { + if (depos_type==0) { // CurrentDepositionAlgo::Esirkepov if (nox == 1) { doGatherShapeNEsirkepovStencilImplicit<1>(xp_n, yp_n, zp_n, xp_nph, yp_nph, zp_nph, Exp, Eyp, Ezp, Bxp, Byp, Bzp, @@ -1107,7 +1783,35 @@ void doGatherShapeNImplicit ( ex_type, ey_type, ez_type, bx_type, by_type, bz_type, dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); } - } else { + } + else if (depos_type==3) { // CurrentDepositionAlgo::Villasenor + if (nox == 1) { + doGatherPicnicShapeN<1>(xp_n, yp_n, zp_n, xp_nph, yp_nph, zp_nph, + Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } else if (nox == 2) { + doGatherPicnicShapeN<2>(xp_n, yp_n, zp_n, xp_nph, yp_nph, zp_nph, + Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } else if (nox == 3) { + doGatherPicnicShapeN<3>(xp_n, yp_n, zp_n, xp_nph, yp_nph, zp_nph, + Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } else if (nox == 4) { + doGatherPicnicShapeN<4>(xp_n, yp_n, zp_n, xp_nph, yp_nph, zp_nph, + Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + ex_type, ey_type, ez_type, bx_type, by_type, bz_type, + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes); + } + } + else if (depos_type==1) { // CurrentDepositionAlgo::Direct if (nox == 1) { doGatherShapeN<1,0>(xp_nph, yp_nph, zp_nph, Exp, Eyp, Ezp, Bxp, Byp, Bzp, ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index e39cd79b55d..929c3c26649 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -2984,7 +2984,7 @@ PhysicalParticleContainer::ImplicitPushXP (WarpXParIter& pti, const Dim3 lo = lbound(box); - bool galerkin_interpolation = WarpX::galerkin_interpolation; + int depos_type = WarpX::current_deposition_algo; int nox = WarpX::nox; int n_rz_azimuthal_modes = WarpX::n_rz_azimuthal_modes; @@ -3107,8 +3107,8 @@ PhysicalParticleContainer::ImplicitPushXP (WarpXParIter& pti, doGatherShapeNImplicit(xp_n, yp_n, zp_n, xp, yp, zp, Exp, Eyp, Ezp, Bxp, Byp, Bzp, ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, ex_type, ey_type, ez_type, bx_type, by_type, bz_type, - dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes, - nox, galerkin_interpolation); + dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes, nox, + depos_type ); } // Externally applied E and B-field in Cartesian co-ordinates diff --git a/Source/Particles/ShapeFactors.H b/Source/Particles/ShapeFactors.H index a0b4ed63a30..73e8f7243bb 100644 --- a/Source/Particles/ShapeFactors.H +++ b/Source/Particles/ShapeFactors.H @@ -64,6 +64,19 @@ struct Compute_shape_factor //
index of the leftmost cell where particle deposits return j-1; } + else if constexpr (depos_order == 4){ + const auto j = static_cast(xmid + T(0.5)); + const T xint = xmid - T(j); + const T xint_p1 = xint + T(1.0); + const T xint_m1 = xint - T(1.0); + sx[0] = T(1.0)/T(384.0)*(T(1.0) - T(2.0)*xint)*(T(1.0) - T(2.0)*xint)*(T(1.0) - T(2.0)*xint)*(T(1.0) - T(2.0)*xint); + sx[1] = T(1.0)/T(96.0)*(T(55.0) + T(4.0)*xint_p1*(T(5.0) - T(2.0)*xint_p1*(T(15.0) + T(2.0)*xint_p1*(xint_p1 - T(5.0))))); + sx[2] = T(115.0)/T(192.0) + xint*xint*(xint*xint/T(4.0) - T(5.0)/T(8.0)); + sx[3] = T(1.0)/T(96.0)*(T(55.0) - T(4.0)*xint_m1*(T(5.0) + T(2.0)*xint_m1*(T(15.0) - T(2.0)*xint_m1*(-xint_m1 - T(5.0))))); + sx[4] = T(1.0)/T(384.0)*(T(1.0) + T(2.0)*xint)*(T(1.0) + T(2.0)*xint)*(T(1.0) + T(2.0)*xint)*(T(1.0) + T(2.0)*xint); + // index of the leftmost cell where particle deposits + return j-2; + } else{ WARPX_ABORT_WITH_MESSAGE("Unknown particle shape selected in Compute_shape_factor"); amrex::ignore_unused(sx, xmid); @@ -132,4 +145,96 @@ struct Compute_shifted_shape_factor } }; +/** + * Compute shape factors for two positions that are within + * half a grid cell of the same cell interface and return the common + * index of the leftmost cell where particle writes, which is correctly + * determined by the average of the positions. + * This is used for computing the segment weights transverse to the + * current density direction in the Villasenor deposition algorithm. + */ +template +struct Compute_shape_factor_pair +{ + template< typename T > + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + int operator()( + T* const sx_old, + T* const sx_new, + T xold, + T xnew) const + { + const T xmid = T(0.5)*(xnew + xold); + if constexpr (depos_order == 1){ + const auto j = static_cast(xmid); + const T xint_old = xold - T(j); + sx_old[0] = T(1.0) - xint_old; + sx_old[1] = xint_old; + // + const T xint_new = xnew - T(j); + sx_new[0] = T(1.0) - xint_new; + sx_new[1] = xint_new; + return j; + } + else if constexpr (depos_order == 2){ + const auto j = static_cast(xmid + T(0.5)); + const T xint_old = xold - T(j); + sx_old[0] = T(0.5)*(T(0.5) - xint_old)*(T(0.5) - xint_old); + sx_old[1] = T(0.75) - xint_old*xint_old; + sx_old[2] = T(0.5)*(T(0.5) + xint_old)*(T(0.5) + xint_old); + // + const T xint_new = xnew - T(j); + sx_new[0] = T(0.5)*(T(0.5) - xint_new)*(T(0.5) - xint_new); + sx_new[1] = T(0.75) - xint_new*xint_new; + sx_new[2] = T(0.5)*(T(0.5) + xint_new)*(T(0.5) + xint_new); + // index of the leftmost cell where particle deposits + return j-1; + } + else if constexpr (depos_order == 3){ + const auto j = static_cast(xmid); + const T xint_old = xold - T(j); + sx_old[0] = T(1.0)/T(6.0)*(T(1.0) - xint_old)*(T(1.0) - xint_old)*(T(1.0) - xint_old); + sx_old[1] = T(2.0)/T(3.0) - xint_old*xint_old*(T(1.0) - xint_old/(T(2.0))); + sx_old[2] = T(2.0)/T(3.0) - (T(1.0) - xint_old)*(T(1.0) - xint_old)*(T(1.0) - T(0.5)*(T(1.0) - xint_old)); + sx_old[3] = T(1.0)/T(6.0)*xint_old*xint_old*xint_old; + // + const T xint_new = xnew - T(j); + sx_new[0] = T(1.0)/T(6.0)*(T(1.0) - xint_new)*(T(1.0) - xint_new)*(T(1.0) - xint_new); + sx_new[1] = T(2.0)/T(3.0) - xint_new*xint_new*(T(1.0) - xint_new/(T(2.0))); + sx_new[2] = T(2.0)/T(3.0) - (T(1.0) - xint_new)*(T(1.0) - xint_new)*(T(1.0) - T(0.5)*(T(1.0) - xint_new)); + sx_new[3] = T(1.0)/T(6.0)*xint_new*xint_new*xint_new; + // index of the leftmost cell where particle deposits + return j-1; + } + else if constexpr (depos_order == 4){ + const auto j = static_cast(xmid + T(0.5)); + const T xint_old = xold - 
T(j); + T xint_p1 = xint_old + T(1.0); + T xint_m1 = xint_old - T(1.0); + sx_old[0] = T(1.0)/T(384.0)*(T(1.0) - T(2.0)*xint_old)*(T(1.0) - T(2.0)*xint_old)*(T(1.0) - T(2.0)*xint_old)*(T(1.0) - T(2.0)*xint_old); + sx_old[1] = T(1.0)/T(96.0)*(T(55.0) + T(4.0)*xint_p1*(T(5.0) - T(2.0)*xint_p1*(T(15.0) + T(2.0)*xint_p1*(xint_p1 - T(5.0))))); + sx_old[2] = T(115.0)/T(192.0) + xint_old*xint_old*(xint_old*xint_old/T(4.0) - T(5.0)/T(8.0)); + sx_old[3] = T(1.0)/T(96.0)*(T(55.0) - T(4.0)*xint_m1*(T(5.0) + T(2.0)*xint_m1*(T(15.0) - T(2.0)*xint_m1*(-xint_m1 - T(5.0))))); + sx_old[4] = T(1.0)/T(384.0)*(T(1.0) + T(2.0)*xint_old)*(T(1.0) + T(2.0)*xint_old)*(T(1.0) + T(2.0)*xint_old)*(T(1.0) + T(2.0)*xint_old); + // + const T xint_new = xnew - T(j); + xint_p1 = xint_new + T(1.0); + xint_m1 = xint_new - T(1.0); + sx_new[0] = T(1.0)/T(384.0)*(T(1.0) - T(2.0)*xint_new)*(T(1.0) - T(2.0)*xint_new)*(T(1.0) - T(2.0)*xint_new)*(T(1.0) - T(2.0)*xint_new); + sx_new[1] = T(1.0)/T(96.0)*(T(55.0) + T(4.0)*xint_p1*(T(5.0) - T(2.0)*xint_p1*(T(15.0) + T(2.0)*xint_p1*(xint_p1 - T(5.0))))); + sx_new[2] = T(115.0)/T(192.0) + xint_new*xint_new*(xint_new*xint_new/T(4.0) - T(5.0)/T(8.0)); + sx_new[3] = T(1.0)/T(96.0)*(T(55.0) - T(4.0)*xint_m1*(T(5.0) + T(2.0)*xint_m1*(T(15.0) - T(2.0)*xint_m1*(-xint_m1 - T(5.0))))); + sx_new[4] = T(1.0)/T(384.0)*(T(1.0) + T(2.0)*xint_new)*(T(1.0) + T(2.0)*xint_new)*(T(1.0) + T(2.0)*xint_new)*(T(1.0) + T(2.0)*xint_new); + // + // index of the leftmost cell where particle deposits + return j-2; + } + else{ + WARPX_ABORT_WITH_MESSAGE("Unknown particle shape selected in Compute_shape_factor_pair"); + amrex::ignore_unused(sx_old, sx_new, xold, xnew); + } + return 0; + } +}; + #endif // SHAPEFACTORS_H_ diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 85bb1c3f4b8..a395198e361 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -452,9 +452,10 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, // Take into account Galilean shift const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev, 0.5_rt*dt); - if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { + if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov || + WarpX::current_deposition_algo == CurrentDepositionAlgo::Villasenor) { if (WarpX::grid_type == GridType::Collocated) { - WARPX_ABORT_WITH_MESSAGE("The Esirkepov algorithm cannot be used with a collocated grid."); + WARPX_ABORT_WITH_MESSAGE("Charge-conserving current depositions (Esirkepov and Villasenor) cannot be used with a collocated grid."); } } @@ -517,6 +518,9 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { WARPX_ABORT_WITH_MESSAGE("Cannot do shared memory deposition with Esirkepov algorithm"); } + else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Villasenor) { + WARPX_ABORT_WITH_MESSAGE("Cannot do shared memory deposition with Villasenor algorithm"); + } else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Vay) { WARPX_ABORT_WITH_MESSAGE("Cannot do shared memory deposition with Vay algorithm"); } @@ -525,21 +529,21 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, if (WarpX::nox == 1){ doDepositionSharedShapeN<1>( GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset, - uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, 
jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size); } else if (WarpX::nox == 2){ doDepositionSharedShapeN<2>( GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset, - uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size); } else if (WarpX::nox == 3){ doDepositionSharedShapeN<3>( GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset, - uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, jx_fab, jy_fab, jz_fab, np_to_deposit, relative_time, dx, xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost, WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size); @@ -620,6 +624,66 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, WarpX::load_balance_costs_update_algo); } } + } else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Villasenor) { + if (push_type == PushType::Implicit) { +#if (AMREX_SPACEDIM >= 2) + auto& xp_n = pti.GetAttribs(particle_comps["x_n"]); + const ParticleReal* xp_n_data = xp_n.dataPtr() + offset; +#else + const ParticleReal* xp_n_data = nullptr; +#endif +#if defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ) + auto& yp_n = pti.GetAttribs(particle_comps["y_n"]); + const ParticleReal* yp_n_data = yp_n.dataPtr() + offset; +#else + const ParticleReal* yp_n_data = nullptr; +#endif + auto& zp_n = pti.GetAttribs(particle_comps["z_n"]); + const ParticleReal* zp_n_data = zp_n.dataPtr() + offset; + auto& uxp_n = pti.GetAttribs(particle_comps["ux_n"]); + auto& uyp_n = pti.GetAttribs(particle_comps["uy_n"]); + auto& uzp_n = pti.GetAttribs(particle_comps["uz_n"]); + if (WarpX::nox == 1){ + doVillasenorDepositionShapeNImplicit<1>( + xp_n_data, yp_n_data, zp_n_data, + GetPosition, wp.dataPtr() + offset, + uxp_n.dataPtr() + offset, uyp_n.dataPtr() + offset, uzp_n.dataPtr() + offset, + uxp.dataPtr() + offset, uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_arr, jy_arr, jz_arr, np_to_deposit, dt, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 2){ + doVillasenorDepositionShapeNImplicit<2>( + xp_n_data, yp_n_data, zp_n_data, + GetPosition, wp.dataPtr() + offset, + uxp_n.dataPtr() + offset, uyp_n.dataPtr() + offset, uzp_n.dataPtr() + offset, + uxp.dataPtr() + offset, uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_arr, jy_arr, jz_arr, np_to_deposit, dt, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 3){ + doVillasenorDepositionShapeNImplicit<3>( + xp_n_data, yp_n_data, zp_n_data, + GetPosition, wp.dataPtr() + offset, + uxp_n.dataPtr() + offset, uyp_n.dataPtr() + offset, uzp_n.dataPtr() + offset, + uxp.dataPtr() + offset, uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_arr, jy_arr, jz_arr, np_to_deposit, dt, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); + } else if (WarpX::nox == 4){ + doVillasenorDepositionShapeNImplicit<4>( + xp_n_data, yp_n_data, zp_n_data, + GetPosition, wp.dataPtr() + offset, + uxp_n.dataPtr() + offset, uyp_n.dataPtr() + offset, uzp_n.dataPtr() + offset, + 
uxp.dataPtr() + offset, uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev, + jx_arr, jy_arr, jz_arr, np_to_deposit, dt, dx, xyzmin, lo, q, + WarpX::n_rz_azimuthal_modes, cost, + WarpX::load_balance_costs_update_algo); + } + } + else { + WARPX_ABORT_WITH_MESSAGE("The Villasenor algorithm can only be used with implicit algorithm."); + } } else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Vay) { if (push_type == PushType::Implicit) { WARPX_ABORT_WITH_MESSAGE("The Vay algorithm cannot be used with implicit algorithm."); diff --git a/Source/Utils/WarpXAlgorithmSelection.H b/Source/Utils/WarpXAlgorithmSelection.H index 87db0ae9b9b..735fc7993f1 100644 --- a/Source/Utils/WarpXAlgorithmSelection.H +++ b/Source/Utils/WarpXAlgorithmSelection.H @@ -87,7 +87,8 @@ struct CurrentDepositionAlgo { enum { Esirkepov = 0, Direct = 1, - Vay = 2 + Vay = 2, + Villasenor = 3 }; }; diff --git a/Source/Utils/WarpXAlgorithmSelection.cpp b/Source/Utils/WarpXAlgorithmSelection.cpp index 75c488134e0..abaf17f0a2c 100644 --- a/Source/Utils/WarpXAlgorithmSelection.cpp +++ b/Source/Utils/WarpXAlgorithmSelection.cpp @@ -63,10 +63,11 @@ const std::map particle_pusher_algo_to_int = { }; const std::map current_deposition_algo_to_int = { - {"esirkepov", CurrentDepositionAlgo::Esirkepov }, - {"direct", CurrentDepositionAlgo::Direct }, - {"vay", CurrentDepositionAlgo::Vay }, - {"default", CurrentDepositionAlgo::Esirkepov } // NOTE: overwritten for PSATD and Hybrid-PIC below + {"esirkepov", CurrentDepositionAlgo::Esirkepov }, + {"direct", CurrentDepositionAlgo::Direct }, + {"vay", CurrentDepositionAlgo::Vay }, + {"villasenor", CurrentDepositionAlgo::Villasenor }, + {"default", CurrentDepositionAlgo::Esirkepov } // NOTE: overwritten for PSATD and Hybrid-PIC below }; const std::map charge_deposition_algo_to_int = { diff --git a/Source/WarpX.H b/Source/WarpX.H index e8c7ae79f7e..d3900c91593 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -156,7 +156,7 @@ public: int maxlevel_extEMfield_init; // Algorithms - //! Integer that corresponds to the current deposition algorithm (Esirkepov, direct, Vay) + //! Integer that corresponds to the current deposition algorithm (Esirkepov, direct, Vay, Villasenor) static short current_deposition_algo; //! Integer that corresponds to the charge deposition algorithm (only standard deposition) static short charge_deposition_algo; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 92623bc2a06..5031381561f 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -1157,6 +1157,12 @@ WarpX::ReadParameters () "Current centering (nodal deposition) cannot be used with Esirkepov deposition." "Please set warpx.do_current_centering = 0 or algo.current_deposition = direct."); + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( + current_deposition_algo != CurrentDepositionAlgo::Villasenor || + !do_current_centering, + "Current centering (nodal deposition) cannot be used with Villasenor deposition." 
+ "Please set warpx.do_current_centering = 0 or algo.current_deposition = direct."); + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( WarpX::current_deposition_algo != CurrentDepositionAlgo::Vay || !do_current_centering, @@ -1179,6 +1185,14 @@ WarpX::ReadParameters () "Vay deposition not implemented with multi-J algorithm"); } + if (current_deposition_algo == CurrentDepositionAlgo::Villasenor) { + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( + evolve_scheme == EvolveScheme::ImplicitPicard || + evolve_scheme == EvolveScheme::SemiImplicitPicard, + "Villasenor current deposition can only" + "be used with Implicit evolve schemes."); + } + // Query algo.field_gathering from input, set field_gathering_algo to // "default" if not found (default defined in Utils/WarpXAlgorithmSelection.cpp) field_gathering_algo = static_cast(GetAlgorithmInteger(pp_algo, "field_gathering")); @@ -1243,8 +1257,9 @@ WarpX::ReadParameters () WARPX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo == CurrentDepositionAlgo::Esirkepov || + current_deposition_algo == CurrentDepositionAlgo::Villasenor || current_deposition_algo == CurrentDepositionAlgo::Direct, - "Only Esirkepov or Direct current deposition supported with the implicit and semi-implicit schemes"); + "Only Esirkepov, Villasenor, or Direct current deposition supported with the implicit and semi-implicit schemes"); WARPX_ALWAYS_ASSERT_WITH_MESSAGE( electromagnetic_solver_id == ElectromagneticSolverAlgo::Yee || @@ -1259,18 +1274,6 @@ WarpX::ReadParameters () WARPX_ALWAYS_ASSERT_WITH_MESSAGE( field_gathering_algo != GatheringAlgo::MomentumConserving, "With implicit and semi-implicit schemes, the momentum conserving field gather is not supported as it would not conserve energy"); - - if (current_deposition_algo == CurrentDepositionAlgo::Direct) { - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - !galerkin_interpolation, - "With implicit and semi-implicit schemes and direct deposition, the Galerkin field gathering must be turned off in order to conserve energy"); - } - - if (current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - galerkin_interpolation, - "With implicit and semi-implicit schemes and Esirkepov deposition, the Galerkin field gathering must be turned on in order to conserve energy"); - } } // Load balancing parameters @@ -1325,10 +1328,18 @@ WarpX::ReadParameters () if (!species_names.empty() || !lasers_names.empty()) { if (utils::parser::queryWithParser(pp_algo, "particle_shape", particle_shape)){ - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - (particle_shape >= 1) && (particle_shape <=3), - "algo.particle_shape can be only 1, 2, or 3" - ); + if(current_deposition_algo == CurrentDepositionAlgo::Villasenor) { + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( + (particle_shape >= 1) && (particle_shape <=4), + "algo.particle_shape can be only 1, 2, 3, or 4 with villasenor deposition" + ); + } + else { + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( + (particle_shape >= 1) && (particle_shape <=3), + "algo.particle_shape can be only 1, 2, or 3" + ); + } nox = particle_shape; noy = particle_shape; @@ -1337,7 +1348,8 @@ WarpX::ReadParameters () else{ WARPX_ABORT_WITH_MESSAGE( "algo.particle_shape must be set in the input file:" - " please set algo.particle_shape to 1, 2, or 3"); + " please set algo.particle_shape to 1, 2, or 3." 
+ " if using the villasenor deposition, can use 4 also."); } if ((maxLevel() > 0) && (particle_shape > 1) && (do_pml_j_damping == 1)) @@ -1481,6 +1493,7 @@ WarpX::ReadParameters () // are used current_correction = true; if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov || + WarpX::current_deposition_algo == CurrentDepositionAlgo::Villasenor || WarpX::current_deposition_algo == CurrentDepositionAlgo::Vay || WarpX::do_dive_cleaning) { @@ -1495,6 +1508,7 @@ WarpX::ReadParameters () if (!current_correction && current_deposition_algo != CurrentDepositionAlgo::Esirkepov && + current_deposition_algo != CurrentDepositionAlgo::Villasenor && current_deposition_algo != CurrentDepositionAlgo::Vay) { ablastr::warn_manager::WMRecordWarning( @@ -1585,14 +1599,15 @@ WarpX::ReadParameters () ); - if (current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { + if (current_deposition_algo == CurrentDepositionAlgo::Esirkepov || + current_deposition_algo == CurrentDepositionAlgo::Villasenor) { // The comoving PSATD algorithm is not implemented nor tested with Esirkepov current deposition WARPX_ALWAYS_ASSERT_WITH_MESSAGE(v_comoving_is_zero, - "Esirkepov current deposition cannot be used with the comoving PSATD algorithm"); + "charge-conserving current depositions (Esirkepov and Villasenor) cannot be used with the comoving PSATD algorithm"); WARPX_ALWAYS_ASSERT_WITH_MESSAGE(v_galilean_is_zero, - "Esirkepov current deposition cannot be used with the Galilean algorithm."); + "charge-conserving current depositions (Esirkepov and Villasenor) cannot be used with the Galilean algorithm."); } WARPX_ALWAYS_ASSERT_WITH_MESSAGE( diff --git a/Source/ablastr/particles/DepositCharge.H b/Source/ablastr/particles/DepositCharge.H index ad01ba4a213..f43e35c6b0b 100644 --- a/Source/ablastr/particles/DepositCharge.H +++ b/Source/ablastr/particles/DepositCharge.H @@ -195,6 +195,11 @@ deposit_charge (typename T_PC::ParIterType& pti, rho_fab, np_to_deposit.value(), dx, xyzmin, lo, charge, n_rz_azimuthal_modes, cost, load_balance_costs_update_algo); + } else if (nox == 4){ + doChargeDepositionShapeN<4>(GetPosition, wp.dataPtr()+offset, ion_lev, + rho_fab, np_to_deposit.value(), dx, xyzmin, lo, charge, + n_rz_azimuthal_modes, cost, + load_balance_costs_update_algo); } ABLASTR_PROFILE_VAR_STOP(blp_ppc_chd, do_device_synchronize); From 6e332e9479baa2769ff0ac22adb51c25c67627da Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Fri, 2 Feb 2024 14:21:51 -0800 Subject: [PATCH 09/13] Particle Container to Pure SoA Again (#4653) * AMReX & pyAMReX: Latest `development` More pure SoA and id handling goodness. * Particle Container to Pure SoA Again Transition to new, purely SoA particle containers. This was originally merged in #3850 and reverted in #4652, since we discovered issues loosing particles & laser particles on GPU. 
* Modernize `idcpu` Treatment - faster: less emitted operations, no jumps - cheaper: less used registers - safer: no read-before-write warnings - cooler: no explanation needed --- .github/workflows/cuda.yml | 2 +- Docs/source/developers/amrex_basics.rst | 2 +- Docs/source/developers/dimensionality.rst | 4 +- Docs/source/developers/particles.rst | 18 +- Docs/source/usage/workflows/python_extend.rst | 28 +- .../particle_data_python/PICMI_inputs_2d.py | 6 +- .../PICMI_inputs_prev_pos_2d.py | 6 +- .../PICMI_inputs_runtime_component_analyze.py | 6 +- Python/pywarpx/_libwarpx.py | 6 +- Python/pywarpx/particle_containers.py | 255 ++++++++---------- Regression/WarpX-GPU-tests.ini | 2 +- Regression/WarpX-tests.ini | 2 +- .../BackTransformParticleFunctor.H | 16 +- .../FlushFormats/FlushFormatAscent.cpp | 3 - .../FlushFormats/FlushFormatCheckpoint.cpp | 9 +- .../FlushFormats/FlushFormatPlotfile.cpp | 13 +- Source/Diagnostics/ParticleIO.cpp | 2 +- .../Diagnostics/ReducedDiags/FieldProbe.cpp | 6 +- .../FieldProbeParticleContainer.H | 20 +- .../FieldProbeParticleContainer.cpp | 34 +-- .../ReducedDiags/LoadBalanceCosts.cpp | 3 +- Source/Diagnostics/WarpXOpenPMD.H | 4 +- Source/Diagnostics/WarpXOpenPMD.cpp | 139 ++-------- .../ParticleBoundaryProcess.H | 7 +- Source/EmbeddedBoundary/ParticleScraper.H | 7 +- .../BinaryCollision/BinaryCollision.H | 16 +- .../Coulomb/PairWiseCoulombCollisionFunc.H | 7 +- .../Collision/BinaryCollision/DSMC/DSMC.H | 3 +- .../DSMC/SplitAndScatterFunc.H | 28 +- .../NuclearFusion/NuclearFusionFunc.H | 14 +- .../ProtonBoronFusionInitializeMomentum.H | 10 +- .../TwoProductFusionInitializeMomentum.H | 4 +- .../BinaryCollision/ParticleCreationFunc.H | 59 ++-- .../BinaryCollision/ShuffleFisherYates.H | 2 +- .../Particles/Deposition/ChargeDeposition.H | 2 +- .../Particles/Deposition/CurrentDeposition.H | 2 +- .../ElementaryProcess/QEDPairGeneration.H | 2 +- .../ElementaryProcess/QEDPhotonEmission.H | 16 +- Source/Particles/LaserParticleContainer.H | 7 +- .../NamedComponentParticleContainer.H | 52 ++-- Source/Particles/ParticleBoundaryBuffer.cpp | 4 +- Source/Particles/ParticleCreation/SmartCopy.H | 5 +- .../Particles/ParticleCreation/SmartCreate.H | 21 +- .../Particles/ParticleCreation/SmartUtils.H | 8 +- Source/Particles/PhysicalParticleContainer.H | 8 +- .../Particles/PhysicalParticleContainer.cpp | 131 ++++----- Source/Particles/Pusher/GetAndSetPosition.H | 152 +++++++---- .../Particles/Resampling/LevelingThinning.cpp | 5 +- Source/Particles/Sorting/Partition.cpp | 4 +- Source/Particles/Sorting/SortingUtils.H | 41 ++- Source/Particles/Sorting/SortingUtils.cpp | 2 +- Source/Particles/WarpXParticleContainer.H | 27 +- Source/Particles/WarpXParticleContainer.cpp | 90 +++---- .../Particles/ParticleBoundaryBuffer.cpp | 10 +- .../PinnedMemoryParticleContainer.cpp | 2 +- .../Particles/WarpXParticleContainer.cpp | 18 +- Source/Utils/ParticleUtils.H | 7 +- Source/Utils/ParticleUtils.cpp | 26 +- Source/ablastr/particles/IndexHandling.H | 41 --- Source/ablastr/particles/ParticleMoments.H | 25 +- cmake/dependencies/AMReX.cmake | 2 +- cmake/dependencies/pyAMReX.cmake | 2 +- run_test.sh | 2 +- 63 files changed, 703 insertions(+), 754 deletions(-) delete mode 100644 Source/ablastr/particles/IndexHandling.H diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 79916c455d1..5e9f43f639d 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -115,7 +115,7 @@ jobs: which nvcc || echo "nvcc not in PATH!" 
git clone https://github.com/AMReX-Codes/amrex.git ../amrex - cd ../amrex && git checkout --detach 24.02 && cd - + cd ../amrex && git checkout --detach 296ed40e16ae1877640f5b78e9162dbd4ba1c279 && cd - make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 2 ccache -s diff --git a/Docs/source/developers/amrex_basics.rst b/Docs/source/developers/amrex_basics.rst index 577a6547bb5..64ad71af06c 100644 --- a/Docs/source/developers/amrex_basics.rst +++ b/Docs/source/developers/amrex_basics.rst @@ -13,7 +13,7 @@ WarpX is built on the Adaptive Mesh Refinement (AMR) library `AMReX & particle_di // get names of real comps std::map real_comps_map = pc->getParticleComps(); - // WarpXParticleContainer compile-time extra AoS attributes (Real): 0 - // WarpXParticleContainer compile-time extra AoS attributes (int): 0 - // WarpXParticleContainer compile-time extra SoA attributes (Real): PIdx::nattribs // not an efficient search, but N is small... for(int j = 0; j < PIdx::nattribs; ++j) diff --git a/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp b/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp index d77437fb931..b083e60529f 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp +++ b/Source/Diagnostics/FlushFormats/FlushFormatCheckpoint.cpp @@ -178,8 +178,8 @@ FlushFormatCheckpoint::CheckpointParticles ( Vector real_names; Vector int_names; + // note: positions skipped here, since we reconstruct a plotfile SoA from them real_names.push_back("weight"); - real_names.push_back("momentum_x"); real_names.push_back("momentum_y"); real_names.push_back("momentum_z"); @@ -189,9 +189,12 @@ FlushFormatCheckpoint::CheckpointParticles ( #endif // get the names of the real comps - real_names.resize(pc->NumRealComps()); + // note: skips the mandatory AMREX_SPACEDIM positions for pure SoA + real_names.resize(pc->NumRealComps() - AMREX_SPACEDIM); auto runtime_rnames = pc->getParticleRuntimeComps(); - for (auto const& x : runtime_rnames) { real_names[x.second+PIdx::nattribs] = x.first; } + for (auto const& x : runtime_rnames) { + real_names[x.second + PIdx::nattribs - AMREX_SPACEDIM] = x.first; + } // and the int comps int_names.resize(pc->NumIntComps()); diff --git a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp index 970d9a504d2..880e2df01ff 100644 --- a/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp +++ b/Source/Diagnostics/FlushFormats/FlushFormatPlotfile.cpp @@ -355,8 +355,8 @@ FlushFormatPlotfile::WriteParticles(const std::string& dir, Vector int_flags; Vector real_flags; + // note: positions skipped here, since we reconstruct a plotfile SoA from them real_names.push_back("weight"); - real_names.push_back("momentum_x"); real_names.push_back("momentum_y"); real_names.push_back("momentum_z"); @@ -366,14 +366,21 @@ FlushFormatPlotfile::WriteParticles(const std::string& dir, #endif // get the names of the real comps - real_names.resize(tmp.NumRealComps()); + + // note: skips the mandatory AMREX_SPACEDIM positions for pure SoA + real_names.resize(tmp.NumRealComps() - AMREX_SPACEDIM); auto runtime_rnames = tmp.getParticleRuntimeComps(); - for (auto const& x : runtime_rnames) { real_names[x.second+PIdx::nattribs] = x.first; } + for (auto const& x : runtime_rnames) { + real_names[x.second + PIdx::nattribs - AMREX_SPACEDIM] = x.first; + } // plot any "extra" fields by default real_flags = part_diag.m_plot_flags; real_flags.resize(tmp.NumRealComps(), 1); + // note: skip the 
mandatory AMREX_SPACEDIM positions for pure SoA + real_flags.erase(real_flags.begin(), real_flags.begin() + AMREX_SPACEDIM); + // and the names int_names.resize(tmp.NumIntComps()); auto runtime_inames = tmp.getParticleRuntimeiComps(); diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp index 7ca5e6541d7..a8bb9303fe1 100644 --- a/Source/Diagnostics/ParticleIO.cpp +++ b/Source/Diagnostics/ParticleIO.cpp @@ -160,7 +160,7 @@ MultiParticleContainer::Restart (const std::string& dir) ); } - for (int j = PIdx::nattribs; j < nr; ++j) { + for (int j = PIdx::nattribs-AMREX_SPACEDIM; j < nr; ++j) { const auto& comp_name = real_comp_names[j]; auto current_comp_names = pc->getParticleComps(); auto search = current_comp_names.find(comp_name); diff --git a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp index 9f45392bb0a..24ad0e64ea8 100644 --- a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp +++ b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp @@ -431,8 +431,6 @@ void FieldProbe::ComputeDiags (int step) { const auto getPosition = GetParticlePosition(pti); auto setPosition = SetParticlePosition(pti); - const auto& aos = pti.GetArrayOfStructs(); - const auto* AMREX_RESTRICT m_structs = aos().dataPtr(); auto const np = pti.numParticles(); if (update_particles_moving_window) @@ -482,6 +480,8 @@ void FieldProbe::ComputeDiags (int step) ParticleReal* const AMREX_RESTRICT part_Bz = attribs[FieldProbePIdx::Bz].dataPtr(); ParticleReal* const AMREX_RESTRICT part_S = attribs[FieldProbePIdx::S].dataPtr(); + auto * const AMREX_RESTRICT idcpu = pti.GetStructOfArrays().GetIdCPUData().data(); + const auto &xyzmin = WarpX::LowerCorner(box, lev, 0._rt); const std::array &dx = WarpX::CellSize(lev); @@ -556,7 +556,7 @@ void FieldProbe::ComputeDiags (int step) amrex::ParticleReal xp, yp, zp; getPosition(ip, xp, yp, zp); long idx = ip*noutputs; - dvp[idx++] = m_structs[ip].id(); + dvp[idx++] = amrex::ParticleIDWrapper{idcpu[ip]}; // all particles created on IO cpu dvp[idx++] = xp; dvp[idx++] = yp; dvp[idx++] = zp; diff --git a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H index c85bf8fd541..7d59ade5dc6 100644 --- a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H +++ b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.H @@ -24,7 +24,14 @@ struct FieldProbePIdx { enum { - Ex = 0, Ey, Ez, +#if !defined (WARPX_DIM_1D_Z) + x, +#endif +#if defined (WARPX_DIM_3D) + y, +#endif + z, + Ex, Ey, Ez, Bx, By, Bz, S, //!< the Poynting vector #ifdef WARPX_DIM_RZ @@ -40,9 +47,14 @@ struct FieldProbePIdx * nattribs tells the particle container to allot 7 SOA values. */ class FieldProbeParticleContainer - : public amrex::ParticleContainer<0, 0, FieldProbePIdx::nattribs> + : public amrex::ParticleContainerPureSoA { public: + static constexpr int NStructReal = 0; + static constexpr int NStructInt = 0; + static constexpr int NReal = FieldProbePIdx::nattribs; + static constexpr int NInt = 0; + FieldProbeParticleContainer (amrex::AmrCore* amr_core); ~FieldProbeParticleContainer() override = default; @@ -52,9 +64,9 @@ public: FieldProbeParticleContainer& operator= ( FieldProbeParticleContainer&& ) = default; //! amrex iterator for our number of attributes - using iterator = amrex::ParIter<0, 0, FieldProbePIdx::nattribs, 0>; + using iterator = amrex::ParIterSoA; //! 
amrex iterator for our number of attributes (read-only) - using const_iterator = amrex::ParConstIter<0, 0, FieldProbePIdx::nattribs, 0>; + using const_iterator = amrex::ParConstIterSoA; //! similar to WarpXParticleContainer::AddNParticles but does not include u(x,y,z) void AddNParticles (int lev, amrex::Vector const & x, amrex::Vector const & y, amrex::Vector const & z); diff --git a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp index 1fd741ddc47..7e7aecb9167 100644 --- a/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp +++ b/Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp @@ -59,7 +59,7 @@ using namespace amrex; FieldProbeParticleContainer::FieldProbeParticleContainer (AmrCore* amr_core) - : ParticleContainer<0, 0, FieldProbePIdx::nattribs>(amr_core->GetParGDB()) + : ParticleContainerPureSoA(amr_core->GetParGDB()) { SetParticleSize(); } @@ -89,33 +89,15 @@ FieldProbeParticleContainer::AddNParticles (int lev, * is then coppied to the permament tile which is stored on the particle * (particle_tile). */ + using PinnedTile = typename ContainerLike::ParticleTileType; - using PinnedTile = ParticleTile, - NArrayReal, NArrayInt, - amrex::PinnedArenaAllocator>; PinnedTile pinned_tile; pinned_tile.define(NumRuntimeRealComps(), NumRuntimeIntComps()); for (int i = 0; i < np; i++) { - ParticleType p; - p.id() = ParticleType::NextID(); - p.cpu() = ParallelDescriptor::MyProc(); -#if defined(WARPX_DIM_3D) - p.pos(0) = x[i]; - p.pos(1) = y[i]; - p.pos(2) = z[i]; -#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - amrex::ignore_unused(y); - p.pos(0) = x[i]; - p.pos(1) = z[i]; -#elif defined(WARPX_DIM_1D_Z) - amrex::ignore_unused(x, y); - p.pos(0) = z[i]; -#endif - - // write position, cpu id, and particle id to particle - pinned_tile.push_back(p); + auto & idcpu_data = pinned_tile.GetStructOfArrays().GetIdCPUData(); + idcpu_data.push_back(amrex::SetParticleIDandCPU(ParticleType::NextID(), ParallelDescriptor::MyProc())); } // write Real attributes (SoA) to particle initialized zero @@ -125,7 +107,13 @@ FieldProbeParticleContainer::AddNParticles (int lev, #ifdef WARPX_DIM_RZ pinned_tile.push_back_real(FieldProbePIdx::theta, np, 0.0); #endif - +#if !defined (WARPX_DIM_1D_Z) + pinned_tile.push_back_real(FieldProbePIdx::x, x); +#endif +#if defined (WARPX_DIM_3D) + pinned_tile.push_back_real(FieldProbePIdx::y, y); +#endif + pinned_tile.push_back_real(FieldProbePIdx::z, z); pinned_tile.push_back_real(FieldProbePIdx::Ex, np, 0.0); pinned_tile.push_back_real(FieldProbePIdx::Ey, np, 0.0); pinned_tile.push_back_real(FieldProbePIdx::Ez, np, 0.0); diff --git a/Source/Diagnostics/ReducedDiags/LoadBalanceCosts.cpp b/Source/Diagnostics/ReducedDiags/LoadBalanceCosts.cpp index 893b00a5f00..b4e07b51982 100644 --- a/Source/Diagnostics/ReducedDiags/LoadBalanceCosts.cpp +++ b/Source/Diagnostics/ReducedDiags/LoadBalanceCosts.cpp @@ -56,8 +56,7 @@ namespace auto const & plev = pc.GetParticles(lev); auto const & ptile = plev.at(box_index); - auto const & aos = ptile.GetArrayOfStructs(); - auto const np = aos.numParticles(); + auto const np = ptile.numParticles(); num_macro_particles += np; } diff --git a/Source/Diagnostics/WarpXOpenPMD.H b/Source/Diagnostics/WarpXOpenPMD.H index 4597dacd9ae..6c904790e15 100644 --- a/Source/Diagnostics/WarpXOpenPMD.H +++ b/Source/Diagnostics/WarpXOpenPMD.H @@ -41,7 +41,7 @@ class WarpXParticleCounter { public: using ParticleContainer = typename 
WarpXParticleContainer::ContainerLike; - using ParticleIter = typename amrex::ParIter<0, 0, PIdx::nattribs, 0, amrex::PinnedArenaAllocator>; + using ParticleIter = typename amrex::ParIterSoA; WarpXParticleCounter (ParticleContainer* pc); [[nodiscard]] unsigned long GetTotalNumParticles () const {return m_Total;} @@ -77,7 +77,7 @@ class WarpXOpenPMDPlot { public: using ParticleContainer = typename WarpXParticleContainer::ContainerLike; - using ParticleIter = typename amrex::ParConstIter<0, 0, PIdx::nattribs, 0, amrex::PinnedArenaAllocator>; + using ParticleIter = typename amrex::ParConstIterSoA; /** Initialize openPMD I/O routines * diff --git a/Source/Diagnostics/WarpXOpenPMD.cpp b/Source/Diagnostics/WarpXOpenPMD.cpp index 7cc9f571a4a..39717ef6ec5 100644 --- a/Source/Diagnostics/WarpXOpenPMD.cpp +++ b/Source/Diagnostics/WarpXOpenPMD.cpp @@ -18,11 +18,9 @@ #include "WarpX.H" #include "OpenPMDHelpFunction.H" -#include #include #include -#include #include #include #include @@ -550,6 +548,13 @@ for (unsigned i = 0, n = particle_diags.size(); i < n; ++i) { // see openPMD ED-PIC extension for namings // note: an underscore separates the record name from its component // for non-scalar records +#if !defined (WARPX_DIM_1D_Z) + real_names.push_back("position_x"); +#endif +#if defined (WARPX_DIM_3D) + real_names.push_back("position_y"); +#endif + real_names.push_back("position_z"); real_names.push_back("weighting"); real_names.push_back("momentum_x"); real_names.push_back("momentum_y"); @@ -722,77 +727,7 @@ WarpXOpenPMDPlot::DumpToFile (ParticleContainer* pc, contributed_particles = true; - // get position and particle ID from aos - // note: this implementation iterates the AoS 4x... - // if we flush late as we do now, we can also copy out the data in one go - const auto &aos = pti.GetArrayOfStructs(); // size = numParticlesOnTile - { - // Save positions -#if defined(WARPX_DIM_RZ) - { - const std::shared_ptr z( - new amrex::ParticleReal[numParticleOnTile], - [](amrex::ParticleReal const *p) { delete[] p; } - ); - for (auto i = 0; i < numParticleOnTile; i++) { - z.get()[i] = aos[i].pos(1); // {0: "r", 1: "z"} - } - std::string const positionComponent = "z"; - currSpecies["position"]["z"].storeChunk(z, {offset}, {numParticleOnTile64}); - } - - // reconstruct x and y from polar coordinates r, theta - auto const& soa = pti.GetStructOfArrays(); - amrex::ParticleReal const* theta = soa.GetRealData(PIdx::theta).dataPtr(); - WARPX_ALWAYS_ASSERT_WITH_MESSAGE(theta != nullptr, "openPMD: invalid theta pointer."); - WARPX_ALWAYS_ASSERT_WITH_MESSAGE(int(soa.GetRealData(PIdx::theta).size()) == numParticleOnTile, - "openPMD: theta and tile size do not match"); - { - const std::shared_ptr< amrex::ParticleReal > x( - new amrex::ParticleReal[numParticleOnTile], - [](amrex::ParticleReal const *p){ delete[] p; } - ); - const std::shared_ptr< amrex::ParticleReal > y( - new amrex::ParticleReal[numParticleOnTile], - [](amrex::ParticleReal const *p){ delete[] p; } - ); - for (auto i=0; i curr( - new amrex::ParticleReal[numParticleOnTile], - [](amrex::ParticleReal const *p) { delete[] p; } - ); - for (auto i = 0; i < numParticleOnTile; i++) { - curr.get()[i] = aos[i].pos(currDim); - } - std::string const positionComponent = positionComponents[currDim]; - currSpecies["position"][positionComponent].storeChunk(curr, {offset}, - {numParticleOnTile64}); - } -#endif - - // save particle ID after converting it to a globally unique ID - const std::shared_ptr ids( - new uint64_t[numParticleOnTile], - [](uint64_t const *p) { delete[] 
p; } - ); - for (auto i = 0; i < numParticleOnTile; i++) { - ids.get()[i] = ablastr::particles::localIDtoGlobal(static_cast(aos[i].id()), static_cast(aos[i].cpu())); - } - const auto *const scalar = openPMD::RecordComponent::SCALAR; - currSpecies["id"][scalar].storeChunk(ids, {offset}, {numParticleOnTile64}); - - } - // save "extra" particle properties in AoS and SoA + // save particle properties SaveRealProperty(pti, currSpecies, offset, @@ -893,10 +828,9 @@ WarpXOpenPMDPlot::SetupRealProperties (ParticleContainer const * pc, std::set< std::string > addedRecords; // add meta-data per record only once for (auto idx=0; idxNumRealComps(); idx++) { - auto ii = ParticleContainer::NStructReal + idx; // jump over extra AoS names - if (write_real_comp[ii]) { + if (write_real_comp[idx]) { // handle scalar and non-scalar records by name - const auto [record_name, component_name] = detail::name2openPMD(real_comp_names[ii]); + const auto [record_name, component_name] = detail::name2openPMD(real_comp_names[idx]); auto currRecord = currSpecies[record_name]; // meta data for ED-PIC extension @@ -917,10 +851,9 @@ WarpXOpenPMDPlot::SetupRealProperties (ParticleContainer const * pc, } } for (auto idx=0; idx( numParticleOnTile ); - auto const& aos = pti.GetArrayOfStructs(); // size = numParticlesOnTile + auto const numParticleOnTile64 = static_cast(numParticleOnTile); auto const& soa = pti.GetStructOfArrays(); - // first we concatenate the AoS into contiguous arrays - { - // note: WarpX does not yet use extra AoS Real attributes - for( auto idx=0; idx d( - new amrex::ParticleReal[numParticleOnTile], - [](amrex::ParticleReal const *p){ delete[] p; } - ); - - for( auto kk=0; kk #include #include #include + namespace ParticleBoundaryProcess { struct NoOp { @@ -25,12 +27,11 @@ struct NoOp { struct Absorb { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void operator() (const PData& ptd, int i, + void operator() (PData& ptd, int i, const amrex::RealVect& /*pos*/, const amrex::RealVect& /*normal*/, amrex::RandomEngine const& /*engine*/) const noexcept { - auto& p = ptd.m_aos[i]; - p.id() = -p.id(); + amrex::ParticleIDWrapper{ptd.m_idcpu[i]}.make_invalid(); } }; } diff --git a/Source/EmbeddedBoundary/ParticleScraper.H b/Source/EmbeddedBoundary/ParticleScraper.H index d6196c35f44..a175fe23133 100644 --- a/Source/EmbeddedBoundary/ParticleScraper.H +++ b/Source/EmbeddedBoundary/ParticleScraper.H @@ -38,7 +38,7 @@ * passed in to this function as an argument. This function can access the * position at which the particle hit the boundary, and also the associated * normal vector. Particles can be `absorbed` by setting their ids to negative - * to flag them for removal. Likewise, the can be reflected back into the domain + * to flag them for removal. Likewise, they can be reflected back into the domain * by modifying their data appropriately and leaving their ids alone. * * This version operates only at the specified level. @@ -82,7 +82,7 @@ scrapeParticles (PC& pc, const amrex::Vector& distance_t * passed in to this function as an argument. This function can access the * position at which the particle hit the boundary, and also the associated * normal vector. Particles can be `absorbed` by setting their ids to negative - * to flag them for removal. Likewise, the can be reflected back into the domain + * to flag them for removal. Likewise, they can be reflected back into the domain * by modifying their data appropriately and leaving their ids alone. * * This version operates over all the levels in the pc. 
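As a concrete illustration of the second option, here is a minimal sketch of a reflecting callback (assuming 3D and WarpX's `PIdx` momentum components; the `Absorb` functor above is the reference for the call signature): it flips the momentum along the boundary normal and leaves the id valid, so the particle is kept.

.. code-block:: cpp

   struct ReflectAtBoundary {
       template <typename PData>
       AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
       void operator() (PData& ptd, int i,
                        const amrex::RealVect& /*pos*/,
                        const amrex::RealVect& n,
                        amrex::RandomEngine const& /*engine*/) const noexcept
       {
           amrex::ParticleReal& ux = ptd.m_rdata[PIdx::ux][i];
           amrex::ParticleReal& uy = ptd.m_rdata[PIdx::uy][i];
           amrex::ParticleReal& uz = ptd.m_rdata[PIdx::uz][i];
           // specular reflection: u -> u - 2 (u . n) n
           const amrex::ParticleReal un = ux*n[0] + uy*n[1] + uz*n[2];
           ux -= 2._prt*un*n[0];
           uy -= 2._prt*un*n[1];
           uz -= 2._prt*un*n[2];
           // id left untouched: the particle stays in the simulation
       }
   };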
@@ -170,13 +170,12 @@ scrapeParticles (PC& pc, const amrex::Vector& distance_t auto& tile = pti.GetParticleTile(); auto ptd = tile.getParticleTileData(); const auto np = tile.numParticles(); - amrex::Particle<0,0> * const particles = tile.GetArrayOfStructs()().data(); auto phi = (*distance_to_eb[lev])[pti].array(); // signed distance function amrex::ParallelForRNG( np, [=] AMREX_GPU_DEVICE (const int ip, amrex::RandomEngine const& engine) noexcept { // skip particles that are already flagged for removal - if (particles[ip].id() < 0) return; + if (!amrex::ParticleIDWrapper{ptd.m_idcpu[ip]}.is_valid()) return; amrex::ParticleReal xp, yp, zp; getPosition(ip, xp, yp, zp); diff --git a/Source/Particles/Collision/BinaryCollision/BinaryCollision.H b/Source/Particles/Collision/BinaryCollision/BinaryCollision.H index 5c90dab25e6..c69f07acdb2 100644 --- a/Source/Particles/Collision/BinaryCollision/BinaryCollision.H +++ b/Source/Particles/Collision/BinaryCollision/BinaryCollision.H @@ -72,7 +72,8 @@ class BinaryCollision final // Define shortcuts for frequently-used type names using ParticleType = WarpXParticleContainer::ParticleType; using ParticleTileType = WarpXParticleContainer::ParticleTileType; - using ParticleBins = amrex::DenseBins; + using ParticleTileDataType = ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; using index_type = ParticleBins::index_type; @@ -261,9 +262,6 @@ public: const amrex::ParticleReal q1 = species_1.getCharge(); const amrex::ParticleReal m1 = species_1.getMass(); auto get_position_1 = GetParticlePosition(ptile_1, getpos_offset); - // Needed to access the particle id - ParticleType * AMREX_RESTRICT - particle_ptr_1 = ptile_1.GetArrayOfStructs()().data(); amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev); #if defined WARPX_DIM_1D_Z @@ -371,7 +369,7 @@ public: soa_1, soa_1, product_species_vector, tile_products_data, - particle_ptr_1, particle_ptr_1, m1, m1, + m1, m1, products_mass, p_mask, products_np, copy_species1, copy_species2, p_pair_indices_1, p_pair_indices_2, @@ -403,9 +401,6 @@ public: const amrex::ParticleReal q1 = species_1.getCharge(); const amrex::ParticleReal m1 = species_1.getMass(); auto get_position_1 = GetParticlePosition(ptile_1, getpos_offset); - // Needed to access the particle id - ParticleType * AMREX_RESTRICT - particle_ptr_1 = ptile_1.GetArrayOfStructs()().data(); // - Species 2 const auto soa_2 = ptile_2.getParticleTileData(); index_type* AMREX_RESTRICT indices_2 = bins_2.permutationPtr(); @@ -413,9 +408,6 @@ public: const amrex::ParticleReal q2 = species_2.getCharge(); const amrex::ParticleReal m2 = species_2.getMass(); auto get_position_2 = GetParticlePosition(ptile_2, getpos_offset); - // Needed to access the particle id - ParticleType * AMREX_RESTRICT - particle_ptr_2 = ptile_2.GetArrayOfStructs()().data(); amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev); #if defined WARPX_DIM_1D_Z @@ -535,7 +527,7 @@ public: soa_1, soa_2, product_species_vector, tile_products_data, - particle_ptr_1, particle_ptr_2, m1, m2, + m1, m2, products_mass, p_mask, products_np, copy_species1, copy_species2, p_pair_indices_1, p_pair_indices_2, diff --git a/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H b/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H index feb7acf81d3..cfdc36d3c50 100644 --- 
a/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H +++ b/Source/Particles/Collision/BinaryCollision/Coulomb/PairWiseCoulombCollisionFunc.H @@ -23,10 +23,13 @@ * \brief This functor performs pairwise Coulomb collision on a single cell by calling the function * ElasticCollisionPerez. It also reads and contains the Coulomb logarithm. */ -class PairWiseCoulombCollisionFunc{ +class PairWiseCoulombCollisionFunc +{ // Define shortcuts for frequently-used type names using ParticleType = WarpXParticleContainer::ParticleType; - using ParticleBins = amrex::DenseBins; + using ParticleTileType = WarpXParticleContainer::ParticleTileType; + using ParticleTileDataType = ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; using index_type = ParticleBins::index_type; using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; diff --git a/Source/Particles/Collision/BinaryCollision/DSMC/DSMC.H b/Source/Particles/Collision/BinaryCollision/DSMC/DSMC.H index c1be307b811..ab01eba2c81 100644 --- a/Source/Particles/Collision/BinaryCollision/DSMC/DSMC.H +++ b/Source/Particles/Collision/BinaryCollision/DSMC/DSMC.H @@ -38,7 +38,8 @@ class DSMC final // Define shortcuts for frequently-used type names using ParticleType = WarpXParticleContainer::ParticleType; using ParticleTileType = WarpXParticleContainer::ParticleTileType; - using ParticleBins = amrex::DenseBins; + using ParticleTileDataType = ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; using index_type = ParticleBins::index_type; diff --git a/Source/Particles/Collision/BinaryCollision/DSMC/SplitAndScatterFunc.H b/Source/Particles/Collision/BinaryCollision/DSMC/SplitAndScatterFunc.H index c1fb7ee7e38..f684b60da78 100644 --- a/Source/Particles/Collision/BinaryCollision/DSMC/SplitAndScatterFunc.H +++ b/Source/Particles/Collision/BinaryCollision/DSMC/SplitAndScatterFunc.H @@ -10,6 +10,9 @@ #define SPLIT_AND_SCATTER_FUNC_H_ #include "Particles/Collision/ScatteringProcess.H" +#include "Particles/NamedComponentParticleContainer.H" + +#include /** * \brief Function that performs the particle scattering and injection due @@ -55,8 +58,6 @@ int splitScatteringParticles ( const auto ptile1_data = ptile1.getParticleTileData(); const auto ptile2_data = ptile2.getParticleTileData(); - const Long minus_one_long = -1; - ParallelForRNG(n_total_pairs, [=] AMREX_GPU_DEVICE (int i, RandomEngine const& engine) noexcept { @@ -70,20 +71,35 @@ int splitScatteringParticles ( // starting with the parent particles auto& w1 = ptile1_data.m_rdata[PIdx::w][p_pair_indices_1[i]]; auto& w2 = ptile2_data.m_rdata[PIdx::w][p_pair_indices_2[i]]; + uint64_t* AMREX_RESTRICT idcpu1 = ptile1_data.m_idcpu; + uint64_t* AMREX_RESTRICT idcpu2 = ptile2_data.m_idcpu; + + // Note: Particle::atomicSetID should also be provided as a standalone helper function in AMReX + // to replace the following lambda. + auto const atomicSetIdMinus = [] AMREX_GPU_DEVICE (uint64_t & idcpu) + { +#if defined(AMREX_USE_OMP) +#pragma omp atomic write + idcpu = amrex::ParticleIdCpus::Invalid; +#else + amrex::Gpu::Atomic::Exch( + (unsigned long long *)&idcpu, + (unsigned long long)amrex::ParticleIdCpus::Invalid + ); +#endif + }; // Remove p_pair_reaction_weight[i] from the colliding particles' weights. // If the colliding particle weight decreases to zero, remove particle by // setting its id to -1. 
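+ // Note: ParticleIDWrapper does not yet offer a device-side atomic setter,
+ // so the invalidation below writes the whole 64-bit idcpu word at once
+ // (an OpenMP atomic write on host builds, Gpu::Atomic::Exch on device).
+ // A serial, non-atomic sketch of the same operation, with the names used
+ // in the code below:
+ //
+ //     if (w1 <= 0._prt) { idcpu1[p_pair_indices_1[i]] = amrex::ParticleIdCpus::Invalid; }
+ //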
Gpu::Atomic::AddNoRet(&w1, -p_pair_reaction_weight[i]); if (w1 <= 0._prt) { - auto& p = ptile1_data.m_aos[p_pair_indices_1[i]]; - p.atomicSetID(minus_one_long); + atomicSetIdMinus(idcpu1[p_pair_indices_1[i]]); } Gpu::Atomic::AddNoRet(&w2, -p_pair_reaction_weight[i]); if (w2 <= 0._prt) { - auto& p = ptile2_data.m_aos[p_pair_indices_2[i]]; - p.atomicSetID(minus_one_long); + atomicSetIdMinus(idcpu2[p_pair_indices_2[i]]); } // Set the child particle properties appropriately diff --git a/Source/Particles/Collision/BinaryCollision/NuclearFusion/NuclearFusionFunc.H b/Source/Particles/Collision/BinaryCollision/NuclearFusion/NuclearFusionFunc.H index 397536b67bf..b2a2112ca68 100644 --- a/Source/Particles/Collision/BinaryCollision/NuclearFusion/NuclearFusionFunc.H +++ b/Source/Particles/Collision/BinaryCollision/NuclearFusion/NuclearFusionFunc.H @@ -33,10 +33,13 @@ * creation functor. * This functor also reads and contains the fusion multiplier. */ -class NuclearFusionFunc{ +class NuclearFusionFunc +{ // Define shortcuts for frequently-used type names using ParticleType = WarpXParticleContainer::ParticleType; - using ParticleBins = amrex::DenseBins; + using ParticleTileType = WarpXParticleContainer::ParticleTileType; + using ParticleTileDataType = ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; using index_type = ParticleBins::index_type; using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; @@ -154,12 +157,13 @@ public: // other species and we need to decrease their weight accordingly. // c1 corresponds to the minimum number of times a particle of species 1 will be paired // with a particle of species 2. Same for c2. - const index_type c1 = amrex::max(NI2/NI1,1u); - const index_type c2 = amrex::max(NI1/NI2,1u); + // index_type(1): https://github.com/AMReX-Codes/amrex/pull/3684 + const index_type c1 = amrex::max(NI2/NI1, index_type(1)); + const index_type c2 = amrex::max(NI1/NI2, index_type(1)); // multiplier ratio to take into account unsampled pairs const auto multiplier_ratio = static_cast( - (m_isSameSpecies)?(2u*max_N - 1):(max_N)); + m_isSameSpecies ? 
2*max_N - 1 : max_N); #if (defined WARPX_DIM_RZ) amrex::ParticleReal * const AMREX_RESTRICT theta1 = soa_1.m_rdata[PIdx::theta]; diff --git a/Source/Particles/Collision/BinaryCollision/NuclearFusion/ProtonBoronFusionInitializeMomentum.H b/Source/Particles/Collision/BinaryCollision/NuclearFusion/ProtonBoronFusionInitializeMomentum.H index 7b29267ec32..0b51d6b4b61 100644 --- a/Source/Particles/Collision/BinaryCollision/NuclearFusion/ProtonBoronFusionInitializeMomentum.H +++ b/Source/Particles/Collision/BinaryCollision/NuclearFusion/ProtonBoronFusionInitializeMomentum.H @@ -22,10 +22,12 @@ namespace { // Define shortcuts for frequently-used type names - using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; - using ParticleType = WarpXParticleContainer::ParticleType; - using ParticleBins = amrex::DenseBins; - using index_type = ParticleBins::index_type; + using SoaData_type = typename WarpXParticleContainer::ParticleTileType::ParticleTileDataType; + using ParticleType = typename WarpXParticleContainer::ParticleType; + using ParticleTileType = typename WarpXParticleContainer::ParticleTileType; + using ParticleTileDataType = typename ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; + using index_type = typename ParticleBins::index_type; /** * \brief This function initializes the momentum of the alpha particles produced from diff --git a/Source/Particles/Collision/BinaryCollision/NuclearFusion/TwoProductFusionInitializeMomentum.H b/Source/Particles/Collision/BinaryCollision/NuclearFusion/TwoProductFusionInitializeMomentum.H index be3f5b2d957..52e9db8aa94 100644 --- a/Source/Particles/Collision/BinaryCollision/NuclearFusion/TwoProductFusionInitializeMomentum.H +++ b/Source/Particles/Collision/BinaryCollision/NuclearFusion/TwoProductFusionInitializeMomentum.H @@ -24,7 +24,9 @@ namespace { // Define shortcuts for frequently-used type names using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; using ParticleType = WarpXParticleContainer::ParticleType; - using ParticleBins = amrex::DenseBins; + using ParticleTileType = WarpXParticleContainer::ParticleTileType; + using ParticleTileDataType = ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; using index_type = ParticleBins::index_type; /** diff --git a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H index dc830b477df..7a2853e3db5 100644 --- a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H +++ b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H @@ -30,13 +30,15 @@ * \brief This functor creates particles produced from a binary collision and sets their initial * properties (position, momentum, weight). 
*/ -class ParticleCreationFunc{ +class ParticleCreationFunc +{ // Define shortcuts for frequently-used type names - using ParticleType = WarpXParticleContainer::ParticleType; - using ParticleTileType = WarpXParticleContainer::ParticleTileType; - using ParticleBins = amrex::DenseBins; - using index_type = ParticleBins::index_type; - using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; + using ParticleType = typename WarpXParticleContainer::ParticleType; + using ParticleTileType = typename WarpXParticleContainer::ParticleTileType; + using ParticleTileDataType = typename ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; + using index_type = typename ParticleBins::index_type; + using SoaData_type = typename WarpXParticleContainer::ParticleTileType::ParticleTileDataType; public: /** @@ -69,12 +71,6 @@ public: * @param[in, out] soa_1 struct of array data of the first colliding particle species * @param[in, out] soa_2 struct of array data of the second colliding particle species * @param[out] tile_products array containing tile data of the product particles. - * @param[out] particle_ptr_1 pointer to data of the first colliding particle species. Is - * needed to set the id of a particle to -1 in order to delete it when its weight - * reaches 0. - * @param[out] particle_ptr_2 pointer to data of the second colliding particle species. Is - * needed to set the id of a particle to -1 in order to delete it when its weight - * reaches 0. * @param[in] m1 mass of the first colliding particle species * @param[in] m2 mass of the second colliding particle species * @param[in] products_mass array storing the mass of product particles @@ -102,7 +98,6 @@ public: const SoaData_type& soa_1, const SoaData_type& soa_2, const amrex::Vector& pc_products, ParticleTileType** AMREX_RESTRICT tile_products, - ParticleType* particle_ptr_1, ParticleType* particle_ptr_2, const amrex::ParticleReal& m1, const amrex::ParticleReal& m2, const amrex::Vector& products_mass, const index_type* AMREX_RESTRICT p_mask, @@ -137,6 +132,8 @@ public: amrex::ParticleReal* AMREX_RESTRICT w1 = soa_1.m_rdata[PIdx::w]; amrex::ParticleReal* AMREX_RESTRICT w2 = soa_2.m_rdata[PIdx::w]; + uint64_t* AMREX_RESTRICT idcpu1 = soa_1.m_idcpu; + uint64_t* AMREX_RESTRICT idcpu2 = soa_2.m_idcpu; // Create necessary GPU vectors, that will be used in the kernel below amrex::Vector soa_products; @@ -205,16 +202,31 @@ public: amrex::Gpu::Atomic::AddNoRet(&w2[p_pair_indices_2[i]], -p_pair_reaction_weight[i]); + // Note: Particle::atomicSetID should also be provided as a standalone helper function in AMReX + // to replace the following lambda. 
+ auto const atomicSetIdMinus = [] AMREX_GPU_DEVICE (uint64_t & idcpu) + { +#if defined(AMREX_USE_OMP) +#pragma omp atomic write + idcpu = amrex::ParticleIdCpus::Invalid; +#else + amrex::Gpu::Atomic::Exch( + (unsigned long long *)&idcpu, + (unsigned long long)amrex::ParticleIdCpus::Invalid + ); +#endif + }; + // If the colliding particle weight decreases to zero, remove particle by // setting its id to -1 - constexpr amrex::Long minus_one_long = -1; if (w1[p_pair_indices_1[i]] <= amrex::ParticleReal(0.)) { - particle_ptr_1[p_pair_indices_1[i]].atomicSetID(minus_one_long); + atomicSetIdMinus(idcpu1[p_pair_indices_1[i]]); + } if (w2[p_pair_indices_2[i]] <= amrex::ParticleReal(0.)) { - particle_ptr_2[p_pair_indices_2[i]].atomicSetID(minus_one_long); + atomicSetIdMinus(idcpu2[p_pair_indices_2[i]]); } // Initialize the product particles' momentum, using a function depending on the @@ -294,12 +306,14 @@ private: * \brief This class does nothing and is used as second template parameter for binary collisions * that do not create particles. */ -class NoParticleCreationFunc{ - using ParticleType = WarpXParticleContainer::ParticleType; - using ParticleTileType = WarpXParticleContainer::ParticleTileType; - using ParticleBins = amrex::DenseBins; - using index_type = ParticleBins::index_type; - using SoaData_type = WarpXParticleContainer::ParticleTileType::ParticleTileDataType; +class NoParticleCreationFunc +{ + using ParticleType = typename WarpXParticleContainer::ParticleType; + using ParticleTileType = typename WarpXParticleContainer::ParticleTileType; + using ParticleTileDataType = typename ParticleTileType::ParticleTileDataType; + using ParticleBins = amrex::DenseBins; + using index_type = typename ParticleBins::index_type; + using SoaData_type = typename WarpXParticleContainer::ParticleTileType::ParticleTileDataType; public: NoParticleCreationFunc () = default; @@ -313,7 +327,6 @@ public: const SoaData_type& /*soa_1*/, const SoaData_type& /*soa_2*/, amrex::Vector& /*pc_products*/, ParticleTileType** /*tile_products*/, - ParticleType* /*particle_ptr_1*/, ParticleType* /*particle_ptr_2*/, const amrex::ParticleReal& /*m1*/, const amrex::ParticleReal& /*m2*/, const amrex::Vector& /*products_mass*/, const index_type* /*p_mask*/, const amrex::Vector& /*products_np*/, diff --git a/Source/Particles/Collision/BinaryCollision/ShuffleFisherYates.H b/Source/Particles/Collision/BinaryCollision/ShuffleFisherYates.H index 42259512b0d..3b8f72f4b84 100644 --- a/Source/Particles/Collision/BinaryCollision/ShuffleFisherYates.H +++ b/Source/Particles/Collision/BinaryCollision/ShuffleFisherYates.H @@ -12,7 +12,7 @@ /* \brief Shuffle array according to Fisher-Yates algorithm. * Only shuffle the part between is <= i < ie, n = ie-is. 
* T_index shall be
- * amrex::DenseBins<WarpXParticleContainer::ParticleType>::index_type
+ * amrex::DenseBins<WarpXParticleContainer::ParticleTileType::ParticleTileDataType>::index_type
 */
template <typename T_index>
diff --git a/Source/Particles/Deposition/ChargeDeposition.H b/Source/Particles/Deposition/ChargeDeposition.H
index d0db678dfda..d0822789015 100644
--- a/Source/Particles/Deposition/ChargeDeposition.H
+++ b/Source/Particles/Deposition/ChargeDeposition.H
@@ -252,7 +252,7 @@ void doChargeDepositionSharedShapeN (const GetParticlePosition<PIdx>& GetPosition,
                                      const int n_rz_azimuthal_modes,
                                      amrex::Real* cost,
                                      const long load_balance_costs_update_algo,
-                                     const amrex::DenseBins<WarpXParticleContainer::ParticleType>& a_bins,
+                                     const amrex::DenseBins<WarpXParticleContainer::ParticleTileType::ParticleTileDataType>& a_bins,
                                      const amrex::Box& box,
                                      const amrex::Geometry& geom,
                                      const amrex::IntVect& a_tbox_max_size,
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index 18df09c3b43..2252a63fd07 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -592,7 +592,7 @@ void doDepositionSharedShapeN (const GetParticlePosition<PIdx>& GetPosition,
                                int n_rz_azimuthal_modes,
                                amrex::Real* cost,
                                long load_balance_costs_update_algo,
-                               const amrex::DenseBins<WarpXParticleContainer::ParticleType>& a_bins,
+                               const amrex::DenseBins<WarpXParticleContainer::ParticleTileType::ParticleTileDataType>& a_bins,
                                const amrex::Box& box,
                                const amrex::Geometry& geom,
                                const amrex::IntVect& a_tbox_max_size)
diff --git a/Source/Particles/ElementaryProcess/QEDPairGeneration.H b/Source/Particles/ElementaryProcess/QEDPairGeneration.H
index 5abc9282d4f..fb723f0b79a 100644
--- a/Source/Particles/ElementaryProcess/QEDPairGeneration.H
+++ b/Source/Particles/ElementaryProcess/QEDPairGeneration.H
@@ -167,7 +167,7 @@ public:
             p_ux, p_uy, p_uz,
             engine);

-        src.m_aos[i_src].id() = -1; //destroy photon after pair generation
+        src.m_idcpu[i_src] = amrex::ParticleIdCpus::Invalid; // destroy photon after pair generation
     }

 private:
diff --git a/Source/Particles/ElementaryProcess/QEDPhotonEmission.H b/Source/Particles/ElementaryProcess/QEDPhotonEmission.H
index 8ba5c63ad57..567b260d0e4 100644
--- a/Source/Particles/ElementaryProcess/QEDPhotonEmission.H
+++ b/Source/Particles/ElementaryProcess/QEDPhotonEmission.H
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

@@ -237,12 +238,11 @@ void cleanLowEnergyPhotons(
     const int old_size, const int num_added,
     const amrex::ParticleReal energy_threshold)
 {
-    auto pp = ptile.GetArrayOfStructs()().data() + old_size;
-
-    const auto& soa = ptile.GetStructOfArrays();
+    auto& soa = ptile.GetStructOfArrays();
+    auto p_idcpu = soa.GetIdCPUData().data() + old_size;
     const auto p_ux = soa.GetRealData(PIdx::ux).data() + old_size;
-    const auto p_uy = soa.GetRealData(PIdx::uy).data() + old_size;
-    const auto p_uz = soa.GetRealData(PIdx::uz).data() + old_size;
+    const auto p_uy = soa.GetRealData(PIdx::uy).data() + old_size;
+    const auto p_uz = soa.GetRealData(PIdx::uz).data() + old_size;

     //The square of the energy threshold
     const auto energy_threshold2 = std::max(
@@ -251,8 +251,6 @@ void cleanLowEnergyPhotons(
     amrex::ParallelFor(num_added, [=] AMREX_GPU_DEVICE (int ip) noexcept
     {
-        auto& p = pp[ip];
-
         const auto ux = p_ux[ip];
         const auto uy = p_uy[ip];
         const auto uz = p_uz[ip];
@@ -262,8 +260,8 @@ void cleanLowEnergyPhotons(
         constexpr amrex::ParticleReal me_c = PhysConst::m_e*PhysConst::c;

         const auto phot_energy2 = (ux*ux + uy*uy + uz*uz)*me_c*me_c;

-        if (phot_energy2 < energy_threshold2){
-            p.id() = - 1;
+        if (phot_energy2 < energy_threshold2) {
+            p_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
         }
     });
 }
diff --git a/Source/Particles/LaserParticleContainer.H b/Source/Particles/LaserParticleContainer.H
index e6fa308431c..fac94ff20a3 100644 --- a/Source/Particles/LaserParticleContainer.H +++ b/Source/Particles/LaserParticleContainer.H @@ -56,10 +56,9 @@ public: * \brief Method to initialize runtime attributes. Does nothing for LaserParticleContainer. */ void DefaultInitializeRuntimeAttributes ( - amrex::ParticleTile, - NArrayReal, NArrayInt, amrex::PinnedArenaAllocator>& /*pinned_tile*/, - const int /*n_external_attr_real*/, - const int /*n_external_attr_int*/) final {} + typename ContainerLike::ParticleTileType& /*pinned_tile*/, + int /*n_external_attr_real*/, + int /*n_external_attr_int*/) final {} void ReadHeader (std::istream& is) final; diff --git a/Source/Particles/NamedComponentParticleContainer.H b/Source/Particles/NamedComponentParticleContainer.H index 3be0886425d..e7a7a20fad5 100644 --- a/Source/Particles/NamedComponentParticleContainer.H +++ b/Source/Particles/NamedComponentParticleContainer.H @@ -18,24 +18,39 @@ #include -/** Particle Attributes stored in amrex::ParticleContainer's struct of array +/** Real Particle Attributes stored in amrex::ParticleContainer's struct of array */ struct PIdx { enum { - w = 0, ///< weight +#if !defined (WARPX_DIM_1D_Z) + x, +#endif +#if defined (WARPX_DIM_3D) + y, +#endif + z, + w, ///< weight ux, uy, uz, #ifdef WARPX_DIM_RZ theta, ///< RZ needs all three position components #endif - nattribs ///< number of attributes + nattribs ///< number of compile-time attributes + }; +}; + +/** Integer Particle Attributes stored in amrex::ParticleContainer's struct of array + */ +struct PIdxInt +{ + enum { + nattribs ///< number of compile-time attributes }; }; /** Particle Container class that allows to add/access particle components * with a name (string) instead of doing so with an integer index. - * (The "components" are all the particle quantities - except those - * that are stored in an AoS by amrex, i.e. the particle positions and ID) + * (The "components" are all the particle amrex::Real quantities.) * * This is done by storing maps that give the index of the component * that corresponds to a given string. @@ -45,11 +60,11 @@ struct PIdx */ template class T_Allocator=amrex::DefaultAllocator> class NamedComponentParticleContainer : -public amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator> +public amrex::ParticleContainerPureSoA { public: /** Construct an empty NamedComponentParticleContainer **/ - NamedComponentParticleContainer () : amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>() {} + NamedComponentParticleContainer () : amrex::ParticleContainerPureSoA() {} /** Construct a NamedComponentParticleContainer from an AmrParGDB object * @@ -61,8 +76,15 @@ public: * AMR hierarchy. Usually, this is generated by an AmrCore or AmrLevel object. 
*/ NamedComponentParticleContainer (amrex::AmrParGDB* amr_pgdb) - : amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>(amr_pgdb) { + : amrex::ParticleContainerPureSoA(amr_pgdb) { // build up the map of string names to particle component numbers +#if !defined (WARPX_DIM_1D_Z) + particle_comps["x"] = PIdx::x; +#endif +#if defined (WARPX_DIM_3D) + particle_comps["y"] = PIdx::y; +#endif + particle_comps["z"] = PIdx::z; particle_comps["w"] = PIdx::w; particle_comps["ux"] = PIdx::ux; particle_comps["uy"] = PIdx::uy; @@ -85,12 +107,12 @@ public: * @param p_ricomps name-to-index map for run-time integer components */ NamedComponentParticleContainer( - amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator> && pc, + amrex::ParticleContainerPureSoA && pc, std::map p_comps, std::map p_icomps, std::map p_rcomps, std::map p_ricomps) - : amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>(std::move(pc)), + : amrex::ParticleContainerPureSoA(std::move(pc)), particle_comps(std::move(p_comps)), particle_icomps(std::move(p_icomps)), particle_runtime_comps(std::move(p_rcomps)), @@ -118,7 +140,7 @@ public: NamedComponentParticleContainer make_alike () const { auto tmp = NamedComponentParticleContainer( - amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>::template make_alike(), + amrex::ParticleContainerPureSoA::template make_alike(), particle_comps, particle_icomps, particle_runtime_comps, @@ -127,10 +149,10 @@ public: return tmp; } - using amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>::NumRealComps; - using amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>::NumIntComps; - using amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>::AddRealComp; - using amrex::ParticleContainer<0,0,PIdx::nattribs,0,T_Allocator>::AddIntComp; + using amrex::ParticleContainerPureSoA::NumRealComps; + using amrex::ParticleContainerPureSoA::NumIntComps; + using amrex::ParticleContainerPureSoA::AddRealComp; + using amrex::ParticleContainerPureSoA::AddIntComp; /** Allocate a new run-time real component * diff --git a/Source/Particles/ParticleBoundaryBuffer.cpp b/Source/Particles/ParticleBoundaryBuffer.cpp index 54c4396379d..88304bd8a9c 100644 --- a/Source/Particles/ParticleBoundaryBuffer.cpp +++ b/Source/Particles/ParticleBoundaryBuffer.cpp @@ -50,7 +50,7 @@ struct CopyAndTimestamp { void operator() (const DstData& dst, const SrcData& src, int src_i, int dst_i) const noexcept { - dst.m_aos[dst_i] = src.m_aos[src_i]; + dst.m_idcpu[dst_i] = src.m_idcpu[src_i]; for (int j = 0; j < SrcData::NAR; ++j) { dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i]; } @@ -222,7 +222,7 @@ void ParticleBoundaryBuffer::gatherParticles (MultiParticleContainer& mypc, { WARPX_PROFILE("ParticleBoundaryBuffer::gatherParticles"); - using PIter = amrex::ParConstIter<0,0,PIdx::nattribs>; + using PIter = amrex::ParConstIterSoA; const auto& warpx_instance = WarpX::GetInstance(); const amrex::Geometry& geom = warpx_instance.Geom(0); auto plo = geom.ProbLoArray(); diff --git a/Source/Particles/ParticleCreation/SmartCopy.H b/Source/Particles/ParticleCreation/SmartCopy.H index 2c04baa18bb..6a6ceb3d290 100644 --- a/Source/Particles/ParticleCreation/SmartCopy.H +++ b/Source/Particles/ParticleCreation/SmartCopy.H @@ -26,7 +26,7 @@ * type. Second, if a given component name is found in both the src * and the dst, then the src value is copied. * - * Particle structs - positions and id numbers - are always copied. + * Particle positions and id numbers are always copied. 
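With positions and ids moved into the SoA arrays, a smart copy reduces to a name-matched copy between component maps, as the SmartCopy comment above describes. A simplified host-side sketch of that matching logic; the Tile type here is a stand-in, not the WarpX tile:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    // Two struct-of-arrays tiles with name -> component-index maps, mirroring
    // the string-keyed component lookup of NamedComponentParticleContainer.
    struct Tile
    {
        std::map<std::string, int> comps;        // name -> component index
        std::vector<std::vector<double>> rdata;  // one array per component
    };

    // Copy every component whose name exists in both source and destination.
    void smartCopy (Tile& dst, Tile const& src, int i_src, int i_dst)
    {
        for (auto const& [name, j_src] : src.comps) {
            auto it = dst.comps.find(name);
            if (it != dst.comps.end()) {
                dst.rdata[it->second][i_dst] = src.rdata[j_src][i_src];
            }
        }
    }

    int main ()
    {
        Tile src{{{"w", 0}, {"ux", 1}}, {{1.0}, {2.0}}};
        Tile dst{{{"w", 0}},            {{0.0}}};
        smartCopy(dst, src, 0, 0);
        assert(dst.rdata[0][0] == 1.0);  // "w" copied; "ux" has no match
        return 0;
    }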
* * You don't create this directly - use the SmartCopyFactory object below. */ @@ -48,9 +48,6 @@ struct SmartCopy void operator() (DstData& dst, const SrcData& src, int i_src, int i_dst, amrex::RandomEngine const& engine) const noexcept { - // the particle struct is always copied over - dst.m_aos[i_dst] = src.m_aos[i_src]; - // initialize the real components for (int j = 0; j < DstData::NAR; ++j) { dst.m_rdata[j][i_dst] = initializeRealValue(m_policy_real[j], engine); diff --git a/Source/Particles/ParticleCreation/SmartCreate.H b/Source/Particles/ParticleCreation/SmartCreate.H index 67d7767a5d3..b4f25d5daad 100644 --- a/Source/Particles/ParticleCreation/SmartCreate.H +++ b/Source/Particles/ParticleCreation/SmartCreate.H @@ -14,6 +14,8 @@ #include #include #include +#include +#include /** * \brief This is a functor for performing a "smart create" that works @@ -47,23 +49,22 @@ struct SmartCreate const int id = 0) const noexcept { #if defined(WARPX_DIM_3D) - prt.m_aos[i_prt].pos(0) = x; - prt.m_aos[i_prt].pos(1) = y; - prt.m_aos[i_prt].pos(2) = z; + prt.m_rdata[PIdx::x][i_prt] = x; + prt.m_rdata[PIdx::y][i_prt] = y; + prt.m_rdata[PIdx::z][i_prt] = z; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - prt.m_aos[i_prt].pos(0) = x; - prt.m_aos[i_prt].pos(1) = z; + prt.m_rdata[PIdx::x][i_prt] = x; + prt.m_rdata[PIdx::z][i_prt] = z; amrex::ignore_unused(y); #else - prt.m_aos[i_prt].pos(0) = z; + prt.m_rdata[PIdx::z][i_prt] = z; amrex::ignore_unused(x,y); #endif - prt.m_aos[i_prt].cpu() = cpu; - prt.m_aos[i_prt].id() = id; + prt.m_idcpu[i_prt] = amrex::SetParticleIDandCPU(id, cpu); - // initialize the real components - for (int j = 0; j < PartData::NAR; ++j) { + // initialize the real components after position + for (int j = AMREX_SPACEDIM; j < PartData::NAR; ++j) { prt.m_rdata[j][i_prt] = initializeRealValue(m_policy_real[j], engine); } for (int j = 0; j < prt.m_num_runtime_real; ++j) { diff --git a/Source/Particles/ParticleCreation/SmartUtils.H b/Source/Particles/ParticleCreation/SmartUtils.H index 732a12bb729..f84734308fb 100644 --- a/Source/Particles/ParticleCreation/SmartUtils.H +++ b/Source/Particles/ParticleCreation/SmartUtils.H @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -60,12 +61,11 @@ void setNewParticleIDs (PTile& ptile, int old_size, int num_added) } const int cpuid = amrex::ParallelDescriptor::MyProc(); - auto pp = ptile.GetArrayOfStructs()().data() + old_size; + auto ptd = ptile.getParticleTileData(); amrex::ParallelFor(num_added, [=] AMREX_GPU_DEVICE (int ip) noexcept { - auto& p = pp[ip]; - p.id() = pid+ip; - p.cpu() = cpuid; + auto const new_id = ip + old_size; + ptd.m_idcpu[new_id] = amrex::SetParticleIDandCPU(pid+ip, cpuid); }); } diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H index a12ae75f629..edf91a84526 100644 --- a/Source/Particles/PhysicalParticleContainer.H +++ b/Source/Particles/PhysicalParticleContainer.H @@ -268,11 +268,9 @@ public: * @param[in] engine the random engine, used in initialization of QED optical depths */ void DefaultInitializeRuntimeAttributes ( - amrex::ParticleTile, - NArrayReal, NArrayInt, - amrex::PinnedArenaAllocator>& pinned_tile, - int n_external_attr_real, - int n_external_attr_int) final; + typename ContainerLike::ParticleTileType& pinned_tile, + int n_external_attr_real, + int n_external_attr_int) final; /** * \brief Apply NCI Godfrey filter to all components of E and B before gather diff --git a/Source/Particles/PhysicalParticleContainer.cpp 
b/Source/Particles/PhysicalParticleContainer.cpp index 929c3c26649..08c784709fa 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -198,8 +198,8 @@ namespace * and avoid any possible undefined behavior before the next call to redistribute) and sets * the particle id to -1 so that it can be effectively deleted. * - * \param p particle aos data - * \param pa particle soa data + * \param idcpu particle id soa data + * \param pa particle real soa data * \param ip index for soa data * \param do_field_ionization whether species has ionization * \param pi ionization level data @@ -210,20 +210,21 @@ namespace */ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ZeroInitializeAndSetNegativeID ( - ParticleType& p, const GpuArray& pa, long& ip, + uint64_t * AMREX_RESTRICT idcpu, + const GpuArray& pa, long& ip, const bool& do_field_ionization, int* pi #ifdef WARPX_QED - ,const bool& has_quantum_sync, amrex::ParticleReal* p_optical_depth_QSR - ,const bool& has_breit_wheeler, amrex::ParticleReal* p_optical_depth_BW + ,const bool& has_quantum_sync, amrex::ParticleReal* AMREX_RESTRICT p_optical_depth_QSR + ,const bool& has_breit_wheeler, amrex::ParticleReal* AMREX_RESTRICT p_optical_depth_BW #endif ) noexcept { - p.pos(0) = 0._rt; + pa[PIdx::z][ip] = 0._rt; #if (AMREX_SPACEDIM >= 2) - p.pos(1) = 0._rt; + pa[PIdx::x][ip] = 0._rt; #endif #if defined(WARPX_DIM_3D) - p.pos(2) = 0._rt; + pa[PIdx::y][ip] = 0._rt; #endif pa[PIdx::w ][ip] = 0._rt; pa[PIdx::ux][ip] = 0._rt; @@ -238,7 +239,7 @@ namespace if (has_breit_wheeler) {p_optical_depth_BW[ip] = 0._rt;} #endif - p.id() = -1; + idcpu[ip] = amrex::ParticleIdCpus::Invalid; } } @@ -780,11 +781,9 @@ PhysicalParticleContainer::AddPlasmaFromFile(PlasmaInjector & plasma_injector, void PhysicalParticleContainer::DefaultInitializeRuntimeAttributes ( - amrex::ParticleTile, - NArrayReal, NArrayInt, - amrex::PinnedArenaAllocator>& pinned_tile, - const int n_external_attr_real, - const int n_external_attr_int) + typename ContainerLike::ParticleTileType& pinned_tile, + int n_external_attr_real, + int n_external_attr_int) { ParticleCreation::DefaultInitializeRuntimeAttributes(pinned_tile, n_external_attr_real, n_external_attr_int, @@ -1084,7 +1083,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int const int max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); // Update NextID to include particles created in this function - Long pid; + int pid; #ifdef AMREX_USE_OMP #pragma omp critical (add_plasma_nextid) #endif @@ -1093,7 +1092,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int ParticleType::NextID(pid+max_new_particles); } WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - static_cast(pid + max_new_particles) < LastParticleID, + static_cast(pid) + static_cast(max_new_particles) < LongParticleIds::LastParticleID, "ERROR: overflow on particle id numbers"); const int cpuid = ParallelDescriptor::MyProc(); @@ -1104,16 +1103,16 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int DefineAndReturnParticleTile(lev, grid_id, tile_id); } - auto old_size = particle_tile.GetArrayOfStructs().size(); + auto old_size = particle_tile.size(); auto new_size = old_size + max_new_particles; particle_tile.resize(new_size); - ParticleType* pp = particle_tile.GetArrayOfStructs()().data() + old_size; auto& soa = particle_tile.GetStructOfArrays(); GpuArray pa; for (int ia = 0; ia < PIdx::nattribs; ++ia) { pa[ia] = 
soa.GetRealData(ia).data() + old_size; } + uint64_t * AMREX_RESTRICT pa_idcpu = soa.GetIdCPUData().data() + old_size; // user-defined integer and real attributes const auto n_user_int_attribs = static_cast(m_user_int_attribs.size()); const auto n_user_real_attribs = static_cast(m_user_real_attribs.size()); @@ -1226,9 +1225,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int for (int i_part = 0; i_part < pcounts[index]; ++i_part) { long ip = poffset[index] + i_part; - ParticleType& p = pp[ip]; - p.id() = pid+ip; - p.cpu() = cpuid; + pa_idcpu[ip] = amrex::SetParticleIDandCPU(pid+ip, cpuid); const XDim3 r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) ? // In the refined injection region: use refinement ratio `lrrfac` inj_pos->getPositionUnitBox(i_part, lrrfac, engine) : @@ -1238,7 +1235,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int #if defined(WARPX_DIM_3D) if (!tile_realbox.contains(XDim3{pos.x,pos.y,pos.z})) { - ZeroInitializeAndSetNegativeID(p, pa, ip, loc_do_field_ionization, pi + ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi #ifdef WARPX_QED ,loc_has_quantum_sync, p_optical_depth_QSR ,loc_has_breit_wheeler, p_optical_depth_BW @@ -1249,7 +1246,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) amrex::ignore_unused(k); if (!tile_realbox.contains(XDim3{pos.x,pos.z,0.0_rt})) { - ZeroInitializeAndSetNegativeID(p, pa, ip, loc_do_field_ionization, pi + ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi #ifdef WARPX_QED ,loc_has_quantum_sync, p_optical_depth_QSR ,loc_has_breit_wheeler, p_optical_depth_BW @@ -1260,7 +1257,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int #else amrex::ignore_unused(j,k); if (!tile_realbox.contains(XDim3{pos.z,0.0_rt,0.0_rt})) { - ZeroInitializeAndSetNegativeID(p, pa, ip, loc_do_field_ionization, pi + ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi #ifdef WARPX_QED ,loc_has_quantum_sync, p_optical_depth_QSR ,loc_has_breit_wheeler, p_optical_depth_BW @@ -1299,7 +1296,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int const Real z0 = applyBallisticCorrection(pos, inj_mom, gamma_boost, beta_boost, t); if (!inj_pos->insideBounds(xb, yb, z0)) { - ZeroInitializeAndSetNegativeID(p, pa, ip, loc_do_field_ionization, pi + ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi #ifdef WARPX_QED ,loc_has_quantum_sync, p_optical_depth_QSR ,loc_has_breit_wheeler, p_optical_depth_BW @@ -1313,7 +1310,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int // Remove particle if density below threshold if ( dens < density_min ){ - ZeroInitializeAndSetNegativeID(p, pa, ip, loc_do_field_ionization, pi + ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi #ifdef WARPX_QED ,loc_has_quantum_sync, p_optical_depth_QSR ,loc_has_breit_wheeler, p_optical_depth_BW @@ -1331,7 +1328,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int // If the particle is not within the lab-frame zmin, zmax, etc. // go to the next generated particle. 
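amrex::SetParticleIDandCPU packs the particle id and the creating rank into the single 64-bit idcpu word that replaces the old AoS id/cpu pair. A self-contained sketch of such a packing; the 40-bit id / 24-bit cpu split follows the container documentation later in this patch, but the exact bit layout and validity handling are AMReX's, so treat this as illustrative:

    #include <cassert>
    #include <cstdint>

    // Illustrative 40-bit id / 24-bit cpu packing (not the authoritative
    // AMReX layout): id in the high bits, creating rank in the low 24 bits.
    constexpr std::uint64_t pack (std::uint64_t id, std::uint64_t cpu)
    {
        return (id << 24) | (cpu & 0xFFFFFF);
    }
    constexpr std::uint64_t unpackId  (std::uint64_t idcpu) { return idcpu >> 24; }
    constexpr std::uint64_t unpackCpu (std::uint64_t idcpu) { return idcpu & 0xFFFFFF; }

    int main ()
    {
        constexpr auto word = pack(12345, 7);
        static_assert(unpackId(word) == 12345 && unpackCpu(word) == 7, "");
        return 0;
    }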
if (!inj_pos->insideBounds(xb, yb, z0_lab)) { - ZeroInitializeAndSetNegativeID(p, pa, ip, loc_do_field_ionization, pi + ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi #ifdef WARPX_QED ,loc_has_quantum_sync, p_optical_depth_QSR ,loc_has_breit_wheeler, p_optical_depth_BW @@ -1343,7 +1340,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int dens = inj_rho->getDensity(pos.x, pos.y, z0_lab); // Remove particle if density below threshold if ( dens < density_min ){ - ZeroInitializeAndSetNegativeID(p, pa, ip, loc_do_field_ionization, pi + ZeroInitializeAndSetNegativeID(pa_idcpu, pa, ip, loc_do_field_ionization, pi #ifdef WARPX_QED ,loc_has_quantum_sync, p_optical_depth_QSR ,loc_has_breit_wheeler, p_optical_depth_BW @@ -1410,17 +1407,17 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int pa[PIdx::uz][ip] = u.z; #if defined(WARPX_DIM_3D) - p.pos(0) = pos.x; - p.pos(1) = pos.y; - p.pos(2) = pos.z; + pa[PIdx::x][ip] = pos.x; + pa[PIdx::y][ip] = pos.y; + pa[PIdx::z][ip] = pos.z; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) #ifdef WARPX_DIM_RZ pa[PIdx::theta][ip] = theta; #endif - p.pos(0) = xb; - p.pos(1) = pos.z; + pa[PIdx::x][ip] = xb; + pa[PIdx::z][ip] = pos.z; #else - p.pos(0) = pos.z; + pa[PIdx::z][ip] = pos.z; #endif } }); @@ -1645,7 +1642,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, const int max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); // Update NextID to include particles created in this function - Long pid; + int pid; #ifdef AMREX_USE_OMP #pragma omp critical (add_plasma_nextid) #endif @@ -1654,23 +1651,23 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, ParticleType::NextID(pid+max_new_particles); } WARPX_ALWAYS_ASSERT_WITH_MESSAGE( - static_cast(pid + max_new_particles) < LastParticleID, + static_cast(pid) + static_cast(max_new_particles) < LongParticleIds::LastParticleID, "overflow on particle id numbers"); const int cpuid = ParallelDescriptor::MyProc(); auto& particle_tile = tmp_pc.DefineAndReturnParticleTile(0, grid_id, tile_id); - auto old_size = particle_tile.GetArrayOfStructs().size(); + auto old_size = particle_tile.size(); auto new_size = old_size + max_new_particles; particle_tile.resize(new_size); - ParticleType* pp = particle_tile.GetArrayOfStructs()().data() + old_size; auto& soa = particle_tile.GetStructOfArrays(); GpuArray pa; for (int ia = 0; ia < PIdx::nattribs; ++ia) { pa[ia] = soa.GetRealData(ia).data() + old_size; } + uint64_t * AMREX_RESTRICT pa_idcpu = soa.GetIdCPUData().data() + old_size; // user-defined integer and real attributes const auto n_user_int_attribs = static_cast(m_user_int_attribs.size()); @@ -1768,9 +1765,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, for (int i_part = 0; i_part < pcounts[index]; ++i_part) { const long ip = poffset[index] + i_part; - ParticleType& p = pp[ip]; - p.id() = pid+ip; - p.cpu() = cpuid; + pa_idcpu[ip] = amrex::SetParticleIDandCPU(pid+ip, cpuid); // This assumes the flux_pos is of type InjectorPositionRandomPlane const XDim3 r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) ? @@ -1795,19 +1790,19 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, // the particles will be within the domain. 
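Note how the id-range assertion above widens pid and max_new_particles to 64 bits before adding them, so the check cannot itself overflow now that pid is a plain int. A small sketch of why the order of widening and addition matters; the limit value is illustrative:

    #include <cassert>
    #include <cstdint>

    // Summing two ints may overflow int; summing their widened copies cannot.
    bool idsAvailable (int next_id, int n_new, std::int64_t last_valid_id)
    {
        return static_cast<std::int64_t>(next_id)
             + static_cast<std::int64_t>(n_new) < last_valid_id;
    }

    int main ()
    {
        assert(idsAvailable(100, 5, 1'000'000));
        // Near the 32-bit limit, the unwidened sum would wrap around:
        assert(!idsAvailable(2'147'483'000, 1'000, 2'147'483'647));
        return 0;
    }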
#if defined(WARPX_DIM_3D) if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.x,ppos.y,ppos.z})) { - p.id() = -1; + pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid; continue; } #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) amrex::ignore_unused(k); if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.x,ppos.z,0.0_prt})) { - p.id() = -1; + pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid; continue; } #else amrex::ignore_unused(j,k); if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.z,0.0_prt,0.0_prt})) { - p.id() = -1; + pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid; continue; } #endif @@ -1815,7 +1810,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, // If the particle's initial position is not within or on the species's // xmin, xmax, ymin, ymax, zmin, zmax, go to the next generated particle. if (!flux_pos->insideBoundsInclusive(ppos.x, ppos.y, ppos.z)) { - p.id() = -1; + pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid; continue; } @@ -1848,8 +1843,8 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, #endif Real flux = inj_flux->getFlux(ppos.x, ppos.y, ppos.z, t); // Remove particle if flux is negative or 0 - if ( flux <=0 ){ - p.id() = -1; + if (flux <= 0) { + pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid; continue; } @@ -1858,7 +1853,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, } #ifdef WARPX_QED - if(loc_has_quantum_sync){ + if (loc_has_quantum_sync) { p_optical_depth_QSR[ip] = quantum_sync_get_opt(engine); } @@ -1908,18 +1903,18 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector, UpdatePosition(ppos.x, ppos.y, ppos.z, pu.x, pu.y, pu.z, t_fract); #if defined(WARPX_DIM_3D) - p.pos(0) = ppos.x; - p.pos(1) = ppos.y; - p.pos(2) = ppos.z; + pa[PIdx::x][ip] = ppos.x; + pa[PIdx::y][ip] = ppos.y; + pa[PIdx::z][ip] = ppos.z; #elif defined(WARPX_DIM_RZ) pa[PIdx::theta][ip] = std::atan2(ppos.y, ppos.x); - p.pos(0) = std::sqrt(ppos.x*ppos.x + ppos.y*ppos.y); - p.pos(1) = ppos.z; + pa[PIdx::x][ip] = std::sqrt(ppos.x*ppos.x + ppos.y*ppos.y); + pa[PIdx::z][ip] = ppos.z; #elif defined(WARPX_DIM_XZ) - p.pos(0) = ppos.x; - p.pos(1) = ppos.z; + pa[PIdx::x][ip] = ppos.x; + pa[PIdx::z][ip] = ppos.z; #else - p.pos(0) = ppos.z; + pa[PIdx::z][ip] = ppos.z; #endif } }); @@ -2342,20 +2337,22 @@ PhysicalParticleContainer::SplitParticles (int lev) split_offset[1] /= ppc_nd[1]; split_offset[2] /= ppc_nd[2]; } - // particle Array Of Structs data - auto& particles = pti.GetArrayOfStructs(); // particle Struct Of Arrays data auto& attribs = pti.GetAttribs(); auto& wp = attribs[PIdx::w ]; auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; + + ParticleTileType& ptile = ParticlesAt(lev, pti); + auto& soa = ptile.GetStructOfArrays(); + uint64_t * const AMREX_RESTRICT idcpu = soa.GetIdCPUData().data(); + const long np = pti.numParticles(); for(int i=0; i> attr_int; pctmp_split.AddNParticles(lev, np_split_to_add, - xp, yp, zp, uxp, uyp, uzp, - 1, attr, + xp, + yp, + zp, + uxp, + uyp, + uzp, + 1, + attr, 0, attr_int, - 1, NoSplitParticleID); + 1, LongParticleIds::NoSplitParticleID); // Copy particles from tmp to current particle container constexpr bool local_flag = true; addParticles(pctmp_split,local_flag); diff --git a/Source/Particles/Pusher/GetAndSetPosition.H b/Source/Particles/Pusher/GetAndSetPosition.H index e4477a2a60d..44641557756 100644 --- a/Source/Particles/Pusher/GetAndSetPosition.H +++ 
b/Source/Particles/Pusher/GetAndSetPosition.H @@ -30,24 +30,26 @@ void get_particle_position (const WarpXParticleContainer::SuperParticleType& p, amrex::ParticleReal& y, amrex::ParticleReal& z) noexcept { -#ifdef WARPX_DIM_RZ - const amrex::ParticleReal theta = p.rdata(T_PIdx::theta); - const amrex::ParticleReal r = p.pos(0); + using namespace amrex::literals; + +#if defined(WARPX_DIM_RZ) + amrex::ParticleReal const theta = p.rdata(T_PIdx::theta); + amrex::ParticleReal const r = p.pos(T_PIdx::x); x = r*std::cos(theta); y = r*std::sin(theta); - z = p.pos(1); -#elif WARPX_DIM_3D - x = p.pos(0); - y = p.pos(1); - z = p.pos(2); -#elif WARPX_DIM_XZ - x = p.pos(0); - y = amrex::ParticleReal(0.0); - z = p.pos(1); + z = p.pos(PIdx::z); +#elif defined(WARPX_DIM_3D) + x = p.pos(PIdx::x); + y = p.pos(PIdx::y); + z = p.pos(PIdx::z); +#elif defined(WARPX_DIM_XZ) + x = p.pos(PIdx::x); + y = 0_prt; + z = p.pos(PIdx::z); #else - x = amrex::ParticleReal(0.0); - y = amrex::ParticleReal(0.0); - z = p.pos(0); + x = 0_prt; + y = 0_prt; + z = p.pos(PIdx::z); #endif } @@ -59,10 +61,19 @@ void get_particle_position (const WarpXParticleContainer::SuperParticleType& p, template struct GetParticlePosition { - using PType = WarpXParticleContainer::ParticleType; using RType = amrex::ParticleReal; - const PType* AMREX_RESTRICT m_structs = nullptr; +#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_XZ) + const RType* AMREX_RESTRICT m_x = nullptr; + const RType* AMREX_RESTRICT m_z = nullptr; +#elif defined(WARPX_DIM_3D) + const RType* AMREX_RESTRICT m_x = nullptr; + const RType* AMREX_RESTRICT m_y = nullptr; + const RType* AMREX_RESTRICT m_z = nullptr; +#elif defined(WARPX_DIM_1D_Z) + const RType* AMREX_RESTRICT m_z = nullptr; +#endif + #if defined(WARPX_DIM_RZ) const RType* m_theta = nullptr; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) @@ -84,10 +95,19 @@ struct GetParticlePosition template GetParticlePosition (const ptiType& a_pti, long a_offset = 0) noexcept { - const auto& aos = a_pti.GetArrayOfStructs(); - m_structs = aos().dataPtr() + a_offset; -#if defined(WARPX_DIM_RZ) const auto& soa = a_pti.GetStructOfArrays(); + +#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_XZ) + m_x = soa.GetRealData(PIdx::x).dataPtr() + a_offset; + m_z = soa.GetRealData(PIdx::z).dataPtr() + a_offset; +#elif defined(WARPX_DIM_3D) + m_x = soa.GetRealData(PIdx::x).dataPtr() + a_offset; + m_y = soa.GetRealData(PIdx::y).dataPtr() + a_offset; + m_z = soa.GetRealData(PIdx::z).dataPtr() + a_offset; +#elif defined(WARPX_DIM_1D_Z) + m_z = soa.GetRealData(PIdx::z).dataPtr() + a_offset; +#endif +#if defined(WARPX_DIM_RZ) m_theta = soa.GetRealData(T_PIdx::theta).dataPtr() + a_offset; #endif } @@ -98,24 +118,23 @@ struct GetParticlePosition AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void operator() (const long i, RType& x, RType& y, RType& z) const noexcept { - const PType& p = m_structs[i]; #ifdef WARPX_DIM_RZ - const RType r = p.pos(0); + RType const r = m_x[i]; x = r*std::cos(m_theta[i]); y = r*std::sin(m_theta[i]); - z = p.pos(1); + z = m_z[i]; #elif WARPX_DIM_3D - x = p.pos(0); - y = p.pos(1); - z = p.pos(2); + x = m_x[i]; + y = m_y[i]; + z = m_z[i]; #elif WARPX_DIM_XZ - x = p.pos(0); + x = m_x[i]; y = m_y_default; - z = p.pos(1); + z = m_z[i]; #else x = m_x_default; y = m_y_default; - z = p.pos(0); + z = m_z[i]; #endif } @@ -127,23 +146,22 @@ struct GetParticlePosition AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AsStored (const long i, RType& x, RType& y, RType& z) const noexcept { - const PType& p = m_structs[i]; #ifdef WARPX_DIM_RZ - x = 
p.pos(0); + x = m_x[i]; y = m_theta[i]; - z = p.pos(1); + z = m_z[i]; #elif WARPX_DIM_3D - x = p.pos(0); - y = p.pos(1); - z = p.pos(2); + x = m_x[i]; + y = m_y[i]; + z = m_z[i]; #elif WARPX_DIM_XZ - x = p.pos(0); + x = m_x[i]; y = m_y_default; - z = p.pos(1); + z = m_z[i]; #else x = m_x_default; y = m_y_default; - z = p.pos(0); + z = m_z[i]; #endif } }; @@ -158,10 +176,18 @@ struct GetParticlePosition template struct SetParticlePosition { - using PType = WarpXParticleContainer::ParticleType; using RType = amrex::ParticleReal; - PType* AMREX_RESTRICT m_structs; +#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_XZ) + RType* AMREX_RESTRICT m_x; + RType* AMREX_RESTRICT m_z; +#elif defined(WARPX_DIM_3D) + RType* AMREX_RESTRICT m_x; + RType* AMREX_RESTRICT m_y; + RType* AMREX_RESTRICT m_z; +#elif defined(WARPX_DIM_1D_Z) + RType* AMREX_RESTRICT m_z; +#endif #if defined(WARPX_DIM_RZ) RType* AMREX_RESTRICT m_theta; #endif @@ -169,10 +195,18 @@ struct SetParticlePosition template SetParticlePosition (const ptiType& a_pti, long a_offset = 0) noexcept { - auto& aos = a_pti.GetArrayOfStructs(); - m_structs = aos().dataPtr() + a_offset; -#if defined(WARPX_DIM_RZ) auto& soa = a_pti.GetStructOfArrays(); +#if defined(WARPX_DIM_RZ) || defined(WARPX_DIM_XZ) + m_x = soa.GetRealData(PIdx::x).dataPtr() + a_offset; + m_z = soa.GetRealData(PIdx::z).dataPtr() + a_offset; +#elif defined(WARPX_DIM_3D) + m_x = soa.GetRealData(PIdx::x).dataPtr() + a_offset; + m_y = soa.GetRealData(PIdx::y).dataPtr() + a_offset; + m_z = soa.GetRealData(PIdx::z).dataPtr() + a_offset; +#elif defined(WARPX_DIM_1D_Z) + m_z = soa.GetRealData(PIdx::z).dataPtr() + a_offset; +#endif +#if defined(WARPX_DIM_RZ) m_theta = soa.GetRealData(T_PIdx::theta).dataPtr() + a_offset; #endif } @@ -190,17 +224,17 @@ struct SetParticlePosition #endif #ifdef WARPX_DIM_RZ m_theta[i] = std::atan2(y, x); - m_structs[i].pos(0) = std::sqrt(x*x + y*y); - m_structs[i].pos(1) = z; + m_x[i] = std::sqrt(x*x + y*y); + m_z[i] = z; #elif WARPX_DIM_3D - m_structs[i].pos(0) = x; - m_structs[i].pos(1) = y; - m_structs[i].pos(2) = z; + m_x[i] = x; + m_y[i] = y; + m_z[i] = z; #elif WARPX_DIM_XZ - m_structs[i].pos(0) = x; - m_structs[i].pos(1) = z; + m_x[i] = x; + m_z[i] = z; #else - m_structs[i].pos(0) = z; + m_z[i] = z; #endif } @@ -218,18 +252,18 @@ struct SetParticlePosition amrex::ignore_unused(x,y); #endif #ifdef WARPX_DIM_RZ - m_structs[i].pos(0) = x; + m_x[i] = x; m_theta[i] = y; - m_structs[i].pos(1) = z; + m_z[i] = z; #elif WARPX_DIM_3D - m_structs[i].pos(0) = x; - m_structs[i].pos(1) = y; - m_structs[i].pos(2) = z; + m_x[i] = x; + m_y[i] = y; + m_z[i] = z; #elif WARPX_DIM_XZ - m_structs[i].pos(0) = x; - m_structs[i].pos(1) = z; + m_x[i] = x; + m_z[i] = z; #else - m_structs[i].pos(0) = z; + m_z[i] = z; #endif } }; diff --git a/Source/Particles/Resampling/LevelingThinning.cpp b/Source/Particles/Resampling/LevelingThinning.cpp index 680e33ebe6a..5dc6a458f97 100644 --- a/Source/Particles/Resampling/LevelingThinning.cpp +++ b/Source/Particles/Resampling/LevelingThinning.cpp @@ -60,8 +60,7 @@ void LevelingThinning::operator() (WarpXParIter& pti, const int lev, auto& ptile = pc->ParticlesAt(lev, pti); auto& soa = ptile.GetStructOfArrays(); amrex::ParticleReal * const AMREX_RESTRICT w = soa.GetRealData(PIdx::w).data(); - WarpXParticleContainer::ParticleType * const AMREX_RESTRICT - particle_ptr = ptile.GetArrayOfStructs()().data(); + auto * const AMREX_RESTRICT idcpu = soa.GetIdCPUData().data(); // Using this function means that we must loop over the cells in the ParallelFor. 
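With positions stored as per-dimension SoA arrays, the position getters and setters above reduce to a few raw pointers selected by the build geometry. A stripped-down sketch for the XZ case; the names are stand-ins for the GetParticlePosition functor, not the WarpX types:

    #include <cassert>
    #include <vector>

    // XZ geometry: only x and z arrays exist; y is a fixed default.
    struct GetPosXZ
    {
        const double* m_x = nullptr;
        const double* m_z = nullptr;

        void operator() (long i, double& x, double& y, double& z) const
        {
            x = m_x[i];
            y = 0.0;      // no y component is stored in XZ
            z = m_z[i];
        }
    };

    int main ()
    {
        std::vector<double> xs{1.0, 2.0}, zs{3.0, 4.0};
        GetPosXZ get{xs.data(), zs.data()};
        double x, y, z;
        get(1, x, y, z);
        assert(x == 2.0 && y == 0.0 && z == 4.0);
        return 0;
    }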
In the case // of the leveling thinning algorithm, it would have possibly been more natural and more @@ -114,7 +113,7 @@ void LevelingThinning::operator() (WarpXParIter& pti, const int lev, // Remove particle with probability 1 - particle_weight/level_weight if (random_number > w[indices[i]]/level_weight) { - particle_ptr[indices[i]].id() = -1; + idcpu[indices[i]] = amrex::ParticleIdCpus::Invalid; } // Set particle weight to level weight otherwise else diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp index 58511cfd5e7..58e3450f47d 100644 --- a/Source/Particles/Sorting/Partition.cpp +++ b/Source/Particles/Sorting/Partition.cpp @@ -61,7 +61,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( // Initialize temporary arrays Gpu::DeviceVector inexflag; inexflag.resize(np); - Gpu::DeviceVector pid; + Gpu::DeviceVector pid; pid.resize(np); // First, partition particles into the larger buffer @@ -109,7 +109,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( // - For each particle in the large buffer, find whether it is in // the smaller buffer, by looking up the mask. Store the answer in `inexflag`. amrex::ParallelFor( np - n_fine, - fillBufferFlagRemainingParticles(pti, bmasks, inexflag, Geom(lev), pid, n_fine) ); + fillBufferFlagRemainingParticles(pti, bmasks, inexflag, Geom(lev), pid, int(n_fine)) ); auto *const sep2 = stablePartition( sep, pid.end(), inexflag ); if (bmasks == gather_masks) { diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H index ac2c63e88f8..ba7761bf48a 100644 --- a/Source/Particles/Sorting/SortingUtils.H +++ b/Source/Particles/Sorting/SortingUtils.H @@ -12,6 +12,7 @@ #include #include +#include /** \brief Fill the elements of the input vector with consecutive integer, @@ -19,7 +20,7 @@ * * \param[inout] v Vector of integers, to be filled by this routine */ -void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector& v ); +void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector& v ); /** \brief Find the indices that would reorder the elements of `predicate` * so that the elements with non-zero value precede the other elements @@ -41,7 +42,7 @@ ForwardIterator stablePartition(ForwardIterator const index_begin, int const* AMREX_RESTRICT predicate_ptr = predicate.dataPtr(); int N = static_cast(std::distance(index_begin, index_end)); auto num_true = amrex::StablePartition(&(*index_begin), N, - [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; }); + [predicate_ptr] AMREX_GPU_DEVICE (int i) { return predicate_ptr[i]; }); ForwardIterator sep = index_begin; std::advance(sep, num_true); @@ -49,7 +50,7 @@ ForwardIterator stablePartition(ForwardIterator const index_begin, // On CPU: Use std library ForwardIterator const sep = std::stable_partition( index_begin, index_end, - [&predicate](long i) { return predicate[i]; } + [&predicate](int i) { return predicate[i]; } ); #endif return sep; @@ -88,7 +89,7 @@ class fillBufferFlag // Extract simple structure that can be used directly on the GPU m_domain{geom.Domain()}, m_inexflag_ptr{inexflag.dataPtr()}, - m_particles{pti.GetArrayOfStructs().data()}, + m_ptd{pti.GetParticleTile().getConstParticleTileData()}, m_buffer_mask{(*bmasks)[pti].array()} { for (int idim=0; idim m_buffer_mask; amrex::GpuArray m_prob_lo; amrex::GpuArray m_inv_cell_size; @@ -141,12 +140,12 @@ class fillBufferFlagRemainingParticles amrex::iMultiFab const* bmasks, amrex::Gpu::DeviceVector& inexflag, amrex::Geometry const& geom, - 
amrex::Gpu::DeviceVector const& particle_indices, - long const start_index ) : + amrex::Gpu::DeviceVector const& particle_indices, + int start_index ) : m_domain{geom.Domain()}, // Extract simple structure that can be used directly on the GPU m_inexflag_ptr{inexflag.dataPtr()}, - m_particles{pti.GetArrayOfStructs().data()}, + m_ptd{pti.GetParticleTile().getConstParticleTileData()}, m_buffer_mask{(*bmasks)[pti].array()}, m_start_index{start_index}, m_indices_ptr{particle_indices.dataPtr()} @@ -159,11 +158,11 @@ class fillBufferFlagRemainingParticles AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void operator()( const long i ) const { + void operator()( const int i ) const { // Select a particle - auto const& p = m_particles[m_indices_ptr[i+m_start_index]]; + auto const j = m_indices_ptr[i+m_start_index]; // Find the index of the cell where this particle is located - amrex::IntVect const iv = amrex::getParticleCell( p, + amrex::IntVect const iv = amrex::getParticleCell( m_ptd, j, m_prob_lo, m_inv_cell_size, m_domain ); // Find the value of the buffer flag in this cell and // store it at the corresponding particle position in the array `inexflag` @@ -175,10 +174,10 @@ class fillBufferFlagRemainingParticles amrex::GpuArray m_inv_cell_size; amrex::Box m_domain; int* m_inexflag_ptr; - WarpXParticleContainer::ParticleType const* m_particles; + WarpXParticleContainer::ParticleTileType::ConstParticleTileDataType const m_ptd; amrex::Array4 m_buffer_mask; - long const m_start_index; - long const* m_indices_ptr; + int const m_start_index; + int const* m_indices_ptr; }; /** \brief Functor that copies the elements of `src` into `dst`, @@ -195,7 +194,7 @@ class copyAndReorder copyAndReorder( amrex::Gpu::DeviceVector const& src, amrex::Gpu::DeviceVector& dst, - amrex::Gpu::DeviceVector const& indices ): + amrex::Gpu::DeviceVector const& indices ): // Extract simple structure that can be used directly on the GPU m_src_ptr{src.dataPtr()}, m_dst_ptr{dst.dataPtr()}, @@ -203,14 +202,14 @@ class copyAndReorder {} AMREX_GPU_DEVICE AMREX_FORCE_INLINE - void operator()( const long ip ) const { + void operator()( const int ip ) const { m_dst_ptr[ip] = m_src_ptr[ m_indices_ptr[ip] ]; } private: T const* m_src_ptr; T* m_dst_ptr; - long const* m_indices_ptr; + int const* m_indices_ptr; }; #endif // WARPX_PARTICLES_SORTING_SORTINGUTILS_H_ diff --git a/Source/Particles/Sorting/SortingUtils.cpp b/Source/Particles/Sorting/SortingUtils.cpp index 699119e8e18..cd4b6a13c76 100644 --- a/Source/Particles/Sorting/SortingUtils.cpp +++ b/Source/Particles/Sorting/SortingUtils.cpp @@ -8,7 +8,7 @@ #include "SortingUtils.H" -void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector& v ) +void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector& v ) { #ifdef AMREX_USE_GPU // On GPU: Use amrex diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 33aa71d1c7d..7d2d5619da9 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -49,10 +49,10 @@ class WarpXParIter - : public amrex::ParIter<0,0,PIdx::nattribs> + : public amrex::ParIterSoA { public: - using amrex::ParIter<0,0,PIdx::nattribs>::ParIter; + using amrex::ParIterSoA::ParIterSoA; WarpXParIter (ContainerType& pc, int level); @@ -89,13 +89,14 @@ public: * particle container classes (that store a collection of particles) derive. Derived * classes can be used for plasma particles, photon particles, or non-physical * particles (e.g., for the laser antenna). 
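The copyAndReorder functor above is a plain gather: element ip of the destination comes from position indices[ip] of the source. A host-side sketch of the same operation:

    #include <cassert>
    #include <vector>

    // Gather-reorder: dst[ip] = src[indices[ip]].
    template <typename T>
    void copyAndReorder (std::vector<T> const& src, std::vector<T>& dst,
                         std::vector<int> const& indices)
    {
        dst.resize(indices.size());
        for (std::size_t ip = 0; ip < indices.size(); ++ip) {
            dst[ip] = src[indices[ip]];
        }
    }

    int main ()
    {
        std::vector<double> src{10., 20., 30.};
        std::vector<double> dst;
        copyAndReorder(src, dst, {2, 0, 1});
        assert(dst[0] == 30. && dst[1] == 10. && dst[2] == 20.);
        return 0;
    }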
- * It derives from amrex::ParticleContainer<0,0,PIdx::nattribs>, where the - * template arguments stand for the number of int and amrex::Real SoA and AoS - * data in amrex::Particle. - * - AoS amrex::Real: x, y, z (default), 0 additional (first template - * parameter) - * - AoS int: id, cpu (default), 0 additional (second template parameter) - * - SoA amrex::Real: PIdx::nattribs (third template parameter), see PIdx for + * It derives from amrex::ParticleContainerPureSoA, where the + * template arguments stand for the number of int and amrex::Real SoA + * data in amrex::SoAParticle. + * - SoA amrex::Real: positions x, y, z, momentum ux, uy, uz, ... see PIdx for details; + * more can be added at runtime + * - SoA int: 0 attributes by default, but can be added at runtime + * - SoA uint64_t: idcpu, a global 64bit index, with a 40bit local id and a 24bit cpu id + * (both set at creation) * the list. * * WarpXParticleContainer contains the main functions for initialization, @@ -164,11 +165,9 @@ public: * class. */ virtual void DefaultInitializeRuntimeAttributes ( - amrex::ParticleTile, - NArrayReal, NArrayInt, - amrex::PinnedArenaAllocator>& pinned_tile, - int n_external_attr_real, - int n_external_attr_int) = 0; + typename ContainerLike::ParticleTileType& pinned_tile, + int n_external_attr_real, + int n_external_attr_int) = 0; /// /// This pushes the particle positions by one half time step. diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index a395198e361..0d565c039e6 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -75,13 +75,13 @@ using namespace amrex; WarpXParIter::WarpXParIter (ContainerType& pc, int level) - : amrex::ParIter<0,0,PIdx::nattribs>(pc, level, + : amrex::ParIterSoA(pc, level, MFItInfo().SetDynamic(WarpX::do_dynamic_scheduling)) { } WarpXParIter::WarpXParIter (ContainerType& pc, int level, MFItInfo& info) - : amrex::ParIter<0,0,PIdx::nattribs>(pc, level, + : amrex::ParIterSoA(pc, level, info.SetDynamic(WarpX::do_dynamic_scheduling)) { } @@ -198,52 +198,53 @@ WarpXParticleContainer::AddNParticles (int /*lev*/, long n, // Redistribute() will move them to proper places. 
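In the pure-SoA container a runtime attribute is just one more array in the tile, sized alongside the compile-time components; pinned_tile.define(NumRuntimeRealComps(), NumRuntimeIntComps()) above reserves those extra arrays up front. A simplified analogue of adding a named runtime real component; the types are stand-ins:

    #include <cassert>
    #include <string>
    #include <vector>

    // A runtime component in a pure-SoA tile is simply an extra array,
    // added on demand and addressed by name.
    struct SoATile
    {
        std::vector<std::vector<double>> rdata;  // compile-time + runtime reals
        std::vector<std::string> names;

        int addRealComp (std::string const& name)
        {
            names.push_back(name);
            // New component starts with one slot per existing particle.
            rdata.emplace_back(rdata.empty() ? std::size_t(0) : rdata.front().size());
            return static_cast<int>(rdata.size()) - 1;
        }
    };

    int main ()
    {
        SoATile tile;
        tile.addRealComp("w");                                 // compile-time-like
        int const idx = tile.addRealComp("opticalDepthQSR");   // runtime component
        assert(idx == 1 && tile.rdata.size() == 2);
        return 0;
    }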
auto& particle_tile = DefineAndReturnParticleTile(0, 0, 0); - using PinnedTile = amrex::ParticleTile, - NArrayReal, NArrayInt, - amrex::PinnedArenaAllocator>; + using PinnedTile = typename ContainerLike::ParticleTileType; PinnedTile pinned_tile; pinned_tile.define(NumRuntimeRealComps(), NumRuntimeIntComps()); const std::size_t np = iend-ibegin; #ifdef WARPX_DIM_RZ + amrex::Vector r(np); amrex::Vector theta(np); #endif for (auto i = ibegin; i < iend; ++i) { - ParticleType p; - if (id==-1) - { - p.id() = ParticleType::NextID(); - } else { - p.id() = id; + auto & idcpu_data = pinned_tile.GetStructOfArrays().GetIdCPUData(); + + amrex::Long current_id = id; // copy input + if (id == -1) { + current_id = ParticleType::NextID(); } - p.cpu() = amrex::ParallelDescriptor::MyProc(); + idcpu_data.push_back(amrex::SetParticleIDandCPU(current_id, ParallelDescriptor::MyProc())); + +#ifdef WARPX_DIM_RZ + r[i-ibegin] = std::sqrt(x[i]*x[i] + y[i]*y[i]); + theta[i-ibegin] = std::atan2(y[i], x[i]); +#endif + } + + if (np > 0) + { #if defined(WARPX_DIM_3D) - p.pos(0) = x[i]; - p.pos(1) = y[i]; - p.pos(2) = z[i]; + pinned_tile.push_back_real(PIdx::x, x.data() + ibegin, x.data() + iend); + pinned_tile.push_back_real(PIdx::y, y.data() + ibegin, y.data() + iend); + pinned_tile.push_back_real(PIdx::z, z.data() + ibegin, z.data() + iend); #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) amrex::ignore_unused(y); #ifdef WARPX_DIM_RZ - theta[i-ibegin] = std::atan2(y[i], x[i]); - p.pos(0) = std::sqrt(x[i]*x[i] + y[i]*y[i]); + pinned_tile.push_back_real(PIdx::x, r.data(), r.data() + np); #else - p.pos(0) = x[i]; + pinned_tile.push_back_real(PIdx::x, x.data() + ibegin, x.data() + iend); #endif - p.pos(1) = z[i]; + pinned_tile.push_back_real(PIdx::z, z.data() + ibegin, z.data() + iend); #else //AMREX_SPACEDIM == 1 amrex::ignore_unused(x,y); - p.pos(0) = z[i]; + pinned_tile.push_back_real(PIdx::z, z.data() + ibegin, z.data() + iend); #endif - pinned_tile.push_back(p); - } - - if (np > 0) - { - pinned_tile.push_back_real(PIdx::w , attr_real[0].data() + ibegin, attr_real[0].data() + iend); + pinned_tile.push_back_real(PIdx::w, attr_real[0].data() + ibegin, attr_real[0].data() + iend); pinned_tile.push_back_real(PIdx::ux, ux.data() + ibegin, ux.data() + iend); pinned_tile.push_back_real(PIdx::uy, uy.data() + ibegin, uy.data() + iend); pinned_tile.push_back_real(PIdx::uz, uz.data() + ibegin, uz.data() + iend); @@ -476,15 +477,14 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti, //sort particles by bin WARPX_PROFILE_VAR_START(blp_sort); - amrex::DenseBins bins; + amrex::DenseBins bins; { auto& ptile = ParticlesAt(lev, pti); - auto& aos = ptile.GetArrayOfStructs(); - auto *pstruct_ptr = aos().dataPtr(); + auto ptd = ptile.getParticleTileData(); const int ntiles = numTilesInBox(box, true, bin_size); - bins.build(ptile.numParticles(), pstruct_ptr, ntiles, + bins.build(ptile.numParticles(), ptd, ntiles, [=] AMREX_GPU_HOST_DEVICE (const ParticleType& p) -> unsigned int { Box tbox; @@ -947,7 +947,7 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector const& wp, // HACK - sort particles by bin here. 
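AddNParticles now fills the pinned tile column by column with push_back_real ranges instead of pushing whole AoS particles one at a time. A sketch of the column-wise append on plain vectors:

    #include <cassert>
    #include <vector>

    // Column-wise append: copy a [beg, end) range onto the end of one
    // component array; repeated once per component instead of per particle.
    void push_back_real (std::vector<double>& comp, const double* beg, const double* end)
    {
        comp.insert(comp.end(), beg, end);
    }

    int main ()
    {
        std::vector<double> x, z, w;                 // SoA component arrays
        const double xs[] = {0.1, 0.2}, zs[] = {1.0, 2.0}, ws[] = {1e10, 1e10};
        push_back_real(x, xs, xs + 2);
        push_back_real(z, zs, zs + 2);
        push_back_real(w, ws, ws + 2);
        assert(x.size() == 2 && z.size() == 2 && w.size() == 2);
        return 0;
    }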
WARPX_PROFILE_VAR_START(blp_sort); - amrex::DenseBins bins; + amrex::DenseBins bins; { const Geometry& geom = Geom(lev); const auto dxi = geom.InvCellSizeArray(); @@ -955,16 +955,15 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector const& wp, const auto domain = geom.Domain(); auto& ptile = ParticlesAt(lev, pti); - auto& aos = ptile.GetArrayOfStructs(); - auto *pstruct_ptr = aos().dataPtr(); + auto ptd = ptile.getParticleTileData(); Box box = pti.validbox(); box.grow(ng_rho); const amrex::IntVect bin_size = WarpX::shared_tilesize; const int ntiles = numTilesInBox(box, true, bin_size); - bins.build(ptile.numParticles(), pstruct_ptr, ntiles, - [=] AMREX_GPU_HOST_DEVICE (const ParticleType& p) -> unsigned int + bins.build(ptile.numParticles(), ptd, ntiles, + [=] AMREX_GPU_HOST_DEVICE (ParticleType const & p) -> unsigned int { Box tbx; auto iv = getParticleCell(p, plo, dxi, domain); @@ -984,8 +983,7 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector const& wp, const auto domain = geom.Domain(); auto& ptile = ParticlesAt(lev, pti); - auto& aos = ptile.GetArrayOfStructs(); - auto *pstruct_ptr = aos().dataPtr(); + auto ptd = ptile.getParticleTileData(); Box box = pti.validbox(); box.grow(ng_rho); @@ -999,9 +997,10 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector const& wp, const auto bin_start = offsets_ptr[ibin]; const auto bin_stop = offsets_ptr[ibin+1]; if (bin_start < bin_stop) { - auto p = pstruct_ptr[permutation[bin_start]]; + // static_cast until https://github.com/AMReX-Codes/amrex/pull/3684 + auto const i = static_cast(permutation[bin_start]); Box tbx; - auto iv = getParticleCell(p, plo, dxi, domain); + auto iv = getParticleCell(ptd, i, plo, dxi, domain); AMREX_ASSERT(box.contains(iv)); [[maybe_unused]] auto tid = getTileIndex(iv, box, true, bin_size, tbx); AMREX_ASSERT(tid == ibin); @@ -1490,10 +1489,10 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p, // Tag particle if goes to higher level. 
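The bins built above group particle indices by cell in the usual counting-sort fashion: count the particles per bin, take a prefix sum to get offsets, then scatter indices into a permutation array that the deposition loop walks bin by bin. A compact serial sketch of that construction, under the assumption of this standard offsets-plus-permutation layout:

    #include <cassert>
    #include <vector>

    struct Bins
    {
        std::vector<int> offsets;      // bin b occupies [offsets[b], offsets[b+1])
        std::vector<int> permutation;  // particle indices grouped by bin
    };

    Bins build (std::vector<int> const& bin_of, int nbins)
    {
        Bins b;
        b.offsets.assign(nbins + 1, 0);
        for (int cell : bin_of) { ++b.offsets[cell + 1]; }            // histogram
        for (int i = 0; i < nbins; ++i) { b.offsets[i + 1] += b.offsets[i]; }
        b.permutation.resize(bin_of.size());
        std::vector<int> cursor(b.offsets.begin(), b.offsets.end() - 1);
        for (int i = 0; i < static_cast<int>(bin_of.size()); ++i) {
            b.permutation[cursor[bin_of[i]]++] = i;                   // scatter
        }
        return b;
    }

    int main ()
    {
        auto b = build({1, 0, 1, 0}, 2);   // items 1,3 in bin 0; items 0,2 in bin 1
        assert(b.offsets[1] == 2 && b.permutation[0] == 1 && b.permutation[2] == 0);
        return 0;
    }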
// It will be split later in the loop if (pld.m_lev == lev+1 - and p.id() != NoSplitParticleID + and p.id() != amrex::LongParticleIds::NoSplitParticleID and p.id() >= 0) { - p.id() = DoSplitParticleID; + p.id() = amrex::LongParticleIds::DoSplitParticleID; } if (pld.m_lev == lev-1){ @@ -1532,9 +1531,9 @@ WarpXParticleContainer::ApplyBoundaryConditions (){ const Real zmax = Geom(lev).ProbHi(WARPX_ZINDEX); ParticleTileType& ptile = ParticlesAt(lev, pti); - ParticleType * const pp = ptile.GetArrayOfStructs()().data(); auto& soa = ptile.GetStructOfArrays(); + uint64_t * const AMREX_RESTRICT idcpu = soa.GetIdCPUData().data(); amrex::ParticleReal * const AMREX_RESTRICT ux = soa.GetRealData(PIdx::ux).data(); amrex::ParticleReal * const AMREX_RESTRICT uy = soa.GetRealData(PIdx::uy).data(); amrex::ParticleReal * const AMREX_RESTRICT uz = soa.GetRealData(PIdx::uz).data(); @@ -1543,10 +1542,9 @@ WarpXParticleContainer::ApplyBoundaryConditions (){ amrex::ParallelForRNG( pti.numParticles(), [=] AMREX_GPU_DEVICE (long i, amrex::RandomEngine const& engine) { - ParticleType& p = pp[i]; - // skip particles that are already flagged for removal - if (p.id() < 0) { return; } + auto pidw = amrex::ParticleIDWrapper{idcpu[i]}; + if (!pidw.is_valid()) { return; } ParticleReal x, y, z; GetPosition.AsStored(i, x, y, z); @@ -1568,7 +1566,7 @@ WarpXParticleContainer::ApplyBoundaryConditions (){ boundary_conditions, engine); if (particle_lost) { - p.id() = -p.id(); + pidw.make_invalid(); } else { SetPosition.AsStored(i, x, y, z); } diff --git a/Source/Python/Particles/ParticleBoundaryBuffer.cpp b/Source/Python/Particles/ParticleBoundaryBuffer.cpp index 2a35faece9b..b04ac75e600 100644 --- a/Source/Python/Particles/ParticleBoundaryBuffer.cpp +++ b/Source/Python/Particles/ParticleBoundaryBuffer.cpp @@ -10,13 +10,13 @@ namespace warpx { class BoundaryBufferParIter - : public amrex::ParIter<0,0,PIdx::nattribs,0,amrex::PinnedArenaAllocator> + : public amrex::ParIterSoA { public: - using amrex::ParIter<0,0,PIdx::nattribs,0,amrex::PinnedArenaAllocator>::ParIter; + using amrex::ParIterSoA::ParIterSoA; BoundaryBufferParIter(ContainerType& pc, int level) : - amrex::ParIter<0,0,PIdx::nattribs,0,amrex::PinnedArenaAllocator>(pc, level) {} + amrex::ParIterSoA(pc, level) {} }; } @@ -24,9 +24,9 @@ void init_BoundaryBufferParIter (py::module& m) { py::class_< warpx::BoundaryBufferParIter, - amrex::ParIter<0,0,PIdx::nattribs,0,amrex::PinnedArenaAllocator> + amrex::ParIterSoA >(m, "BoundaryBufferParIter") - .def(py::init::ContainerType&, int>(), + .def(py::init::ContainerType&, int>(), py::arg("particle_container"), py::arg("level") ) ; diff --git a/Source/Python/Particles/PinnedMemoryParticleContainer.cpp b/Source/Python/Particles/PinnedMemoryParticleContainer.cpp index 600d56a62c9..d4f6a422dbe 100644 --- a/Source/Python/Particles/PinnedMemoryParticleContainer.cpp +++ b/Source/Python/Particles/PinnedMemoryParticleContainer.cpp @@ -13,6 +13,6 @@ void init_PinnedMemoryParticleContainer (py::module& m) { py::class_< PinnedMemoryParticleContainer, - amrex::ParticleContainer<0,0,PIdx::nattribs,0,amrex::PinnedArenaAllocator> + amrex::ParticleContainerPureSoA > pmpc (m, "PinnedMemoryParticleContainer"); } diff --git a/Source/Python/Particles/WarpXParticleContainer.cpp b/Source/Python/Particles/WarpXParticleContainer.cpp index 1473a750941..07793a373f3 100644 --- a/Source/Python/Particles/WarpXParticleContainer.cpp +++ b/Source/Python/Particles/WarpXParticleContainer.cpp @@ -12,11 +12,11 @@ void init_WarpXParIter (py::module& m) { py::class_< - 
WarpXParIter, amrex::ParIter<0,0,PIdx::nattribs> + WarpXParIter, amrex::ParIterSoA >(m, "WarpXParIter") - .def(py::init::ContainerType&, int>(), + .def(py::init::ContainerType&, int>(), py::arg("particle_container"), py::arg("level")) - .def(py::init::ContainerType&, int, amrex::MFItInfo&>(), + .def(py::init::ContainerType&, int, amrex::MFItInfo&>(), py::arg("particle_container"), py::arg("level"), py::arg("info")) ; @@ -26,11 +26,11 @@ void init_WarpXParticleContainer (py::module& m) { py::class_< WarpXParticleContainer, - amrex::ParticleContainer<0, 0, PIdx::nattribs, 0> + amrex::ParticleContainerPureSoA > wpc (m, "WarpXParticleContainer"); wpc .def("add_real_comp", - [](WarpXParticleContainer& pc, const std::string& name, bool const comm) { pc.AddRealComp(name, comm); }, + [](WarpXParticleContainer& pc, const std::string& name, bool comm) { pc.AddRealComp(name, comm); }, py::arg("name"), py::arg("comm") ) .def("add_n_particles", @@ -93,6 +93,14 @@ void init_WarpXParticleContainer (py::module& m) }, py::arg("comp_name") ) + .def("get_icomp_index", + [](WarpXParticleContainer& pc, std::string comp_name) + { + auto particle_comps = pc.getParticleiComps(); + return particle_comps.at(comp_name); + }, + py::arg("comp_name") + ) .def("num_local_tiles_at_level", &WarpXParticleContainer::numLocalTilesAtLevel, py::arg("level") diff --git a/Source/Utils/ParticleUtils.H b/Source/Utils/ParticleUtils.H index b04176d4d83..7e3c89228ea 100644 --- a/Source/Utils/ParticleUtils.H +++ b/Source/Utils/ParticleUtils.H @@ -28,9 +28,10 @@ namespace ParticleUtils { * @param[in] mfi the MultiFAB iterator. * @param[in] ptile the particle tile. */ - amrex::DenseBins - findParticlesInEachCell(int lev, amrex::MFIter const& mfi, - WarpXParticleContainer::ParticleTileType const& ptile); + amrex::DenseBins + findParticlesInEachCell (int lev, + amrex::MFIter const & mfi, + WarpXParticleContainer::ParticleTileType & ptile); /** * \brief Return (relativistic) particle energy given velocity and mass. diff --git a/Source/Utils/ParticleUtils.cpp b/Source/Utils/ParticleUtils.cpp index 60e04f12b86..b8207b61fa0 100644 --- a/Source/Utils/ParticleUtils.cpp +++ b/Source/Utils/ParticleUtils.cpp @@ -22,24 +22,28 @@ #include #include -namespace ParticleUtils { +namespace ParticleUtils +{ using namespace amrex; + // Define shortcuts for frequently-used type names - using ParticleType = WarpXParticleContainer::ParticleType; - using ParticleTileType = WarpXParticleContainer::ParticleTileType; - using ParticleBins = DenseBins; - using index_type = ParticleBins::index_type; + using ParticleType = typename WarpXParticleContainer::ParticleType; + using ParticleTileType = typename WarpXParticleContainer::ParticleTileType; + using ParticleTileDataType = typename ParticleTileType::ParticleTileDataType; + using ParticleBins = DenseBins; + using index_type = typename ParticleBins::index_type; /* Find the particles and count the particles that are in each cell. 
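The binning lambda in findParticlesInEachCell (next hunk) maps a position to its cell index: subtract the domain lower corner, multiply by the inverse cell size, and shift by the tile box lower bound. The same arithmetic in isolation, with plain doubles standing in for the AMReX types:

    #include <cassert>

    // pos -> cell index: (pos - plo) * dxi gives the cell in domain
    // coordinates; subtracting the box lower corner makes it tile-local.
    int cellIndex (double pos, double plo, double dxi, int lo)
    {
        return static_cast<int>((pos - plo) * dxi - lo);
    }

    int main ()
    {
        // Domain starts at 0.0, cell size 0.5 (so dxi = 2.0), box starts at cell 0.
        assert(cellIndex(1.2, 0.0, 2.0, 0) == 2);   // 1.2 lies in cell [1.0, 1.5)
        return 0;
    }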
Note that this does *not* rearrange particle arrays */ ParticleBins - findParticlesInEachCell( int const lev, MFIter const& mfi, - ParticleTileType const& ptile) { + findParticlesInEachCell (int lev, + MFIter const & mfi, + ParticleTileType & ptile) { // Extract particle structures for this tile int const np = ptile.numParticles(); - ParticleType const* particle_ptr = ptile.GetArrayOfStructs()().data(); + auto ptd = ptile.getParticleTileData(); // Extract box properties Geometry const& geom = WarpX::GetInstance().Geom(lev); @@ -51,9 +55,9 @@ namespace ParticleUtils { // Find particles that are in each cell; // results are stored in the object `bins`. ParticleBins bins; - bins.build(np, particle_ptr, cbx, + bins.build(np, ptd, cbx, // Pass lambda function that returns the cell index - [=] AMREX_GPU_DEVICE (const ParticleType& p) noexcept + [=] AMREX_GPU_DEVICE (ParticleType const & p) noexcept -> amrex::IntVect { return IntVect{AMREX_D_DECL( static_cast((p.pos(0)-plo[0])*dxi[0] - lo.x), @@ -64,4 +68,4 @@ namespace ParticleUtils { return bins; } -} +} // namespace ParticleUtils diff --git a/Source/ablastr/particles/IndexHandling.H b/Source/ablastr/particles/IndexHandling.H deleted file mode 100644 index 0ad5ca60446..00000000000 --- a/Source/ablastr/particles/IndexHandling.H +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2019-2022 Axel Huebl - * - * This file is part of WarpX. - * - * License: BSD-3-Clause-LBNL - */ -#ifndef ABLASTR_INDEX_HANDLING_H -#define ABLASTR_INDEX_HANDLING_H - -#include - - -namespace ablastr::particles { - - /** A helper function to derive a globally unique particle ID - * - * @param[in] id AMReX particle ID (on local cpu/rank), AoS .id - * @param[in] cpu AMReX particle CPU (rank) at creation of the particle, AoS .cpu - * @return global particle ID that is unique and permanent in the whole simulation - */ - constexpr uint64_t - localIDtoGlobal (int const id, int const cpu) - { - static_assert(sizeof(int) * 2u <= sizeof(uint64_t), - "int size might cause collisions in global IDs"); - // implementation: - // - we cast both 32-bit (or smaller) ints to a 64bit unsigned int - // - this will leave half of the "upper range" bits in the 64bit unsigned int zeroed out - // because the corresponding (extended) value range was not part of the value range in - // the int representation - // - we bit-shift the cpu into the upper half of zero bits in the 64 bit unsigned int - // (imagine this step as "adding a per-cpu/rank offset to the local integers") - // - then we add this offset - // note: the add is expressed as bitwise OR (|) since this saves us from writing - // brackets for operator precedence between + and << - return uint64_t(id) | uint64_t(cpu) << 32u; - } - -} // namespace ablastr::particles - -#endif // ABLASTR_INDEX_HANDLING_H diff --git a/Source/ablastr/particles/ParticleMoments.H b/Source/ablastr/particles/ParticleMoments.H index e45fb574cce..b648ccb28aa 100644 --- a/Source/ablastr/particles/ParticleMoments.H +++ b/Source/ablastr/particles/ParticleMoments.H @@ -35,7 +35,7 @@ namespace particles { amrex::ParticleReal, amrex::ParticleReal> MinAndMaxPositions (T_PC const & pc) { - using PType = typename T_PC::SuperParticleType; + using ConstParticleTileDataType = typename T_PC::ParticleTileType::ConstParticleTileDataType; // Get min and max for the local rank amrex::ReduceOps< @@ -46,11 +46,11 @@ namespace particles { amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal> >( pc, - [=] AMREX_GPU_DEVICE(PType const & p) noexcept + [=] AMREX_GPU_DEVICE(const 
ConstParticleTileDataType& ptd, const int i) noexcept { - amrex::ParticleReal const x = p.pos(0); - amrex::ParticleReal const y = p.pos(1); - amrex::ParticleReal const z = p.pos(2); + const amrex::ParticleReal x = ptd.rdata(0)[i]; + const amrex::ParticleReal y = ptd.rdata(1)[i]; + const amrex::ParticleReal z = ptd.rdata(2)[i]; return amrex::makeTuple(x, y, z, x, y, z); }, @@ -90,7 +90,8 @@ namespace particles { amrex::ParticleReal, amrex::ParticleReal> MeanAndStdPositions (T_PC const & pc) { - using PType = typename T_PC::SuperParticleType; + + using ConstParticleTileDataType = typename T_PC::ParticleTileType::ConstParticleTileDataType; amrex::ReduceOps< amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, @@ -103,12 +104,14 @@ namespace particles { amrex::ParticleReal> >( pc, - [=] AMREX_GPU_DEVICE(const PType& p) noexcept + [=] AMREX_GPU_DEVICE(const ConstParticleTileDataType& ptd, const int i) noexcept { - amrex::ParticleReal const x = p.pos(0); - amrex::ParticleReal const y = p.pos(1); - amrex::ParticleReal const z = p.pos(2); - amrex::ParticleReal const w = p.rdata(T_RealSoAWeight); + + const amrex::ParticleReal x = ptd.rdata(0)[i]; + const amrex::ParticleReal y = ptd.rdata(1)[i]; + const amrex::ParticleReal z = ptd.rdata(2)[i]; + + const amrex::ParticleReal w = ptd.rdata(T_RealSoAWeight)[i]; return amrex::makeTuple(x, x*x, y, y*y, z, z*z, w); }, diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake index 0f6a15a5ff4..81f5a533a76 100644 --- a/cmake/dependencies/AMReX.cmake +++ b/cmake/dependencies/AMReX.cmake @@ -269,7 +269,7 @@ set(WarpX_amrex_src "" set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git" CACHE STRING "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)") -set(WarpX_amrex_branch "24.02" +set(WarpX_amrex_branch "296ed40e16ae1877640f5b78e9162dbd4ba1c279" CACHE STRING "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)") diff --git a/cmake/dependencies/pyAMReX.cmake b/cmake/dependencies/pyAMReX.cmake index b4cf9f3f9c1..8a9e35c6579 100644 --- a/cmake/dependencies/pyAMReX.cmake +++ b/cmake/dependencies/pyAMReX.cmake @@ -79,7 +79,7 @@ option(WarpX_pyamrex_internal "Download & build pyAMReX" ON) set(WarpX_pyamrex_repo "https://github.com/AMReX-Codes/pyamrex.git" CACHE STRING "Repository URI to pull and build pyamrex from if(WarpX_pyamrex_internal)") -set(WarpX_pyamrex_branch "24.02" +set(WarpX_pyamrex_branch "defb663d74ef9f50183b31c5dc9731cf6adb447c" CACHE STRING "Repository branch for WarpX_pyamrex_repo if(WarpX_pyamrex_internal)") diff --git a/run_test.sh b/run_test.sh index e1b45ab7c28..6d8a1ddb014 100755 --- a/run_test.sh +++ b/run_test.sh @@ -68,7 +68,7 @@ python3 -m pip install --upgrade -r warpx/Regression/requirements.txt # Clone AMReX and warpx-data git clone https://github.com/AMReX-Codes/amrex.git -cd amrex && git checkout --detach 24.02 && cd - +cd amrex && git checkout --detach 296ed40e16ae1877640f5b78e9162dbd4ba1c279 && cd - # warpx-data contains various required data sets git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git # openPMD-example-datasets contains various required data sets From a9d8126b500e1c7197eb0ed1e52fd50bb09cbdf4 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Mon, 5 Feb 2024 04:15:34 -0800 Subject: [PATCH 10/13] Fix: Pre-Installed AMReX w/ CUDA (#4668) Fix CMake language activation with pre-installed AMReX using the CUDA backend. 
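As a reproduction sketch for context: the code path below is only reached when
an external (pre-installed) AMReX is consumed via find_package() instead of the
internal superbuild. The source/install paths and -j values are placeholders,
not part of this patch:

    # build and install AMReX with the CUDA backend
    cmake -S amrex -B amrex/build -DAMReX_GPU_BACKEND=CUDA -DCMAKE_INSTALL_PREFIX=$HOME/sw/amrex
    cmake --build amrex/build --target install -j 4

    # point WarpX at the pre-installed AMReX; before this fix, configuring
    # with WarpX_COMPUTE=CUDA this way could fail because the consuming CMake
    # project never ran enable_language(CUDA)
    cmake -S WarpX -B WarpX/build -DWarpX_COMPUTE=CUDA \
          -DWarpX_amrex_internal=OFF -DAMReX_ROOT=$HOME/sw/amrex
    cmake --build WarpX/build -j 4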
---
 cmake/dependencies/AMReX.cmake | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake
index 81f5a533a76..6c7c9466dfd 100644
--- a/cmake/dependencies/AMReX.cmake
+++ b/cmake/dependencies/AMReX.cmake
@@ -257,6 +257,10 @@ macro(find_amrex)
         list(APPEND CMAKE_MODULE_PATH "${AMReX_DIR}/AMReXCMakeModules")

         message(STATUS "AMReX: Found version '${AMReX_VERSION}'")
+
+        if(WarpX_COMPUTE STREQUAL CUDA)
+            enable_language(CUDA)
+        endif()
     endif()
 endmacro()

From 7e368134be037599d8ed1983a04f400765dd719b Mon Sep 17 00:00:00 2001
From: "S. Eric Clark" <25495882+clarkse@users.noreply.github.com>
Date: Mon, 5 Feb 2024 16:26:55 -0800
Subject: [PATCH 11/13] Add hybrid resistivity current term (#4661)

* Adding total current magnitude dependence in hybrid resistivity.

* Removed dead line of code.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Adding logic to only do current interpolation when the resistivity has J dependence.

* Fixing staggering bug and changing how squares are computed.

* Changing to using std::sqrt and adding _rt to value initialization for jtot_val.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 Docs/source/usage/parameters.rst      |  2 +-
 Python/pywarpx/picmi.py               |  2 +-
 .../HybridPICModel/HybridPICModel.H   |  3 +-
 .../HybridPICModel/HybridPICModel.cpp |  8 ++-
 .../HybridPICSolveE.cpp               | 68 +++++++++++++++++--
 5 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst
index 493e8307037..d26c22e6dea 100644
--- a/Docs/source/usage/parameters.rst
+++ b/Docs/source/usage/parameters.rst
@@ -2247,7 +2247,7 @@ Maxwell solver: kinetic-fluid hybrid
     If ``algo.maxwell_solver`` is set to ``hybrid``, this sets the exponent used to calculate
     the electron pressure (see :ref:`here <theory-kinetic-fluid-hybrid-model>`).

-* ``hybrid_pic_model.plasma_resistivity(rho)`` (`float` or `str`) optional (default ``0``)
+* ``hybrid_pic_model.plasma_resistivity(rho,J)`` (`float` or `str`) optional (default ``0``)
     If ``algo.maxwell_solver`` is set to ``hybrid``, this sets the plasma resistivity in :math:`\Omega m`.

 * ``hybrid_pic_model.J[x/y/z]_external_grid_function(x, y, z, t)`` (`float` or `str`) optional (default ``0``)
diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py
index 89bd5af2eab..f11ecb379f2 100644
--- a/Python/pywarpx/picmi.py
+++ b/Python/pywarpx/picmi.py
@@ -1184,7 +1184,7 @@ def solver_initialize_inputs(self):
         pywarpx.hybridpicmodel.gamma = self.gamma
         pywarpx.hybridpicmodel.n_floor = self.n_floor
         pywarpx.hybridpicmodel.__setattr__(
-            'plasma_resistivity(rho)',
+            'plasma_resistivity(rho,J)',
             pywarpx.my_constants.mangle_expression(self.plasma_resistivity, self.mangle_dict)
         )
         pywarpx.hybridpicmodel.substeps = self.substeps
diff --git a/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.H b/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.H
index d1931a71765..23ef49b58cb 100644
--- a/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.H
+++ b/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.H
@@ -172,7 +172,8 @@ public:
     /** Plasma resistivity */
     std::string m_eta_expression = "0.0";
     std::unique_ptr<amrex::Parser> m_resistivity_parser;
-    amrex::ParserExecutor<1> m_eta;
+    amrex::ParserExecutor<2> m_eta;
+    bool m_resistivity_has_J_dependence = false;

     /** External current */
     std::string m_Jx_ext_grid_function = "0.0";
diff --git a/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.cpp b/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.cpp
index fb7e90f21a1..034bb71efbc 100644
--- a/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.cpp
+++ b/Source/FieldSolver/FiniteDifferenceSolver/HybridPICModel/HybridPICModel.cpp
@@ -37,7 +37,7 @@ void HybridPICModel::ReadParameters ()
         Abort("hybrid_pic_model.n0_ref should be specified if hybrid_pic_model.gamma != 1");
     }

-    pp_hybrid.query("plasma_resistivity(rho)", m_eta_expression);
+    pp_hybrid.query("plasma_resistivity(rho,J)", m_eta_expression);
     utils::parser::queryWithParser(pp_hybrid, "n_floor", m_n_floor);

     // convert electron temperature from eV to J
@@ -123,8 +123,10 @@ void HybridPICModel::ClearLevel (int lev)
 void HybridPICModel::InitData ()
 {
     m_resistivity_parser = std::make_unique<amrex::Parser>(
-        utils::parser::makeParser(m_eta_expression, {"rho"}));
-    m_eta = m_resistivity_parser->compile<1>();
+        utils::parser::makeParser(m_eta_expression, {"rho","J"}));
+    m_eta = m_resistivity_parser->compile<2>();
+    const std::set<std::string> resistivity_symbols = m_resistivity_parser->symbols();
+    m_resistivity_has_J_dependence += resistivity_symbols.count("J");

     m_J_external_parser[0] = std::make_unique<amrex::Parser>(
         utils::parser::makeParser(m_Jx_ext_grid_function,{"x","y","z","t"}));
diff --git a/Source/FieldSolver/FiniteDifferenceSolver/HybridPICSolveE.cpp b/Source/FieldSolver/FiniteDifferenceSolver/HybridPICSolveE.cpp
index 1a72fee53c2..5100eed0df3 100644
--- a/Source/FieldSolver/FiniteDifferenceSolver/HybridPICSolveE.cpp
+++ b/Source/FieldSolver/FiniteDifferenceSolver/HybridPICSolveE.cpp
@@ -433,6 +433,7 @@ void FiniteDifferenceSolver::HybridPICSolveECylindrical (
     // get hybrid model parameters
     const auto eta = hybrid_model->m_eta;
     const auto rho_floor = hybrid_model->m_n_floor * PhysConst::q_e;
+    const auto resistivity_has_J_dependence = hybrid_model->m_resistivity_has_J_dependence;

     // Index type required for interpolating fields from their respective
     // staggering to the Ex, Ey, Ez locations
@@ -589,6 +590,15 @@ void FiniteDifferenceSolver::HybridPICSolveECylindrical (
             // Interpolate to get the appropriate charge density in space
             Real rho_val = Interp(rho, nodal, Er_stag, coarsen, i, j, 0, 0);

+            // Interpolate current to appropriate staggering to match E field
+            Real jtot_val = 0._rt;
+            if (include_resistivity_term && resistivity_has_J_dependence) {
+                Real jr_val = Interp(Jr, Jr_stag, Er_stag, coarsen, i, j, 0, 0);
+                Real jt_val = Interp(Jt, Jt_stag, Er_stag, coarsen, i, j, 0, 0);
+                Real jz_val = Interp(Jz, Jz_stag, Er_stag, coarsen, i, j, 0, 0);
+                jtot_val = std::sqrt(jr_val*jr_val + jt_val*jt_val + jz_val*jz_val);
+            }
+
             // safety condition since we divide by rho_val later
             if (rho_val < rho_floor) { rho_val = rho_floor; }
@@ -601,7 +611,7 @@
             Er(i, j, 0) = (enE_r - grad_Pe) / rho_val;

             // Add resistivity only if E field value is used to update B
-            if (include_resistivity_term) { Er(i, j, 0) += eta(rho_val) * Jr(i, j, 0); }
+            if (include_resistivity_term) { Er(i, j, 0) += eta(rho_val, jtot_val) * Jr(i, j, 0); }
         },

         // Et calculation
@@ -622,6 +632,15 @@
             // Interpolate to get the appropriate charge density in space
             Real rho_val = Interp(rho, nodal, Er_stag, coarsen, i, j, 0, 0);

+            // Interpolate current to appropriate staggering to match E field
+            Real jtot_val = 0._rt;
+            if (include_resistivity_term && resistivity_has_J_dependence) {
+                Real jr_val = Interp(Jr, Jr_stag, Et_stag, coarsen, i, j, 0, 0);
+                Real jt_val = Interp(Jt, Jt_stag, Et_stag, coarsen, i, j, 0, 0);
+                Real jz_val = Interp(Jz, Jz_stag, Et_stag, coarsen, i, j, 0, 0);
+                jtot_val = std::sqrt(jr_val*jr_val + jt_val*jt_val + jz_val*jz_val);
+            }
+
             // safety condition since we divide by rho_val later
             if (rho_val < rho_floor) { rho_val = rho_floor; }
@@ -635,7 +654,7 @@
             Et(i, j, 0) = (enE_t - grad_Pe) / rho_val;

             // Add resistivity only if E field value is used to update B
-            if (include_resistivity_term) { Et(i, j, 0) += eta(rho_val) * Jt(i, j, 0); }
+            if (include_resistivity_term) { Et(i, j, 0) += eta(rho_val, jtot_val) * Jt(i, j, 0); }
         },

         // Ez calculation
@@ -647,6 +666,15 @@
             // Interpolate to get the appropriate charge density in space
             Real rho_val = Interp(rho, nodal, Ez_stag, coarsen, i, j, k, 0);

+            // Interpolate current to appropriate staggering to match E field
+            Real jtot_val = 0._rt;
+            if (include_resistivity_term && resistivity_has_J_dependence) {
+                Real jr_val = Interp(Jr, Jr_stag, Ez_stag, coarsen, i, j, 0, 0);
+                Real jt_val = Interp(Jt, Jt_stag, Ez_stag, coarsen, i, j, 0, 0);
+                Real jz_val = Interp(Jz, Jz_stag, Ez_stag, coarsen, i, j, 0, 0);
+                jtot_val = std::sqrt(jr_val*jr_val + jt_val*jt_val + jz_val*jz_val);
+            }
+
             // safety condition since we divide by rho_val later
             if (rho_val < rho_floor) { rho_val = rho_floor; }
@@ -659,7 +687,7 @@
             Ez(i, j, k) = (enE_z - grad_Pe) / rho_val;

             // Add resistivity only if E field value is used to update B
-            if (include_resistivity_term) { Ez(i, j, k) += eta(rho_val) * Jz(i, j, k); }
+            if (include_resistivity_term) { Ez(i, j, k) += eta(rho_val, jtot_val) * Jz(i, j, k); }
         }
     );
@@ -699,6 +727,7 @@ void FiniteDifferenceSolver::HybridPICSolveECartesian (
     // get hybrid model parameters
     const auto eta = hybrid_model->m_eta;
     const auto rho_floor = hybrid_model->m_n_floor * PhysConst::q_e;
+    const auto resistivity_has_J_dependence = hybrid_model->m_resistivity_has_J_dependence;

     // Index type required for interpolating fields from their respective
     // staggering to the Ex, Ey, Ez locations
@@ -853,6 +882,15 @@ void FiniteDifferenceSolver::HybridPICSolveECartesian (
             // Interpolate to get the appropriate charge density in space
             Real rho_val = Interp(rho, nodal, Ex_stag, coarsen, i, j, k, 0);

+            // Interpolate current to appropriate staggering to match E field
+            Real jtot_val = 0._rt;
+            if (include_resistivity_term && resistivity_has_J_dependence) {
+                Real jx_val = Interp(Jx, Jx_stag, Ex_stag, coarsen, i, j, k, 0);
+                Real jy_val = Interp(Jy, Jy_stag, Ex_stag, coarsen, i, j, k, 0);
+                Real jz_val = Interp(Jz, Jz_stag, Ex_stag, coarsen, i, j, k, 0);
+                jtot_val = std::sqrt(jx_val*jx_val + jy_val*jy_val + jz_val*jz_val);
+            }
+
             // safety condition since we divide by rho_val later
             if (rho_val < rho_floor) { rho_val = rho_floor; }
@@ -865,7 +903,7 @@
             Ex(i, j, k) = (enE_x - grad_Pe) / rho_val;

             // Add resistivity only if E field value is used to update B
-            if (include_resistivity_term) { Ex(i, j, k) += eta(rho_val) * Jx(i, j, k); }
+            if (include_resistivity_term) { Ex(i, j, k) += eta(rho_val, jtot_val) * Jx(i, j, k); }
         },

         // Ey calculation
@@ -883,6 +921,15 @@
             // Interpolate to get the appropriate charge density in space
             Real rho_val = Interp(rho, nodal, Ey_stag, coarsen, i, j, k, 0);

+            // Interpolate current to appropriate staggering to match E field
+            Real jtot_val = 0._rt;
+            if (include_resistivity_term && resistivity_has_J_dependence) {
+                Real jx_val = Interp(Jx, Jx_stag, Ey_stag, coarsen, i, j, k, 0);
+                Real jy_val = Interp(Jy, Jy_stag, Ey_stag, coarsen, i, j, k, 0);
+                Real jz_val = Interp(Jz, Jz_stag, Ey_stag, coarsen, i, j, k, 0);
+                jtot_val = std::sqrt(jx_val*jx_val + jy_val*jy_val + jz_val*jz_val);
+            }
+
             // safety condition since we divide by rho_val later
             if (rho_val < rho_floor) { rho_val = rho_floor; }
@@ -895,7 +942,7 @@
             Ey(i, j, k) = (enE_y - grad_Pe) / rho_val;

             // Add resistivity only if E field value is used to update B
-            if (include_resistivity_term) { Ey(i, j, k) += eta(rho_val) * Jy(i, j, k); }
+            if (include_resistivity_term) { Ey(i, j, k) += eta(rho_val, jtot_val) * Jy(i, j, k); }
         },

         // Ez calculation
@@ -907,6 +954,15 @@
             // Interpolate to get the appropriate charge density in space
             Real rho_val = Interp(rho, nodal, Ez_stag, coarsen, i, j, k, 0);

+            // Interpolate current to appropriate staggering to match E field
+            Real jtot_val = 0._rt;
+            if (include_resistivity_term && resistivity_has_J_dependence) {
+                Real jx_val = Interp(Jx, Jx_stag, Ez_stag, coarsen, i, j, k, 0);
+                Real jy_val = Interp(Jy, Jy_stag, Ez_stag, coarsen, i, j, k, 0);
+                Real jz_val = Interp(Jz, Jz_stag, Ez_stag, coarsen, i, j, k, 0);
+                jtot_val = std::sqrt(jx_val*jx_val + jy_val*jy_val + jz_val*jz_val);
+            }
+
             // safety condition since we divide by rho_val later
             if (rho_val < rho_floor) { rho_val = rho_floor; }
@@ -919,7 +975,7 @@
             Ez(i, j, k) = (enE_z - grad_Pe) / rho_val;

             // Add resistivity only if E field value is used to update B
-            if (include_resistivity_term) { Ez(i, j, k) += eta(rho_val) * Jz(i, j, k); }
+            if (include_resistivity_term) { Ez(i, j, k) += eta(rho_val, jtot_val) * Jz(i, j, k); }
         }
     );

From 83e16ee9a6875d4434bf9d9aff9bc51d1923c6e4 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Mon, 5 Feb 2024 18:24:51 -0800
Subject: [PATCH 12/13] CI: 4 Cores Linux/Win, 3 Cores
 macOS (#4673)

Increase build and test parallelism according to new increased core
limits on public GH hosted runners.
---
 .github/workflows/clang_tidy.yml |  6 +++---
 .github/workflows/codeql.yml     |  4 ++--
 .github/workflows/cuda.yml       |  6 +++---
 .github/workflows/hip.yml        |  4 ++--
 .github/workflows/insitu.yml     |  4 ++--
 .github/workflows/intel.yml      |  8 ++++----
 .github/workflows/ubuntu.yml     | 10 +++++-----
 .github/workflows/windows.yml    |  4 ++--
 8 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/clang_tidy.yml b/.github/workflows/clang_tidy.yml
index b8b0053adaa..2a30696fb8c 100644
--- a/.github/workflows/clang_tidy.yml
+++ b/.github/workflows/clang_tidy.yml
@@ -37,7 +37,7 @@ jobs:
           cmake -S . -B build_clang_tidy \
             -DCMAKE_VERBOSE_MAKEFILE=ON  \
-            -DWarpX_DIMS="1;2;3;RZ"      \
+            -DWarpX_DIMS="1;2;RZ;3"      \
             -DWarpX_MPI=ON               \
             -DWarpX_COMPUTE=OMP          \
             -DWarpX_PSATD=ON             \
@@ -47,10 +47,10 @@ jobs:
             -DWarpX_PRECISION=SINGLE     \
             -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

-          cmake --build build_clang_tidy -j 2
+          cmake --build build_clang_tidy -j 4

           ${{github.workspace}}/.github/workflows/source/makeMakefileForClangTidy.py --input ${{github.workspace}}/ccache.log.txt
-          make -j2 --keep-going -f clang-tidy-ccache-misses.mak \
+          make -j4 --keep-going -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 436df798d3b..008d82af239 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -75,7 +75,7 @@ jobs:
           export CCACHE_MAXSIZE=100M
           ccache -z

-          $CMAKE --build build -j 2
+          $CMAKE --build build -j 4

           ccache -s
           du -hs ~/.cache/ccache
@@ -83,7 +83,7 @@ jobs:
           # Make sure CodeQL has something to do
           touch Source/Utils/WarpXVersion.cpp
           export CCACHE_DISABLE=1
-          $CMAKE --build build -j 2
+          $CMAKE --build build -j 4

       - name: Perform CodeQL Analysis
         uses: github/codeql-action/analyze@v2
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
index 5e9f43f639d..ed8b315c4e9 100644
--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -73,7 +73,7 @@ jobs:
             -DWarpX_PSATD=ON                                 \
             -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \
             -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON
-          cmake --build build_sp -j 2
+          cmake --build build_sp -j 4

           python3 -m pip install --upgrade pip
           python3 -m pip install --upgrade build packaging setuptools wheel
@@ -116,7 +116,7 @@ jobs:
           git clone https://github.com/AMReX-Codes/amrex.git ../amrex
           cd ../amrex && git checkout --detach 296ed40e16ae1877640f5b78e9162dbd4ba1c279 && cd -
-          make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 2
+          make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 4

           ccache -s
           du -hs ~/.cache/ccache
@@ -171,7 +171,7 @@ jobs:
             -DWarpX_PSATD=ON                                 \
             -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \
             -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON
-          cmake --build build -j 2
+          cmake --build build -j 4

           # work-around for mpi4py 3.1.1 build system issue with using
           # a GNU-built Python executable with non-GNU Python modules
diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml
index f7378bfa775..51cadc89604 100644
--- a/.github/workflows/hip.yml
+++ b/.github/workflows/hip.yml
@@ -56,7 +56,7 @@ jobs:
             -DWarpX_OPENPMD=ON       \
             -DWarpX_PRECISION=SINGLE \
             -DWarpX_PSATD=ON
-          cmake --build build_sp -j 2
+          cmake --build build_sp -j 4

           export WARPX_MPI=OFF
           export PYWARPX_LIB_DIR=$PWD/build_sp/lib/site-packages/pywarpx/
@@ -116,7 +116,7 @@ jobs:
             -DWarpX_OPENPMD=ON       \
             -DWarpX_PRECISION=DOUBLE \
             -DWarpX_PSATD=ON
-          cmake --build build_2d -j 2
+          cmake --build build_2d -j 4

           export WARPX_MPI=OFF
           export PYWARPX_LIB_DIR=$PWD/build_2d/lib/site-packages/pywarpx/
diff --git a/.github/workflows/insitu.yml b/.github/workflows/insitu.yml
index 57a25ce7629..6006c3e5c5b 100644
--- a/.github/workflows/insitu.yml
+++ b/.github/workflows/insitu.yml
@@ -28,7 +28,7 @@ jobs:
           -DWarpX_COMPUTE=NOACC
     - name: Build
       run: |
-        cmake --build build -j 2
+        cmake --build build -j 4

   ascent:
     name: Ascent
@@ -51,7 +51,7 @@ jobs:
     - name: Build
       run: |
         . /ascent_docker_setup_env.sh
-        cmake --build build -j 2
+        cmake --build build -j 4
     - name: Test
       run: |
        cp Examples/Physics_applications/laser_acceleration/inputs_3d .
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 9124715fe18..1731f6e3723 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -53,7 +53,7 @@ jobs:
             -DWarpX_MPI=OFF              \
             -DWarpX_OPENPMD=ON           \
             -DWarpX_openpmd_internal=OFF
-          cmake --build build_dp -j 2
+          cmake --build build_dp -j 4

           cmake -S . -B build_sp         \
             -DCMAKE_VERBOSE_MAKEFILE=ON  \
@@ -64,7 +64,7 @@ jobs:
             -DWarpX_OPENPMD=ON           \
             -DWarpX_openpmd_internal=OFF \
             -DWarpX_PRECISION=SINGLE
-          cmake --build build_sp -j 2
+          cmake --build build_sp -j 4
           cmake --build build_sp --target pip_install

           ccache -s
@@ -120,7 +120,7 @@ jobs:
             -DWarpX_MPI=OFF     \
             -DWarpX_OPENPMD=ON  \
             -DWarpX_PRECISION=SINGLE
-          cmake --build build_sp -j 2
+          cmake --build build_sp -j 4
           cmake --build build_sp --target pip_install

           ccache -s
@@ -184,7 +184,7 @@ jobs:
             -DWarpX_MPI=OFF     \
             -DWarpX_OPENPMD=ON  \
             -DWarpX_PRECISION=SINGLE
-          cmake --build build_sp -j 2
+          cmake --build build_sp -j 4

           ccache -s
           du -hs ~/.cache/ccache
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 6b8e26111b8..239da17be64 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -38,7 +38,7 @@ jobs:
             -DWarpX_EB=OFF  \
             -DWarpX_MPI=OFF \
             -DWarpX_QED=OFF
-          cmake --build build -j 2
+          cmake --build build -j 4

           ./build/bin/warpx.3d Examples/Physics_applications/laser_acceleration/inputs_3d
           ./build/bin/warpx.rz Examples/Physics_applications/laser_acceleration/inputs_rz
@@ -79,7 +79,7 @@ jobs:
             -DWarpX_EB=OFF   \
             -DWarpX_PSATD=ON \
             -DWarpX_QED_TABLE_GEN=ON
-          cmake --build build -j 2
+          cmake --build build -j 4

           ./build/bin/warpx.1d Examples/Physics_applications/laser_acceleration/inputs_1d
           ./build/bin/warpx.2d Examples/Physics_applications/laser_acceleration/inputs_2d
@@ -126,7 +126,7 @@ jobs:
             -DWarpX_PARTICLE_PRECISION=SINGLE \
             -DWarpX_QED_TABLE_GEN=ON

-          cmake --build build -j 2
+          cmake --build build -j 4

           ./build/bin/warpx.3d Examples/Physics_applications/laser_acceleration/inputs_3d
           ./build/bin/warpx.rz Examples/Physics_applications/laser_acceleration/inputs_rz
@@ -164,7 +164,7 @@ jobs:
             -DCMAKE_VERBOSE_MAKEFILE=ON \
             -DWarpX_APP=OFF             \
             -DWarpX_LIB=OFF
-          cmake --build build -j 2
+          cmake --build build -j 4

           ccache -s
           du -hs ~/.cache/ccache
@@ -208,7 +208,7 @@ jobs:
             -DWarpX_PSATD=ON  \
             -DWarpX_PYTHON=ON \
             -DWarpX_QED_TABLE_GEN=ON
-          cmake --build build -j 2 --target pip_install
+          cmake --build build -j 4 --target pip_install

           ccache -s
           du -hs ~/.cache/ccache
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 8e2bb00f1db..eee40e72965 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -38,7 +38,7 @@ jobs:
           -DWarpX_MPI=OFF `
           -DWarpX_PYTHON=ON
         if(!$?) { Exit $LASTEXITCODE }
-        cmake --build build --config Debug --parallel 2
+        cmake --build build --config Debug --parallel 4
         if(!$?) { Exit $LASTEXITCODE }

         python3 -m pip install --upgrade pip
@@ -96,7 +96,7 @@ jobs:
           -DWarpX_MPI=OFF ^
           -DWarpX_OPENPMD=ON
         if errorlevel 1 exit 1
-        cmake --build build --config Release --parallel 2
+        cmake --build build --config Release --parallel 4
         if errorlevel 1 exit 1

         cmake --build build --config Release --target install

From 6e87dd52995b6faf16a2bf76bd873c6447f76548 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang
Date: Tue, 6 Feb 2024 13:01:58 -0800
Subject: [PATCH 13/13] Update GitHub Action versions (#4674)

* Bump actions/upload-artifact from 3 to 4

* Bump github/codeql-action from 2 to 3

* Bump actions/checkout from 3 to 4

* Bump actions/setup-python from 4 to 5

* Bump actions/cache from 3 to 4
---
 .github/workflows/clang_tidy.yml           |  6 +++---
 .github/workflows/cleanup-cache-postpr.yml |  2 +-
 .github/workflows/cleanup-cache.yml        |  2 +-
 .github/workflows/codeql.yml               | 14 +++++++-------
 .github/workflows/cuda.yml                 | 16 ++++++++--------
 .github/workflows/hip.yml                  | 10 +++++-----
 .github/workflows/insitu.yml               |  6 +++---
 .github/workflows/intel.yml                | 14 +++++++-------
 .github/workflows/macos.yml                |  6 +++---
 .github/workflows/post-pr.yml              |  2 +-
 .github/workflows/source.yml               |  2 +-
 .github/workflows/ubuntu.yml               | 22 +++++++++++-----------
 .github/workflows/windows.yml              | 12 ++++++------
 13 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/.github/workflows/clang_tidy.yml b/.github/workflows/clang_tidy.yml
index 2a30696fb8c..6a1172802a8 100644
--- a/.github/workflows/clang_tidy.yml
+++ b/.github/workflows/clang_tidy.yml
@@ -12,12 +12,12 @@ jobs:
     runs-on: ubuntu-22.04
     if: github.event.pull_request.draft == false
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/clang14.sh
    - name: set up cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -66,7 +66,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/cleanup-cache-postpr.yml b/.github/workflows/cleanup-cache-postpr.yml
index 978e9c28f04..9a2ffb0f61a 100644
--- a/.github/workflows/cleanup-cache-postpr.yml
+++ b/.github/workflows/cleanup-cache-postpr.yml
@@ -16,7 +16,7 @@ jobs:
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Clean up ccache
        run: |
          gh extension install actions/gh-actions-cache
diff --git a/.github/workflows/cleanup-cache.yml b/.github/workflows/cleanup-cache.yml
index 6421bbf4215..bd1a518acf4 100644
--- a/.github/workflows/cleanup-cache.yml
+++ b/.github/workflows/cleanup-cache.yml
@@ -16,7 +16,7 @@ jobs:
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Clean up ccache
        run: |
          gh extension install actions/gh-actions-cache
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 008d82af239..bc0bee545cc 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -29,7 +29,7 @@ jobs:
    steps:
      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Install Packages (C++)
        if: ${{ matrix.language == 'cpp' }}
@@ -44,7 +44,7 @@ jobs:
      - name: Set Up Cache
        if: ${{ matrix.language == 'cpp' }}
-        uses: actions/cache@v3
+        uses: actions/cache@v4
        with:
          path: ~/.cache/ccache
          key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -57,14 +57,14 @@ jobs:
          $CMAKE -S . -B build -DWarpX_OPENPMD=ON

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v2
+        uses: github/codeql-action/init@v3
        with:
          config-file: ./.github/codeql/warpx-codeql.yml
          languages: ${{ matrix.language }}
          queries: +security-and-quality

      - name: Build (py)
-        uses: github/codeql-action/autobuild@v2
+        uses: github/codeql-action/autobuild@v3
        if: ${{ matrix.language == 'python' }}

      - name: Build (C++)
@@ -86,7 +86,7 @@ jobs:
          $CMAKE --build build -j 4

      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v2
+        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{ matrix.language }}"
          upload: False
@@ -107,7 +107,7 @@ jobs:
          output: sarif-results/${{ matrix.language }}.sarif

      - name: Upload SARIF
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: sarif-results/${{ matrix.language }}.sarif
@@ -120,7 +120,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
index ed8b315c4e9..3546eb8e9eb 100644
--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -19,8 +19,8 @@ jobs:
      CXXFLAGS: "-Werror"
      CMAKE_GENERATOR: Ninja
    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
+    - uses: actions/checkout@v4
+    - uses: actions/setup-python@v5
      name: Install Python
      with:
        python-version: '3.x'
@@ -28,7 +28,7 @@ jobs:
      run: |
        .github/workflows/dependencies/nvcc11-3.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -92,12 +92,12 @@ jobs:
    runs-on: ubuntu-20.04
    if: github.event.pull_request.draft == false
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/nvcc11-8.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -129,11 +129,11 @@ jobs:
    # # For NVHPC, Ninja is slower than the default:
    # CMAKE_GENERATOR: Ninja
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: Dependencies
      run: .github/workflows/dependencies/nvhpc.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -197,7 +197,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml
index 51cadc89604..0e311f061ef 100644
--- a/.github/workflows/hip.yml
+++ b/.github/workflows/hip.yml
@@ -15,12 +15,12 @@ jobs:
      CMAKE_GENERATOR: Ninja
    if: github.event.pull_request.draft == false
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      shell: bash
      run: .github/workflows/dependencies/hip.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -74,12 +74,12 @@ jobs:
      CMAKE_GENERATOR: Ninja
    if: github.event.pull_request.draft == false
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      shell: bash
      run: .github/workflows/dependencies/hip.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -135,7 +135,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/insitu.yml b/.github/workflows/insitu.yml
index 6006c3e5c5b..42923d3df8e 100644
--- a/.github/workflows/insitu.yml
+++ b/.github/workflows/insitu.yml
@@ -20,7 +20,7 @@ jobs:
    container:
      image: senseiinsitu/ci:fedora35-amrex-20220613
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: Configure
      run: |
        cmake -S . -B build \
@@ -41,7 +41,7 @@ jobs:
    container:
      image: alpinedav/ascent:0.9.2
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: Configure
      run: |
        . /ascent_docker_setup_env.sh
@@ -61,7 +61,7 @@ jobs:
          max_step = 40 \
          diag1.intervals = 30:40:10 \
          diag1.format = ascent
-    - uses: actions/upload-artifact@v2
+    - uses: actions/upload-artifact@v4
      with:
        name: ascent-test-artifacts
        path: |
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 1731f6e3723..3b1d6b546a4 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -17,12 +17,12 @@ jobs:
    #env:
    #  CMAKE_GENERATOR: Ninja
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/icc.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -82,13 +82,13 @@ jobs:
    #  CMAKE_GENERATOR: Ninja
    if: github.event.pull_request.draft == false
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      shell: bash
      run: |
        .github/workflows/dependencies/dpcpp.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -146,13 +146,13 @@ jobs:
    #  CMAKE_GENERATOR: Ninja
    if: github.event.pull_request.draft == false
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      shell: bash
      run: |
        .github/workflows/dependencies/dpcpp.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -204,7 +204,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
index 0e8819032e3..f34f9f3534d 100644
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -17,7 +17,7 @@ jobs:
    # For macOS, Ninja is slower than the default:
    #CMAKE_GENERATOR: Ninja
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        set +e
@@ -45,7 +45,7 @@ jobs:
        python3 -m pip install --upgrade build packaging setuptools wheel
        python3 -m pip install --upgrade mpi4py
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: /Users/runner/Library/Caches/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -97,7 +97,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/post-pr.yml b/.github/workflows/post-pr.yml
index f5b914033b7..2768ef376cc 100644
--- a/.github/workflows/post-pr.yml
+++ b/.github/workflows/post-pr.yml
@@ -13,7 +13,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/source.yml b/.github/workflows/source.yml
index 08050768894..a1c29416b3e 100644
--- a/.github/workflows/source.yml
+++ b/.github/workflows/source.yml
@@ -18,7 +18,7 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: Non-ASCII characters
      run: .github/workflows/source/hasNonASCII
    - name: TABs
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
index 239da17be64..cf4b375ce00 100644
--- a/.github/workflows/ubuntu.yml
+++ b/.github/workflows/ubuntu.yml
@@ -14,12 +14,12 @@ jobs:
    env:
      CXXFLAGS: "-Werror"
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/gcc.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -54,12 +54,12 @@ jobs:
      CXX: "g++-12"
      CC: "gcc-12"
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/gcc12.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -94,12 +94,12 @@ jobs:
      CXX: "g++-12"
      CC: "gcc-12"
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/gcc12_blaspp_lapackpp.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -141,13 +141,13 @@ jobs:
      CMAKE_GENERATOR: Ninja
      CXXFLAGS: "-Werror"
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/gcc.sh
        sudo apt-get install -y libopenmpi-dev openmpi-bin
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -179,12 +179,12 @@ jobs:
    # On CI for this test, Ninja is slower than the default:
    #CMAKE_GENERATOR: Ninja
    steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
    - name: install dependencies
      run: |
        .github/workflows/dependencies/pyfull.sh
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      with:
        path: ~/.cache/ccache
        key: ccache-${{ github.workflow }}-${{ github.job }}-git-${{ github.sha }}
@@ -227,7 +227,7 @@ jobs:
          PR_NUMBER: ${{ github.event.number }}
        run: |
          echo $PR_NUMBER > pr_number.txt
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          name: pr_number
          path: pr_number.txt
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index eee40e72965..2ef74cdb7f9 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -12,12 +12,12 @@ jobs:
    runs-on: windows-latest
    if: github.event.pull_request.draft == false
    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
+    - uses: actions/checkout@v4
+    - uses: actions/setup-python@v5
      with:
        python-version: '3.x'
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      # - once stored under a key, they become immutable (even if local cache path content changes)
      # - for a refresh the key has to change, e.g., hash of a tracked file in the key
      with:
@@ -63,13 +63,13 @@ jobs:
    runs-on: windows-2019
    if: github.event.pull_request.draft == false
    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
+    - uses: actions/checkout@v4
+    - uses: actions/setup-python@v5
      with:
        python-version: '3.8'
    - uses: seanmiddleditch/gha-setup-ninja@master
    - name: CCache Cache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
      # - once stored under a key, they become immutable (even if local cache path content changes)
      # - for a refresh the key has to change, e.g., hash of a tracked file in the key
      with: