Skip to content

Commit

Permalink
Merge pull request lammps#4007 from hagertnl/issue3775_fft_kokkos
Browse files Browse the repository at this point in the history
KSPACE: decouple KOKKOS and non-KOKKOS FFT
  • Loading branch information
akohlmey authored Feb 7, 2024
2 parents d378415 + c1024c8 commit b9ec854
Show file tree
Hide file tree
Showing 50 changed files with 449 additions and 351 deletions.
33 changes: 8 additions & 25 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -984,14 +984,15 @@ if(PKG_KOKKOS)
endif()
endif()
if(PKG_KSPACE)
if (LMP_HEFFTE)
if (FFT_USE_HEFFTE)
message(STATUS "<<< FFT settings >>>
-- Primary FFT lib: heFFTe")
if (HEFFTE_BACKEND)
message(STATUS "heFFTe backend: ${HEFFTE_BACKEND}")
if (FFT_HEFFTE_BACKEND)
message(STATUS "heFFTe backend: ${FFT_HEFFTE_BACKEND}")
else()
message(STATUS "heFFTe backend: stock (builtin FFT implementation, tested for corrected but not optimized for production)")
endif()
message(STATUS "Using distributed FFT algorithms from heFTTe")
if(FFT_SINGLE)
message(STATUS "Using single precision FFTs")
else()
Expand All @@ -1010,28 +1011,10 @@ if(PKG_KSPACE)
else()
message(STATUS "Using non-threaded FFTs")
endif()
if (FFT_HEFFTE)
message(STATUS "Using distributed algorithms from heFTTe")
else()
message(STATUS "Using builtin distributed algorithms")
endif()
if(PKG_KOKKOS)
if(Kokkos_ENABLE_CUDA)
if(FFT STREQUAL "KISS")
message(STATUS "Kokkos FFT: KISS")
else()
message(STATUS "Kokkos FFT: cuFFT")
endif()
elseif(Kokkos_ENABLE_HIP)
if(FFT STREQUAL "KISS")
message(STATUS "Kokkos FFT: KISS")
else()
message(STATUS "Kokkos FFT: hipFFT")
endif()
else()
message(STATUS "Kokkos FFT: ${FFT}")
endif()
endif()
message(STATUS "Using builtin distributed FFT algorithms")
endif()
if(PKG_KOKKOS)
message(STATUS "Kokkos FFT: ${FFT_KOKKOS}")
endif()
endif()
if(BUILD_DOC)
Expand Down
24 changes: 20 additions & 4 deletions cmake/Modules/Packages/KOKKOS.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,32 @@ if(PKG_KSPACE)
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp)
set(FFT_KOKKOS "KISS" CACHE STRING "FFT library for Kokkos-enabled KSPACE package")
set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT)
set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES})
validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES)
string(TOUPPER ${FFT_KOKKOS} FFT_KOKKOS)

if(Kokkos_ENABLE_CUDA)
if(NOT (FFT STREQUAL "KISS"))
target_compile_definitions(lammps PRIVATE -DFFT_CUFFT)
if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "CUFFT")))
message(FATAL_ERROR "The CUDA backend of Kokkos requires either KISS FFT or CUFFT.")
elseif(FFT_KOKKOS STREQUAL "KISS")
message(WARNING "Using KISS FFT with the CUDA backend of Kokkos may be sub-optimal.")
target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS)
elseif(FFT_KOKKOS STREQUAL "CUFFT")
target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_CUFFT)
target_link_libraries(lammps PRIVATE cufft)
endif()
elseif(Kokkos_ENABLE_HIP)
if(NOT (FFT STREQUAL "KISS"))
if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "HIPFFT")))
message(FATAL_ERROR "The HIP backend of Kokkos requires either KISS FFT or HIPFFT.")
elseif(FFT_KOKKOS STREQUAL "KISS")
message(WARNING "Using KISS FFT with the HIP backend of Kokkos may be sub-optimal.")
target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS)
elseif(FFT_KOKKOS STREQUAL "HIPFFT")
include(DetectHIPInstallation)
find_package(hipfft REQUIRED)
target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT)
target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_HIPFFT)
target_link_libraries(lammps PRIVATE hip::hipfft)
endif()
endif()
Expand Down
35 changes: 27 additions & 8 deletions cmake/Modules/Packages/KSPACE.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,15 @@ endif()

option(FFT_USE_HEFFTE "Use heFFTe as the distributed FFT engine, overrides the FFT option." OFF)
if(FFT_USE_HEFFTE)
# if FFT_HEFFTE is enabled, switch the builtin FFT engine with Heffte
set(FFT_HEFFTE_BACKEND_VALUES FFTW MKL)
set(FFT_HEFFTE_BACKEND "" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL")
# if FFT_HEFFTE is enabled, use the heFFTe parallel engine instead of the builtin fftMPI engine

# map standard FFT choices to available heFFTe backends: FFTW3 -> FFTW, KISS -> BUILTIN
set(FFT_HEFFTE_BACKEND_VALUES FFTW MKL BUILTIN)
string(REPLACE FFTW3 FFTW FFT_HEFFTE_BACKEND_DEFAULT ${FFT})
string(REPLACE KISS BUILTIN FFT_HEFFTE_BACKEND_DEFAULT ${FFT_HEFFTE_BACKEND_DEFAULT})
set(FFT_HEFFTE_BACKEND "${FFT_HEFFTE_BACKEND_DEFAULT}" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL")
set_property(CACHE FFT_HEFFTE_BACKEND PROPERTY STRINGS ${FFT_HEFFTE_BACKEND_VALUES})
validate_option(FFT_HEFFTE_BACKEND FFT_HEFFTE_BACKEND_VALUES)

if(FFT_HEFFTE_BACKEND STREQUAL "FFTW") # respect the backend choice, FFTW or MKL
set(HEFFTE_COMPONENTS "FFTW")
Expand All @@ -60,24 +65,38 @@ if(FFT_USE_HEFFTE)
set(HEFFTE_COMPONENTS "MKL")
set(Heffte_ENABLE_MKL "ON" CACHE BOOL "Enables MKL backend for heFFTe")
else()
set(HEFFTE_COMPONENTS "BUILTIN")
message(WARNING "FFT_HEFFTE_BACKEND not selected, defaulting to the builtin 'stock' backend, which is intended for testing and is not optimized for production runs")
endif()

find_package(Heffte 2.4.0 QUIET COMPONENTS ${HEFFTE_COMPONENTS})
if (NOT Heffte_FOUND) # download and build
if(BUILD_SHARED_LIBS)
set(BUILD_SHARED_LIBS_WAS_ON YES)
set(BUILD_SHARED_LIBS OFF)
endif()
if(CMAKE_REQUEST_PIC)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif()
set(Heffte_ENABLE_${FFT_HEFFTE_BACKEND} ON)
include(FetchContent)
FetchContent_Declare(HEFFTE_PROJECT # using v2.4.0
URL "https://github.com/icl-utk-edu/heffte/archive/refs/tags/v2.4.0.tar.gz"
URL_HASH SHA256=02310fb4f9688df02f7181667e61c3adb7e38baf79611d80919d47452ff7881d
)
FetchContent_Populate(HEFFTE_PROJECT)

# fixup git hash to show "(unknown)" to avoid compilation failures.
file(READ ${heffte_project_SOURCE_DIR}/include/heffte_config.cmake.h HEFFTE_CFG_FILE_TEXT)
string(REPLACE "@Heffte_GIT_HASH@" "(unknown)" HEFFTE_CFG_FILE_TEXT "${HEFFTE_CFG_FILE_TEXT}")
file(WRITE ${heffte_project_SOURCE_DIR}/include/heffte_config.cmake.h "${HEFFTE_CFG_FILE_TEXT}")

add_subdirectory(${heffte_project_SOURCE_DIR} ${heffte_project_BINARY_DIR})
set_target_properties(lmp PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
set_target_properties(lammps PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
add_library(Heffte::Heffte INTERFACE IMPORTED GLOBAL)
target_link_libraries(Heffte::Heffte INTERFACE Heffte)
add_library(Heffte::Heffte ALIAS Heffte)
if(BUILD_SHARED_LIBS_WAS_ON)
set(BUILD_SHARED_LIBS ON)
endif()
endif()

target_compile_definitions(lammps PRIVATE -DFFT_HEFFTE "-DFFT_HEFFTE_${FFT_HEFFTE_BACKEND}")
target_link_libraries(lammps PRIVATE Heffte::Heffte)
endif()
Expand Down
3 changes: 3 additions & 0 deletions cmake/presets/kokkos-cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,8 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE)
get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokkos/bin/nvcc_wrapper ABSOLUTE)
set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE)

# If KSPACE is also enabled, use CUFFT for FFTs
set(FFT_KOKKOS "CUFFT" CACHE STRING FORCE)

# hide deprecation warnings temporarily for stable release
set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE)
3 changes: 3 additions & 0 deletions cmake/presets/kokkos-hip.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ set(BUILD_OMP ON CACHE BOOL "" FORCE)
set(CMAKE_CXX_COMPILER hipcc CACHE STRING "" FORCE)
set(CMAKE_TUNE_FLAGS "-munsafe-fp-atomics" CACHE STRING "" FORCE)

# If KSPACE is also enabled, use CUFFT for FFTs
set(FFT_KOKKOS "HIPFFT" CACHE STRING FORCE)

# hide deprecation warnings temporarily for stable release
set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE)

Expand Down
27 changes: 20 additions & 7 deletions doc/src/Build_settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,19 @@ libraries and better pipelining for packing and communication.
.. code-block:: bash
-D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
-D FFT_KOKKOS=value # FFTW3 or MKL or KISS or CUFFT or HIPFFT, default is KISS
-D FFT_SINGLE=value # yes or no (default), no = double precision
-D FFT_PACK=value # array (default) or pointer or memcpy
-D FFT_USE_HEFFTE=value # yes or no (default), yes links to heFFTe
.. note::

The values for the FFT variable must be in upper-case. This is
an exception to the rule that all CMake variables can be specified
with lower-case values.
When the Kokkos variant of a package is compiled and selected at run time,
the FFT library selected by the FFT_KOKKOS variable applies. Otherwise,
the FFT library selected by the FFT variable applies.
The same FFT settings apply to both. FFT_KOKKOS must be compatible with the
Kokkos back end - for example, when using the CUDA back end of Kokkos,
you must use either CUFFT or KISS.

Usually these settings are all that is needed. If FFTW3 is
selected, then CMake will try to detect, if threaded FFTW
Expand Down Expand Up @@ -106,6 +110,8 @@ libraries and better pipelining for packing and communication.
FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
# default is KISS if not specified
FFT_INC = -DFFT_KOKKOS_CUFFT # -DFFT_KOKKOS_{FFTW,FFTW3,MKL,CUFFT,HIPFFT,KISS}
# default is KISS if not specified
FFT_INC = -DFFT_SINGLE # do not specify for double precision
FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries
FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries
Expand All @@ -116,6 +122,8 @@ libraries and better pipelining for packing and communication.
FFT_INC = -I/usr/local/include
FFT_PATH = -L/usr/local/lib
FFT_LIB = -lhipfft # hipFFT either precision
FFT_LIB = -lcufft # cuFFT either precision
FFT_LIB = -lfftw3 # FFTW3 double precision
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision
Expand Down Expand Up @@ -178,6 +186,11 @@ The Intel MKL math library is part of the Intel compiler suite. It
can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting
above).

The cuFFT and hipFFT FFT libraries are packaged with NVIDIA's CUDA and
AMD's HIP installations, respectively. These FFT libraries require the
Kokkos acceleration package to be enabled and the Kokkos back end to be
GPU-resident (i.e., HIP or CUDA).

Performing 3d FFTs in parallel can be time-consuming due to data access
and required communication. This cost can be reduced by performing
single-precision FFTs instead of double precision. Single precision
Expand All @@ -189,11 +202,11 @@ generally less than the difference in precision. Using the
``-DFFT_SINGLE`` setting trades off a little accuracy for reduced memory
use and parallel communication costs for transposing 3d FFT data.

When using ``-DFFT_SINGLE`` with FFTW3, you may need to build the FFTW
library a second time with support for single-precision.
When using ``-DFFT_SINGLE`` with FFTW3, you may need to ensure that
the FFTW3 installation includes support for single-precision.

For FFTW3, do the following, which should produce the additional
library ``libfftw3f.a`` or ``libfftw3f.so``\ .
When compiler FFTW3 from source, you can do the following, which should
produce the additional libraries ``libfftw3f.a`` and/or ``libfftw3f.so``\ .

.. code-block:: bash
Expand Down
2 changes: 2 additions & 0 deletions doc/src/Howto_cmake.rst
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ Some common LAMMPS specific variables
- when set to ``name`` the LAMMPS executable and library will be called ``lmp_name`` and ``liblammps_name.a``
* - ``FFT``
- select which FFT library to use: ``FFTW3``, ``MKL``, ``KISS`` (default, unless FFTW3 is found)
* - ``FFT_KOKKOS``
- select which FFT library to use in Kokkos-enabled styles: ``FFTW3``, ``MKL``, ``HIPFFT``, ``CUFFT``, ``KISS`` (default)
* - ``FFT_SINGLE``
- select whether to use single precision FFTs (default: ``off``)
* - ``WITH_JPEG``
Expand Down
9 changes: 8 additions & 1 deletion doc/src/info.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Syntax
info args
* args = one or more of the following keywords: *out*, *all*, *system*, *memory*, *communication*, *computes*, *dumps*, *fixes*, *groups*, *regions*, *variables*, *coeffs*, *styles*, *time*, *accelerator*, or *configuration*
* args = one or more of the following keywords: *out*, *all*, *system*, *memory*, *communication*, *computes*, *dumps*, *fixes*, *groups*, *regions*, *variables*, *coeffs*, *styles*, *time*, *accelerator*, *fft* or *configuration*
* *out* values = *screen*, *log*, *append* filename, *overwrite* filename
* *styles* values = *all*, *angle*, *atom*, *bond*, *compute*, *command*, *dump*, *dihedral*, *fix*, *improper*, *integrate*, *kspace*, *minimize*, *pair*, *region*

Expand Down Expand Up @@ -92,6 +92,13 @@ The *accelerator* category prints out information about compile time
settings of included accelerator support for the GPU, KOKKOS, INTEL,
and OPENMP packages.

.. versionadded:: TBD

The *fft* category prints out information about the included 3d-FFT
support. This lists the 3d-FFT engine, FFT precision, FFT library
used by the FFT engine. If the KOKKOS package is included, the settings
used for the KOKKOS package are displayed as well.

The *styles* category prints the list of styles available in the
current LAMMPS binary. It supports one of the following options
to control which category of styles is printed out:
Expand Down
5 changes: 4 additions & 1 deletion doc/src/kspace_style.rst
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,10 @@ relative RMS error.
For the KOKKOS package, the *pppm/kk* style performs charge
assignment and force interpolation calculations, along with the FFTs
themselves, on the GPU or (optionally) threaded on the CPU when
using OpenMP and FFTW3.
using OpenMP and FFTW3. The specific FFT library is selected using
the FFT_KOKKOS CMake parameter. See the
:doc:`Build settings <Build_settings>` doc page for how to select a
3rd-party FFT library.

----------

Expand Down
12 changes: 1 addition & 11 deletions src/AMOEBA/amoeba_convolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,7 @@
#define LMP_AMOEBA_CONVOLUTION_H

#include "pointers.h"

#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define LMP_FFT_PREC "single"
#define MPI_FFT_SCALAR MPI_FLOAT
#else

typedef double FFT_SCALAR;
#define LMP_FFT_PREC "double"
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
#include "lmpfftsettings.h"

namespace LAMMPS_NS {

Expand Down
29 changes: 15 additions & 14 deletions src/INTEL/pppm_electrode_intel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "update.h"
#include "wire_dipole.h"

#include <algorithm>
#include <cmath>
#include <cstring>

Expand Down Expand Up @@ -159,7 +160,6 @@ void PPPMElectrodeIntel::setup()
PPPMIntel::setup();
prd[0] /= wire_volfactor;
prd[1] /= wire_volfactor;

}

void PPPMElectrodeIntel::compute(int eflag, int vflag)
Expand Down Expand Up @@ -275,7 +275,7 @@ void PPPMElectrodeIntel::compute(int eflag, int vflag)
slabflag = 0; // bypass compute_second's slabcorr()
PPPMIntel::compute_second(eflag, vflag);
slabflag = tempslabflag;
boundcorr->compute_corr(qsum, eflag_atom, eflag_global, energy, eatom);
boundcorr->compute_corr(qsum, eflag_atom, eflag_global, energy, eatom);
compute_vector_called = false;
}

Expand Down Expand Up @@ -328,7 +328,7 @@ void PPPMElectrodeIntel::compute_vector(double *vec, int sensor_grpbit, int sour
// electrolyte density (without writing an additional function)
FFT_SCALAR ***density_brick_real = density_brick;
FFT_SCALAR *density_fft_real = density_fft;
if (neighbor->ago != 0) pack_buffers(); // since midstep positions may be outdated
if (neighbor->ago != 0) pack_buffers(); // since midstep positions may be outdated
switch (fix->precision()) {
case FixIntel::PREC_MODE_MIXED:
make_rho_in_brick<float, double>(fix->get_mixed_buffers(), source_grpbit,
Expand Down Expand Up @@ -1197,22 +1197,23 @@ void PPPMElectrodeIntel::pack_buffers_q()
{
fix->start_watch(TIME_PACK);
int packthreads;
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
else packthreads = 1;
#if defined(_OPENMP)
#pragma omp parallel if (packthreads > 1)
#endif
if (comm->nthreads > INTEL_HTHREADS)
packthreads = comm->nthreads;
else
packthreads = 1;
#if defined(_OPENMP)
#pragma omp parallel if (packthreads > 1)
#endif
{
int ifrom, ito, tid;
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
packthreads,
sizeof(IntelBuffers<float,double>::atom_t));
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, packthreads,
sizeof(IntelBuffers<float, double>::atom_t));
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
fix->get_mixed_buffers()->thr_pack_q(ifrom,ito);
fix->get_mixed_buffers()->thr_pack_q(ifrom, ito);
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
fix->get_double_buffers()->thr_pack_q(ifrom,ito);
fix->get_double_buffers()->thr_pack_q(ifrom, ito);
else
fix->get_single_buffers()->thr_pack_q(ifrom,ito);
fix->get_single_buffers()->thr_pack_q(ifrom, ito);
}
fix->stop_watch(TIME_PACK);
}
3 changes: 0 additions & 3 deletions src/INTEL/pppm_electrode_intel.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,9 @@ KSpaceStyle(pppm/electrode/intel,PPPMElectrodeIntel)
#ifndef LMP_PPPM_ELECTRODE_INTEL_H
#define LMP_PPPM_ELECTRODE_INTEL_H

#include "boundary_correction.h"
#include "electrode_kspace.h"
#include "fix_intel.h"
#include "pppm.h"
#include "pppm_intel.h"
#include <algorithm>

namespace LAMMPS_NS {

Expand Down
Loading

0 comments on commit b9ec854

Please sign in to comment.