From 122d424dd977bb5e0cfa16139dcce403288fb1d2 Mon Sep 17 00:00:00 2001 From: Rupert Nash Date: Mon, 6 May 2024 10:35:58 +0100 Subject: [PATCH] Tidy the CMake for aarch64 builds; make lattices conform to new style interface; fix mac+gcc+catch2 problem --- CMake/HemeLbOptions.cmake | 9 +++- Code/lb/kernels/GuoForcingLBGK.h | 2 +- Code/lb/lattices/Lattice.h | 50 +++++-------------- Code/tests/lb/GuoForcingTests.cc | 16 +++--- dependencies/Catch2/build.cmake | 3 +- dependencies/Catch2/mac-conditionals-gcc.diff | 14 ++++++ 6 files changed, 46 insertions(+), 48 deletions(-) create mode 100644 dependencies/Catch2/mac-conditionals-gcc.diff diff --git a/CMake/HemeLbOptions.cmake b/CMake/HemeLbOptions.cmake index 9b76e6191..5ecc6da06 100644 --- a/CMake/HemeLbOptions.cmake +++ b/CMake/HemeLbOptions.cmake @@ -23,8 +23,15 @@ pass_option(HEMELB HEMELB_BUILD_COLLOIDS "Build the colloids option" OFF) pass_option(HEMELB HEMELB_VALIDATE_GEOMETRY "Validate geometry" OFF) pass_option(HEMELB HEMELB_USE_ALL_WARNINGS_GNU "Show all compiler warnings on development builds (gnu-style-compilers)" ON) pass_option(HEMELB HEMELB_DEPENDENCIES_SET_RPATH "Set runtime RPATH" ON) -pass_option(HEMELB HEMELB_USE_SSE3 "Use SSE3 intrinsics" ON) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(_default_sse3_flag ON) +else() + set(_default_sse3_flag OFF) +endif() +pass_option(HEMELB HEMELB_USE_SSE3 "Use SSE3 intrinsics" ${_default_sse3_flag}) pass_option(HEMELB HEMELB_USE_VELOCITY_WEIGHTS_FILE "Use Velocity weights file" OFF) + pass_option(HEMELB HEMELB_SEPARATE_CONCERNS "Communicate for each concern separately" OFF) if (HEMELB_BUILD_RBC) diff --git a/Code/lb/kernels/GuoForcingLBGK.h b/Code/lb/kernels/GuoForcingLBGK.h index 757b94271..cb42a7bab 100644 --- a/Code/lb/kernels/GuoForcingLBGK.h +++ b/Code/lb/kernels/GuoForcingLBGK.h @@ -94,7 +94,7 @@ namespace hemelb::lb LatticeType::CalculateForceDistribution(lbmParams->GetTau(), hydroVars.velocity, hydroVars.force, - hydroVars.forceDist.data()); + 
hydroVars.forceDist); for (Direction dir = 0; dir < LatticeType::NUMVECTORS; ++dir) hydroVars.SetFPostCollision(dir, diff --git a/Code/lb/lattices/Lattice.h b/Code/lb/lattices/Lattice.h index f99cb7894..8c0f0b7af 100644 --- a/Code/lb/lattices/Lattice.h +++ b/Code/lb/lattices/Lattice.h @@ -446,48 +446,22 @@ namespace hemelb::lb inline static void CalculateForceDistribution(const distribn_t &tau, const LatticeVelocity& velocity, const LatticeForceVector& force, - distribn_t forceDist[]) + mut_span forceDist) { - CalculateForceDistribution(tau, - velocity.x(), velocity.y(), velocity.z(), - force.x(), force.y(), force.z(), - forceDist); - } - - /** - * Calculate Force - * @param tau - * @param force_x - * @param force_y - * @param force_z - * @param forceDist - */ - inline static void CalculateForceDistribution(const distribn_t &tau, - const distribn_t &velocity_x, - const distribn_t &velocity_y, - const distribn_t &velocity_z, - const LatticeForce &force_x, - const LatticeForce &force_y, - const LatticeForce &force_z, - distribn_t forceDist[]) - { - auto const invCs2 = 1e0 / Cs2; - auto const invCs4 = invCs2 * invCs2; + auto constexpr invCs2 = 1e0 / Cs2; + auto constexpr invCs4 = invCs2 * invCs2; distribn_t prefactor = (1.0 - (1.0 / (2.0 * tau))); - distribn_t vScalarProductF = velocity_x * force_x + velocity_y * force_y - + velocity_z * force_z; + distribn_t vDotF = Dot(velocity, force); - for (Direction i = 0; i < NUMVECTORS; ++i) - { - distribn_t vScalarProductDirection = velocity_x * CX[i] - + velocity_y * CY[i] + velocity_z * CZ[i]; - distribn_t FScalarProductDirection = force_x * CX[i] + force_y * CY[i] - + force_z * CZ[i]; - forceDist[i] = prefactor * EQMWEIGHTS[i] - * ( invCs2 * (FScalarProductDirection - vScalarProductF) - + invCs4 * (FScalarProductDirection * vScalarProductDirection)); + for (Direction i = 0; i < NUMVECTORS; ++i) { + distribn_t vDotDir = Dot(velocity, CD[i]); + distribn_t fDotDir = Dot(force, CD[i]); + + forceDist[i] = prefactor * 
EQMWEIGHTS[i] * ( + invCs2 * (fDotDir - vDotF) + invCs4 * (fDotDir * vDotDir) + ); } - } + } #endif // Calculate density, momentum and the equilibrium distribution diff --git a/Code/tests/lb/GuoForcingTests.cc b/Code/tests/lb/GuoForcingTests.cc index 26ac82ba0..7d1fdbdc0 100644 --- a/Code/tests/lb/GuoForcingTests.cc +++ b/Code/tests/lb/GuoForcingTests.cc @@ -166,7 +166,7 @@ namespace hemelb::tests void testForceDistributionBadTau() { distribn_t Fi[LatticeType::NUMVECTORS]; - LatticeType::CalculateForceDistribution(0.5, 1.0, 10.0, 100.0, 1, 1, 1, Fi); + LatticeType::CalculateForceDistribution(0.5, {1.0, 10.0, 100.0}, {1, 1, 1}, Fi); for (size_t i(0); i < LatticeType::NUMVECTORS; ++i) REQUIRE(Fi[i] == apprx(0)); } @@ -178,7 +178,7 @@ namespace hemelb::tests distribn_t Fi[LatticeType::NUMVECTORS]; for (size_t i(0); i < LatticeType::NUMVECTORS; ++i) { LatticeVelocity const ei(LatticeType::CX[i], LatticeType::CY[i], LatticeType::CZ[i]); - LatticeType::CalculateForceDistribution(0.25, ei[0], ei[1], ei[2], ei[0], ei[1], ei[2], Fi); + LatticeType::CalculateForceDistribution(0.25, ei, ei, Fi); distribn_t ei_norm(Dot(ei, ei)); distribn_t const expected = (1. - 0.5 / 0.25) * LatticeType::EQMWEIGHTS[i] * (ei_norm * ei_norm * 9.0); @@ -192,7 +192,7 @@ namespace hemelb::tests distribn_t Fi[LatticeType::NUMVECTORS]; for (size_t i(0); i < LatticeType::NUMVECTORS; ++i) { LatticeVelocity const ei(LatticeType::CX[i], LatticeType::CY[i], LatticeType::CZ[i]); - LatticeType::CalculateForceDistribution(0.25, 0, 0, 0, ei[0], ei[1], ei[2], Fi); + LatticeType::CalculateForceDistribution(0.25, LatticeVelocity::Zero(), ei, Fi); distribn_t ei_norm(Dot(ei, ei)); distribn_t const expected = (1. 
- 0.5 / 0.25) * LatticeType::EQMWEIGHTS[i] * (ei_norm * 3.0); REQUIRE(Fi[i] == apprx(expected)); @@ -208,7 +208,7 @@ namespace hemelb::tests distribn_t Fi[LatticeType::NUMVECTORS]; for(size_t i = 0; i < N; ++i) { - LatticeType::CalculateForceDistribution(1e0, rdm(e1), rdm(e1), rdm(e1), 0e0, 0e0, 0e0, Fi); + LatticeType::CalculateForceDistribution(1e0, {rdm(e1), rdm(e1), rdm(e1)}, LatticeForceVector::Zero(), Fi); for(size_t j(0); j < LatticeType::NUMVECTORS; ++j) { REQUIRE(Fi[j] == Approx(0).margin(1e-12)); } @@ -226,18 +226,20 @@ namespace hemelb::tests // No velocity for (size_t i = 0; i < N; ++i) { LatticeType::CalculateForceDistribution( - 1e0, 0e0, 0e0, 0e0, rdm(e1), rdm(e1), rdm(e1), Fi); + 1e0, LatticeVelocity::Zero(), + {rdm(e1), rdm(e1), rdm(e1)}, + Fi); auto const moment = std::accumulate(Fi, Fi+LatticeType::NUMVECTORS, 0e0); REQUIRE(moment == apprx(0.0)); } // Force and Velocity are perpendicular for(size_t i = 0; i < N; ++i) { auto const a = rdm(e1), b = rdm(e1); - LatticeType::CalculateForceDistribution(1e0, rdm(e1), a, b, 0, -b, a, Fi); + LatticeType::CalculateForceDistribution(1e0, {rdm(e1), a, b}, {0, -b, a}, Fi); auto const moment0 = std::accumulate(Fi, Fi+LatticeType::NUMVECTORS, 0e0); REQUIRE(moment0 == apprx(0.0)); - LatticeType::CalculateForceDistribution(1e0, 0, a, b, rdm(e1), -b, a, Fi); + LatticeType::CalculateForceDistribution(1e0, {0, a, b}, {rdm(e1), -b, a}, Fi); auto const moment1 = std::accumulate(Fi, Fi+LatticeType::NUMVECTORS, 0e0); REQUIRE(moment1 == apprx(0.0)); } diff --git a/dependencies/Catch2/build.cmake b/dependencies/Catch2/build.cmake index c6180e605..a296016b1 100644 --- a/dependencies/Catch2/build.cmake +++ b/dependencies/Catch2/build.cmake @@ -17,4 +17,5 @@ ExternalProject_Add( dep_Catch2 URL ${CATCH2_TARBALL} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${HEMELB_DEPENDENCIES_INSTALL_PREFIX} -DBUILD_TESTING=OFF - ) + PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/mac-conditionals-gcc.diff +) diff --git 
a/dependencies/Catch2/mac-conditionals-gcc.diff b/dependencies/Catch2/mac-conditionals-gcc.diff new file mode 100644 index 000000000..57d68d7c9 --- /dev/null +++ b/dependencies/Catch2/mac-conditionals-gcc.diff @@ -0,0 +1,14 @@ +--- a/single_include/catch2/catch.hpp 2022-04-12 21:38:17 ++++ b/single_include/catch2/catch.hpp 2024-05-03 12:54:23 +@@ -69,6 +69,11 @@ + // See e.g.: + // https://opensource.apple.com/source/CarbonHeaders/CarbonHeaders-18.1/TargetConditionals.h.auto.html + #ifdef __APPLE__ ++# ifndef __clang__ ++# ifndef __has_extension /* fallback for non-clang compilers lacking it; avoids depending on __has_builtin */ ++# define __has_extension(x) 0 ++# endif ++# endif + # include <TargetConditionals.h> + # if (defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1) || \ + (defined(TARGET_OS_MAC) && TARGET_OS_MAC == 1)