From ef96bdda1ced57afa8783d520387784165f7ce00 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 12 Jul 2024 08:33:22 +0200 Subject: [PATCH 1/5] Switch to an internal buffer for the conversion from dates to tsince. --- src/model/sgp4.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/model/sgp4.cpp b/src/model/sgp4.cpp index e6f98b77b..23908d5cc 100644 --- a/src/model/sgp4.cpp +++ b/src/model/sgp4.cpp @@ -511,6 +511,9 @@ struct sgp4_propagator::impl { std::vector m_init_buffer; cfunc m_cf_tprop; std::optional m_dtens; + // NOTE: this is a buffer used to convert dates to tsinces in the call operator + // overloads taking dates in input. + std::vector m_tms_vec; // Serialization. template @@ -520,6 +523,7 @@ struct sgp4_propagator::impl { ar & m_init_buffer; ar & m_cf_tprop; ar & m_dtens; + // NOTE: no need to save the content of m_tms_vec. } }; @@ -634,7 +638,7 @@ sgp4_propagator::sgp4_propagator(ptag, std::tuple, cfunc, c // Build and assign the implementation. m_impl = std::make_unique( - impl{std::move(sat_buffer), std::move(init_buffer), std::move(cf_tprop), std::move(dt)}); + impl{std::move(sat_buffer), std::move(init_buffer), std::move(cf_tprop), std::move(dt), {}}); } template @@ -843,10 +847,7 @@ void sgp4_propagator::operator()(out_2d out, in_1d dates) // We need to convert the dates into time deltas suitable for use in the other call operator overload. // Prepare the memory buffer. - // NOTE: this could also be an internal buffer to be re-used across different - // invocations of the call operators. Like this, we would avoid a memory allocation - // for each call operator invocation. Worth it? - std::vector tms_vec; + auto &tms_vec = m_impl->m_tms_vec; using tms_vec_size_t = decltype(tms_vec.size()); tms_vec.resize(boost::numeric_cast(dates.extent(0))); @@ -987,7 +988,7 @@ void sgp4_propagator::operator()(out_3d out, in_2d dates) // Prepare the memory buffer. const auto n_evals = dates.extent(0); - std::vector tms_vec; + auto &tms_vec = m_impl->m_tms_vec; using tms_vec_size_t = decltype(tms_vec.size()); tms_vec.resize(boost::safe_numerics::safe(n_evals) * dates.extent(1)); From 2e1b898d4930a5be969fe1f6018abcb9aaccdb17 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 16 Jul 2024 11:58:09 +0200 Subject: [PATCH 2/5] Internal doc bit. --- include/heyoka/model/sgp4.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/heyoka/model/sgp4.hpp b/include/heyoka/model/sgp4.hpp index 7f8bbde58..da3510351 100644 --- a/include/heyoka/model/sgp4.hpp +++ b/include/heyoka/model/sgp4.hpp @@ -183,6 +183,11 @@ class HEYOKA_DLL_PUBLIC_INLINE_CLASS sgp4_propagator using out_3d = mdspan>; // NOTE: it is important to document properly the non-overlapping // memory requirement for the input arguments. + // NOTE: because of the use of an internal buffer to convert + // dates to tsinces, the date overloads of these operators are + // never thread-safe. This needs to be documented properly. + // Perhaps this can be fixed one day if we implement the conversion + // to dates on-the-fly via double-length primitives. void operator()(out_2d, in_1d); void operator()(out_2d, in_1d); void operator()(out_3d, in_2d); From 7f8e3ccfda1cb3f3e4da826060b6ed2ceb73e0b5 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 16 Jul 2024 12:24:44 +0200 Subject: [PATCH 3/5] Be more precise about checking for fast math flags when deciding whether or not to use the fast vectorised implementations. --- src/detail/llvm_helpers.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/detail/llvm_helpers.cpp b/src/detail/llvm_helpers.cpp index 33a7cecd7..f289480f2 100644 --- a/src/detail/llvm_helpers.cpp +++ b/src/detail/llvm_helpers.cpp @@ -290,8 +290,8 @@ llvm::CallInst *llvm_add_vfabi_attrs(llvm_state &s, llvm::CallInst *call, const auto &context = s.context(); auto &builder = s.builder(); - // Are we in fast math mode? - const auto use_fast_math = builder.getFastMathFlags().isFast(); + // Can we use the faster but less precise vectorised implementations? + const auto use_fast_math = builder.getFastMathFlags().approxFunc(); if (!vfi.empty()) { // There exist vector variants of the scalar function. @@ -546,8 +546,8 @@ llvm::Value *llvm_math_intr(llvm_state &s, const std::string &intr_name, auto &builder = s.builder(); - // Are we in fast math mode? - const auto use_fast_math = builder.getFastMathFlags().isFast(); + // Can we use the faster but less precise vectorised implementations? + const auto use_fast_math = builder.getFastMathFlags().approxFunc(); if (llvm_stype_can_use_math_intrinsics(s, scal_t)) { // We can use the LLVM intrinsics for the given scalar type. @@ -692,8 +692,8 @@ llvm::Value *llvm_math_cmath(llvm_state &s, const std::string &base_name, Args * auto &builder = s.builder(); - // Are we in fast math mode? - const auto use_fast_math = builder.getFastMathFlags().isFast(); + // Can we use the faster but less precise vectorised implementations? + const auto use_fast_math = builder.getFastMathFlags().approxFunc(); // Determine the type and scalar type of the arguments. auto *x_t = arg_types[0]; @@ -2720,7 +2720,7 @@ namespace class fmf_disabler { ir_builder *m_builder; - llvm::FastMathFlags m_orig_fmf; + const llvm::FastMathFlags m_orig_fmf; public: explicit fmf_disabler(ir_builder &b) : m_builder(&b), m_orig_fmf(m_builder->getFastMathFlags()) From 704ac5113694c3f3ccbe5b51a3e55d1451ea4587 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 16 Jul 2024 13:47:33 +0200 Subject: [PATCH 4/5] Reduce the number of max iters in the Kepler solvers, and don't log in case of failure. In case of failure, just return NaN. --- src/detail/llvm_helpers_celmec.cpp | 45 ++---------------------------- 1 file changed, 3 insertions(+), 42 deletions(-) diff --git a/src/detail/llvm_helpers_celmec.cpp b/src/detail/llvm_helpers_celmec.cpp index 3fcb5deeb..f411a2736 100644 --- a/src/detail/llvm_helpers_celmec.cpp +++ b/src/detail/llvm_helpers_celmec.cpp @@ -53,9 +53,7 @@ #include #include -#include #include -#include #include #include @@ -481,7 +479,7 @@ llvm::Function *llvm_add_inv_kep_E(llvm_state &s, llvm::Type *fp_t, std::uint32_ // number of iterations has been exceeded). // NOTE: hard-code max_iter for the time being. It would probably // make sense to make it dependent on the epsilon of fp_t though? - auto *max_iter = builder.getInt32(50); + auto *max_iter = builder.getInt32(20); auto loop_cond = [&]() -> llvm::Value * { // NOTE: we use an *absolute* tolerance of 4*eps for both the check // on the magnitude of f(E) and on the magnitude of the bounding @@ -598,10 +596,6 @@ llvm::Function *llvm_add_inv_kep_E(llvm_state &s, llvm::Type *fp_t, std::uint32_ auto *new_val = builder.CreateSelect( tol_check, llvm_constantfp(s, tp, std::numeric_limits::quiet_NaN()), old_val); builder.CreateStore(new_val, retval); - - llvm_invoke_external(s, "heyoka_inv_kep_E_max_iter", builder.getVoidTy(), {}, - llvm::AttributeList::get(context, llvm::AttributeList::FunctionIndex, - {llvm::Attribute::NoUnwind, llvm::Attribute::WillReturn})); }, []() {}); @@ -869,7 +863,7 @@ llvm::Function *llvm_add_inv_kep_F(llvm_state &s, llvm::Type *fp_t, std::uint32_ // number of iterations has been exceeded). // NOTE: hard-code max_iter for the time being. It would probably // make sense to make it dependent on the epsilon of fp_t though? - auto *max_iter = builder.getInt32(50); + auto *max_iter = builder.getInt32(20); auto loop_cond = [&]() -> llvm::Value * { // NOTE: we use an *absolute* tolerance of 4*eps for both the check // on the magnitude of f(F) and on the magnitude of the bounding @@ -980,10 +974,6 @@ llvm::Function *llvm_add_inv_kep_F(llvm_state &s, llvm::Type *fp_t, std::uint32_ auto *new_val = builder.CreateSelect( tol_check, llvm_constantfp(s, tp, std::numeric_limits::quiet_NaN()), old_val); builder.CreateStore(new_val, retval); - - llvm_invoke_external(s, "heyoka_inv_kep_F_max_iter", builder.getVoidTy(), {}, - llvm::AttributeList::get(context, llvm::AttributeList::FunctionIndex, - {llvm::Attribute::NoUnwind, llvm::Attribute::WillReturn})); }, []() {}); @@ -1184,7 +1174,7 @@ llvm::Function *llvm_add_inv_kep_DE(llvm_state &s, llvm::Type *fp_t, std::uint32 // number of iterations has been exceeded). // NOTE: hard-code max_iter for the time being. It would probably // make sense to make it dependent on the epsilon of fp_t though? - auto *max_iter = builder.getInt32(50); + auto *max_iter = builder.getInt32(20); auto loop_cond = [&]() -> llvm::Value * { // NOTE: we use an *absolute* tolerance of 4*eps for both the check // on the magnitude of f(DE) and on the magnitude of the bounding @@ -1295,10 +1285,6 @@ llvm::Function *llvm_add_inv_kep_DE(llvm_state &s, llvm::Type *fp_t, std::uint32 auto *new_val = builder.CreateSelect( tol_check, llvm_constantfp(s, tp, std::numeric_limits::quiet_NaN()), old_val); builder.CreateStore(new_val, retval); - - llvm_invoke_external(s, "heyoka_inv_kep_DE_max_iter", builder.getVoidTy(), {}, - llvm::AttributeList::get(context, llvm::AttributeList::FunctionIndex, - {llvm::Attribute::NoUnwind, llvm::Attribute::WillReturn})); }, []() {}); @@ -1333,28 +1319,3 @@ llvm::Function *llvm_add_inv_kep_DE(llvm_state &s, llvm::Type *fp_t, std::uint32 } // namespace detail HEYOKA_END_NAMESPACE - -// LCOV_EXCL_START - -// NOTE: these functions will be called when numerical root finding exceeds the maximum number -// of allowed iterations. -extern "C" { - -HEYOKA_DLL_PUBLIC void heyoka_inv_kep_E_max_iter() noexcept -{ - heyoka::detail::get_logger()->warn("iteration limit exceeded while solving the elliptic inverse Kepler equation"); -} - -HEYOKA_DLL_PUBLIC void heyoka_inv_kep_F_max_iter() noexcept -{ - heyoka::detail::get_logger()->warn( - "iteration limit exceeded while solving the inverse Kepler equation for the eccentric longitude F"); -} - -HEYOKA_DLL_PUBLIC void heyoka_inv_kep_DE_max_iter() noexcept -{ - heyoka::detail::get_logger()->warn("iteration limit exceeded while solving the inverse Kepler equation for DE"); -} -} - -// LCOV_EXCL_STOP From 35d349365eb07a757387a5cd80481e69a7cc5fe7 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 16 Jul 2024 13:49:34 +0200 Subject: [PATCH 5/5] Update changelog. --- doc/changelog.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index 2e5658d79..af5afb90f 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -14,6 +14,13 @@ New - Add relational and logical operators to the expression system (`#432 `__). +Changes +~~~~~~~ + +- Reduce the maximum number of iterations in the Kepler solvers + and do not log if the iteration limit is reached + (`#434 `__). + Fix ~~~