bluescarni · bluescarni · Jul 16, 2024 · Jul 12, 2024 · Jul 16, 2024 · Jul 16, 2024
diff --git a/doc/changelog.rst b/doc/changelog.rst
@@ -14,6 +14,13 @@ New
 - Add relational and logical operators to the expression system
   (`#432 <https://github.com/bluescarni/heyoka/pull/432>`__).
 
+Changes
+~~~~~~~
+
+- Reduce the maximum number of iterations in the Kepler solvers
+  and do not log if the iteration limit is reached
+  (`#434 <https://github.com/bluescarni/heyoka/pull/434>`__).
+
 Fix
 ~~~
 

diff --git a/include/heyoka/model/sgp4.hpp b/include/heyoka/model/sgp4.hpp
@@ -183,6 +183,11 @@ class HEYOKA_DLL_PUBLIC_INLINE_CLASS sgp4_propagator
     using out_3d = mdspan<T, dextents<std::size_t, 3>>;
     // NOTE: it is important to document properly the non-overlapping
     // memory requirement for the input arguments.
+    // NOTE: because of the use of an internal buffer to convert
+    // dates to tsinces, the date overloads of these operators are
+    // never thread-safe. This needs to be documented properly.
+    // Perhaps this can be fixed one day if we implement the conversion
+    // to dates on-the-fly via double-length primitives.
     void operator()(out_2d, in_1d<T>);
     void operator()(out_2d, in_1d<date>);
     void operator()(out_3d, in_2d<T>);

diff --git a/src/detail/llvm_helpers.cpp b/src/detail/llvm_helpers.cpp
@@ -290,8 +290,8 @@ llvm::CallInst *llvm_add_vfabi_attrs(llvm_state &s, llvm::CallInst *call, const
     auto &context = s.context();
     auto &builder = s.builder();
 
-    // Are we in fast math mode?
-    const auto use_fast_math = builder.getFastMathFlags().isFast();
+    // Can we use the faster but less precise vectorised implementations?
+    const auto use_fast_math = builder.getFastMathFlags().approxFunc();
 
     if (!vfi.empty()) {
         // There exist vector variants of the scalar function.
@@ -546,8 +546,8 @@ llvm::Value *llvm_math_intr(llvm_state &s, const std::string &intr_name,
 
     auto &builder = s.builder();
 
-    // Are we in fast math mode?
-    const auto use_fast_math = builder.getFastMathFlags().isFast();
+    // Can we use the faster but less precise vectorised implementations?
+    const auto use_fast_math = builder.getFastMathFlags().approxFunc();
 
     if (llvm_stype_can_use_math_intrinsics(s, scal_t)) {
         // We can use the LLVM intrinsics for the given scalar type.
@@ -692,8 +692,8 @@ llvm::Value *llvm_math_cmath(llvm_state &s, const std::string &base_name, Args *
 
     auto &builder = s.builder();
 
-    // Are we in fast math mode?
-    const auto use_fast_math = builder.getFastMathFlags().isFast();
+    // Can we use the faster but less precise vectorised implementations?
+    const auto use_fast_math = builder.getFastMathFlags().approxFunc();
 
     // Determine the type and scalar type of the arguments.
     auto *x_t = arg_types[0];
@@ -2720,7 +2720,7 @@ namespace
 class fmf_disabler
 {
     ir_builder *m_builder;
-    llvm::FastMathFlags m_orig_fmf;
+    const llvm::FastMathFlags m_orig_fmf;
 
 public:
     explicit fmf_disabler(ir_builder &b) : m_builder(&b), m_orig_fmf(m_builder->getFastMathFlags())

diff --git a/src/detail/llvm_helpers_celmec.cpp b/src/detail/llvm_helpers_celmec.cpp
@@ -53,9 +53,7 @@
 
 #include <heyoka/detail/llvm_func_create.hpp>
 #include <heyoka/detail/llvm_helpers.hpp>
-#include <heyoka/detail/logging_impl.hpp>
 #include <heyoka/detail/type_traits.hpp>
-#include <heyoka/detail/visibility.hpp>
 #include <heyoka/llvm_state.hpp>
 #include <heyoka/number.hpp>
 
@@ -481,7 +479,7 @@ llvm::Function *llvm_add_inv_kep_E(llvm_state &s, llvm::Type *fp_t, std::uint32_
     // number of iterations has been exceeded).
     // NOTE: hard-code max_iter for the time being. It would probably
     // make sense to make it dependent on the epsilon of fp_t though?
-    auto *max_iter = builder.getInt32(50);
+    auto *max_iter = builder.getInt32(20);
     auto loop_cond = [&]() -> llvm::Value * {
         // NOTE: we use an *absolute* tolerance of 4*eps for both the check
         // on the magnitude of f(E) and on the magnitude of the bounding
@@ -598,10 +596,6 @@ llvm::Function *llvm_add_inv_kep_E(llvm_state &s, llvm::Type *fp_t, std::uint32_
             auto *new_val = builder.CreateSelect(
                 tol_check, llvm_constantfp(s, tp, std::numeric_limits<double>::quiet_NaN()), old_val);
             builder.CreateStore(new_val, retval);
-
-            llvm_invoke_external(s, "heyoka_inv_kep_E_max_iter", builder.getVoidTy(), {},
-                                 llvm::AttributeList::get(context, llvm::AttributeList::FunctionIndex,
-                                                          {llvm::Attribute::NoUnwind, llvm::Attribute::WillReturn}));
         },
         []() {});
 
@@ -869,7 +863,7 @@ llvm::Function *llvm_add_inv_kep_F(llvm_state &s, llvm::Type *fp_t, std::uint32_
     // number of iterations has been exceeded).
     // NOTE: hard-code max_iter for the time being. It would probably
     // make sense to make it dependent on the epsilon of fp_t though?
-    auto *max_iter = builder.getInt32(50);
+    auto *max_iter = builder.getInt32(20);
     auto loop_cond = [&]() -> llvm::Value * {
         // NOTE: we use an *absolute* tolerance of 4*eps for both the check
         // on the magnitude of f(F) and on the magnitude of the bounding
@@ -980,10 +974,6 @@ llvm::Function *llvm_add_inv_kep_F(llvm_state &s, llvm::Type *fp_t, std::uint32_
             auto *new_val = builder.CreateSelect(
                 tol_check, llvm_constantfp(s, tp, std::numeric_limits<double>::quiet_NaN()), old_val);
             builder.CreateStore(new_val, retval);
-
-            llvm_invoke_external(s, "heyoka_inv_kep_F_max_iter", builder.getVoidTy(), {},
-                                 llvm::AttributeList::get(context, llvm::AttributeList::FunctionIndex,
-                                                          {llvm::Attribute::NoUnwind, llvm::Attribute::WillReturn}));
         },
         []() {});
 
@@ -1184,7 +1174,7 @@ llvm::Function *llvm_add_inv_kep_DE(llvm_state &s, llvm::Type *fp_t, std::uint32
     // number of iterations has been exceeded).
     // NOTE: hard-code max_iter for the time being. It would probably
     // make sense to make it dependent on the epsilon of fp_t though?
-    auto *max_iter = builder.getInt32(50);
+    auto *max_iter = builder.getInt32(20);
     auto loop_cond = [&]() -> llvm::Value * {
         // NOTE: we use an *absolute* tolerance of 4*eps for both the check
         // on the magnitude of f(DE) and on the magnitude of the bounding
@@ -1295,10 +1285,6 @@ llvm::Function *llvm_add_inv_kep_DE(llvm_state &s, llvm::Type *fp_t, std::uint32
             auto *new_val = builder.CreateSelect(
                 tol_check, llvm_constantfp(s, tp, std::numeric_limits<double>::quiet_NaN()), old_val);
             builder.CreateStore(new_val, retval);
-
-            llvm_invoke_external(s, "heyoka_inv_kep_DE_max_iter", builder.getVoidTy(), {},
-                                 llvm::AttributeList::get(context, llvm::AttributeList::FunctionIndex,
-                                                          {llvm::Attribute::NoUnwind, llvm::Attribute::WillReturn}));
         },
         []() {});
 
@@ -1333,28 +1319,3 @@ llvm::Function *llvm_add_inv_kep_DE(llvm_state &s, llvm::Type *fp_t, std::uint32
 } // namespace detail
 
 HEYOKA_END_NAMESPACE
-
-// LCOV_EXCL_START
-
-// NOTE: these functions will be called when numerical root finding exceeds the maximum number
-// of allowed iterations.
-extern "C" {
-
-HEYOKA_DLL_PUBLIC void heyoka_inv_kep_E_max_iter() noexcept
-{
-    heyoka::detail::get_logger()->warn("iteration limit exceeded while solving the elliptic inverse Kepler equation");
-}
-
-HEYOKA_DLL_PUBLIC void heyoka_inv_kep_F_max_iter() noexcept
-{
-    heyoka::detail::get_logger()->warn(
-        "iteration limit exceeded while solving the inverse Kepler equation for the eccentric longitude F");
-}
-
-HEYOKA_DLL_PUBLIC void heyoka_inv_kep_DE_max_iter() noexcept
-{
-    heyoka::detail::get_logger()->warn("iteration limit exceeded while solving the inverse Kepler equation for DE");
-}
-}
-
-// LCOV_EXCL_STOP
diff --git a/src/model/sgp4.cpp b/src/model/sgp4.cpp
@@ -511,6 +511,9 @@ struct sgp4_propagator<T>::impl {
     std::vector<T> m_init_buffer;
     cfunc<T> m_cf_tprop;
     std::optional<dtens> m_dtens;
+    // NOTE: this is a buffer used to convert dates to tsinces in the call operator
+    // overloads taking dates in input.
+    std::vector<T> m_tms_vec;
 
     // Serialization.
     template <typename Archive>
@@ -520,6 +523,7 @@ struct sgp4_propagator<T>::impl {
         ar & m_init_buffer;
         ar & m_cf_tprop;
         ar & m_dtens;
+        // NOTE: no need to save the content of m_tms_vec.
     }
 };
 
@@ -634,7 +638,7 @@ sgp4_propagator<T>::sgp4_propagator(ptag, std::tuple<std::vector<T>, cfunc<T>, c
 
     // Build and assign the implementation.
     m_impl = std::make_unique<impl>(
-        impl{std::move(sat_buffer), std::move(init_buffer), std::move(cf_tprop), std::move(dt)});
+        impl{std::move(sat_buffer), std::move(init_buffer), std::move(cf_tprop), std::move(dt), {}});
 }
 
 template <typename T>
@@ -843,10 +847,7 @@ void sgp4_propagator<T>::operator()(out_2d out, in_1d<date> dates)
     // We need to convert the dates into time deltas suitable for use in the other call operator overload.
 
     // Prepare the memory buffer.
-    // NOTE: this could also be an internal buffer to be re-used across different
-    // invocations of the call operators. Like this, we would avoid a memory allocation
-    // for each call operator invocation. Worth it?
-    std::vector<T> tms_vec;
+    auto &tms_vec = m_impl->m_tms_vec;
     using tms_vec_size_t = decltype(tms_vec.size());
     tms_vec.resize(boost::numeric_cast<tms_vec_size_t>(dates.extent(0)));
 
@@ -987,7 +988,7 @@ void sgp4_propagator<T>::operator()(out_3d out, in_2d<date> dates)
 
     // Prepare the memory buffer.
     const auto n_evals = dates.extent(0);
-    std::vector<T> tms_vec;
+    auto &tms_vec = m_impl->m_tms_vec;
     using tms_vec_size_t = decltype(tms_vec.size());
     tms_vec.resize(boost::safe_numerics::safe<tms_vec_size_t>(n_evals) * dates.extent(1));