From 76750537893a7a3e1dcf0130f31f361eb3f98bcb Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sat, 14 Sep 2024 15:47:44 +0200 Subject: [PATCH 1/6] A few initial simplifications. --- src/math/pow.cpp | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/math/pow.cpp b/src/math/pow.cpp index 399f62aef..4478e01d4 100644 --- a/src/math/pow.cpp +++ b/src/math/pow.cpp @@ -496,9 +496,14 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp // Fetch the pow eval algo. const auto pea = get_pow_eval_algo(f); + // Codegen the exponent. + auto *expo = taylor_codegen_numparam(s, fp_t, num, par_ptr, batch_size); + + // Fetch the internal vector type. + auto *vec_t = make_vector_type(fp_t, batch_size); + if (order == 0u) { - return pea.eval_f( - s, {taylor_fetch_diff(arr, u_idx, 0, n_uvars), taylor_codegen_numparam(s, fp_t, num, par_ptr, batch_size)}); + return pea.eval_f(s, {taylor_fetch_diff(arr, u_idx, 0, n_uvars), expo}); } // Special case for sqrt(). @@ -514,7 +519,6 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp } // The general case. - auto &builder = s.builder(); // NOTE: iteration in the [0, order) range // (i.e., order *not* included). @@ -526,21 +530,17 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp // Compute the scalar factor: order * num - j * (num + 1). auto scal_f = [&]() -> llvm::Value * { if constexpr (std::is_same_v) { - return vector_splat( - builder, - llvm_codegen(s, fp_t, - number_like(s, fp_t, static_cast(order)) * num - - number_like(s, fp_t, static_cast(j)) * (num + number_like(s, fp_t, 1.))), - batch_size); + return llvm_codegen(s, vec_t, + number_like(s, fp_t, static_cast(order)) * num + - number_like(s, fp_t, static_cast(j)) + * (num + number_like(s, fp_t, 1.))); } else { - auto pc = taylor_codegen_numparam(s, fp_t, num, par_ptr, batch_size); - auto *jvec = vector_splat(builder, llvm_codegen(s, fp_t, number(static_cast(j))), batch_size); - auto *ordvec - = vector_splat(builder, llvm_codegen(s, fp_t, number(static_cast(order))), batch_size); - auto *onevec = vector_splat(builder, llvm_codegen(s, fp_t, number(1.)), batch_size); + auto *jvec = llvm_codegen(s, vec_t, number(static_cast(j))); + auto *ordvec = llvm_codegen(s, vec_t, number(static_cast(order))); + auto *onevec = llvm_codegen(s, vec_t, number(1.)); - auto tmp1 = llvm_fmul(s, ordvec, pc); - auto tmp2 = llvm_fmul(s, jvec, llvm_fadd(s, pc, onevec)); + auto tmp1 = llvm_fmul(s, ordvec, expo); + auto tmp2 = llvm_fmul(s, jvec, llvm_fadd(s, expo, onevec)); return llvm_fsub(s, tmp1, tmp2); } @@ -554,12 +554,14 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp auto *ret_acc = pairwise_sum(s, sum); // Compute the final divisor: order * (zero-th derivative of u_idx). - auto *ord_f = vector_splat(builder, llvm_codegen(s, fp_t, number(static_cast(order))), batch_size); + auto *ord_f = llvm_codegen(s, vec_t, number(static_cast(order))); auto *b0 = taylor_fetch_diff(arr, u_idx, 0, n_uvars); auto *div = llvm_fmul(s, ord_f, b0); // Compute and return the result: ret_acc / div. - return llvm_fdiv(s, ret_acc, div); + auto *ret = llvm_fdiv(s, ret_acc, div); + + return ret; } // All the other cases. From 907ba11c7c0102b0288b337141428be26a2ac709 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 15 Sep 2024 10:19:35 +0200 Subject: [PATCH 2/6] Another minor bit. --- src/math/pow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/math/pow.cpp b/src/math/pow.cpp index 4478e01d4..384377c9c 100644 --- a/src/math/pow.cpp +++ b/src/math/pow.cpp @@ -539,8 +539,8 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp auto *ordvec = llvm_codegen(s, vec_t, number(static_cast(order))); auto *onevec = llvm_codegen(s, vec_t, number(1.)); - auto tmp1 = llvm_fmul(s, ordvec, expo); - auto tmp2 = llvm_fmul(s, jvec, llvm_fadd(s, expo, onevec)); + auto *tmp1 = llvm_fmul(s, ordvec, expo); + auto *tmp2 = llvm_fmul(s, jvec, llvm_fadd(s, expo, onevec)); return llvm_fsub(s, tmp1, tmp2); } From c988736b3986a8b55f5d05ec89d7de7c42738335 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 15 Sep 2024 15:37:10 +0200 Subject: [PATCH 3/6] Add a preprocessing step before the Taylor decomposition to transform powers with non-numerical exponent into combinations of exps and logs. This allows us to avoid having to implement the Taylor diff of pow() for non-numerical exponents. --- src/math/pow.cpp | 8 +++++ src/taylor_01.cpp | 78 +++++++++++++++++++++++++++++++++++++++++++++ test/taylor_pow.cpp | 68 ++++++++++++++++++--------------------- 3 files changed, 117 insertions(+), 37 deletions(-) diff --git a/src/math/pow.cpp b/src/math/pow.cpp index 384377c9c..c71e98ac2 100644 --- a/src/math/pow.cpp +++ b/src/math/pow.cpp @@ -564,6 +564,8 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp return ret; } +// LCOV_EXCL_START + // All the other cases. template , is_num_param>, int> = 0> llvm::Value *taylor_diff_pow_impl(llvm_state &, llvm::Type *, const pow_impl &, const U1 &, const U2 &, @@ -574,6 +576,8 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &, llvm::Type *, const pow_impl &, "An invalid argument type was encountered while trying to build the Taylor derivative of a pow()"); } +// LCOV_EXCL_STOP + llvm::Value *taylor_diff_pow(llvm_state &s, llvm::Type *fp_t, const pow_impl &f, const std::vector &deps, const std::vector &arr, llvm::Value *par_ptr, std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx, std::uint32_t batch_size) @@ -971,6 +975,8 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, llvm::Type *fp_t, con return f; } +// LCOV_EXCL_START + // All the other cases. template , is_num_param>, int> = 0> llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &, llvm::Type *, const pow_impl &, const U1 &, const U2 &, @@ -980,6 +986,8 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &, llvm::Type *, const po "of a pow() in compact mode"); } +// LCOV_EXCL_STOP + llvm::Function *taylor_c_diff_func_pow(llvm_state &s, llvm::Type *fp_t, const pow_impl &fn, std::uint32_t n_uvars, std::uint32_t batch_size) { diff --git a/src/taylor_01.cpp b/src/taylor_01.cpp index 679a4a33c..5626b9205 100644 --- a/src/taylor_01.cpp +++ b/src/taylor_01.cpp @@ -8,12 +8,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -46,6 +48,7 @@ #include #include +#include #include #include #include @@ -54,7 +57,11 @@ #include #include #include +#include #include +#include +#include +#include #include #include #include @@ -769,6 +776,74 @@ void taylor_decompose_replace_numbers(taylor_dc_t &dc, std::vector:: } } +// NOLINTNEXTLINE(misc-no-recursion) +expression pow_to_explog(funcptr_map &func_map, const expression &ex) +{ + return std::visit( + // NOLINTNEXTLINE(misc-no-recursion) + [&](const T &v) { + if constexpr (std::same_as) { + const auto *f_id = v.get_ptr(); + + // Check if we already performed the transformation on ex. + if (const auto it = func_map.find(f_id); it != func_map.end()) { + return it->second; + } + + // Perform the transformation on the function arguments. + std::vector new_args; + new_args.reserve(v.args().size()); + for (const auto &orig_arg : v.args()) { + new_args.push_back(pow_to_explog(func_map, orig_arg)); + } + + // Prepare the return value. + std::optional retval; + + if (v.template extract() != nullptr + && !std::holds_alternative(new_args[1].value())) { + // The function is a pow() and the exponent is not a number: transform x**y -> exp(y*log(x)). + // + // NOTE: do not call directly log(new_args[0]) in order to avoid constant folding when the base + // is a number. For instance, if we have pow(2_dbl, par[0]), then we would end up computing + // log(2) in double precision. This would result in an inaccurate result if the fp type + // or precision in use during integration is higher than double. + // NOTE: because the exponent is not a number, no other constant folding should take + // place here. + retval.emplace(exp(new_args[1] * expression{func{detail::log_impl(new_args[0])}})); + } else { + // Create a copy of v with the new arguments. + retval.emplace(v.copy(std::move(new_args))); + } + + // Put the return value into the cache. + [[maybe_unused]] const auto [_, flag] = func_map.emplace(f_id, *retval); + // NOTE: an expression cannot contain itself. + assert(flag); // LCOV_EXCL_LINE + + return std::move(*retval); + } else { + return ex; + } + }, + ex.value()); +} + +// Helper to transform x**y -> exp(y*log(x)), if y is not a number. +std::vector pow_to_explog(const std::vector &v_ex) +{ + funcptr_map func_map; + + std::vector retval; + retval.reserve(v_ex.size()); + + for (const auto &e : v_ex) { + retval.push_back(pow_to_explog(func_map, e)); + } + + return retval; +} + } // namespace } // namespace detail @@ -798,6 +873,9 @@ taylor_decompose_sys(const std::vector> &sys_, std::ranges::transform(sys_, std::back_inserter(all_ex), &std::pair::second); all_ex.insert(all_ex.end(), sv_funcs_.begin(), sv_funcs_.end()); + // Transform x**y -> exp(y*log(x)), if y is not a number. + all_ex = detail::pow_to_explog(all_ex); + // Transform sums into subs. all_ex = detail::sum_to_sub(all_ex); diff --git a/test/taylor_pow.cpp b/test/taylor_pow.cpp index be2e6f00b..17e1632ff 100644 --- a/test/taylor_pow.cpp +++ b/test/taylor_pow.cpp @@ -26,6 +26,7 @@ #endif #include +#include #include #include #include @@ -97,7 +98,9 @@ TEST_CASE("taylor pow approx") kw::opt_level = 0, kw::compact_mode = true}; - REQUIRE(ir_contains(ta, "taylor_c_diff.pow.")); + REQUIRE(!ir_contains(ta, "taylor_c_diff.pow.")); + REQUIRE(ir_contains(ta, "taylor_c_diff.exp.")); + REQUIRE(ir_contains(ta, "taylor_c_diff.log.")); } { @@ -167,9 +170,7 @@ TEST_CASE("taylor pow") kw::opt_level = opt_level, kw::pars = {fp_t{1} / fp_t{3}}}; - if (opt_level == 0u && compact_mode) { - REQUIRE(ir_contains(ta, "@heyoka.taylor_c_diff.pow.num_par")); - } + REQUIRE(!ir_contains(ta, "@heyoka.taylor_c_diff.pow.num_par")); ta.step(true); @@ -705,39 +706,6 @@ TEST_CASE("taylor pow") compare_batch_scalar({prime(x) = pow(y, expression{number{fp_t{3}}} / expression{number{fp_t{2}}}), prime(y) = pow(x, expression{number{fp_t{-1}}} / expression{number{fp_t{3}}})}, opt_level, high_accuracy, compact_mode, rng, .1f, 20.f); - - // Failure modes for non-implemented cases. - { - REQUIRE_THROWS_MATCHES((taylor_adaptive_batch{{prime(x) = pow(1_dbl, x)}, - {fp_t{2}, fp_t{2}, fp_t{3}}, - 3, - kw::tol = .1, - kw::high_accuracy = high_accuracy, - kw::compact_mode = compact_mode, - kw::opt_level = opt_level}), - std::invalid_argument, - Message(compact_mode - ? "An invalid argument type was encountered while trying to build the " - "Taylor derivative of a pow() in compact mode" - : "An invalid argument type was encountered while trying to build the " - "Taylor derivative of a pow()")); - } - - { - REQUIRE_THROWS_MATCHES((taylor_adaptive_batch{{prime(y) = pow(y, x), prime(x) = x + y}, - {fp_t{2}, fp_t{2}, fp_t{3}, fp_t{2}, fp_t{2}, fp_t{3}}, - 3, - kw::tol = .1, - kw::high_accuracy = high_accuracy, - kw::compact_mode = compact_mode, - kw::opt_level = opt_level}), - std::invalid_argument, - Message(compact_mode - ? "An invalid argument type was encountered while trying to build the " - "Taylor derivative of a pow() in compact mode" - : "An invalid argument type was encountered while trying to build the " - "Taylor derivative of a pow()")); - } }; for (auto cm : {false, true}) { @@ -749,3 +717,29 @@ TEST_CASE("taylor pow") } } } + +// Small test for the preprocessing that turns pow into exp+log. +TEST_CASE("pow_to_explog") +{ + auto [x, y] = make_vars("x", "y"); + + auto tmp1 = x + pow(y, par[0]); + auto tmp2 = pow(x, tmp1); + auto tmp3 = pow(tmp1, y); + + auto ta = taylor_adaptive{{prime(x) = (tmp1 * tmp2) / tmp3, prime(y) = tmp1}, {}, kw::tol = 1e-1}; + + REQUIRE(ta.get_decomposition().size() == 16u); + + // Count the number of exps and logs. + auto n_exp = 0, n_log = 0; + for (const auto &[ex, _] : ta.get_decomposition()) { + if (const auto *fptr = std::get_if(&ex.value())) { + n_exp += (fptr->extract() != nullptr); + n_log += (fptr->extract() != nullptr); + } + } + + REQUIRE(n_exp == 3); + REQUIRE(n_log == 3); +} From 80094064eecbaf560f8df5ead762d574065f3280 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 15 Sep 2024 17:10:25 +0200 Subject: [PATCH 4/6] Tentative ppc test fix. --- test/llvm_helpers.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/llvm_helpers.cpp b/test/llvm_helpers.cpp index ee7f06904..96e9bb393 100644 --- a/test/llvm_helpers.cpp +++ b/test/llvm_helpers.cpp @@ -3397,6 +3397,20 @@ TEST_CASE("is_finite scalar mp") #endif +// NOTE: is_natural() appears not to be working on ppc, giving the following error: +// +// LLVM ERROR: Cannot select: 0x63b95812620: v1i128 = bitcast 0x63b957f9700 +// 0x63b957f9700: f128,ch = load<(load (s128) from %ir.2 + 16, align 1)> 0x63b95acca30, 0x63b957f9690, undef:i64 +// 0x63b957f9690: i64 = add nuw 0x63b957f9380, Constant:i64<16> +// 0x63b957f9380: i64,ch = CopyFromReg 0x63b95acca30, Register:i64 %1 +// 0x63b957f9310: i64 = Register %1 +// 0x63b957f9620: i64 = Constant<16> +// 0x63b957f9460: i64 = undef +// In function: hey_is_natural +// +// This seems like an instruction selection problem specific to the ppc backend. +#if !defined(HEYOKA_ARCH_PPC) + TEST_CASE("is_natural scalar") { using detail::llvm_is_natural; @@ -3578,3 +3592,5 @@ TEST_CASE("is_natural scalar mp") } #endif + +#endif From ef1383c4d9aca2020bff6f5e37bc0b8bda3deaea Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 15 Sep 2024 17:10:48 +0200 Subject: [PATCH 5/6] Test additions, coverage fixes, simplifications. --- src/math/pow.cpp | 25 ++++++++-------------- test/taylor_pow.cpp | 51 ++++++++++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 31 deletions(-) diff --git a/src/math/pow.cpp b/src/math/pow.cpp index c71e98ac2..7a7d7fa62 100644 --- a/src/math/pow.cpp +++ b/src/math/pow.cpp @@ -528,23 +528,14 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp auto *v1 = taylor_fetch_diff(arr, idx, j, n_uvars); // Compute the scalar factor: order * num - j * (num + 1). - auto scal_f = [&]() -> llvm::Value * { - if constexpr (std::is_same_v) { - return llvm_codegen(s, vec_t, - number_like(s, fp_t, static_cast(order)) * num - - number_like(s, fp_t, static_cast(j)) - * (num + number_like(s, fp_t, 1.))); - } else { - auto *jvec = llvm_codegen(s, vec_t, number(static_cast(j))); - auto *ordvec = llvm_codegen(s, vec_t, number(static_cast(order))); - auto *onevec = llvm_codegen(s, vec_t, number(1.)); + auto *jvec = llvm_codegen(s, vec_t, number(static_cast(j))); + auto *ordvec = llvm_codegen(s, vec_t, number(static_cast(order))); + auto *onevec = llvm_codegen(s, vec_t, number(1.)); - auto *tmp1 = llvm_fmul(s, ordvec, expo); - auto *tmp2 = llvm_fmul(s, jvec, llvm_fadd(s, expo, onevec)); + auto *tmp1 = llvm_fmul(s, ordvec, expo); + auto *tmp2 = llvm_fmul(s, jvec, llvm_fadd(s, expo, onevec)); - return llvm_fsub(s, tmp1, tmp2); - } - }(); + auto *scal_f = llvm_fsub(s, tmp1, tmp2); // Add scal_f*v0*v1 to the sum. sum.push_back(llvm_fmul(s, scal_f, llvm_fmul(s, v0, v1))); @@ -584,6 +575,7 @@ llvm::Value *taylor_diff_pow(llvm_state &s, llvm::Type *fp_t, const pow_impl &f, { assert(f.args().size() == 2u); + // LCOV_EXCL_START if (!deps.empty()) { throw std::invalid_argument( fmt::format("An empty hidden dependency vector is expected in order to compute the Taylor " @@ -591,6 +583,7 @@ llvm::Value *taylor_diff_pow(llvm_state &s, llvm::Type *fp_t, const pow_impl &f, "instead", deps.size())); } + // LCOV_EXCL_STOP return std::visit( [&](const auto &v1, const auto &v2) { @@ -904,7 +897,7 @@ llvm::Function *taylor_c_diff_func_pow_impl(llvm_state &s, llvm::Type *fp_t, con auto *ft = llvm::FunctionType::get(val_t, fargs, false); // Create the function f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &md); - assert(f != nullptr); + assert(f != nullptr); // LCOV_EXCL_LINE // Fetch the necessary function arguments. auto ord = f->args().begin(); diff --git a/test/taylor_pow.cpp b/test/taylor_pow.cpp index 17e1632ff..ccf2d480f 100644 --- a/test/taylor_pow.cpp +++ b/test/taylor_pow.cpp @@ -718,28 +718,49 @@ TEST_CASE("taylor pow") } } -// Small test for the preprocessing that turns pow into exp+log. +// Tests for the preprocessing that turns pow into exp+log. TEST_CASE("pow_to_explog") { auto [x, y] = make_vars("x", "y"); - auto tmp1 = x + pow(y, par[0]); - auto tmp2 = pow(x, tmp1); - auto tmp3 = pow(tmp1, y); + for (auto cm : {false, true}) { + auto tmp1 = x + pow(y, par[0]); + auto tmp2 = pow(x, tmp1); + auto tmp3 = pow(tmp1, par[1]); - auto ta = taylor_adaptive{{prime(x) = (tmp1 * tmp2) / tmp3, prime(y) = tmp1}, {}, kw::tol = 1e-1}; + auto ta = taylor_adaptive{ + {prime(x) = (tmp1 * tmp2) / tmp3, prime(y) = tmp1}, {.1, .2}, kw::pars = {1.2, 3.4}, kw::compact_mode = cm}; - REQUIRE(ta.get_decomposition().size() == 16u); + REQUIRE(ta.get_decomposition().size() == 16u); - // Count the number of exps and logs. - auto n_exp = 0, n_log = 0; - for (const auto &[ex, _] : ta.get_decomposition()) { - if (const auto *fptr = std::get_if(&ex.value())) { - n_exp += (fptr->extract() != nullptr); - n_log += (fptr->extract() != nullptr); + // Count the number of exps and logs. + auto n_exp = 0, n_log = 0; + for (const auto &[ex, _] : ta.get_decomposition()) { + if (const auto *fptr = std::get_if(&ex.value())) { + n_exp += (fptr->extract() != nullptr); + n_log += (fptr->extract() != nullptr); + } } - } - REQUIRE(n_exp == 3); - REQUIRE(n_log == 3); + REQUIRE(n_exp == 3); + REQUIRE(n_log == 3); + + // Create an analogous of ta in which the pars have been hard-coded to numbers. + tmp1 = x + pow(y, 1.2); + tmp2 = pow(x, tmp1); + tmp3 = pow(tmp1, 3.4); + + auto ta_hc = taylor_adaptive{ + {prime(x) = (tmp1 * tmp2) / tmp3, prime(y) = tmp1}, {.1, .2}, kw::compact_mode = cm}; + + // Compute the Taylor coefficients. + ta.step(0., true); + ta_hc.step(0., true); + + // Compare them. + auto n_tc = ta.get_tc().size(); + for (decltype(n_tc) i = 0; i < n_tc; ++i) { + REQUIRE(ta.get_tc()[i] == approximately(ta_hc.get_tc()[i], 1000.)); + } + } } From 26c888e803b5954848164843391b05a0296f5e3e Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 16 Sep 2024 07:57:58 +0200 Subject: [PATCH 6/6] Update changelog. --- doc/changelog.rst | 3 +++ src/math/pow.cpp | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index 316d27b2a..fd558726f 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -7,6 +7,9 @@ Changelog New ~~~ +- Non-number exponents for the ``pow()`` function + are now supported in Taylor integrators + (`#454 `__). - It is now possible to initialise a Taylor integrator with an empty initial state vector. This will result in zero-initialization of the state vector diff --git a/src/math/pow.cpp b/src/math/pow.cpp index 7a7d7fa62..cad561728 100644 --- a/src/math/pow.cpp +++ b/src/math/pow.cpp @@ -550,9 +550,7 @@ llvm::Value *taylor_diff_pow_impl(llvm_state &s, llvm::Type *fp_t, const pow_imp auto *div = llvm_fmul(s, ord_f, b0); // Compute and return the result: ret_acc / div. - auto *ret = llvm_fdiv(s, ret_acc, div); - - return ret; + return llvm_fdiv(s, ret_acc, div); } // LCOV_EXCL_START