From 50fca06a366f378c725f98a195d37b5134df7347 Mon Sep 17 00:00:00 2001
From: Francesco Biscani <bluescarni@gmail.com>
Date: Fri, 19 Feb 2021 22:37:10 +0100
Subject: [PATCH 1/4] Implement unary negation.

---
 CMakeLists.txt              |   1 +
 include/heyoka/math.hpp     |   1 +
 include/heyoka/math/neg.hpp |  68 +++++
 src/expression.cpp          |   7 +-
 src/math/neg.cpp            | 312 ++++++++++++++++++++++
 test/CMakeLists.txt         |   2 +
 test/neg.cpp                |  41 +++
 test/taylor_neg.cpp         | 512 ++++++++++++++++++++++++++++++++++++
 8 files changed, 943 insertions(+), 1 deletion(-)
 create mode 100644 include/heyoka/math/neg.hpp
 create mode 100644 src/math/neg.cpp
 create mode 100644 test/neg.cpp
 create mode 100644 test/taylor_neg.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3600e105c..9535140c9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -191,6 +191,7 @@ set(HEYOKA_SRC_FILES
     "${CMAKE_CURRENT_SOURCE_DIR}/src/math/acosh.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/math/atanh.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/math/erf.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/math/neg.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/detail/string_conv.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/detail/math_wrappers.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/detail/llvm_helpers.cpp"
diff --git a/include/heyoka/math.hpp b/include/heyoka/math.hpp
index b9b6ae922..dca56a48b 100644
--- a/include/heyoka/math.hpp
+++ b/include/heyoka/math.hpp
@@ -20,6 +20,7 @@
 #include <heyoka/math/erf.hpp>
 #include <heyoka/math/exp.hpp>
 #include <heyoka/math/log.hpp>
+#include <heyoka/math/neg.hpp>
 #include <heyoka/math/pow.hpp>
 #include <heyoka/math/sigmoid.hpp>
 #include <heyoka/math/sin.hpp>
diff --git a/include/heyoka/math/neg.hpp b/include/heyoka/math/neg.hpp
new file mode 100644
index 000000000..fb5b3a44f
--- /dev/null
+++ b/include/heyoka/math/neg.hpp
@@ -0,0 +1,68 @@
+// Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
+//
+// This file is part of the heyoka library.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef HEYOKA_MATH_NEG_HPP
+#define HEYOKA_MATH_NEG_HPP
+
+#include <cstdint>
+#include <ostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <heyoka/config.hpp>
+#include <heyoka/detail/fwd_decl.hpp>
+#include <heyoka/detail/llvm_fwd.hpp>
+#include <heyoka/detail/visibility.hpp>
+#include <heyoka/func.hpp>
+
+namespace heyoka
+{
+
+namespace detail
+{
+
+class HEYOKA_DLL_PUBLIC neg_impl : public func_base
+{
+public:
+    neg_impl();
+    explicit neg_impl(expression);
+
+    void to_stream(std::ostream &) const;
+
+    llvm::Value *codegen_dbl(llvm_state &, const std::vector<llvm::Value *> &) const;
+    llvm::Value *codegen_ldbl(llvm_state &, const std::vector<llvm::Value *> &) const;
+#if defined(HEYOKA_HAVE_REAL128)
+    llvm::Value *codegen_f128(llvm_state &, const std::vector<llvm::Value *> &) const;
+#endif
+
+    llvm::Value *taylor_diff_dbl(llvm_state &, const std::vector<std::uint32_t> &, const std::vector<llvm::Value *> &,
+                                 llvm::Value *, llvm::Value *, std::uint32_t, std::uint32_t, std::uint32_t,
+                                 std::uint32_t) const;
+    llvm::Value *taylor_diff_ldbl(llvm_state &, const std::vector<std::uint32_t> &, const std::vector<llvm::Value *> &,
+                                  llvm::Value *, llvm::Value *, std::uint32_t, std::uint32_t, std::uint32_t,
+                                  std::uint32_t) const;
+#if defined(HEYOKA_HAVE_REAL128)
+    llvm::Value *taylor_diff_f128(llvm_state &, const std::vector<std::uint32_t> &, const std::vector<llvm::Value *> &,
+                                  llvm::Value *, llvm::Value *, std::uint32_t, std::uint32_t, std::uint32_t,
+                                  std::uint32_t) const;
+#endif
+    llvm::Function *taylor_c_diff_func_dbl(llvm_state &, std::uint32_t, std::uint32_t) const;
+    llvm::Function *taylor_c_diff_func_ldbl(llvm_state &, std::uint32_t, std::uint32_t) const;
+#if defined(HEYOKA_HAVE_REAL128)
+    llvm::Function *taylor_c_diff_func_f128(llvm_state &, std::uint32_t, std::uint32_t) const;
+#endif
+};
+
+} // namespace detail
+
+HEYOKA_DLL_PUBLIC expression neg(expression);
+
+} // namespace heyoka
+
+#endif
diff --git a/src/expression.cpp b/src/expression.cpp
index 96c60c474..514676c03 100644
--- a/src/expression.cpp
+++ b/src/expression.cpp
@@ -37,6 +37,7 @@
 #include <heyoka/expression.hpp>
 #include <heyoka/func.hpp>
 #include <heyoka/llvm_state.hpp>
+#include <heyoka/math/neg.hpp>
 #include <heyoka/math/square.hpp>
 #include <heyoka/number.hpp>
 #include <heyoka/param.hpp>
@@ -200,7 +201,11 @@ expression operator+(expression e)
 
 expression operator-(expression e)
 {
-    return expression{number{-1.}} * std::move(e);
+    if (auto num_ptr = std::get_if<number>(&e.value())) {
+        return expression{-std::move(*num_ptr)};
+    } else {
+        return neg(std::move(e));
+    }
 }
 
 expression operator+(expression e1, expression e2)
diff --git a/src/math/neg.cpp b/src/math/neg.cpp
new file mode 100644
index 000000000..fc68d2fae
--- /dev/null
+++ b/src/math/neg.cpp
@@ -0,0 +1,312 @@
+// Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
+//
+// This file is part of the heyoka library.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <heyoka/config.hpp>
+
+#include <cassert>
+#include <initializer_list>
+#include <ostream>
+#include <type_traits>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include <llvm/IR/Attributes.h>
+#include <llvm/IR/BasicBlock.h>
+#include <llvm/IR/DerivedTypes.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Type.h>
+#include <llvm/IR/Value.h>
+
+#if defined(HEYOKA_HAVE_REAL128)
+
+#include <mp++/real128.hpp>
+
+#endif
+
+#include <heyoka/detail/llvm_helpers.hpp>
+#include <heyoka/detail/string_conv.hpp>
+#include <heyoka/detail/taylor_common.hpp>
+#include <heyoka/expression.hpp>
+#include <heyoka/func.hpp>
+#include <heyoka/llvm_state.hpp>
+#include <heyoka/math/neg.hpp>
+#include <heyoka/number.hpp>
+#include <heyoka/taylor.hpp>
+#include <heyoka/variable.hpp>
+
+namespace heyoka
+{
+
+namespace detail
+{
+
+neg_impl::neg_impl(expression e) : func_base("neg", std::vector{std::move(e)}) {}
+
+neg_impl::neg_impl() : neg_impl(0_dbl) {}
+
+void neg_impl::to_stream(std::ostream &os) const
+{
+    assert(!args().empty());
+
+    os << '-' << args()[0];
+}
+
+llvm::Value *neg_impl::codegen_dbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
+{
+    assert(args.size() == 1u);
+    assert(args[0] != nullptr);
+
+    return s.builder().CreateFNeg(args[0]);
+}
+
+llvm::Value *neg_impl::codegen_ldbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
+{
+    // NOTE: codegen is identical as in dbl.
+    return codegen_dbl(s, args);
+}
+
+#if defined(HEYOKA_HAVE_REAL128)
+
+llvm::Value *neg_impl::codegen_f128(llvm_state &s, const std::vector<llvm::Value *> &args) const
+{
+    // NOTE: codegen is identical as in dbl.
+    return codegen_dbl(s, args);
+}
+
+#endif
+
+namespace
+{
+
+// Derivative of neg(number).
+template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
+llvm::Value *taylor_diff_neg_impl(llvm_state &s, const neg_impl &, const U &num, const std::vector<llvm::Value *> &,
+                                  llvm::Value *par_ptr, std::uint32_t, std::uint32_t order, std::uint32_t,
+                                  std::uint32_t batch_size)
+{
+    if (order == 0u) {
+        return s.builder().CreateFNeg(taylor_codegen_numparam<T>(s, num, par_ptr, batch_size));
+    } else {
+        return vector_splat(s.builder(), codegen<T>(s, number{0.}), batch_size);
+    }
+}
+
+// Derivative of neg(variable).
+template <typename T>
+llvm::Value *taylor_diff_neg_impl(llvm_state &s, const neg_impl &, const variable &var,
+                                  const std::vector<llvm::Value *> &arr, llvm::Value *, std::uint32_t n_uvars,
+                                  std::uint32_t order, std::uint32_t, std::uint32_t)
+{
+    return s.builder().CreateFNeg(taylor_fetch_diff(arr, uname_to_index(var.name()), order, n_uvars));
+}
+
+// All the other cases.
+template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
+llvm::Value *taylor_diff_neg_impl(llvm_state &, const neg_impl &, const U &, const std::vector<llvm::Value *> &,
+                                  llvm::Value *, std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t)
+{
+    throw std::invalid_argument(
+        "An invalid argument type was encountered while trying to build the Taylor derivative of a negation");
+}
+
+template <typename T>
+llvm::Value *taylor_diff_neg(llvm_state &s, const neg_impl &f, const std::vector<std::uint32_t> &deps,
+                             const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, std::uint32_t n_uvars,
+                             std::uint32_t order, std::uint32_t idx, std::uint32_t batch_size)
+{
+    assert(f.args().size() == 1u);
+
+    if (!deps.empty()) {
+        using namespace fmt::literals;
+
+        throw std::invalid_argument("An empty hidden dependency vector is expected in order to compute the Taylor "
+                                    "derivative of the negation, but a vector of size {} was passed "
+                                    "instead"_format(deps.size()));
+    }
+
+    return std::visit(
+        [&](const auto &v) { return taylor_diff_neg_impl<T>(s, f, v, arr, par_ptr, n_uvars, order, idx, batch_size); },
+        f.args()[0].value());
+}
+
+} // namespace
+
+llvm::Value *neg_impl::taylor_diff_dbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
+                                       const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
+                                       std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
+                                       std::uint32_t batch_size) const
+{
+    return taylor_diff_neg<double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
+}
+
+llvm::Value *neg_impl::taylor_diff_ldbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
+                                        const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
+                                        std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
+                                        std::uint32_t batch_size) const
+{
+    return taylor_diff_neg<long double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
+}
+
+#if defined(HEYOKA_HAVE_REAL128)
+
+llvm::Value *neg_impl::taylor_diff_f128(llvm_state &s, const std::vector<std::uint32_t> &deps,
+                                        const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
+                                        std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
+                                        std::uint32_t batch_size) const
+{
+    return taylor_diff_neg<mppp::real128>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
+}
+
+#endif
+
+namespace
+{
+
+// Derivative of neg(number).
+template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
+llvm::Function *taylor_c_diff_func_neg_impl(llvm_state &s, const neg_impl &fn, const U &num, std::uint32_t,
+                                            std::uint32_t batch_size)
+{
+    using namespace fmt::literals;
+
+    return taylor_c_diff_func_unary_num_det<T>(
+        s, fn, num, batch_size,
+        "heyoka_taylor_diff_neg_{}_{}"_format(taylor_c_diff_numparam_mangle(num),
+                                              taylor_mangle_suffix(to_llvm_vector_type<T>(s.context(), batch_size))),
+        "the negation");
+}
+
+// Derivative of neg(variable).
+template <typename T>
+llvm::Function *taylor_c_diff_func_neg_impl(llvm_state &s, const neg_impl &, const variable &, std::uint32_t n_uvars,
+                                            std::uint32_t batch_size)
+{
+    auto &module = s.module();
+    auto &builder = s.builder();
+    auto &context = s.context();
+
+    // Fetch the floating-point type.
+    auto val_t = to_llvm_vector_type<T>(context, batch_size);
+
+    // Get the function name.
+    using namespace fmt::literals;
+    const auto fname = "heyoka_taylor_diff_neg_var_{}_n_uvars_{}"_format(taylor_mangle_suffix(val_t), n_uvars);
+
+    // The function arguments:
+    // - diff order,
+    // - idx of the u variable whose diff is being computed,
+    // - diff array,
+    // - par ptr,
+    // - time ptr,
+    // - idx of the var argument.
+    std::vector<llvm::Type *> fargs{llvm::Type::getInt32Ty(context),
+                                    llvm::Type::getInt32Ty(context),
+                                    llvm::PointerType::getUnqual(val_t),
+                                    llvm::PointerType::getUnqual(to_llvm_type<T>(context)),
+                                    llvm::PointerType::getUnqual(to_llvm_type<T>(context)),
+                                    llvm::Type::getInt32Ty(context)};
+
+    // Try to see if we already created the function.
+    auto f = module.getFunction(fname);
+
+    if (f == nullptr) {
+        // The function was not created before, do it now.
+
+        // Fetch the current insertion block.
+        auto orig_bb = builder.GetInsertBlock();
+
+        // The return type is val_t.
+        auto *ft = llvm::FunctionType::get(val_t, fargs, false);
+        // Create the function
+        f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &module);
+        assert(f != nullptr);
+
+        // Fetch the necessary function arguments.
+        auto ord = f->args().begin();
+        auto diff_ptr = f->args().begin() + 2;
+        auto var_idx = f->args().begin() + 5;
+
+        // Create a new basic block to start insertion into.
+        builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
+
+        // Create the return value.
+        auto retval = builder.CreateFNeg(taylor_c_load_diff(s, diff_ptr, n_uvars, ord, var_idx));
+
+        // Return the result.
+        builder.CreateRet(retval);
+
+        // Verify.
+        s.verify_function(f);
+
+        // Restore the original insertion block.
+        builder.SetInsertPoint(orig_bb);
+    } else {
+        // The function was created before. Check if the signatures match.
+        // NOTE: there could be a mismatch if the derivative function was created
+        // and then optimised - optimisation might remove arguments which are compile-time
+        // constants.
+        if (!compare_function_signature(f, val_t, fargs)) {
+            throw std::invalid_argument("Inconsistent function signature for the Taylor derivative of the negation "
+                                        "in compact mode detected");
+        }
+    }
+
+    return f;
+}
+
+// All the other cases.
+template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
+llvm::Function *taylor_c_diff_func_neg_impl(llvm_state &, const neg_impl &, const U &, std::uint32_t, std::uint32_t)
+{
+    throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative "
+                                "of a negation in compact mode");
+}
+
+template <typename T>
+llvm::Function *taylor_c_diff_func_neg(llvm_state &s, const neg_impl &fn, std::uint32_t n_uvars,
+                                       std::uint32_t batch_size)
+{
+    assert(fn.args().size() == 1u);
+
+    return std::visit([&](const auto &v) { return taylor_c_diff_func_neg_impl<T>(s, fn, v, n_uvars, batch_size); },
+                      fn.args()[0].value());
+}
+
+} // namespace
+
+llvm::Function *neg_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size) const
+{
+    return taylor_c_diff_func_neg<double>(s, *this, n_uvars, batch_size);
+}
+
+llvm::Function *neg_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size) const
+{
+    return taylor_c_diff_func_neg<long double>(s, *this, n_uvars, batch_size);
+}
+
+#if defined(HEYOKA_HAVE_REAL128)
+
+llvm::Function *neg_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size) const
+{
+    return taylor_c_diff_func_neg<mppp::real128>(s, *this, n_uvars, batch_size);
+}
+
+#endif
+
+} // namespace detail
+
+expression neg(expression e)
+{
+    return expression{func{detail::neg_impl(std::move(e))}};
+}
+
+} // namespace heyoka
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 82c88df53..2706fe40f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -59,6 +59,7 @@ ADD_HEYOKA_TESTCASE(taylor_tanh)
 ADD_HEYOKA_TESTCASE(taylor_asinh)
 ADD_HEYOKA_TESTCASE(taylor_acosh)
 ADD_HEYOKA_TESTCASE(taylor_atanh)
+ADD_HEYOKA_TESTCASE(taylor_neg)
 ADD_HEYOKA_TESTCASE(two_body)
 ADD_HEYOKA_TESTCASE(two_body_batch)
 ADD_HEYOKA_TESTCASE(e3bp)
@@ -71,3 +72,4 @@ ADD_HEYOKA_TESTCASE(one_body)
 ADD_HEYOKA_TESTCASE(number)
 ADD_HEYOKA_TESTCASE(pow)
 ADD_HEYOKA_TESTCASE(mul)
+ADD_HEYOKA_TESTCASE(neg)
diff --git a/test/neg.cpp b/test/neg.cpp
new file mode 100644
index 000000000..284e0687b
--- /dev/null
+++ b/test/neg.cpp
@@ -0,0 +1,41 @@
+// Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
+//
+// This file is part of the heyoka library.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <sstream>
+
+#include <heyoka/expression.hpp>
+#include <heyoka/math/neg.hpp>
+
+#include "catch.hpp"
+
+using namespace heyoka;
+
+TEST_CASE("neg ostream")
+{
+    auto [x, y] = make_vars("x", "y");
+
+    std::ostringstream oss;
+    oss << neg(x + y);
+
+    REQUIRE(oss.str() == "-(x + y)");
+
+    oss.str("");
+    oss << -x;
+
+    REQUIRE(oss.str() == "-x");
+}
+
+TEST_CASE("unary minus simpl")
+{
+    REQUIRE(-1_dbl == expression{-1.});
+    REQUIRE(-1.1_ldbl == expression{-1.1l});
+
+    auto [x] = make_vars("x");
+
+    REQUIRE(-x == neg(x));
+}
diff --git a/test/taylor_neg.cpp b/test/taylor_neg.cpp
new file mode 100644
index 000000000..d79c398f8
--- /dev/null
+++ b/test/taylor_neg.cpp
@@ -0,0 +1,512 @@
+// Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
+//
+// This file is part of the heyoka library.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <heyoka/config.hpp>
+
+#include <cmath>
+#include <initializer_list>
+#include <random>
+#include <tuple>
+#include <vector>
+
+#if defined(HEYOKA_HAVE_REAL128)
+
+#include <mp++/real128.hpp>
+
+#endif
+
+#include <heyoka/expression.hpp>
+#include <heyoka/llvm_state.hpp>
+#include <heyoka/math/neg.hpp>
+#include <heyoka/number.hpp>
+#include <heyoka/taylor.hpp>
+
+#include "catch.hpp"
+#include "test_utils.hpp"
+
+static std::mt19937 rng;
+
+using namespace heyoka;
+using namespace heyoka_test;
+
+const auto fp_types = std::tuple<double, long double
+#if defined(HEYOKA_HAVE_REAL128)
+                                 ,
+                                 mppp::real128
+#endif
+                                 >{};
+
+template <typename T, typename U>
+void compare_batch_scalar(std::initializer_list<U> sys, unsigned opt_level, bool high_accuracy, bool compact_mode)
+{
+    for (auto batch_size : {2u, 4u, 8u, 23u}) {
+        llvm_state s{kw::opt_level = opt_level};
+
+        taylor_add_jet<T>(s, "jet_batch", sys, 3, batch_size, high_accuracy, compact_mode);
+        taylor_add_jet<T>(s, "jet_scalar", sys, 3, 1, high_accuracy, compact_mode);
+
+        s.compile();
+
+        auto jptr_batch = reinterpret_cast<void (*)(T *, const T *, const T *)>(s.jit_lookup("jet_batch"));
+        auto jptr_scalar = reinterpret_cast<void (*)(T *, const T *, const T *)>(s.jit_lookup("jet_scalar"));
+
+        std::vector<T> jet_batch;
+        jet_batch.resize(8 * batch_size);
+        std::uniform_real_distribution<float> dist(.1f, 20.f);
+        std::generate(jet_batch.begin(), jet_batch.end(), [&dist]() { return T{dist(rng)}; });
+
+        std::vector<T> jet_scalar;
+        jet_scalar.resize(8);
+
+        jptr_batch(jet_batch.data(), nullptr, nullptr);
+
+        for (auto batch_idx = 0u; batch_idx < batch_size; ++batch_idx) {
+            // Assign the initial values of x and y.
+            for (auto i = 0u; i < 2u; ++i) {
+                jet_scalar[i] = jet_batch[i * batch_size + batch_idx];
+            }
+
+            jptr_scalar(jet_scalar.data(), nullptr, nullptr);
+
+            for (auto i = 2u; i < 8u; ++i) {
+                REQUIRE(jet_scalar[i] == approximately(jet_batch[i * batch_size + batch_idx]));
+            }
+        }
+    }
+}
+
+TEST_CASE("taylor neg")
+{
+    auto tester = [](auto fp_x, unsigned opt_level, bool high_accuracy, bool compact_mode) {
+        using fp_t = decltype(fp_x);
+
+        using Catch::Matchers::Message;
+
+        auto x = "x"_var, y = "y"_var;
+
+        // Number tests.
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(expression{number{fp_t(2)}}), x + y}, 1, 1, high_accuracy,
+                                 compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{3}};
+            jet.resize(4);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 3);
+            REQUIRE(jet[2] == -2);
+            REQUIRE(jet[3] == 5);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(par[0]), x + y}, 1, 1, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{3}};
+            jet.resize(4);
+
+            std::vector<fp_t> pars{fp_t{2}};
+
+            jptr(jet.data(), pars.data(), nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 3);
+            REQUIRE(jet[2] == -2);
+            REQUIRE(jet[3] == 5);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(expression{number{fp_t(2)}}), x + y}, 1, 2, high_accuracy,
+                                 compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{-2}, fp_t{3}, fp_t{-3}};
+            jet.resize(8);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == -2);
+
+            REQUIRE(jet[2] == 3);
+            REQUIRE(jet[3] == -3);
+
+            REQUIRE(jet[4] == -2);
+            REQUIRE(jet[5] == -2);
+
+            REQUIRE(jet[6] == 5);
+            REQUIRE(jet[7] == -5);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(par[1]), x + y}, 1, 2, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{-2}, fp_t{3}, fp_t{-3}};
+            jet.resize(8);
+
+            std::vector<fp_t> pars{fp_t{0}, fp_t{0}, fp_t{2}, fp_t{2}};
+
+            jptr(jet.data(), pars.data(), nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == -2);
+
+            REQUIRE(jet[2] == 3);
+            REQUIRE(jet[3] == -3);
+
+            REQUIRE(jet[4] == -2);
+            REQUIRE(jet[5] == -2);
+
+            REQUIRE(jet[6] == 5);
+            REQUIRE(jet[7] == -5);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(expression{number{fp_t(2)}}), x + y}, 2, 1, high_accuracy,
+                                 compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{3}};
+            jet.resize(6);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 3);
+            REQUIRE(jet[2] == -2);
+            REQUIRE(jet[3] == 5);
+            REQUIRE(jet[4] == 0);
+            REQUIRE(jet[5] == approximately(fp_t{1} / 2 * (jet[2] + jet[3])));
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(expression{number{fp_t(2)}}), x + y}, 2, 2, high_accuracy,
+                                 compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{-2}, fp_t{3}, fp_t{-3}};
+            jet.resize(12);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == -2);
+
+            REQUIRE(jet[2] == 3);
+            REQUIRE(jet[3] == -3);
+
+            REQUIRE(jet[4] == -2);
+            REQUIRE(jet[5] == -2);
+
+            REQUIRE(jet[6] == 5);
+            REQUIRE(jet[7] == -5);
+
+            REQUIRE(jet[8] == 0);
+            REQUIRE(jet[9] == 0);
+
+            REQUIRE(jet[10] == approximately(fp_t{1} / 2 * (jet[4] + jet[6])));
+            REQUIRE(jet[11] == approximately(fp_t{1} / 2 * (jet[5] + jet[7])));
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(expression{number{fp_t(2)}}), x + y}, 3, 3, high_accuracy,
+                                 compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{-2}, fp_t{1}, fp_t{3}, fp_t{-3}, fp_t{0}};
+            jet.resize(24);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == -2);
+            REQUIRE(jet[2] == 1);
+
+            REQUIRE(jet[3] == 3);
+            REQUIRE(jet[4] == -3);
+            REQUIRE(jet[5] == 0);
+
+            REQUIRE(jet[6] == -2);
+            REQUIRE(jet[7] == -2);
+            REQUIRE(jet[8] == -2);
+
+            REQUIRE(jet[9] == 5);
+            REQUIRE(jet[10] == -5);
+            REQUIRE(jet[11] == 1);
+
+            REQUIRE(jet[12] == 0);
+            REQUIRE(jet[13] == 0);
+            REQUIRE(jet[14] == 0);
+
+            REQUIRE(jet[15] == approximately(fp_t{1} / 2 * (jet[6] + jet[9])));
+            REQUIRE(jet[16] == approximately(fp_t{1} / 2 * (jet[7] + jet[10])));
+            REQUIRE(jet[17] == approximately(fp_t{1} / 2 * (jet[8] + jet[11])));
+
+            REQUIRE(jet[18] == 0);
+            REQUIRE(jet[19] == 0);
+            REQUIRE(jet[20] == 0);
+
+            REQUIRE(jet[21] == approximately(fp_t{1} / 6 * (2 * jet[15])));
+            REQUIRE(jet[22] == approximately(fp_t{1} / 6 * (2 * jet[16])));
+            REQUIRE(jet[23] == approximately(fp_t{1} / 6 * (2 * jet[17])));
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(par[0]), x + y}, 3, 3, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{-2}, fp_t{1}, fp_t{3}, fp_t{-3}, fp_t{0}};
+            jet.resize(24);
+
+            std::vector<fp_t> pars{fp_t{2}, fp_t{2}, fp_t{2}};
+
+            jptr(jet.data(), pars.data(), nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == -2);
+            REQUIRE(jet[2] == 1);
+
+            REQUIRE(jet[3] == 3);
+            REQUIRE(jet[4] == -3);
+            REQUIRE(jet[5] == 0);
+
+            REQUIRE(jet[6] == -2);
+            REQUIRE(jet[7] == -2);
+            REQUIRE(jet[8] == -2);
+
+            REQUIRE(jet[9] == 5);
+            REQUIRE(jet[10] == -5);
+            REQUIRE(jet[11] == 1);
+
+            REQUIRE(jet[12] == 0);
+            REQUIRE(jet[13] == 0);
+            REQUIRE(jet[14] == 0);
+
+            REQUIRE(jet[15] == approximately(fp_t{1} / 2 * (jet[6] + jet[9])));
+            REQUIRE(jet[16] == approximately(fp_t{1} / 2 * (jet[7] + jet[10])));
+            REQUIRE(jet[17] == approximately(fp_t{1} / 2 * (jet[8] + jet[11])));
+
+            REQUIRE(jet[18] == 0);
+            REQUIRE(jet[19] == 0);
+            REQUIRE(jet[20] == 0);
+
+            REQUIRE(jet[21] == approximately(fp_t{1} / 6 * (2 * jet[15])));
+            REQUIRE(jet[22] == approximately(fp_t{1} / 6 * (2 * jet[16])));
+            REQUIRE(jet[23] == approximately(fp_t{1} / 6 * (2 * jet[17])));
+        }
+
+        // Do the batch/scalar comparison.
+        compare_batch_scalar<fp_t>({neg(expression{number{fp_t(2)}}), x + y}, opt_level, high_accuracy, compact_mode);
+
+        // Variable tests.
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(y), neg(x)}, 1, 1, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{3}};
+            jet.resize(4);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 3);
+            REQUIRE(jet[2] == -3);
+            REQUIRE(jet[3] == -2);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(y), neg(x)}, 1, 2, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{4}, fp_t{3}, fp_t{5}};
+            jet.resize(8);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 4);
+
+            REQUIRE(jet[2] == 3);
+            REQUIRE(jet[3] == 5);
+
+            REQUIRE(jet[4] == -3);
+            REQUIRE(jet[5] == -5);
+
+            REQUIRE(jet[6] == -2);
+            REQUIRE(jet[7] == -4);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(y), neg(x)}, 2, 1, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{3}};
+            jet.resize(6);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 3);
+            REQUIRE(jet[2] == -3);
+            REQUIRE(jet[3] == -2);
+            REQUIRE(jet[4] == 1);
+            REQUIRE(jet[5] == 3 / 2.);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(y), neg(x)}, 2, 2, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{4}, fp_t{3}, fp_t{5}};
+            jet.resize(12);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 4);
+
+            REQUIRE(jet[2] == 3);
+            REQUIRE(jet[3] == 5);
+
+            REQUIRE(jet[4] == -3);
+            REQUIRE(jet[5] == -5);
+
+            REQUIRE(jet[6] == -2);
+            REQUIRE(jet[7] == -4);
+
+            REQUIRE(jet[8] == 1);
+            REQUIRE(jet[9] == 2);
+
+            REQUIRE(jet[10] == 3 / 2.);
+            REQUIRE(jet[11] == 5 / 2.);
+        }
+
+        {
+            llvm_state s{kw::opt_level = opt_level};
+
+            taylor_add_jet<fp_t>(s, "jet", {neg(y), neg(x)}, 3, 3, high_accuracy, compact_mode);
+
+            s.compile();
+
+            auto jptr = reinterpret_cast<void (*)(fp_t *, const fp_t *, const fp_t *)>(s.jit_lookup("jet"));
+
+            std::vector<fp_t> jet{fp_t{2}, fp_t{4}, fp_t{3}, fp_t{3}, fp_t{5}, fp_t{6}};
+            jet.resize(24);
+
+            jptr(jet.data(), nullptr, nullptr);
+
+            REQUIRE(jet[0] == 2);
+            REQUIRE(jet[1] == 4);
+            REQUIRE(jet[2] == 3);
+
+            REQUIRE(jet[3] == 3);
+            REQUIRE(jet[4] == 5);
+            REQUIRE(jet[5] == 6);
+
+            REQUIRE(jet[6] == -3);
+            REQUIRE(jet[7] == -5);
+            REQUIRE(jet[8] == -6);
+
+            REQUIRE(jet[9] == -2);
+            REQUIRE(jet[10] == -4);
+            REQUIRE(jet[11] == -3);
+
+            REQUIRE(jet[12] == 1);
+            REQUIRE(jet[13] == 2);
+            REQUIRE(jet[14] == 3 / 2.);
+
+            REQUIRE(jet[15] == 3 / 2.);
+            REQUIRE(jet[16] == 5 / 2.);
+            REQUIRE(jet[17] == 3);
+
+            REQUIRE(jet[18] == approximately(-fp_t{1} / 3 * jet[15]));
+            REQUIRE(jet[19] == approximately(-fp_t{1} / 3 * jet[16]));
+            REQUIRE(jet[20] == approximately(-fp_t{1} / 3 * jet[17]));
+
+            REQUIRE(jet[21] == approximately(-fp_t{1} / 3 * jet[12]));
+            REQUIRE(jet[22] == approximately(-fp_t{1} / 3 * jet[13]));
+            REQUIRE(jet[23] == approximately(-fp_t{1} / 3 * jet[14]));
+        }
+
+        // Do the batch/scalar comparison.
+        compare_batch_scalar<fp_t>({neg(y), neg(x)}, opt_level, high_accuracy, compact_mode);
+    };
+
+    for (auto cm : {false, true}) {
+        for (auto f : {false, true}) {
+            tuple_for_each(fp_types, [&tester, f, cm](auto x) { tester(x, 0, f, cm); });
+            tuple_for_each(fp_types, [&tester, f, cm](auto x) { tester(x, 1, f, cm); });
+            tuple_for_each(fp_types, [&tester, f, cm](auto x) { tester(x, 2, f, cm); });
+            tuple_for_each(fp_types, [&tester, f, cm](auto x) { tester(x, 3, f, cm); });
+        }
+    }
+}

From 699e04e430dcdce9d18f4d687dfbde511dc33603 Mon Sep 17 00:00:00 2001
From: Francesco Biscani <bluescarni@gmail.com>
Date: Fri, 19 Feb 2021 22:50:43 +0100
Subject: [PATCH 2/4] Header fixes.

---
 src/math/neg.cpp    | 3 ++-
 src/math/square.cpp | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/math/neg.cpp b/src/math/neg.cpp
index fc68d2fae..5c98f770d 100644
--- a/src/math/neg.cpp
+++ b/src/math/neg.cpp
@@ -18,11 +18,12 @@
 
 #include <fmt/format.h>
 
-#include <llvm/IR/Attributes.h>
 #include <llvm/IR/BasicBlock.h>
 #include <llvm/IR/DerivedTypes.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
 #include <llvm/IR/Type.h>
 #include <llvm/IR/Value.h>
 
diff --git a/src/math/square.cpp b/src/math/square.cpp
index 83618d77f..3fecb6455 100644
--- a/src/math/square.cpp
+++ b/src/math/square.cpp
@@ -17,11 +17,12 @@
 
 #include <fmt/format.h>
 
-#include <llvm/IR/Attributes.h>
 #include <llvm/IR/BasicBlock.h>
 #include <llvm/IR/DerivedTypes.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
 #include <llvm/IR/Type.h>
 #include <llvm/IR/Value.h>
 

From b94f1575525bb47965ccdb933771e31001777c00 Mon Sep 17 00:00:00 2001
From: Francesco Biscani <bluescarni@gmail.com>
Date: Fri, 19 Feb 2021 22:53:25 +0100
Subject: [PATCH 3/4] Small cleanup.

---
 src/math/square.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/math/square.cpp b/src/math/square.cpp
index 3fecb6455..e742dbeab 100644
--- a/src/math/square.cpp
+++ b/src/math/square.cpp
@@ -229,8 +229,8 @@ llvm::Function *taylor_c_diff_func_square_impl(llvm_state &s, const square_impl
     auto val_t = to_llvm_vector_type<T>(context, batch_size);
 
     // Get the function name.
-    const auto fname
-        = "heyoka_taylor_diff_square_var_" + taylor_mangle_suffix(val_t) + "_n_uvars_" + li_to_string(n_uvars);
+    using namespace fmt::literals;
+    const auto fname = "heyoka_taylor_diff_square_var_{}_n_uvars_{}"_format(taylor_mangle_suffix(val_t), n_uvars);
 
     // The function arguments:
     // - diff order,

From a229a0a1de776755633c9878c53be104c16f0dda Mon Sep 17 00:00:00 2001
From: Francesco Biscani <bluescarni@gmail.com>
Date: Fri, 19 Feb 2021 23:02:53 +0100
Subject: [PATCH 4/4] Update changelog.

---
 doc/changelog.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/doc/changelog.rst b/doc/changelog.rst
index 395706b72..7aaac5c41 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -7,6 +7,13 @@ Changelog
 New
 ~~~
 
+- Introduce a dedicated negation operator in the
+  expression system
+  (`#99 <https://github.com/bluescarni/heyoka/pull/99>`__).
+- Implement various new automatic simplifications
+  in the expression system, and introduce ``powi()`` as
+  an alternative exponentiation function for natural exponents
+  (`#98 <https://github.com/bluescarni/heyoka/pull/98>`__).
 - Implement propagation over a time grid
   (`#95 <https://github.com/bluescarni/heyoka/pull/95>`__).
 - Implement support for dense output
@@ -16,6 +23,13 @@ New
   integrator classes
   (`#91 <https://github.com/bluescarni/heyoka/pull/91>`__).
 
+Fix
+~~~
+
+- Avoid division by zero in certain corner cases
+  when using ``pow()`` with small natural exponents
+  (`#98 <https://github.com/bluescarni/heyoka/pull/98>`__).
+
 0.3.0 (2021-02-11)
 ------------------