From 99f498fb397f1ccef3b4a3365f4ed9b5f847cbb1 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 15:25:32 +0100 Subject: [PATCH 01/13] Basic skeleton of initial support for sparse indices in diff_tensors, not functional yet. --- include/heyoka/expression.hpp | 14 +- src/expression_diff.cpp | 274 +++++++++++++++++++++++----------- 2 files changed, 196 insertions(+), 92 deletions(-) diff --git a/include/heyoka/expression.hpp b/include/heyoka/expression.hpp index 9c833a1aa..e7989e951 100644 --- a/include/heyoka/expression.hpp +++ b/include/heyoka/expression.hpp @@ -419,21 +419,23 @@ IGOR_MAKE_NAMED_ARGUMENT(diff_order); namespace detail { -// Private aliases/utilities needed in the implementation of dtens. -using dtens_v_idx_t = std::vector; +// Sparse structure used to index derivatives in dtens: +// the first element of the pair is the function component index, +// the second element the vector of derivative orders in sparse form. +using dtens_sv_idx_t = std::pair>>; -struct dtens_v_idx_cmp { - [[nodiscard]] bool operator()(const dtens_v_idx_t &, const dtens_v_idx_t &) const; +struct dtens_sv_idx_cmp { + [[nodiscard]] bool operator()(const dtens_sv_idx_t &, const dtens_sv_idx_t &) const; }; -using dtens_map_t = boost::container::flat_map; +using dtens_map_t = boost::container::flat_map; } // namespace detail class HEYOKA_DLL_PUBLIC dtens { public: - using v_idx_t = detail::dtens_v_idx_t; + using v_idx_t = std::vector; using size_type = detail::dtens_map_t::size_type; private: diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 3334595f7..2b1f5bb0d 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -453,15 +454,60 @@ auto diff_make_adj_dep(const std::vector &dc, std::vector &p) const noexcept + { + std::size_t seed = std::hash{}(p.first); + boost::hash_combine(seed, std::hash{}(p.second)); + + return seed; + } +}; + +// This is an alternative version of dtens_sv_idx_t that uses a dictionary +// for storing the derivative orders instead of a sorted vector. Using a dictionary +// allows for faster/easier manipulation of the derivative orders. +using dtens_ss_idx_t = std::pair>; + +// Helper to turn a dtens_sv_idx_t into a dtens_ss_idx_t. +void vidx_v2s(dtens_ss_idx_t &output, const dtens_sv_idx_t &input) +{ + output.first = input.first; + + output.second.clear(); + for (const auto &p : input.second) { + [[maybe_unused]] auto [_, flag] = output.second.insert(p); + assert(flag); + } +} + +// Helper to turn a dtens_ss_idx_t into a dtens_sv_idx_t. +dtens_sv_idx_t vidx_s2v(const dtens_ss_idx_t &input) +{ + dtens_sv_idx_t retval{input.first, {}}; + + std::copy(input.second.begin(), input.second.end(), std::back_inserter(retval.second)); + std::sort(retval.second.begin(), retval.second.end(), + [](const auto &p1, const auto &p2) { return p1.first < p2.first; }); + + return retval; +} + // Hasher for the local maps of derivatives used in the // forward/reverse mode implementations. struct diff_map_hasher { - std::size_t operator()(const dtens_v_idx_t &v) const noexcept + std::size_t operator()(const dtens_ss_idx_t &s) const noexcept { - std::size_t seed = std::hash{}(v.size()); - - for (auto idx : v) { - boost::hash_combine(seed, std::hash{}(idx)); + // Use as seed the component index. + std::size_t seed = std::hash{}(s.first); + + // Compose via additions the hashes of the derivative orders. + // NOTE: it is important that we use here a commutative operation + // so that the final hash is independent of the order in which + // the derivative orders are stored in the dictionary. + for (const auto &p : s.second) { + seed += idx_pair_hasher{}(p); } return seed; @@ -507,8 +553,8 @@ void diff_tensors_forward_impl( // to prevent duplicates. For order-1 derivatives, no duplicate // derivatives will be produced and thus we can use a plain vector // which can be quite a bit faster. - using diff_map_t = fast_umap; - using diff_vec_t = std::vector>; + using diff_map_t = fast_umap; + using diff_vec_t = std::vector>; using local_diff_t = std::variant; auto local_diff = (cur_order == 1u) ? local_diff_t(diff_vec_t{}) : local_diff_t(diff_map_t{}); @@ -518,7 +564,7 @@ void diff_tensors_forward_impl( auto local_dvec = [&local_diff]() -> diff_vec_t & { return std::get(local_diff); }; // An indices vector used as temporary variable in several places below. - std::vector tmp_v_idx; + dtens_ss_idx_t tmp_v_idx; // These two containers will be used to store the list of subexpressions // which depend on an input. They are used in the forward pass @@ -557,9 +603,8 @@ void diff_tensors_forward_impl( for (std::vector::size_type out_idx = 0; out_idx < cur_nouts; ++out_idx, ++out_it) { assert(out_it != diff_map.end()); - tmp_v_idx = out_it->first; - assert(diff_arg_idx + 1u < tmp_v_idx.size()); - tmp_v_idx[diff_arg_idx + 1u] += 1u; + vidx_v2s(tmp_v_idx, out_it->first); + tmp_v_idx.second[static_cast(diff_arg_idx)] += 1u; if (cur_order == 1u) { local_dvec().emplace_back(tmp_v_idx, 0_dbl); @@ -675,9 +720,8 @@ void diff_tensors_forward_impl( for (std::vector::size_type out_idx = 0; out_idx < cur_nouts; ++out_idx, ++out_it) { assert(out_it != diff_map.end()); - tmp_v_idx = out_it->first; - assert(diff_arg_idx + 1u < tmp_v_idx.size()); - tmp_v_idx[diff_arg_idx + 1u] += 1u; + vidx_v2s(tmp_v_idx, out_it->first); + tmp_v_idx.second[static_cast(diff_arg_idx)] += 1u; if (cur_order == 1u) { auto cur_der = diffs[diffs.size() - cur_nouts + out_idx]; @@ -697,9 +741,13 @@ void diff_tensors_forward_impl( // Merge the local map into diff_map. if (cur_order == 1u) { - diff_map.insert(diff_map.end(), local_dvec().begin(), local_dvec().end()); + for (auto &p : local_dvec()) { + diff_map.emplace_back(vidx_s2v(p.first), std::move(p.second)); + } } else { - diff_map.insert(diff_map.end(), local_dmap().begin(), local_dmap().end()); + for (auto &p : local_dmap()) { + diff_map.emplace_back(vidx_s2v(p.first), std::move(p.second)); + } } } @@ -742,8 +790,8 @@ void diff_tensors_reverse_impl( // to prevent duplicates. For order-1 derivatives, no duplicate // derivatives will be produced and thus we can use a plain vector // which can be quite a bit faster. - using diff_map_t = fast_umap; - using diff_vec_t = std::vector>; + using diff_map_t = fast_umap; + using diff_vec_t = std::vector>; using local_diff_t = std::variant; auto local_diff = (cur_order == 1u) ? local_diff_t(diff_vec_t{}) : local_diff_t(diff_map_t{}); @@ -756,7 +804,7 @@ void diff_tensors_reverse_impl( const auto nargs = args.size(); // An indices vector used as temporary variable in several places below. - std::vector tmp_v_idx; + dtens_ss_idx_t tmp_v_idx; // These two containers will be used to store the list of subexpressions // on which an output depends. They are used in the reverse pass @@ -893,12 +941,11 @@ void diff_tensors_reverse_impl( // Add the derivatives to the local map. for (decltype(args.size()) j = 0; j < nargs; ++j) { // Compute the indices vector for the current derivative. - tmp_v_idx = prev_begin->first; - assert(j + 1u < tmp_v_idx.size()); + vidx_v2s(tmp_v_idx, prev_begin->first); // NOTE: no need to overflow check here, because no derivative // order can end up being larger than the total diff order which // is representable by std::uint32_t. - tmp_v_idx[j + 1u] += 1u; + tmp_v_idx.second[static_cast(j)] += 1u; if (cur_order == 1u) { // Check if the diff argument is present in the @@ -936,31 +983,53 @@ void diff_tensors_reverse_impl( // Merge the local map into diff_map. if (cur_order == 1u) { - diff_map.insert(diff_map.end(), local_dvec().begin(), local_dvec().end()); + for (auto &p : local_dvec()) { + diff_map.emplace_back(vidx_s2v(p.first), std::move(p.second)); + } } else { - diff_map.insert(diff_map.end(), local_dmap().begin(), local_dmap().end()); + for (auto &p : local_dmap()) { + diff_map.emplace_back(vidx_s2v(p.first), std::move(p.second)); + } } } } // namespace -bool dtens_v_idx_cmp::operator()(const dtens_v_idx_t &v1, const dtens_v_idx_t &v2) const +bool dtens_sv_idx_cmp::operator()(const dtens_sv_idx_t &v1, const dtens_sv_idx_t &v2) const { - assert(v1.size() == v2.size()); - assert(v1.size() > 1u); +#if !defined(NDEBUG) + + // Sanity checks on the inputs. + + // Check indices sorting. + auto cmp = [](const auto &p1, const auto &p2) { return p1.first < p2.first; }; + assert(std::is_sorted(v1.second.begin(), v1.second.end(), cmp)); + assert(std::is_sorted(v2.second.begin(), v2.second.end(), cmp)); - // Compute the total derivative order for both - // vectors. + // Check no duplicate indices. + auto no_dup = [](const auto &p1, const auto &p2) { return p1.first == p2.first; }; + assert(std::adjacent_find(v1.second.begin(), v1.second.end(), no_dup) == v1.second.end()); + assert(std::adjacent_find(v2.second.begin(), v2.second.end(), no_dup) == v2.second.end()); + + // Check no zero diff orders. + auto nz_order = [](const auto &p) { return p.second != 0u; }; + assert(std::all_of(v1.second.begin(), v1.second.end(), nz_order)); + assert(std::all_of(v2.second.begin(), v2.second.end(), nz_order)); + +#endif + + // Compute the total derivative order for both vectors. // NOTE: here we have to use safe_numerics because this comparison operator - // might end up being invoked on a user-supplied v_idx_t, whose total degree - // may overflow. The v_idx_t in dtens, by contrast, are guaranteed to never + // might end up being invoked on a user-supplied index vector, whose total degree + // may overflow. The indices vector in dtens, by contrast, are guaranteed to never // overflow when computing the total degree. - boost::safe_numerics::safe deg1 = 0, deg2 = 0; - const auto size = v1.size(); - for (decltype(v1.size()) i = 1; i < size; ++i) { - deg1 += v1[i]; - deg2 += v2[i]; - } + using su32 = boost::safe_numerics::safe; + + // The accumulator. + auto acc = [](const auto &val, const auto &p) { return val + p.second; }; + + const auto deg1 = std::accumulate(v1.second.begin(), v1.second.end(), su32(0), acc); + const auto deg2 = std::accumulate(v2.second.begin(), v2.second.end(), su32(0), acc); if (deg1 < deg2) { return true; @@ -972,18 +1041,54 @@ bool dtens_v_idx_cmp::operator()(const dtens_v_idx_t &v1, const dtens_v_idx_t &v // The total derivative order is the same, look at // the component index next. - if (v1[0] < v2[0]) { + if (v1.first < v2.first) { return true; } - if (v1[0] > v2[0]) { + if (v1.first > v2.first) { return false; } // Component and total derivative order are the same, // resort to reverse lexicographical compare on the // derivative orders. - return std::lexicographical_compare(v1.begin() + 1, v1.end(), v2.begin() + 1, v2.end(), std::greater{}); + auto it1 = v1.second.begin(), it2 = v2.second.begin(); + const auto end1 = v1.second.end(), end2 = v2.second.end(); + for (; it1 != end1 && it2 != end2; ++it1, ++it2) { + const auto [idx1, n1] = *it1; + const auto [idx2, n2] = *it2; + + if (idx2 > idx1) { + return true; + } + + if (idx1 > idx2) { + return false; + } + + if (n1 > n2) { + return true; + } + + if (n2 > n1) { + return false; + } + + assert(std::equal(v1.second.begin(), it1 + 1, v2.second.begin())); + } + + if (it1 == end1 && it2 == end2) { + assert(v1.second == v2.second); + return false; + } + + if (it1 == end1) { + return false; + } + + assert(it2 == end2); + + return true; } } // namespace detail @@ -1097,9 +1202,9 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vectorfirst == v) { @@ -1109,10 +1214,8 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector tmp_v_idx; - tmp_v_idx.resize(1 + boost::safe_numerics::safe(nargs)); + // An indices vector used as temporary variable in several places below. + dtens_sv_idx_t tmp_v_idx; // Vector that will store the previous-order derivatives in the loop below. // It will be used to construct the decomposition. @@ -1121,7 +1224,7 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector(i); + tmp_v_idx.first = boost::numeric_cast(i); assert(search_diff_map(tmp_v_idx) == diff_map.end()); diff_map.emplace_back(tmp_v_idx, v_ex[i]); @@ -1131,9 +1234,10 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector(0)); + tmp_v_idx.first = 0; + tmp_v_idx.second.clear(); + tmp_v_idx.second.emplace_back(0, cur_order); + const auto prev_begin = search_diff_map(tmp_v_idx); assert(prev_begin != diff_map.end()); @@ -1177,7 +1281,7 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector &v_ex, const std::vector= 2u && p.first.size() - 1u == nargs; })); + assert(std::all_of(retval.begin(), retval.end(), [&nargs](const auto &p) { + return p.first.second.empty() || p.first.second.back().first < nargs; + })); // No duplicates in the indices vectors. assert(std::adjacent_find(retval.begin(), retval.end(), [](const auto &p1, const auto &p2) { return p1.first == p2.first; }) @@ -1372,29 +1477,32 @@ std::uint32_t dtens::get_order() const // in the map (specifically, it is // the last element in the indices // vector of the last derivative). - return (end() - 1)->first.back(); + assert(!(end() - 1)->first.second.empty()); + return (end() - 1)->first.second.back().second; } dtens::iterator dtens::find(const v_idx_t &vidx) const { - // NOTE: we need to sanity check vidx as the - // custom comparison operator for the internal map - // has preconditions. - // First we handle the empty case. if (p_impl->m_map.empty()) { return end(); } - // Second, we check that the size of vidx is correct. - if (begin()->first.size() != vidx.size()) { + // vidx must at least contain the function component index. + if (vidx.empty()) { return end(); } - assert(vidx.size() > 1u); + // Turn vidx into sparse format. + detail::dtens_sv_idx_t s_vidx{vidx[0], {}}; + for (decltype(vidx.size()) i = 1; i < vidx.size(); ++i) { + if (vidx[i] != 0u) { + s_vidx.second.emplace_back(boost::numeric_cast(i - 1u), vidx[i]); + } + } - // vidx is ok, look it up. - return p_impl->m_map.find(vidx); + // Lookup. + return p_impl->m_map.find(s_vidx); } const expression &dtens::operator[](const v_idx_t &vidx) const @@ -1430,13 +1538,11 @@ dtens::subrange dtens::get_derivatives(std::uint32_t order) const // Create the indices vector corresponding to the first derivative // of component 0 for the given order in the map. - auto vidx = begin()->first; - assert(std::all_of(vidx.begin(), vidx.end(), [](auto x) { return x == 0u; })); - vidx[1] = order; + detail::dtens_sv_idx_t s_vidx{0, {{0, order}}}; // Locate the corresponding derivative in the map. // NOTE: this could be end() for invalid order. - const auto b = p_impl->m_map.find(vidx); + const auto b = p_impl->m_map.find(s_vidx); #if !defined(NDEBUG) @@ -1453,11 +1559,12 @@ dtens::subrange dtens::get_derivatives(std::uint32_t order) const // NOTE: get_nouts() can return zero only if the internal // map is empty, and we handled this corner case earlier. assert(get_nouts() > 0u); - vidx[0] = get_nouts() - 1u; - vidx[1] = 0; - vidx.back() = order; + s_vidx.first = get_nouts() - 1u; + assert(get_nvars() > 0u); + s_vidx.second[0].first = get_nvars() - 1u; + // NOTE: this could be end() for invalid order. - auto e = p_impl->m_map.find(vidx); + auto e = p_impl->m_map.find(s_vidx); #if !defined(NDEBUG) @@ -1489,14 +1596,11 @@ dtens::subrange dtens::get_derivatives(std::uint32_t component, std::uint32_t or // Create the indices vector corresponding to the first derivative // for the given order and component in the map. - auto vidx = begin()->first; - assert(std::all_of(vidx.begin(), vidx.end(), [](auto x) { return x == 0u; })); - vidx[0] = component; - vidx[1] = order; + detail::dtens_sv_idx_t s_vidx{component, {{0, order}}}; // Locate the corresponding derivative in the map. // NOTE: this could be end() for invalid component/order. - const auto b = p_impl->m_map.find(vidx); + const auto b = p_impl->m_map.find(s_vidx); #if !defined(NDEBUG) @@ -1510,10 +1614,11 @@ dtens::subrange dtens::get_derivatives(std::uint32_t component, std::uint32_t or // Modify vidx so that it now refers to the last derivative // for the given order and component in the map. - vidx[1] = 0; - vidx.back() = order; + assert(get_nvars() > 0u); + s_vidx.second[0].first = get_nvars() - 1u; + // NOTE: this could be end() for invalid component/order. - auto e = p_impl->m_map.find(vidx); + auto e = p_impl->m_map.find(s_vidx); #if !defined(NDEBUG) @@ -1588,8 +1693,7 @@ std::uint32_t dtens::get_nvars() const if (p_impl->m_map.empty()) { assert(ret == 0u); } else { - assert(!begin()->first.empty()); - assert(ret == begin()->first.size() - 1u); + assert(!begin()->first.second.empty()); } #endif @@ -1605,12 +1709,10 @@ std::uint32_t dtens::get_nouts() const // Construct the indices vector corresponding // to the first derivative of order 1 of the first component. - auto vidx = begin()->first; - assert(std::all_of(vidx.begin(), vidx.end(), [](auto x) { return x == 0u; })); - vidx[1] = 1; + detail::dtens_sv_idx_t s_vidx{0, {{0, 1}}}; // Try to find it in the map. - const auto it = p_impl->m_map.find(vidx); + const auto it = p_impl->m_map.find(s_vidx); // NOTE: the number of outputs is always representable by // std::uint32_t, otherwise we could not index the function From e63aae3b7ba1fc5d680c9a09616e3b45971a091a Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 15:48:18 +0100 Subject: [PATCH 02/13] Minor. --- src/expression_diff.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 2b1f5bb0d..cc967743d 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -551,7 +551,7 @@ void diff_tensors_forward_impl( // will produce several times the same derivative, and thus // we need to store the derivatives in a dictionary in order // to prevent duplicates. For order-1 derivatives, no duplicate - // derivatives will be produced and thus we can use a plain vector + // derivatives will be produced and thus we can use a plain vector, // which can be quite a bit faster. using diff_map_t = fast_umap; using diff_vec_t = std::vector>; @@ -788,7 +788,7 @@ void diff_tensors_reverse_impl( // will produce several times the same derivative, and thus // we need to store the derivatives in a dictionary in order // to prevent duplicates. For order-1 derivatives, no duplicate - // derivatives will be produced and thus we can use a plain vector + // derivatives will be produced and thus we can use a plain vector, // which can be quite a bit faster. using diff_map_t = fast_umap; using diff_vec_t = std::vector>; From 9016ef89dd33591a61316886dfade09e6e7409ac Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 22:07:57 +0100 Subject: [PATCH 03/13] Fix quadratic complexity when building a ffnn model. --- src/model/ffnn.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/model/ffnn.cpp b/src/model/ffnn.cpp index 4e5358021..435b8839b 100644 --- a/src/model/ffnn.cpp +++ b/src/model/ffnn.cpp @@ -19,6 +19,7 @@ #include #include +#include #include HEYOKA_BEGIN_NAMESPACE @@ -43,21 +44,28 @@ std::vector compute_layer(su32 layer_id, const std::vector retval(static_cast::size_type>(n_neurons_curr_layer), 0_dbl); + std::vector retval, tmp_sum; + retval.reserve(n_neurons_curr_layer); + for (su32 i = 0; i < n_neurons_curr_layer; ++i) { + // Clear the summation terms. + tmp_sum.clear(); + for (su32 j = 0; j < n_neurons_prev_layer; ++j) { // Add the weight and update the weight counter. - retval[i] += nn_wb[wcounter] * inputs[j]; + tmp_sum.push_back(nn_wb[wcounter] * inputs[j]); ++wcounter; } // Add the bias and update the counter. - retval[i] += nn_wb[bcounter + n_net_w]; + tmp_sum.push_back(nn_wb[bcounter + n_net_w]); ++bcounter; + // Activation function. - retval[i] = activation(retval[i]); + retval.push_back(activation(sum(tmp_sum))); } + return retval; } From ef8e031f014ec470bafa5635594176a79d1e5011 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Fri, 10 Nov 2023 22:08:34 +0100 Subject: [PATCH 04/13] Several fixes, provisional WIP. --- src/expression_diff.cpp | 40 ++++++++++++++++++++++++-------- test/expression_diff_tensors.cpp | 4 ++++ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index cc967743d..8c95ddc9f 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -1236,7 +1236,9 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vectorfirst.second.empty()); - return (end() - 1)->first.second.back().second; + const auto &sv = (end() - 1)->first.second; + if (sv.empty()) { + return 0; + } else { + return sv.back().second; + } } dtens::iterator dtens::find(const v_idx_t &vidx) const @@ -1493,6 +1499,12 @@ dtens::iterator dtens::find(const v_idx_t &vidx) const return end(); } + // The size of vidx must be consistent with the number + // of diff args. + if (vidx.size() - 1u != get_nvars()) { + return end(); + } + // Turn vidx into sparse format. detail::dtens_sv_idx_t s_vidx{vidx[0], {}}; for (decltype(vidx.size()) i = 1; i < vidx.size(); ++i) { @@ -1538,7 +1550,10 @@ dtens::subrange dtens::get_derivatives(std::uint32_t order) const // Create the indices vector corresponding to the first derivative // of component 0 for the given order in the map. - detail::dtens_sv_idx_t s_vidx{0, {{0, order}}}; + detail::dtens_sv_idx_t s_vidx{0, {}}; + if (order != 0u) { + s_vidx.second.emplace_back(0, order); + } // Locate the corresponding derivative in the map. // NOTE: this could be end() for invalid order. @@ -1560,8 +1575,10 @@ dtens::subrange dtens::get_derivatives(std::uint32_t order) const // map is empty, and we handled this corner case earlier. assert(get_nouts() > 0u); s_vidx.first = get_nouts() - 1u; - assert(get_nvars() > 0u); - s_vidx.second[0].first = get_nvars() - 1u; + if (order != 0u) { + assert(get_nvars() > 0u); + s_vidx.second[0].first = get_nvars() - 1u; + } // NOTE: this could be end() for invalid order. auto e = p_impl->m_map.find(s_vidx); @@ -1596,7 +1613,10 @@ dtens::subrange dtens::get_derivatives(std::uint32_t component, std::uint32_t or // Create the indices vector corresponding to the first derivative // for the given order and component in the map. - detail::dtens_sv_idx_t s_vidx{component, {{0, order}}}; + detail::dtens_sv_idx_t s_vidx{component, {}}; + if (order != 0u) { + s_vidx.second.emplace_back(0, order); + } // Locate the corresponding derivative in the map. // NOTE: this could be end() for invalid component/order. @@ -1615,7 +1635,9 @@ dtens::subrange dtens::get_derivatives(std::uint32_t component, std::uint32_t or // Modify vidx so that it now refers to the last derivative // for the given order and component in the map. assert(get_nvars() > 0u); - s_vidx.second[0].first = get_nvars() - 1u; + if (order != 0u) { + s_vidx.second[0].first = get_nvars() - 1u; + } // NOTE: this could be end() for invalid component/order. auto e = p_impl->m_map.find(s_vidx); @@ -1692,8 +1714,6 @@ std::uint32_t dtens::get_nvars() const if (p_impl->m_map.empty()) { assert(ret == 0u); - } else { - assert(!begin()->first.second.empty()); } #endif diff --git a/test/expression_diff_tensors.cpp b/test/expression_diff_tensors.cpp index aa04923c4..05fcc4a13 100644 --- a/test/expression_diff_tensors.cpp +++ b/test/expression_diff_tensors.cpp @@ -384,6 +384,8 @@ TEST_CASE("dtens basics") REQUIRE(dt3.get_args() == dt2.get_args()); } +#if 0 + TEST_CASE("fixed centres check") { std::uniform_real_distribution rdist(-10., 10.); @@ -577,6 +579,8 @@ TEST_CASE("speelpenning check") } } +#endif + TEST_CASE("speelpenning complexity") { fmt::print("Speelpenning's example\n"); From 56e79e7c8307a0300cf28c50ba4a14761d0b7ccc Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sat, 11 Nov 2023 18:38:06 +0100 Subject: [PATCH 05/13] Small tweaks. --- src/expression_diff.cpp | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 80d3cfa3e..8dcf36db8 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -473,8 +473,10 @@ using dtens_ss_idx_t = std::pairm_map.empty()) { return 0; } - // Construct the indices vector corresponding + // Try to find in the map the indices vector corresponding // to the first derivative of order 1 of the first component. - detail::dtens_sv_idx_t s_vidx{0, {{0, 1}}}; - - // Try to find it in the map. - const auto it = p_impl->m_map.find(s_vidx); + const auto it = p_impl->m_map.find(detail::s_vidx_001); // NOTE: the number of outputs is always representable by // std::uint32_t, otherwise we could not index the function From d196d27630223ee31be6fd0df5bb3c07e5c2d05b Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 00:43:29 +0100 Subject: [PATCH 06/13] Small tweaks, internal docs. --- include/heyoka/expression.hpp | 8 +- src/expression_diff.cpp | 174 ++++++++++++++++++++++++---------- 2 files changed, 129 insertions(+), 53 deletions(-) diff --git a/include/heyoka/expression.hpp b/include/heyoka/expression.hpp index e7989e951..047a7bac5 100644 --- a/include/heyoka/expression.hpp +++ b/include/heyoka/expression.hpp @@ -420,8 +420,10 @@ namespace detail { // Sparse structure used to index derivatives in dtens: -// the first element of the pair is the function component index, -// the second element the vector of derivative orders in sparse form. +// - the first element of the pair is the function component index, +// - the second element is the vector of variable index/diff order pairs, +// which is kept sorted according to the variable index, and in which no +// diff order can be zero and no variable index can appear twice. using dtens_sv_idx_t = std::pair>>; struct dtens_sv_idx_cmp { @@ -435,7 +437,9 @@ using dtens_map_t = boost::container::flat_map; + using size_type = detail::dtens_map_t::size_type; private: diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 8dcf36db8..38b2339ff 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -454,20 +454,9 @@ auto diff_make_adj_dep(const std::vector &dc, std::vector &p) const noexcept - { - std::size_t seed = std::hash{}(p.first); - boost::hash_combine(seed, std::hash{}(p.second)); - - return seed; - } -}; - // This is an alternative version of dtens_sv_idx_t that uses a dictionary -// for storing the derivative orders instead of a sorted vector. Using a dictionary -// allows for faster/easier manipulation of the derivative orders. +// for storing the index/order pairs instead of a sorted vector. Using a dictionary +// allows for faster/easier manipulation. using dtens_ss_idx_t = std::pair>; // Helper to turn a dtens_sv_idx_t into a dtens_ss_idx_t. @@ -476,7 +465,7 @@ void vidx_v2s(dtens_ss_idx_t &output, const dtens_sv_idx_t &input) // Assign the component. output.first = input.first; - // Assign the derivative indices. + // Assign the index/order pairs. output.second.clear(); for (const auto &p : input.second) { [[maybe_unused]] auto [_, flag] = output.second.insert(p); @@ -490,7 +479,7 @@ dtens_sv_idx_t vidx_s2v(const dtens_ss_idx_t &input) // Init retval. dtens_sv_idx_t retval{input.first, {input.second.begin(), input.second.end()}}; - // Sort the derivative indices. + // Sort the index/order pairs. std::sort(retval.second.begin(), retval.second.end(), [](const auto &p1, const auto &p2) { return p1.first < p2.first; }); @@ -505,12 +494,15 @@ struct diff_map_hasher { // Use as seed the component index. std::size_t seed = std::hash{}(s.first); - // Compose via additions the hashes of the derivative orders. + // Compose via additions the hashes of the index/order pairs. // NOTE: it is important that we use here a commutative operation - // so that the final hash is independent of the order in which - // the derivative orders are stored in the dictionary. + // for the composition so that the final hash is independent of the order + // in which the pairs are stored in the dictionary. for (const auto &p : s.second) { - seed += idx_pair_hasher{}(p); + std::size_t p_hash = std::hash{}(p.first); + boost::hash_combine(p_hash, std::hash{}(p.second)); + + seed += p_hash; } return seed; @@ -566,7 +558,7 @@ void diff_tensors_forward_impl( auto local_dmap = [&local_diff]() -> diff_map_t & { return std::get(local_diff); }; auto local_dvec = [&local_diff]() -> diff_vec_t & { return std::get(local_diff); }; - // An indices vector used as temporary variable in several places below. + // This is used as a temporary variable in several places below. dtens_ss_idx_t tmp_v_idx; // These two containers will be used to store the list of subexpressions @@ -809,7 +801,7 @@ void diff_tensors_reverse_impl( // Cache the number of diff arguments. const auto nargs = args.size(); - // An indices vector used as temporary variable in several places below. + // This is used as a temporary variable in several places below. dtens_ss_idx_t tmp_v_idx; // These two containers will be used to store the list of subexpressions @@ -1001,35 +993,33 @@ void diff_tensors_reverse_impl( } } -} // namespace - -bool dtens_sv_idx_cmp::operator()(const dtens_sv_idx_t &v1, const dtens_sv_idx_t &v2) const +// Utility function to check that a dtens_sv_idx_t is well-formed. +void sv_sanity_check([[maybe_unused]] const dtens_sv_idx_t &v) { -#if !defined(NDEBUG) - - // Sanity checks on the inputs. - - // Check indices sorting. + // Check sorting according to the derivative indices. auto cmp = [](const auto &p1, const auto &p2) { return p1.first < p2.first; }; - assert(std::is_sorted(v1.second.begin(), v1.second.end(), cmp)); - assert(std::is_sorted(v2.second.begin(), v2.second.end(), cmp)); + assert(std::is_sorted(v.second.begin(), v.second.end(), cmp)); - // Check no duplicate indices. + // Check no duplicate derivative indices. auto no_dup = [](const auto &p1, const auto &p2) { return p1.first == p2.first; }; - assert(std::adjacent_find(v1.second.begin(), v1.second.end(), no_dup) == v1.second.end()); - assert(std::adjacent_find(v2.second.begin(), v2.second.end(), no_dup) == v2.second.end()); + assert(std::adjacent_find(v.second.begin(), v.second.end(), no_dup) == v.second.end()); - // Check no zero diff orders. + // Check no zero derivative orders. auto nz_order = [](const auto &p) { return p.second != 0u; }; - assert(std::all_of(v1.second.begin(), v1.second.end(), nz_order)); - assert(std::all_of(v2.second.begin(), v2.second.end(), nz_order)); + assert(std::all_of(v.second.begin(), v.second.end(), nz_order)); +} -#endif +// Implementation of dtens_sv_idx_cmp::operator(). +bool dtens_sv_idx_cmp_impl(const dtens_sv_idx_t &v1, const dtens_sv_idx_t &v2) +{ + // Sanity checks on the inputs. + sv_sanity_check(v1); + sv_sanity_check(v2); - // Compute the total derivative order for both vectors. + // Compute the total derivative order for both v1 and v2. // NOTE: here we have to use safe_numerics because this comparison operator - // might end up being invoked on a user-supplied index vector, whose total degree - // may overflow. The indices vector in dtens, by contrast, are guaranteed to never + // might end up being invoked on a user-supplied dtens_sv_idx_t, whose total degree + // may overflow. The dtens_sv_idx_t in dtens, by contrast, are guaranteed to never // overflow when computing the total degree. using su32 = boost::safe_numerics::safe; @@ -1099,6 +1089,87 @@ bool dtens_sv_idx_cmp::operator()(const dtens_sv_idx_t &v1, const dtens_sv_idx_t return true; } +#if !defined(NDEBUG) + +// Same comparison as the previous function, but in dense format. +// Used only for debug. +bool dtens_v_idx_cmp_impl(const dtens::v_idx_t &v1, const dtens::v_idx_t &v2) +{ + assert(v1.size() == v2.size()); + assert(!v1.empty()); + + // Compute the total derivative order for both + // vectors. + boost::safe_numerics::safe deg1 = 0, deg2 = 0; + const auto size = v1.size(); + for (decltype(v1.size()) i = 1; i < size; ++i) { + deg1 += v1[i]; + deg2 += v2[i]; + } + + if (deg1 < deg2) { + return true; + } + + if (deg1 > deg2) { + return false; + } + + // The total derivative order is the same, look at + // the component index next. + if (v1[0] < v2[0]) { + return true; + } + + if (v1[0] > v2[0]) { + return false; + } + + // Component and total derivative order are the same, + // resort to reverse lexicographical compare on the + // derivative orders. + return std::lexicographical_compare(v1.begin() + 1, v1.end(), v2.begin() + 1, v2.end(), std::greater{}); +} + +#endif + +} // namespace + +bool dtens_sv_idx_cmp::operator()(const dtens_sv_idx_t &v1, const dtens_sv_idx_t &v2) const +{ + auto ret = dtens_sv_idx_cmp_impl(v1, v2); + +#if !defined(NDEBUG) + + // Convert to dense and re-run the same comparison. + auto to_dense = [](const dtens_sv_idx_t &v) { + dtens::v_idx_t dv{v.first}; + + std::uint32_t cur_d_idx = 0; + for (auto it = v.second.begin(); it != v.second.end(); ++cur_d_idx) { + if (cur_d_idx == it->first) { + dv.push_back(it->second); + ++it; + } else { + dv.push_back(0); + } + } + + return dv; + }; + + auto dv1 = to_dense(v1); + auto dv2 = to_dense(v2); + dv1.resize(std::max(dv1.size(), dv2.size())); + dv2.resize(std::max(dv1.size(), dv2.size())); + + assert(ret == dtens_v_idx_cmp_impl(dv1, dv2)); + +#endif + + return ret; +} + } // namespace detail // NOLINTNEXTLINE(bugprone-exception-escape) @@ -1198,17 +1269,12 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector(nargs)); - // Map to associate a vector of indices to a derivative. - // The first index in each vector is the component index, - // the rest of the indices are the derivative orders for - // each diff args. E.g., with diff args = [x, y, z], - // then [0, 1, 2, 1] means d4f0/(dx dy**2 dz) (where f0 is the - // first component of the vector function f). + // Map to associate a dtens_sv_idx_t to a derivative. // This will be kept manually sorted according to dtens_v_idx_cmp // and it will be turned into a flat map at the end. dtens_map_t::sequence_type diff_map; - // Helper to locate the vector of indices v in diff_map. If not present, + // Helper to locate a dtens_sv_idx_t in diff_map. If not present, // diff_map.end() will be returned. auto search_diff_map = [&diff_map](const dtens_sv_idx_t &v) { auto it = std::lower_bound(diff_map.begin(), diff_map.end(), v, [](const auto &item, const auto &vec) { @@ -1222,7 +1288,7 @@ auto diff_tensors_impl(const std::vector &v_ex, const std::vector &v_ex, const std::vectorm_map.empty()) { return 0; } @@ -1489,6 +1556,11 @@ std::uint32_t dtens::get_order() const // vector of the last derivative). const auto &sv = (end() - 1)->first.second; if (sv.empty()) { + // NOTE: an empty index/order vector + // at the end means that the maximum + // diff order is zero and that we are + // only storing the original function + // components in the dtens object. return 0; } else { return sv.back().second; @@ -1577,7 +1649,7 @@ dtens::subrange dtens::get_derivatives(std::uint32_t order) const #endif - // Modify vidx so that it now refers to the last derivative + // Modify s_vidx so that it now refers to the last derivative // for the last component at the given order in the map. // NOTE: get_nouts() can return zero only if the internal // map is empty, and we handled this corner case earlier. From c5572e23459f38416362b941994ffa56ba6b4e5b Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 01:57:30 +0100 Subject: [PATCH 07/13] Fix and re-enable tests. --- test/expression_diff_tensors.cpp | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/test/expression_diff_tensors.cpp b/test/expression_diff_tensors.cpp index 05fcc4a13..0cba38a63 100644 --- a/test/expression_diff_tensors.cpp +++ b/test/expression_diff_tensors.cpp @@ -384,8 +384,6 @@ TEST_CASE("dtens basics") REQUIRE(dt3.get_args() == dt2.get_args()); } -#if 0 - TEST_CASE("fixed centres check") { std::uniform_real_distribution rdist(-10., 10.); @@ -454,17 +452,13 @@ TEST_CASE("fixed centres check") const auto &v_idx = sr_it->first; - REQUIRE(v_idx.size() == diff_vars.size() + 1u); - // Build the current derivative via repeated // invocations of diff(). - auto ex = v_idx[0] == 0u ? fc_energy : fc_pot; - - for (auto j = 1u; j < v_idx.size(); ++j) { - auto order = v_idx[j]; + auto ex = v_idx.first == 0u ? fc_energy : fc_pot; + for (auto [var_idx, order] : v_idx.second) { for (decltype(order) k = 0; k < order; ++k) { - ex = diff(ex, diff_vars[j - 1u]); + ex = diff(ex, diff_vars[var_idx]); } } @@ -545,18 +539,15 @@ TEST_CASE("speelpenning check") const auto &v_idx = sr_it->first; - REQUIRE(v_idx.size() == nvars + 1u); - REQUIRE(v_idx[0] == 0u); + REQUIRE(v_idx.first == 0u); // Build the current derivative via repeated // invocations of diff(). auto ex = prod; - for (auto j = 1u; j < v_idx.size(); ++j) { - auto order = v_idx[j]; - + for (auto [var_idx, order] : v_idx.second) { for (decltype(order) k = 0; k < order; ++k) { - ex = diff(ex, vars[j - 1u]); + ex = diff(ex, vars[var_idx]); } } @@ -579,8 +570,6 @@ TEST_CASE("speelpenning check") } } -#endif - TEST_CASE("speelpenning complexity") { fmt::print("Speelpenning's example\n"); From 19122f66cd8e1fca7f1f01d89737266ed78b38eb Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 11:42:43 +0100 Subject: [PATCH 08/13] Small coverage fixes. --- src/expression_diff.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 38b2339ff..2ed5992e0 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -484,7 +484,7 @@ dtens_sv_idx_t vidx_s2v(const dtens_ss_idx_t &input) [](const auto &p1, const auto &p2) { return p1.first < p2.first; }); return retval; -} +} // LCOV_EXCL_LINE // Hasher for the local maps of derivatives used in the // forward/reverse mode implementations. @@ -987,9 +987,11 @@ void diff_tensors_reverse_impl( diff_map.emplace_back(vidx_s2v(p.first), std::move(p.second)); } } else { + // LCOV_EXCL_START for (auto &p : local_dmap()) { diff_map.emplace_back(vidx_s2v(p.first), std::move(p.second)); } + // LCOV_EXCL_STOP } } @@ -1156,7 +1158,7 @@ bool dtens_sv_idx_cmp::operator()(const dtens_sv_idx_t &v1, const dtens_sv_idx_t } return dv; - }; + }; // LCOV_EXCL_LINE auto dv1 = to_dense(v1); auto dv2 = to_dense(v2); From 840ba1eb27be770f6b0ca0c64444672900d5c062 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 11:52:33 +0100 Subject: [PATCH 09/13] Bump up s11n version number for dtens. --- include/heyoka/expression.hpp | 5 +++++ src/expression_diff.cpp | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/heyoka/expression.hpp b/include/heyoka/expression.hpp index 047a7bac5..bf05a518c 100644 --- a/include/heyoka/expression.hpp +++ b/include/heyoka/expression.hpp @@ -515,6 +515,11 @@ HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const dtens &); HEYOKA_END_NAMESPACE +// Version changelog: +// - version 1: switched from dense to sparse +// format for the indices vectors. +BOOST_CLASS_VERSION(heyoka::dtens::impl, 1) + // fmt formatter for dtens, implemented // on top of the streaming operator. namespace fmt diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index 2ed5992e0..e4ede9216 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -1202,8 +1202,15 @@ struct dtens::impl { // NOTE: as usual, we assume here that the archive contains // a correctly-serialised instance. In particular, we are assuming // that the elements in ar are sorted correctly. - void load(boost::archive::binary_iarchive &ar, unsigned) + void load(boost::archive::binary_iarchive &ar, unsigned version) { + // LCOV_EXCL_START + if (version < static_cast(boost::serialization::version::type::value)) { + throw std::invalid_argument( + fmt::format("Unable to load a dtens object: the archive version ({}) is too old", version)); + } + // LCOV_EXCL_STOP + try { // Reset m_map. m_map.clear(); From 109743adb258226c4e0a674b6784b6aa68c03a61 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 11:56:57 +0100 Subject: [PATCH 10/13] Small addition. --- include/heyoka/expression.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/heyoka/expression.hpp b/include/heyoka/expression.hpp index bf05a518c..f7113e225 100644 --- a/include/heyoka/expression.hpp +++ b/include/heyoka/expression.hpp @@ -440,6 +440,9 @@ class HEYOKA_DLL_PUBLIC dtens // Derivative indexing vector in dense form. using v_idx_t = std::vector; + // Derivative indexing vector in sparse form. + using sv_idx_t = detail::dtens_sv_idx_t; + using size_type = detail::dtens_map_t::size_type; private: From da1288d0b84b8bc4b119af614199a3fd1da7ad76 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 12:01:21 +0100 Subject: [PATCH 11/13] Add another test in the hope of triggering more corner cases. --- test/expression_diff_tensors.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/expression_diff_tensors.cpp b/test/expression_diff_tensors.cpp index 0cba38a63..4cb91140c 100644 --- a/test/expression_diff_tensors.cpp +++ b/test/expression_diff_tensors.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -700,3 +702,18 @@ TEST_CASE("jacobian") REQUIRE(jac == std::vector{1_dbl, 1_dbl, 1_dbl, -1_dbl, -1_dbl, -1_dbl}); } } + +// A test on a neural network. No REQUIREs, +// for this test we rely on the internal assertions +// in debug mode. +TEST_CASE("nn test") +{ + const auto nn_layer = 50u; + + auto [x, y] = make_vars("x", "y"); + auto ffnn = model::ffnn(kw::inputs = {x, y}, kw::nn_hidden = {nn_layer, nn_layer, nn_layer}, kw::n_out = 2, + kw::activations = {heyoka::tanh, heyoka::tanh, heyoka::tanh, heyoka::tanh}); + + auto dt = diff_tensors(ffnn, kw::diff_args = diff_args::params); + auto dNdtheta = dt.get_jacobian(); +} From 60399488667db540bb3d33e72ca52837a7e7c32a Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 12:05:39 +0100 Subject: [PATCH 12/13] Update changelog. --- doc/changelog.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index 9a8e42762..b4a33aaba 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -23,7 +23,8 @@ Changes - Substantial speedups in the computation of first-order derivatives with respect to many variables/parameters - (`#358 `__). + (`#360 `__, + `#358 `__). - Substantial performance improvements in the computation of derivative tensors of large expressions with a high degree of internal redundancy From f164b7e344d372e2586b8df360026f52e121c5ce Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Sun, 12 Nov 2023 14:32:09 +0100 Subject: [PATCH 13/13] Simplification in the comparison operator. --- src/expression_diff.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/expression_diff.cpp b/src/expression_diff.cpp index e4ede9216..138ca369d 100644 --- a/src/expression_diff.cpp +++ b/src/expression_diff.cpp @@ -1077,18 +1077,18 @@ bool dtens_sv_idx_cmp_impl(const dtens_sv_idx_t &v1, const dtens_sv_idx_t &v2) assert(std::equal(v1.second.begin(), it1 + 1, v2.second.begin())); } - if (it1 == end1 && it2 == end2) { - assert(v1.second == v2.second); - return false; - } - - if (it1 == end1) { - return false; - } - - assert(it2 == end2); - - return true; + // NOTE: if we end up here, it means that: + // - component and diff order are the same, and + // - the two index/order lists share a common + // (possibly empty) initial sequence. + // It follows then that both it1 and it2 must be + // end iterators, because if either index/order list + // had additional terms, then the diff orders + // could not possibly be equal. + assert(it1 == end1 && it2 == end2); + assert(v1.second == v2.second); + + return false; } #if !defined(NDEBUG)