Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch std::string -> hashes for label resolution. #2215

Merged
merged 13 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions arbor/benchmark_cell_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ benchmark_cell_group::benchmark_cell_group(const std::vector<cell_gid_type>& gid
for (const auto& c: cells_) {
cg_sources.add_cell();
cg_targets.add_cell();
cg_sources.add_label(c.source, {0, 1});
cg_targets.add_label(c.target, {0, 1});
cg_sources.add_label(internal_hash(c.source), {0, 1});
cg_targets.add_label(internal_hash(c.target), {0, 1});
}

benchmark_cell_group::reset();
Expand Down
18 changes: 12 additions & 6 deletions arbor/cable_cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ struct cable_cell_impl {
decor decorations;

// The placeable label to lid_range map
dynamic_typed_map<constant_type<std::unordered_multimap<cell_tag_type, lid_range>>::type> labeled_lid_ranges;
dynamic_typed_map<constant_type<std::unordered_multimap<hash_type, lid_range>>::type> labeled_lid_ranges;

cable_cell_impl(const arb::morphology& m, const label_dict& labels, const decor& decorations):
provider(m, labels),
Expand Down Expand Up @@ -120,7 +120,7 @@ struct cable_cell_impl {
}

template <typename Item>
void place(const locset& ls, const Item& item, const cell_tag_type& label) {
void place(const locset& ls, const Item& item, const hash_type& label) {
auto& mm = get_location_map(item);
cell_lid_type& lid = placed_count.get<Item>();
cell_lid_type first = lid;
Expand Down Expand Up @@ -226,7 +226,8 @@ void cable_cell_impl::init(const decor& d) {
for (const auto& p: d.placements()) {
auto& where = std::get<0>(p);
auto& label = std::get<2>(p);
std::visit([this, &where, &label] (auto&& what) {return this->place(where, what, label);}, std::get<1>(p));
std::visit([this, &where, &label] (auto&& what) {return this->place(where, what, label); },
std::get<1>(p));
}
}

Expand Down Expand Up @@ -280,16 +281,21 @@ const cable_cell_parameter_set& cable_cell::default_parameters() const {
return impl_->decorations.defaults();
}

const std::unordered_multimap<cell_tag_type, lid_range>& cable_cell::detector_ranges() const {
const cable_cell::lid_range_map& cable_cell::detector_ranges() const {
return impl_->labeled_lid_ranges.get<threshold_detector>();
}

const std::unordered_multimap<cell_tag_type, lid_range>& cable_cell::synapse_ranges() const {
const cable_cell::lid_range_map& cable_cell::synapse_ranges() const {
return impl_->labeled_lid_ranges.get<synapse>();
}

const std::unordered_multimap<cell_tag_type, lid_range>& cable_cell::junction_ranges() const {
const cable_cell::lid_range_map& cable_cell::junction_ranges() const {
return impl_->labeled_lid_ranges.get<junction>();
}

cell_tag_type decor::tag_of(hash_type hash) const {
if (!hashes_.count(hash)) throw arbor_internal_error{util::pprintf("Unknown hash for {}.", std::to_string(hash))};
return hashes_.at(hash);
}

} // namespace arb
9 changes: 8 additions & 1 deletion arbor/cable_cell_param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
#include <arbor/cable_cell_param.hpp>
#include <arbor/s_expr.hpp>

#include <arbor/util/hash_def.hpp>
#include "util/maputil.hpp"
#include "util/strprintf.hpp"

namespace arb {

Expand Down Expand Up @@ -120,7 +122,12 @@ decor& decor::paint(region where, paintable what) {
}

decor& decor::place(locset where, placeable what, cell_tag_type label) {
placements_.emplace_back(std::move(where), std::move(what), std::move(label));
auto hash = internal_hash(label);
if (hashes_.count(hash) && hashes_.at(hash) != label) {
throw arbor_internal_error{util::strprintf("Hash collision {} ./. {}", label, hashes_.at(hash))};
}
placements_.emplace_back(std::move(where), std::move(what), hash);
hashes_.emplace(hash, label);
return *this;
}

Expand Down
12 changes: 5 additions & 7 deletions arbor/communication/mpi_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,11 @@ struct mpi_context_impl {
}

cell_label_range gather_cell_label_range(const cell_label_range& local_ranges) const {
std::vector<cell_size_type> sizes;
std::vector<cell_tag_type> labels;
std::vector<lid_range> ranges;
sizes = mpi::gather_all(local_ranges.sizes(), comm_);
labels = mpi::gather_all(local_ranges.labels(), comm_);
ranges = mpi::gather_all(local_ranges.ranges(), comm_);
return cell_label_range(sizes, labels, ranges);
cell_label_range res;
res.sizes = mpi::gather_all(local_ranges.sizes, comm_);
res.labels = mpi::gather_all(local_ranges.labels, comm_);
res.ranges = mpi::gather_all(local_ranges.ranges, comm_);
return res;
}

cell_labels_and_gids gather_cell_labels_and_gids(const cell_labels_and_gids& local_labels_and_gids) const {
Expand Down
61 changes: 27 additions & 34 deletions arbor/fvm_lowered_cell_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,15 +315,27 @@ fvm_detector_info get_detector_info(arb_size_type max,
return { max, std::move(cv), std::move(threshold), ctx };
}

template <typename Backend>
void fvm_lowered_cell_impl<Backend>::add_probes(const std::vector<cell_gid_type>& gids,
const std::vector<cable_cell>& cells,
const recipe& rec,
const fvm_cv_discretization& D,
const std::unordered_map<std::string, mechanism*>& mechptr_by_name,
const fvm_mechanism_data& mech_data,
const std::vector<target_handle>& target_handles,
probe_association_map& probe_map) {
inline cell_size_type
add_labels(cell_label_range& clr, const cable_cell::lid_range_map& ranges) {
clr.add_cell();
cell_size_type count = 0;
std::unordered_map<hash_type, cell_tag_type> hashes;
for (const auto& [label, range]: ranges) {
clr.add_label(label, range);
count += (range.end - range.begin);
}
return count;
}

template <typename Backend> void
fvm_lowered_cell_impl<Backend>::add_probes(const std::vector<cell_gid_type>& gids,
const std::vector<cable_cell>& cells,
const recipe& rec,
const fvm_cv_discretization& D,
const std::unordered_map<std::string, mechanism*>& mechptr_by_name,
const fvm_mechanism_data& mech_data,
const std::vector<target_handle>& target_handles,
probe_association_map& probe_map) {
auto ncell = gids.size();

std::vector<fvm_probe_data> probe_data;
Expand All @@ -343,9 +355,9 @@ void fvm_lowered_cell_impl<Backend>::add_probes(const std::vector<cell_gid_type>
}
}

template <typename Backend>
fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(const std::vector<cell_gid_type>& gids,
const recipe& rec) {
template <typename Backend> fvm_initialization_data
fvm_lowered_cell_impl<Backend>::initialize(const std::vector<cell_gid_type>& gids,
const recipe& rec) {
using std::any_cast;
using util::count_along;
using util::make_span;
Expand Down Expand Up @@ -375,28 +387,9 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(const std::ve
for (auto i : util::make_span(ncell)) {
auto gid = gids[i];
const auto& c = cells[i];

fvm_info.source_data.add_cell();
fvm_info.target_data.add_cell();
fvm_info.gap_junction_data.add_cell();

unsigned count = 0;
for (const auto& [label, range]: c.detector_ranges()) {
fvm_info.source_data.add_label(label, range);
count+=(range.end - range.begin);
}
fvm_info.num_sources[gid] = count;

count = 0;
for (const auto& [label, range]: c.synapse_ranges()) {
fvm_info.target_data.add_label(label, range);
count+=(range.end - range.begin);
}
fvm_info.num_targets[gid] = count;

for (const auto& [label, range]: c.junction_ranges()) {
fvm_info.gap_junction_data.add_label(label, range);
}
fvm_info.num_sources[gid] = add_labels(fvm_info.source_data, c.detector_ranges());
fvm_info.num_targets[gid] = add_labels(fvm_info.target_data, c.synapse_ranges());
add_labels(fvm_info.gap_junction_data, c.junction_ranges());
}

cable_cell_global_properties global_props;
Expand Down
10 changes: 5 additions & 5 deletions arbor/include/arbor/cable_cell.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ using cable_cell_location_map = static_typed_map<location_assignment,
synapse, junction, i_clamp, threshold_detector>;

// High-level abstract representation of a cell.
class ARB_SYMBOL_VISIBLE cable_cell {
public:
struct ARB_SYMBOL_VISIBLE cable_cell {
using lid_range_map = std::unordered_multimap<hash_type, lid_range>;
using index_type = cell_lid_type;
using size_type = cell_local_size_type;
using value_type = double;
Expand Down Expand Up @@ -311,9 +311,9 @@ class ARB_SYMBOL_VISIBLE cable_cell {
const cable_cell_parameter_set& default_parameters() const;

// The labeled lid_ranges of sources, targets and gap_junctions on the cell;
const std::unordered_multimap<cell_tag_type, lid_range>& detector_ranges() const;
const std::unordered_multimap<cell_tag_type, lid_range>& synapse_ranges() const;
const std::unordered_multimap<cell_tag_type, lid_range>& junction_ranges() const;
const lid_range_map& detector_ranges() const;
const lid_range_map& synapse_ranges() const;
const lid_range_map& junction_ranges() const;

private:
std::unique_ptr<cable_cell_impl, void (*)(cable_cell_impl*)> impl_;
Expand Down
5 changes: 4 additions & 1 deletion arbor/include/arbor/cable_cell_param.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,9 @@ struct ARB_ARBOR_API cable_cell_parameter_set {
// are to be applied to a morphology in a cable_cell.
class ARB_ARBOR_API decor {
std::vector<std::pair<region, paintable>> paintings_;
std::vector<std::tuple<locset, placeable, cell_tag_type>> placements_;
std::vector<std::tuple<locset, placeable, hash_type>> placements_;
cable_cell_parameter_set defaults_;
std::unordered_map<hash_type, cell_tag_type> hashes_;

public:
const auto& paintings() const {return paintings_; }
Expand All @@ -324,6 +325,8 @@ class ARB_ARBOR_API decor {
decor& paint(region, paintable);
decor& place(locset, placeable, cell_tag_type);
decor& set_default(defaultable);

cell_tag_type tag_of(hash_type) const;
};

ARB_ARBOR_API extern cable_cell_parameter_set neuron_parameter_defaults;
Expand Down
4 changes: 4 additions & 0 deletions arbor/include/arbor/common_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

namespace arb {

// Internal hashes use this 64bit id

using hash_type = std::size_t;

// For identifying cells globally.

using cell_gid_type = std::uint32_t;
Expand Down
2 changes: 1 addition & 1 deletion arbor/include/arbor/cv_policy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@

namespace arb {

class cable_cell;
struct cable_cell;

struct cv_policy_base {
virtual locset cv_boundary_points(const cable_cell& cell) const = 0;
Expand Down
73 changes: 63 additions & 10 deletions arbor/include/arbor/util/hash_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,79 @@
*/

#include <cstddef>
#include <typeindex>
#include <string_view>
AdhocMan marked this conversation as resolved.
Show resolved Hide resolved
#include <functional>

#include <iostream>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include <iostream>

I think this is not needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh dear my old vice ;)


// Helpers for forming hash values of compounds objects.

namespace arb {

inline std::size_t hash_value_combine(std::size_t n) {
return n;
// Non-cryptographic hash function for mapping strings to internal
// identifiers. Concretely, FNV-1a hash function taken from
//
// http://www.isthe.com/chongo/tech/comp/fnv/index.html
//
// NOTE: It may be worth it considering different hash functions in
// the future that have better characteristic, xxHash or Murmur
// look interesting but are more complex and likely require adding
// external dependencies.
// NOTE: this is the obligatory comment on a better hash function
// that will be here until the end of time.

template <typename T>
inline constexpr std::size_t internal_hash(T&& data) {
Copy link
Contributor

@AdhocMan AdhocMan Oct 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for moving it here!
I just have one suggestion.
Calling internal_hash and hash_value would give different results for a single type due to the multiplication with the prime number in hash_value_combine. How about merging the internal_hash function into hash_value?
So something like this:

template <typename T>
inline constexpr std::size_t hash_value(const T& t) {
    if constexpr (std::is_convertible_v<T, std::string_view>) {
        constexpr std::size_t prime = 0x100000001b3;
        constexpr std::size_t offset_basis = 0xcbf29ce484222325;

        std::size_t hash = offset_basis;

        for (uint8_t byte: std::string_view{t}) {
            hash = hash ^ byte;
            hash = hash * prime;
        }

        return hash;
    }
    else { return std::hash<std::decay_t<T>>{}(t); }
}

template <typename T, typename... TAIL>
inline constexpr std::size_t hash_value(const T& t, const TAIL&... tail) {
    constexpr std::size_t prime = 93481;
    return prime * hash_value(t) + hash_value(tail...);
}

This way we would simply always use hash_value throughout the code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, a comment mentioning the different behaviour for char* and const char* types would be good. For these, the content they point to is hashed. Any other type of pointer will have the hash of the pointer value itself.
Or we do not allow usage with other types of pointers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. I decided to add a slightly more intricate logic layer:

  • char*, const char*, char[] are hashed as strings
  • integral types are hashed byte-wise, to avoid the STL int-hash-is-id 'feature'
  • void* is hashed like an integral value
  • other pointer types are denied
  • everything else is hashed via STL

Idea being that hashes of pointer types are likely be unintended and must be opted-in to via casting to void*.
The deviation from STL is meant to spread out integer-keyed maps a bit better. I hope they do not use a similar
scheme for real values...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a decent solution for the problem of how to treat different pointer types.
The first comment is not yet addressed however. Calling hash_value and internal_hash for a single type will yield a different hash. As suggested above, we could just overload the hash_value function, such that the current internal_hash function will just become the hash_value function for single types. This way we can also get rid of hash_value_combine, which appears to be only used here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I changed the hash_combine function (below) s.t.

hash_combine(vs) = H(0, vs)
H(h, v:vs) = H(h * prime + internal_hash(v), vs)

shouldn't that address your comment?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I missed that change. I think just having hash_value would still be overall cleaner then internal_hash, hash_combine and hash_value. But from a functionality point of view I'm ok with it as is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the surface now just is hash_value. I should adjust the usage accordingly and stuff internal_hash and hash_combine into a detail namespace.

using D = std::decay_t<T>;
constexpr std::size_t prime = 0x100000001b3;
constexpr std::size_t offset_basis = 0xcbf29ce484222325;
static_assert(!std::is_pointer_v<D> || std::is_same_v<D, void*> || std::is_convertible_v<T, std::string_view>,
"Pointer types except void* will not be hashed.");
if constexpr (std::is_convertible_v<T, std::string_view>) {
std::size_t hash = offset_basis;
for (uint8_t byte: std::string_view{data}) {
hash = hash ^ byte;
hash = hash * prime;
}
return hash;
}
if constexpr (std::is_integral_v<D>) {
unsigned long long bytes = data;
std::size_t hash = offset_basis;
for (int ix = 0; ix < sizeof(data); ++ix) {
uint8_t byte = bytes & 255;
bytes >>= 8;
hash = hash ^ byte;
hash = hash * prime;
}
return hash;
}
if constexpr (std::is_pointer_v<D>) {
unsigned long long bytes = reinterpret_cast<unsigned long long>(data);
std::size_t hash = offset_basis;
for (int ix = 0; ix < sizeof(data); ++ix) {
uint8_t byte = bytes & 255;
bytes >>= 8;
hash = hash ^ byte;
hash = hash * prime;
}
return hash;
}
return std::hash<D>{}(data);
}

template <typename... T>
std::size_t hash_value_combine(std::size_t n, std::size_t m, T... tail) {
constexpr std::size_t prime2 = 54517;
return hash_value_combine(prime2*n + m, tail...);
inline
std::size_t hash_value_combine(std::size_t n) { return n; }

template <typename T, typename... Ts>
std::size_t hash_value_combine(std::size_t n, const T& head, const Ts&... tail) {
constexpr std::size_t prime = 54517;
return hash_value_combine(prime*n + internal_hash(head), tail...);
}

template <typename... T>
std::size_t hash_value(const T&... t) {
constexpr std::size_t prime1 = 93481;
return hash_value_combine(prime1, std::hash<T>{}(t)...);
std::size_t hash_value(const T&... ts) {
return hash_value_combine(0, ts...);
}
}

Expand Down
Loading
Loading