Skip to content

Commit

Permalink
Use cuco::static_set in JSON tree algorithm (#13928)
Browse files Browse the repository at this point in the history
In the JSON tree algorithms of the JSON reader, a cuco `static_map` was being used only as a set (values were ignored). This PR replaces it with `static_set`.
No tests are changed. No significant runtime changes.
Addresses part of #12261

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #13928
  • Loading branch information
karthikeyann authored Aug 28, 2023
1 parent 2c7f02c commit aba001c
Showing 1 changed file with 38 additions and 47 deletions.
85 changes: 38 additions & 47 deletions cpp/src/io/json/json_tree.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

#include <cub/device/device_radix_sort.cuh>

#include <cuco/static_map.cuh>
#include <cuco/static_set.cuh>

#include <thrust/binary_search.h>
#include <thrust/copy.h>
Expand Down Expand Up @@ -400,21 +400,13 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
{
CUDF_FUNC_RANGE();
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;

auto const num_nodes = d_tree.node_categories.size();
auto const num_fields = thrust::count(rmm::exec_policy(stream),
d_tree.node_categories.begin(),
d_tree.node_categories.end(),
node_t::NC_FN);

constexpr size_type empty_node_index_sentinel = -1;
hash_map_type key_map{compute_hash_table_size(num_fields, 40), // 40% occupancy in hash map
cuco::empty_key{empty_node_index_sentinel},
cuco::empty_value{empty_node_index_sentinel},
hash_table_allocator_type{default_allocator<char>{}, stream},
stream.value()};
auto const d_hasher = [d_input = d_input.data(),
node_range_begin = d_tree.node_range_begin.data(),
node_range_end = d_tree.node_range_end.data()] __device__(auto node_id) {
Expand All @@ -434,25 +426,33 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
};
// key-value pairs: uses node_id itself as node_type. (unique node_id for a field name due to
// hashing)
auto const iter = cudf::detail::make_counting_transform_iterator(
0, [] __device__(size_type i) { return cuco::make_pair(i, i); });
auto const iter = thrust::make_counting_iterator<size_type>(0);

auto const is_field_name_node = [node_categories =
d_tree.node_categories.data()] __device__(auto node_id) {
return node_categories[node_id] == node_t::NC_FN;
};
key_map.insert_if(iter,
iter + num_nodes,
thrust::counting_iterator<size_type>(0), // stencil
is_field_name_node,
d_hasher,
d_equal,
stream.value());

using hasher_type = decltype(d_hasher);
constexpr size_type empty_node_index_sentinel = -1;
auto key_set =
cuco::experimental::static_set{cuco::experimental::extent{compute_hash_table_size(
num_fields, 40)}, // 40% occupancy in hash map
cuco::empty_key{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hasher},
hash_table_allocator_type{default_allocator<char>{}, stream},
stream.value()};
key_set.insert_if_async(iter,
iter + num_nodes,
thrust::counting_iterator<size_type>(0), // stencil
is_field_name_node,
stream.value());

auto const get_hash_value =
[key_map = key_map.get_device_view(), d_hasher, d_equal] __device__(auto node_id) -> size_type {
auto const it = key_map.find(node_id, d_hasher, d_equal);
return (it == key_map.end()) ? size_type{0} : it->second.load(cuda::std::memory_order_relaxed);
[key_set = key_set.ref(cuco::experimental::op::find)] __device__(auto node_id) -> size_type {
auto const it = key_set.find(node_id);
return (it == key_set.end()) ? size_type{0} : *it;
};

// convert field nodes to node indices, and other nodes to enum value.
Expand Down Expand Up @@ -528,7 +528,6 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
{
CUDF_FUNC_RANGE();
auto const num_nodes = parent_node_ids.size();
rmm::device_uvector<size_type> col_id(num_nodes, stream, mr);

// array of arrays
NodeIndexT const row_array_children_level = is_enabled_lines ? 1 : 2;
Expand Down Expand Up @@ -560,17 +559,6 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
list_indices.begin());
}

using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hash_map_type =
cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;

constexpr size_type empty_node_index_sentinel = -1;
hash_map_type key_map{compute_hash_table_size(num_nodes), // TODO reduce oversubscription
cuco::empty_key{empty_node_index_sentinel},
cuco::empty_value{empty_node_index_sentinel},
cuco::erased_key{-2},
hash_table_allocator_type{default_allocator<char>{}, stream},
stream.value()};
// path compression is not used since extra writes make all map operations slow.
auto const d_hasher = [node_level = node_levels.begin(),
node_type = node_type.begin(),
Expand Down Expand Up @@ -632,23 +620,26 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n
return node_id1 == node_id2;
};

constexpr size_type empty_node_index_sentinel = -1;
using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
using hasher_type = decltype(d_hashed_cache);

auto key_set = cuco::experimental::static_set{
cuco::experimental::extent{compute_hash_table_size(num_nodes)},
cuco::empty_key<cudf::size_type>{empty_node_index_sentinel},
d_equal,
cuco::experimental::linear_probing<1, hasher_type>{d_hashed_cache},
hash_table_allocator_type{default_allocator<char>{}, stream},
stream.value()};

// insert and convert node ids to unique set ids
auto const num_inserted = thrust::count_if(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(num_nodes),
[d_hashed_cache,
d_equal,
view = key_map.get_device_mutable_view(),
uq_node_id = col_id.begin()] __device__(auto node_id) mutable {
auto it = view.insert_and_find(cuco::make_pair(node_id, node_id), d_hashed_cache, d_equal);
uq_node_id[node_id] = (it.first)->first.load(cuda::std::memory_order_relaxed);
return it.second;
});
auto nodes_itr = thrust::make_counting_iterator<size_type>(0);
auto const num_columns = key_set.insert(nodes_itr, nodes_itr + num_nodes, stream.value());

auto const num_columns = num_inserted; // key_map.get_size() is not updated.
rmm::device_uvector<size_type> unique_keys(num_columns, stream);
key_map.retrieve_all(unique_keys.begin(), thrust::make_discard_iterator(), stream.value());
rmm::device_uvector<size_type> col_id(num_nodes, stream, mr);
key_set.find_async(nodes_itr, nodes_itr + num_nodes, col_id.begin(), stream.value());
std::ignore = key_set.retrieve_all(unique_keys.begin(), stream.value());

return {std::move(col_id), std::move(unique_keys)};
}
Expand Down

0 comments on commit aba001c

Please sign in to comment.