Skip to content

Commit

Permalink
minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
shrshi committed Aug 2, 2024
1 parent 3900ee3 commit 3949cda
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 13 deletions.
23 changes: 13 additions & 10 deletions cpp/src/io/json/device_column_tree_extraction.cu
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ rmm::device_uvector<row_offset_t> extract_device_column_subtree(

// TODO (optional): validate the remaining column tree properties

return std::move(subtree_nrows);
return subtree_nrows;
}

device_column_subtree_properties allocate_device_column_subtree_properties(
Expand Down Expand Up @@ -282,8 +282,8 @@ device_column_subtree_properties allocate_device_column_subtree_properties(
thrust::reduce(
rmm::exec_policy(stream), max_row_offsets_it, max_row_offsets_it + str_val_end) +
str_val_end;
rmm::device_uvector<row_offset_t> string_offsets(string_offsets_size, stream);
rmm::device_uvector<row_offset_t> string_lengths(string_offsets_size, stream);
rmm::device_uvector<SymbolOffsetT> string_offsets(string_offsets_size, stream);
rmm::device_uvector<SymbolOffsetT> string_lengths(string_offsets_size, stream);

auto list_partitioning_idx_it =
thrust::partition(rmm::exec_policy(stream),
Expand All @@ -301,7 +301,7 @@ device_column_subtree_properties allocate_device_column_subtree_properties(
max_row_offsets_it,
max_row_offsets_it + (list_end - str_val_end)) +
2 * (list_end - str_val_end);
rmm::device_uvector<row_offset_t> child_offsets(child_offsets_size, stream);
rmm::device_uvector<SymbolOffsetT> child_offsets(child_offsets_size, stream);

auto validity_buffer_size =
thrust::reduce(rmm::exec_policy(stream), subtree_nrows.begin(), subtree_nrows.end());
Expand All @@ -319,10 +319,11 @@ device_column_subtree_properties allocate_device_column_subtree_properties(

void initialize_device_column_subtree_properties(
device_column_subtree_properties &d_props,
device_span<row_offset_t> subtree_nrows,
tree_meta_t &tree,
device_span<NodeIndexT> original_col_ids,
device_span<row_offset_t> row_offsets,
const column_tree_properties &c_props,
column_tree_properties &c_props,
rmm::cuda_stream_view stream) {

auto num_nodes = tree.node_levels.size();
Expand All @@ -342,7 +343,7 @@ void initialize_device_column_subtree_properties(
sorted_node_levels_order.begin());
auto node_range_lengths_it = thrust::make_transform_iterator(
thrust::make_zip_iterator(node_range_begin_it, node_range_end_it),
cuda::proclaim_return_type<row_offset_t>([] __device__(auto range_it) {
cuda::proclaim_return_type<SymbolOffsetT>([] __device__(auto range_it) {
return thrust::get<1>(range_it) - thrust::get<0>(range_it);
}));

Expand All @@ -352,13 +353,14 @@ void initialize_device_column_subtree_properties(
sorted_node_levels_order.begin());

rmm::device_uvector<row_offset_t> sorted_subtree_nrows(num_columns, stream);
thrust::copy(rmm::exec_policy_nosync(stream), subtree_nrows.begin(), subtree_nrows.end(), sorted_subtree_nrows.begin());
thrust::sort_by_key(rmm::exec_policy_nosync(stream),
c_props.mapped_ids.begin(),
c_props.mapped_ids.end(),
sorted_subtree_nrows.begin());

thrust::copy_if(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
thrust::make_zip_iterator(node_range_begin_it, node_range_lengths_it),
thrust::make_zip_iterator(node_range_begin_it + num_nodes, node_range_lengths_it + num_nodes),
thrust::make_counting_iterator(0),
Expand All @@ -371,18 +373,19 @@ void initialize_device_column_subtree_properties(
});

// row_offsets must be prefix-summed across columns for validity initialization.
// TODO: replace the thrust::replace_if pass with a transform input iterator fed directly to thrust::inclusive_scan
thrust::replace_if(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
row_offsets_it,
row_offsets_it + num_nodes,
thrust::make_counting_iterator(0),
[sorted_subtree_nrows = sorted_subtree_nrows.begin(), node_col_ids_it] __device__(
NodeIndexT node) { return sorted_subtree_nrows[node_col_ids_it[node]] == 0; },
0);
thrust::inclusive_scan(
rmm::exec_policy(stream), row_offsets_it, row_offsets_it + num_nodes, row_offsets_it);
rmm::exec_policy_nosync(stream), row_offsets_it, row_offsets_it + num_nodes, row_offsets_it);
thrust::for_each_n(
rmm::exec_policy(stream),
rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator(0),
num_nodes,
[sorted_subtree_nrows = sorted_subtree_nrows.begin(),
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/json/nested_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,10 @@ struct column_tree_properties {
* in column_tree_properties.
*/
struct device_column_subtree_properties {
rmm::device_uvector<row_offset_t> string_offsets;
rmm::device_uvector<row_offset_t> string_lengths;
rmm::device_uvector<SymbolOffsetT> string_offsets;
rmm::device_uvector<SymbolOffsetT> string_lengths;
// Row offsets
rmm::device_uvector<row_offset_t> child_offsets;
rmm::device_uvector<SymbolOffsetT> child_offsets;
// Validity bitmap
rmm::device_buffer validity;
};
Expand Down

0 comments on commit 3949cda

Please sign in to comment.