Skip to content

Commit

Permalink
Fix atomic_ref scope when multiple blocks are updating the same out…
Browse files Browse the repository at this point in the history
…put (#16051)

in a few places, `thread_scope_block` is used even where there threads from multiple blocks update the same location. This PR changes these to `thread_scope_device` to avoid UB with sufficiently large inputs.

Have not ran benchmarks to evaluate the impact.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Mark Harris (https://github.com/harrism)

URL: #16051
  • Loading branch information
vuule authored Jun 18, 2024
1 parent 56e8442 commit 7ff2764
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion cpp/src/strings/case.cu
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ CUDF_KERNEL void has_multibytes_kernel(char const* d_input_chars,
auto const mb_total = block_reduce(temp_storage).Reduce(mb_count, cub::Sum());

if ((lane_idx == 0) && (mb_total > 0)) {
cuda::atomic_ref<int64_t, cuda::thread_scope_block> ref{*d_output};
cuda::atomic_ref<int64_t, cuda::thread_scope_device> ref{*d_output};
ref.fetch_add(mb_total, cuda::std::memory_order_relaxed);
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/replace/multi.cu
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ CUDF_KERNEL void count_targets(replace_multi_parallel_fn fn, int64_t chars_bytes
auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum());

if ((lane_idx == 0) && (total > 0)) {
cuda::atomic_ref<int64_t, cuda::thread_scope_block> ref{*d_output};
cuda::atomic_ref<int64_t, cuda::thread_scope_device> ref{*d_output};
ref.fetch_add(total, cuda::std::memory_order_relaxed);
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/split/split.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ CUDF_KERNEL void count_delimiters_kernel(Tokenizer tokenizer,
auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum());

if ((lane_idx == 0) && (total > 0)) {
cuda::atomic_ref<int64_t, cuda::thread_scope_block> ref{*d_output};
cuda::atomic_ref<int64_t, cuda::thread_scope_device> ref{*d_output};
ref.fetch_add(total, cuda::std::memory_order_relaxed);
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/text/tokenize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ CUDF_KERNEL void count_characters(uint8_t const* d_chars, int64_t chars_bytes, i
auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum());

if ((lane_idx == 0) && (total > 0)) {
cuda::atomic_ref<int64_t, cuda::thread_scope_block> ref{*d_output};
cuda::atomic_ref<int64_t, cuda::thread_scope_device> ref{*d_output};
ref.fetch_add(total, cuda::std::memory_order_relaxed);
}
}
Expand Down

0 comments on commit 7ff2764

Please sign in to comment.