diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 12bf5e860fe..e1ed4ac3aa0 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -482,7 +482,7 @@ __global__ void __launch_bounds__(decode_block_size) target_pos = min(s->nz_count, src_pos + decode_block_size - out_thread0); if (out_thread0 > 32) { target_pos = min(target_pos, s->dict_pos); } } - // TODO(ets): see if this sync can be removed + // this needs to be here to prevent warp 3 modifying src_pos before all threads have read it __syncthreads(); if (t < 32) { // decode repetition and definition levels. diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index c2a7deedb80..528048d2fe6 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -365,7 +365,7 @@ __global__ void __launch_bounds__(96) } else { // warp2 target_pos = min(s->nz_count, src_pos + batch_size); } - // TODO(ets): see if this sync can be removed + // this needs to be here to prevent warp 2 modifying src_pos before all threads have read it __syncthreads(); // warp0 will decode the rep/def levels, warp1 will unpack a mini-batch of deltas. @@ -507,7 +507,7 @@ __global__ void __launch_bounds__(decode_block_size) } else { // warp 3 target_pos = min(s->nz_count, src_pos + batch_size); } - // TODO(ets): see if this sync can be removed + // this needs to be here to prevent warp 3 modifying src_pos before all threads have read it __syncthreads(); // warp0 will decode the rep/def levels, warp1 will unpack a mini-batch of prefixes, warp 2 will