From 7134023073d34df04ebeff4d2af8b39664b5c679 Mon Sep 17 00:00:00 2001 From: MithunR Date: Tue, 23 May 2023 14:57:37 -0700 Subject: [PATCH] Followup for null count fixup in row_conversion.cu. (#1155) * Followup for null count fixup in row_conversion.cu. This is a followup to #1148. `row_conversion.cu` was modified in https://github.com/rapidsai/cudf/pull/13372 to explicitly calculate null-counts for output columns. This commit replicates the changes in cudf/pull/13372, and adds explicit null-count calculation for the string offsets column. Signed-off-by: MithunR --- src/main/cpp/src/row_conversion.cu | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu index 3fe7cf3c6c..9952ed59ea 100644 --- a/src/main/cpp/src/row_conversion.cu +++ b/src/main/cpp/src/row_conversion.cu @@ -2259,8 +2259,15 @@ std::unique_ptr<table> convert_from_rows(lists_column_view const &input, for (int i = 0; i < static_cast<int>(schema.size()); ++i) { if (schema[i].id() == type_id::STRING) { // stuff real string column - auto const null_count = string_row_offset_columns[string_idx]->null_count(); auto string_data = string_row_offset_columns[string_idx].release()->release(); + auto const null_count = [&] { + // Null-count not set previously. Calculate, on the fly. + auto const &null_mask = *string_data.null_mask; + return null_mask.data() ? + cudf::null_count(static_cast<bitmask_type const *>(null_mask.data()), 0, + num_rows) : + 0; + }(); output_columns[i] = make_strings_column(num_rows, std::move(string_col_offsets[string_idx]), std::move(string_data_cols[string_idx]), @@ -2325,6 +2332,12 @@ std::unique_ptr<table>
convert_from_rows_fixed_width_optimized( num_rows, num_columns, size_per_row, dev_column_start.data(), dev_column_size.data(), dev_output_data.data(), dev_output_nm.data(), child.data()); + // Set null counts, because output_columns are modified via mutable-view, + // in the kernel above. + // TODO(future): Consider setting null count in the kernel itself. + for (auto &col : output_columns) { + col->set_null_count(cudf::null_count(col->view().null_mask(), 0, col->size())); + } return std::make_unique<table>
(std::move(output_columns)); } else { CUDF_FAIL("Only fixed width types are currently supported");