Skip to content

Commit

Permalink
Fix null counts for output of row/column conversion:
Browse files Browse the repository at this point in the history
Fixes #1169.

This commit fixes the null counts in row/column conversion, for all output
columns.
The prior fix in #1155 only fixed the count for STRING outputs.

There are 2 additional/tangential fixes in this change:
  1. `cudf::detail::null_count()` is used in place of `cudf::null_count()`,
     thus running on the current stream.
  2. The Java `RowConversion.convertFromRowsFixedWidthOptimized()` now uses
     the `convertFromRowsFixedWidthOptimized()` native function instead of
     `convertFromRows()`. This should prove faster.

Signed-off-by: MithunR <[email protected]>
  • Loading branch information
mythrocks committed May 25, 2023
1 parent 7134023 commit 1c0dbc0
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 13 deletions.
35 changes: 23 additions & 12 deletions src/main/cpp/src/row_conversion.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cooperative_groups.h>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/sequence.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/integer_utils.hpp>
Expand Down Expand Up @@ -2031,6 +2032,19 @@ convert_to_rows_fixed_width_optimized(table_view const &tbl, rmm::cuda_stream_vi
}
}

namespace {

/// @brief Calculates and sets null counts for specified columns
void fixup_null_counts(std::vector<std::unique_ptr<column>>& output_columns,
rmm::cuda_stream_view stream) {
for (auto &col : output_columns) {
col->set_null_count(
cudf::detail::null_count(col->view().null_mask(), 0, col->size(), stream));
}
}

}

/**
* @brief convert from JCUDF row format to cudf columns
*
Expand Down Expand Up @@ -2260,23 +2274,21 @@ std::unique_ptr<table> convert_from_rows(lists_column_view const &input,
if (schema[i].id() == type_id::STRING) {
// stuff real string column
auto string_data = string_row_offset_columns[string_idx].release()->release();
auto const null_count = [&] {
// Null-count not set previously. Calculate, on the fly.
auto const &null_mask = *string_data.null_mask;
return null_mask.data() ?
cudf::null_count(static_cast<bitmask_type const *>(null_mask.data()), 0,
num_rows) :
0;
}();
output_columns[i] =
make_strings_column(num_rows, std::move(string_col_offsets[string_idx]),
std::move(string_data_cols[string_idx]),
std::move(*string_data.null_mask.release()), null_count);
std::move(*string_data.null_mask.release()), 0);
// Null count set to 0, temporarily. Will be fixed up before return.
string_idx++;
}
}
}

// Set null counts, because output_columns are modified via mutable-view,
// in the kernel above.
// TODO(future): Consider setting null count in the kernel itself.
fixup_null_counts(output_columns, stream);

return std::make_unique<table>(std::move(output_columns));
}

Expand Down Expand Up @@ -2335,9 +2347,8 @@ std::unique_ptr<table> convert_from_rows_fixed_width_optimized(
// Set null counts, because output_columns are modified via mutable-view,
// in the kernel above.
// TODO(future): Consider setting null count in the kernel itself.
for (auto &col : output_columns) {
col->set_null_count(cudf::null_count(col->view().null_mask(), 0, col->size()));
}
fixup_null_counts(output_columns, stream);

return std::make_unique<table>(std::move(output_columns));
} else {
CUDF_FAIL("Only fixed width types are currently supported");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public static Table convertFromRowsFixedWidthOptimized(ColumnView vec, DType ...
scale[i] = schema[i].getScale();

}
return new Table(convertFromRows(vec.getNativeView(), types, scale));
return new Table(convertFromRowsFixedWidthOptimized(vec.getNativeView(), types, scale));
}

private static native long[] convertToRows(long nativeHandle);
Expand Down

0 comments on commit 1c0dbc0

Please sign in to comment.