Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into guf-host_span-ctor-device-accesible
Browse files Browse the repository at this point in the history
  • Loading branch information
vuule authored Oct 8, 2024
2 parents 6ad42b0 + 553d8ec commit 46fc4f6
Show file tree
Hide file tree
Showing 43 changed files with 685 additions and 537 deletions.
48 changes: 32 additions & 16 deletions cpp/include/cudf/reduction/detail/reduction_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,41 @@ namespace detail {
// intermediate data structure to compute `var`, `std`
template <typename ResultType>
struct var_std {
  // Uses the pairwise approach of Chan, Golub, and LeVeque,
  // _Algorithms for computing the sample variance: analysis and
  // recommendations_ (1983)
  // https://doi.org/10.1080/00031305.1983.10483115
  // Also http://www.cs.yale.edu/publications/techreports/tr222.pdf
  // This is a modification of Youngs and Cramer's online approach.
  ResultType running_sum;                // T: sum of the samples merged so far
  ResultType running_square_deviations;  // S: sum of squared deviations from the running mean
  size_type count;                       // number of samples folded into this partial

  CUDF_HOST_DEVICE inline var_std(ResultType t = 0, ResultType s = 0, size_type n = 0)
    : running_sum(t), running_square_deviations(s), count(n){};

  using this_t = var_std<ResultType>;

  // Merges two partial results into one, covering the union of their samples.
  CUDF_HOST_DEVICE inline this_t operator+(this_t const& rhs) const
  {
    // Updates as per equations 1.5a and 1.5b in the paper
    // T_{1,m+n} = T_{1,m} + T_{m+1,n+1}
    // S_{1,m+n} = S_{1,m} + S_{m+1,n+1} + m/(n(m+n)) * (n/m T_{1,m} - T_{m+1,n+1})**2
    // Here the first m samples are in this, the remaining n samples are in rhs.
    auto const m = this->count;
    auto const n = rhs.count;
    // Avoid division by zero: an empty partial is the identity for this merge.
    if (m == 0) { return rhs; }
    if (n == 0) { return *this; }
    auto const tm  = this->running_sum;
    auto const tn  = rhs.running_sum;
    auto const sm  = this->running_square_deviations;
    auto const sn  = rhs.running_square_deviations;
    auto const tmn = tm + tn;
    auto const diff = ((static_cast<ResultType>(n) / m) * tm) - tn;
    // Computing m/n(m+n) as m/n/(m+n) to avoid integer overflow
    auto const smn = sm + sn + ((static_cast<ResultType>(m) / n) / (m + n)) * diff * diff;
    return {tmn, smn, m + n};
  };
};

Expand All @@ -50,10 +74,7 @@ template <typename ResultType>
struct transformer_var_std {
using OutputType = var_std<ResultType>;

CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value)
{
return OutputType(value, value * value);
};
CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value) { return {value, 0, 1}; };
};

// ------------------------------------------------------------------------
Expand Down Expand Up @@ -257,12 +278,7 @@ struct variance : public compound_op<variance> {
cudf::size_type const& count,
cudf::size_type const& ddof)
{
ResultType mean = input.value / count;
ResultType asum = input.value_squared;
cudf::size_type div = count - ddof;
ResultType var = asum / div - ((mean * mean) * count) / div;

return var;
return input.running_square_deviations / (count - ddof);
};
};
};
Expand Down
88 changes: 50 additions & 38 deletions cpp/src/io/json/process_tokens.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/detail/tokenize_json.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -87,38 +88,41 @@ void validate_token_stream(device_span<char const> d_input,
{
CUDF_FUNC_RANGE();
if (!options.is_strict_validation()) { return; }

rmm::device_uvector<bool> d_invalid = cudf::detail::make_zeroed_device_uvector_async<bool>(
tokens.size(), stream, cudf::get_current_device_resource_ref());

using token_t = cudf::io::json::token_t;
cudf::detail::optional_trie trie_na =
cudf::detail::create_serialized_trie(options.get_na_values(), stream);
auto trie_na_view = cudf::detail::make_trie_view(trie_na);
auto literals = options.get_na_values();
literals.emplace_back("null"); // added these too to single trie
literals.emplace_back("true");
literals.emplace_back("false");

cudf::detail::optional_trie trie_literals =
cudf::detail::create_serialized_trie(literals, stream);
cudf::detail::optional_trie trie_nonnumeric = cudf::detail::create_serialized_trie(
{"NaN", "Infinity", "+INF", "+Infinity", "-INF", "-Infinity"}, stream);

auto validate_values = cuda::proclaim_return_type<bool>(
[data = d_input.data(),
trie_na = trie_na_view,
trie_literals = cudf::detail::make_trie_view(trie_literals),
trie_nonnumeric = cudf::detail::make_trie_view(trie_nonnumeric),
allow_numeric_leading_zeros = options.is_allowed_numeric_leading_zeros(),
allow_nonnumeric =
options.is_allowed_nonnumeric_numbers()] __device__(SymbolOffsetT start,
SymbolOffsetT end) -> bool {
// This validates an unquoted value. A value must match https://www.json.org/json-en.html
// but the leading and training whitespace should already have been removed, and is not
// a string
auto c = data[start];
auto is_null_literal = serialized_trie_contains(trie_na, {data + start, end - start});
if (is_null_literal) {
return true;
} else if ('n' == c) {
return substr_eq(data, start, end, 4, "null");
} else if ('t' == c) {
return substr_eq(data, start, end, 4, "true");
} else if ('f' == c) {
return substr_eq(data, start, end, 5, "false");
} else if (allow_nonnumeric && c == 'N') {
return substr_eq(data, start, end, 3, "NaN");
} else if (allow_nonnumeric && c == 'I') {
return substr_eq(data, start, end, 8, "Infinity");
} else if (allow_nonnumeric && c == '+') {
return substr_eq(data, start, end, 4, "+INF") ||
substr_eq(data, start, end, 9, "+Infinity");
} else if ('-' == c || c <= '9' && 'c' >= '0') {
auto const is_literal = serialized_trie_contains(trie_literals, {data + start, end - start});
if (is_literal) { return true; }
if (allow_nonnumeric) {
auto const is_nonnumeric =
serialized_trie_contains(trie_nonnumeric, {data + start, end - start});
if (is_nonnumeric) { return true; }
}
auto c = data[start];
if ('-' == c || c <= '9' && 'c' >= '0') {
// number
auto num_state = number_state::START;
for (auto at = start; at < end; at++) {
Expand All @@ -140,9 +144,6 @@ void validate_token_stream(device_span<char const> d_input,
num_state = number_state::LEADING_ZERO;
} else if (c >= '1' && c <= '9') {
num_state = number_state::WHOLE;
} else if (allow_nonnumeric && 'I' == c) {
return substr_eq(data, start, end, 4, "-INF") ||
substr_eq(data, start, end, 9, "-Infinity");
} else {
return false;
}
Expand Down Expand Up @@ -273,33 +274,44 @@ void validate_token_stream(device_span<char const> d_input,

auto num_tokens = tokens.size();
auto count_it = thrust::make_counting_iterator(0);
auto predicate = [tokens = tokens.begin(),
token_indices = token_indices.begin(),
validate_values,
validate_strings] __device__(auto i) -> bool {
auto predicate = cuda::proclaim_return_type<bool>([tokens = tokens.begin(),
token_indices = token_indices.begin(),
validate_values,
validate_strings] __device__(auto i) -> bool {
if (tokens[i] == token_t::ValueEnd) {
return !validate_values(token_indices[i - 1], token_indices[i]);
} else if (tokens[i] == token_t::FieldNameEnd || tokens[i] == token_t::StringEnd) {
return !validate_strings(token_indices[i - 1], token_indices[i]);
}
return false;
};
});

auto conditional_invalidout_it =
cudf::detail::make_tabulate_output_iterator(cuda::proclaim_return_type<void>(
[d_invalid = d_invalid.begin()] __device__(size_type i, bool x) -> void {
if (x) { d_invalid[i] = true; }
}));
thrust::transform(rmm::exec_policy_nosync(stream),
count_it,
count_it + num_tokens,
conditional_invalidout_it,
predicate);

using scan_type = write_if::scan_type;
auto conditional_write = write_if{tokens.begin(), num_tokens};
auto conditional_output_it = cudf::detail::make_tabulate_output_iterator(conditional_write);
auto transform_op = cuda::proclaim_return_type<scan_type>(
[predicate, tokens = tokens.begin()] __device__(auto i) -> scan_type {
if (predicate(i)) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd};
return {static_cast<token_t>(tokens[i]), tokens[i] == token_t::LineEnd};
});
auto binary_op = cuda::proclaim_return_type<scan_type>(
auto binary_op = cuda::proclaim_return_type<scan_type>(
[] __device__(scan_type prev, scan_type curr) -> scan_type {
auto op_result = (prev.first == token_t::ErrorBegin ? prev.first : curr.first);
return scan_type((curr.second ? curr.first : op_result), prev.second | curr.second);
return {(curr.second ? curr.first : op_result), prev.second | curr.second};
});
auto transform_op = cuda::proclaim_return_type<scan_type>(
[d_invalid = d_invalid.begin(), tokens = tokens.begin()] __device__(auto i) -> scan_type {
if (d_invalid[i]) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd};
return {static_cast<token_t>(tokens[i]), tokens[i] == token_t::LineEnd};
});

thrust::transform_inclusive_scan(rmm::exec_policy(stream),
thrust::transform_inclusive_scan(rmm::exec_policy_nosync(stream),
count_it,
count_it + num_tokens,
conditional_output_it,
Expand Down
26 changes: 18 additions & 8 deletions cpp/src/reductions/compound.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,18 @@

#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/reduction/detail/reduction.cuh>
#include <cudf/reduction/detail/reduction_operators.cuh>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/memory_resource.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <thrust/iterator/transform_iterator.h>

#include <stdexcept>
#include <type_traits>

namespace cudf {
namespace reduction {
namespace compound {
Expand Down Expand Up @@ -53,35 +58,39 @@ std::unique_ptr<scalar> compound_reduction(column_view const& col,
{
auto const valid_count = col.size() - col.null_count();

// All null input produces all null output
if (valid_count == 0 ||
// Only care about ddof for standard deviation and variance right now
valid_count <= ddof && (std::is_same_v<Op, cudf::reduction::detail::op::standard_deviation> ||
std::is_same_v<Op, cudf::reduction::detail::op::variance>)) {
auto result = cudf::make_fixed_width_scalar(output_dtype, stream, mr);
result->set_valid_async(false, stream);
return result;
}
// reduction by iterator
auto dcol = cudf::column_device_view::create(col, stream);
std::unique_ptr<scalar> result;
Op compound_op{};

if (!cudf::is_dictionary(col.type())) {
if (col.has_nulls()) {
auto it = thrust::make_transform_iterator(
dcol->pair_begin<ElementType, true>(),
compound_op.template get_null_replacing_element_transformer<ResultType>());
result = cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
return cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
it, col.size(), compound_op, valid_count, ddof, stream, mr);
} else {
auto it = thrust::make_transform_iterator(
dcol->begin<ElementType>(), compound_op.template get_element_transformer<ResultType>());
result = cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
return cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
it, col.size(), compound_op, valid_count, ddof, stream, mr);
}
} else {
auto it = thrust::make_transform_iterator(
cudf::dictionary::detail::make_dictionary_pair_iterator<ElementType>(*dcol, col.has_nulls()),
compound_op.template get_null_replacing_element_transformer<ResultType>());
result = cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
return cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
it, col.size(), compound_op, valid_count, ddof, stream, mr);
}

// set scalar is valid
result->set_valid_async(col.null_count() < col.size(), stream);
return result;
};

// @brief result type dispatcher for compound reduction (a.k.a. mean, var, std)
Expand Down Expand Up @@ -137,6 +146,7 @@ struct element_type_dispatcher {
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_EXPECTS(ddof >= 0, "ddof must be non-negative", std::domain_error);
return cudf::type_dispatcher(
output_dtype, result_type_dispatcher<ElementType, Op>(), col, output_dtype, ddof, stream, mr);
}
Expand Down
Loading

0 comments on commit 46fc4f6

Please sign in to comment.