Merge branch 'branch-24.12' into histogram_insert_or_apply
mhaseeb123 authored Oct 8, 2024
2 parents 30772ae + 349ba5d commit 34d41ab
Showing 54 changed files with 820 additions and 570 deletions.
8 changes: 5 additions & 3 deletions ci/build_docs.sh
@@ -3,8 +3,7 @@

 set -euo pipefail

-export RAPIDS_VERSION="$(rapids-version)"
-export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
 export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR"

 rapids-logger "Create test conda environment"
@@ -29,7 +28,10 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  libcudf pylibcudf cudf dask-cudf
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "pylibcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}"

 export RAPIDS_DOCS_DIR="$(mktemp -d)"

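The same change repeats in each CI script below: compute RAPIDS_VERSION_MAJOR_MINOR once via rapids-version-major-minor, then pin every cudf-family package the script installs to that version (for example "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"). Presumably this guarantees that rapids-mamba-retry resolves to the packages just built and downloaded into ${CPP_CHANNEL} and ${PYTHON_CHANNEL} rather than to an older release visible on the public channels; only the list of pinned packages differs from script to script.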
7 changes: 6 additions & 1 deletion ci/test_cpp_common.sh
@@ -5,6 +5,8 @@ set -euo pipefail

 . /opt/conda/etc/profile.d/conda.sh

+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate C++ testing dependencies"

 ENV_YAML_DIR="$(mktemp -d)"
@@ -31,7 +33,10 @@ rapids-print-env

 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
-  libcudf libcudf_kafka libcudf-tests libcudf-example
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf-tests=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf-example=${RAPIDS_VERSION_MAJOR_MINOR}"

 rapids-logger "Check GPU usage"
 nvidia-smi
4 changes: 3 additions & 1 deletion ci/test_java.sh
@@ -5,6 +5,8 @@ set -euo pipefail

 . /opt/conda/etc/profile.d/conda.sh

+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate Java testing dependencies"

 ENV_YAML_DIR="$(mktemp -d)"
@@ -30,7 +32,7 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
-  libcudf
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"

 rapids-logger "Check GPU usage"
 nvidia-smi
5 changes: 4 additions & 1 deletion ci/test_notebooks.sh
@@ -5,6 +5,8 @@ set -euo pipefail

 . /opt/conda/etc/profile.d/conda.sh

+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate notebook testing dependencies"

 ENV_YAML_DIR="$(mktemp -d)"
@@ -30,7 +32,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  cudf libcudf
+  "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"

 NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
 pushd notebooks
5 changes: 4 additions & 1 deletion ci/test_python_common.sh
@@ -7,6 +7,8 @@ set -euo pipefail

 . /opt/conda/etc/profile.d/conda.sh

+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate Python testing dependencies"

 ENV_YAML_DIR="$(mktemp -d)"
@@ -38,4 +40,5 @@ rapids-print-env
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  cudf libcudf
+  "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
6 changes: 5 additions & 1 deletion ci/test_python_other.sh
@@ -7,10 +7,14 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../

 # Common setup steps shared by Python test jobs
 source ./ci/test_python_common.sh test_python_other

+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  dask-cudf cudf_kafka custreamz
+  "dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "cudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "custreamz=${RAPIDS_VERSION_MAJOR_MINOR}"

 rapids-logger "Check GPU usage"
 nvidia-smi
48 changes: 32 additions & 16 deletions cpp/include/cudf/reduction/detail/reduction_operators.cuh
@@ -31,17 +31,41 @@ namespace detail {
 // intermediate data structure to compute `var`, `std`
 template <typename ResultType>
 struct var_std {
-  ResultType value;         /// the value
-  ResultType value_squared; /// the value squared
-
-  CUDF_HOST_DEVICE inline var_std(ResultType _value = 0, ResultType _value_squared = 0)
-    : value(_value), value_squared(_value_squared){};
+  // Uses the pairwise approach of Chan, Golub, and LeVeque,
+  // _Algorithms for computing the sample variance: analysis and
+  // recommendations_ (1983)
+  // https://doi.org/10.1080/00031305.1983.10483115
+  // Also http://www.cs.yale.edu/publications/techreports/tr222.pdf
+  // This is a modification of Youngs and Cramer's online approach.
+  ResultType running_sum;
+  ResultType running_square_deviations;
+  size_type count;
+
+  CUDF_HOST_DEVICE inline var_std(ResultType t = 0, ResultType s = 0, size_type n = 0)
+    : running_sum(t), running_square_deviations(s), count(n){};

   using this_t = var_std<ResultType>;

   CUDF_HOST_DEVICE inline this_t operator+(this_t const& rhs) const
   {
-    return this_t((this->value + rhs.value), (this->value_squared + rhs.value_squared));
+    // Updates as per equations 1.5a and 1.5b in the paper:
+    //   T_{1,m+n} = T_{1,m} + T_{m+1,m+n}
+    //   S_{1,m+n} = S_{1,m} + S_{m+1,m+n} + m/(n(m+n)) * (n/m * T_{1,m} - T_{m+1,m+n})**2
+    // Here the first m samples are in *this and the remaining n samples are in rhs.
+    auto const m = this->count;
+    auto const n = rhs.count;
+    // Avoid division by zero.
+    if (m == 0) { return rhs; }
+    if (n == 0) { return *this; }
+    auto const tm   = this->running_sum;
+    auto const tn   = rhs.running_sum;
+    auto const sm   = this->running_square_deviations;
+    auto const sn   = rhs.running_square_deviations;
+    auto const tmn  = tm + tn;
+    auto const diff = ((static_cast<ResultType>(n) / m) * tm) - tn;
+    // Compute m/(n*(m+n)) as (m/n)/(m+n) to avoid integer overflow.
+    auto const smn = sm + sn + ((static_cast<ResultType>(m) / n) / (m + n)) * diff * diff;
+    return {tmn, smn, m + n};
   };
 };

@@ -50,10 +74,7 @@ template <typename ResultType>
 struct transformer_var_std {
   using OutputType = var_std<ResultType>;

-  CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value)
-  {
-    return OutputType(value, value * value);
-  };
+  CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value) { return {value, 0, 1}; };
 };

 // ------------------------------------------------------------------------
@@ -257,12 +278,7 @@ struct variance : public compound_op<variance> {
                                      cudf::size_type const& count,
                                      cudf::size_type const& ddof)
     {
-      ResultType mean     = input.value / count;
-      ResultType asum     = input.value_squared;
-      cudf::size_type div = count - ddof;
-      ResultType var      = asum / div - ((mean * mean) * count) / div;
-
-      return var;
+      return input.running_square_deviations / (count - ddof);
     };
   };
 };
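To summarize the variance rework above: var_std previously accumulated the raw sum and sum of squares, and variance::compute_result evaluated E[x^2] - E[x]^2, which can lose precision badly when the mean is large relative to the variance. The new code instead merges partial results with equations 1.5a and 1.5b of Chan, Golub, and LeVeque, where T is a range's running sum, S its running sum of squared deviations from its own mean, the first m samples sit in *this, and the remaining n in rhs:

  T_{1,m+n} = T_{1,m} + T_{m+1,m+n}                                                        (1.5a)
  S_{1,m+n} = S_{1,m} + S_{m+1,m+n} + \frac{m}{n(m+n)} \left( \frac{n}{m} T_{1,m} - T_{m+1,m+n} \right)^2   (1.5b)

variance::compute_result then reduces to S_{1,N} / (N - ddof). A minimal host-side sketch of the same merge (illustrative only: partial, the sample data, and main are not part of cudf):

#include <cstddef>
#include <iostream>
#include <vector>

// Mirrors var_std: t = running sum (T), s = running sum of squared
// deviations (S), n = sample count.
struct partial {
  double t{0.0};
  double s{0.0};
  std::size_t n{0};

  partial operator+(partial const& rhs) const
  {
    if (n == 0) { return rhs; }  // avoid division by zero
    if (rhs.n == 0) { return *this; }
    double const m    = static_cast<double>(n);      // paper's m (samples in *this)
    double const k    = static_cast<double>(rhs.n);  // paper's n (samples in rhs)
    double const diff = (k / m) * t - rhs.t;         // (n/m) T_{1,m} - T_{m+1,m+n}
    return {t + rhs.t, s + rhs.s + (m / k) / (m + k) * diff * diff, n + rhs.n};
  }
};

int main()
{
  std::vector<double> const data{1.0, 2.0, 3.0, 4.0};
  partial acc{};
  for (double v : data) { acc = acc + partial{v, 0.0, 1}; }  // one leaf per sample
  std::size_t const ddof = 1;  // sample variance
  std::cout << acc.s / static_cast<double>(acc.n - ddof) << '\n';  // prints 1.66667
}

Folding one {value, 0, 1} leaf per sample, exactly as transformer_var_std now produces them, accumulates S directly; the data {1, 2, 3, 4} yields S = 5 and sample variance 5/3.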
88 changes: 50 additions & 38 deletions cpp/src/io/json/process_tokens.cu
@@ -22,6 +22,7 @@
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/io/detail/tokenize_json.hpp>
+#include <cudf/utilities/memory_resource.hpp>

 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
@@ -87,38 +88,41 @@ void validate_token_stream(device_span<char const> d_input,
 {
   CUDF_FUNC_RANGE();
   if (!options.is_strict_validation()) { return; }

+  rmm::device_uvector<bool> d_invalid = cudf::detail::make_zeroed_device_uvector_async<bool>(
+    tokens.size(), stream, cudf::get_current_device_resource_ref());
+
   using token_t = cudf::io::json::token_t;
-  cudf::detail::optional_trie trie_na =
-    cudf::detail::create_serialized_trie(options.get_na_values(), stream);
-  auto trie_na_view = cudf::detail::make_trie_view(trie_na);
+  auto literals = options.get_na_values();
+  literals.emplace_back("null");  // add the fixed JSON literals to the same trie
+  literals.emplace_back("true");
+  literals.emplace_back("false");
+
+  cudf::detail::optional_trie trie_literals =
+    cudf::detail::create_serialized_trie(literals, stream);
+  cudf::detail::optional_trie trie_nonnumeric = cudf::detail::create_serialized_trie(
+    {"NaN", "Infinity", "+INF", "+Infinity", "-INF", "-Infinity"}, stream);

   auto validate_values = cuda::proclaim_return_type<bool>(
     [data = d_input.data(),
-     trie_na = trie_na_view,
+     trie_literals   = cudf::detail::make_trie_view(trie_literals),
+     trie_nonnumeric = cudf::detail::make_trie_view(trie_nonnumeric),
      allow_numeric_leading_zeros = options.is_allowed_numeric_leading_zeros(),
      allow_nonnumeric =
        options.is_allowed_nonnumeric_numbers()] __device__(SymbolOffsetT start,
                                                            SymbolOffsetT end) -> bool {
       // This validates an unquoted value. A value must match https://www.json.org/json-en.html
       // but the leading and trailing whitespace should already have been removed, and is not
       // a string
-      auto c               = data[start];
-      auto is_null_literal = serialized_trie_contains(trie_na, {data + start, end - start});
-      if (is_null_literal) {
-        return true;
-      } else if ('n' == c) {
-        return substr_eq(data, start, end, 4, "null");
-      } else if ('t' == c) {
-        return substr_eq(data, start, end, 4, "true");
-      } else if ('f' == c) {
-        return substr_eq(data, start, end, 5, "false");
-      } else if (allow_nonnumeric && c == 'N') {
-        return substr_eq(data, start, end, 3, "NaN");
-      } else if (allow_nonnumeric && c == 'I') {
-        return substr_eq(data, start, end, 8, "Infinity");
-      } else if (allow_nonnumeric && c == '+') {
-        return substr_eq(data, start, end, 4, "+INF") ||
-               substr_eq(data, start, end, 9, "+Infinity");
-      } else if ('-' == c || c <= '9' && c >= '0') {
+      auto const is_literal = serialized_trie_contains(trie_literals, {data + start, end - start});
+      if (is_literal) { return true; }
+      if (allow_nonnumeric) {
+        auto const is_nonnumeric =
+          serialized_trie_contains(trie_nonnumeric, {data + start, end - start});
+        if (is_nonnumeric) { return true; }
+      }
+      auto c = data[start];
+      if ('-' == c || c <= '9' && c >= '0') {
         // number
         auto num_state = number_state::START;
         for (auto at = start; at < end; at++) {
@@ -140,9 +144,6 @@
             num_state = number_state::LEADING_ZERO;
           } else if (c >= '1' && c <= '9') {
             num_state = number_state::WHOLE;
-          } else if (allow_nonnumeric && 'I' == c) {
-            return substr_eq(data, start, end, 4, "-INF") ||
-                   substr_eq(data, start, end, 9, "-Infinity");
           } else {
             return false;
           }
@@ -273,33 +274,44 @@

   auto num_tokens = tokens.size();
   auto count_it   = thrust::make_counting_iterator(0);
-  auto predicate  = [tokens        = tokens.begin(),
-                     token_indices = token_indices.begin(),
-                     validate_values,
-                     validate_strings] __device__(auto i) -> bool {
+  auto predicate  = cuda::proclaim_return_type<bool>([tokens        = tokens.begin(),
+                                                      token_indices = token_indices.begin(),
+                                                      validate_values,
+                                                      validate_strings] __device__(auto i) -> bool {
     if (tokens[i] == token_t::ValueEnd) {
       return !validate_values(token_indices[i - 1], token_indices[i]);
     } else if (tokens[i] == token_t::FieldNameEnd || tokens[i] == token_t::StringEnd) {
       return !validate_strings(token_indices[i - 1], token_indices[i]);
     }
     return false;
-  };
+  });

+  auto conditional_invalidout_it =
+    cudf::detail::make_tabulate_output_iterator(cuda::proclaim_return_type<void>(
+      [d_invalid = d_invalid.begin()] __device__(size_type i, bool x) -> void {
+        if (x) { d_invalid[i] = true; }
+      }));
+  thrust::transform(rmm::exec_policy_nosync(stream),
+                    count_it,
+                    count_it + num_tokens,
+                    conditional_invalidout_it,
+                    predicate);
+
   using scan_type            = write_if::scan_type;
   auto conditional_write     = write_if{tokens.begin(), num_tokens};
   auto conditional_output_it = cudf::detail::make_tabulate_output_iterator(conditional_write);
-  auto transform_op          = cuda::proclaim_return_type<scan_type>(
-    [predicate, tokens = tokens.begin()] __device__(auto i) -> scan_type {
-      if (predicate(i)) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd};
-      return {static_cast<token_t>(tokens[i]), tokens[i] == token_t::LineEnd};
-    });
-  auto binary_op = cuda::proclaim_return_type<scan_type>(
+  auto binary_op             = cuda::proclaim_return_type<scan_type>(
     [] __device__(scan_type prev, scan_type curr) -> scan_type {
       auto op_result = (prev.first == token_t::ErrorBegin ? prev.first : curr.first);
-      return scan_type((curr.second ? curr.first : op_result), prev.second | curr.second);
+      return {(curr.second ? curr.first : op_result), prev.second | curr.second};
     });
+  auto transform_op = cuda::proclaim_return_type<scan_type>(
+    [d_invalid = d_invalid.begin(), tokens = tokens.begin()] __device__(auto i) -> scan_type {
+      if (d_invalid[i]) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd};
+      return {static_cast<token_t>(tokens[i]), tokens[i] == token_t::LineEnd};
+    });

-  thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+  thrust::transform_inclusive_scan(rmm::exec_policy_nosync(stream),
                                    count_it,
                                    count_it + num_tokens,
                                    conditional_output_it,
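Two changes in validate_token_stream are worth spelling out. First, the chain of per-literal substr_eq branches is replaced by two serialized tries: trie_literals holds the user-configured na values plus null/true/false, and trie_nonnumeric holds the six non-numeric number spellings, so an unquoted value is checked with at most two trie lookups however many literals are configured. Second, validation is split out of the scan: a thrust::transform pass records failures into the d_invalid flags, and the transform_inclusive_scan then only reads those flags, presumably so the comparatively expensive validation (trie probes, number-state machine) runs exactly once per token. The scan's binary operator propagates ErrorBegin forward and resets at each LineEnd, so an invalid value poisons the rest of its record but not the next one. A minimal host-side sketch of this flag-then-scan pattern (illustrative only: the enum, sample data, and std::inclusive_scan stand in for cudf's token types and the Thrust device scan):

#include <cstddef>
#include <iostream>
#include <numeric>
#include <utility>
#include <vector>

enum token { ValueBegin, ValueEnd, LineEnd, ErrorBegin };

// (token to emit, token is a LineEnd); mirrors the role of write_if::scan_type
using scan_type = std::pair<token, bool>;

int main()
{
  std::vector<token> tokens{ValueBegin, ValueEnd, ValueBegin, ValueEnd, LineEnd,
                            ValueBegin, ValueEnd, LineEnd};
  std::vector<bool> invalid(tokens.size(), false);
  invalid[1] = true;  // pass 1: pretend validation failed at token 1

  // pass 2a: transform each token into the scan input, consulting only the flags
  std::vector<scan_type> in(tokens.size());
  for (std::size_t i = 0; i < tokens.size(); ++i) {
    in[i] = {invalid[i] ? ErrorBegin : tokens[i], tokens[i] == LineEnd};
  }

  // pass 2b: inclusive scan; ErrorBegin propagates forward until a LineEnd resets it
  std::vector<scan_type> out(tokens.size());
  std::inclusive_scan(in.begin(), in.end(), out.begin(),
                      [](scan_type prev, scan_type curr) -> scan_type {
                        auto propagated = prev.first == ErrorBegin ? prev.first : curr.first;
                        return {curr.second ? curr.first : propagated,
                                prev.second || curr.second};
                      });

  for (auto const& p : out) { std::cout << p.first << ' '; }
  std::cout << '\n';  // prints 0 3 3 3 2 0 1 2: the first record is error-filled
                      // from the invalid token to its LineEnd, the second is untouched
}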