Skip to content

Commit

Permalink
address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
karthikeyann committed Oct 7, 2024
1 parent f1c5b6b commit 7306d14
Showing 1 changed file with 9 additions and 11 deletions.
20 changes: 9 additions & 11 deletions cpp/src/io/json/process_tokens.cu
Original file line number Diff line number Diff line change
Expand Up @@ -97,33 +97,31 @@ void validate_token_stream(device_span<char const> d_input,
literals.emplace_back("null"); // added these too to single trie
literals.emplace_back("true");
literals.emplace_back("false");
std::vector<std::string> nonnumeric{"NaN", "Infinity", "+INF", "+Infinity", "-INF", "-Infinity"};

cudf::detail::optional_trie trie_literals =
cudf::detail::create_serialized_trie(literals, stream);
auto trie_literals_view = cudf::detail::make_trie_view(trie_literals);
cudf::detail::optional_trie trie_nonnumeric =
cudf::detail::create_serialized_trie(nonnumeric, stream);
auto trie_nonnumeric_view = cudf::detail::make_trie_view(trie_nonnumeric);
cudf::detail::optional_trie trie_nonnumeric = cudf::detail::create_serialized_trie(
{"NaN", "Infinity", "+INF", "+Infinity", "-INF", "-Infinity"}, stream);

auto validate_values = cuda::proclaim_return_type<bool>(
[data = d_input.data(),
trie_literals = trie_literals_view,
trie_nonnumeric = trie_nonnumeric_view,
trie_literals = cudf::detail::make_trie_view(trie_literals),
trie_nonnumeric = cudf::detail::make_trie_view(trie_nonnumeric),
allow_numeric_leading_zeros = options.is_allowed_numeric_leading_zeros(),
allow_nonnumeric =
options.is_allowed_nonnumeric_numbers()] __device__(SymbolOffsetT start,
SymbolOffsetT end) -> bool {
// This validates an unquoted value. A value must match https://www.json.org/json-en.html
// but the leading and training whitespace should already have been removed, and is not
// a string
auto c = data[start];
auto is_literal = serialized_trie_contains(trie_literals, {data + start, end - start});
auto const is_literal = serialized_trie_contains(trie_literals, {data + start, end - start});
if (is_literal) { return true; }
if (allow_nonnumeric) {
auto is_literal = serialized_trie_contains(trie_nonnumeric, {data + start, end - start});
if (is_literal) { return true; }
auto const is_nonnumeric =
serialized_trie_contains(trie_nonnumeric, {data + start, end - start});
if (is_nonnumeric) { return true; }
}
auto c = data[start];
if ('-' == c || c <= '9' && 'c' >= '0') {
// number
auto num_state = number_state::START;
Expand Down

0 comments on commit 7306d14

Please sign in to comment.