Skip to content

Commit

Permalink
Reenable arrow tests (#16556)
Browse files Browse the repository at this point in the history
This PR reenables the tests that were disabled in #16379, converting them to use the new C data interface functions instead of the old libarrow-based ones.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - David Wendt (https://github.com/davidwendt)

URL: #16556
  • Loading branch information
vyasr authored Aug 16, 2024
1 parent bc8ca9b commit 10cdd5f
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 200 deletions.
4 changes: 0 additions & 4 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -690,10 +690,6 @@ ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE tes
ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing)
# Deprecation from 16297 and fixes in 16379 caused this test to be empty. This will be reenabled
# once the deprecated APIs have been replaced in 24.10.
#
# ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing)
Expand Down
5 changes: 4 additions & 1 deletion cpp/tests/interop/arrow_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@

#include <arrow/util/bitmap_builders.h>

#include <algorithm>

#pragma once

template <typename T>
Expand Down Expand Up @@ -154,8 +156,9 @@ std::shared_ptr<arrow::Array> get_arrow_list_array(std::vector<T> data,
"Failed to append values to buffer builder");
CUDF_EXPECTS(buff_builder.Finish(&offset_buffer).ok(), "Failed to allocate buffer");

auto nullable = std::accumulate(list_validity.begin(), list_validity.end(), 0) > 0;
return std::make_shared<arrow::ListArray>(
arrow::list(data_array->type()),
arrow::list(arrow::field("", data_array->type(), nullable)),
offsets.size() - 1,
offset_buffer,
data_array,
Expand Down
145 changes: 99 additions & 46 deletions cpp/tests/interop/from_arrow_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,6 @@
* limitations under the License.
*/

// These interop functions are deprecated. We keep the code in this
// test and will migrate the tests to export the arrow C data
// interface which we consume with from_arrow_host. For now, the tests
// are commented out.

#if 0

#include <tests/interop/arrow_utils.hpp>

#include <cudf_test/base_fixture.hpp>
Expand All @@ -43,6 +36,10 @@

#include <thrust/iterator/counting_iterator.h>

#include <arrow/c/bridge.h>
#include <nanoarrow/nanoarrow.h>
#include <nanoarrow/nanoarrow_device.h>

std::unique_ptr<cudf::table> get_cudf_table()
{
std::vector<std::unique_ptr<cudf::column>> columns;
Expand Down Expand Up @@ -93,6 +90,45 @@ struct FromArrowTest : public cudf::test::BaseFixture {};
// Typed-test fixture; the suite is registered over cudf::test::DurationTypes via TYPED_TEST_SUITE.
template <typename T>
struct FromArrowTestDurationsTest : public cudf::test::BaseFixture {};

/**
 * @brief Convert an Arrow table to a cudf table through the Arrow C data interface.
 *
 * The table's chunks are combined into a single record batch, the schema and the batch are
 * exported as `ArrowSchema` / `ArrowArray`, and the pair is handed to `cudf::from_arrow`.
 * Every exported structure is released before this function returns or propagates an
 * exception.
 *
 * @param arrow_table Arrow table to convert
 * @return The converted table, or `std::nullopt` if combining the chunks or exporting the
 * schema/batch fails
 */
std::optional<std::unique_ptr<cudf::table>> export_table(std::shared_ptr<arrow::Table> arrow_table)
{
  // Combine first so a failure here is reported as nullopt (instead of aborting through
  // ValueOrDie()) and happens before anything that needs releasing has been exported.
  auto maybe_batch = arrow_table->CombineChunksToBatch();
  if (!maybe_batch.ok()) { return std::nullopt; }
  auto batch = *maybe_batch;

  ArrowSchema schema;
  if (!arrow::ExportSchema(*arrow_table->schema(), &schema).ok()) { return std::nullopt; }

  ArrowArray arr;
  if (!arrow::ExportRecordBatch(*batch, &arr).ok()) {
    // The schema was exported successfully above, so it must be released on this path too.
    schema.release(&schema);
    return std::nullopt;
  }

  auto release_exports = [&]() {
    arr.release(&arr);
    schema.release(&schema);
  };

  std::unique_ptr<cudf::table> ret;
  try {
    ret = cudf::from_arrow(&schema, &arr);
  } catch (...) {
    // Do not leak the exported structures when the conversion throws.
    release_exports();
    throw;
  }
  release_exports();
  return {std::move(ret)};
}

/**
 * @brief Convert an Arrow scalar to a cudf scalar through the Arrow C data interface.
 *
 * The scalar is expanded into a one-element Arrow array, the array and its type are exported
 * as `ArrowArray` / `ArrowSchema`, converted with `cudf::from_arrow_column`, and element 0 of
 * the resulting column is extracted with `cudf::get_element`. Every exported structure is
 * released before this function returns or propagates an exception.
 *
 * @param arrow_scalar Arrow scalar to convert
 * @return The converted scalar, or `std::nullopt` if building or exporting the array fails
 */
std::optional<std::unique_ptr<cudf::scalar>> export_scalar(arrow::Scalar const& arrow_scalar)
{
  auto maybe_array = arrow::MakeArrayFromScalar(arrow_scalar, 1);
  if (!maybe_array.ok()) { return std::nullopt; }
  auto array = *maybe_array;

  ArrowSchema schema;
  if (!arrow::ExportType(*array->type(), &schema).ok()) { return std::nullopt; }

  ArrowArray arr;
  if (!arrow::ExportArray(*array, &arr).ok()) {
    // The type was exported successfully above, so it must be released on this path too.
    schema.release(&schema);
    return std::nullopt;
  }

  auto release_exports = [&]() {
    arr.release(&arr);
    schema.release(&schema);
  };

  std::unique_ptr<cudf::scalar> ret;
  try {
    auto col = cudf::from_arrow_column(&schema, &arr);
    ret      = cudf::get_element(col->view(), 0);
  } catch (...) {
    // Do not leak the exported structures when the conversion throws.
    release_exports();
    throw;
  }
  release_exports();
  return {std::move(ret)};
}

/**
 * @brief Convenience overload of `export_scalar` for scalars held by `std::shared_ptr`.
 *
 * @param arrow_scalar Shared pointer to the Arrow scalar to convert
 * @return The result of the reference overload, or `std::nullopt` when `arrow_scalar` is null
 */
std::optional<std::unique_ptr<cudf::scalar>> export_scalar(
  std::shared_ptr<arrow::Scalar> const arrow_scalar)
{
  // A null pointer cannot be dereferenced; report it like any other conversion failure.
  if (arrow_scalar == nullptr) { return std::nullopt; }
  return export_scalar(*arrow_scalar);
}

TYPED_TEST_SUITE(FromArrowTestDurationsTest, cudf::test::DurationTypes);

TEST_F(FromArrowTest, EmptyTable)
Expand All @@ -102,9 +138,10 @@ TEST_F(FromArrowTest, EmptyTable)
auto expected_cudf_table = tables.first->view();
auto arrow_table = tables.second;

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table.value()->view());
}

TEST_F(FromArrowTest, DateTimeTable)
Expand All @@ -127,9 +164,10 @@ TEST_F(FromArrowTest, DateTimeTable)

auto arrow_table = arrow::Table::Make(schema, {arr});

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view());
}

TYPED_TEST(FromArrowTestDurationsTest, DurationTable)
Expand Down Expand Up @@ -160,9 +198,10 @@ TYPED_TEST(FromArrowTestDurationsTest, DurationTable)

auto arrow_table = arrow::Table::Make(schema, {arr});

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view());
}

TEST_F(FromArrowTest, NestedList)
Expand All @@ -188,8 +227,9 @@ TEST_F(FromArrowTest, NestedList)

auto arrow_table = arrow::Table::Make(schema, {nested_list_arr});

auto got_cudf_table = cudf::from_arrow(*arrow_table);
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view());
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view());
}

TEST_F(FromArrowTest, StructColumn)
Expand Down Expand Up @@ -274,9 +314,10 @@ TEST_F(FromArrowTest, StructColumn)
auto schema = std::make_shared<arrow::Schema>(schema_vector);
auto input = arrow::Table::Make(schema, {struct_array});

auto got_cudf_table = cudf::from_arrow(*input);
auto got_cudf_table = export_table(input);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table.value()->view());
}

TEST_F(FromArrowTest, DictionaryIndicesType)
Expand Down Expand Up @@ -304,9 +345,10 @@ TEST_F(FromArrowTest, DictionaryIndicesType)

cudf::table expected_table(std::move(columns));

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table.value()->view());
}

TEST_F(FromArrowTest, ChunkedArray)
Expand Down Expand Up @@ -369,9 +411,10 @@ TEST_F(FromArrowTest, ChunkedArray)

auto expected_cudf_table = get_cudf_table();

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table.value()->view());
}

struct FromArrowTestSlice
Expand All @@ -388,13 +431,14 @@ TEST_P(FromArrowTestSlice, SliceTest)
auto sliced_cudf_table = cudf::slice(cudf_table_view, {start, end})[0];
auto expected_cudf_table = cudf::table{sliced_cudf_table};
auto sliced_arrow_table = arrow_table->Slice(start, end - start);
auto got_cudf_table = cudf::from_arrow(*sliced_arrow_table);
auto got_cudf_table = export_table(sliced_arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

// This has been added to take care of the empty string column issue with no children
if (got_cudf_table->num_rows() == 0 and expected_cudf_table.num_rows() == 0) {
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table->view());
if (got_cudf_table.value()->num_rows() == 0 and expected_cudf_table.num_rows() == 0) {
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table.value()->view());
} else {
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table.value()->view());
}
}

Expand All @@ -417,9 +461,10 @@ TEST_F(FromArrowTest, FixedPoint128Table)
auto const schema = std::make_shared<arrow::Schema>(schema_vector);
auto const arrow_table = arrow::Table::Make(schema, {arr});

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view());
}
}

Expand All @@ -441,9 +486,10 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge)
auto const schema = std::make_shared<arrow::Schema>(schema_vector);
auto const arrow_table = arrow::Table::Make(schema, {arr});

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view());
}
}

Expand All @@ -466,9 +512,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls)
auto const schema = std::make_shared<arrow::Schema>(schema_vector);
auto const arrow_table = arrow::Table::Make(schema, {arr});

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view());
}
}

Expand All @@ -493,9 +540,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNullsLarge)
auto const schema = std::make_shared<arrow::Schema>(schema_vector);
auto const arrow_table = arrow::Table::Make(schema, {arr});

auto got_cudf_table = cudf::from_arrow(*arrow_table);
auto got_cudf_table = export_table(arrow_table);
ASSERT_TRUE(got_cudf_table.has_value());

CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view());
}
}

Expand All @@ -519,9 +567,12 @@ TYPED_TEST(FromArrowNumericScalarTest, Basic)
{
TypeParam const value{42};
auto const arrow_scalar = arrow::MakeScalar(value);
auto const cudf_scalar = cudf::from_arrow(*arrow_scalar);

auto const cudf_scalar = export_scalar(arrow_scalar);
ASSERT_TRUE(cudf_scalar.has_value());

auto const cudf_numeric_scalar =
dynamic_cast<cudf::numeric_scalar<TypeParam>*>(cudf_scalar.get());
dynamic_cast<cudf::numeric_scalar<TypeParam>*>(cudf_scalar.value().get());
if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); }
EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id<TypeParam>()));
EXPECT_EQ(cudf_numeric_scalar->value(), value);
Expand All @@ -535,12 +586,13 @@ TEST_F(FromArrowDecimalScalarTest, Basic)
auto const value{42};
auto const precision{8};
auto const scale{4};
auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale));
auto cudf_scalar = cudf::from_arrow(arrow_scalar);
auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale));
auto const cudf_scalar = export_scalar(arrow_scalar);
ASSERT_TRUE(cudf_scalar.has_value());

// Arrow offers a minimum of 128 bits for the Decimal type.
auto const cudf_decimal_scalar =
dynamic_cast<cudf::fixed_point_scalar<numeric::decimal128>*>(cudf_scalar.get());
dynamic_cast<cudf::fixed_point_scalar<numeric::decimal128>*>(cudf_scalar.value().get());
EXPECT_EQ(cudf_decimal_scalar->type(),
cudf::data_type(cudf::type_to_id<numeric::decimal128>(), scale));
EXPECT_EQ(cudf_decimal_scalar->value(), value);
Expand All @@ -552,9 +604,10 @@ TEST_F(FromArrowStringScalarTest, Basic)
{
auto const value = std::string("hello world");
auto const arrow_scalar = arrow::StringScalar(value);
auto const cudf_scalar = cudf::from_arrow(arrow_scalar);
auto const cudf_scalar = export_scalar(arrow_scalar);
ASSERT_TRUE(cudf_scalar.has_value());

auto const cudf_string_scalar = dynamic_cast<cudf::string_scalar*>(cudf_scalar.get());
auto const cudf_string_scalar = dynamic_cast<cudf::string_scalar*>(cudf_scalar.value().get());
EXPECT_EQ(cudf_string_scalar->type(), cudf::data_type(cudf::type_id::STRING));
EXPECT_EQ(cudf_string_scalar->to_string(), value);
}
Expand All @@ -572,9 +625,10 @@ TEST_F(FromArrowListScalarTest, Basic)
auto const array = *maybe_array;

auto const arrow_scalar = arrow::ListScalar(array);
auto const cudf_scalar = cudf::from_arrow(arrow_scalar);
auto const cudf_scalar = export_scalar(arrow_scalar);
ASSERT_TRUE(cudf_scalar.has_value());

auto const cudf_list_scalar = dynamic_cast<cudf::list_scalar*>(cudf_scalar.get());
auto const cudf_list_scalar = dynamic_cast<cudf::list_scalar*>(cudf_scalar.value().get());
EXPECT_EQ(cudf_list_scalar->type(), cudf::data_type(cudf::type_id::LIST));

cudf::test::fixed_width_column_wrapper<int64_t> const lhs(
Expand All @@ -592,15 +646,14 @@ TEST_F(FromArrowStructScalarTest, Basic)
auto const field = arrow::field("", underlying_arrow_scalar->type);
auto const arrow_type = arrow::struct_({field});
auto const arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type);
auto const cudf_scalar = cudf::from_arrow(arrow_scalar);
auto const cudf_scalar = export_scalar(arrow_scalar);
ASSERT_TRUE(cudf_scalar.has_value());

auto const cudf_struct_scalar = dynamic_cast<cudf::struct_scalar*>(cudf_scalar.get());
auto const cudf_struct_scalar = dynamic_cast<cudf::struct_scalar*>(cudf_scalar.value().get());
EXPECT_EQ(cudf_struct_scalar->type(), cudf::data_type(cudf::type_id::STRUCT));

cudf::test::fixed_width_column_wrapper<int64_t> const col({value});
cudf::table_view const lhs({col});

CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view());
}

#endif
Loading

0 comments on commit 10cdd5f

Please sign in to comment.