Skip to content

Commit

Permalink
Remove legacy Arrow interop APIs (#16590)
Browse files Browse the repository at this point in the history
Contributes to #15193.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #16590
  • Loading branch information
vyasr authored Aug 22, 2024
1 parent bf2ee32 commit 6c4905d
Show file tree
Hide file tree
Showing 10 changed files with 167 additions and 1,341 deletions.
3 changes: 0 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -363,17 +363,14 @@ add_library(
src/hash/sha512_hash.cu
src/hash/xxhash_64.cu
src/interop/dlpack.cpp
src/interop/from_arrow.cu
src/interop/arrow_utilities.cpp
src/interop/decimal_conversion_utilities.cu
src/interop/to_arrow.cu
src/interop/to_arrow_device.cu
src/interop/to_arrow_host.cu
src/interop/from_arrow_device.cu
src/interop/from_arrow_host.cu
src/interop/from_arrow_stream.cu
src/interop/to_arrow_schema.cpp
src/interop/detail/arrow_allocator.cpp
src/io/avro/avro.cpp
src/io/avro/avro_gpu.cu
src/io/avro/reader_impl.cu
Expand Down
101 changes: 1 addition & 100 deletions cpp/include/cudf/detail/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,13 @@

#pragma once

// We disable warning 611 because `arrow::TableBatchReader` only partially
// overrides the `ReadNext` method of `arrow::RecordBatchReader`,
// triggering warning 611-D from nvcc.
#ifdef __CUDACC__
#pragma nv_diag_suppress 611
#pragma nv_diag_suppress 2810
#endif
#include <rmm/resource_ref.hpp>

#include <arrow/api.h>
#ifdef __CUDACC__
#pragma nv_diag_default 611
#pragma nv_diag_default 2810
#endif

#include <cudf/interop.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/export.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <string>
#include <rmm/resource_ref.hpp>

namespace CUDF_EXPORT cudf {
namespace detail {
Expand All @@ -61,89 +45,6 @@ DLManagedTensor* to_dlpack(table_view const& input,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @brief Create an Arrow array whose concrete class is selected by a cudf type id.
*
* Dispatches on `id` and builds the matching `arrow::*Array` with `std::make_shared`,
* perfectly forwarding `args` to that array's constructor. For the date, timestamp and
* duration ids an Arrow type object is passed as the first constructor argument, ahead of
* the forwarded `args`.
*
* @tparam Ts Types of the constructor arguments forwarded to the selected Arrow array class
* @param id cudf type id that selects which Arrow array class is constructed
* @param args Arguments forwarded to the selected Arrow array constructor
* @return Shared pointer to the newly constructed array, as `arrow::Array`
*
* @note Any `id` without a case below reaches `CUDF_FAIL` with the message
* "Unsupported type_id conversion to arrow".
*/
template <typename... Ts>
std::shared_ptr<arrow::Array> to_arrow_array(cudf::type_id id, Ts&&... args)
{
switch (id) {
// These array classes are constructed from the forwarded arguments alone.
case type_id::BOOL8: return std::make_shared<arrow::BooleanArray>(std::forward<Ts>(args)...);
case type_id::INT8: return std::make_shared<arrow::Int8Array>(std::forward<Ts>(args)...);
case type_id::INT16: return std::make_shared<arrow::Int16Array>(std::forward<Ts>(args)...);
case type_id::INT32: return std::make_shared<arrow::Int32Array>(std::forward<Ts>(args)...);
case type_id::INT64: return std::make_shared<arrow::Int64Array>(std::forward<Ts>(args)...);
case type_id::UINT8: return std::make_shared<arrow::UInt8Array>(std::forward<Ts>(args)...);
case type_id::UINT16: return std::make_shared<arrow::UInt16Array>(std::forward<Ts>(args)...);
case type_id::UINT32: return std::make_shared<arrow::UInt32Array>(std::forward<Ts>(args)...);
case type_id::UINT64: return std::make_shared<arrow::UInt64Array>(std::forward<Ts>(args)...);
case type_id::FLOAT32: return std::make_shared<arrow::FloatArray>(std::forward<Ts>(args)...);
case type_id::FLOAT64: return std::make_shared<arrow::DoubleArray>(std::forward<Ts>(args)...);
// TIMESTAMP_DAYS is built as a Date32Array, with a Date32Type passed first.
case type_id::TIMESTAMP_DAYS:
return std::make_shared<arrow::Date32Array>(std::make_shared<arrow::Date32Type>(),
std::forward<Ts>(args)...);
// Remaining timestamps share TimestampArray; the Arrow TimeUnit carries the resolution.
case type_id::TIMESTAMP_SECONDS:
return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::SECOND),
std::forward<Ts>(args)...);
case type_id::TIMESTAMP_MILLISECONDS:
return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::MILLI),
std::forward<Ts>(args)...);
case type_id::TIMESTAMP_MICROSECONDS:
return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::MICRO),
std::forward<Ts>(args)...);
case type_id::TIMESTAMP_NANOSECONDS:
return std::make_shared<arrow::TimestampArray>(arrow::timestamp(arrow::TimeUnit::NANO),
std::forward<Ts>(args)...);
// Durations share DurationArray; the Arrow TimeUnit carries the resolution.
case type_id::DURATION_SECONDS:
return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::SECOND),
std::forward<Ts>(args)...);
case type_id::DURATION_MILLISECONDS:
return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::MILLI),
std::forward<Ts>(args)...);
case type_id::DURATION_MICROSECONDS:
return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::MICRO),
std::forward<Ts>(args)...);
case type_id::DURATION_NANOSECONDS:
return std::make_shared<arrow::DurationArray>(arrow::duration(arrow::TimeUnit::NANO),
std::forward<Ts>(args)...);
// Every type id not listed above is rejected here.
default: CUDF_FAIL("Unsupported type_id conversion to arrow");
}
}

// Converting arrow type to cudf type
data_type arrow_to_cudf_type(arrow::DataType const& arrow_type);

/**
* @copydoc cudf::to_arrow(table_view input, std::vector<column_metadata> const& metadata,
* rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr)
*/
std::shared_ptr<arrow::Table> to_arrow(table_view input,
std::vector<column_metadata> const& metadata,
rmm::cuda_stream_view stream,
arrow::MemoryPool* ar_mr);

/**
* @copydoc cudf::to_arrow(cudf::scalar const& input, column_metadata const& metadata,
* rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr)
*/
std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
column_metadata const& metadata,
rmm::cuda_stream_view stream,
arrow::MemoryPool* ar_mr);
/**
* @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream,
* rmm::device_async_resource_ref mr)
*/
std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream,
* rmm::device_async_resource_ref mr)
*/
std::unique_ptr<cudf::scalar> from_arrow(arrow::Scalar const& input,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @brief Return a maximum precision for a given type.
*
Expand Down
101 changes: 0 additions & 101 deletions cpp/include/cudf/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,6 @@

#pragma once

// We disable warning 611 because `arrow::TableBatchReader` only partially
// overrides the `ReadNext` method of `arrow::RecordBatchReader`,
// triggering warning 611-D from nvcc.
#ifdef __CUDACC__
#pragma nv_diag_suppress 611
#pragma nv_diag_suppress 2810
#endif
#include <rmm/resource_ref.hpp>

#include <arrow/api.h>
#ifdef __CUDACC__
#pragma nv_diag_default 611
#pragma nv_diag_default 2810
#endif

#include <cudf/column/column.hpp>
#include <cudf/detail/transform.hpp>
#include <cudf/table/table.hpp>
Expand Down Expand Up @@ -131,59 +116,6 @@ struct column_metadata {
column_metadata() = default;
};

/**
* @brief Create `arrow::Table` from cudf table `input`
*
* Converts the `cudf::table_view` to `arrow::Table` with the provided
* metadata `column_names`.
*
* @deprecated Since 24.08. Use cudf::to_arrow_host instead.
*
* @throws cudf::logic_error if `column_names` size doesn't match with number of columns.
*
* @param input table_view that needs to be converted to arrow Table
* @param metadata Contains hierarchy of names of columns and children
* @param stream CUDA stream used for device memory operations and kernel launches
* @param ar_mr arrow memory pool to allocate memory for arrow Table
* @return arrow Table generated from `input`
*
* @note For decimals, since the precision is not stored for them in libcudf,
* it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type
* supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision
* 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be
* converted to Arrow decimal128 of the precision 38.
*/
[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr<arrow::Table> to_arrow(
table_view input,
std::vector<column_metadata> const& metadata = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
arrow::MemoryPool* ar_mr = arrow::default_memory_pool());

/**
* @brief Create `arrow::Scalar` from cudf scalar `input`
*
* Converts the `cudf::scalar` to `arrow::Scalar`.
*
* @deprecated Since 24.08.
*
* @param input scalar that needs to be converted to arrow Scalar
* @param metadata Contains hierarchy of names of columns and children
* @param stream CUDA stream used for device memory operations and kernel launches
* @param ar_mr arrow memory pool to allocate memory for arrow Scalar
* @return arrow Scalar generated from `input`
*
* @note For decimals, since the precision is not stored for them in libcudf,
* it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type
* supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision
* 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be
* converted to Arrow decimal128 of the precision 38.
*/
[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr<arrow::Scalar> to_arrow(
cudf::scalar const& input,
column_metadata const& metadata = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
arrow::MemoryPool* ar_mr = arrow::default_memory_pool());

/**
* @brief typedef for a unique_ptr to an ArrowSchema with custom deleter
*
Expand Down Expand Up @@ -386,39 +318,6 @@ unique_device_array_t to_arrow_host(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
* @brief Create `cudf::table` from given arrow Table input
*
* @deprecated Since 24.08. Use cudf::from_arrow_host instead.
*
* @param input `arrow::Table` that needs to be converted to `cudf::table`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate `cudf::table`
* @return cudf table generated from given arrow Table
*/
[[deprecated("Use cudf::from_arrow_host")]] std::unique_ptr<table> from_arrow(
arrow::Table const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
* @brief Create `cudf::scalar` from given arrow Scalar input
*
* @deprecated Since 24.08. Use arrow's `MakeArrayFromScalar` on the
* input, followed by `ExportArray` to obtain something that can be
* consumed by `from_arrow_host`. Then use `cudf::get_element` to
* extract a device scalar from the column.
*
* @param input `arrow::Scalar` that needs to be converted to `cudf::scalar`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate `cudf::scalar`
* @return cudf scalar generated from given arrow Scalar
*/
[[deprecated("See docstring for migration strategies")]] std::unique_ptr<cudf::scalar> from_arrow(
arrow::Scalar const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
* @brief Create `cudf::table` from given ArrowArray and ArrowSchema input
*
Expand Down
83 changes: 0 additions & 83 deletions cpp/src/interop/detail/arrow_allocator.cpp

This file was deleted.

31 changes: 0 additions & 31 deletions cpp/src/interop/detail/arrow_allocator.hpp

This file was deleted.

Loading

0 comments on commit 6c4905d

Please sign in to comment.