Skip to content

Commit

Permalink
Fix C++ and Cython io types (#16610)
Browse files Browse the repository at this point in the history
The C++ I/O types were previously not specifying a base type despite the fact that the Cython code was relying on the base being an int32. This has apparently never bitten us before, but in theory this could go very wrong since it leaves the underlying type up to the compiler and if the C++ binary used something other than an int32 that would result in an ABI incompatibility with the Python build that would produce spurious results.

While fixing this, I also noticed that the Cython contained a number of erroneous (likely outdated) declarations. Since Cython extern declarations are simply an indicate to Cython of how to resolve a function call _if_ it appears in compiled Cython code, these were not causing any build failures because these were all unused APIs, so I removed them from the Cython with no further changes needed.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: #16610
  • Loading branch information
vyasr authored Aug 20, 2024
1 parent a45af4a commit 3ac409d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 35 deletions.
12 changes: 6 additions & 6 deletions cpp/include/cudf/io/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ namespace io {
/**
* @brief Compression algorithms
*/
enum class compression_type {
enum class compression_type : int32_t {
NONE, ///< No compression
AUTO, ///< Automatically detect or select compression format
SNAPPY, ///< Snappy format, using byte-oriented LZ77
Expand All @@ -72,7 +72,7 @@ enum class compression_type {
/**
* @brief Data source or destination types
*/
enum class io_type {
enum class io_type : int32_t {
FILEPATH, ///< Input/output is a file path
HOST_BUFFER, ///< Input/output is a buffer in host memory
DEVICE_BUFFER, ///< Input/output is a buffer in device memory
Expand All @@ -83,7 +83,7 @@ enum class io_type {
/**
* @brief Behavior when handling quotations in field data
*/
enum class quote_style {
enum class quote_style : int32_t {
MINIMAL, ///< Quote only fields which contain special characters
ALL, ///< Quote all fields
NONNUMERIC, ///< Quote all non-numeric fields
Expand All @@ -93,7 +93,7 @@ enum class quote_style {
/**
* @brief Column statistics granularity type for parquet/orc writers
*/
enum statistics_freq {
enum statistics_freq : int32_t {
STATISTICS_NONE = 0, ///< No column statistics
STATISTICS_ROWGROUP = 1, ///< Per-Rowgroup column statistics
STATISTICS_PAGE = 2, ///< Per-page column statistics
Expand All @@ -103,7 +103,7 @@ enum statistics_freq {
/**
* @brief Valid encodings for use with `column_in_metadata::set_encoding()`
*/
enum class column_encoding {
enum class column_encoding : int32_t {
// Common encodings:
USE_DEFAULT = -1, ///< No encoding has been requested, use default encoding
DICTIONARY, ///< Use dictionary encoding
Expand Down Expand Up @@ -222,7 +222,7 @@ class writer_compression_statistics {
/**
* @brief Control use of dictionary encoding for parquet writer
*/
enum dictionary_policy {
enum dictionary_policy : int32_t {
NEVER = 0, ///< Never use dictionary encoding
ADAPTIVE = 1, ///< Use dictionary when it will not impact compression
ALWAYS = 2 ///< Use dictionary regardless of impact on compression
Expand Down
50 changes: 21 additions & 29 deletions python/pylibcudf/pylibcudf/libcudf/io/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ cimport pylibcudf.libcudf.table.table_view as cudf_table_view
from libc.stdint cimport int32_t, uint8_t
from libcpp cimport bool
from libcpp.map cimport map
from libcpp.memory cimport shared_ptr, unique_ptr
from libcpp.pair cimport pair
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.unordered_map cimport unordered_map
from libcpp.vector cimport vector
from pyarrow.includes.libarrow cimport CRandomAccessFile
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.types cimport size_type

Expand Down Expand Up @@ -42,32 +40,32 @@ cdef extern from "cudf/io/types.hpp" \
cpdef enum class io_type(int32_t):
FILEPATH
HOST_BUFFER
DEVICE_BUFFER
VOID
USER_IMPLEMENTED

cpdef enum class statistics_freq(int32_t):
STATISTICS_NONE = 0,
STATISTICS_ROWGROUP = 1,
STATISTICS_PAGE = 2,
STATISTICS_COLUMN = 3,
STATISTICS_NONE,
STATISTICS_ROWGROUP,
STATISTICS_PAGE,
STATISTICS_COLUMN,

cpdef enum class dictionary_policy(int32_t):
NEVER = 0,
ADAPTIVE = 1,
ALWAYS = 2,

cdef extern from "cudf/io/types.hpp" namespace "cudf::io" nogil:
cpdef enum class column_encoding(int32_t):
USE_DEFAULT = -1
DICTIONARY = 0
PLAIN = 1
DELTA_BINARY_PACKED = 2
DELTA_LENGTH_BYTE_ARRAY =3
DELTA_BYTE_ARRAY = 4
BYTE_STREAM_SPLIT = 5
DIRECT = 6
DIRECT_V2 = 7
DICTIONARY_V2 = 8
NEVER,
ADAPTIVE,
ALWAYS,

cpdef enum class column_encoding(int32_t):
USE_DEFAULT
DICTIONARY
PLAIN
DELTA_BINARY_PACKED
DELTA_LENGTH_BYTE_ARRAY
DELTA_BYTE_ARRAY
BYTE_STREAM_SPLIT
DIRECT
DIRECT_V2
DICTIONARY_V2

cdef cppclass column_name_info:
string name
Expand All @@ -76,7 +74,6 @@ cdef extern from "cudf/io/types.hpp" \
cdef cppclass table_metadata:
table_metadata() except +

vector[string] column_names
map[string, string] user_data
vector[unordered_map[string, string]] per_file_user_data
vector[column_name_info] schema_info
Expand Down Expand Up @@ -120,10 +117,7 @@ cdef extern from "cudf/io/types.hpp" \
host_buffer(const char* data, size_t size)

cdef cppclass source_info:
io_type type
const vector[string]& filepaths() except +
const vector[host_buffer]& buffers() except +
vector[shared_ptr[CRandomAccessFile]] files

source_info() except +
source_info(const vector[string] &filepaths) except +
Expand All @@ -132,9 +126,7 @@ cdef extern from "cudf/io/types.hpp" \
source_info(const vector[cudf_io_datasource.datasource*] &datasources) except +

cdef cppclass sink_info:
io_type type
const vector[string]& filepaths()
const vector[vector[char] *]& buffers()
const vector[cudf_io_data_sink.data_sink *]& user_sinks()

sink_info() except +
Expand Down

0 comments on commit 3ac409d

Please sign in to comment.