diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b17eae0842..f861fb57916 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -144,7 +144,7 @@ repos: - id: ruff-format files: python/.*$ - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v0.3.1 + rev: v0.4.0 hooks: - id: verify-copyright exclude: | diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index cf33703f544..e5565c4b53c 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -22,7 +22,6 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" python -m auditwheel repair \ --exclude libcudf.so \ - --exclude libarrow.so.1601 \ --exclude libnvcomp.so \ --exclude libnvcomp_bitcomp.so \ --exclude libnvcomp_gdeflate.so \ diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh index 9694c3f6144..8975381ceba 100755 --- a/ci/build_wheel_libcudf.sh +++ b/ci/build_wheel_libcudf.sh @@ -10,6 +10,6 @@ package_dir="python/libcudf" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" mkdir -p ${package_dir}/final_dist -python -m auditwheel repair --exclude libarrow.so.1601 -w ${package_dir}/final_dist ${package_dir}/dist/* +python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh index 7181a49d397..0e4745bda28 100755 --- a/ci/build_wheel_pylibcudf.sh +++ b/ci/build_wheel_pylibcudf.sh @@ -20,7 +20,6 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" python -m auditwheel repair \ --exclude libcudf.so \ - --exclude libarrow.so.1601 \ --exclude libnvcomp.so \ --exclude libnvcomp_bitcomp.so \ --exclude libnvcomp_gdeflate.so \ diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b1a857fabf0..c4c32da8af2 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -38,15 +38,11 @@ dependencies: - identify>=2.5.20 - ipython - jupyter_client -- libarrow-acero==16.1.0.* -- libarrow-dataset==16.1.0.* -- libarrow==16.1.0.* - libcufile-dev=1.4.0.31 - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 - libkvikio==24.10.*,>=0.0.0a0 -- libparquet==16.1.0.* - librdkafka>=1.9.0,<1.10.0a0 - librmm==24.10.*,>=0.0.0a0 - make @@ -59,7 +55,6 @@ dependencies: - ninja - notebook - numba>=0.57 -- numpy - numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 @@ -72,7 +67,6 @@ dependencies: - pandoc - pre-commit - ptxcompiler -- pyarrow==16.1.0.* - pydata-sphinx-theme!=0.14.2 - pytest-benchmark - pytest-cases>=3.8.2 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 9b4e2dfe073..7439c9543a5 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -39,13 +39,9 @@ dependencies: - identify>=2.5.20 - ipython - jupyter_client -- libarrow-acero==16.1.0.* -- libarrow-dataset==16.1.0.* -- libarrow==16.1.0.* - libcufile-dev - libcurand-dev - libkvikio==24.10.*,>=0.0.0a0 -- libparquet==16.1.0.* - librdkafka>=1.9.0,<1.10.0a0 - librmm==24.10.*,>=0.0.0a0 - make @@ -58,7 +54,6 @@ dependencies: - ninja - notebook - numba>=0.57 -- numpy - numpy>=1.23,<3.0a0 - numpydoc - nvcomp==3.0.6 @@ -69,7 +64,6 @@ dependencies: - pandas>=2.0,<2.2.3dev0 - pandoc - pre-commit -- pyarrow==16.1.0.* - pydata-sphinx-theme!=0.14.2 - pynvjitlink>=0.0.0a0 - pytest-benchmark diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index b2dad767da4..53f52a35651 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -64,8 +64,6 @@ requirements: - rapids-build-backend >=0.3.0,<0.4.0.dev0 - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - - numpy 2.0 - - pyarrow ==16.1.0.* - libcudf ={{ version }} - pylibcudf ={{ version }} - rmm ={{ minor_version }} @@ -84,7 +82,7 @@ requirements: - cupy >=12.0.0 - numba >=0.57 - numpy >=1.23,<3.0a0 - - {{ pin_compatible('pyarrow', max_pin='x.x') }} + - pyarrow ==16.1.0.* - libcudf ={{ version }} - pylibcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index ff7458caf82..4b1c4cca828 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -19,9 +19,6 @@ c_stdlib_version: cmake_version: - ">=3.26.4,!=3.30.0" -libarrow_version: - - "==16.1.0" - dlpack_version: - ">=0.8,<1.0" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index aa1c94a4bca..1c2e9e8dd98 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -64,7 +64,6 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - nvcomp {{ nvcomp_version }} - - libarrow {{ libarrow_version }} - dlpack {{ dlpack_version }} - librdkafka {{ librdkafka_version }} - fmt {{ fmt_version }} @@ -92,7 +91,6 @@ outputs: - cmake {{ cmake_version }} host: - cuda-version ={{ cuda_version }} - - libarrow {{ libarrow_version }} run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index fef78467027..67b9b76bb8c 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -64,8 +64,6 @@ requirements: - rapids-build-backend >=0.3.0,<0.4.0.dev0 - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - - numpy 2.0 - - pyarrow ==16.1.0.* - libcudf ={{ version }} - rmm ={{ minor_version }} {% if cuda_major == "11" %} @@ -81,7 +79,7 @@ requirements: - typing_extensions >=4.0.0 - pandas >=2.0,<2.2.3dev0 - numpy >=1.23,<3.0a0 - - {{ pin_compatible('pyarrow', max_pin='x.x') }} + - pyarrow ==16.1.0.* - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4080c5d02da..1040fcb7b91 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -54,11 +54,6 @@ mark_as_advanced(CUDF_BUILD_TESTUTIL) option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON) option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF) mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED) -option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) -option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF) -option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF) -option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF) -option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" OFF) option( CUDF_USE_PER_THREAD_DEFAULT_STREAM "Build cuDF with per-thread default stream, including passing the per-thread default @@ -81,8 +76,6 @@ option(CUDA_ENABLE_LINEINFO option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) -option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS) @@ -100,8 +93,6 @@ message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}") message(VERBOSE "CUDF: Build cuDF shared libraries: ${BUILD_SHARED_LIBS}") message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_CACHE}") -message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}") -message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_ENABLE_ARROW_S3}") message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}") message( VERBOSE @@ -192,8 +183,6 @@ include(cmake/thirdparty/get_nvcomp.cmake) include(cmake/thirdparty/get_cccl.cmake) # find rmm include(cmake/thirdparty/get_rmm.cmake) -# find arrow -include(cmake/thirdparty/get_arrow.cmake) # find flatbuffers include(cmake/thirdparty/get_flatbuffers.cmake) # find dlpack @@ -807,7 +796,7 @@ add_dependencies(cudf jitify_preprocess_run) # Specify the target module library dependencies target_link_libraries( cudf - PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $ + PUBLIC CCCL::CCCL rmm::rmm $ PRIVATE $ cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio $ nanoarrow ) @@ -1056,20 +1045,6 @@ following IMPORTED GLOBAL targets: ]=] ) -if(CUDF_ENABLE_ARROW_PARQUET) - string( - APPEND - install_code_string - [=[ - if(NOT Parquet_DIR) - set(Parquet_DIR "${Arrow_DIR}") - endif() - set(ArrowDataset_DIR "${Arrow_DIR}") - find_dependency(ArrowDataset) - ]=] - ) -endif() - rapids_export( INSTALL cudf EXPORT_SET cudf-exports ${_components_export_string} diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index e3e6a07661a..07cbf5150f4 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -22,82 +22,8 @@ include_guard(GLOBAL) -# Generate a FindArrow module for the case where we need to search for arrow within a pip install -# pyarrow. -function(find_libarrow_in_python_wheel PYARROW_VERSION) - string(REPLACE "." ";" PYARROW_VER_COMPONENTS "${PYARROW_VERSION}") - list(GET PYARROW_VER_COMPONENTS 0 PYARROW_MAJOR_VER) - list(GET PYARROW_VER_COMPONENTS 1 PYARROW_MINOR_VER) - - # Ensure that the major and minor versions are two digits long - string(LENGTH ${PYARROW_MAJOR_VER} PYARROW_MAJOR_LENGTH) - string(LENGTH ${PYARROW_MINOR_VER} PYARROW_MINOR_LENGTH) - if(${PYARROW_MAJOR_LENGTH} EQUAL 1) - set(PYARROW_MAJOR_VER "0${PYARROW_MAJOR_VER}") - endif() - if(${PYARROW_MINOR_LENGTH} EQUAL 1) - set(PYARROW_MINOR_VER "0${PYARROW_MINOR_VER}") - endif() - - set(PYARROW_LIB "libarrow.so.${PYARROW_MAJOR_VER}${PYARROW_MINOR_VER}") - - string( - APPEND - initial_code_block - [=[ -find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" - OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE - COMMAND_ERROR_IS_FATAL ANY -) -list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") -]=] - ) - string( - APPEND - final_code_block - [=[ -list(POP_BACK CMAKE_PREFIX_PATH) -]=] - ) - rapids_find_generate_module( - Arrow NO_CONFIG - VERSION "${PYARROW_VERSION}" - LIBRARY_NAMES "${PYARROW_LIB}" - BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports - HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block - FINAL_CODE_BLOCK final_code_block - ) - - find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) - add_library(arrow_shared ALIAS Arrow::Arrow) - - rapids_export_package(BUILD Arrow cudf-exports) - rapids_export_package(INSTALL Arrow cudf-exports) -endfunction() - # This function finds arrow and sets any additional necessary environment variables. -function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON - ENABLE_PARQUET PYARROW_LIBARROW -) - - if(PYARROW_LIBARROW) - # Generate a FindArrow.cmake to find pyarrow's libarrow.so - find_libarrow_in_python_wheel(${VERSION}) - set(ARROW_FOUND - TRUE - PARENT_SCOPE - ) - set(ARROW_LIBRARIES - arrow_shared - PARENT_SCOPE - ) - return() - endif() - +function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_PARQUET) if(BUILD_STATIC) if(TARGET arrow_static) set(ARROW_FOUND @@ -124,10 +50,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() endif() - if(NOT ARROW_ARMV8_ARCH) - set(ARROW_ARMV8_ARCH "armv8-a") - endif() - if(NOT ARROW_SIMD_LEVEL) set(ARROW_SIMD_LEVEL "NONE") endif() @@ -150,14 +72,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB set(ARROW_OPENSSL_USE_SHARED ON) endif() - set(ARROW_PYTHON_OPTIONS "") - if(ENABLE_PYTHON) - list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON") - # Arrow's logic to build Boost from source is busted, so we have to get it from the system. - list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM") - list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO") - endif() - set(ARROW_PARQUET_OPTIONS "") if(ENABLE_PARQUET) # Arrow's logic to build Boost from source is busted, so we have to get it from the system. @@ -174,6 +88,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} GIT_SHALLOW TRUE SOURCE_SUBDIR cpp + EXCLUDE_FROM_ALL ${EXCLUDE_FROM_ALL} OPTIONS "CMAKE_VERBOSE_MAKEFILE ON" "ARROW_ACERO ON" "ARROW_IPC ON" @@ -181,16 +96,14 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB "ARROW_WITH_BACKTRACE ON" "ARROW_CXXFLAGS -w" "ARROW_JEMALLOC OFF" - "ARROW_S3 ${ENABLE_S3}" - "ARROW_ORC ${ENABLE_ORC}" - # e.g. needed by blazingsql-io + "ARROW_S3 OFF" + "ARROW_ORC OFF" ${ARROW_PARQUET_OPTIONS} "ARROW_PARQUET ${ENABLE_PARQUET}" "ARROW_FILESYSTEM ON" - ${ARROW_PYTHON_OPTIONS} + "ARROW_PYTHON OFF" # Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off "ARROW_USE_CCACHE OFF" - "ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}" "ARROW_SIMD_LEVEL ${ARROW_SIMD_LEVEL}" "ARROW_BUILD_STATIC ${ARROW_BUILD_STATIC}" "ARROW_BUILD_SHARED ${ARROW_BUILD_SHARED}" @@ -269,7 +182,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() if(Arrow_ADDED) - set(arrow_code_string [=[ if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) @@ -324,101 +236,106 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB get_target_property(interface_libs arrow_static INTERFACE_LINK_LIBRARIES) endif() endif() - rapids_export( - BUILD Arrow - VERSION ${VERSION} - EXPORT_SET arrow_targets - GLOBAL_TARGETS arrow_shared arrow_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_code_string - ) - - if(ENABLE_PARQUET) - - set(arrow_acero_code_string - [=[ - if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) - add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) - endif() - if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) - add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) - endif() - ]=] - ) + include(rapids-export) + if(NOT EXCLUDE_FROM_ALL) rapids_export( - BUILD ArrowAcero + BUILD Arrow VERSION ${VERSION} - EXPORT_SET arrow_acero_targets - GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + EXPORT_SET arrow_targets + GLOBAL_TARGETS arrow_shared arrow_static NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_acero_code_string + FINAL_CODE_BLOCK arrow_code_string ) - set(arrow_dataset_code_string - [=[ - if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) - add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) - endif() - if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) - add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) - endif() - ]=] - ) + if(ENABLE_PARQUET) + set(arrow_acero_code_string + [=[ + if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) + add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) + endif() + if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) + add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) + endif() + ]=] + ) - rapids_export( - BUILD ArrowDataset - VERSION ${VERSION} - EXPORT_SET arrow_dataset_targets - GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_dataset_code_string - ) + rapids_export( + BUILD ArrowAcero + VERSION ${VERSION} + EXPORT_SET arrow_acero_targets + GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_acero_code_string + ) - set(parquet_code_string - [=[ - if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) - add_library(parquet_shared ALIAS cudf::parquet_shared) - endif() - if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) - add_library(parquet_static ALIAS cudf::parquet_static) - endif() - ]=] - ) + set(arrow_dataset_code_string + [=[ + if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) + add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) + endif() + if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) + add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) + endif() + ]=] + ) - rapids_export( - BUILD Parquet - VERSION ${VERSION} - EXPORT_SET parquet_targets - GLOBAL_TARGETS parquet_shared parquet_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK parquet_code_string - ) + rapids_export( + BUILD ArrowDataset + VERSION ${VERSION} + EXPORT_SET arrow_dataset_targets + GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_dataset_code_string + ) + set(parquet_code_string + [=[ + if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) + add_library(parquet_shared ALIAS cudf::parquet_shared) + endif() + if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) + add_library(parquet_static ALIAS cudf::parquet_static) + endif() + ]=] + ) + + rapids_export( + BUILD Parquet + VERSION ${VERSION} + EXPORT_SET parquet_targets + GLOBAL_TARGETS parquet_shared parquet_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK parquet_code_string + ) + endif() endif() endif() - # We generate the arrow-configfiles when we built arrow locally, so always do `find_dependency` - rapids_export_package(BUILD Arrow cudf-exports) - rapids_export_package(INSTALL Arrow cudf-exports) - if(ENABLE_PARQUET) - rapids_export_package(BUILD Parquet cudf-exports) - rapids_export_package(BUILD ArrowDataset cudf-exports) - endif() + if(NOT EXCLUDE_FROM_ALL) + # We generate the arrow-configfiles when we built arrow locally, so always do `find_dependency` + rapids_export_package(BUILD Arrow cudf-exports) + rapids_export_package(INSTALL Arrow cudf-exports) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports - ) - rapids_export_find_package_root( - BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] - EXPORT_SET cudf-exports - CONDITION ENABLE_PARQUET - ) - rapids_export_find_package_root( - BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] - EXPORT_SET cudf-exports - CONDITION ENABLE_PARQUET - ) + if(ENABLE_PARQUET) + rapids_export_package(BUILD Parquet cudf-exports) + rapids_export_package(BUILD ArrowDataset cudf-exports) + endif() + + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports + ) + rapids_export_find_package_root( + BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] + EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) + rapids_export_find_package_root( + BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] + EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) + endif() set(ARROW_LIBRARIES "${ARROW_LIBRARIES}" @@ -435,7 +352,21 @@ if(NOT DEFINED CUDF_VERSION_Arrow) ) endif() +# Default to static arrow builds +if(NOT DEFINED CUDF_USE_ARROW_STATIC) + set(CUDF_USE_ARROW_STATIC ON) +endif() + +# Default to excluding from installation since we generally privately and statically link Arrow. +if(NOT DEFINED CUDF_EXCLUDE_ARROW_FROM_ALL) + set(CUDF_EXCLUDE_ARROW_FROM_ALL OFF) +endif() + +if(NOT DEFINED CUDF_ENABLE_ARROW_PARQUET) + set(CUDF_ENABLE_ARROW_PARQUET OFF) +endif() + find_and_configure_arrow( - ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} - ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW} + ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_EXCLUDE_ARROW_FROM_ALL} + ${CUDF_ENABLE_ARROW_PARQUET} ) diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt index a1f99c1d2fd..2816f613d3d 100644 --- a/cpp/examples/interop/CMakeLists.txt +++ b/cpp/examples/interop/CMakeLists.txt @@ -15,6 +15,13 @@ project( include(../fetch_dependencies.cmake) +# The Arrow CMake is currently broken if the build type is not set +set(CMAKE_BUILD_TYPE Release) +# No need to install Arrow libs when only the final example executable is shipped. +set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) +include(../../cmake/thirdparty/get_arrow.cmake) + add_executable(interop interop.cpp) target_link_libraries(interop PRIVATE cudf::cudf) target_compile_features(interop PRIVATE cxx_std_17) +target_link_libraries(interop PRIVATE ${ARROW_LIBRARIES}) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ac77a362e1c..f86acbcc51b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -24,8 +24,8 @@ rapids_test_init() # properties and linking to build the test function(ConfigureTest CMAKE_TEST_NAME) set(options) - set(one_value GPUS PERCENT STREAM_MODE EXTRA_LIB) - set(multi_value) + set(one_value GPUS PERCENT STREAM_MODE) + set(multi_value EXTRA_LIBS) cmake_parse_arguments(_CUDF_TEST "${options}" "${one_value}" "${multi_value}" ${ARGN}) if(NOT DEFINED _CUDF_TEST_GPUS AND NOT DEFINED _CUDF_TEST_PERCENT) set(_CUDF_TEST_GPUS 1) @@ -57,7 +57,7 @@ function(ConfigureTest CMAKE_TEST_NAME) target_link_libraries( ${CMAKE_TEST_NAME} PRIVATE cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main - nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIB}" + nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIBS}" ) rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME}) rapids_test_add( @@ -78,6 +78,14 @@ function(ConfigureTest CMAKE_TEST_NAME) endif() endfunction() +# ################################################################################################## +# dependencies ################################################################################### +# ################################################################################################## + +# No need to install Arrow libs when only the final test executables are shipped. +set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) +include(../cmake/thirdparty/get_arrow.cmake) + # ################################################################################################## # test sources ################################################################################## # ################################################################################################## @@ -197,7 +205,7 @@ ConfigureTest( QUANTILES_TEST quantiles/percentile_approx_test.cpp quantiles/quantile_test.cpp quantiles/quantiles_test.cpp GPUS 1 - PERCENT 70 + PERCENT 70 EXTRA_LIBS ${ARROW_LIBRARIES} ) # ################################################################################################## @@ -276,8 +284,9 @@ ConfigureTest( interop/from_arrow_host_test.cpp interop/from_arrow_stream_test.cpp interop/dlpack_test.cpp - EXTRA_LIB + EXTRA_LIBS nanoarrow + ${ARROW_LIBRARIES} ) # ################################################################################################## @@ -288,7 +297,7 @@ ConfigureTest(ROW_SELECTION_TEST io/row_selection_test.cpp) ConfigureTest( CSV_TEST io/csv_test.cpp GPUS 1 - PERCENT 30 + PERCENT 30 EXTRA_LIBS ${ARROW_LIBRARIES} ) ConfigureTest( FILE_IO_TEST io/file_io_test.cpp @@ -316,7 +325,7 @@ ConfigureTest( ConfigureTest( JSON_TEST io/json/json_test.cpp io/json/json_chunked_reader.cu GPUS 1 - PERCENT 30 + PERCENT 30 EXTRA_LIBS ${ARROW_LIBRARIES} ) ConfigureTest(JSON_WRITER_TEST io/json/json_writer.cpp) ConfigureTest(JSON_TYPE_CAST_TEST io/json/json_type_cast_test.cu) diff --git a/dependencies.yaml b/dependencies.yaml index b56430ac531..5be291b3671 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -17,7 +17,6 @@ files: - depends_on_rmm - develop - docs - - libarrow_build - notebooks - py_version - rapids_build_skbuild @@ -41,7 +40,6 @@ files: output: none includes: - cuda_version - - libarrow_run - test_cpp test_python: output: none @@ -60,7 +58,6 @@ files: - build_all - cuda - cuda_version - - libarrow_run - test_java test_notebooks: output: none @@ -79,7 +76,6 @@ files: - cuda - cuda_version - docs - - libarrow_run - py_version py_build_cudf: output: pyproject @@ -139,7 +135,6 @@ files: includes: - build_base - build_cpp - - build_python_libcudf - depends_on_librmm py_run_libcudf: output: pyproject @@ -391,38 +386,6 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cython>=3.0.3 - # Hard pin the patch version used during the build. This must be kept - # in sync with the version pinned in get_arrow.cmake. - - &pyarrow_build pyarrow==16.1.0.* - - output_types: pyproject - packages: - # Hard pin the version used during the build. - # Sync with conda build constraint & wheel run constraint. - - numpy==2.0.* - build_python_libcudf: - common: - - output_types: [conda, requirements, pyproject] - packages: - - *pyarrow_build - libarrow_build: - common: - - output_types: conda - packages: - # Hard pin the Arrow patch version used during the build. This must - # be kept in sync with the version pinned in get_arrow.cmake. - - libarrow-acero==16.1.0.* - - libarrow-dataset==16.1.0.* - - libarrow==16.1.0.* - - libparquet==16.1.0.* - libarrow_run: - common: - - output_types: conda - packages: - # Allow runtime version to float up to patch version - - libarrow-acero>=16.1.0,<16.2.0a0 - - libarrow-dataset>=16.1.0,<16.2.0a0 - - libarrow>=16.1.0,<16.2.0a0 - - libparquet>=16.1.0,<16.2.0a0 pyarrow_run: common: - output_types: [conda, requirements, pyproject] @@ -602,7 +565,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - fsspec>=0.6.0 - - numpy>=1.23,<3.0a0 + - &numpy numpy>=1.23,<3.0a0 - pandas>=2.0,<2.2.3dev0 run_pylibcudf: common: @@ -733,6 +696,7 @@ dependencies: - *cmake_ver - maven - openjdk=8.* + - boost test_python_common: common: - output_types: [conda, requirements, pyproject] @@ -746,7 +710,7 @@ dependencies: packages: - fastavro>=0.22.9 - hypothesis - - numpy + - *numpy - pandas test_python_cudf: common: diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 22059c5bc7f..c18a90140b6 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -212,6 +212,10 @@ target_compile_definitions( ) target_link_options(cudfjni PRIVATE "-Wl,--no-undefined") +set(CUDF_ENABLE_ARROW_PARQUET ON) +include(../../../../cpp/cmake/thirdparty/get_arrow.cmake) +target_link_libraries(cudfjni PRIVATE ${ARROW_LIBRARIES}) + if(USE_GDS) add_library(cufilejni src/CuFileJni.cpp) set_target_properties( diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 72f20b30052..7193ada5b93 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -35,7 +35,6 @@ include(../../cpp/cmake/thirdparty/get_dlpack.cmake) include(rapids-cython-core) rapids_cython_init() -include(../pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake) add_subdirectory(cudf/_lib) add_subdirectory(udf_cpp) diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 5ea378fc0e5..5d4b5421f16 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -65,9 +65,6 @@ rapids_cython_create_modules( target_link_libraries(strings_udf PUBLIC cudf_strings_udf) target_include_directories(interop PUBLIC "$") -set(targets_using_arrow_headers avro csv orc json parquet) -link_to_pyarrow_headers("${targets_using_arrow_headers}") - include(${rapids-cmake-dir}/export/find_package_root.cmake) include(../../../../cpp/cmake/thirdparty/get_nanoarrow.cmake) target_link_libraries(interop PUBLIC nanoarrow) diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 620229a1275..e7408cf2852 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -19,5 +19,3 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ ASSOCIATED_TARGETS cudf ) - -link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index a28a97490ac..0c1d5015078 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -133,8 +133,6 @@ requires = [ "libcudf==24.10.*,>=0.0.0a0", "librmm==24.10.*,>=0.0.0a0", "ninja", - "numpy==2.0.*", - "pyarrow==16.1.0.*", "pylibcudf==24.10.*,>=0.0.0a0", "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt index 1b205537d73..4490c41c7a9 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt +++ b/python/cudf_kafka/cudf_kafka/_lib/CMakeLists.txt @@ -20,5 +20,3 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ) -include(../../../pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake) -link_to_pyarrow_headers("${RAPIDS_CYTHON_CREATED_TARGETS}") diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 01e7299a33a..6ca798bb11c 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -106,6 +106,4 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", "ninja", - "numpy==2.0.*", - "pyarrow==16.1.0.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index 09c7ed2e217..96eb6c3bb30 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -32,9 +32,6 @@ endif() unset(cudf_FOUND) -# For wheels, this should always be true -set(USE_LIBARROW_FROM_PYARROW ON) - # Find Python early so that later commands can use it find_package(Python 3.10 REQUIRED COMPONENTS Interpreter) @@ -46,13 +43,11 @@ set(CUDA_STATIC_RUNTIME ON) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) -include(../pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake) - add_subdirectory(../../cpp cudf-cpp) # Ensure other libraries needed by libcudf.so get installed alongside it. include(cmake/Modules/WheelHelpers.cmake) install_aliased_imported_targets( - TARGETS cudf arrow_shared nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp - DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} + TARGETS cudf nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION + ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} ) diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py index f6ba0d51bdb..ba134710868 100644 --- a/python/libcudf/libcudf/load.py +++ b/python/libcudf/libcudf/load.py @@ -18,10 +18,6 @@ def load_library(): - # This is loading the libarrow shared library in situations where it comes from the - # pyarrow package (i.e. when installed as a wheel). - import pyarrow # noqa: F401 - # Dynamically load libcudf.so. Prefer a system library if one is present to # avoid clobbering symbols that other packages might expect, but if no # other library is present use the one in the wheel. diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index fd01f7f6e2f..43878d0aec2 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -71,5 +71,4 @@ requires = [ "cmake>=3.26.4,!=3.30.0", "librmm==24.10.*,>=0.0.0a0", "ninja", - "pyarrow==16.1.0.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt index 340ad120377..a4b831790fb 100644 --- a/python/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/CMakeLists.txt @@ -36,7 +36,6 @@ include(rapids-cython-core) rapids_cython_init() -include(cmake/Modules/LinkPyarrowHeaders.cmake) add_subdirectory(pylibcudf) if(DEFINED cython_lib_dir) diff --git a/python/pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake b/python/pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake deleted file mode 100644 index d432f9fe1f5..00000000000 --- a/python/pylibcudf/cmake/Modules/LinkPyarrowHeaders.cmake +++ /dev/null @@ -1,40 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= -include_guard(GLOBAL) - -find_package(Python REQUIRED COMPONENTS Development NumPy) - -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())" - OUTPUT_VARIABLE PYARROW_INCLUDE_DIR - ERROR_VARIABLE PYARROW_ERROR - RESULT_VARIABLE PYARROW_RESULT - OUTPUT_STRIP_TRAILING_WHITESPACE -) - -if(${PYARROW_RESULT}) - message(FATAL_ERROR "Error while trying to obtain pyarrow include directory:\n${PYARROW_ERROR}") -endif() - -# Due to cudf's scalar.pyx needing to cimport pylibcudf's scalar.pyx (because there are parts of -# cudf Cython that need to directly access the c_obj underlying the pylibcudf Scalar) the -# requirement for arrow headers infects all of cudf. These requirements will go away once all -# scalar-related Cython code is removed from cudf. -function(link_to_pyarrow_headers targets) - foreach(target IN LISTS targets) - # PyArrow headers require numpy headers. - target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}") - target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") - endforeach() -endfunction() diff --git a/python/pylibcudf/pylibcudf/io/CMakeLists.txt b/python/pylibcudf/pylibcudf/io/CMakeLists.txt index 55bea4fc262..bcc2151f5b6 100644 --- a/python/pylibcudf/pylibcudf/io/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/io/CMakeLists.txt @@ -20,8 +20,3 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_io_ ASSOCIATED_TARGETS cudf ) - -set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_csv pylibcudf_io_datasource - pylibcudf_io_json pylibcudf_io_parquet pylibcudf_io_types -) -link_to_pyarrow_headers("${targets_using_arrow_headers}") diff --git a/python/pylibcudf/pylibcudf/libcudf/io/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/io/CMakeLists.txt index 6831063ecb9..9f5f74506e9 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/libcudf/io/CMakeLists.txt @@ -21,6 +21,3 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp_io_ ) - -set(targets_using_arrow_headers cpp_io_json cpp_io_types) -link_to_pyarrow_headers("${targets_using_arrow_headers}") diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 0d673ea4cc3..e4c6edc6141 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -40,7 +40,7 @@ classifiers = [ test = [ "fastavro>=0.22.9", "hypothesis", - "numpy", + "numpy>=1.23,<3.0a0", "pandas", "pytest-cov", "pytest-xdist", @@ -104,8 +104,6 @@ requires = [ "libcudf==24.10.*,>=0.0.0a0", "librmm==24.10.*,>=0.0.0a0", "ninja", - "numpy==2.0.*", - "pyarrow==16.1.0.*", "rmm==24.10.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.