From 750adca4e4cc7b18ef80ba39950ed1d250919016 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 10 Sep 2024 17:40:49 -0700 Subject: [PATCH] nvCOMP GZIP integration (#16770) nvCOMP GZIP integration. Opt-in for now. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Mark Harris (https://github.com/harrism) - Nghia Truong (https://github.com/ttnghia) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/16770 --- cpp/include/cudf/io/nvcomp_adapter.hpp | 2 +- cpp/src/io/comp/nvcomp_adapter.cpp | 14 +++++++++++--- cpp/src/io/parquet/reader_impl_chunking.cu | 14 ++++++++++++-- docs/cudf/source/user_guide/io/io.md | 6 +++++- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp index e7fe3cc7214..0d74a4158ad 100644 --- a/cpp/include/cudf/io/nvcomp_adapter.hpp +++ b/cpp/include/cudf/io/nvcomp_adapter.hpp @@ -24,7 +24,7 @@ namespace CUDF_EXPORT cudf { namespace io::nvcomp { -enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; +enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP }; /** * @brief Set of parameters that impact whether nvCOMP features are enabled. diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index 261a8eb401d..c3187f73a95 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -44,6 +45,8 @@ auto batched_decompress_get_temp_size_ex(compression_type compression, Args&&... return nvcompBatchedLZ4DecompressGetTempSizeEx(std::forward(args)...); case compression_type::DEFLATE: return nvcompBatchedDeflateDecompressGetTempSizeEx(std::forward(args)...); + case compression_type::GZIP: + return nvcompBatchedGzipDecompressGetTempSizeEx(std::forward(args)...); default: CUDF_FAIL("Unsupported compression type"); } } @@ -73,6 +76,8 @@ auto batched_decompress_async(compression_type compression, Args&&... args) case compression_type::DEFLATE: return nvcompBatchedDeflateDecompressAsync(std::forward(args)...); case compression_type::LZ4: return nvcompBatchedLZ4DecompressAsync(std::forward(args)...); + case compression_type::GZIP: + return nvcompBatchedGzipDecompressAsync(std::forward(args)...); default: CUDF_FAIL("Unsupported compression type"); } } @@ -84,6 +89,7 @@ std::string compression_type_name(compression_type compression) case compression_type::ZSTD: return "Zstandard"; case compression_type::DEFLATE: return "Deflate"; case compression_type::LZ4: return "LZ4"; + case compression_type::GZIP: return "GZIP"; } return "compression_type(" + std::to_string(static_cast(compression)) + ")"; } @@ -359,8 +365,8 @@ std::optional is_compression_disabled_impl(compression_type compres return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable."; } return std::nullopt; + default: return "Unsupported compression type"; } - return "Unsupported compression type"; } std::optional is_compression_disabled(compression_type compression, @@ -396,7 +402,8 @@ std::optional is_decompression_disabled_impl(compression_type compr feature_status_parameters params) { switch (compression) { - case compression_type::DEFLATE: { + case compression_type::DEFLATE: + case compression_type::GZIP: { if (not params.are_all_integrations_enabled) { return "DEFLATE decompression is experimental, you can enable it through " "`LIBCUDF_NVCOMP_POLICY` environment variable."; @@ -447,6 +454,7 @@ std::optional is_decompression_disabled(compression_type compressio size_t required_alignment(compression_type compression) { switch (compression) { + case compression_type::GZIP: case compression_type::DEFLATE: return nvcompDeflateRequiredAlignment; case compression_type::SNAPPY: return nvcompSnappyRequiredAlignment; case compression_type::ZSTD: return nvcompZstdRequiredAlignment; @@ -462,7 +470,7 @@ std::optional compress_max_allowed_chunk_size(compression_type compressi case compression_type::SNAPPY: return nvcompSnappyCompressionMaxAllowedChunkSize; case compression_type::ZSTD: return nvcompZstdCompressionMaxAllowedChunkSize; case compression_type::LZ4: return nvcompLZ4CompressionMaxAllowedChunkSize; - default: return std::nullopt; + default: CUDF_FAIL("Unsupported compression type"); } } diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 84f0dab0d8b..245e1829c72 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -865,8 +865,18 @@ std::vector compute_page_splits_by_row(device_span - **Notes:** - \[¹\] - Not all orientations are GPU-accelerated. @@ -177,4 +176,9 @@ If no value is set, behavior will be the same as the "STABLE" option. +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ | DEFLATE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | Experimental | Experimental | ❌ | +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + | LZ4 | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + | GZIP | ❌ | ❌ | Experimental | Experimental | ❌ | ❌ | ❌ | ❌ | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + ```