Skip to content

Commit

Permalink
Refactor pinned memory vector and ORC+Parquet writers (rapidsai#13206)
Browse files Browse the repository at this point in the history
Currently, `cudf::detail::pinned_allocator` is used in various places to implement a pinned host vector. This standardizes such usage, removing `cudf::detail::pinned_allocator` from the usage sites and replacing its usage by a standard `cudf::detail::pinned_host_vector` instead.

Some small changes are also made for ORC/Parquet writer classes, replacing `bool _single_write_mode` by `SingleWriteMode _single_write_mode`.

This is needed for rapidsai#13076.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Yunsong Wang (https://github.com/PointKernel)

URL: rapidsai#13206
  • Loading branch information
ttnghia authored Apr 25, 2023
1 parent 777c1f4 commit 848dd5a
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 190 deletions.
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ outputs:
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
- test -f $PREFIX/include/cudf/detail/utilities/logger.hpp
- test -f $PREFIX/include/cudf/detail/utilities/pinned_allocator.hpp
- test -f $PREFIX/include/cudf/detail/utilities/pinned_host_vector.hpp
- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
- test -f $PREFIX/include/cudf/detail/utilities/visitor_overload.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
Expand Down
5 changes: 2 additions & 3 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <cudf_test/file_utilities.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/pinned_allocator.hpp>
#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/detail/bgzip_utils.hpp>
Expand All @@ -33,7 +33,6 @@
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <thrust/host_vector.h>
#include <thrust/transform.h>

#include <nvbench/nvbench.cuh>
Expand Down Expand Up @@ -136,7 +135,7 @@ static void bench_multibyte_split(nvbench::state& state,
std::unique_ptr<cudf::io::datasource> datasource;
auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
auto host_input = std::vector<char>{};
auto host_pinned_input = thrust::host_vector<char, cudf::detail::pinned_allocator<char>>{};
auto host_pinned_input = cudf::detail::pinned_host_vector<char>{};

if (source_type != data_chunk_source_type::device &&
source_type != data_chunk_source_type::host_pinned) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2008-2022 NVIDIA Corporation
* Copyright 2008-2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,6 +22,8 @@

#include <cudf/utilities/error.hpp>

#include <thrust/host_vector.h>

namespace cudf::detail {

/*! \p pinned_allocator is a CUDA-specific host memory allocator
Expand Down Expand Up @@ -199,4 +201,11 @@ class pinned_allocator {
return !operator==(x);
}
};

/**
* @brief A vector class with pinned host memory allocator
*/
template <typename T>
using pinned_host_vector = thrust::host_vector<T, pinned_allocator<T>>;

} // namespace cudf::detail
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/detail/orc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class writer {
*/
explicit writer(std::unique_ptr<cudf::io::data_sink> sink,
orc_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand All @@ -109,7 +109,7 @@ class writer {
*/
explicit writer(std::unique_ptr<cudf::io::data_sink> sink,
chunked_orc_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/detail/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class writer {
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
parquet_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand All @@ -171,7 +171,7 @@ class writer {
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
chunked_parquet_writer_options const& options,
SingleWriteMode mode,
single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/detail/utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,7 @@ namespace detail {
/**
* @brief Whether writer writes in chunks or all at once
*/
enum class SingleWriteMode : bool { YES, NO };
enum class single_write_mode : bool { YES, NO };
} // namespace detail
} // namespace io
} // namespace cudf
8 changes: 4 additions & 4 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ void write_orc(orc_writer_options const& options)
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");

auto writer = std::make_unique<detail_orc::writer>(
std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream());
std::move(sinks[0]), options, io_detail::single_write_mode::YES, cudf::get_default_stream());

writer->write(options.get_table());
}
Expand All @@ -444,7 +444,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");

writer = std::make_unique<detail_orc::writer>(
std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream());
std::move(sinks[0]), options, io_detail::single_write_mode::NO, cudf::get_default_stream());
}

/**
Expand Down Expand Up @@ -519,7 +519,7 @@ std::unique_ptr<std::vector<uint8_t>> write_parquet(parquet_writer_options const

auto sinks = make_datasinks(options.get_sink());
auto writer = std::make_unique<detail_parquet::writer>(
std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream());
std::move(sinks), options, io_detail::single_write_mode::YES, cudf::get_default_stream());

writer->write(options.get_table(), options.get_partitions());

Expand Down Expand Up @@ -575,7 +575,7 @@ parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options co
auto sinks = make_datasinks(options.get_sink());

writer = std::make_unique<detail_parquet::writer>(
std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream());
std::move(sinks), options, io_detail::single_write_mode::NO, cudf::get_default_stream());
}

/**
Expand Down
Loading

0 comments on commit 848dd5a

Please sign in to comment.