From 8163d026b3c56253d9e33c0129fac5d9ba573c53 Mon Sep 17 00:00:00 2001 From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com> Date: Wed, 3 Apr 2024 00:01:52 +0800 Subject: [PATCH] GH-40431: [C++] Move key_hash/key_map/light_array related files to internal for prevent using by users (#40484) ### Rationale for this change These files expose implementation details and APIs that are not meant for third-party use. This PR explicitly marks them internal, which also avoids having them installed. ### Are these changes tested? By existing builds and tests. ### Are there any user-facing changes? No, except hiding some header files that were not supposed to be included externally. * GitHub Issue: #40431 Lead-authored-by: ZhangHuiGui Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/CMakeLists.txt | 10 +++++----- cpp/src/arrow/acero/asof_join_node.cc | 4 ++-- cpp/src/arrow/acero/bloom_filter_test.cc | 2 +- cpp/src/arrow/acero/hash_join_node.cc | 2 +- cpp/src/arrow/acero/hash_join_node.h | 3 ++- cpp/src/arrow/acero/schema_util.h | 14 +++++++------- cpp/src/arrow/acero/swiss_join.cc | 2 +- cpp/src/arrow/acero/swiss_join_internal.h | 4 ++-- .../compute/{key_hash.cc => key_hash_internal.cc} | 4 ++-- .../compute/{key_hash.h => key_hash_internal.h} | 2 +- ...{key_hash_avx2.cc => key_hash_internal_avx2.cc} | 2 +- cpp/src/arrow/compute/key_hash_test.cc | 2 +- .../compute/{key_map.cc => key_map_internal.cc} | 2 +- .../compute/{key_map.h => key_map_internal.h} | 0 .../{key_map_avx2.cc => key_map_internal_avx2.cc} | 2 +- .../{light_array.cc => light_array_internal.cc} | 2 +- .../{light_array.h => light_array_internal.h} | 0 cpp/src/arrow/compute/light_array_test.cc | 2 +- cpp/src/arrow/compute/row/compare_internal.h | 2 +- cpp/src/arrow/compute/row/encode_internal.h | 4 ++-- cpp/src/arrow/compute/row/grouper.cc | 4 ++-- cpp/src/arrow/compute/row/row_internal.h | 2 +- cpp/src/arrow/compute/util.cc | 4 ++-- cpp/src/arrow/compute/util.h | 8 ++++++-- 24 files changed, 44 insertions(+), 39 deletions(-) rename cpp/src/arrow/compute/{key_hash.cc => key_hash_internal.cc} (99%) rename cpp/src/arrow/compute/{key_hash.h => key_hash_internal.h} (99%) rename cpp/src/arrow/compute/{key_hash_avx2.cc => key_hash_internal_avx2.cc} (99%) rename cpp/src/arrow/compute/{key_map.cc => key_map_internal.cc} (99%) rename cpp/src/arrow/compute/{key_map.h => key_map_internal.h} (100%) rename cpp/src/arrow/compute/{key_map_avx2.cc => key_map_internal_avx2.cc} (99%) rename cpp/src/arrow/compute/{light_array.cc => light_array_internal.cc} (99%) rename cpp/src/arrow/compute/{light_array.h => light_array_internal.h} (100%) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 4bf1008af4cd0..617bfedabf373 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -689,9 +689,9 @@ set(ARROW_COMPUTE_SRCS compute/function.cc compute/function_internal.cc compute/kernel.cc - compute/key_hash.cc - compute/key_map.cc - compute/light_array.cc + compute/key_hash_internal.cc + compute/key_map_internal.cc + compute/light_array_internal.cc compute/ordering.cc compute/registry.cc compute/kernels/codegen_internal.cc @@ -717,8 +717,8 @@ set(ARROW_COMPUTE_SRCS compute/row/row_internal.cc compute/util.cc) -append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_avx2.cc) -append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_avx2.cc) +append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) +append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc) append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc) diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index cf0d475c1d770..48cc83dd3d6a9 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -45,8 +45,8 @@ #include "arrow/compute/function_internal.h" #endif #include "arrow/acero/time_series_util.h" -#include "arrow/compute/key_hash.h" -#include "arrow/compute/light_array.h" +#include "arrow/compute/key_hash_internal.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc b/cpp/src/arrow/acero/bloom_filter_test.cc index bad331cfd99d1..a2d6e9575a1aa 100644 --- a/cpp/src/arrow/acero/bloom_filter_test.cc +++ b/cpp/src/arrow/acero/bloom_filter_test.cc @@ -27,7 +27,7 @@ #include "arrow/acero/task_util.h" #include "arrow/acero/test_util_internal.h" #include "arrow/acero/util.h" -#include "arrow/compute/key_hash.h" +#include "arrow/compute/key_hash_internal.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/config.h" #include "arrow/util/cpu_info.h" diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc index c0179fd160e4e..b49364300dac8 100644 --- a/cpp/src/arrow/acero/hash_join_node.cc +++ b/cpp/src/arrow/acero/hash_join_node.cc @@ -27,7 +27,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/schema_util.h" #include "arrow/acero/util.h" -#include "arrow/compute/key_hash.h" +#include "arrow/compute/key_hash_internal.h" #include "arrow/util/checked_cast.h" #include "arrow/util/future.h" #include "arrow/util/thread_pool.h" diff --git a/cpp/src/arrow/acero/hash_join_node.h b/cpp/src/arrow/acero/hash_join_node.h index cca64d59830b2..ad60019ceabc4 100644 --- a/cpp/src/arrow/acero/hash_join_node.h +++ b/cpp/src/arrow/acero/hash_join_node.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include "arrow/acero/options.h" @@ -88,7 +89,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema { const Expression& filter); bool PayloadIsEmpty(int side) { - ARROW_DCHECK(side == 0 || side == 1); + assert(side == 0 || side == 1); return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0; } diff --git a/cpp/src/arrow/acero/schema_util.h b/cpp/src/arrow/acero/schema_util.h index 6760022feb4be..db3076a58841a 100644 --- a/cpp/src/arrow/acero/schema_util.h +++ b/cpp/src/arrow/acero/schema_util.h @@ -17,13 +17,13 @@ #pragma once +#include #include #include #include #include -#include "arrow/compute/light_array.h" // for KeyColumnMetadata -#include "arrow/type.h" // for DataType, FieldRef, Field and Schema +#include "arrow/type.h" // for DataType, FieldRef, Field and Schema namespace arrow { @@ -47,8 +47,8 @@ struct SchemaProjectionMap { const int* source_to_base; const int* base_to_target; inline int get(int i) const { - ARROW_DCHECK(i >= 0 && i < num_cols); - ARROW_DCHECK(source_to_base[i] != kMissingField); + assert(i >= 0 && i < num_cols); + assert(source_to_base[i] != kMissingField); return base_to_target[source_to_base[i]]; } }; @@ -66,7 +66,7 @@ class SchemaProjectionMaps { Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema, const std::vector& projection_handles, const std::vector*>& projections) { - ARROW_DCHECK(projection_handles.size() == projections.size()); + assert(projection_handles.size() == projections.size()); ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema)); for (size_t i = 0; i < projections.size(); ++i) { ARROW_RETURN_NOT_OK( @@ -174,7 +174,7 @@ class SchemaProjectionMaps { } } // We should never get here - ARROW_DCHECK(false); + assert(false); return -1; } @@ -207,7 +207,7 @@ class SchemaProjectionMaps { break; } } - ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField); + assert(field_id != SchemaProjectionMap::kMissingField); mapping[i] = field_id; inverse_mapping[field_id] = i; } diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 68b0e37b01aa9..61c8bfe95414e 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -25,7 +25,7 @@ #include "arrow/acero/util.h" #include "arrow/array/util.h" // MakeArrayFromScalar #include "arrow/compute/kernels/row_encoder_internal.h" -#include "arrow/compute/key_hash.h" +#include "arrow/compute/key_hash_internal.h" #include "arrow/compute/row/compare_internal.h" #include "arrow/compute/row/encode_internal.h" #include "arrow/util/bit_util.h" diff --git a/cpp/src/arrow/acero/swiss_join_internal.h b/cpp/src/arrow/acero/swiss_join_internal.h index aa36a61109274..dceb74abe4f1b 100644 --- a/cpp/src/arrow/acero/swiss_join_internal.h +++ b/cpp/src/arrow/acero/swiss_join_internal.h @@ -23,8 +23,8 @@ #include "arrow/acero/schema_util.h" #include "arrow/acero/task_util.h" #include "arrow/compute/kernels/row_encoder_internal.h" -#include "arrow/compute/key_map.h" -#include "arrow/compute/light_array.h" +#include "arrow/compute/key_map_internal.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/encode_internal.h" namespace arrow { diff --git a/cpp/src/arrow/compute/key_hash.cc b/cpp/src/arrow/compute/key_hash_internal.cc similarity index 99% rename from cpp/src/arrow/compute/key_hash.cc rename to cpp/src/arrow/compute/key_hash_internal.cc index 1902b9ce9a88e..a0002efb3faf3 100644 --- a/cpp/src/arrow/compute/key_hash.cc +++ b/cpp/src/arrow/compute/key_hash_internal.cc @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/key_hash.h" +#include "arrow/compute/key_hash_internal.h" #include #include #include -#include "arrow/compute/light_array.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/ubsan.h" diff --git a/cpp/src/arrow/compute/key_hash.h b/cpp/src/arrow/compute/key_hash_internal.h similarity index 99% rename from cpp/src/arrow/compute/key_hash.h rename to cpp/src/arrow/compute/key_hash_internal.h index 1173df5ed103e..7d226f52086b1 100644 --- a/cpp/src/arrow/compute/key_hash.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -23,7 +23,7 @@ #include -#include "arrow/compute/light_array.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/compute/util.h" namespace arrow { diff --git a/cpp/src/arrow/compute/key_hash_avx2.cc b/cpp/src/arrow/compute/key_hash_internal_avx2.cc similarity index 99% rename from cpp/src/arrow/compute/key_hash_avx2.cc rename to cpp/src/arrow/compute/key_hash_internal_avx2.cc index aec2800c647d7..4def87bd7aa20 100644 --- a/cpp/src/arrow/compute/key_hash_avx2.cc +++ b/cpp/src/arrow/compute/key_hash_internal_avx2.cc @@ -17,7 +17,7 @@ #include -#include "arrow/compute/key_hash.h" +#include "arrow/compute/key_hash_internal.h" #include "arrow/util/bit_util.h" namespace arrow { diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index c998df7169c4a..4e5d869cb7db6 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -23,7 +23,7 @@ #include #include "arrow/array/builder_binary.h" -#include "arrow/compute/key_hash.h" +#include "arrow/compute/key_hash_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" #include "arrow/util/cpu_info.h" diff --git a/cpp/src/arrow/compute/key_map.cc b/cpp/src/arrow/compute/key_map_internal.cc similarity index 99% rename from cpp/src/arrow/compute/key_map.cc rename to cpp/src/arrow/compute/key_map_internal.cc index a027ec811cf24..9e6d60ab5032b 100644 --- a/cpp/src/arrow/compute/key_map.cc +++ b/cpp/src/arrow/compute/key_map_internal.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/key_map.h" +#include "arrow/compute/key_map_internal.h" #include #include diff --git a/cpp/src/arrow/compute/key_map.h b/cpp/src/arrow/compute/key_map_internal.h similarity index 100% rename from cpp/src/arrow/compute/key_map.h rename to cpp/src/arrow/compute/key_map_internal.h diff --git a/cpp/src/arrow/compute/key_map_avx2.cc b/cpp/src/arrow/compute/key_map_internal_avx2.cc similarity index 99% rename from cpp/src/arrow/compute/key_map_avx2.cc rename to cpp/src/arrow/compute/key_map_internal_avx2.cc index 3526a6cb0f344..8c98166f49269 100644 --- a/cpp/src/arrow/compute/key_map_avx2.cc +++ b/cpp/src/arrow/compute/key_map_internal_avx2.cc @@ -17,7 +17,7 @@ #include -#include "arrow/compute/key_map.h" +#include "arrow/compute/key_map_internal.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/compute/light_array.cc b/cpp/src/arrow/compute/light_array_internal.cc similarity index 99% rename from cpp/src/arrow/compute/light_array.cc rename to cpp/src/arrow/compute/light_array_internal.cc index b225e04b05cea..4f235925d0fb6 100644 --- a/cpp/src/arrow/compute/light_array.cc +++ b/cpp/src/arrow/compute/light_array_internal.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/light_array.h" +#include "arrow/compute/light_array_internal.h" #include diff --git a/cpp/src/arrow/compute/light_array.h b/cpp/src/arrow/compute/light_array_internal.h similarity index 100% rename from cpp/src/arrow/compute/light_array.h rename to cpp/src/arrow/compute/light_array_internal.h diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index ecc5f3ad37931..08f36ee606025 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/light_array.h" +#include "arrow/compute/light_array_internal.h" #include #include diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index db953fbe11271..b039ca97ff978 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -19,7 +19,7 @@ #include -#include "arrow/compute/light_array.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/encode_internal.h" #include "arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" diff --git a/cpp/src/arrow/compute/row/encode_internal.h b/cpp/src/arrow/compute/row/encode_internal.h index 6091fb66982af..2afc150530b9e 100644 --- a/cpp/src/arrow/compute/row/encode_internal.h +++ b/cpp/src/arrow/compute/row/encode_internal.h @@ -22,8 +22,8 @@ #include #include "arrow/array/data.h" -#include "arrow/compute/key_map.h" -#include "arrow/compute/light_array.h" +#include "arrow/compute/key_map_internal.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" #include "arrow/memory_pool.h" diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index 5e23eda16fda2..756c70967ac6f 100644 --- a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -26,8 +26,8 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/function.h" #include "arrow/compute/kernels/row_encoder_internal.h" -#include "arrow/compute/key_hash.h" -#include "arrow/compute/light_array.h" +#include "arrow/compute/key_hash_internal.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/compare_internal.h" #include "arrow/compute/row/grouper_internal.h" diff --git a/cpp/src/arrow/compute/row/row_internal.h b/cpp/src/arrow/compute/row/row_internal.h index c9194267aa3fe..3220b7ffe6e40 100644 --- a/cpp/src/arrow/compute/row/row_internal.h +++ b/cpp/src/arrow/compute/row/row_internal.h @@ -20,7 +20,7 @@ #include #include "arrow/buffer.h" -#include "arrow/compute/light_array.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index 2058ba9f30757..b0c863b26a062 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -32,7 +32,7 @@ using internal::CpuInfo; namespace util { void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { - int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t); + int64_t new_top = top_ + EstimatedAllocationSize(num_bytes); // Stack overflow check (see GH-39582). // XXX cannot return a regular Status because most consumers do not either. ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow"; @@ -48,7 +48,7 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { void TempVectorStack::release(int id, uint32_t num_bytes) { ARROW_DCHECK(num_vectors_ == id + 1); - int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t); + int64_t size = EstimatedAllocationSize(num_bytes); ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[-1] == kGuard2); ARROW_DCHECK(top_ >= size); diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index e2a2e71b8521d..88dce160ce936 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -89,7 +89,7 @@ class ARROW_EXPORT TempVectorStack { Status Init(MemoryPool* pool, int64_t size) { num_vectors_ = 0; top_ = 0; - buffer_size_ = PaddedAllocationSize(size) + kPadding + 2 * sizeof(uint64_t); + buffer_size_ = EstimatedAllocationSize(size); ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); // Ensure later operations don't accidentally read uninitialized memory. std::memset(buffer->mutable_data(), 0xFF, size); @@ -98,7 +98,11 @@ class ARROW_EXPORT TempVectorStack { } private: - int64_t PaddedAllocationSize(int64_t num_bytes) { + static int64_t EstimatedAllocationSize(int64_t size) { + return PaddedAllocationSize(size) + 2 * sizeof(uint64_t); + } + + static int64_t PaddedAllocationSize(int64_t num_bytes) { // Round up allocation size to multiple of 8 bytes // to avoid returning temp vectors with unaligned address. //