From 76fb7eee23a017545d9ef29c08f0b33f7e809e71 Mon Sep 17 00:00:00 2001
From: William Ayd
Date: Thu, 30 Jan 2025 13:00:21 -0500
Subject: [PATCH] Implement ViewBinaryViewArrayAsBytes (#709)

---
 src/nanoarrow/hpp/view.hpp     | 64 ++++++++++++++++++++++++++++++++++
 src/nanoarrow/hpp/view_test.cc | 62 +++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/src/nanoarrow/hpp/view.hpp b/src/nanoarrow/hpp/view.hpp
index 0b235c5ad..b91975839 100644
--- a/src/nanoarrow/hpp/view.hpp
+++ b/src/nanoarrow/hpp/view.hpp
@@ -234,6 +234,70 @@ class ViewArrayAsBytes {
   value_type operator[](int64_t i) const { return range_.get(i); }
 };
 
+/// \brief A range-for compatible wrapper for ArrowArray of binary view or string view
+///
+/// Provides a sequence of optional<ArrowStringView> referencing each non-null
+/// slot of the wrapped array; null slots yield NA. Elements do not own their
+/// bytes: they point into the buffers of the wrapped array, which must
+/// therefore outlive any value obtained from this range.
+class ViewBinaryViewArrayAsBytes {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const union ArrowBinaryView* inline_data;
+    const void** variadic_buffers;
+
+    internal::Maybe<ArrowStringView> operator()(int64_t i) const {
+      // A missing validity bitmap means that every slot is valid
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        const union ArrowBinaryView* bv = &inline_data[i];
+        // Short values are stored in the view element itself
+        if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
+          return ArrowStringView{reinterpret_cast<const char*>(bv->inlined.data),
+                                 bv->inlined.size};
+        }
+
+        // Longer values are located by buffer index and offset
+        return ArrowStringView{
+            reinterpret_cast<const char*>(variadic_buffers[bv->ref.buffer_index]) +
+                bv->ref.offset,
+            bv->ref.size};
+      }
+      return NA;
+    }
+  };
+
+  internal::RandomAccessRange<Get> range_;
+
+ public:
+  ViewBinaryViewArrayAsBytes(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.as_binary_view,
+                array_view->variadic_buffers,
+            },
+            array_view->offset,
+            array_view->length,
+        } {}
+
+  ViewBinaryViewArrayAsBytes(const ArrowArray* array)
+      : range_{
+            Get{static_cast<const uint8_t*>(array->buffers[0]),
+                static_cast<const union ArrowBinaryView*>(array->buffers[1]),
+                // Variadic data buffers follow the fixed validity and view buffers
+                array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS},
+            array->offset,
+            array->length,
+        } {}
+
+  using value_type = typename internal::RandomAccessRange<Get>::value_type;
+  using const_iterator = typename internal::RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
 /// \brief A range-for compatible wrapper for ArrowArray of fixed size binary
 ///
 /// Provides a sequence of optional<ArrowStringView> referencing each non-null
diff --git a/src/nanoarrow/hpp/view_test.cc b/src/nanoarrow/hpp/view_test.cc
index e4cfa499a..37a9342e1 100644
--- a/src/nanoarrow/hpp/view_test.cc
+++ b/src/nanoarrow/hpp/view_test.cc
@@ -82,6 +82,68 @@ TEST(NanoarrowHppTest, NanoarrowHppViewArrayAsBytesTest) {
   }
 }
 
+// Parameterized over (slice offset, view type) so that sliced and unsliced
+// arrays are checked for both the binary view and the string view types.
+class BinaryViewTypeTestFixture
+    : public ::testing::TestWithParam<std::tuple<int, enum ArrowType>> {};
+
+TEST_P(BinaryViewTypeTestFixture, NanoarrowHppViewBinaryViewArrayAsBytesTest) {
+  using namespace nanoarrow::literals;
+
+  nanoarrow::UniqueArray array{};
+  const auto [offset, type] = GetParam();
+  ASSERT_EQ(ArrowArrayInitFromType(array.get(), type), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayStartAppending(array.get()), NANOARROW_OK);
+
+  // Slots in order: short value, null, long value, null
+  ASSERT_EQ(ArrowArrayAppendString(array.get(), "foo"_asv), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendNull(array.get(), 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendString(array.get(), "this_string_is_longer_than_inline"_asv),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendNull(array.get(), 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayFinishBuildingDefault(array.get(), nullptr), NANOARROW_OK);
+  // Slice the array so that iteration starts at `offset`
+  array->offset = offset;
+  array->length = array->length - offset;
+
+  // Entries for the null slots are placeholders that are never compared
+  ArrowStringView expected[] = {"foo"_asv, ""_asv,
+                                "this_string_is_longer_than_inline"_asv, ""_asv};
+
+  nanoarrow::UniqueArrayView array_view{};
+  ArrowArrayViewInitFromType(array_view.get(), type);
+  ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK);
+
+  int i = offset;
+  for (auto slot : nanoarrow::ViewBinaryViewArrayAsBytes(array.get())) {
+    if (i == 1 || i == 3) {
+      EXPECT_EQ(slot, nanoarrow::NA);
+    } else {
+      EXPECT_EQ(slot, expected[i]);
+    }
+    ++i;
+  }
+  // Iteration must end after the last of the four appended slots
+  EXPECT_EQ(i, 4);
+
+  i = offset;
+  for (auto slot : nanoarrow::ViewBinaryViewArrayAsBytes(array_view.get())) {
+    if (i == 1 || i == 3) {
+      EXPECT_EQ(slot, nanoarrow::NA);
+    } else {
+      EXPECT_EQ(slot, expected[i]);
+    }
+    ++i;
+  }
+  EXPECT_EQ(i, 4);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NanoarrowHppTest, BinaryViewTypeTestFixture,
+    ::testing::Combine(::testing::Values(0, 2),
+                       ::testing::Values(NANOARROW_TYPE_BINARY_VIEW,
+                                         NANOARROW_TYPE_STRING_VIEW)));
+
 TEST(NanoarrowHppTest, NanoarrowHppViewArrayAsFixedSizeBytesTest) {
   using namespace nanoarrow::literals;
 