diff --git a/Cargo.lock b/Cargo.lock index 8828937c8..ae6897793 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -234,6 +234,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", +] + [[package]] name = "bumpalo" version = "3.14.0" @@ -1492,6 +1501,7 @@ dependencies = [ "atomic-traits", "bitflags 2.4.2", "bitvec", + "bstr", "enum-map", "heapless", "libc", diff --git a/pgrx-tests/src/tests/array_borrowed.rs b/pgrx-tests/src/tests/array_borrowed.rs new file mode 100644 index 000000000..dc59bd47b --- /dev/null +++ b/pgrx-tests/src/tests/array_borrowed.rs @@ -0,0 +1,497 @@ +//LICENSE Portions Copyright 2019-2021 ZomboDB, LLC. +//LICENSE +//LICENSE Portions Copyright 2021-2023 Technology Concepts & Design, Inc. +//LICENSE +//LICENSE Portions Copyright 2023-2023 PgCentral Foundation, Inc. +//LICENSE +//LICENSE All rights reserved. +//LICENSE +//LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
+#![allow(unused_imports)] +use core::ffi::CStr; +use pgrx::array::{FlatArray, RawArray}; +use pgrx::nullable::Nullable; +use pgrx::prelude::*; +use pgrx::Json; +use pgrx::PostgresEnum; +use serde::Serialize; +use serde_json::json; + +#[pg_extern(name = "borrow_sum_array")] +fn borrow_sum_array_i32(values: &FlatArray<'_, i32>) -> i32 { + // we implement it this way so we can trap an overflow (as we have a test for this) and + // catch it correctly in both --debug and --release modes + let mut sum = 0_i32; + for v in values { + let v = v.into_option().copied().unwrap_or(0); + let (val, overflow) = sum.overflowing_add(v); + if overflow { + panic!("attempt to add with overflow"); + } else { + sum = val; + } + } + sum +} + +#[pg_extern(name = "borrow_sum_array")] +fn borrow_sum_array_i64(values: &FlatArray<'_, i64>) -> i64 { + values.iter().map(|v| v.into_option().copied().unwrap_or(0i64)).sum() +} + +#[pg_extern] +fn borrow_count_true(values: &FlatArray<'_, bool>) -> i32 { + values.iter().filter(|b| b.into_option().copied().unwrap_or(false)).count() as i32 +} + +#[pg_extern] +fn borrow_count_nulls(values: &FlatArray<'_, i32>) -> i32 { + values.iter().map(|v| v.as_option().is_none()).filter(|v| *v).count() as i32 +} + +#[pg_extern] +fn borrow_optional_array_arg(values: Option<&FlatArray<'_, f32>>) -> f32 { + values.unwrap().iter().map(|v| v.into_option().copied().unwrap_or(0f32)).sum() +} + +#[pg_extern] +fn borrow_optional_array_with_default( + values: default!(Option<&FlatArray<'_, i32>>, "NULL"), +) -> i32 { + values.unwrap().iter().map(|v| v.into_option().copied().unwrap_or(0)).sum() +} + +// TODO: fix this test by fixing serde impls for `FlatArray<'a, &'a str> -> Json` +// #[pg_extern] +// fn borrow_serde_serialize_array<'dat>(values: &FlatArray<'dat, &'dat str>) -> Json { +// Json(json! { { "values": values } }) +// } + +// FIXME: serialize for FlatArray? 
+// #[pg_extern] +// fn borrow_serde_serialize_array_i32(values: &FlatArray<'_, i32>) -> Json { +// Json(json! { { "values": values } }) +// } + +#[pg_extern] +fn borrow_return_text_array() -> Vec<&'static str> { + vec!["a", "b", "c", "d"] +} + +#[pg_extern] +fn borrow_return_zero_length_vec() -> Vec { + Vec::new() +} + +#[pg_extern] +fn borrow_get_arr_nelems(arr: &FlatArray<'_, i32>) -> libc::c_int { + arr.count() as _ +} + +#[pg_extern] +fn borrow_get_arr_data_ptr_nth_elem(arr: &FlatArray<'_, i32>, elem: i32) -> Option { + arr.nth(elem as usize).unwrap().into_option().copied() +} + +#[pg_extern] +fn borrow_display_get_arr_nullbitmap(arr: &FlatArray<'_, i32>) -> String { + if let Some(slice) = arr.nulls() { + // No `unsafe` is involved: `nulls()` already returns the bitmap as a borrowed byte slice. + // Only the first bitmap byte is rendered, as `0b` followed by eight binary digits. + // Indexing `[0]` panics if the bitmap slice is empty. + format!("{:#010b}", slice[0]) + } else { + String::from("") + } +} + +#[pg_extern] +fn borrow_get_arr_ndim(arr: &FlatArray<'_, i32>) -> libc::c_int { + // The dimension count is the length of the value returned by `dims()`. + arr.dims().len() as libc::c_int +} + +// This deliberately iterates the FlatArray. +// Because FlatArray::iter currently iterates the FlatArray as Datums, this is guaranteed to be "bug-free" regarding size. +#[pg_extern] +fn borrow_arr_mapped_vec(arr: &FlatArray<'_, i32>) -> Vec { + arr.iter().filter_map(|x| x.into_option()).copied().collect() +} + +/// Naive conversion. +#[pg_extern] +#[allow(deprecated)] +fn borrow_arr_into_vec(arr: &FlatArray<'_, i32>) -> Vec { + arr.iter_non_null().copied().collect() +} + +#[pg_extern] +#[allow(deprecated)] +fn borrow_arr_sort_uniq(arr: &FlatArray<'_, i32>) -> Vec { + let mut v: Vec = arr.iter_non_null().copied().collect(); + v.sort(); + v.dedup(); + v +} + +// FIXME: BorrowDatum for PostgresEnum? 
+// #[derive(Debug, Eq, PartialEq, PostgresEnum, Serialize)] +// pub enum BorrowFlatArrayTestEnum { +// One, +// Two, +// Three, +// } + +// #[pg_extern] +// fn borrow_enum_array_roundtrip( +// a: &FlatArray<'_, BorrowFlatArrayTestEnum>, +// ) -> Vec> { +// a.iter().cloned().collect() +// } + +#[pg_extern] +fn borrow_validate_cstring_array( + a: &FlatArray<'_, CStr>, +) -> std::result::Result> { + assert_eq!( + a.iter().map(|v| v.into_option()).collect::>(), + vec![ + Some(c"one"), + Some(c"two"), + None, + Some(c"four"), + Some(c"five"), + None, + Some(c"seven"), + None, + None + ] + ); + Ok(true) +} + +#[cfg(any(test, feature = "pg_test"))] +#[pgrx::pg_schema] +mod tests { + use crate as pgrx_tests; + + use super::*; + use pgrx::prelude::*; + use pgrx::{IntoDatum, Json}; + use serde_json::json; + + // #[pg_test] + // fn borrow_test_enum_array_roundtrip() -> spi::Result<()> { + // let a = Spi::get_one::>>( + // "SELECT borrow_enum_array_roundtrip(ARRAY['One', 'Two']::BorrowFlatArrayTestEnum[])", + // )? 
+ // .expect("SPI result was null"); + // assert_eq!(a, vec![Some(BorrowFlatArrayTestEnum::One), Some(BorrowFlatArrayTestEnum::Two)]); + // Ok(()) + // } + + #[pg_test] + fn borrow_test_sum_array_i32() { + let sum = Spi::get_one::("SELECT borrow_sum_array(ARRAY[1,2,3]::integer[])"); + assert_eq!(sum, Ok(Some(6))); + } + + #[pg_test] + fn borrow_test_sum_array_i64() { + let sum = Spi::get_one::("SELECT borrow_sum_array(ARRAY[1,2,3]::bigint[])"); + assert_eq!(sum, Ok(Some(6))); + } + + #[pg_test(expected = "attempt to add with overflow")] + fn borrow_test_sum_array_i32_overflow() -> Result, pgrx::spi::Error> { + Spi::get_one::( + "SELECT borrow_sum_array(a) FROM (SELECT array_agg(s) a FROM generate_series(1, 1000000) s) x;", + ) + } + + #[pg_test] + fn borrow_test_count_true() { + let cnt = Spi::get_one::("SELECT borrow_count_true(ARRAY[true, true, false, true])"); + assert_eq!(cnt, Ok(Some(3))); + } + + #[pg_test] + fn borrow_test_count_nulls() { + let cnt = + Spi::get_one::("SELECT borrow_count_nulls(ARRAY[NULL, 1, 2, NULL]::integer[])"); + assert_eq!(cnt, Ok(Some(2))); + } + + #[pg_test] + fn borrow_test_optional_array() { + let sum = Spi::get_one::("SELECT borrow_optional_array_arg(ARRAY[1,2,3]::real[])"); + assert_eq!(sum, Ok(Some(6f32))); + } + + // TODO: fix this test by redesigning SPI. + // #[pg_test] + // fn borrow_test_serde_serialize_array() -> Result<(), pgrx::spi::Error> { + // let json = Spi::get_one::( + // "SELECT borrow_serde_serialize_array(ARRAY['one', null, 'two', 'three'])", + // )? + // .expect("returned json was null"); + // assert_eq!(json.0, json! 
{{"values": ["one", null, "two", "three"]}}); + // Ok(()) + // } + + #[pg_test] + fn borrow_test_optional_array_with_default() { + let sum = Spi::get_one::("SELECT borrow_optional_array_with_default(ARRAY[1,2,3])"); + assert_eq!(sum, Ok(Some(6))); + } + + // #[pg_test] + // fn borrow_test_serde_serialize_array_i32() -> Result<(), pgrx::spi::Error> { + // let json = Spi::get_one::( + // "SELECT borrow_serde_serialize_array_i32(ARRAY[1, null, 2, 3, null, 4, 5])", + // )? + // .expect("returned json was null"); + // assert_eq!(json.0, json! {{"values": [1,null,2,3,null,4, 5]}}); + // Ok(()) + // } + + #[pg_test] + fn borrow_test_return_text_array() { + let rc = Spi::get_one::("SELECT ARRAY['a', 'b', 'c', 'd'] = return_text_array();"); + assert_eq!(rc, Ok(Some(true))); + } + + #[pg_test] + fn borrow_test_return_zero_length_vec() { + let rc = Spi::get_one::("SELECT ARRAY[]::integer[] = return_zero_length_vec();"); + assert_eq!(rc, Ok(Some(true))); + } + + // #[pg_test] + // fn borrow_test_slice_to_array() -> Result<(), pgrx::spi::Error> { + // let owned_vec = vec![Some(1), None, Some(2), Some(3), None, Some(4), Some(5)]; + // let json = Spi::connect(|client| { + // client + // .select( + // "SELECT borrow_serde_serialize_array_i32($1)", + // None, + // Some(vec![( + // PgBuiltInOids::INT4ARRAYOID.oid(), + // owned_vec.as_slice().into_datum(), + // )]), + // )? + // .first() + // .get_one::() + // })? + // .expect("Failed to return json even though it's right there ^^"); + // assert_eq!(json.0, json! 
{{"values": [1, null, 2, 3, null, 4, 5]}}); + // Ok(()) + // } + + #[pg_test] + fn borrow_test_arr_data_ptr() { + let len = Spi::get_one::("SELECT borrow_get_arr_nelems('{1,2,3,4,5}'::int[])"); + assert_eq!(len, Ok(Some(5))); + } + + #[pg_test] + fn borrow_test_get_arr_data_ptr_nth_elem() { + let nth = + Spi::get_one::("SELECT borrow_get_arr_data_ptr_nth_elem('{1,2,3,4,5}'::int[], 2)"); + assert_eq!(nth, Ok(Some(3))); + } + + #[pg_test] + fn borrow_test_display_get_arr_nullbitmap() -> Result<(), pgrx::spi::Error> { + let bitmap_str = Spi::get_one::( + "SELECT borrow_display_get_arr_nullbitmap(ARRAY[1,NULL,3,NULL,5]::int[])", + )? + .expect("datum was null"); + + assert_eq!(bitmap_str, "0b00010101"); + + let bitmap_str = Spi::get_one::( + "SELECT borrow_display_get_arr_nullbitmap(ARRAY[1,2,3,4,5]::int[])", + )? + .expect("datum was null"); + + assert_eq!(bitmap_str, ""); + Ok(()) + } + + #[pg_test] + fn borrow_test_get_arr_ndim() -> Result<(), pgrx::spi::Error> { + let ndim = Spi::get_one::("SELECT borrow_get_arr_ndim(ARRAY[1,2,3,4,5]::int[])")? + .expect("datum was null"); + + assert_eq!(ndim, 1); + + let ndim = Spi::get_one::("SELECT borrow_get_arr_ndim('{{1,2,3},{4,5,6}}'::int[])")? + .expect("datum was null"); + + assert_eq!(ndim, 2); + Ok(()) + } + + #[pg_test] + fn borrow_test_arr_to_vec() { + let result = + Spi::get_one::>("SELECT borrow_arr_mapped_vec(ARRAY[3,2,2,1]::integer[])"); + let other = + Spi::get_one::>("SELECT borrow_arr_into_vec(ARRAY[3,2,2,1]::integer[])"); + // One should be equivalent to the canonical form. + assert_eq!(result, Ok(Some(vec![3, 2, 2, 1]))); + // And they should be equal to each other. 
+ assert_eq!(result, other); + } + + #[pg_test] + fn borrow_test_arr_sort_uniq() { + let result = + Spi::get_one::>("SELECT borrow_arr_sort_uniq(ARRAY[3,2,2,1]::integer[])"); + assert_eq!(result, Ok(Some(vec![1, 2, 3]))); + } + + #[pg_test] + fn borrow_test_arr_sort_uniq_with_null() { + let result = + Spi::get_one::>("SELECT borrow_arr_sort_uniq(ARRAY[3,2,NULL,2,1]::integer[])"); + assert_eq!(result, Ok(Some(vec![1, 2, 3]))); + } + + #[pg_test] + fn borrow_test_cstring_array() -> Result<(), pgrx::spi::Error> { + let strings = Spi::get_one::("SELECT borrow_validate_cstring_array(ARRAY['one', 'two', NULL, 'four', 'five', NULL, 'seven', NULL, NULL]::cstring[])")?.expect("datum was NULL"); + assert_eq!(strings, true); + Ok(()) + } + + // FIXME: lol SPI + // #[pg_test] + // fn borrow_test_f64_slice() -> Result<(), Box> { + // let array = Spi::get_one::<&FlatArray<'_, f64>>("SELECT ARRAY[1.0, 2.0, 3.0]::float8[]")? + // .expect("datum was null"); + // assert_eq!(array.as_slice()?, &[1.0, 2.0, 3.0]); + // Ok(()) + // } + + // #[pg_test] + // fn borrow_test_f32_slice() -> Result<(), Box> { + // let array = Spi::get_one::<&FlatArray<'_, f32>>("SELECT ARRAY[1.0, 2.0, 3.0]::float4[]")? + // .expect("datum was null"); + // assert_eq!(array.as_slice()?, &[1.0, 2.0, 3.0]); + // Ok(()) + // } + + // #[pg_test] + // fn borrow_test_i64_slice() -> Result<(), Box> { + // let array = Spi::get_one::<&FlatArray<'_, i64>>("SELECT ARRAY[1, 2, 3]::bigint[]")? + // .expect("datum was null"); + // assert_eq!(array.as_slice()?, &[1, 2, 3]); + // Ok(()) + // } + + // #[pg_test] + // fn borrow_test_i32_slice() -> Result<(), Box> { + // let array = Spi::get_one::<&FlatArray<'_, i32>>("SELECT ARRAY[1, 2, 3]::integer[]")? + // .expect("datum was null"); + // assert_eq!(array.as_slice()?, &[1, 2, 3]); + // Ok(()) + // } + + // #[pg_test] + // fn borrow_test_i16_slice() -> Result<(), Box> { + // let array = Spi::get_one::<&FlatArray<'_, i16>>("SELECT ARRAY[1, 2, 3]::smallint[]")? 
+ // .expect("datum was null"); + // assert_eq!(array.as_slice()?, &[1, 2, 3]); + // Ok(()) + // } + + // #[pg_test] + // fn borrow_test_slice_with_null() -> Result<(), Box> { + // let array = Spi::get_one::>("SELECT ARRAY[1, 2, 3, NULL]::smallint[]")? + // .expect("datum was null"); + // assert_eq!(array.as_slice(), Err(FlatArraySliceError::ContainsNulls)); + // Ok(()) + // } + + // #[pg_test] + // fn borrow_test_array_of_points() -> Result<(), Box> { + // let points: &FlatArray<'_, pg_sys::Point> = Spi::get_one( + // "SELECT ARRAY['(1,1)', '(2, 2)', '(3,3)', '(4,4)', NULL, '(5,5)']::point[]", + // )? + // .unwrap(); + // let points = points.into_iter().collect::>(); + // let expected = vec![ + // Some(pg_sys::Point { x: 1.0, y: 1.0 }), + // Some(pg_sys::Point { x: 2.0, y: 2.0 }), + // Some(pg_sys::Point { x: 3.0, y: 3.0 }), + // Some(pg_sys::Point { x: 4.0, y: 4.0 }), + // None, + // Some(pg_sys::Point { x: 5.0, y: 5.0 }), + // ]; + + // for (p, expected) in points.into_iter().zip(expected.into_iter()) { + // match (p, expected) { + // (Some(l), Some(r)) => { + // assert_eq!(l.x, r.x); + // assert_eq!(l.y, r.y); + // } + // (None, None) => (), + // _ => panic!("points not equal"), + // } + // } + // Ok(()) + // } + + // FIXME: needs to be type-subbed to a stringly type and SPI needs to make sense + // #[pg_test] + // fn borrow_test_text_array_as_vec_string() -> Result<(), Box> { + // let a = Spi::get_one::<&FlatArray<'_, String>>( + // "SELECT ARRAY[NULL, NULL, NULL, NULL, 'the fifth element']::text[]", + // )? + // .expect("spi result was NULL") + // .into_iter() + // .collect::>(); + // assert_eq!(a, vec![None, None, None, None, Some(String::from("the fifth element"))]); + // Ok(()) + // } + + // FIXME: needs to be type-subbed to a stringly type + // #[pg_test] + // fn borrow_test_text_array_iter() -> Result<(), Box> { + // let a = Spi::get_one::<&FlatArray<'_, String>>( + // "SELECT ARRAY[NULL, NULL, NULL, NULL, 'the fifth element']::text[]", + // )? 
+ // .expect("spi result was NULL"); + + // let mut iter = a.iter(); + + // assert_eq!(iter.next(), Some(None)); + // assert_eq!(iter.next(), Some(None)); + // assert_eq!(iter.next(), Some(None)); + // assert_eq!(iter.next(), Some(None)); + // assert_eq!(iter.next(), Some(Some(String::from("the fifth element")))); + // assert_eq!(iter.next(), None); + + // Ok(()) + // } + + // FIXME: Needs to be type-subbed to a stringly type + // #[pg_test] + // fn borrow_test_text_array_via_getter() -> Result<(), Box> { + // let a = Spi::get_one::<&FlatArray<'_, String>>( + // "SELECT ARRAY[NULL, NULL, NULL, NULL, 'the fifth element']::text[]", + // )? + // .expect("spi result was NULL"); + + // assert_eq!(a.nth(0), Some(None)); + // assert_eq!(a.nth(1), Some(None)); + // assert_eq!(a.nth(2), Some(None)); + // assert_eq!(a.nth(3), Some(None)); + // assert_eq!(a.nth(4), Some(Some(String::from("the fifth element")))); + // assert_eq!(a.nth(5), None); + + // Ok(()) + // } +} diff --git a/pgrx-tests/src/tests/array_tests.rs b/pgrx-tests/src/tests/array_tests.rs index 855f284a0..b77ddd44c 100644 --- a/pgrx-tests/src/tests/array_tests.rs +++ b/pgrx-tests/src/tests/array_tests.rs @@ -218,6 +218,7 @@ mod tests { #[pg_test(expected = "attempt to add with overflow")] fn test_sum_array_i32_overflow() -> Result, pgrx::spi::Error> { + // Note that this test is calling a builtin, array_agg Spi::get_one::( "SELECT sum_array(a) FROM (SELECT array_agg(s) a FROM generate_series(1, 1000000) s) x;", ) diff --git a/pgrx-tests/src/tests/mod.rs b/pgrx-tests/src/tests/mod.rs index fd1658cce..3cbc88a2e 100644 --- a/pgrx-tests/src/tests/mod.rs +++ b/pgrx-tests/src/tests/mod.rs @@ -11,6 +11,7 @@ mod aggregate_tests; mod anyarray_tests; mod anyelement_tests; mod anynumeric_tests; +mod array_borrowed; mod array_tests; mod attributes_tests; mod bgworker_tests; diff --git a/pgrx/Cargo.toml b/pgrx/Cargo.toml index bcdf8b67f..9382f2e2c 100644 --- a/pgrx/Cargo.toml +++ b/pgrx/Cargo.toml @@ -61,6 +61,7 @@ enum-map 
= "2.6.3" atomic-traits = "0.3.0" # PgAtomic and shmem init bitflags = "2.4.0" # BackgroundWorker bitvec = "1.0" # processing array nullbitmaps +bstr = { version = "1.10", default-features = false} heapless = "0.8" # shmem and PgLwLock libc.workspace = true # FFI type compat seahash = "4.1.0" # derive(PostgresHash) diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index ee3112873..26e4444a3 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -8,16 +8,270 @@ //LICENSE //LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file. #![allow(clippy::precedence)] -use crate::datum::Array; +#![allow(unused)] +#![deny(unsafe_op_in_unsafe_fn)] +use crate::datum::{Array, BorrowDatum, Datum}; +use crate::layout::{Align, Layout}; +use crate::nullable::Nullable; +use crate::pgrx_sql_entity_graph::metadata::{ + ArgumentError, Returns, ReturnsError, SqlMapping, SqlTranslatable, +}; use crate::toast::{Toast, Toasty}; use crate::{layout, pg_sys, varlena}; -use bitvec::ptr::{self as bitptr, BitPtr, BitPtrError, Mut}; -use bitvec::slice::BitSlice; +use bitvec::ptr::{self as bitptr, BitPtr, BitPtrError, Const, Mut}; +use bitvec::slice::{self as bitslice, BitSlice}; +use core::iter::{ExactSizeIterator, FusedIterator}; +use core::marker::PhantomData; use core::ptr::{self, NonNull}; -use core::slice; +use core::{ffi, mem, slice}; mod port; +/** `pg_sys::ArrayType` and its unsized varlena + +# Safety +`&FlatArray<'_, T>` assumes its tail is the remainder of a Postgres array of element `T`. 
+*/ +#[repr(C)] +#[derive(Debug)] +pub struct FlatArray<'mcx, T: ?Sized> { + scalar: PhantomData<&'mcx T>, + head: pg_sys::ArrayType, + tail: [u8], +} + +impl<'mcx, T> FlatArray<'mcx, T> +where + T: ?Sized, +{ + fn as_raw(&self) -> RawArray { + unsafe { + let ptr = NonNull::new_unchecked(ptr::from_ref(self).cast_mut()); + RawArray::from_ptr(ptr.cast()) + } + } + + /// Number of elements in the Array, including nulls + /// + /// Note that for many Arrays, this doesn't have a linear relationship with array byte-len. + #[doc(alias = "cardinality")] + #[doc(alias = "nelems")] + pub fn count(&self) -> usize { + self.as_raw().len() + } + + pub fn dims(&self) -> Dimensions { + // SAFETY: Validity of the ptr and ndim field was asserted upon obtaining the FlatArray ref, + // so can assume the dims ptr is also valid, allowing making the slice. + unsafe { + let ptr = self as *const Self as *const pg_sys::ArrayType; + let ndim = self.head.ndim as usize; + let dims = slice::from_raw_parts(port::ARR_DIMS(ptr.cast_mut()), ndim); + Dimensions { dims } + } + } +} + +impl<'mcx, T> FlatArray<'mcx, T> +where + T: ?Sized + BorrowDatum, +{ + /// Iterate the array + #[doc(alias = "unnest")] + pub fn iter(&self) -> ArrayIter<'_, T> { + let nelems = self.count(); + let raw = self.as_raw(); + let nulls = + raw.nulls_bitptr().map(|p| unsafe { bitslice::from_raw_parts(p, nelems).unwrap() }); + + let data = unsafe { NonNull::new_unchecked(raw.data_ptr().cast_mut()) }; + let arr = self; + let index = 0; + let offset = 0; + let align = Layout::lookup_oid(self.head.elemtype).align; + + ArrayIter { data, nulls, nelems, arr, index, offset, align } + } + + pub fn iter_non_null(&self) -> impl Iterator { + self.iter().filter_map(|elem| elem.into_option()) + } + + /* + /** + Some problems with the design of an iter_mut for FlatArray: + In order to traverse the array, we need to assume well-formedness of e.g. 
cstring/varlena elements, + but &mut would allow safely updating varlenas within their span, e.g. injecting \0 into cstrings. + making it so that nothing allows making an ill-formed varlena via &mut seems untenable, also? + probably only viable to expose &mut for fixed-size types, then + */ + pub fn iter_mut(&mut self) -> ArrayIterMut<'mcx, T> { + ??? + } + */ + /// Borrow the nth element. + /// + /// `FlatArray::nth` may have to iterate the array, thus it is named for `Iterator::nth`, + /// as opposed to a constant-time `get`. + pub fn nth(&self, index: usize) -> Option> { + self.iter().nth(index) + } + + /* + /// Mutably borrow the nth element. + /// + /// `FlatArray::nth_mut` may have to iterate the array, thus it is named for `Iterator::nth`. + pub fn nth_mut(&mut self, index: usize) -> Option> { + // FIXME: consider nullability + // FIXME: Become a dispatch to Iterator::nth + todo!() + } + */ + + pub fn nulls(&self) -> Option<&[u8]> { + let len = self.count() + 7 >> 3; // Obtains 0 if len was 0. + + // SAFETY: This obtains the nulls pointer from a function that must either + // return a null pointer or a pointer to a valid null bitmap. + unsafe { + let nulls_ptr = port::ARR_NULLBITMAP(ptr::addr_of!(self.head).cast_mut()); + ptr::slice_from_raw_parts(nulls_ptr, len).as_ref() + } + } + + /** Oxidized form of [ARR_NULLBITMAP(ArrayType*)][arr_nullbitmap] + + If this returns None, the array *cannot* have nulls. + Note that unlike the `is_null: bool` that appears elsewhere, 1 is "valid" and 0 is "null". + + # Safety + Trailing bits must be set to 0, and all elements marked with 1 must be initialized. + The null bitmap is linear but the layout of elements may be nonlinear, so for some arrays + these cannot be calculated directly from each other. + + [ARR_NULLBITMAP]: + */ + pub unsafe fn nulls_mut(&mut self) -> Option<&mut [u8]> { + let len = self.count() + 7 >> 3; // Obtains 0 if len was 0. 
+ + // SAFETY: This obtains the nulls pointer from a function that must either + // return a null pointer or a pointer to a valid null bitmap. + unsafe { + let nulls_ptr = port::ARR_NULLBITMAP(ptr::addr_of_mut!(self.head)); + ptr::slice_from_raw_parts_mut(nulls_ptr, len).as_mut() + } + } +} + +unsafe impl BorrowDatum for FlatArray<'_, T> { + const PASS: layout::PassBy = layout::PassBy::Ref; + unsafe fn point_from(ptr: ptr::NonNull) -> ptr::NonNull { + unsafe { + let len = + varlena::varsize_any(ptr.as_ptr().cast()) - mem::size_of::(); + ptr::NonNull::new_unchecked( + ptr::slice_from_raw_parts_mut(ptr.as_ptr(), len) as *mut Self + ) + } + } +} + +unsafe impl SqlTranslatable for &FlatArray<'_, T> +where + T: ?Sized + SqlTranslatable, +{ + fn argument_sql() -> Result { + match T::argument_sql()? { + SqlMapping::As(sql) => Ok(SqlMapping::As(format!("{sql}[]"))), + SqlMapping::Skip => Err(ArgumentError::SkipInArray), + SqlMapping::Composite { .. } => Ok(SqlMapping::Composite { array_brackets: true }), + } + } + + fn return_sql() -> Result { + match T::return_sql()? 
{ + Returns::One(SqlMapping::As(sql)) => { + Ok(Returns::One(SqlMapping::As(format!("{sql}[]")))) + } + Returns::One(SqlMapping::Composite { array_brackets: _ }) => { + Ok(Returns::One(SqlMapping::Composite { array_brackets: true })) + } + Returns::One(SqlMapping::Skip) => Err(ReturnsError::SkipInArray), + Returns::SetOf(_) => Err(ReturnsError::SetOfInArray), + Returns::Table(_) => Err(ReturnsError::TableInArray), + } + } +} + +#[derive(Clone)] +pub struct Dimensions<'arr> { + dims: &'arr [ffi::c_int], +} + +impl<'arr> Dimensions<'arr> { + pub fn len(&self) -> usize { + self.dims.len() + } +} + +/// Iterator for arrays +#[derive(Clone)] +pub struct ArrayIter<'arr, T> +where + T: ?Sized + BorrowDatum, +{ + arr: &'arr FlatArray<'arr, T>, + data: ptr::NonNull, + nulls: Option<&'arr BitSlice>, + nelems: usize, + index: usize, + offset: usize, + align: Align, +} + +impl<'arr, T> Iterator for ArrayIter<'arr, T> +where + T: ?Sized + BorrowDatum, +{ + type Item = Nullable<&'arr T>; + + fn next(&mut self) -> Option> { + if self.index >= self.nelems { + return None; + } + let is_null = match self.nulls { + Some(nulls) => !nulls.get(self.index).unwrap(), + None => false, + }; + // note the index freezes when we reach the end, fusing the iterator + self.index += 1; + + if is_null { + // note that we do NOT offset when the value is a null! 
+ Some(Nullable::Null) + } else { + let borrow = unsafe { T::borrow_unchecked(self.data.add(self.offset)) }; + // As we always have a borrow, we just ask Rust what the array element's size is + self.offset += self.align.pad(mem::size_of_val(borrow)); + Some(Nullable::Valid(borrow)) + } + } +} + +impl<'arr, 'mcx, T> IntoIterator for &'arr FlatArray<'mcx, T> +where + T: ?Sized + BorrowDatum, +{ + type IntoIter = ArrayIter<'arr, T>; + type Item = Nullable<&'arr T>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'arr, T> ExactSizeIterator for ArrayIter<'arr, T> where T: ?Sized + BorrowDatum {} +impl<'arr, T> FusedIterator for ArrayIter<'arr, T> where T: ?Sized + BorrowDatum {} + /** An aligned, dereferenceable `NonNull` with low-level accessors. @@ -162,8 +416,7 @@ impl RawArray { } } - /** - A slice of the dimensions. + /** A slice describing the array's dimensions. Oxidized form of [ARR_DIMS(ArrayType*)][ARR_DIMS]. The length will be within 0..=[pg_sys::MAXDIM]. @@ -186,6 +439,7 @@ impl RawArray { } /// The flattened length of the array over every single element. + /// /// Includes all items, even the ones that might be null. /// /// # Panics @@ -226,8 +480,7 @@ impl RawArray { // This field is an "int32" in Postgres } - /** - Equivalent to [ARR_HASNULL(ArrayType*)][ARR_HASNULL]. + /** Equivalent to [ARR_HASNULL(ArrayType*)][ARR_HASNULL]. 
Note this means that it only asserts that there MIGHT be a null @@ -247,16 +500,26 @@ impl RawArray { } #[inline] - fn nulls_bitptr(&mut self) -> Option> { + fn nulls_bitptr(&self) -> Option> { + let nulls_ptr = unsafe { port::ARR_NULLBITMAP(self.ptr.as_ptr()) }.cast_const(); + match BitPtr::try_from(nulls_ptr) { + Ok(ptr) => Some(ptr), + Err(BitPtrError::Null(_)) => None, + Err(BitPtrError::Misaligned(_)) => unreachable!(), + } + } + + #[inline] + fn nulls_mut_bitptr(&mut self) -> Option> { + let nulls_ptr = unsafe { port::ARR_NULLBITMAP(self.ptr.as_ptr()) }; match BitPtr::try_from(self.nulls_mut_ptr()) { Ok(ptr) => Some(ptr), Err(BitPtrError::Null(_)) => None, - Err(BitPtrError::Misaligned(_)) => unreachable!("impossible to misalign *mut u8"), + Err(BitPtrError::Misaligned(_)) => unreachable!(), } } - /** - Oxidized form of [ARR_NULLBITMAP(ArrayType*)][ARR_NULLBITMAP] + /** Oxidized form of [ARR_NULLBITMAP(ArrayType*)][ARR_NULLBITMAP] If this returns None, the array cannot have nulls. If this returns Some, it points to the bitslice that marks nulls in this array. @@ -275,8 +538,8 @@ impl RawArray { NonNull::new(ptr::slice_from_raw_parts_mut(self.nulls_mut_ptr(), len)) } - /** - The [bitvec] equivalent of [RawArray::nulls]. + /** The [bitvec] equivalent of [RawArray::nulls]. + If this returns `None`, the array cannot have nulls. If this returns `Some`, it points to the bitslice that marks nulls in this array. @@ -288,23 +551,21 @@ impl RawArray { [ARR_NULLBITMAP]: */ pub fn nulls_bitslice(&mut self) -> Option>> { - NonNull::new(bitptr::bitslice_from_raw_parts_mut(self.nulls_bitptr()?, self.len())) + NonNull::new(bitptr::bitslice_from_raw_parts_mut(self.nulls_mut_bitptr()?, self.len())) } - /** - Checks the array for any NULL values by assuming it is a proper varlena array, + /** Checks the array for any NULL values # Safety - * This requires every index is valid to read or correctly marked as null. 
+ */ pub unsafe fn any_nulls(&self) -> bool { // SAFETY: Caller asserted safety conditions. unsafe { pg_sys::array_contains_nulls(self.ptr.as_ptr()) } } - /** - Oxidized form of [ARR_DATA_PTR(ArrayType*)][ARR_DATA_PTR] + /** Oxidized form of [ARR_DATA_PTR(ArrayType*)][ARR_DATA_PTR] # Safety diff --git a/pgrx/src/lib.rs b/pgrx/src/lib.rs index 3359a4dc4..fd22eeebd 100644 --- a/pgrx/src/lib.rs +++ b/pgrx/src/lib.rs @@ -73,6 +73,7 @@ pub mod spi; #[cfg(feature = "cshim")] pub mod spinlock; pub mod stringinfo; +pub mod text; pub mod trigger_support; pub mod tupdesc; pub mod varlena; diff --git a/pgrx/src/text.rs b/pgrx/src/text.rs new file mode 100644 index 000000000..03040af15 --- /dev/null +++ b/pgrx/src/text.rs @@ -0,0 +1,153 @@ +#![deny(unsafe_op_in_unsafe_fn)] +use crate::datum::BorrowDatum; +use crate::layout::PassBy; +use crate::pgrx_sql_entity_graph::metadata::{ + ArgumentError, Returns, ReturnsError, SqlMapping, SqlTranslatable, +}; +use crate::{pg_sys, varlena}; +use alloc::borrow::Cow; +use alloc::string::String; +use core::borrow::Borrow; +use core::ops::{Deref, DerefMut}; +use core::{ptr, str}; + +use bstr::{BStr, ByteSlice}; + +// We reexport these types so people don't have to care whether they're pulled from BStr or std, +// they just use the ones from pgrx::text::* +pub use bstr::{Bytes, Chars}; +pub use core::str::{Utf8Chunks, Utf8Error}; + +/// A Postgres string, AKA `TEXT`. +/// +/// This is a varlena: a reference to a variable-length header followed by a slice of bytes. +#[repr(transparent)] +pub struct Text([u8]); + +/// Data field of a TEXT varlena +/// +/// Usually this will be UTF-8, but this is not always strictly enforced by PostgreSQL. +#[repr(transparent)] +pub struct TextData([u8]); + +impl TextData { + /// Reborrow `&TextData` as `&BStr` + /// + /// We do not implement Deref to BStr or [u8] because we'd like to expose a more selective API. 
+ /// Several fn that [u8] implements are implemented very differently on str, and we would like + /// the API of Text to "feel like" that of str in most cases. + fn as_bstr(&self) -> &BStr { + self.as_bytes().borrow() + } + + /// Obtain a reference to the Text's data as bytes + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Obtain a mutable reference to the Text's data as bytes + /// + /// # Safety + /// Like [`str::as_bytes_mut`], this can cause problems if you change Text in a way that + /// your database is not specified to support, so the caller must assure that it remains in + /// a valid encoding for the database. + pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] { + &mut self.0 + } + + /// Iterate over the UTF-8 characters of this Text + /// + /// If the data is not UTF-8, the replacement character � is returned. + pub fn chars(&self) -> Chars<'_> { + self.as_bstr().chars() + } + + /// Iterate over the Text's data as bytes + pub fn bytes(&self) -> Bytes<'_> { + self.as_bstr().bytes() + } + + /// Is the data ASCII? + pub fn is_ascii(&self) -> bool { + self.as_bytes().is_ascii() + } + + /// Is the data zero-length (i.e. empty)? + pub fn is_empty(&self) -> bool { + self.as_bytes().is_empty() + } + + /// Length of the data in bytes + pub fn len(&self) -> usize { + self.as_bytes().len() + } + + /// Obtain a reference to the data if it is a UTF-8 str + pub fn to_str(&self) -> Result<&str, Utf8Error> { + str::from_utf8(self.as_bytes()) + } + + /// You have two cows. Both are UTF-8 data. + /// + /// One is completely UTF-8, but the other is allocated and non-UTF-8 is patched over with �. 
+ pub fn to_str_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(self.as_bytes()) + } + + /// Iterate over the UTF-8 chunks of the Text's data + pub fn utf8_chunks(&self) -> Utf8Chunks { + self.as_bytes().utf8_chunks() + } +} + +impl Text { + /// Length of the entire varlena in bytes + pub fn vl_len(&self) -> usize { + self.0.len() + } +} + +impl Deref for Text { + type Target = TextData; + fn deref(&self) -> &Self::Target { + let self_ptr = self as *const Text as *const pg_sys::varlena; + unsafe { &*varlena_to_text_data(self_ptr.cast_mut()) } + } +} + +impl DerefMut for Text { + fn deref_mut(&mut self) -> &mut Self::Target { + let self_ptr = self as *mut Text as *mut pg_sys::varlena; + unsafe { &mut *varlena_to_text_data(self_ptr) } + } +} + +unsafe fn varlena_to_text_data(vptr: *mut pg_sys::varlena) -> *mut TextData { + unsafe { + let len = varlena::varsize_any_exhdr(vptr); + let data = varlena::vardata_any(vptr).cast_mut(); + + ptr::slice_from_raw_parts_mut(data.cast::(), len) as *mut TextData + } +} + +unsafe impl BorrowDatum for Text { + const PASS: PassBy = PassBy::Ref; + unsafe fn point_from(ptr: ptr::NonNull) -> ptr::NonNull { + unsafe { + let len = varlena::varsize_any(ptr.as_ptr().cast()); + ptr::NonNull::new_unchecked( + ptr::slice_from_raw_parts_mut(ptr.as_ptr(), len) as *mut Text + ) + } + } +} + +unsafe impl<'dat> SqlTranslatable for &'dat Text { + fn argument_sql() -> Result { + Ok(SqlMapping::literal("TEXT")) + } + fn return_sql() -> Result { + Ok(Returns::One(SqlMapping::literal("TEXT"))) + } +}