Skip to content

Commit

Permalink
Get rid of the unsafe c_ffi code
Browse files Browse the repository at this point in the history
  • Loading branch information
jleibs authored and teh-cmc committed Aug 31, 2024
1 parent b53150e commit 16a133e
Showing 1 changed file with 51 additions and 73 deletions.
124 changes: 51 additions & 73 deletions rerun_py/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
use std::collections::BTreeMap;

use arrow::{
array::{make_array, ArrayData},
pyarrow::PyArrowType,
};
use arrow2::{
array::{Array, ListArray, PrimitiveArray},
datatypes::{DataType, Field},
ffi,
offset::Offsets,
};
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
ffi::Py_uintptr_t,
exceptions::PyRuntimeError,
types::{PyAnyMethods as _, PyDict, PyDictMethods, PyString},
Bound, PyAny, PyResult,
};
Expand All @@ -22,73 +24,49 @@ use re_sdk::{ComponentName, EntityPath, Timeline};
/// Perform conversion between a pyarrow array to arrow2 types.
///
/// `name` is the name of the Rerun component, and the name of the pyarrow `Field` (column name).
fn array_to_rust(
arrow_array: &Bound<'_, PyAny>,
name: Option<&str>,
) -> PyResult<(Box<dyn Array>, Field)> {
// prepare pointers to receive the Array struct
let array = Box::new(ffi::ArrowArray::empty());
let schema = Box::new(ffi::ArrowSchema::empty());

let array_ptr = &*array as *const ffi::ArrowArray;
let schema_ptr = &*schema as *const ffi::ArrowSchema;

// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe. In particular, `_export_to_c` can go out of bounds
arrow_array.call_method1(
"_export_to_c",
(array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t),
)?;

#[allow(unsafe_code)]
// SAFETY:
// TODO(jleibs): Convince ourselves that this is safe
// Following pattern from: https://github.com/pola-rs/polars/blob/1c6b7b70e935fe70384fc0d1ca8d07763011d8b8/examples/python_rust_compiled_function/src/ffi.rs
unsafe {
let mut field = ffi::import_field_from_c(schema.as_ref())
.map_err(|err| PyValueError::new_err(format!("Error importing Field: {err}")))?;

// NOTE: Do not carry the extension metadata beyond the FFI barrier in order the match the
// data sent by other SDKs.
//
// We've stopped using datatype extensions overall, as they generally have been creating more
// problems than they have solved.
//
// With the addition of `Chunk` and `ChunkMetadata`, it is likely that we will get rid of extension types
// entirely at some point, since it looks like we won't have any use for them anymore.
//
// Doing so will require a more extensive refactoring of the Python SDK though, so until we're absolutely
// certain where we're going, this is a nice, painless and easily reversible solution.
//
// See <https://github.com/rerun-io/rerun/issues/6606>.
let datatype = if let DataType::List(inner) = field.data_type() {
let Field {
name,
data_type,
is_nullable,
metadata,
} = &**inner;
DataType::List(std::sync::Arc::new(
Field::new(
name.clone(),
data_type.to_logical_type().clone(),
*is_nullable,
)
.with_metadata(metadata.clone()),
))
} else {
field.data_type().to_logical_type().clone()
};

let array = ffi::import_array_from_c(*array, datatype)
.map_err(|err| PyValueError::new_err(format!("Error importing Array: {err}")))?;

if let Some(name) = name {
field.name = name.to_owned();
}

Ok((array, field))
}
fn array_to_rust(arrow_array: &Bound<'_, PyAny>, name: &str) -> PyResult<(Box<dyn Array>, Field)> {
let py_array: PyArrowType<ArrayData> = arrow_array.extract()?;
let arr1_array = make_array(py_array.0);

let data = arr1_array.to_data();

let arr2_array = arrow2::array::from_data(&data);

// NOTE: Do not carry the extension metadata beyond the FFI barrier in order the match the
// data sent by other SDKs.
//
// We've stopped using datatype extensions overall, as they generally have been creating more
// problems than they have solved.
//
// With the addition of `Chunk` and `ChunkMetadata`, it is likely that we will get rid of extension types
// entirely at some point, since it looks like we won't have any use for them anymore.
//
// Doing so will require a more extensive refactoring of the Python SDK though, so until we're absolutely
// certain where we're going, this is a nice, painless and easily reversible solution.
//
// See <https://github.com/rerun-io/rerun/issues/6606>.
let datatype = if let DataType::List(inner) = arr2_array.data_type() {
let Field {
name: child_name,
data_type,
is_nullable,
metadata,
} = &**inner;
DataType::List(std::sync::Arc::new(
Field::new(
child_name.clone(),
data_type.to_logical_type().clone(),
*is_nullable,
)
.with_metadata(metadata.clone()),
))
} else {
arr2_array.data_type().to_logical_type().clone()
};

let field = Field::new(name, datatype.clone(), true);

Ok((arr2_array, field))
}

/// Build a [`PendingRow`] given a '**kwargs'-style dictionary of component arrays.
Expand All @@ -104,7 +82,7 @@ pub fn build_row_from_components(
components.iter().map(|(name, array)| {
let py_name = name.downcast::<PyString>()?;
let name: std::borrow::Cow<'_, str> = py_name.extract()?;
array_to_rust(&array, Some(&name))
array_to_rust(&array, &name)
}),
|iter| iter.unzip(),
)?;
Expand Down Expand Up @@ -136,7 +114,7 @@ pub fn build_chunk_from_components(
timelines.iter().map(|(name, array)| {
let py_name = name.downcast::<PyString>()?;
let name: std::borrow::Cow<'_, str> = py_name.extract()?;
array_to_rust(&array, Some(&name))
array_to_rust(&array, &name)
}),
|iter| iter.unzip(),
)?;
Expand Down Expand Up @@ -178,7 +156,7 @@ pub fn build_chunk_from_components(
components.iter().map(|(name, array)| {
let py_name = name.downcast::<PyString>()?;
let name: std::borrow::Cow<'_, str> = py_name.extract()?;
array_to_rust(&array, Some(&name))
array_to_rust(&array, &name)
}),
|iter| iter.unzip(),
)?;
Expand Down

0 comments on commit 16a133e

Please sign in to comment.