From 2ec8ce8057b58745d147e3124da8501dd927a3e7 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Mon, 9 Sep 2024 19:45:30 -0400 Subject: [PATCH] Switch ArrowDict to signed and fix static datatype --- crates/store/re_chunk/src/util.rs | 8 ++++---- crates/store/re_chunk_store/src/dataframe.rs | 2 +- crates/store/re_dataframe/src/range.rs | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/store/re_chunk/src/util.rs b/crates/store/re_chunk/src/util.rs index 1cde20e9d6a2..091f06962f6d 100644 --- a/crates/store/re_chunk/src/util.rs +++ b/crates/store/re_chunk/src/util.rs @@ -84,7 +84,7 @@ pub fn arrays_to_list_array( pub fn arrays_to_dictionary( array_datatype: ArrowDatatype, arrays: &[Option<(Idx, &dyn ArrowArray)>], -) -> Option<ArrowDictionaryArray<u32>> { +) -> Option<ArrowDictionaryArray<i32>> { // Dedupe the input arrays based on the given primary key. let arrays_dense_deduped = arrays .iter() @@ -96,7 +96,7 @@ pub fn arrays_to_dictionary( // Compute the keys for the final dictionary, using that same primary key. let keys = { - let mut cur_key = 0u32; + let mut cur_key = 0i32; arrays .iter() .dedup_by_with_count(|lhs, rhs| { @@ -140,7 +140,7 @@ pub fn arrays_to_dictionary( }; let datatype = ArrowDatatype::Dictionary( - arrow2::datatypes::IntegerType::UInt32, + arrow2::datatypes::IntegerType::Int32, std::sync::Arc::new(data.data_type().clone()), true, // is_sorted ); @@ -149,7 +149,7 @@ pub fn arrays_to_dictionary( // unique values. 
ArrowDictionaryArray::try_new( datatype, - ArrowPrimitiveArray::<u32>::from(keys), + ArrowPrimitiveArray::<i32>::from(keys), data.to_boxed(), ) .ok() diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs index f348667b677f..2b2bc6705703 100644 --- a/crates/store/re_chunk_store/src/dataframe.rs +++ b/crates/store/re_chunk_store/src/dataframe.rs @@ -482,7 +482,7 @@ impl ChunkStore { archetype_name: None, archetype_field_name: None, component_name: *component_name, - datatype: datatype.clone(), + datatype: ArrowListArray::<i32>::default_datatype(datatype.clone()), is_static: true, }) }) diff --git a/crates/store/re_dataframe/src/range.rs b/crates/store/re_dataframe/src/range.rs index 3da271784eff..e56258bc01bc 100644 --- a/crates/store/re_dataframe/src/range.rs +++ b/crates/store/re_dataframe/src/range.rs @@ -91,7 +91,7 @@ impl RangeQueryHandle<'_> { .map(|col| match col { ColumnDescriptor::Component(mut descr) => { descr.datatype = ArrowDatatype::Dictionary( - arrow2::datatypes::IntegerType::UInt32, + arrow2::datatypes::IntegerType::Int32, descr.datatype.into(), true, ); @@ -311,7 +311,7 @@ impl RangeQueryHandle<'_> { // see if this ever becomes an issue before going down this road. // // TODO(cmc): Opportunities for parallelization, if it proves to be a net positive in practice. - let dict_arrays: HashMap<&ComponentColumnDescriptor, ArrowDictionaryArray<u32>> = { + let dict_arrays: HashMap<&ComponentColumnDescriptor, ArrowDictionaryArray<i32>> = { re_tracing::profile_scope!("queries"); columns @@ -612,7 +612,7 @@ mod tests { }) .unwrap() .as_any() - .downcast_ref::<ArrowDictionaryArray<u32>>() + .downcast_ref::<ArrowDictionaryArray<i32>>() .unwrap() .values() .clone() @@ -639,7 +639,7 @@ mod tests { }) .unwrap() .as_any() - .downcast_ref::<ArrowDictionaryArray<u32>>() + .downcast_ref::<ArrowDictionaryArray<i32>>() .unwrap() .values() .clone()