Skip to content

Commit

Permalink
Create datafusion-functions-array crate and move ArrayToString fu…
Browse files Browse the repository at this point in the history
…nction into it (apache#9113)

* Add `datafusion-functions-array` crate

* Add test for round tripping array_to_string
  • Loading branch information
alamb authored Feb 12, 2024
1 parent 3c2b542 commit 292865e
Show file tree
Hide file tree
Showing 25 changed files with 652 additions and 293 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ jobs:
- name: Check function packages (encoding_expressions)
run: cargo check --no-default-features --features=encoding_expressions -p datafusion

- name: Check function packages (array_expressions)
run: cargo check --no-default-features --features=array_expressions -p datafusion

- name: Check Cargo.lock for datafusion-cli
run: |
# If this test fails, try running `cargo update` in the `datafusion-cli` directory
Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

[workspace]
exclude = ["datafusion-cli"]
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/functions", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs", "test-utils", "benchmarks",
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/functions", "datafusion/functions-array", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs", "test-utils", "benchmarks",
]
resolver = "2"

Expand Down Expand Up @@ -51,6 +51,7 @@ datafusion-common = { path = "datafusion/common", version = "35.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "35.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "35.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "35.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "35.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "35.0.0" }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "35.0.0" }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "35.0.0" }
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ This crate has several [features] which can be specified in your `Cargo.toml`.

Default features:

- `array_expressions`: functions for working with arrays such as `array_to_string`
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
- `encoding_expressions`: `encode` and `decode` functions
Expand Down
68 changes: 40 additions & 28 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ path = "src/lib.rs"

[features]
# Used to enable the avro format
array_expressions = ["datafusion-functions-array"]
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"]
default = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
default = ["array_expressions", "crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions", "compression", "parquet"]
encoding_expressions = ["datafusion-functions/encoding_expressions"]
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
force_hash_collisions = []
Expand All @@ -68,7 +69,8 @@ dashmap = { workspace = true }
datafusion-common = { path = "../common", version = "35.0.0", features = ["object_store"], default-features = false }
datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-functions = { path = "../functions", version = "35.0.0" }
datafusion-functions = { workspace = true }
datafusion-functions-array = { workspace = true, optional = true }
datafusion-optimizer = { path = "../optimizer", version = "35.0.0", default-features = false }
datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0", default-features = false }
datafusion-physical-plan = { workspace = true }
Expand Down
5 changes: 5 additions & 0 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,11 @@ impl SessionState {
datafusion_functions::register_all(&mut new_self)
.expect("can not register built in functions");

// register crate of array expressions (if enabled)
#[cfg(feature = "array_expressions")]
datafusion_functions_array::register_all(&mut new_self)
.expect("can not register array expressions");

new_self
}
/// Returns new [`SessionState`] using the provided
Expand Down
6 changes: 6 additions & 0 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,12 @@ pub mod functions {
pub use datafusion_functions::*;
}

/// re-export of [`datafusion_functions_array`] crate, if "array_expressions" feature is enabled
pub mod functions_array {
#[cfg(feature = "array_expressions")]
pub use datafusion_functions::*;
}

#[cfg(test)]
pub mod test;
pub mod test_util;
Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pub use datafusion_expr::{
Expr,
};
pub use datafusion_functions::expr_fn::*;
#[cfg(feature = "array_expressions")]
pub use datafusion_functions_array::expr_fn::*;

pub use std::ops::Not;
pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
Expand Down
30 changes: 29 additions & 1 deletion datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ use arrow::{
array::{Int32Array, StringArray},
record_batch::RecordBatch,
};
use arrow_array::types::Int32Type;
use arrow_array::ListArray;
use arrow_schema::SchemaRef;
use std::sync::Arc;

Expand All @@ -40,6 +42,7 @@ fn test_schema() -> SchemaRef {
Arc::new(Schema::new(vec![
Field::new("a", DataType::Utf8, false),
Field::new("b", DataType::Int32, false),
Field::new("l", DataType::new_list(DataType::Int32, true), true),
]))
}

Expand All @@ -57,6 +60,12 @@ async fn create_test_table() -> Result<DataFrame> {
"123AbcDef",
])),
Arc::new(Int32Array::from(vec![1, 10, 10, 100])),
Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
Some(vec![Some(0), Some(1), Some(2)]),
None,
Some(vec![Some(3), None, Some(5)]),
Some(vec![Some(6), Some(7)]),
])),
],
)?;

Expand All @@ -67,7 +76,7 @@ async fn create_test_table() -> Result<DataFrame> {
ctx.table("test").await
}

/// Excutes an expression on the test dataframe as a select.
/// Executes an expression on the test dataframe as a select.
/// Compares formatted output of a record batch with an expected
/// vector of strings, using the assert_batch_eq! macro
macro_rules! assert_fn_batches {
Expand Down Expand Up @@ -862,3 +871,22 @@ async fn test_fn_decode() -> Result<()> {

Ok(())
}

#[tokio::test]
async fn test_fn_array_to_string() -> Result<()> {
let expr = array_to_string(col("l"), lit("***"));

let expected = [
"+-------------------------------------+",
"| array_to_string(test.l,Utf8(\"***\")) |",
"+-------------------------------------+",
"| 0***1***2 |",
"| |",
"| 3***5 |",
"| 6***7 |",
"+-------------------------------------+",
];
assert_fn_batches!(expr, expected);

Ok(())
}
13 changes: 0 additions & 13 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,6 @@ pub enum BuiltinScalarFunction {
ArrayReverse,
/// array_slice
ArraySlice,
/// array_to_string
ArrayToString,
/// array_intersect
ArrayIntersect,
/// array_union
Expand Down Expand Up @@ -434,7 +432,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
BuiltinScalarFunction::Flatten => Volatility::Immutable,
BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable,
BuiltinScalarFunction::ArrayUnion => Volatility::Immutable,
BuiltinScalarFunction::ArrayResize => Volatility::Immutable,
Expand Down Expand Up @@ -631,7 +628,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayToString => Ok(Utf8),
BuiltinScalarFunction::ArrayIntersect => {
match (input_expr_types[0].clone(), input_expr_types[1].clone()) {
(DataType::Null, DataType::Null) | (DataType::Null, _) => {
Expand Down Expand Up @@ -991,9 +987,6 @@ impl BuiltinScalarFunction {
Signature::variadic_any(self.volatility())
}

BuiltinScalarFunction::ArrayToString => {
Signature::variadic_any(self.volatility())
}
BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()),
BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()),
BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()),
Expand Down Expand Up @@ -1605,12 +1598,6 @@ impl BuiltinScalarFunction {
}
BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
BuiltinScalarFunction::ArrayToString => &[
"array_to_string",
"list_to_string",
"array_join",
"list_join",
],
BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"],
BuiltinScalarFunction::Cardinality => &["cardinality"],
BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"],
Expand Down
Loading

0 comments on commit 292865e

Please sign in to comment.