Skip to content

Commit

Permalink
Rename functions-array to functions-nested (apache#11602)
Browse files Browse the repository at this point in the history
* rename crate to functions-nested

* rename array_expressions to nested_expression

* rename doc and workflow

* cargo fmt

* update lock

* Update readme

* rename the missing parts

* rename the planner

* add backward compatibility
  • Loading branch information
goldmedal authored Jul 24, 2024
1 parent 72c6491 commit 1e06b91
Show file tree
Hide file tree
Showing 48 changed files with 83 additions and 73 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ jobs:
# Ensure that the datafusion crate can be built with only a subset of the function
# packages enabled.
- name: Check datafusion (array_expressions)
run: cargo check --no-default-features --features=array_expressions -p datafusion
- name: Check datafusion (nested_expressions)
run: cargo check --no-default-features --features=nested_expressions -p datafusion

- name: Check datafusion (crypto)
run: cargo check --no-default-features --features=crypto_expressions -p datafusion
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ members = [
"datafusion/execution",
"datafusion/functions-aggregate",
"datafusion/functions",
"datafusion/functions-array",
"datafusion/functions-nested",
"datafusion/optimizer",
"datafusion/physical-expr-common",
"datafusion/physical-expr",
Expand Down Expand Up @@ -94,7 +94,7 @@ datafusion-execution = { path = "datafusion/execution", version = "40.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "40.0.0" }
datafusion-functions = { path = "datafusion/functions", version = "40.0.0" }
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "40.0.0" }
datafusion-functions-array = { path = "datafusion/functions-array", version = "40.0.0" }
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "40.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false }
datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ This crate has several [features] which can be specified in your `Cargo.toml`.

Default features:

- `array_expressions`: functions for working with arrays such as `array_to_string`
- `nested_expressions`: functions for working with nested types, such as `array_to_string`
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
- `datetime_expressions`: date and time functions such as `to_timestamp`
Expand Down
4 changes: 2 additions & 2 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,17 @@ name = "datafusion"
path = "src/lib.rs"

[features]
nested_expressions = ["datafusion-functions-nested"]
# This feature is deprecated. Use the `nested_expressions` feature instead.
array_expressions = ["nested_expressions"]
# Used to enable the avro format
array_expressions = ["datafusion-functions-array"]
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
backtrace = ["datafusion-common/backtrace"]
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
crypto_expressions = ["datafusion-functions/crypto_expressions"]
datetime_expressions = ["datafusion-functions/datetime_expressions"]
default = [
"array_expressions",
"nested_expressions",
"crypto_expressions",
"datetime_expressions",
"encoding_expressions",
Expand Down Expand Up @@ -102,7 +104,7 @@ datafusion-execution = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-functions = { workspace = true }
datafusion-functions-aggregate = { workspace = true }
datafusion-functions-array = { workspace = true, optional = true }
datafusion-functions-nested = { workspace = true, optional = true }
datafusion-optimizer = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-expr-common = { workspace = true }
Expand Down Expand Up @@ -221,4 +223,4 @@ name = "parquet_statistic"
[[bench]]
harness = false
name = "map_query_sql"
required-features = ["array_expressions"]
required-features = ["nested_expressions"]
2 changes: 1 addition & 1 deletion datafusion/core/benches/map_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use tokio::runtime::Runtime;
use datafusion::prelude::SessionContext;
use datafusion_common::ScalarValue;
use datafusion_expr::Expr;
use datafusion_functions_array::map::map;
use datafusion_functions_nested::map::map;

mod data_utils;

Expand Down
23 changes: 12 additions & 11 deletions datafusion/core/src/execution/session_state_defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ use crate::datasource::file_format::parquet::ParquetFormatFactory;
use crate::datasource::file_format::FileFormatFactory;
use crate::datasource::provider::{DefaultTableFactory, TableProviderFactory};
use crate::execution::context::SessionState;
#[cfg(feature = "array_expressions")]
use crate::functions_array;
#[cfg(feature = "nested_expressions")]
use crate::functions_nested;
use crate::{functions, functions_aggregate};
use datafusion_execution::config::SessionConfig;
use datafusion_execution::object_store::ObjectStoreUrl;
Expand Down Expand Up @@ -82,11 +82,11 @@ impl SessionStateDefaults {
pub fn default_expr_planners() -> Vec<Arc<dyn ExprPlanner>> {
let expr_planners: Vec<Arc<dyn ExprPlanner>> = vec![
Arc::new(functions::core::planner::CoreFunctionPlanner::default()),
// register crate of array expressions (if enabled)
#[cfg(feature = "array_expressions")]
Arc::new(functions_array::planner::ArrayFunctionPlanner),
#[cfg(feature = "array_expressions")]
Arc::new(functions_array::planner::FieldAccessPlanner),
// register crate of nested expressions (if enabled)
#[cfg(feature = "nested_expressions")]
Arc::new(functions_nested::planner::NestedFunctionPlanner),
#[cfg(feature = "nested_expressions")]
Arc::new(functions_nested::planner::FieldAccessPlanner),
#[cfg(any(
feature = "datetime_expressions",
feature = "unicode_expressions"
Expand All @@ -100,8 +100,8 @@ impl SessionStateDefaults {
/// returns the list of default [`ScalarUDF`]'s
pub fn default_scalar_functions() -> Vec<Arc<ScalarUDF>> {
let mut functions: Vec<Arc<ScalarUDF>> = functions::all_default_functions();
#[cfg(feature = "array_expressions")]
functions.append(&mut functions_array::all_default_array_functions());
#[cfg(feature = "nested_expressions")]
functions.append(&mut functions_nested::all_default_nested_functions());

functions
}
Expand Down Expand Up @@ -140,8 +140,9 @@ impl SessionStateDefaults {
/// registers all the builtin array functions
pub fn register_array_functions(state: &mut SessionState) {
// register crate of nested expressions (if enabled)
#[cfg(feature = "array_expressions")]
functions_array::register_all(state).expect("can not register array expressions");
#[cfg(feature = "nested_expressions")]
functions_nested::register_all(state)
.expect("can not register nested expressions");
}

/// registers all the builtin aggregate functions
Expand Down
15 changes: 11 additions & 4 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@
//! * [datafusion_execution]: State and structures needed for execution
//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure
//! * [datafusion_functions]: Scalar function packages
//! * [datafusion_functions_array]: Scalar function packages for `ARRAY`s
//! * [datafusion_functions_nested]: Scalar function packages for `ARRAY`s, `MAP`s and `STRUCT`s
//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
//! * [datafusion_physical_plan]: [`ExecutionPlan`] and related expressions
Expand Down Expand Up @@ -569,10 +569,17 @@ pub mod functions {
pub use datafusion_functions::*;
}

/// re-export of [`datafusion_functions_array`] crate, if "array_expressions" feature is enabled
/// re-export of [`datafusion_functions_nested`] crate, if "nested_expressions" feature is enabled
pub mod functions_nested {
#[cfg(feature = "nested_expressions")]
pub use datafusion_functions_nested::*;
}

/// re-export of [`datafusion_functions_nested`] crate as [`functions_array`] for backward compatibility, if "nested_expressions" feature is enabled
#[deprecated(since = "41.0.0", note = "use datafusion-functions-nested instead")]
pub mod functions_array {
#[cfg(feature = "array_expressions")]
pub use datafusion_functions_array::*;
#[cfg(feature = "nested_expressions")]
pub use datafusion_functions_nested::*;
}

/// re-export of [`datafusion_functions_aggregate`] crate
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ pub use datafusion_expr::{
Expr,
};
pub use datafusion_functions::expr_fn::*;
#[cfg(feature = "array_expressions")]
pub use datafusion_functions_array::expr_fn::*;
#[cfg(feature = "nested_expressions")]
pub use datafusion_functions_nested::expr_fn::*;

pub use std::ops::Not;
pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use datafusion_common::{DFSchema, ScalarValue};
use datafusion_expr::expr::Alias;
use datafusion_expr::ExprSchemable;
use datafusion_functions_aggregate::expr_fn::{approx_median, approx_percentile_cont};
use datafusion_functions_array::map::map;
use datafusion_functions_nested::map::map;

fn test_schema() -> SchemaRef {
Arc::new(Schema::new(vec![
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/tests/expr_api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use datafusion_expr::AggregateExt;
use datafusion_functions::core::expr_ext::FieldAccessor;
use datafusion_functions_aggregate::first_last::first_value_udaf;
use datafusion_functions_aggregate::sum::sum_udaf;
use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor};
use datafusion_functions_nested::expr_ext::{IndexAccessor, SliceAccessor};
use sqlparser::ast::NullTreatment;
/// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
use std::sync::{Arc, OnceLock};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use datafusion_expr::{
LogicalPlanBuilder, OperateFunctionArg, ScalarUDF, ScalarUDFImpl, Signature,
Volatility,
};
use datafusion_functions_array::range::range_udf;
use datafusion_functions_nested::range::range_udf;

/// test that casting happens on udfs.
/// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/expr_rewriter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub use order_by::rewrite_sort_cols_by_aggs;
///
/// For example, concatenating arrays `a || b` is represented as
/// `Operator::StringConcat`, but can be implemented by calling a function
/// `array_concat` from the `functions-array` crate.
/// `array_concat` from the `functions-nested` crate.
// This is not used in datafusion internally, but it is still helpful for downstream project so don't remove it.
pub trait FunctionRewrite {
/// Return a human readable name for this rewrite
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
# under the License.

[package]
name = "datafusion-functions-array"
description = "Array Function packages for the DataFusion query engine"
name = "datafusion-functions-nested"
description = "Nested Type Function packages for the DataFusion query engine"
keywords = ["datafusion", "logical", "plan", "expressions"]
readme = "README.md"
version = { workspace = true }
Expand All @@ -34,7 +34,7 @@ workspace = true
[features]

[lib]
name = "datafusion_functions_array"
name = "datafusion_functions_nested"
path = "src/lib.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
under the License.
-->

# DataFusion Array Function Library
# DataFusion Nested Type Function Library

[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.

This crate contains functions for working with arrays, such as `array_append` that work with
This crate contains functions for working with arrays, maps and structs, such as `array_append`, which works with the
`ListArray`, `LargeListArray` and `FixedSizeListArray` types from the `arrow` crate.

[df]: https://crates.io/crates/datafusion
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ extern crate arrow;

use crate::criterion::Criterion;
use datafusion_expr::lit;
use datafusion_functions_array::expr_fn::{array_replace_all, make_array};
use datafusion_functions_nested::expr_fn::{array_replace_all, make_array};

fn criterion_benchmark(c: &mut Criterion) {
// Construct large arrays for benchmarking
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ use std::sync::Arc;
use datafusion_common::ScalarValue;
use datafusion_expr::planner::ExprPlanner;
use datafusion_expr::{ColumnarValue, Expr};
use datafusion_functions_array::map::map_udf;
use datafusion_functions_array::planner::ArrayFunctionPlanner;
use datafusion_functions_nested::map::map_udf;
use datafusion_functions_nested::planner::NestedFunctionPlanner;

fn keys(rng: &mut ThreadRng) -> Vec<String> {
let mut keys = vec![];
Expand Down Expand Up @@ -58,7 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) {
buffer.push(Expr::Literal(ScalarValue::Int32(Some(values[i]))));
}

let planner = ArrayFunctionPlanner {};
let planner = NestedFunctionPlanner {};

b.iter(|| {
black_box(
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use crate::extract::{array_element, array_slice};
///
/// ```
/// # use datafusion_expr::{lit, col, Expr};
/// # use datafusion_functions_array::expr_ext::IndexAccessor;
/// # use datafusion_functions_nested::expr_ext::IndexAccessor;
/// let expr = col("c1")
/// .index(lit(3));
/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]");
Expand Down Expand Up @@ -65,7 +65,7 @@ impl IndexAccessor for Expr {
///
/// ```
/// # use datafusion_expr::{lit, col};
/// # use datafusion_functions_array::expr_ext::SliceAccessor;
/// # use datafusion_functions_nested::expr_ext::SliceAccessor;
/// let expr = col("c1")
/// .range(lit(2), lit(4));
/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]");
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]

//! Array Functions for [DataFusion].
//! Nested type Functions for [DataFusion].
//!
//! This crate contains a collection of array functions implemented using the
//! This crate contains a collection of nested type functions implemented using the
//! extension API.
//!
//! [DataFusion]: https://crates.io/crates/datafusion
Expand Down Expand Up @@ -102,8 +102,8 @@ pub mod expr_fn {
pub use super::string::string_to_array;
}

/// Return all default array functions
pub fn all_default_array_functions() -> Vec<Arc<ScalarUDF>> {
/// Return all default nested type functions
pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
vec![
string::array_to_string_udf(),
string::string_to_array_udf(),
Expand Down Expand Up @@ -148,7 +148,7 @@ pub fn all_default_array_functions() -> Vec<Arc<ScalarUDF>> {

/// Registers all enabled packages with a [`FunctionRegistry`]
pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
let functions: Vec<Arc<ScalarUDF>> = all_default_array_functions();
let functions: Vec<Arc<ScalarUDF>> = all_default_nested_functions();
functions.into_iter().try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
if let Some(existing_udf) = existing_udf {
Expand All @@ -162,14 +162,14 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {

#[cfg(test)]
mod tests {
use crate::all_default_array_functions;
use crate::all_default_nested_functions;
use datafusion_common::Result;
use std::collections::HashSet;

#[test]
fn test_no_duplicate_name() -> Result<()> {
let mut names = HashSet::new();
for func in all_default_array_functions() {
for func in all_default_nested_functions() {
assert!(
names.insert(func.name().to_string().to_lowercase()),
"duplicate function name: {}",
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! SQL planning extensions like [`ArrayFunctionPlanner`] and [`FieldAccessPlanner`]
//! SQL planning extensions like [`NestedFunctionPlanner`] and [`FieldAccessPlanner`]
use datafusion_common::{exec_err, utils::list_ndims, DFSchema, Result};
use datafusion_expr::expr::ScalarFunction;
Expand All @@ -35,9 +35,9 @@ use crate::{
make_array::make_array,
};

pub struct ArrayFunctionPlanner;
pub struct NestedFunctionPlanner;

impl ExprPlanner for ArrayFunctionPlanner {
impl ExprPlanner for NestedFunctionPlanner {
fn plan_binary_op(
&self,
expr: RawBinaryExpr,
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion datafusion/proto/tests/cases/roundtrip_logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use datafusion::functions_aggregate::expr_fn::{
count_distinct, covar_pop, covar_samp, first_value, grouping, median, stddev,
stddev_pop, sum, var_pop, var_sample,
};
use datafusion::functions_array::map::map;
use datafusion::functions_nested::map::map;
use datafusion::prelude::*;
use datafusion::test_util::{TestTableFactory, TestTableProvider};
use datafusion_common::config::TableOptions;
Expand Down
2 changes: 1 addition & 1 deletion dev/release/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ dot -Tsvg dev/release/crate-deps.dot > dev/release/crate-deps.svg
(cd datafusion/functions-aggregate && cargo publish)
(cd datafusion/physical-expr && cargo publish)
(cd datafusion/functions && cargo publish)
(cd datafusion/functions-array && cargo publish)
(cd datafusion/functions-nested && cargo publish)
(cd datafusion/sql && cargo publish)
(cd datafusion/optimizer && cargo publish)
(cd datafusion/common-runtime && cargo publish)
Expand Down
Loading

0 comments on commit 1e06b91

Please sign in to comment.