Iox 12409/df upgrade sep 11 eod #40

Closed
20 changes: 20 additions & 0 deletions datafusion/common/src/stats.rs
@@ -258,6 +258,26 @@ impl Statistics {
         self
     }
 
+    /// Project the statistics to the given column indices.
+    ///
+    /// For example, if we had statistics for columns `{"a", "b", "c"}`,
+    /// projecting to `vec![2, 1]` would return statistics for columns `{"c",
+    /// "b"}`.
+    pub fn project(mut self, projection: Option<&Vec<usize>>) -> Self {
+        let Some(projection) = projection else {
+            return self;
+        };
+
+        // todo: it would be nice to avoid cloning column statistics if
+        // possible (e.g. if the projection did not contain duplicates)
+        self.column_statistics = projection
+            .iter()
+            .map(|&i| self.column_statistics[i].clone())
+            .collect();
+
+        self
+    }
+
     /// Calculates the statistics after `fetch` and `skip` operations apply.
     /// Here, `self` denotes per-partition statistics. Use the `n_partitions`
     /// parameter to compute global statistics in a multi-partition setting.
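For reference, a minimal sketch of calling the new `Statistics::project`, assuming the crate's public `Statistics`/`ColumnStatistics` fields; the values are illustrative, not part of the PR:

use datafusion_common::stats::{ColumnStatistics, Precision, Statistics};

fn main() {
    // Statistics for three columns {"a", "b", "c"}.
    let stats = Statistics {
        num_rows: Precision::Exact(100),
        total_byte_size: Precision::Absent,
        column_statistics: vec![
            ColumnStatistics::new_unknown(), // "a"
            ColumnStatistics::new_unknown(), // "b"
            ColumnStatistics::new_unknown(), // "c"
        ],
    };

    // Projecting to [2, 1] keeps statistics for {"c", "b"}, in that order;
    // passing `None` returns the statistics unchanged.
    let projected = stats.project(Some(&vec![2, 1]));
    assert_eq!(projected.column_statistics.len(), 2);
}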
18 changes: 8 additions & 10 deletions datafusion/core/src/datasource/file_format/mod.rs
@@ -213,16 +213,14 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
         .fields
         .iter()
         .map(|field| match field.data_type() {
-            DataType::Utf8 | DataType::LargeUtf8 => Arc::new(Field::new(
-                field.name(),
-                DataType::Utf8View,
-                field.is_nullable(),
-            )),
-            DataType::Binary | DataType::LargeBinary => Arc::new(Field::new(
-                field.name(),
-                DataType::BinaryView,
-                field.is_nullable(),
-            )),
+            DataType::Utf8 | DataType::LargeUtf8 => Arc::new(
+                Field::new(field.name(), DataType::Utf8View, field.is_nullable())
+                    .with_metadata(field.metadata().to_owned()),
+            ),
+            DataType::Binary | DataType::LargeBinary => Arc::new(
+                Field::new(field.name(), DataType::BinaryView, field.is_nullable())
+                    .with_metadata(field.metadata().to_owned()),
+            ),
             _ => field.clone(),
         })
         .collect();
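The fix here carries each field's metadata onto the rebuilt view-type field. A standalone sketch of the pattern, using only arrow-rs APIs (names and values are illustrative):

use std::collections::HashMap;
use std::sync::Arc;

use arrow::datatypes::{DataType, Field};

fn main() {
    // A string field that carries field-level metadata.
    let field = Field::new("name", DataType::Utf8, true)
        .with_metadata(HashMap::from([("key".to_string(), "value".to_string())]));

    // Rebuilding the field as Utf8View loses the metadata unless it is
    // copied over explicitly, which is what the diff adds.
    let view_field = Arc::new(
        Field::new(field.name(), DataType::Utf8View, field.is_nullable())
            .with_metadata(field.metadata().clone()),
    );
    assert_eq!(view_field.metadata(), field.metadata());
}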
10 changes: 10 additions & 0 deletions datafusion/core/src/physical_optimizer/sanity_checker.rs
@@ -34,6 +34,8 @@ use datafusion_physical_plan::joins::SymmetricHashJoinExec;
 use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};
 
 use datafusion_physical_optimizer::PhysicalOptimizerRule;
+use datafusion_physical_plan::sorts::sort::SortExec;
+use datafusion_physical_plan::union::UnionExec;
 use itertools::izip;
 
 /// The SanityCheckPlan rule rejects the following query plans:
@@ -125,6 +127,14 @@ pub fn check_plan_sanity(
         plan.required_input_ordering().iter(),
         plan.required_input_distribution().iter()
     ) {
+        // TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492
+        if child.as_any().downcast_ref::<UnionExec>().is_some() {
+            continue;
+        }
+        if child.as_any().downcast_ref::<SortExec>().is_some() {
+            continue;
+        }
+
         let child_eq_props = child.equivalence_properties();
         if let Some(sort_req) = sort_req {
             if !child_eq_props.ordering_satisfy_requirement(sort_req) {
2 changes: 1 addition & 1 deletion datafusion/core/src/physical_planner.rs
@@ -673,7 +673,7 @@ impl DefaultPhysicalPlanner {
                     logical_input_schema.as_ref().clone().into();
 
                 if physical_input_schema != physical_input_schema_from_logical {
-                    return internal_err!("Physical input schema should be the same as the one converted from logical input schema.");
+                    log::warn!("Physical input schema should be the same as the one converted from logical input schema, but did not match for logical plan:\n{}", input.display_indent());
                 }
 
                 let groups = self.create_grouping_physical_expr(
2 changes: 1 addition & 1 deletion datafusion/functions-aggregate/src/count.rs
@@ -122,7 +122,7 @@ impl AggregateUDFImpl for Count {
     }
 
     fn is_nullable(&self) -> bool {
-        false
+        true
     }
 
     fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
6 changes: 5 additions & 1 deletion datafusion/functions/src/datetime/now.rs
@@ -21,7 +21,7 @@ use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::Timestamp;
 use arrow::datatypes::TimeUnit::Nanosecond;
 
-use datafusion_common::{internal_err, Result, ScalarValue};
+use datafusion_common::{internal_err, ExprSchema, Result, ScalarValue};
 use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
 use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
 
@@ -84,4 +84,8 @@ impl ScalarUDFImpl for NowFunc {
             ScalarValue::TimestampNanosecond(now_ts, Some("+00:00".into())),
         )))
     }
+
+    fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool {
+        false
+    }
 }
25 changes: 16 additions & 9 deletions datafusion/physical-plan/src/aggregates/mod.rs
@@ -26,6 +26,7 @@ use crate::aggregates::{
     topk_stream::GroupedTopKAggregateStream,
 };
 use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet};
+use crate::projection::get_field_metadata;
 use crate::windows::get_ordered_partition_by_indices;
 use crate::{
     DisplayFormatType, Distribution, ExecutionPlan, InputOrderMode,
@@ -793,14 +794,17 @@ fn create_schema(
 ) -> Result<Schema> {
     let mut fields = Vec::with_capacity(group_expr.len() + aggr_expr.len());
     for (index, (expr, name)) in group_expr.iter().enumerate() {
-        fields.push(Field::new(
-            name,
-            expr.data_type(input_schema)?,
-            // In cases where we have multiple grouping sets, we will use NULL expressions in
-            // order to align the grouping sets. So the field must be nullable even if the underlying
-            // schema field is not.
-            group_expr_nullable[index] || expr.nullable(input_schema)?,
-        ))
+        fields.push(
+            Field::new(
+                name,
+                expr.data_type(input_schema)?,
+                // In cases where we have multiple grouping sets, we will use NULL expressions in
+                // order to align the grouping sets. So the field must be nullable even if the underlying
+                // schema field is not.
+                group_expr_nullable[index] || expr.nullable(input_schema)?,
+            )
+            .with_metadata(get_field_metadata(expr, input_schema).unwrap_or_default()),
+        )
     }
 
     match mode {
@@ -821,7 +825,10 @@
         }
     }
 
-    Ok(Schema::new(fields))
+    Ok(Schema::new_with_metadata(
+        fields,
+        input_schema.metadata().clone(),
+    ))
 }
 
 fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef {
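Besides adding per-field metadata to the group columns, this hunk switches from `Schema::new` to `Schema::new_with_metadata` so the aggregate's output schema also keeps schema-level metadata. A sketch of the difference, using only arrow-rs (illustrative values):

use std::collections::HashMap;

use arrow::datatypes::{DataType, Field, Schema};

fn main() {
    let metadata = HashMap::from([("origin".to_string(), "input".to_string())]);
    let fields = vec![Field::new("cnt", DataType::Int64, true)];

    // `Schema::new` would produce an empty metadata map; carrying the input
    // schema's metadata through requires passing it explicitly.
    let schema = Schema::new_with_metadata(fields, metadata.clone());
    assert_eq!(schema.metadata(), &metadata);
}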
7 changes: 6 additions & 1 deletion datafusion/physical-plan/src/filter.rs
@@ -370,7 +370,12 @@ impl ExecutionPlan for FilterExec {
     /// The output statistics of a filtering operation can be estimated if the
     /// predicate's selectivity value can be determined for the incoming data.
     fn statistics(&self) -> Result<Statistics> {
-        Self::statistics_helper(&self.input, self.predicate(), self.default_selectivity)
+        let stats = Self::statistics_helper(
+            &self.input,
+            self.predicate(),
+            self.default_selectivity,
+        )?;
+        Ok(stats.project(self.projection.as_ref()))
     }
 }
 
13 changes: 10 additions & 3 deletions datafusion/physical-plan/src/joins/cross_join.rs
@@ -69,15 +69,22 @@ impl CrossJoinExec {
     /// Create a new [CrossJoinExec].
     pub fn new(left: Arc<dyn ExecutionPlan>, right: Arc<dyn ExecutionPlan>) -> Self {
         // left then right
-        let all_columns: Fields = {
+        let (all_columns, metadata) = {
             let left_schema = left.schema();
             let right_schema = right.schema();
             let left_fields = left_schema.fields().iter();
             let right_fields = right_schema.fields().iter();
-            left_fields.chain(right_fields).cloned().collect()
+
+            let mut metadata = left_schema.metadata().clone();
+            metadata.extend(right_schema.metadata().clone());
+
+            (
+                left_fields.chain(right_fields).cloned().collect::<Fields>(),
+                metadata,
+            )
         };
 
-        let schema = Arc::new(Schema::new(all_columns));
+        let schema = Arc::new(Schema::new(all_columns).with_metadata(metadata));
         let cache = Self::compute_properties(&left, &right, Arc::clone(&schema));
         CrossJoinExec {
             left,
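Note the merge order: `metadata.extend(right_schema.metadata().clone())` means right-side entries overwrite left-side entries on key collisions. A sketch of that behavior in isolation (illustrative names):

use std::collections::HashMap;

use arrow::datatypes::{DataType, Field, Schema};

fn main() {
    let left = Schema::new(vec![Field::new("a", DataType::Int32, true)])
        .with_metadata(HashMap::from([("k".to_string(), "left".to_string())]));
    let right = Schema::new(vec![Field::new("b", DataType::Int32, true)])
        .with_metadata(HashMap::from([("k".to_string(), "right".to_string())]));

    // Same merge as in CrossJoinExec::new: start from the left schema's
    // metadata, then let the right side win on duplicate keys.
    let mut metadata = left.metadata().clone();
    metadata.extend(right.metadata().clone());
    assert_eq!(metadata["k"], "right");
}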
8 changes: 6 additions & 2 deletions datafusion/physical-plan/src/projection.rs
Expand Up @@ -40,7 +40,7 @@ use datafusion_common::stats::Precision;
use datafusion_common::Result;
use datafusion_execution::TaskContext;
use datafusion_physical_expr::equivalence::ProjectionMapping;
use datafusion_physical_expr::expressions::Literal;
use datafusion_physical_expr::expressions::{CastExpr, Literal};

use futures::stream::{Stream, StreamExt};
use log::trace;
@@ -237,10 +237,14 @@
 
 /// If e is a direct column reference, returns the field level
 /// metadata for that field, if any. Otherwise returns None
-fn get_field_metadata(
+pub(crate) fn get_field_metadata(
     e: &Arc<dyn PhysicalExpr>,
     input_schema: &Schema,
 ) -> Option<HashMap<String, String>> {
+    if let Some(cast) = e.as_any().downcast_ref::<CastExpr>() {
+        return get_field_metadata(cast.expr(), input_schema);
+    }
+
     // Look up field by index in schema (not NAME as there can be more than one
     // column with the same name)
     e.as_any()
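This change lets `get_field_metadata` see through casts before looking up the column. A dependency-free sketch of the recursion pattern (the types here are stand-ins, not DataFusion's):

// Stand-in expression tree, not DataFusion's PhysicalExpr.
enum Expr {
    Column(usize),
    Cast(Box<Expr>),
}

// Unwrap any number of casts, then report the underlying column index,
// mirroring how the diff recurses into `CastExpr::expr()`.
fn column_index(e: &Expr) -> Option<usize> {
    match e {
        Expr::Cast(inner) => column_index(inner),
        Expr::Column(i) => Some(*i),
    }
}

fn main() {
    let e = Expr::Cast(Box::new(Expr::Cast(Box::new(Expr::Column(3)))));
    assert_eq!(column_index(&e), Some(3));
}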
28 changes: 21 additions & 7 deletions datafusion/physical-plan/src/union.rs
Expand Up @@ -468,16 +468,30 @@ pub fn can_interleave<T: Borrow<Arc<dyn ExecutionPlan>>>(
}

fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> SchemaRef {
let fields: Vec<Field> = (0..inputs[0].schema().fields().len())
let fields: Vec<Field> = (0..std::cmp::max(
inputs[0].schema().fields().len(),
inputs
.get(1)
.map(|l| l.schema().fields().len())
.unwrap_or_default(),
))
.map(|i| {
inputs
.iter()
.filter_map(|input| {
if input.schema().fields().len() > i {
Some(input.schema().field(i).clone())
} else {
None
}
.enumerate()
.filter_map(|(input_idx, input)| {
let field = input.schema().field(i).clone();
let mut metadata = field.metadata().clone();

let other_side_metdata = inputs
.get(input_idx ^ (1 << 0))
.map(|other_input| {
other_input.schema().field(i).metadata().clone()
})
.unwrap_or_default();

metadata.extend(other_side_metdata);
Some(field.with_metadata(metadata))
})
.find_or_first(|f| f.is_nullable())
.unwrap()
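The `input_idx ^ (1 << 0)` expression is just `input_idx ^ 1`: it toggles 0 to 1 and 1 to 0, addressing "the other side" of a two-input union. A sketch of the resulting field-metadata merge, using only arrow-rs (illustrative names):

use std::collections::HashMap;

use arrow::datatypes::{DataType, Field};

fn main() {
    // Two fields at the same position in a two-input union: one side carries
    // metadata, the other (e.g. a NULL::string placeholder) does not.
    let with_md = Field::new("l_name", DataType::Utf8, true)
        .with_metadata(HashMap::from([("metadata_key".to_string(), "x".to_string())]));
    let without_md = Field::new("l_name", DataType::Utf8, true);

    // Merging in the other side's metadata keeps the key no matter which
    // branch the field came from.
    let mut metadata = without_md.metadata().clone();
    metadata.extend(with_md.metadata().clone());
    assert_eq!(metadata.len(), 1);
}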
8 changes: 7 additions & 1 deletion datafusion/sqllogictest/src/test_context.rs
@@ -313,8 +313,13 @@ pub async fn register_metadata_tables(ctx: &SessionContext) {
         String::from("metadata_key"),
         String::from("the name field"),
     )]));
+    let l_name =
+        Field::new("l_name", DataType::Utf8, true).with_metadata(HashMap::from([(
+            String::from("metadata_key"),
+            String::from("the l_name field"),
+        )]));
 
-    let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
+    let schema = Schema::new(vec![id, name, l_name]).with_metadata(HashMap::from([(
         String::from("metadata_key"),
         String::from("the entire schema"),
     )]));
@@ -324,6 +329,7 @@
         vec![
             Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
             Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) as _,
+            Arc::new(StringArray::from(vec![None, Some("l_bar"), Some("l_baz")])) as _,
         ],
     )
     .unwrap();
69 changes: 68 additions & 1 deletion datafusion/sqllogictest/test_files/metadata.slt
@@ -25,7 +25,7 @@
 ## with metadata in SQL.
 
 query IT
-select * from table_with_metadata;
+select id, name from table_with_metadata;
 ----
 1 NULL
 NULL bar
@@ -58,5 +58,72 @@ WHERE "data"."id" = "samples"."id";
 1
 3
 
+
+
+# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687
+query I
+select count(distinct name) from table_with_metadata;
+----
+2
+
+# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687
+query I
+select approx_median(distinct id) from table_with_metadata;
+----
+2
+
+# Regression test: prevent field metadata loss per https://github.com/apache/datafusion/issues/12687
+statement ok
+select array_agg(distinct id) from table_with_metadata;
+
+query I
+select distinct id from table_with_metadata order by id;
+----
+1
+3
+NULL
+
+query I
+select count(id) from table_with_metadata;
+----
+2
+
+query I
+select count(id) cnt from table_with_metadata group by name order by cnt;
+----
+0
+1
+1
+
+
+
+# Regression test: missing schema metadata when aggregating over a cross join
+query I
+SELECT count("data"."id")
+FROM
+(
+    SELECT "id" FROM "table_with_metadata"
+) as "data",
+(
+    SELECT "id" FROM "table_with_metadata"
+) as "samples";
+----
+6
+
+# Regression test: missing field metadata from the NULL field on the left side of the union
+query ITT
+(SELECT id, NULL::string as name, l_name FROM "table_with_metadata")
+UNION
+(SELECT id, name, NULL::string as l_name FROM "table_with_metadata")
+ORDER BY id, name, l_name;
+----
+1 NULL NULL
+3 baz NULL
+3 NULL l_baz
+NULL bar NULL
+NULL NULL l_bar
+
+
+
 statement ok
 drop table table_with_metadata;