Skip to content

Commit

Permalink
test: reproducer for missing schema metadata on cross join
Browse files Browse the repository at this point in the history
fix: pass thru schema metadata on cross join

fix: preserve metadata when transforming to view types

test: reproducer for missing field metadata in left hand NULL field of union

fix: preserve field metadata from right side of union

chore: safe indexing
  • Loading branch information
wiedld committed Oct 9, 2024
1 parent 2a930f3 commit 3d5938a
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 16 deletions.
18 changes: 8 additions & 10 deletions datafusion/core/src/datasource/file_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,16 +213,14 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
.fields
.iter()
.map(|field| match field.data_type() {
DataType::Utf8 | DataType::LargeUtf8 => Arc::new(Field::new(
field.name(),
DataType::Utf8View,
field.is_nullable(),
)),
DataType::Binary | DataType::LargeBinary => Arc::new(Field::new(
field.name(),
DataType::BinaryView,
field.is_nullable(),
)),
DataType::Utf8 | DataType::LargeUtf8 => Arc::new(
Field::new(field.name(), DataType::Utf8View, field.is_nullable())
.with_metadata(field.metadata().to_owned()),
),
DataType::Binary | DataType::LargeBinary => Arc::new(
Field::new(field.name(), DataType::BinaryView, field.is_nullable())
.with_metadata(field.metadata().to_owned()),
),
_ => field.clone(),
})
.collect();
Expand Down
13 changes: 10 additions & 3 deletions datafusion/physical-plan/src/joins/cross_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,22 @@ impl CrossJoinExec {
/// Create a new [CrossJoinExec].
pub fn new(left: Arc<dyn ExecutionPlan>, right: Arc<dyn ExecutionPlan>) -> Self {
// left then right
let all_columns: Fields = {
let (all_columns, metadata) = {
let left_schema = left.schema();
let right_schema = right.schema();
let left_fields = left_schema.fields().iter();
let right_fields = right_schema.fields().iter();
left_fields.chain(right_fields).cloned().collect()

let mut metadata = left_schema.metadata().clone();
metadata.extend(right_schema.metadata().clone());

(
left_fields.chain(right_fields).cloned().collect::<Fields>(),
metadata,
)
};

let schema = Arc::new(Schema::new(all_columns));
let schema = Arc::new(Schema::new(all_columns).with_metadata(metadata));
let cache = Self::compute_properties(&left, &right, Arc::clone(&schema));
CrossJoinExec {
left,
Expand Down
11 changes: 10 additions & 1 deletion datafusion/physical-plan/src/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,16 @@ fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> SchemaRef {
.iter()
.filter_map(|input| {
if input.schema().fields().len() > i {
Some(input.schema().field(i).clone())
let field = input.schema().field(i).clone();
let right_hand_metdata = inputs
.get(1)
.map(|right_input| {
right_input.schema().field(i).metadata().clone()
})
.unwrap_or_default();
let mut metadata = field.metadata().clone();
metadata.extend(right_hand_metdata);
Some(field.with_metadata(metadata))
} else {
None
}
Expand Down
8 changes: 7 additions & 1 deletion datafusion/sqllogictest/src/test_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,13 @@ pub async fn register_metadata_tables(ctx: &SessionContext) {
String::from("metadata_key"),
String::from("the name field"),
)]));
let l_name =
Field::new("l_name", DataType::Utf8, true).with_metadata(HashMap::from([(
String::from("metadata_key"),
String::from("the l_name field"),
)]));

let schema = Schema::new(vec![id, name]).with_metadata(HashMap::from([(
let schema = Schema::new(vec![id, name, l_name]).with_metadata(HashMap::from([(
String::from("metadata_key"),
String::from("the entire schema"),
)]));
Expand All @@ -321,6 +326,7 @@ pub async fn register_metadata_tables(ctx: &SessionContext) {
vec![
Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as _,
Arc::new(StringArray::from(vec![None, Some("bar"), Some("baz")])) as _,
Arc::new(StringArray::from(vec![None, Some("l_bar"), Some("l_baz")])) as _,
],
)
.unwrap();
Expand Down
31 changes: 30 additions & 1 deletion datafusion/sqllogictest/test_files/metadata.slt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
## with metadata in SQL.

query IT
select * from table_with_metadata;
select id, name from table_with_metadata;
----
1 NULL
NULL bar
Expand Down Expand Up @@ -96,5 +96,34 @@ select count(id) cnt from table_with_metadata group by name order by cnt;
1



# Regression test: missing schema metadata, when aggregate on cross join
query I
SELECT count("data"."id")
FROM
(
SELECT "id" FROM "table_with_metadata"
) as "data",
(
SELECT "id" FROM "table_with_metadata"
) as "samples";
----
6

# Regression test: missing field metadata, from the NULL field on the left side of the union
query ITT
(SELECT id, NULL::string as name, l_name FROM "table_with_metadata")
UNION
(SELECT id, name, NULL::string as l_name FROM "table_with_metadata")
ORDER BY id, name, l_name;
----
1 NULL NULL
3 baz NULL
3 NULL l_baz
NULL bar NULL
NULL NULL l_bar



statement ok
drop table table_with_metadata;

0 comments on commit 3d5938a

Please sign in to comment.