Skip to content

Commit

Permalink
Merge branch 'main' into issue-9961
Browse files Browse the repository at this point in the history
  • Loading branch information
Ted-Jiang authored Apr 9, 2024
2 parents 5ecdc5c + 78f8ef1 commit 0d91757
Show file tree
Hide file tree
Showing 82 changed files with 3,210 additions and 1,870 deletions.
29 changes: 13 additions & 16 deletions datafusion/common/src/dfschema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ impl DFSchema {
&self,
qualifier: Option<&TableReference>,
name: &str,
) -> Result<Option<usize>> {
) -> Option<usize> {
let mut matches = self
.iter()
.enumerate()
Expand All @@ -345,19 +345,19 @@ impl DFSchema {
(None, Some(_)) | (None, None) => f.name() == name,
})
.map(|(idx, _)| idx);
Ok(matches.next())
matches.next()
}

/// Find the index of the column with the given qualifier and name
pub fn index_of_column(&self, col: &Column) -> Result<usize> {
self.index_of_column_by_name(col.relation.as_ref(), &col.name)?
self.index_of_column_by_name(col.relation.as_ref(), &col.name)
.ok_or_else(|| field_not_found(col.relation.clone(), &col.name, self))
}

/// Check if the column is in the current schema
pub fn is_column_from_schema(&self, col: &Column) -> Result<bool> {
pub fn is_column_from_schema(&self, col: &Column) -> bool {
self.index_of_column_by_name(col.relation.as_ref(), &col.name)
.map(|idx| idx.is_some())
.is_some()
}

/// Find the field with the given name
Expand All @@ -381,7 +381,7 @@ impl DFSchema {
) -> Result<(Option<&TableReference>, &Field)> {
if let Some(qualifier) = qualifier {
let idx = self
.index_of_column_by_name(Some(qualifier), name)?
.index_of_column_by_name(Some(qualifier), name)
.ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?;
Ok((self.field_qualifiers[idx].as_ref(), self.field(idx)))
} else {
Expand Down Expand Up @@ -519,7 +519,7 @@ impl DFSchema {
name: &str,
) -> Result<&Field> {
let idx = self
.index_of_column_by_name(Some(qualifier), name)?
.index_of_column_by_name(Some(qualifier), name)
.ok_or_else(|| field_not_found(Some(qualifier.clone()), name, self))?;

Ok(self.field(idx))
Expand Down Expand Up @@ -1190,11 +1190,8 @@ mod tests {
.to_string(),
expected_help
);
assert!(schema.index_of_column_by_name(None, "y").unwrap().is_none());
assert!(schema
.index_of_column_by_name(None, "t1.c0")
.unwrap()
.is_none());
assert!(schema.index_of_column_by_name(None, "y").is_none());
assert!(schema.index_of_column_by_name(None, "t1.c0").is_none());

Ok(())
}
Expand Down Expand Up @@ -1284,28 +1281,28 @@ mod tests {
{
let col = Column::from_qualified_name("t1.c0");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(schema.is_column_from_schema(&col)?);
assert!(schema.is_column_from_schema(&col));
}

// qualified not exists
{
let col = Column::from_qualified_name("t1.c2");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(!schema.is_column_from_schema(&col)?);
assert!(!schema.is_column_from_schema(&col));
}

// unqualified exists
{
let col = Column::from_name("c0");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(schema.is_column_from_schema(&col)?);
assert!(schema.is_column_from_schema(&col));
}

// unqualified not exists
{
let col = Column::from_name("c2");
let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?;
assert!(!schema.is_column_from_schema(&col)?);
assert!(!schema.is_column_from_schema(&col));
}

Ok(())
Expand Down
3 changes: 1 addition & 2 deletions datafusion/common/src/tree_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@ use crate::Result;
/// These macros are used to determine continuation during transforming traversals.
macro_rules! handle_transform_recursion {
($F_DOWN:expr, $F_CHILD:expr, $F_UP:expr) => {{
#[allow(clippy::redundant_closure_call)]
$F_DOWN?
.transform_children(|n| n.map_children($F_CHILD))?
.transform_parent(|n| $F_UP(n))
.transform_parent($F_UP)
}};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,12 +232,8 @@ impl PruningStatistics for BloomFilterStatistics {
ScalarValue::Float32(Some(v)) => sbbf.check(v),
ScalarValue::Int64(Some(v)) => sbbf.check(v),
ScalarValue::Int32(Some(v)) => sbbf.check(v),
ScalarValue::Int16(Some(v)) => sbbf.check(v),
ScalarValue::Int8(Some(v)) => sbbf.check(v),
ScalarValue::UInt64(Some(v)) => sbbf.check(v),
ScalarValue::UInt32(Some(v)) => sbbf.check(v),
ScalarValue::UInt16(Some(v)) => sbbf.check(v),
ScalarValue::UInt8(Some(v)) => sbbf.check(v),
ScalarValue::Decimal128(Some(v), p, s) => match parquet_type {
Type::INT32 => {
//https://github.com/apache/parquet-format/blob/eb4b31c1d64a01088d02a2f9aefc6c17c54cc6fc/Encodings.md?plain=1#L35-L42
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ use datafusion_common::{
alias::AliasGenerator,
config::{ConfigExtension, TableOptions},
exec_err, not_impl_err, plan_datafusion_err, plan_err,
tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
tree_node::{TreeNodeRecursion, TreeNodeVisitor},
SchemaReference, TableReference,
};
use datafusion_execution::registry::SerializerRegistry;
Expand Down Expand Up @@ -2298,7 +2298,7 @@ impl SQLOptions {
/// Return an error if the [`LogicalPlan`] has any nodes that are
/// incompatible with this [`SQLOptions`].
pub fn verify_plan(&self, plan: &LogicalPlan) -> Result<()> {
plan.visit(&mut BadPlanVisitor::new(self))?;
plan.visit_with_subqueries(&mut BadPlanVisitor::new(self))?;
Ok(())
}
}
Expand Down
54 changes: 26 additions & 28 deletions datafusion/core/tests/parquet/row_group_pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ async fn prune_disabled() {
// https://github.com/apache/arrow-datafusion/issues/9779 bug so that tests pass
// if and only if Bloom filters on Int8 and Int16 columns are still buggy.
macro_rules! int_tests {
($bits:expr, correct_bloom_filters: $correct_bloom_filters:expr) => {
($bits:expr) => {
paste::item! {
#[tokio::test]
async fn [<prune_int $bits _lt >]() {
Expand Down Expand Up @@ -329,9 +329,9 @@ macro_rules! int_tests {
.with_expected_errors(Some(0))
.with_matched_by_stats(Some(1))
.with_pruned_by_stats(Some(3))
.with_matched_by_bloom_filter(Some(if $correct_bloom_filters { 1 } else { 0 }))
.with_pruned_by_bloom_filter(Some(if $correct_bloom_filters { 0 } else { 1 }))
.with_expected_rows(if $correct_bloom_filters { 1 } else { 0 })
.with_matched_by_bloom_filter(Some(1))
.with_pruned_by_bloom_filter(Some(0))
.with_expected_rows(1)
.test_row_group_prune()
.await;
}
Expand All @@ -343,9 +343,9 @@ macro_rules! int_tests {
.with_expected_errors(Some(0))
.with_matched_by_stats(Some(1))
.with_pruned_by_stats(Some(3))
.with_matched_by_bloom_filter(Some(if $correct_bloom_filters { 1 } else { 0 }))
.with_pruned_by_bloom_filter(Some(if $correct_bloom_filters { 0 } else { 1 }))
.with_expected_rows(if $correct_bloom_filters { 1 } else { 0 })
.with_matched_by_bloom_filter(Some(1))
.with_pruned_by_bloom_filter(Some(0))
.with_expected_rows(1)
.test_row_group_prune()
.await;
}
Expand Down Expand Up @@ -404,9 +404,9 @@ macro_rules! int_tests {
.with_expected_errors(Some(0))
.with_matched_by_stats(Some(1))
.with_pruned_by_stats(Some(3))
.with_matched_by_bloom_filter(Some(if $correct_bloom_filters { 1 } else { 0 }))
.with_pruned_by_bloom_filter(Some(if $correct_bloom_filters { 0 } else { 1 }))
.with_expected_rows(if $correct_bloom_filters { 1 } else { 0 })
.with_matched_by_bloom_filter(Some(1))
.with_pruned_by_bloom_filter(Some(0))
.with_expected_rows(1)
.test_row_group_prune()
.await;
}
Expand Down Expand Up @@ -447,17 +447,16 @@ macro_rules! int_tests {
};
}

int_tests!(8, correct_bloom_filters: false);
int_tests!(16, correct_bloom_filters: false);
int_tests!(32, correct_bloom_filters: true);
int_tests!(64, correct_bloom_filters: true);
// int8/int16 tests are not instantiated: Bloom filter pruning is incorrect for these widths, see https://github.com/apache/arrow-datafusion/issues/9779
int_tests!(32);
int_tests!(64);

// $bits: number of bits of the integer to test (8, 16, 32, 64)
// $correct_bloom_filters: if false, replicates the
// https://github.com/apache/arrow-datafusion/issues/9779 bug so that tests pass
// if and only if Bloom filters on UInt8 and UInt16 columns are still buggy.
macro_rules! uint_tests {
($bits:expr, correct_bloom_filters: $correct_bloom_filters:expr) => {
($bits:expr) => {
paste::item! {
#[tokio::test]
async fn [<prune_uint $bits _lt >]() {
Expand All @@ -482,9 +481,9 @@ macro_rules! uint_tests {
.with_expected_errors(Some(0))
.with_matched_by_stats(Some(1))
.with_pruned_by_stats(Some(3))
.with_matched_by_bloom_filter(Some(if $correct_bloom_filters { 1 } else { 0 }))
.with_pruned_by_bloom_filter(Some(if $correct_bloom_filters { 0 } else { 1 }))
.with_expected_rows(if $correct_bloom_filters { 1 } else { 0 })
.with_matched_by_bloom_filter(Some(1))
.with_pruned_by_bloom_filter(Some(0))
.with_expected_rows(1)
.test_row_group_prune()
.await;
}
Expand All @@ -496,9 +495,9 @@ macro_rules! uint_tests {
.with_expected_errors(Some(0))
.with_matched_by_stats(Some(1))
.with_pruned_by_stats(Some(3))
.with_matched_by_bloom_filter(Some(if $correct_bloom_filters { 1 } else { 0 }))
.with_pruned_by_bloom_filter(Some(if $correct_bloom_filters { 0 } else { 1 }))
.with_expected_rows(if $correct_bloom_filters { 1 } else { 0 })
.with_matched_by_bloom_filter(Some(1))
.with_pruned_by_bloom_filter(Some(0))
.with_expected_rows(1)
.test_row_group_prune()
.await;
}
Expand Down Expand Up @@ -542,9 +541,9 @@ macro_rules! uint_tests {
.with_expected_errors(Some(0))
.with_matched_by_stats(Some(1))
.with_pruned_by_stats(Some(3))
.with_matched_by_bloom_filter(Some(if $correct_bloom_filters { 1 } else { 0 }))
.with_pruned_by_bloom_filter(Some(if $correct_bloom_filters { 0 } else { 1 }))
.with_expected_rows(if $correct_bloom_filters { 1 } else { 0 })
.with_matched_by_bloom_filter(Some(1))
.with_pruned_by_bloom_filter(Some(0))
.with_expected_rows(1)
.test_row_group_prune()
.await;
}
Expand Down Expand Up @@ -585,10 +584,9 @@ macro_rules! uint_tests {
};
}

uint_tests!(8, correct_bloom_filters: false);
uint_tests!(16, correct_bloom_filters: false);
uint_tests!(32, correct_bloom_filters: true);
uint_tests!(64, correct_bloom_filters: true);
// uint8/uint16 tests are not instantiated: Bloom filter pruning is incorrect for these widths, see https://github.com/apache/arrow-datafusion/issues/9779
uint_tests!(32);
uint_tests!(64);

#[tokio::test]
async fn prune_int32_eq_large_in_list() {
Expand Down
Loading

0 comments on commit 0d91757

Please sign in to comment.