Skip to content

Commit

Permalink
fix: nullability semantics of predicates in JoinRel and FilterRel (#52)
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrobbel authored Sep 20, 2022
1 parent 6c838d9 commit add22d9
Show file tree
Hide file tree
Showing 10 changed files with 272 additions and 142 deletions.
1 change: 1 addition & 0 deletions rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ use strum::IntoEnumIterator;
// Aliases for common types used on the crate interface.
pub use input::config::glob::Pattern;
pub use input::config::Config;
pub use output::comment::Comment;
pub use output::diagnostic::Classification;
pub use output::diagnostic::Diagnostic;
pub use output::diagnostic::Level;
Expand Down
2 changes: 1 addition & 1 deletion rs/src/parse/expressions/literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ fn parse_decimal(
if val >= range || val <= -range {
Err(cause!(
ExpressionIllegalLiteralValue,
"decimal value is out of range for specificied precision and scale"
"decimal value is out of range for specified precision and scale"
))
} else {
Literal::new_compound(
Expand Down
9 changes: 2 additions & 7 deletions rs/src/parse/expressions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ use crate::util;
use crate::util::string::Describe;

/// Description of an expression.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, Default, PartialEq)]
pub enum Expression {
/// Used for unknown expression types.
#[default]
Unresolved,

/// Used for literals.
Expand All @@ -44,12 +45,6 @@ pub enum Expression {
Cast(data::Type, Box<Expression>),
}

impl Default for Expression {
fn default() -> Self {
Expression::Unresolved
}
}

impl From<literals::Literal> for Expression {
fn from(l: literals::Literal) -> Self {
Expression::Literal(l)
Expand Down
10 changes: 3 additions & 7 deletions rs/src/parse/relations/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,10 @@ pub fn parse_filter_rel(
describe!(y, Relation, "Filter by {}", &predicate);
summary!(
y,
"This relation discards all rows for which {} yields false.",
&predicate
"This relation discards all rows for which the expression {} yields {}.",
&predicate,
if nullable { "false or null" } else { "false" }
);
if nullable {
// FIXME: what's the behavior when a filter condition is nullable and
// yields null? Same applies for all other usages of parse_predicate().
summary!(y, "Behavior for a null condition is unspecified.");
}

// Handle the common field.
handle_rel_common!(x, y);
Expand Down
129 changes: 71 additions & 58 deletions rs/src/parse/relations/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,9 @@ pub fn parse_join_rel(x: &substrait::JoinRel, y: &mut context::Context) -> diagn
}

// Parse join expression.
let join_expression =
proto_boxed_required_field!(x, y, expression, expressions::parse_predicate)
.1
.unwrap_or_default();
let (join_expression_node, opt_join_expression) =
proto_boxed_required_field!(x, y, expression, expressions::parse_predicate);
let join_expression = opt_join_expression.unwrap_or_default();

// Parse join type.
let join_type = proto_required_enum_field!(x, y, r#type, JoinType)
Expand Down Expand Up @@ -84,7 +83,7 @@ pub fn parse_join_rel(x: &substrait::JoinRel, y: &mut context::Context) -> diagn

// Handle optional post-join filter.
let filter_expression =
proto_boxed_field!(x, y, post_join_filter, expressions::parse_predicate).1;
proto_boxed_field!(x, y, post_join_filter, expressions::parse_predicate);

// Describe the relation.
let prefix = match (join_type, x.post_join_filter.is_some()) {
Expand All @@ -106,59 +105,73 @@ pub fn parse_join_rel(x: &substrait::JoinRel, y: &mut context::Context) -> diagn
};
describe!(y, Relation, "{prefix} join by {join_expression}");
summary!(y, "{prefix} join by {join_expression:#}.");
y.push_summary(comment::Comment::new().nl().plain(match join_type {
JoinType::Unspecified => "",
JoinType::Inner => concat!(
" Returns rows combining the row from the left and right ",
"input for each pair where the join expression yields true.",
),
JoinType::Outer => concat!(
" Returns rows combining the row from the left and right ",
"input for each pair where the join expression yields true. ",
"If the join expression never yields true for any left or ",
"right row, this returns a row anyway, with the fields ",
"corresponding to the other input set to null.",
),
JoinType::Left => concat!(
" Returns rows combining the row from the left and right ",
"input for each pair where the join expression yields true. ",
"If the join expression never yields true for a row from the ",
"left, this returns a row anyway, with the fields corresponding ",
"to the right input set to null.",
),
JoinType::Right => concat!(
" Returns rows combining the row from the left and right ",
"input for each pair where the join expression yields true. ",
"If the join expression never yields true for a row from the ",
"right, this returns a row anyway, with the fields corresponding ",
"to the left input set to null.",
),
JoinType::Semi => concat!(
" Filters rows from the left input, propagating a row only if ",
"the join expression yields true for that row combined with ",
"any row from the right input.",
),
JoinType::Anti => concat!(
" Filters rows from the left input, propagating a row only if ",
"the join expression does not yield true for that row combined ",
"with any row from the right input.",
),
JoinType::Single => concat!(
" Returns a row for each row from the left input, concatenating ",
"it with the row from the right input for which the join ",
"expression yields true. If the expression never yields true for ",
"a left input, the fields corresponding to the right input are ",
"set to null. If the expression yields true for a left row and ",
"multiple right rows, this may return the first pair encountered ",
"or throw an error."
),
}));
if let Some(filter_expression) = filter_expression {
y.push_summary(
comment::Comment::new()
.nl()
.plain(format!("The result is filtered by {filter_expression:#}.")),
);
let nullable = if join_expression_node.data_type().nullable() {
"false or null"
} else {
"false"
};
y.push_summary(
comment::Comment::new().nl().plain(match join_type {
JoinType::Unspecified => "".to_string(),
JoinType::Inner => format!(
"Returns rows combining the row from the left and right \
input for each pair where the join expression yields true, \
discarding rows where the join expression yields {}.",
nullable
),
JoinType::Outer => format!(
"Returns rows combining the row from the left and right \
input for each pair where the join expression yields true, \
discarding rows where the join expression yields {}. \
If the join expression never yields true for any left or \
right row, this returns a row anyway, with the fields \
corresponding to the other input set to null.",
nullable
),
JoinType::Left => format!(
"Returns rows combining the row from the left and right \
input for each pair where the join expression yields true, \
discarding rows where the join expression yields {}. \
If the join expression never yields true for a row from the \
left, this returns a row anyway, with the fields corresponding \
to the right input set to null.",
nullable
),
JoinType::Right => format!(
"Returns rows combining the row from the left and right \
input for each pair where the join expression yields true, \
discarding rows where the join expression yields {}. \
If the join expression never yields true for a row from the \
right, this returns a row anyway, with the fields corresponding \
to the left input set to null.",
nullable
),
JoinType::Semi => "Filters rows from the left input, propagating a row only if \
the join expression yields true for that row combined with \
any row from the right input."
.to_string(),
JoinType::Anti => "Filters rows from the left input, propagating a row only if \
the join expression does not yield true for that row combined \
with any row from the right input."
.to_string(),
JoinType::Single => "Returns a row for each row from the left input, concatenating \
it with the row from the right input for which the join \
expression yields true. If the expression never yields true for \
a left input, the fields corresponding to the right input are \
set to null. If the expression yields true for a left row and \
multiple right rows, this may return the first pair encountered \
or throw an error."
.to_string(),
}),
);

if let (Some(node), Some(filter_expression)) = filter_expression {
let nullable = node.data_type().nullable();
y.push_summary(comment::Comment::new().nl().plain(format!(
"The result is filtered by the expression {filter_expression:#}, \
discarding all rows for which the filter expression yields {}.",
if nullable { "false or null" } else { "false" }
)));
}

// Handle the common field.
Expand Down
16 changes: 16 additions & 0 deletions tests/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,17 @@ def parse_type_instruction(type_str, path):
return [dict(DataType=dict(path=path, data_type=type_str))]


def parse_comment_instruction(comment_test, path):
"""Parses a comment check instruction in the input format into the
Rust/serde instruction syntax."""
if comment_test is None:
return []

if not isinstance(comment_test, str):
raise Exception("__test.comment must be a string")
return [dict(Comment=dict(path=path, msg=comment_test))]


def parse_instructions(test_tags, fname, proto_desc):
"""Parses and checks the syntax for instructions in the input format into
the Rust/serde instruction syntax."""
Expand All @@ -327,6 +338,11 @@ def parse_instructions(test_tags, fname, proto_desc):
parse_type_instruction(insn_type.pop("type", None), path)
)

# Handle comment instructions.
instructions.extend(
parse_comment_instruction(insn_type.pop("comment", None), path)
)

if insn_type:
raise Exception(
"Found unknown __test key(s): {}".format(
Expand Down
25 changes: 25 additions & 0 deletions tests/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ struct DiagnosticTest {
pub after: Option<PathElement>,
}

#[derive(serde::Deserialize, Debug)]
struct CommentTest {
pub path: Vec<PathElement>,
pub msg: String,
}

#[derive(serde::Deserialize, Debug)]
struct DataTypeTest {
pub path: Vec<PathElement>,
Expand Down Expand Up @@ -123,6 +129,7 @@ enum Instruction {
Level(LevelTest),
Diag(DiagnosticTest),
DataType(DataTypeTest),
Comment(CommentTest),
}

/// A diagnostic level override command.
Expand Down Expand Up @@ -359,6 +366,23 @@ impl TestCase {
})
}

/// Runs the given comment test instruction.
fn run_comment_test(
result: &mut TestResult,
root: &mut sv::output::tree::Node,
desc: &CommentTest,
) {
let path = convert_path(&desc.path);
result.log(format!("Checking comment at {path}..."));
Self::traverse(result, root, path.elements.iter(), |result, node| {
let actual = format!("{}", node.summary.clone().unwrap_or_default());
let pattern = glob::Pattern::new(&desc.msg).unwrap();
if !pattern.matches(&actual) {
result.error(format!("comment mismatch; found {actual}"));
}
})
}

/// Runs the given test case, updating result.
fn run(
result: &mut TestResult,
Expand Down Expand Up @@ -420,6 +444,7 @@ impl TestCase {
Instruction::DataType(data_type) => {
Self::run_data_type_test(result, &mut root, data_type)
}
Instruction::Comment(comment) => Self::run_comment_test(result, &mut root, comment),
}
}
}
Expand Down
46 changes: 46 additions & 0 deletions tests/tests/relations/filter/nullability-bool-expression.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: filter-nullability-bool-expression
plan:
__test: [level: i]
relations:
- rel:
filter:
input:
read:
baseSchema:
names: [a, b]
struct:
nullability: NULLABILITY_REQUIRED
types:
- string: { nullability: NULLABILITY_REQUIRED }
- bool: { nullability: NULLABILITY_NULLABLE }
namedTable:
names:
- test
condition:
literal:
nullable: true
boolean: false
__test:
[
comment: "*false or null.",
type: "NSTRUCT<a: string, b: boolean?>",
]
- rel:
filter:
input:
read:
baseSchema:
names: [a, b]
struct:
nullability: NULLABILITY_REQUIRED
types:
- string: { nullability: NULLABILITY_REQUIRED }
- bool: { nullability: NULLABILITY_REQUIRED }
namedTable:
names:
- test
condition:
literal:
nullable: false
boolean: false
__test: [comment: "*false.", type: "NSTRUCT<a: string, b: boolean>"]
Loading

0 comments on commit add22d9

Please sign in to comment.