Skip to content

Commit cc96911

Browse files
committed
Simplify logical and physical case branch filtering logic
1 parent 51af749 commit cc96911

File tree

2 files changed

+117
-130
lines changed

2 files changed

+117
-130
lines changed

datafusion/expr/src/expr_schema.rs

Lines changed: 45 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -282,63 +282,52 @@ impl ExprSchemable for Expr {
282282
Expr::OuterReferenceColumn(field, _) => Ok(field.is_nullable()),
283283
Expr::Literal(value, _) => Ok(value.is_null()),
284284
Expr::Case(case) => {
285-
let nullable_then = if case.expr.is_some() {
286-
// Case-with-expression is nullable if any of the 'then' expressions.
287-
// Assume all 'then' expressions are reachable
288-
case.when_then_expr
289-
.iter()
290-
.filter_map(|(_, t)| match t.nullable(input_schema) {
291-
Ok(n) => {
292-
if n {
293-
Some(Ok(()))
294-
} else {
295-
None
296-
}
297-
}
298-
Err(e) => Some(Err(e)),
299-
})
300-
.next()
301-
} else {
302-
// case-without-expression is nullable if any of the 'then' expressions is nullable
303-
// and reachable when the 'then' expression evaluates to `null`.
304-
case.when_then_expr
305-
.iter()
306-
.filter_map(|(w, t)| {
307-
match t.nullable(input_schema) {
308-
// Branches with a then expression that is not nullable can be skipped
309-
Ok(false) => None,
310-
// Pass on error determining nullability verbatim
311-
Err(e) => Some(Err(e)),
312-
// For branches with a nullable 'then' expression, try to determine
313-
// using limited const evaluation if the branch will be taken when
314-
// the 'then' expression evaluates to null.
315-
Ok(true) => {
316-
let is_null = |expr: &Expr /* Type */| {
317-
if expr.eq(t) {
318-
Some(true)
319-
} else {
320-
None
321-
}
322-
};
323-
324-
let bounds = predicate_bounds::evaluate_bounds(
325-
w,
326-
is_null,
327-
input_schema,
328-
);
329-
if bounds.is_certainly_not_true() {
330-
// The branch will certainly never be taken.
331-
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
332-
None
333-
} else {
334-
// The branch might be taken
335-
Some(Ok(()))
336-
}
337-
}
285+
let is_case_with_expr = case.expr.is_some();
286+
let nullable_then = case
287+
.when_then_expr
288+
.iter()
289+
.filter_map(|(w, t)| {
290+
let is_nullable = match t.nullable(input_schema) {
291+
Err(e) => return Some(Err(e)),
292+
Ok(n) => n,
293+
};
294+
295+
// Branches with a then expression that is not nullable do not impact the
296+
// nullability of the case expression.
297+
if !is_nullable {
298+
return None;
299+
}
300+
301+
// Case-with-expression is nullable if any of the 'then' expressions.
302+
// Assume all 'then' expressions are reachable
303+
if is_case_with_expr {
304+
return Some(Ok(()));
305+
}
306+
307+
// For branches with a nullable 'then' expression, try to determine
308+
// if the 'then' expression is ever reachable in the situation where
309+
// it would evaluate to null.
310+
let is_null = |expr: &Expr /* Type */| {
311+
if expr.eq(t) {
312+
Some(true)
313+
} else {
314+
None
338315
}
339-
})
340-
.next()
341-
};
316+
};
317+
318+
let bounds =
319+
predicate_bounds::evaluate_bounds(w, is_null, input_schema);
320+
321+
if bounds.is_certainly_not_true() {
322+
// The branch will certainly never be taken.
323+
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
324+
None
325+
} else {
326+
// The branch might be taken
327+
Some(Ok(()))
328+
}
329+
})
330+
.next();
342331

343332
if let Some(nullable_then) = nullable_then {
344333
// There is at least one reachable nullable then

datafusion/physical-expr/src/expressions/case.rs

Lines changed: 72 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,57 +1283,56 @@ impl PhysicalExpr for CaseExpr {
12831283
}
12841284

12851285
fn nullable(&self, input_schema: &Schema) -> Result<bool> {
1286-
let nullable_then = if self.body.expr.is_some() {
1287-
// Case-with-expression is nullable if any of the 'then' expressions.
1288-
// Assume all 'then' expressions are reachable
1289-
self.body
1290-
.when_then_expr
1291-
.iter()
1292-
.filter_map(|(_, t)| match t.nullable(input_schema) {
1293-
Ok(n) => {
1294-
if n {
1295-
Some(Ok(()))
1296-
} else {
1297-
None
1298-
}
1299-
}
1300-
Err(e) => Some(Err(e)),
1301-
})
1302-
.next()
1303-
} else {
1304-
// case-without-expression is nullable if any of the 'then' expressions is nullable
1305-
// and reachable when the 'then' expression evaluates to `null`.
1306-
self.body
1307-
.when_then_expr
1308-
.iter()
1309-
.filter_map(|(w, t)| {
1310-
match t.nullable(input_schema) {
1311-
// Branches with a then expression that is not nullable can be skipped
1312-
Ok(false) => None,
1313-
// Pass on error determining nullability verbatim
1314-
Err(e) => Some(Err(e)),
1315-
Ok(true) => {
1316-
// For branches with a nullable 'then' expression, try to determine
1317-
// using const evaluation if the branch will be taken when
1318-
// the 'then' expression evaluates to null.
1319-
let is_null = |expr: &dyn PhysicalExpr /* Type */| {
1320-
expr.dyn_eq(t.as_ref())
1321-
};
1322-
1323-
match const_eval_predicate(w, is_null, input_schema) {
1324-
// Const evaluation was inconclusive or determined the branch
1325-
// would be taken
1326-
Ok(None) | Ok(Some(true)) => Some(Ok(())),
1327-
// Const evaluation proves the branch will never be taken.
1328-
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
1329-
Ok(Some(false)) => None,
1330-
Err(e) => Some(Err(e)),
1331-
}
1332-
}
1333-
}
1334-
})
1335-
.next()
1336-
};
1286+
let is_case_with_expr = self.body.expr.is_some();
1287+
1288+
let nullable_then = self
1289+
.body
1290+
.when_then_expr
1291+
.iter()
1292+
.filter_map(|(w, t)| {
1293+
let is_nullable = match t.nullable(input_schema) {
1294+
// Pass on error determining nullability verbatim
1295+
Err(e) => return Some(Err(e)),
1296+
Ok(n) => n,
1297+
};
1298+
1299+
// Branches with a then expression that is not nullable do not impact the
1300+
// nullability of the case expression.
1301+
if !is_nullable {
1302+
return None;
1303+
}
1304+
1305+
// Case-with-expression is nullable if any of the 'then' expressions.
1306+
// Assume all 'then' expressions are reachable
1307+
if is_case_with_expr {
1308+
return Some(Ok(()));
1309+
}
1310+
1311+
// For branches with a nullable 'then' expression, try to determine
1312+
// if the 'then' expression is ever reachable in the situation where
1313+
// it would evaluate to null.
1314+
1315+
// Replace `value` with `NULL` in `predicate`
1316+
let with_null = match replace_with_null(w, t.as_ref(), input_schema) {
1317+
Err(e) => return Some(Err(e)),
1318+
Ok(e) => e,
1319+
};
1320+
1321+
let predicate_result = match evaluate_predicate(&with_null) {
1322+
Err(e) => return Some(Err(e)),
1323+
Ok(b) => b,
1324+
};
1325+
1326+
match predicate_result {
1327+
// Const evaluation was inconclusive or determined the branch
1328+
// would be taken
1329+
None | Some(true) => Some(Ok(())),
1330+
// Const evaluation proves the branch will never be taken.
1331+
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
1332+
Some(false) => None,
1333+
}
1334+
})
1335+
.next();
13371336

13381337
if let Some(nullable_then) = nullable_then {
13391338
// There is at least one reachable nullable then
@@ -1441,32 +1440,12 @@ impl PhysicalExpr for CaseExpr {
14411440
}
14421441
}
14431442

1444-
/// Attempts to const evaluate the given `predicate` with the assumption that `value` evaluates to `NULL`.
1443+
/// Attempts to const evaluate the given `predicate`.
14451444
/// Returns:
14461445
/// - `Some(true)` if the predicate evaluates to a truthy value.
14471446
/// - `Some(false)` if the predicate evaluates to a falsy value.
14481447
/// - `None` if the predicate could not be evaluated.
1449-
fn const_eval_predicate<F>(
1450-
predicate: &Arc<dyn PhysicalExpr>,
1451-
evaluates_to_null: F,
1452-
input_schema: &Schema,
1453-
) -> Result<Option<bool>>
1454-
where
1455-
F: Fn(&dyn PhysicalExpr) -> bool,
1456-
{
1457-
// Replace `value` with `NULL` in `predicate`
1458-
let with_null = Arc::clone(predicate)
1459-
.transform_down(|e| {
1460-
if evaluates_to_null(e.as_ref()) {
1461-
let data_type = e.data_type(input_schema)?;
1462-
let null_literal = lit(ScalarValue::try_new_null(&data_type)?);
1463-
Ok(Transformed::yes(null_literal))
1464-
} else {
1465-
Ok(Transformed::no(e))
1466-
}
1467-
})?
1468-
.data;
1469-
1448+
fn evaluate_predicate(predicate: &Arc<dyn PhysicalExpr>) -> Result<Option<bool>> {
14701449
// Create a dummy record with no columns and one row
14711450
let batch = RecordBatch::try_new_with_options(
14721451
Arc::new(Schema::empty()),
@@ -1475,7 +1454,7 @@ where
14751454
)?;
14761455

14771456
// Evaluate the predicate and interpret the result as a boolean
1478-
let result = match with_null.evaluate(&batch) {
1457+
let result = match predicate.evaluate(&batch) {
14791458
// An error during evaluation means we couldn't const evaluate the predicate, so return `None`
14801459
Err(_) => None,
14811460
Ok(ColumnarValue::Array(array)) => Some(
@@ -1487,6 +1466,25 @@ where
14871466
Ok(result.map(|v| matches!(v, ScalarValue::Boolean(Some(true)))))
14881467
}
14891468

1469+
fn replace_with_null(
1470+
expr: &Arc<dyn PhysicalExpr>,
1471+
expr_to_replace: &dyn PhysicalExpr,
1472+
input_schema: &Schema,
1473+
) -> Result<Arc<dyn PhysicalExpr>, DataFusionError> {
1474+
let with_null = Arc::clone(expr)
1475+
.transform_down(|e| {
1476+
if e.as_ref().dyn_eq(expr_to_replace) {
1477+
let data_type = e.data_type(input_schema)?;
1478+
let null_literal = lit(ScalarValue::try_new_null(&data_type)?);
1479+
Ok(Transformed::yes(null_literal))
1480+
} else {
1481+
Ok(Transformed::no(e))
1482+
}
1483+
})?
1484+
.data;
1485+
Ok(with_null)
1486+
}
1487+
14901488
/// Create a CASE expression
14911489
pub fn case(
14921490
expr: Option<Arc<dyn PhysicalExpr>>,

0 commit comments

Comments
 (0)