Skip to content

Commit 4af84a7

Browse files
committed
Simplify logical and physical case branch filtering logic
1 parent 51af749 commit 4af84a7

File tree

2 files changed

+113
-130
lines changed

2 files changed

+113
-130
lines changed

datafusion/expr/src/expr_schema.rs

Lines changed: 44 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -282,63 +282,51 @@ impl ExprSchemable for Expr {
282282
Expr::OuterReferenceColumn(field, _) => Ok(field.is_nullable()),
283283
Expr::Literal(value, _) => Ok(value.is_null()),
284284
Expr::Case(case) => {
285-
let nullable_then = if case.expr.is_some() {
286-
// Case-with-expression is nullable if any of the 'then' expressions.
287-
// Assume all 'then' expressions are reachable
288-
case.when_then_expr
289-
.iter()
290-
.filter_map(|(_, t)| match t.nullable(input_schema) {
291-
Ok(n) => {
292-
if n {
293-
Some(Ok(()))
294-
} else {
295-
None
296-
}
297-
}
298-
Err(e) => Some(Err(e)),
299-
})
300-
.next()
301-
} else {
302-
// case-without-expression is nullable if any of the 'then' expressions is nullable
303-
// and reachable when the 'then' expression evaluates to `null`.
304-
case.when_then_expr
305-
.iter()
306-
.filter_map(|(w, t)| {
307-
match t.nullable(input_schema) {
308-
// Branches with a then expression that is not nullable can be skipped
309-
Ok(false) => None,
310-
// Pass on error determining nullability verbatim
311-
Err(e) => Some(Err(e)),
312-
// For branches with a nullable 'then' expression, try to determine
313-
// using limited const evaluation if the branch will be taken when
314-
// the 'then' expression evaluates to null.
315-
Ok(true) => {
316-
let is_null = |expr: &Expr /* Type */| {
317-
if expr.eq(t) {
318-
Some(true)
319-
} else {
320-
None
321-
}
322-
};
323-
324-
let bounds = predicate_bounds::evaluate_bounds(
325-
w,
326-
is_null,
327-
input_schema,
328-
);
329-
if bounds.is_certainly_not_true() {
330-
// The branch will certainly never be taken.
331-
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
332-
None
333-
} else {
334-
// The branch might be taken
335-
Some(Ok(()))
336-
}
337-
}
285+
let nullable_then = case
286+
.when_then_expr
287+
.iter()
288+
.filter_map(|(w, t)| {
289+
let is_nullable = match t.nullable(input_schema) {
290+
Err(e) => return Some(Err(e)),
291+
Ok(n) => n,
292+
};
293+
294+
// Branches with a then expression that is not nullable do not impact the
295+
// nullability of the case expression.
296+
if !is_nullable {
297+
return None;
298+
}
299+
300+
// For case-with-expression assume all 'then' expressions are reachable
301+
if case.expr.is_some() {
302+
return Some(Ok(()));
303+
}
304+
305+
// For branches with a nullable 'then' expression, try to determine
306+
// if the 'then' expression is ever reachable in the situation where
307+
// it would evaluate to null.
308+
let is_null = |expr: &Expr /* Type */| {
309+
if expr.eq(t) {
310+
Some(true)
311+
} else {
312+
None
338313
}
339-
})
340-
.next()
341-
};
314+
};
315+
316+
let bounds =
317+
predicate_bounds::evaluate_bounds(w, is_null, input_schema);
318+
319+
if bounds.is_certainly_not_true() {
320+
// The predicate will never evaluate to true, so the 'then' expression
321+
// is never reachable.
322+
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
323+
None
324+
} else {
325+
// The branch might be taken
326+
Some(Ok(()))
327+
}
328+
})
329+
.next();
342330

343331
if let Some(nullable_then) = nullable_then {
344332
// There is at least one reachable nullable then

datafusion/physical-expr/src/expressions/case.rs

Lines changed: 69 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,57 +1283,53 @@ impl PhysicalExpr for CaseExpr {
12831283
}
12841284

12851285
fn nullable(&self, input_schema: &Schema) -> Result<bool> {
1286-
let nullable_then = if self.body.expr.is_some() {
1287-
// Case-with-expression is nullable if any of the 'then' expressions.
1288-
// Assume all 'then' expressions are reachable
1289-
self.body
1290-
.when_then_expr
1291-
.iter()
1292-
.filter_map(|(_, t)| match t.nullable(input_schema) {
1293-
Ok(n) => {
1294-
if n {
1295-
Some(Ok(()))
1296-
} else {
1297-
None
1298-
}
1299-
}
1300-
Err(e) => Some(Err(e)),
1301-
})
1302-
.next()
1303-
} else {
1304-
// case-without-expression is nullable if any of the 'then' expressions is nullable
1305-
// and reachable when the 'then' expression evaluates to `null`.
1306-
self.body
1307-
.when_then_expr
1308-
.iter()
1309-
.filter_map(|(w, t)| {
1310-
match t.nullable(input_schema) {
1311-
// Branches with a then expression that is not nullable can be skipped
1312-
Ok(false) => None,
1313-
// Pass on error determining nullability verbatim
1314-
Err(e) => Some(Err(e)),
1315-
Ok(true) => {
1316-
// For branches with a nullable 'then' expression, try to determine
1317-
// using const evaluation if the branch will be taken when
1318-
// the 'then' expression evaluates to null.
1319-
let is_null = |expr: &dyn PhysicalExpr /* Type */| {
1320-
expr.dyn_eq(t.as_ref())
1321-
};
1322-
1323-
match const_eval_predicate(w, is_null, input_schema) {
1324-
// Const evaluation was inconclusive or determined the branch
1325-
// would be taken
1326-
Ok(None) | Ok(Some(true)) => Some(Ok(())),
1327-
// Const evaluation proves the branch will never be taken.
1328-
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
1329-
Ok(Some(false)) => None,
1330-
Err(e) => Some(Err(e)),
1331-
}
1332-
}
1333-
}
1334-
})
1335-
.next()
1336-
};
1286+
let nullable_then = self
1287+
.body
1288+
.when_then_expr
1289+
.iter()
1290+
.filter_map(|(w, t)| {
1291+
let is_nullable = match t.nullable(input_schema) {
1292+
// Pass on error determining nullability verbatim
1293+
Err(e) => return Some(Err(e)),
1294+
Ok(n) => n,
1295+
};
1296+
1297+
// Branches with a then expression that is not nullable do not impact the
1298+
// nullability of the case expression.
1299+
if !is_nullable {
1300+
return None;
1301+
}
1302+
1303+
// For case-with-expression assume all 'then' expressions are reachable
1304+
if self.body.expr.is_some() {
1305+
return Some(Ok(()));
1306+
}
1307+
1308+
// For branches with a nullable 'then' expression, try to determine
1309+
// if the 'then' expression is ever reachable in the situation where
1310+
// it would evaluate to null.
1311+
1312+
// Replace the `then` expression with `NULL` in the `when` expression
1313+
let with_null = match replace_with_null(w, t.as_ref(), input_schema) {
1314+
Err(e) => return Some(Err(e)),
1315+
Ok(e) => e,
1316+
};
1317+
1318+
// Try to const evaluate the modified `when` expression.
1319+
let predicate_result = match evaluate_predicate(&with_null) {
1320+
Err(e) => return Some(Err(e)),
1321+
Ok(b) => b,
1322+
};
1323+
1324+
match predicate_result {
1325+
// Evaluation was inconclusive or true, so the 'then' expression is reachable
1326+
None | Some(true) => Some(Ok(())),
1327+
// Evaluation proves the branch will never be taken.
1328+
// The most common pattern for this is `WHEN x IS NOT NULL THEN x`.
1329+
Some(false) => None,
1330+
}
1331+
})
1332+
.next();
13371333

13381334
if let Some(nullable_then) = nullable_then {
13391335
// There is at least one reachable nullable then
@@ -1441,32 +1437,12 @@ impl PhysicalExpr for CaseExpr {
14411437
}
14421438
}
14431439

1444-
/// Attempts to const evaluate the given `predicate` with the assumption that `value` evaluates to `NULL`.
1440+
/// Attempts to const evaluate the given `predicate`.
14451441
/// Returns:
14461442
/// - `Some(true)` if the predicate evaluates to a truthy value.
14471443
/// - `Some(false)` if the predicate evaluates to a falsy value.
14481444
/// - `None` if the predicate could not be evaluated.
1449-
fn const_eval_predicate<F>(
1450-
predicate: &Arc<dyn PhysicalExpr>,
1451-
evaluates_to_null: F,
1452-
input_schema: &Schema,
1453-
) -> Result<Option<bool>>
1454-
where
1455-
F: Fn(&dyn PhysicalExpr) -> bool,
1456-
{
1457-
// Replace `value` with `NULL` in `predicate`
1458-
let with_null = Arc::clone(predicate)
1459-
.transform_down(|e| {
1460-
if evaluates_to_null(e.as_ref()) {
1461-
let data_type = e.data_type(input_schema)?;
1462-
let null_literal = lit(ScalarValue::try_new_null(&data_type)?);
1463-
Ok(Transformed::yes(null_literal))
1464-
} else {
1465-
Ok(Transformed::no(e))
1466-
}
1467-
})?
1468-
.data;
1469-
1445+
fn evaluate_predicate(predicate: &Arc<dyn PhysicalExpr>) -> Result<Option<bool>> {
14701446
// Create a dummy record with no columns and one row
14711447
let batch = RecordBatch::try_new_with_options(
14721448
Arc::new(Schema::empty()),
@@ -1475,7 +1451,7 @@ where
14751451
)?;
14761452

14771453
// Evaluate the predicate and interpret the result as a boolean
1478-
let result = match with_null.evaluate(&batch) {
1454+
let result = match predicate.evaluate(&batch) {
14791455
// An error during evaluation means we couldn't const evaluate the predicate, so return `None`
14801456
Err(_) => None,
14811457
Ok(ColumnarValue::Array(array)) => Some(
@@ -1487,6 +1463,25 @@ where
14871463
Ok(result.map(|v| matches!(v, ScalarValue::Boolean(Some(true)))))
14881464
}
14891465

1466+
fn replace_with_null(
1467+
expr: &Arc<dyn PhysicalExpr>,
1468+
expr_to_replace: &dyn PhysicalExpr,
1469+
input_schema: &Schema,
1470+
) -> Result<Arc<dyn PhysicalExpr>, DataFusionError> {
1471+
let with_null = Arc::clone(expr)
1472+
.transform_down(|e| {
1473+
if e.as_ref().dyn_eq(expr_to_replace) {
1474+
let data_type = e.data_type(input_schema)?;
1475+
let null_literal = lit(ScalarValue::try_new_null(&data_type)?);
1476+
Ok(Transformed::yes(null_literal))
1477+
} else {
1478+
Ok(Transformed::no(e))
1479+
}
1480+
})?
1481+
.data;
1482+
Ok(with_null)
1483+
}
1484+
14901485
/// Create a CASE expression
14911486
pub fn case(
14921487
expr: Option<Arc<dyn PhysicalExpr>>,

0 commit comments

Comments
 (0)