From 123310b018ca6d70bf47ac7b191a318639dc7cf5 Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 14:41:46 +0300 Subject: [PATCH 1/8] Allow comparison between boolean and int values --- datafusion/expr-common/src/type_coercion/binary.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 3f9429bff71d..0bec2dae6f46 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -28,6 +28,7 @@ use arrow::datatypes::{ DataType, Field, FieldRef, Fields, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; +use arrow::datatypes::DataType::{Binary, BinaryView, Boolean, Int16, Int32, Int64, Int8, LargeBinary, LargeUtf8, Utf8, Utf8View}; use datafusion_common::types::NativeType; use datafusion_common::{ exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Diagnostic, @@ -734,6 +735,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { } } +/// Coercion rules for boolean types: If at least one argument is +/// a boolean type and both arguments can be coerced into a boolean type, coerce +/// to boolean type. +fn boolean_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { + use arrow::datatypes::DataType::*; + match (lhs_type, rhs_type) { + (Boolean, Int8 | Int16 | Int32 |Int64) | + (Int8 | Int16 | Int32 |Int64, Boolean)=> Some(Boolean), + _ => None, + } +} + /// Returns the output type of applying mathematics operations such as /// `+` to arguments of `lhs_type` and `rhs_type`. fn mathematics_numerical_coercion( From 9eb825d33de492e5b1811ee7253a4d9b58699c89 Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 14:42:44 +0300 Subject: [PATCH 2/8] Allow comparison between boolean and int values --- datafusion/expr-common/src/type_coercion/binary.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 0bec2dae6f46..4266a9caa3d5 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -28,7 +28,6 @@ use arrow::datatypes::{ DataType, Field, FieldRef, Fields, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, }; -use arrow::datatypes::DataType::{Binary, BinaryView, Boolean, Int16, Int32, Int64, Int8, LargeBinary, LargeUtf8, Utf8, Utf8View}; use datafusion_common::types::NativeType; use datafusion_common::{ exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Diagnostic, From 5f54ba43d825013e3cdf6c28b0862fe3c4e6d338 Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 14:47:01 +0300 Subject: [PATCH 3/8] Add integer types --- datafusion/expr-common/src/type_coercion/binary.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 4266a9caa3d5..7f906b572377 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -1014,8 +1014,10 @@ fn map_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { fn boolean_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { use arrow::datatypes::DataType::*; match (lhs_type, rhs_type) { - (Boolean, Int8 | Int16 | Int32 |Int64) | - (Int8 | Int16 | Int32 |Int64, Boolean)=> Some(Boolean), + (Boolean, Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64) + | (Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64, Boolean) => { + Some(Boolean) + } _ => None, } } From 92c2697880f101402c22a2c3c5a89b207c6122d1 Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 15:52:04 +0300 Subject: [PATCH 4/8] clippy+fmt+tests fix --- datafusion-examples/examples/dataframe.rs | 4 +-- .../expr-common/src/type_coercion/binary.rs | 26 +++++++++++++++++++ datafusion/expr/src/logical_plan/display.rs | 10 +++---- datafusion/functions/src/datetime/to_date.rs | 2 +- .../optimizer/src/analyzer/type_coercion.rs | 11 ++++++-- 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index 6f61c164f41d..811fca6a4dde 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -66,8 +66,8 @@ async fn main() -> Result<()> { write_out(&ctx).await?; register_aggregate_test_data("t1", &ctx).await?; register_aggregate_test_data("t2", &ctx).await?; - where_scalar_subquery(&ctx).await?; - where_in_subquery(&ctx).await?; + Box::pin(where_scalar_subquery(&ctx)).await?; + Box::pin(where_in_subquery(&ctx)).await?; where_exist_subquery(&ctx).await?; Ok(()) } diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 7f906b572377..c3af4d9ba221 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -2449,6 +2449,32 @@ mod tests { DataType::List(Arc::clone(&inner_field)) ); + // boolean + let int_types = vec![ + DataType::Int8, + DataType::Int16, + DataType::Int32, + DataType::Int64, + DataType::UInt8, + DataType::UInt16, + DataType::UInt32, + DataType::UInt64, + ]; + for int_type in int_types { + test_coercion_binary_rule!( + DataType::Boolean, + int_type, + Operator::Eq, + DataType::Boolean + ); + test_coercion_binary_rule!( + int_type, + DataType::Boolean, + Operator::Eq, + DataType::Boolean + ); + } + // Negative test: inner_timestamp_field and inner_field are not compatible because their inner types are not compatible let inner_timestamp_field = Arc::new(Field::new_list_field( DataType::Timestamp(TimeUnit::Microsecond, None), diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index 07a069cbb400..f2bb45909aac 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -797,12 +797,12 @@ mod tests { let pivot = Pivot { input: Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: schema.clone(), + schema: Arc::clone(&schema), })), aggregate_expr: Expr::Column(Column::from_name("sum_value")), pivot_column: Column::from_name("category"), pivot_values, - schema: schema.clone(), + schema: Arc::clone(&schema), value_subquery: None, default_on_null_expr: None, }; @@ -834,15 +834,15 @@ mod tests { let pivot = Pivot { input: Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: schema.clone(), + schema: Arc::clone(&schema), })), aggregate_expr: Expr::Column(Column::from_name("sum_value")), pivot_column: Column::from_name("category"), pivot_values: vec![], - schema: schema.clone(), + schema: Arc::clone(&schema), value_subquery: Some(Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: schema.clone(), + schema:Arc::clone(&schema), }))), default_on_null_expr: None, }; diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index ccea816ccf78..142fdf815a7e 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -166,7 +166,7 @@ mod tests { use arrow::datatypes::DataType; use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type}; use datafusion_common::ScalarValue; - use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use std::sync::Arc; #[test] diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index d47f7ea6ce68..fbae409c25ab 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1572,12 +1572,19 @@ mod test { let expected = "Projection: a IS TRUE\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; - let empty = empty_with_type(DataType::Int64); + let empty = empty_with_type(DataType::Float64); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); let ret = assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, ""); let err = ret.unwrap_err().to_string(); - assert!(err.contains("Cannot infer common argument type for comparison operation Int64 IS DISTINCT FROM Boolean"), "{err}"); + assert!(err.contains("Cannot infer common argument type for comparison operation Float64 IS DISTINCT FROM Boolean"), "{err}"); + // integer + let expr = col("a").is_true(); + let empty = empty_with_type(DataType::Int64); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); + let expected = "Projection: CAST(a AS Boolean) IS TRUE\n EmptyRelation"; + assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; + // is not true let expr = col("a").is_not_true(); let empty = empty_with_type(DataType::Boolean); From e42e1cc606ec175a621fa353d9a444f90de80f23 Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 16:01:41 +0300 Subject: [PATCH 5/8] tests fix --- datafusion/expr/src/logical_plan/display.rs | 2 +- datafusion/optimizer/src/analyzer/type_coercion.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index f2bb45909aac..5abc375510e5 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -842,7 +842,7 @@ mod tests { schema: Arc::clone(&schema), value_subquery: Some(Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema:Arc::clone(&schema), + schema: Arc::clone(&schema), }))), default_on_null_expr: None, }; diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index fbae409c25ab..df9738c70d24 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1584,7 +1584,7 @@ mod test { let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); let expected = "Projection: CAST(a AS Boolean) IS TRUE\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; - + // is not true let expr = col("a").is_not_true(); let empty = empty_with_type(DataType::Boolean); From 18009623f571135a75350b61ad4440792ef5c4aa Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 16:32:27 +0300 Subject: [PATCH 6/8] Fix incompatible types test --- datafusion/physical-expr/src/expressions/case.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 854c715eb0a2..494f9fe0d211 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -1086,7 +1086,7 @@ mod tests { #[test] fn case_test_incompatible() -> Result<()> { - // 1 then is int64 + // 1 then is float64 // 2 then is boolean let batch = case_test_batch()?; let schema = batch.schema(); @@ -1098,7 +1098,7 @@ mod tests { lit("foo"), &batch.schema(), )?; - let then1 = lit(123i32); + let then1 = lit(1.23f64); let when2 = binary( col("a", &schema)?, Operator::Eq, From 82ee464eef3682e1c3d93555bc7ee67b3a9f807d Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 16:50:57 +0300 Subject: [PATCH 7/8] Fix slt test --- datafusion/sqllogictest/test_files/scalar.slt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index f583d659fd4f..dae5fc68b3c4 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1536,8 +1536,10 @@ SELECT not(true), not(false) ---- false true -query error type_coercion\ncaused by\nError during planning: Cannot infer common argument type for comparison operation Int64 IS DISTINCT FROM Boolean +query BB SELECT not(1), not(0) +---- +false true query ?B SELECT null, not(null) From 9b68f8c0ea2592113d8695f626af07f534902753 Mon Sep 17 00:00:00 2001 From: osipovartem Date: Wed, 16 Jul 2025 18:30:13 +0300 Subject: [PATCH 8/8] Fix docs --- docs/source/library-user-guide/adding-udfs.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 8fb8a59fb860..4f5b4b779d73 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1075,8 +1075,8 @@ use datafusion_expr::Expr; pub struct EchoFunction {} impl TableFunctionImpl for EchoFunction { - fn call(&self, exprs: &[Expr]) -> Result> { - let Some(Expr::Literal(ScalarValue::Int64(Some(value)))) = exprs.get(0) else { + fn call(&self, exprs: &[(datafusion_expr::Expr, Option)]) -> Result> { + let Some((Expr::Literal(ScalarValue::Int64(Some(value))), _)) = exprs.get(0) else { return plan_err!("First argument must be an integer"); }; @@ -1116,8 +1116,8 @@ With the UDTF implemented, you can register it with the `SessionContext`: # pub struct EchoFunction {} # # impl TableFunctionImpl for EchoFunction { -# fn call(&self, exprs: &[Expr]) -> Result> { -# let Some(Expr::Literal(ScalarValue::Int64(Some(value)))) = exprs.get(0) else { +# fn call(&self, exprs: &[(datafusion_expr::Expr, Option)]) -> Result> { +# let Some((Expr::Literal(ScalarValue::Int64(Some(value))), _)) = exprs.get(0) else { # return plan_err!("First argument must be an integer"); # }; #