From 1e326039f7a523b4d9637d05136a574acf023935 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 15 Oct 2019 20:10:52 +0800 Subject: [PATCH 1/4] [SPARK-27880][SQL] Add support for bool_and and bool_or aggregates --- .../catalyst/analysis/FunctionRegistry.scala | 2 + .../aggregate/UnevaluableAggs.scala | 32 ++++ .../catalyst/optimizer/finishAnalysis.scala | 2 + .../ExpressionTypeCheckingSuite.scala | 2 + .../resources/sql-tests/inputs/group-by.sql | 18 ++- .../sql-tests/results/group-by.sql.out | 138 ++++++++++++------ 6 files changed, 146 insertions(+), 48 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index d5728b902757..159f9342d41f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -316,6 +316,8 @@ object FunctionRegistry { expression[EveryAgg]("every"), expression[AnyAgg]("any"), expression[SomeAgg]("some"), + expression[BoolAnd]("bool_and"), + expression[BoolOr]("bool_or"), // string functions expression[Ascii]("ascii"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala index 4562fbcff5f3..4e92913a9b03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala @@ -87,3 +87,35 @@ case class AnyAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { case class SomeAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { override def nodeName: String = "Some" } + +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns true if at least one value of `expr` is true.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (true), (false), (false) AS tab(col); + true + > SELECT _FUNC_(col) FROM VALUES (NULL), (true), (false) AS tab(col); + true + > SELECT _FUNC_(col) FROM VALUES (false), (false), (NULL) AS tab(col); + false + """, + since = "3.0.0") +case class BoolOr(arg: Expression) extends UnevaluableBooleanAggBase(arg) { + override def nodeName: String = "bool_or" +} + +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns true if all values of `expr` are true.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (true), (true), (true) AS tab(col); + true + > SELECT _FUNC_(col) FROM VALUES (NULL), (true), (true) AS tab(col); + true + > SELECT _FUNC_(col) FROM VALUES (true), (false), (true) AS tab(col); + false + """, + since = "3.0.0") +case class BoolAnd(arg: Expression) extends UnevaluableBooleanAggBase(arg) { + override def nodeName: String = "bool_and" +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 9d7564175314..e0c122038524 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -50,6 +50,8 @@ object ReplaceExpressions extends Rule[LogicalPlan] { case SomeAgg(arg) => Max(arg) case AnyAgg(arg) => Max(arg) case EveryAgg(arg) => Min(arg) + case BoolOr(arg) => Max(arg) + case BoolAnd(arg) => Min(arg) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 4440ac9e281c..1dae7edd03a8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -147,6 +147,8 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite { assertSuccess(new EveryAgg('booleanField)) assertSuccess(new AnyAgg('booleanField)) assertSuccess(new SomeAgg('booleanField)) + assertSuccess(BoolOr('booleanField)) + assertSuccess(BoolAnd('booleanField)) assertError(Min('mapField), "min does not support ordering on type") assertError(Max('mapField), "max does not support ordering on type") diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 66bc90914e0d..fcde225676cb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -90,16 +90,16 @@ CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (5, null), (5, true), (5, false) AS test_agg(k, v); -- empty table -SELECT every(v), some(v), any(v) FROM test_agg WHERE 1 = 0; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0; -- all null values -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 4; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4; -- aggregates are null Filtering -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 5; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5; -- group by -SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k; +SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k; -- having SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false; @@ -137,10 +137,18 @@ SELECT any(1L); -- input type checking String SELECT every("true"); --- every/some/any aggregates are supported as windows expression. +-- input type checking Decimal +SELECT bool_and(1.0); + +-- input type checking double +SELECT bool_or(1.0D); + +-- every/some/any aggregates/bool_and/bool_or are supported as windows expression. SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; +SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; +SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; -- Having referencing aggregate expressions is ok. SELECT count(*) FROM test_agg HAVING count(*) > 1L; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 3a5df254f2cd..ed5ced8c8c0f 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 52 +-- Number of queries: 56 -- !query 0 @@ -291,39 +291,39 @@ struct<> -- !query 31 -SELECT every(v), some(v), any(v) FROM test_agg WHERE 1 = 0 +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0 -- !query 31 schema -struct +struct -- !query 31 output -NULL NULL NULL +NULL NULL NULL NULL NULL -- !query 32 -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 4 +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4 -- !query 32 schema -struct +struct -- !query 32 output -NULL NULL NULL +NULL NULL NULL NULL NULL -- !query 33 -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 5 +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5 -- !query 33 schema -struct +struct -- !query 33 output -false true true +false true true false true -- !query 34 -SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k +SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k -- !query 34 schema -struct +struct -- !query 34 output -1 false true true -2 true true true -3 false false false -4 NULL NULL NULL -5 false true true +1 false true true false true +2 true true true true true +3 false false false false false +4 NULL NULL NULL NULL NULL +5 false true true false true -- !query 35 @@ -411,10 +411,28 @@ cannot resolve 'every('true')' due to data type mismatch: Input to function 'eve -- !query 43 -SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +SELECT bool_and(1.0) -- !query 43 schema -struct +struct<> -- !query 43 output +org.apache.spark.sql.AnalysisException +cannot resolve 'bool_and(1.0BD)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7 + + +-- !query 44 +SELECT bool_or(1.0D) +-- !query 44 schema +struct<> +-- !query 44 output +org.apache.spark.sql.AnalysisException +cannot resolve 'bool_or(1.0D)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [double].; line 1 pos 7 + + +-- !query 45 +SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +-- !query 45 schema +struct +-- !query 45 output 1 false false 1 true false 2 true true @@ -427,11 +445,11 @@ struct --- !query 44 output +-- !query 46 output 1 false false 1 true true 2 true true @@ -444,11 +462,11 @@ struct --- !query 45 output +-- !query 47 output 1 false false 1 true true 2 true true @@ -461,37 +479,71 @@ struct +-- !query 48 output +1 false false +1 true false +2 true true +3 NULL NULL +3 false false +4 NULL NULL +4 NULL NULL +5 NULL NULL +5 false false +5 true false + + +-- !query 49 +SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +-- !query 49 schema +struct +-- !query 49 output +1 false false +1 true true +2 true true +3 NULL NULL +3 false false +4 NULL NULL +4 NULL NULL +5 NULL NULL +5 false false +5 true true + + +-- !query 50 SELECT count(*) FROM test_agg HAVING count(*) > 1L --- !query 46 schema +-- !query 50 schema struct --- !query 46 output +-- !query 50 output 10 --- !query 47 +-- !query 51 SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true --- !query 47 schema +-- !query 51 schema struct --- !query 47 output +-- !query 51 output 1 true 2 true 5 true --- !query 48 +-- !query 52 SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L --- !query 48 schema +-- !query 52 schema struct --- !query 48 output +-- !query 52 output 10 --- !query 49 +-- !query 53 SELECT count(*) FROM test_agg WHERE count(*) > 1L --- !query 49 schema +-- !query 53 schema struct<> --- !query 49 output +-- !query 53 output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. @@ -499,11 +551,11 @@ Expression in where clause: [(count(1) > 1L)] Invalid expressions: [count(1)]; --- !query 50 +-- !query 54 SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L --- !query 50 schema +-- !query 54 schema struct<> --- !query 50 output +-- !query 54 output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. @@ -511,11 +563,11 @@ Expression in where clause: [((count(1) + 1L) > 1L)] Invalid expressions: [count(1)]; --- !query 51 +-- !query 55 SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1 --- !query 51 schema +-- !query 55 schema struct<> --- !query 51 output +-- !query 55 output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. From 0a097426dc54ddb8b32b4802456c3e9b1ca7113a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 16 Oct 2019 14:54:37 +0800 Subject: [PATCH 2/4] alias --- .../catalyst/analysis/FunctionRegistry.scala | 6 +-- .../aggregate/UnevaluableAggs.scala | 48 ------------------- .../catalyst/optimizer/finishAnalysis.scala | 3 -- .../ExpressionTypeCheckingSuite.scala | 3 -- .../sql-tests/results/group-by.sql.out | 20 ++++---- .../results/udf/udf-group-by.sql.out | 12 ++--- 6 files changed, 19 insertions(+), 73 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 159f9342d41f..8cf0928c2afd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -314,10 +314,10 @@ object FunctionRegistry { expression[CollectSet]("collect_set"), expression[CountMinSketchAgg]("count_min_sketch"), expression[EveryAgg]("every"), + expression[EveryAgg]("bool_and"), expression[AnyAgg]("any"), - expression[SomeAgg]("some"), - expression[BoolAnd]("bool_and"), - expression[BoolOr]("bool_or"), + expression[AnyAgg]("some"), + expression[AnyAgg]("bool_or"), // string functions expression[Ascii]("ascii"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala index 4e92913a9b03..a8220ec641d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala @@ -71,51 +71,3 @@ case class EveryAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { case class AnyAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { override def nodeName: String = "Any" } - -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns true if at least one value of `expr` is true.", - examples = """ - Examples: - > SELECT _FUNC_(col) FROM VALUES (true), (false), (false) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (NULL), (true), (false) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (false), (false), (NULL) AS tab(col); - false - """, - since = "3.0.0") -case class SomeAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { - override def nodeName: String = "Some" -} - -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns true if at least one value of `expr` is true.", - examples = """ - Examples: - > SELECT _FUNC_(col) FROM VALUES (true), (false), (false) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (NULL), (true), (false) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (false), (false), (NULL) AS tab(col); - false - """, - since = "3.0.0") -case class BoolOr(arg: Expression) extends UnevaluableBooleanAggBase(arg) { - override def nodeName: String = "bool_or" -} - -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns true if all values of `expr` are true.", - examples = """ - Examples: - > SELECT _FUNC_(col) FROM VALUES (true), (true), (true) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (NULL), (true), (true) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (true), (false), (true) AS tab(col); - false - """, - since = "3.0.0") -case class BoolAnd(arg: Expression) extends UnevaluableBooleanAggBase(arg) { - override def nodeName: String = "bool_and" -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index e0c122038524..70277526cba8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -47,11 +47,8 @@ object ReplaceExpressions extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case e: RuntimeReplaceable => e.child case CountIf(predicate) => Count(new NullIf(predicate, Literal.FalseLiteral)) - case SomeAgg(arg) => Max(arg) case AnyAgg(arg) => Max(arg) case EveryAgg(arg) => Min(arg) - case BoolOr(arg) => Max(arg) - case BoolAnd(arg) => Min(arg) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 1dae7edd03a8..ed11bce5d12b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -146,9 +146,6 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite { assertSuccess(Min('arrayField)) assertSuccess(new EveryAgg('booleanField)) assertSuccess(new AnyAgg('booleanField)) - assertSuccess(new SomeAgg('booleanField)) - assertSuccess(BoolOr('booleanField)) - assertSuccess(BoolAnd('booleanField)) assertError(Min('mapField), "min does not support ordering on type") assertError(Max('mapField), "max does not support ordering on type") diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index ed5ced8c8c0f..545aa238dd75 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -293,7 +293,7 @@ struct<> -- !query 31 SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0 -- !query 31 schema -struct +struct -- !query 31 output NULL NULL NULL NULL NULL @@ -301,7 +301,7 @@ NULL NULL NULL NULL NULL -- !query 32 SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4 -- !query 32 schema -struct +struct -- !query 32 output NULL NULL NULL NULL NULL @@ -309,7 +309,7 @@ NULL NULL NULL NULL NULL -- !query 33 SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5 -- !query 33 schema -struct +struct -- !query 33 output false true true false true @@ -317,7 +317,7 @@ false true true false true -- !query 34 SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k -- !query 34 schema -struct +struct -- !query 34 output 1 false true true false true 2 true true true true true @@ -389,7 +389,7 @@ SELECT some(1S) struct<> -- !query 40 output org.apache.spark.sql.AnalysisException -cannot resolve 'some(1S)' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 +cannot resolve 'any(1S)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7 -- !query 41 @@ -416,7 +416,7 @@ SELECT bool_and(1.0) struct<> -- !query 43 output org.apache.spark.sql.AnalysisException -cannot resolve 'bool_and(1.0BD)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7 +cannot resolve 'every(1.0BD)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7 -- !query 44 @@ -425,7 +425,7 @@ SELECT bool_or(1.0D) struct<> -- !query 44 output org.apache.spark.sql.AnalysisException -cannot resolve 'bool_or(1.0D)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [double].; line 1 pos 7 +cannot resolve 'any(1.0D)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [double].; line 1 pos 7 -- !query 45 @@ -448,7 +448,7 @@ struct +struct -- !query 46 output 1 false false 1 true true @@ -482,7 +482,7 @@ struct +struct -- !query 48 output 1 false false 1 true false @@ -499,7 +499,7 @@ struct +struct -- !query 49 output 1 false false 1 true true diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index febe47b5ba84..b762d18fb839 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -293,7 +293,7 @@ struct<> -- !query 31 SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0 -- !query 31 schema -struct +struct -- !query 31 output NULL NULL NULL @@ -301,7 +301,7 @@ NULL NULL NULL -- !query 32 SELECT udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4 -- !query 32 schema -struct +struct -- !query 32 output NULL NULL NULL @@ -309,7 +309,7 @@ NULL NULL NULL -- !query 33 SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5 -- !query 33 schema -struct +struct -- !query 33 output false true true @@ -317,7 +317,7 @@ false true true -- !query 34 SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k) -- !query 34 schema -struct +struct -- !query 34 output 1 false true true 2 true true true @@ -389,7 +389,7 @@ SELECT some(udf(1S)) struct<> -- !query 40 output org.apache.spark.sql.AnalysisException -cannot resolve 'some(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 +cannot resolve 'any(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7 -- !query 41 @@ -430,7 +430,7 @@ struct +struct -- !query 44 output 1 false false 1 true true From ce32134c334d873d10dcfda2103c86608953b1f6 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 16 Oct 2019 18:02:14 +0800 Subject: [PATCH 3/4] fix ut --- sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 37183556d792..630cf22fb20d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -95,7 +95,7 @@ class ExplainSuite extends QueryTest with SharedSparkSession { // plan should show the rewritten aggregate expression. val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k") checkKeywordsExistsInExplain(df, - "Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS some(v)#x, " + + "Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS any(v)#x, " + "max(v#x) AS any(v)#x]") } } From 547c5240f1ee573d78559a16aefa9bfef37e2a65 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 16 Oct 2019 18:15:24 +0800 Subject: [PATCH 4/4] enable tests: aggregates_part2.sql#L116-L160 and udf-aggregates_part2.sql#L118-L162 --- .../inputs/postgreSQL/aggregates_part2.sql | 68 ++++----- .../udf/postgreSQL/udf-aggregates_part2.sql | 68 ++++----- .../postgreSQL/aggregates_part2.sql.out | 140 +++++++++++++----- .../postgreSQL/udf-aggregates_part2.sql.out | 140 +++++++++++++----- 4 files changed, 262 insertions(+), 154 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql index 47f9d2f37306..144a3bc2a38f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql @@ -114,50 +114,40 @@ SELECT NOT (FALSE OR FALSE) AS `t`; -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) --- CREATE TEMPORARY TABLE bool_test( --- b1 BOOL, --- b2 BOOL, --- b3 BOOL, --- b4 BOOL); +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4); -- empty case --- SELECT --- BOOL_AND(b1) AS "n", --- BOOL_OR(b3) AS "n" --- FROM bool_test; - --- COPY bool_test FROM STDIN NULL 'null'; --- TRUE null FALSE null --- FALSE TRUE null null --- null TRUE FALSE null --- \. +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0; --- SELECT --- BOOL_AND(b1) AS "f", --- BOOL_AND(b2) AS "t", --- BOOL_AND(b3) AS "f", --- BOOL_AND(b4) AS "n", --- BOOL_AND(NOT b2) AS "f", --- BOOL_AND(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test; --- SELECT --- EVERY(b1) AS "f", --- EVERY(b2) AS "t", --- EVERY(b3) AS "f", --- EVERY(b4) AS "n", --- EVERY(NOT b2) AS "f", --- EVERY(NOT b3) AS "t" --- FROM bool_test; +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test; --- SELECT --- BOOL_OR(b1) AS "t", --- BOOL_OR(b2) AS "t", --- BOOL_OR(b3) AS "f", --- BOOL_OR(b4) AS "n", --- BOOL_OR(NOT b2) AS "f", --- BOOL_OR(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test; -- -- Test cases that should be optimized into indexscans instead of diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql index a86bb0b47487..d82fcd90a22a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql @@ -116,50 +116,40 @@ SELECT NOT (FALSE OR FALSE) AS `t`; -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) --- CREATE TEMPORARY TABLE bool_test( --- b1 BOOL, --- b2 BOOL, --- b3 BOOL, --- b4 BOOL); +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4); -- empty case --- SELECT --- BOOL_AND(b1) AS "n", --- BOOL_OR(b3) AS "n" --- FROM bool_test; - --- COPY bool_test FROM STDIN NULL 'null'; --- TRUE null FALSE null --- FALSE TRUE null null --- null TRUE FALSE null --- \. +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0; --- SELECT --- BOOL_AND(b1) AS "f", --- BOOL_AND(b2) AS "t", --- BOOL_AND(b3) AS "f", --- BOOL_AND(b4) AS "n", --- BOOL_AND(NOT b2) AS "f", --- BOOL_AND(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test; --- SELECT --- EVERY(b1) AS "f", --- EVERY(b2) AS "t", --- EVERY(b3) AS "f", --- EVERY(b4) AS "n", --- EVERY(NOT b2) AS "f", --- EVERY(NOT b3) AS "t" --- FROM bool_test; +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test; --- SELECT --- BOOL_OR(b1) AS "t", --- BOOL_OR(b2) AS "t", --- BOOL_OR(b3) AS "f", --- BOOL_OR(b4) AS "n", --- BOOL_OR(NOT b2) AS "f", --- BOOL_OR(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test; -- -- Test cases that should be optimized into indexscans instead of diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out index 2b5371a65719..d5362809c804 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 21 -- !query 0 @@ -51,106 +51,170 @@ true false true false true true true true true -- !query 3 -select min(unique1) from tenk1 +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) -- !query 3 schema -struct +struct<> -- !query 3 output -0 + -- !query 4 -select max(unique1) from tenk1 +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 -- !query 4 schema -struct +struct -- !query 4 output -9999 +NULL NULL -- !query 5 -select max(unique1) from tenk1 where unique1 < 42 +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test -- !query 5 schema -struct +struct -- !query 5 output -41 +false true false NULL false true -- !query 6 -select max(unique1) from tenk1 where unique1 > 42 +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test -- !query 6 schema -struct +struct -- !query 6 output -9999 +false true false NULL false true -- !query 7 -select max(unique1) from tenk1 where unique1 > 42000 +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test -- !query 7 schema -struct +struct -- !query 7 output -NULL +true true false NULL false true -- !query 8 -select max(tenthous) from tenk1 where thousand = 33 +select min(unique1) from tenk1 -- !query 8 schema -struct +struct -- !query 8 output -9033 +0 -- !query 9 -select min(tenthous) from tenk1 where thousand = 33 +select max(unique1) from tenk1 -- !query 9 schema -struct +struct -- !query 9 output -33 +9999 -- !query 10 -select distinct max(unique2) from tenk1 +select max(unique1) from tenk1 where unique1 < 42 -- !query 10 schema -struct +struct -- !query 10 output -9999 +41 -- !query 11 -select max(unique2) from tenk1 order by 1 +select max(unique1) from tenk1 where unique1 > 42 -- !query 11 schema -struct +struct -- !query 11 output 9999 -- !query 12 -select max(unique2) from tenk1 order by max(unique2) +select max(unique1) from tenk1 where unique1 > 42000 -- !query 12 schema -struct +struct -- !query 12 output -9999 +NULL -- !query 13 -select max(unique2) from tenk1 order by max(unique2)+1 +select max(tenthous) from tenk1 where thousand = 33 -- !query 13 schema -struct +struct -- !query 13 output -9999 +9033 -- !query 14 -select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +select min(tenthous) from tenk1 where thousand = 33 -- !query 14 schema -struct +struct -- !query 14 output +33 + + +-- !query 15 +select distinct max(unique2) from tenk1 +-- !query 15 schema +struct +-- !query 15 output +9999 + + +-- !query 16 +select max(unique2) from tenk1 order by 1 +-- !query 16 schema +struct +-- !query 16 output +9999 + + +-- !query 17 +select max(unique2) from tenk1 order by max(unique2) +-- !query 17 schema +struct +-- !query 17 output +9999 + + +-- !query 18 +select max(unique2) from tenk1 order by max(unique2)+1 +-- !query 18 schema +struct +-- !query 18 output +9999 + + +-- !query 19 +select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +-- !query 19 schema +struct +-- !query 19 output 9999 3 9999 2 9999 1 --- !query 15 +-- !query 20 select max(100) from tenk1 --- !query 15 schema +-- !query 20 schema struct --- !query 15 output +-- !query 20 output 100 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out index ad2f1bdf77d7..9efc7f65fdfc 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 21 -- !query 0 @@ -51,106 +51,170 @@ true false true false true true true true true -- !query 3 -select min(udf(unique1)) from tenk1 +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) -- !query 3 schema -struct +struct<> -- !query 3 output -0 + -- !query 4 -select udf(max(unique1)) from tenk1 +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 -- !query 4 schema -struct +struct -- !query 4 output -9999 +NULL NULL -- !query 5 -select max(unique1) from tenk1 where udf(unique1) < 42 +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test -- !query 5 schema -struct +struct -- !query 5 output -41 +false true false NULL false true -- !query 6 -select max(unique1) from tenk1 where unique1 > udf(42) +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test -- !query 6 schema -struct +struct -- !query 6 output -9999 +false true false NULL false true -- !query 7 -select max(unique1) from tenk1 where udf(unique1) > 42000 +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test -- !query 7 schema -struct +struct -- !query 7 output -NULL +true true false NULL false true -- !query 8 -select max(tenthous) from tenk1 where udf(thousand) = 33 +select min(udf(unique1)) from tenk1 -- !query 8 schema -struct +struct -- !query 8 output -9033 +0 -- !query 9 -select min(tenthous) from tenk1 where udf(thousand) = 33 +select udf(max(unique1)) from tenk1 -- !query 9 schema -struct +struct -- !query 9 output -33 +9999 -- !query 10 -select distinct max(udf(unique2)) from tenk1 +select max(unique1) from tenk1 where udf(unique1) < 42 -- !query 10 schema -struct +struct -- !query 10 output -9999 +41 -- !query 11 -select max(unique2) from tenk1 order by udf(1) +select max(unique1) from tenk1 where unique1 > udf(42) -- !query 11 schema -struct +struct -- !query 11 output 9999 -- !query 12 -select max(unique2) from tenk1 order by max(udf(unique2)) +select max(unique1) from tenk1 where udf(unique1) > 42000 -- !query 12 schema -struct +struct -- !query 12 output -9999 +NULL -- !query 13 -select udf(max(udf(unique2))) from tenk1 order by udf(max(unique2))+1 +select max(tenthous) from tenk1 where udf(thousand) = 33 -- !query 13 schema -struct +struct -- !query 13 output -9999 +9033 -- !query 14 -select t1.max_unique2, udf(g) from (select max(udf(unique2)) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +select min(tenthous) from tenk1 where udf(thousand) = 33 -- !query 14 schema -struct +struct -- !query 14 output +33 + + +-- !query 15 +select distinct max(udf(unique2)) from tenk1 +-- !query 15 schema +struct +-- !query 15 output +9999 + + +-- !query 16 +select max(unique2) from tenk1 order by udf(1) +-- !query 16 schema +struct +-- !query 16 output +9999 + + +-- !query 17 +select max(unique2) from tenk1 order by max(udf(unique2)) +-- !query 17 schema +struct +-- !query 17 output +9999 + + +-- !query 18 +select udf(max(udf(unique2))) from tenk1 order by udf(max(unique2))+1 +-- !query 18 schema +struct +-- !query 18 output +9999 + + +-- !query 19 +select t1.max_unique2, udf(g) from (select max(udf(unique2)) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +-- !query 19 schema +struct +-- !query 19 output 9999 3 9999 2 9999 1 --- !query 15 +-- !query 20 select udf(max(100)) from tenk1 --- !query 15 schema +-- !query 20 schema struct --- !query 15 output +-- !query 20 output 100