diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index d5728b902757..8cf0928c2afd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -314,8 +314,10 @@ object FunctionRegistry { expression[CollectSet]("collect_set"), expression[CountMinSketchAgg]("count_min_sketch"), expression[EveryAgg]("every"), + expression[EveryAgg]("bool_and"), expression[AnyAgg]("any"), - expression[SomeAgg]("some"), + expression[AnyAgg]("some"), + expression[AnyAgg]("bool_or"), // string functions expression[Ascii]("ascii"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala index 4562fbcff5f3..a8220ec641d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala @@ -71,19 +71,3 @@ case class EveryAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { case class AnyAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { override def nodeName: String = "Any" } - -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns true if at least one value of `expr` is true.", - examples = """ - Examples: - > SELECT _FUNC_(col) FROM VALUES (true), (false), (false) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (NULL), (true), (false) AS tab(col); - true - > SELECT _FUNC_(col) FROM VALUES (false), (false), (NULL) AS tab(col); - false - """, - since = "3.0.0") -case class SomeAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) { - override def nodeName: 
String = "Some" -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 9d7564175314..70277526cba8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -47,7 +47,6 @@ object ReplaceExpressions extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case e: RuntimeReplaceable => e.child case CountIf(predicate) => Count(new NullIf(predicate, Literal.FalseLiteral)) - case SomeAgg(arg) => Max(arg) case AnyAgg(arg) => Max(arg) case EveryAgg(arg) => Min(arg) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 4440ac9e281c..ed11bce5d12b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -146,7 +146,6 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite { assertSuccess(Min('arrayField)) assertSuccess(new EveryAgg('booleanField)) assertSuccess(new AnyAgg('booleanField)) - assertSuccess(new SomeAgg('booleanField)) assertError(Min('mapField), "min does not support ordering on type") assertError(Max('mapField), "max does not support ordering on type") diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 66bc90914e0d..fcde225676cb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -90,16 +90,16 @@ CREATE OR REPLACE TEMPORARY VIEW 
test_agg AS SELECT * FROM VALUES (5, null), (5, true), (5, false) AS test_agg(k, v); -- empty table -SELECT every(v), some(v), any(v) FROM test_agg WHERE 1 = 0; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0; -- all null values -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 4; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4; -- aggregates are null Filtering -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 5; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5; -- group by -SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k; +SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k; -- having SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false; @@ -137,10 +137,18 @@ SELECT any(1L); -- input type checking String SELECT every("true"); --- every/some/any aggregates are supported as windows expression. +-- input type checking Decimal +SELECT bool_and(1.0); + +-- input type checking Double +SELECT bool_or(1.0D); + +-- every/some/any/bool_and/bool_or aggregates are supported as window expressions. SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; +SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; +SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; -- Having referencing aggregate expressions is ok. 
SELECT count(*) FROM test_agg HAVING count(*) > 1L; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql index 47f9d2f37306..144a3bc2a38f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql @@ -114,50 +114,40 @@ SELECT NOT (FALSE OR FALSE) AS `t`; -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) --- CREATE TEMPORARY TABLE bool_test( --- b1 BOOL, --- b2 BOOL, --- b3 BOOL, --- b4 BOOL); +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4); -- empty case --- SELECT --- BOOL_AND(b1) AS "n", --- BOOL_OR(b3) AS "n" --- FROM bool_test; - --- COPY bool_test FROM STDIN NULL 'null'; --- TRUE null FALSE null --- FALSE TRUE null null --- null TRUE FALSE null --- \. 
+SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0; --- SELECT --- BOOL_AND(b1) AS "f", --- BOOL_AND(b2) AS "t", --- BOOL_AND(b3) AS "f", --- BOOL_AND(b4) AS "n", --- BOOL_AND(NOT b2) AS "f", --- BOOL_AND(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test; --- SELECT --- EVERY(b1) AS "f", --- EVERY(b2) AS "t", --- EVERY(b3) AS "f", --- EVERY(b4) AS "n", --- EVERY(NOT b2) AS "f", --- EVERY(NOT b3) AS "t" --- FROM bool_test; +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test; --- SELECT --- BOOL_OR(b1) AS "t", --- BOOL_OR(b2) AS "t", --- BOOL_OR(b3) AS "f", --- BOOL_OR(b4) AS "n", --- BOOL_OR(NOT b2) AS "f", --- BOOL_OR(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test; -- -- Test cases that should be optimized into indexscans instead of diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql index a86bb0b47487..d82fcd90a22a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql @@ -116,50 +116,40 @@ SELECT NOT (FALSE OR FALSE) AS `t`; -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) --- CREATE TEMPORARY TABLE bool_test( --- b1 BOOL, --- b2 BOOL, --- b3 BOOL, --- b4 BOOL); +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4); -- empty case --- SELECT --- 
BOOL_AND(b1) AS "n", --- BOOL_OR(b3) AS "n" --- FROM bool_test; - --- COPY bool_test FROM STDIN NULL 'null'; --- TRUE null FALSE null --- FALSE TRUE null null --- null TRUE FALSE null --- \. +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0; --- SELECT --- BOOL_AND(b1) AS "f", --- BOOL_AND(b2) AS "t", --- BOOL_AND(b3) AS "f", --- BOOL_AND(b4) AS "n", --- BOOL_AND(NOT b2) AS "f", --- BOOL_AND(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test; --- SELECT --- EVERY(b1) AS "f", --- EVERY(b2) AS "t", --- EVERY(b3) AS "f", --- EVERY(b4) AS "n", --- EVERY(NOT b2) AS "f", --- EVERY(NOT b3) AS "t" --- FROM bool_test; +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test; --- SELECT --- BOOL_OR(b1) AS "t", --- BOOL_OR(b2) AS "t", --- BOOL_OR(b3) AS "f", --- BOOL_OR(b4) AS "n", --- BOOL_OR(NOT b2) AS "f", --- BOOL_OR(NOT b3) AS "t" --- FROM bool_test; +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test; -- -- Test cases that should be optimized into indexscans instead of diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 3a5df254f2cd..545aa238dd75 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 52 +-- Number of queries: 56 -- !query 0 @@ -291,39 +291,39 @@ struct<> -- !query 31 -SELECT every(v), some(v), any(v) FROM test_agg WHERE 1 = 0 +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0 -- !query 31 schema 
-struct +struct -- !query 31 output -NULL NULL NULL +NULL NULL NULL NULL NULL -- !query 32 -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 4 +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4 -- !query 32 schema -struct +struct -- !query 32 output -NULL NULL NULL +NULL NULL NULL NULL NULL -- !query 33 -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 5 +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5 -- !query 33 schema -struct +struct -- !query 33 output -false true true +false true true false true -- !query 34 -SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k +SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k -- !query 34 schema -struct +struct -- !query 34 output -1 false true true -2 true true true -3 false false false -4 NULL NULL NULL -5 false true true +1 false true true false true +2 true true true true true +3 false false false false false +4 NULL NULL NULL NULL NULL +5 false true true false true -- !query 35 @@ -389,7 +389,7 @@ SELECT some(1S) struct<> -- !query 40 output org.apache.spark.sql.AnalysisException -cannot resolve 'some(1S)' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 +cannot resolve 'any(1S)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7 -- !query 41 @@ -411,10 +411,28 @@ cannot resolve 'every('true')' due to data type mismatch: Input to function 'eve -- !query 43 -SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +SELECT bool_and(1.0) -- !query 43 schema -struct +struct<> -- !query 43 output +org.apache.spark.sql.AnalysisException +cannot resolve 'every(1.0BD)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7 + + +-- !query 44 +SELECT bool_or(1.0D) +-- !query 44 schema +struct<> +-- !query 
44 output +org.apache.spark.sql.AnalysisException +cannot resolve 'any(1.0D)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [double].; line 1 pos 7 + + +-- !query 45 +SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +-- !query 45 schema +struct +-- !query 45 output 1 false false 1 true false 2 true true @@ -427,11 +445,11 @@ struct --- !query 44 output +-- !query 46 schema +struct +-- !query 46 output 1 false false 1 true true 2 true true @@ -444,11 +462,11 @@ struct --- !query 45 output +-- !query 47 output 1 false false 1 true true 2 true true @@ -461,37 +479,71 @@ struct +-- !query 48 output +1 false false +1 true false +2 true true +3 NULL NULL +3 false false +4 NULL NULL +4 NULL NULL +5 NULL NULL +5 false false +5 true false + + +-- !query 49 +SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +-- !query 49 schema +struct +-- !query 49 output +1 false false +1 true true +2 true true +3 NULL NULL +3 false false +4 NULL NULL +4 NULL NULL +5 NULL NULL +5 false false +5 true true + + +-- !query 50 SELECT count(*) FROM test_agg HAVING count(*) > 1L --- !query 46 schema +-- !query 50 schema struct --- !query 46 output +-- !query 50 output 10 --- !query 47 +-- !query 51 SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true --- !query 47 schema +-- !query 51 schema struct --- !query 47 output +-- !query 51 output 1 true 2 true 5 true --- !query 48 +-- !query 52 SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L --- !query 48 schema +-- !query 52 schema struct --- !query 48 output +-- !query 52 output 10 --- !query 49 +-- !query 53 SELECT count(*) FROM test_agg WHERE count(*) > 1L --- !query 49 schema +-- !query 53 schema struct<> --- !query 49 output +-- !query 53 output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
@@ -499,11 +551,11 @@ Expression in where clause: [(count(1) > 1L)] Invalid expressions: [count(1)]; --- !query 50 +-- !query 54 SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L --- !query 50 schema +-- !query 54 schema struct<> --- !query 50 output +-- !query 54 output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. @@ -511,11 +563,11 @@ Expression in where clause: [((count(1) + 1L) > 1L)] Invalid expressions: [count(1)]; --- !query 51 +-- !query 55 SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1 --- !query 51 schema +-- !query 55 schema struct<> --- !query 51 output +-- !query 55 output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out index 2b5371a65719..d5362809c804 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 21 -- !query 0 @@ -51,106 +51,170 @@ true false true false true true true true true -- !query 3 -select min(unique1) from tenk1 +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) -- !query 3 schema -struct +struct<> -- !query 3 output -0 + -- !query 4 -select max(unique1) from tenk1 +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 -- !query 4 schema -struct +struct -- !query 4 output -9999 +NULL NULL -- !query 5 -select max(unique1) from tenk1 where unique1 < 42 +SELECT + BOOL_AND(b1) AS f1, + 
BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test -- !query 5 schema -struct +struct -- !query 5 output -41 +false true false NULL false true -- !query 6 -select max(unique1) from tenk1 where unique1 > 42 +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test -- !query 6 schema -struct +struct -- !query 6 output -9999 +false true false NULL false true -- !query 7 -select max(unique1) from tenk1 where unique1 > 42000 +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test -- !query 7 schema -struct +struct -- !query 7 output -NULL +true true false NULL false true -- !query 8 -select max(tenthous) from tenk1 where thousand = 33 +select min(unique1) from tenk1 -- !query 8 schema -struct +struct -- !query 8 output -9033 +0 -- !query 9 -select min(tenthous) from tenk1 where thousand = 33 +select max(unique1) from tenk1 -- !query 9 schema -struct +struct -- !query 9 output -33 +9999 -- !query 10 -select distinct max(unique2) from tenk1 +select max(unique1) from tenk1 where unique1 < 42 -- !query 10 schema -struct +struct -- !query 10 output -9999 +41 -- !query 11 -select max(unique2) from tenk1 order by 1 +select max(unique1) from tenk1 where unique1 > 42 -- !query 11 schema -struct +struct -- !query 11 output 9999 -- !query 12 -select max(unique2) from tenk1 order by max(unique2) +select max(unique1) from tenk1 where unique1 > 42000 -- !query 12 schema -struct +struct -- !query 12 output -9999 +NULL -- !query 13 -select max(unique2) from tenk1 order by max(unique2)+1 +select max(tenthous) from tenk1 where thousand = 33 -- !query 13 schema -struct +struct -- !query 13 output -9999 +9033 -- !query 14 -select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW 
explode(array(1,2,3)) t2 AS g order by g desc +select min(tenthous) from tenk1 where thousand = 33 -- !query 14 schema -struct +struct -- !query 14 output +33 + + +-- !query 15 +select distinct max(unique2) from tenk1 +-- !query 15 schema +struct +-- !query 15 output +9999 + + +-- !query 16 +select max(unique2) from tenk1 order by 1 +-- !query 16 schema +struct +-- !query 16 output +9999 + + +-- !query 17 +select max(unique2) from tenk1 order by max(unique2) +-- !query 17 schema +struct +-- !query 17 output +9999 + + +-- !query 18 +select max(unique2) from tenk1 order by max(unique2)+1 +-- !query 18 schema +struct +-- !query 18 output +9999 + + +-- !query 19 +select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +-- !query 19 schema +struct +-- !query 19 output 9999 3 9999 2 9999 1 --- !query 15 +-- !query 20 select max(100) from tenk1 --- !query 15 schema +-- !query 20 schema struct --- !query 15 output +-- !query 20 output 100 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out index ad2f1bdf77d7..9efc7f65fdfc 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 21 -- !query 0 @@ -51,106 +51,170 @@ true false true false true true true true true -- !query 3 -select min(udf(unique1)) from tenk1 +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) -- !query 3 schema -struct +struct<> -- !query 3 output -0 + -- !query 4 -select udf(max(unique1)) from tenk1 
+SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 -- !query 4 schema -struct +struct -- !query 4 output -9999 +NULL NULL -- !query 5 -select max(unique1) from tenk1 where udf(unique1) < 42 +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test -- !query 5 schema -struct +struct -- !query 5 output -41 +false true false NULL false true -- !query 6 -select max(unique1) from tenk1 where unique1 > udf(42) +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test -- !query 6 schema -struct +struct -- !query 6 output -9999 +false true false NULL false true -- !query 7 -select max(unique1) from tenk1 where udf(unique1) > 42000 +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test -- !query 7 schema -struct +struct -- !query 7 output -NULL +true true false NULL false true -- !query 8 -select max(tenthous) from tenk1 where udf(thousand) = 33 +select min(udf(unique1)) from tenk1 -- !query 8 schema -struct +struct -- !query 8 output -9033 +0 -- !query 9 -select min(tenthous) from tenk1 where udf(thousand) = 33 +select udf(max(unique1)) from tenk1 -- !query 9 schema -struct +struct -- !query 9 output -33 +9999 -- !query 10 -select distinct max(udf(unique2)) from tenk1 +select max(unique1) from tenk1 where udf(unique1) < 42 -- !query 10 schema -struct +struct -- !query 10 output -9999 +41 -- !query 11 -select max(unique2) from tenk1 order by udf(1) +select max(unique1) from tenk1 where unique1 > udf(42) -- !query 11 schema -struct +struct -- !query 11 output 9999 -- !query 12 -select max(unique2) from tenk1 order by max(udf(unique2)) +select max(unique1) from tenk1 where udf(unique1) > 42000 -- !query 12 schema -struct +struct -- !query 12 output -9999 +NULL -- 
!query 13 -select udf(max(udf(unique2))) from tenk1 order by udf(max(unique2))+1 +select max(tenthous) from tenk1 where udf(thousand) = 33 -- !query 13 schema -struct +struct -- !query 13 output -9999 +9033 -- !query 14 -select t1.max_unique2, udf(g) from (select max(udf(unique2)) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +select min(tenthous) from tenk1 where udf(thousand) = 33 -- !query 14 schema -struct +struct -- !query 14 output +33 + + +-- !query 15 +select distinct max(udf(unique2)) from tenk1 +-- !query 15 schema +struct +-- !query 15 output +9999 + + +-- !query 16 +select max(unique2) from tenk1 order by udf(1) +-- !query 16 schema +struct +-- !query 16 output +9999 + + +-- !query 17 +select max(unique2) from tenk1 order by max(udf(unique2)) +-- !query 17 schema +struct +-- !query 17 output +9999 + + +-- !query 18 +select udf(max(udf(unique2))) from tenk1 order by udf(max(unique2))+1 +-- !query 18 schema +struct +-- !query 18 output +9999 + + +-- !query 19 +select t1.max_unique2, udf(g) from (select max(udf(unique2)) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +-- !query 19 schema +struct +-- !query 19 output 9999 3 9999 2 9999 1 --- !query 15 +-- !query 20 select udf(max(100)) from tenk1 --- !query 15 schema +-- !query 20 schema struct --- !query 15 output +-- !query 20 output 100 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index febe47b5ba84..b762d18fb839 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -293,7 +293,7 @@ struct<> -- !query 31 SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0 -- !query 31 schema -struct +struct -- !query 31 output NULL NULL NULL @@ -301,7 +301,7 @@ NULL NULL NULL -- !query 32 SELECT 
udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4 -- !query 32 schema -struct +struct -- !query 32 output NULL NULL NULL @@ -309,7 +309,7 @@ NULL NULL NULL -- !query 33 SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5 -- !query 33 schema -struct +struct -- !query 33 output false true true @@ -317,7 +317,7 @@ false true true -- !query 34 SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k) -- !query 34 schema -struct +struct -- !query 34 output 1 false true true 2 true true true @@ -389,7 +389,7 @@ SELECT some(udf(1S)) struct<> -- !query 40 output org.apache.spark.sql.AnalysisException -cannot resolve 'some(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 +cannot resolve 'any(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [smallint].; line 1 pos 7 -- !query 41 @@ -430,7 +430,7 @@ struct +struct -- !query 44 output 1 false false 1 true true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 37183556d792..630cf22fb20d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -95,7 +95,7 @@ class ExplainSuite extends QueryTest with SharedSparkSession { // plan should show the rewritten aggregate expression. val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k") checkKeywordsExistsInExplain(df, - "Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS some(v)#x, " + + "Aggregate [k#x], [k#x, min(v#x) AS every(v)#x, max(v#x) AS any(v)#x, " + "max(v#x) AS any(v)#x]") } }