Skip to content

Commit 6d4cc7b

Browse files
yaooqinncloud-fan
authored andcommitted
[SPARK-27880][SQL] Add bool_and for every and bool_or for any as function aliases
### What changes were proposed in this pull request? bool_or(x) <=> any/some(x) <=> max(x) bool_and(x) <=> every(x) <=> min(x) Args: x: boolean ### Why are the changes needed? PostgreSQL, Presto and Vertica, etc also support this feature: ### Does this PR introduce any user-facing change? add new functions support ### How was this patch tested? add ut Closes #26126 from yaooqinn/SPARK-27880. Authored-by: Kent Yao <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent d11cbf2 commit 6d4cc7b

File tree

12 files changed

+382
-230
lines changed

12 files changed

+382
-230
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,10 @@ object FunctionRegistry {
314314
expression[CollectSet]("collect_set"),
315315
expression[CountMinSketchAgg]("count_min_sketch"),
316316
expression[EveryAgg]("every"),
317+
expression[EveryAgg]("bool_and"),
317318
expression[AnyAgg]("any"),
318-
expression[SomeAgg]("some"),
319+
expression[AnyAgg]("some"),
320+
expression[AnyAgg]("bool_or"),
319321

320322
// string functions
321323
expression[Ascii]("ascii"),

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/UnevaluableAggs.scala

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -71,19 +71,3 @@ case class EveryAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
7171
case class AnyAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
7272
override def nodeName: String = "Any"
7373
}
74-
75-
@ExpressionDescription(
76-
usage = "_FUNC_(expr) - Returns true if at least one value of `expr` is true.",
77-
examples = """
78-
Examples:
79-
> SELECT _FUNC_(col) FROM VALUES (true), (false), (false) AS tab(col);
80-
true
81-
> SELECT _FUNC_(col) FROM VALUES (NULL), (true), (false) AS tab(col);
82-
true
83-
> SELECT _FUNC_(col) FROM VALUES (false), (false), (NULL) AS tab(col);
84-
false
85-
""",
86-
since = "3.0.0")
87-
case class SomeAgg(arg: Expression) extends UnevaluableBooleanAggBase(arg) {
88-
override def nodeName: String = "Some"
89-
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ object ReplaceExpressions extends Rule[LogicalPlan] {
4747
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
4848
case e: RuntimeReplaceable => e.child
4949
case CountIf(predicate) => Count(new NullIf(predicate, Literal.FalseLiteral))
50-
case SomeAgg(arg) => Max(arg)
5150
case AnyAgg(arg) => Max(arg)
5251
case EveryAgg(arg) => Min(arg)
5352
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,6 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite {
146146
assertSuccess(Min('arrayField))
147147
assertSuccess(new EveryAgg('booleanField))
148148
assertSuccess(new AnyAgg('booleanField))
149-
assertSuccess(new SomeAgg('booleanField))
150149

151150
assertError(Min('mapField), "min does not support ordering on type")
152151
assertError(Max('mapField), "max does not support ordering on type")

sql/core/src/test/resources/sql-tests/inputs/group-by.sql

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,16 @@ CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
9090
(5, null), (5, true), (5, false) AS test_agg(k, v);
9191

9292
-- empty table
93-
SELECT every(v), some(v), any(v) FROM test_agg WHERE 1 = 0;
93+
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0;
9494

9595
-- all null values
96-
SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 4;
96+
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4;
9797

9898
-- aggregates are null Filtering
99-
SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 5;
99+
SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5;
100100

101101
-- group by
102-
SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k;
102+
SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k;
103103

104104
-- having
105105
SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false;
@@ -137,10 +137,18 @@ SELECT any(1L);
137137
-- input type checking String
138138
SELECT every("true");
139139

140-
-- every/some/any aggregates are supported as windows expression.
140+
-- input type checking Decimal
141+
SELECT bool_and(1.0);
142+
143+
-- input type checking double
144+
SELECT bool_or(1.0D);
145+
146+
-- every/some/any aggregates/bool_and/bool_or are supported as windows expression.
141147
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
142148
SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
143149
SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
150+
SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
151+
SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg;
144152

145153
-- Having referencing aggregate expressions is ok.
146154
SELECT count(*) FROM test_agg HAVING count(*) > 1L;

sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql

Lines changed: 29 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -114,50 +114,40 @@ SELECT
114114
NOT (FALSE OR FALSE) AS `t`;
115115

116116
-- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY)
117-
-- CREATE TEMPORARY TABLE bool_test(
118-
-- b1 BOOL,
119-
-- b2 BOOL,
120-
-- b3 BOOL,
121-
-- b4 BOOL);
117+
CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES
118+
(TRUE, null, FALSE, null),
119+
(FALSE, TRUE, null, null),
120+
(null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4);
122121

123122
-- empty case
124-
-- SELECT
125-
-- BOOL_AND(b1) AS "n",
126-
-- BOOL_OR(b3) AS "n"
127-
-- FROM bool_test;
128-
129-
-- COPY bool_test FROM STDIN NULL 'null';
130-
-- TRUE null FALSE null
131-
-- FALSE TRUE null null
132-
-- null TRUE FALSE null
133-
-- \.
123+
SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0;
134124

135-
-- SELECT
136-
-- BOOL_AND(b1) AS "f",
137-
-- BOOL_AND(b2) AS "t",
138-
-- BOOL_AND(b3) AS "f",
139-
-- BOOL_AND(b4) AS "n",
140-
-- BOOL_AND(NOT b2) AS "f",
141-
-- BOOL_AND(NOT b3) AS "t"
142-
-- FROM bool_test;
125+
SELECT
126+
BOOL_AND(b1) AS f1,
127+
BOOL_AND(b2) AS t2,
128+
BOOL_AND(b3) AS f3,
129+
BOOL_AND(b4) AS n4,
130+
BOOL_AND(NOT b2) AS f5,
131+
BOOL_AND(NOT b3) AS t6
132+
FROM bool_test;
143133

144-
-- SELECT
145-
-- EVERY(b1) AS "f",
146-
-- EVERY(b2) AS "t",
147-
-- EVERY(b3) AS "f",
148-
-- EVERY(b4) AS "n",
149-
-- EVERY(NOT b2) AS "f",
150-
-- EVERY(NOT b3) AS "t"
151-
-- FROM bool_test;
134+
SELECT
135+
EVERY(b1) AS f1,
136+
EVERY(b2) AS t2,
137+
EVERY(b3) AS f3,
138+
EVERY(b4) AS n4,
139+
EVERY(NOT b2) AS f5,
140+
EVERY(NOT b3) AS t6
141+
FROM bool_test;
152142

153-
-- SELECT
154-
-- BOOL_OR(b1) AS "t",
155-
-- BOOL_OR(b2) AS "t",
156-
-- BOOL_OR(b3) AS "f",
157-
-- BOOL_OR(b4) AS "n",
158-
-- BOOL_OR(NOT b2) AS "f",
159-
-- BOOL_OR(NOT b3) AS "t"
160-
-- FROM bool_test;
143+
SELECT
144+
BOOL_OR(b1) AS t1,
145+
BOOL_OR(b2) AS t2,
146+
BOOL_OR(b3) AS f3,
147+
BOOL_OR(b4) AS n4,
148+
BOOL_OR(NOT b2) AS f5,
149+
BOOL_OR(NOT b3) AS t6
150+
FROM bool_test;
161151

162152
--
163153
-- Test cases that should be optimized into indexscans instead of

sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql

Lines changed: 29 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -116,50 +116,40 @@ SELECT
116116
NOT (FALSE OR FALSE) AS `t`;
117117

118118
-- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY)
119-
-- CREATE TEMPORARY TABLE bool_test(
120-
-- b1 BOOL,
121-
-- b2 BOOL,
122-
-- b3 BOOL,
123-
-- b4 BOOL);
119+
CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES
120+
(TRUE, null, FALSE, null),
121+
(FALSE, TRUE, null, null),
122+
(null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4);
124123

125124
-- empty case
126-
-- SELECT
127-
-- BOOL_AND(b1) AS "n",
128-
-- BOOL_OR(b3) AS "n"
129-
-- FROM bool_test;
130-
131-
-- COPY bool_test FROM STDIN NULL 'null';
132-
-- TRUE null FALSE null
133-
-- FALSE TRUE null null
134-
-- null TRUE FALSE null
135-
-- \.
125+
SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0;
136126

137-
-- SELECT
138-
-- BOOL_AND(b1) AS "f",
139-
-- BOOL_AND(b2) AS "t",
140-
-- BOOL_AND(b3) AS "f",
141-
-- BOOL_AND(b4) AS "n",
142-
-- BOOL_AND(NOT b2) AS "f",
143-
-- BOOL_AND(NOT b3) AS "t"
144-
-- FROM bool_test;
127+
SELECT
128+
BOOL_AND(b1) AS f1,
129+
BOOL_AND(b2) AS t2,
130+
BOOL_AND(b3) AS f3,
131+
BOOL_AND(b4) AS n4,
132+
BOOL_AND(NOT b2) AS f5,
133+
BOOL_AND(NOT b3) AS t6
134+
FROM bool_test;
145135

146-
-- SELECT
147-
-- EVERY(b1) AS "f",
148-
-- EVERY(b2) AS "t",
149-
-- EVERY(b3) AS "f",
150-
-- EVERY(b4) AS "n",
151-
-- EVERY(NOT b2) AS "f",
152-
-- EVERY(NOT b3) AS "t"
153-
-- FROM bool_test;
136+
SELECT
137+
EVERY(b1) AS f1,
138+
EVERY(b2) AS t2,
139+
EVERY(b3) AS f3,
140+
EVERY(b4) AS n4,
141+
EVERY(NOT b2) AS f5,
142+
EVERY(NOT b3) AS t6
143+
FROM bool_test;
154144

155-
-- SELECT
156-
-- BOOL_OR(b1) AS "t",
157-
-- BOOL_OR(b2) AS "t",
158-
-- BOOL_OR(b3) AS "f",
159-
-- BOOL_OR(b4) AS "n",
160-
-- BOOL_OR(NOT b2) AS "f",
161-
-- BOOL_OR(NOT b3) AS "t"
162-
-- FROM bool_test;
145+
SELECT
146+
BOOL_OR(b1) AS t1,
147+
BOOL_OR(b2) AS t2,
148+
BOOL_OR(b3) AS f3,
149+
BOOL_OR(b4) AS n4,
150+
BOOL_OR(NOT b2) AS f5,
151+
BOOL_OR(NOT b3) AS t6
152+
FROM bool_test;
163153

164154
--
165155
-- Test cases that should be optimized into indexscans instead of

0 commit comments

Comments
 (0)