Skip to content

Commit 0cf4f07

Browse files
yaooqinn authored and maropu committed
[SPARK-29545][SQL] Add support for bit_xor aggregate function
### What changes were proposed in this pull request?

bit_xor(expr) - Returns the bitwise XOR of all non-null input values, or null if none.

### Why are the changes needed?

Since `bit_and` and `bit_or` are now supported, we should also support the related aggregate function **bit_xor**, ahead of PostgreSQL, because many other popular databases already support it:

- http://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.help.sqlanywhere.12.0.1/dbreference/bit-xor-function.html
- https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html#function_bit-xor
- https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Aggregate/BIT_XOR.htm?TocPath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAggregate%20Functions%7C_____10

### Does this PR introduce any user-facing change?

Yes. It adds a new bitwise aggregate function, `bit_xor`.

### How was this patch tested?

Unit tests were added.

Closes apache#26205 from yaooqinn/SPARK-29545.

Authored-by: Kent Yao <[email protected]>
Signed-off-by: Takeshi Yamamuro <[email protected]>
1 parent 8bd8f49 commit 0cf4f07

File tree

4 files changed

+151
-44
lines changed

4 files changed

+151
-44
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ object FunctionRegistry {
527527
expression[BitwiseCount]("bit_count"),
528528
expression[BitAndAgg]("bit_and"),
529529
expression[BitOrAgg]("bit_or"),
530+
expression[BitXorAgg]("bit_xor"),
530531

531532
// json
532533
expression[StructsToJson]("to_json"),

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala

Lines changed: 49 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,14 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions.aggregate
1919

20-
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal}
20+
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryArithmetic, BitwiseAnd, BitwiseOr, BitwiseXor, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal}
2121
import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegralType}
2222

23-
@ExpressionDescription(
24-
usage = "_FUNC_(expr) - Returns the bitwise AND of all non-null input values, or null if none.",
25-
examples = """
26-
Examples:
27-
> SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col);
28-
1
29-
""",
30-
since = "3.0.0")
31-
case class BitAndAgg(child: Expression) extends DeclarativeAggregate with ExpectsInputTypes {
23+
abstract class BitAggregate extends DeclarativeAggregate with ExpectsInputTypes {
3224

33-
override def nodeName: String = "bit_and"
25+
val child: Expression
26+
27+
def bitOperator(left: Expression, right: Expression): BinaryArithmetic
3428

3529
override def children: Seq[Expression] = child :: Nil
3630

@@ -40,23 +34,40 @@ case class BitAndAgg(child: Expression) extends DeclarativeAggregate with Expect
4034

4135
override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType)
4236

43-
private lazy val bitAnd = AttributeReference("bit_and", child.dataType)()
44-
45-
override lazy val aggBufferAttributes: Seq[AttributeReference] = bitAnd :: Nil
37+
private lazy val bitAgg = AttributeReference(nodeName, child.dataType)()
4638

4739
override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil
4840

41+
override lazy val aggBufferAttributes: Seq[AttributeReference] = bitAgg :: Nil
42+
43+
override lazy val evaluateExpression: AttributeReference = bitAgg
44+
4945
override lazy val updateExpressions: Seq[Expression] =
50-
If(IsNull(bitAnd),
46+
If(IsNull(bitAgg),
5147
child,
52-
If(IsNull(child), bitAnd, BitwiseAnd(bitAnd, child))) :: Nil
48+
If(IsNull(child), bitAgg, bitOperator(bitAgg, child))) :: Nil
5349

5450
override lazy val mergeExpressions: Seq[Expression] =
55-
If(IsNull(bitAnd.left),
56-
bitAnd.right,
57-
If(IsNull(bitAnd.right), bitAnd.left, BitwiseAnd(bitAnd.left, bitAnd.right))) :: Nil
51+
If(IsNull(bitAgg.left),
52+
bitAgg.right,
53+
If(IsNull(bitAgg.right), bitAgg.left, bitOperator(bitAgg.left, bitAgg.right))) :: Nil
54+
}
55+
56+
@ExpressionDescription(
57+
usage = "_FUNC_(expr) - Returns the bitwise AND of all non-null input values, or null if none.",
58+
examples = """
59+
Examples:
60+
> SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col);
61+
1
62+
""",
63+
since = "3.0.0")
64+
case class BitAndAgg(child: Expression) extends BitAggregate {
5865

59-
override lazy val evaluateExpression: AttributeReference = bitAnd
66+
override def nodeName: String = "bit_and"
67+
68+
override def bitOperator(left: Expression, right: Expression): BinaryArithmetic = {
69+
BitwiseAnd(left, right)
70+
}
6071
}
6172

6273
@ExpressionDescription(
@@ -67,33 +78,28 @@ case class BitAndAgg(child: Expression) extends DeclarativeAggregate with Expect
6778
7
6879
""",
6980
since = "3.0.0")
70-
case class BitOrAgg(child: Expression) extends DeclarativeAggregate with ExpectsInputTypes {
81+
case class BitOrAgg(child: Expression) extends BitAggregate {
7182

7283
override def nodeName: String = "bit_or"
7384

74-
override def children: Seq[Expression] = child :: Nil
75-
76-
override def nullable: Boolean = true
77-
78-
override def dataType: DataType = child.dataType
79-
80-
override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType)
81-
82-
private lazy val bitOr = AttributeReference("bit_or", child.dataType)()
83-
84-
override lazy val aggBufferAttributes: Seq[AttributeReference] = bitOr :: Nil
85-
86-
override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil
85+
override def bitOperator(left: Expression, right: Expression): BinaryArithmetic = {
86+
BitwiseOr(left, right)
87+
}
88+
}
8789

88-
override lazy val updateExpressions: Seq[Expression] =
89-
If(IsNull(bitOr),
90-
child,
91-
If(IsNull(child), bitOr, BitwiseOr(bitOr, child))) :: Nil
90+
@ExpressionDescription(
91+
usage = "_FUNC_(expr) - Returns the bitwise XOR of all non-null input values, or null if none.",
92+
examples = """
93+
Examples:
94+
> SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col);
95+
6
96+
""",
97+
since = "3.0.0")
98+
case class BitXorAgg(child: Expression) extends BitAggregate {
9299

93-
override lazy val mergeExpressions: Seq[Expression] =
94-
If(IsNull(bitOr.left),
95-
bitOr.right,
96-
If(IsNull(bitOr.right), bitOr.left, BitwiseOr(bitOr.left, bitOr.right))) :: Nil
100+
override def nodeName: String = "bit_xor"
97101

98-
override lazy val evaluateExpression: AttributeReference = bitOr
102+
override def bitOperator(left: Expression, right: Expression): BinaryArithmetic = {
103+
BitwiseXor(left, right)
104+
}
99105
}

sql/core/src/test/resources/sql-tests/inputs/bitwise.sql

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,34 @@ select bit_count(-9223372036854775808L);
3737
-- other illegal arguments
3838
select bit_count("bit count");
3939
select bit_count('a');
40+
41+
-- test for bit_xor
42+
--
43+
CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES
44+
(1, 1, 1, 1L),
45+
(2, 3, 4, null),
46+
(7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4);
47+
48+
-- empty case
49+
SELECT BIT_XOR(b3) AS n1 FROM bitwise_test where 1 = 0;
50+
51+
-- null case
52+
SELECT BIT_XOR(b4) AS n1 FROM bitwise_test where b4 is null;
53+
54+
-- the numeric suffix of each column alias is the expected result for that column
55+
SELECT
56+
BIT_XOR(cast(b1 as tinyint)) AS a4,
57+
BIT_XOR(cast(b2 as smallint)) AS b5,
58+
BIT_XOR(b3) AS c2,
59+
BIT_XOR(b4) AS d2,
60+
BIT_XOR(distinct b4) AS e2
61+
FROM bitwise_test;
62+
63+
-- group by
64+
SELECT bit_xor(b3) FROM bitwise_test GROUP BY b1 & 1;
65+
66+
-- having
67+
SELECT b1, bit_xor(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7;
68+
69+
-- window
70+
SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test;

sql/core/src/test/resources/sql-tests/results/bitwise.sql.out

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 20
2+
-- Number of queries: 27
33

44

55
-- !query 0
@@ -162,3 +162,72 @@ struct<>
162162
-- !query 19 output
163163
org.apache.spark.sql.AnalysisException
164164
cannot resolve 'bit_count('a')' due to data type mismatch: argument 1 requires (integral or boolean) type, however, ''a'' is of string type.; line 1 pos 7
165+
166+
167+
-- !query 20
168+
CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES
169+
(1, 1, 1, 1L),
170+
(2, 3, 4, null),
171+
(7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4)
172+
-- !query 20 schema
173+
struct<>
174+
-- !query 20 output
175+
176+
177+
178+
-- !query 21
179+
SELECT BIT_XOR(b3) AS n1 FROM bitwise_test where 1 = 0
180+
-- !query 21 schema
181+
struct<n1:int>
182+
-- !query 21 output
183+
NULL
184+
185+
186+
-- !query 22
187+
SELECT BIT_XOR(b4) AS n1 FROM bitwise_test where b4 is null
188+
-- !query 22 schema
189+
struct<n1:bigint>
190+
-- !query 22 output
191+
NULL
192+
193+
194+
-- !query 23
195+
SELECT
196+
BIT_XOR(cast(b1 as tinyint)) AS a4,
197+
BIT_XOR(cast(b2 as smallint)) AS b5,
198+
BIT_XOR(b3) AS c2,
199+
BIT_XOR(b4) AS d2,
200+
BIT_XOR(distinct b4) AS e2
201+
FROM bitwise_test
202+
-- !query 23 schema
203+
struct<a4:tinyint,b5:smallint,c2:int,d2:bigint,e2:bigint>
204+
-- !query 23 output
205+
4 5 2 2 2
206+
207+
208+
-- !query 24
209+
SELECT bit_xor(b3) FROM bitwise_test GROUP BY b1 & 1
210+
-- !query 24 schema
211+
struct<bit_xor(b3):int>
212+
-- !query 24 output
213+
4
214+
6
215+
216+
217+
-- !query 25
218+
SELECT b1, bit_xor(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7
219+
-- !query 25 schema
220+
struct<b1:int,bit_xor(b2):int>
221+
-- !query 25 output
222+
1 1
223+
2 3
224+
225+
226+
-- !query 26
227+
SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test
228+
-- !query 26 schema
229+
struct<b1:int,b2:int,bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2 ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):int>
230+
-- !query 26 output
231+
1 1 1
232+
2 3 3
233+
7 7 7

0 commit comments

Comments
 (0)