From b6047b434ad7f560e012f295f2fb9c90deb09efa Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Oct 2019 17:19:00 +0800 Subject: [PATCH 1/7] [SPARK-29545][SQL] Add support for bit_xor aggregate function --- .../catalyst/analysis/FunctionRegistry.scala | 2 + .../aggregate/bitwiseAggregates.scala | 41 ++++- .../inputs/postgreSQL/aggregates_part2.sql | 15 +- .../postgreSQL/aggregates_part2.sql.out | 153 ++++++++++-------- 4 files changed, 137 insertions(+), 74 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 04e8963944fd..f4477deb9708 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -527,6 +527,8 @@ object FunctionRegistry { expression[BitwiseCount]("bit_count"), expression[BitAndAgg]("bit_and"), expression[BitOrAgg]("bit_or"), + expression[BitXorAgg]("bit_xor"), + // json expression[StructsToJson]("to_json"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala index 131fa2eb5055..3dd189d90c3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, BitwiseXor, ExpectsInputTypes, 
Expression, ExpressionDescription, If, IsNull, Literal} import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegralType} @ExpressionDescription( @@ -97,3 +97,42 @@ case class BitOrAgg(child: Expression) extends DeclarativeAggregate with Expects override lazy val evaluateExpression: AttributeReference = bitOr } + +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the bitwise OR of all non-null input values, or null if none.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col); + 6 + """, + since = "3.0.0") +case class BitXorAgg(child: Expression) extends DeclarativeAggregate with ExpectsInputTypes { + + override def nodeName: String = "bit_xor" + + override def children: Seq[Expression] = child :: Nil + + override def nullable: Boolean = true + + override def dataType: DataType = child.dataType + + override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType) + + private lazy val bitXOr = AttributeReference("bit_xor", child.dataType)() + + override lazy val aggBufferAttributes: Seq[AttributeReference] = bitXOr :: Nil + + override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil + + override lazy val updateExpressions: Seq[Expression] = + If(IsNull(bitXOr), + child, + If(IsNull(child), bitXOr, BitwiseXor(bitXOr, child))) :: Nil + + override lazy val mergeExpressions: Seq[Expression] = + If(IsNull(bitXOr.left), + bitXOr.right, + If(IsNull(bitXOr.right), bitXOr.left, BitwiseXor(bitXOr.left, bitXOr.right))) :: Nil + + override lazy val evaluateExpression: AttributeReference = bitXOr +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql index ba91366014e1..ef40b42c3033 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql @@ -47,11 +47,12 @@ CREATE OR 
REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4); -- empty case -SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0; +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2, BIT_XOR(b3) AS n3 FROM bitwise_test where 1 = 0; -- null case -SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null; +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2, BIT_XOR(b4) AS n3 FROM bitwise_test where b4 is null; +-- the suffix numbers show the expected answer SELECT BIT_AND(cast(b1 as tinyint)) AS a1, BIT_AND(cast(b2 as smallint)) AS b1, @@ -60,11 +61,16 @@ SELECT BIT_OR(cast(b1 as tinyint)) AS e7, BIT_OR(cast(b2 as smallint)) AS f7, BIT_OR(b3) AS g7, - BIT_OR(b4) AS h3 + BIT_OR(b4) AS h3, + BIT_XOR(cast(b1 as tinyint)) AS i5, + BIT_XOR(cast(b2 as smallint)) AS j5, + BIT_XOR(b3) AS k5, + BIT_XOR(b4) AS l2, + BIT_XOR(distinct b4) AS m2 FROM bitwise_test; -- group by -SELECT b1 , bit_and(b2), bit_or(b4) FROM bitwise_test GROUP BY b1; +SELECT b1 , bit_and(b2), bit_or(b4), bit_xor(b3) FROM bitwise_test GROUP BY b1; --having SELECT b1, bit_and(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7; @@ -72,6 +78,7 @@ SELECT b1, bit_and(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7; -- window SELECT b1, b2, bit_and(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; SELECT b1, b2, bit_or(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; +SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; -- -- test boolean aggregates diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out index fdca51ff1325..586c7a641130 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by 
SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 30 -- !query 0 @@ -28,19 +28,19 @@ struct<> -- !query 2 -SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0 +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2, BIT_XOR(b3) AS n3 FROM bitwise_test where 1 = 0 -- !query 2 schema -struct +struct -- !query 2 output -NULL NULL +NULL NULL NULL -- !query 3 -SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2, BIT_XOR(b4) AS n3 FROM bitwise_test where b4 is null -- !query 3 schema -struct +struct -- !query 3 output -NULL NULL +NULL NULL NULL -- !query 4 @@ -52,22 +52,27 @@ SELECT BIT_OR(cast(b1 as tinyint)) AS e7, BIT_OR(cast(b2 as smallint)) AS f7, BIT_OR(b3) AS g7, - BIT_OR(b4) AS h3 + BIT_OR(b4) AS h3, + BIT_XOR(cast(b1 as tinyint)) AS i5, + BIT_XOR(cast(b2 as smallint)) AS j5, + BIT_XOR(b3) AS k5, + BIT_XOR(b4) AS l2, + BIT_XOR(distinct b4) AS m2 FROM bitwise_test -- !query 4 schema -struct +struct -- !query 4 output -1 1 1 1 7 7 7 3 +1 1 1 1 7 7 7 3 5 5 5 2 2 -- !query 5 -SELECT b1 , bit_and(b2), bit_or(b4) FROM bitwise_test GROUP BY b1 +SELECT b1 , bit_and(b2), bit_or(b4), bit_xor(b3) FROM bitwise_test GROUP BY b1 -- !query 5 schema -struct +struct -- !query 5 output -1 1 1 -3 3 NULL -7 7 3 +1 1 1 1 +3 3 NULL 3 +7 7 3 7 -- !query 6 @@ -100,6 +105,16 @@ struct +-- !query 9 output +1 1 1 +3 3 3 +7 7 7 + + +-- !query 10 SELECT (NULL AND NULL) IS NULL AS `t`, (TRUE AND NULL) IS NULL AS `t`, @@ -110,13 +125,13 @@ SELECT NOT (TRUE AND FALSE) AS `t`, NOT (FALSE AND TRUE) AS `t`, NOT (FALSE AND FALSE) AS `t` --- !query 9 schema +-- !query 10 schema struct --- !query 9 output +-- !query 10 output true true false true false true true true true --- !query 10 +-- !query 11 SELECT (NULL OR NULL) IS NULL AS `t`, (TRUE OR NULL) IS NULL AS `t`, @@ -127,32 +142,32 @@ SELECT (TRUE OR FALSE) AS `t`, (FALSE OR TRUE) AS `t`, NOT (FALSE OR FALSE) AS `t` --- !query 10 schema +-- !query 11 
schema struct --- !query 10 output +-- !query 11 output true false true false true true true true true --- !query 11 +-- !query 12 CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES (TRUE, null, FALSE, null), (FALSE, TRUE, null, null), (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) --- !query 11 schema +-- !query 12 schema struct<> --- !query 11 output +-- !query 12 output --- !query 12 +-- !query 13 SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 --- !query 12 schema +-- !query 13 schema struct --- !query 12 output +-- !query 13 output NULL NULL --- !query 13 +-- !query 14 SELECT BOOL_AND(b1) AS f1, BOOL_AND(b2) AS t2, @@ -161,13 +176,13 @@ SELECT BOOL_AND(NOT b2) AS f5, BOOL_AND(NOT b3) AS t6 FROM bool_test --- !query 13 schema +-- !query 14 schema struct --- !query 13 output +-- !query 14 output false true false NULL false true --- !query 14 +-- !query 15 SELECT EVERY(b1) AS f1, EVERY(b2) AS t2, @@ -176,13 +191,13 @@ SELECT EVERY(NOT b2) AS f5, EVERY(NOT b3) AS t6 FROM bool_test --- !query 14 schema +-- !query 15 schema struct --- !query 14 output +-- !query 15 output false true false NULL false true --- !query 15 +-- !query 16 SELECT BOOL_OR(b1) AS t1, BOOL_OR(b2) AS t2, @@ -191,78 +206,70 @@ SELECT BOOL_OR(NOT b2) AS f5, BOOL_OR(NOT b3) AS t6 FROM bool_test --- !query 15 schema -struct --- !query 15 output -true true false NULL false true - - --- !query 16 -select min(unique1) from tenk1 -- !query 16 schema -struct +struct -- !query 16 output -0 +true true false NULL false true -- !query 17 -select max(unique1) from tenk1 +select min(unique1) from tenk1 -- !query 17 schema -struct +struct -- !query 17 output -9999 +0 -- !query 18 -select max(unique1) from tenk1 where unique1 < 42 +select max(unique1) from tenk1 -- !query 18 schema struct -- !query 18 output -41 +9999 -- !query 19 -select max(unique1) from tenk1 where unique1 > 42 +select max(unique1) from tenk1 where unique1 < 42 -- !query 19 schema struct -- 
!query 19 output -9999 +41 -- !query 20 -select max(unique1) from tenk1 where unique1 > 42000 +select max(unique1) from tenk1 where unique1 > 42 -- !query 20 schema struct -- !query 20 output -NULL +9999 -- !query 21 -select max(tenthous) from tenk1 where thousand = 33 +select max(unique1) from tenk1 where unique1 > 42000 -- !query 21 schema -struct +struct -- !query 21 output -9033 +NULL -- !query 22 -select min(tenthous) from tenk1 where thousand = 33 +select max(tenthous) from tenk1 where thousand = 33 -- !query 22 schema -struct +struct -- !query 22 output -33 +9033 -- !query 23 -select distinct max(unique2) from tenk1 +select min(tenthous) from tenk1 where thousand = 33 -- !query 23 schema -struct +struct -- !query 23 output -9999 +33 -- !query 24 -select max(unique2) from tenk1 order by 1 +select distinct max(unique2) from tenk1 -- !query 24 schema struct -- !query 24 output @@ -270,7 +277,7 @@ struct -- !query 25 -select max(unique2) from tenk1 order by max(unique2) +select max(unique2) from tenk1 order by 1 -- !query 25 schema struct -- !query 25 output @@ -278,7 +285,7 @@ struct -- !query 26 -select max(unique2) from tenk1 order by max(unique2)+1 +select max(unique2) from tenk1 order by max(unique2) -- !query 26 schema struct -- !query 26 output @@ -286,18 +293,26 @@ struct -- !query 27 -select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +select max(unique2) from tenk1 order by max(unique2)+1 -- !query 27 schema -struct +struct -- !query 27 output +9999 + + +-- !query 28 +select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +-- !query 28 schema +struct +-- !query 28 output 9999 3 9999 2 9999 1 --- !query 28 +-- !query 29 select max(100) from tenk1 --- !query 28 schema +-- !query 29 schema struct --- !query 28 output +-- !query 29 output 100 From 
dcd9685095c9f7978d4018ff9718016e21168334 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Oct 2019 17:21:02 +0800 Subject: [PATCH 2/7] doc --- .../sql/catalyst/expressions/aggregate/bitwiseAggregates.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala index 3dd189d90c3d..e0b9c03a68d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala @@ -99,7 +99,7 @@ case class BitOrAgg(child: Expression) extends DeclarativeAggregate with Expects } @ExpressionDescription( - usage = "_FUNC_(expr) - Returns the bitwise OR of all non-null input values, or null if none.", + usage = "_FUNC_(expr) - Returns the bitwise XOR of all non-null input values, or null if none.", examples = """ Examples: > SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col); From 9a777caf39fa70938084c2d496a17411b45bc2b0 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Oct 2019 11:02:26 +0800 Subject: [PATCH 3/7] rm space --- python/pyspark/version.py | 22 +++++++++---------- .../catalyst/analysis/FunctionRegistry.scala | 1 - 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/python/pyspark/version.py b/python/pyspark/version.py index ba2a40cec01e..416815401373 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -1,19 +1,19 @@ #!/usr/bin/env python # -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with +# licensed to the apache software foundation (asf) under one or more +# contributor license agreements. 
see the notice file distributed with # this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at +# the asf licenses this file to you under the apache license, version 2.0 +# (the "license"); you may not use this file except in compliance with +# the license. you may obtain a copy of the license at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/license-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. 
__version__ = "3.0.0.dev0" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index f4477deb9708..52e05b820366 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -529,7 +529,6 @@ object FunctionRegistry { expression[BitOrAgg]("bit_or"), expression[BitXorAgg]("bit_xor"), - // json expression[StructsToJson]("to_json"), expression[JsonToStructs]("from_json"), From 8e04b30524990f776d8e06b2f7d0ac16aba08263 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Oct 2019 11:09:27 +0800 Subject: [PATCH 4/7] fix license --- python/pyspark/version.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 416815401373..ba2a40cec01e 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -1,19 +1,19 @@ #!/usr/bin/env python # -# licensed to the apache software foundation (asf) under one or more -# contributor license agreements. see the notice file distributed with +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. -# the asf licenses this file to you under the apache license, version 2.0 -# (the "license"); you may not use this file except in compliance with -# the license. you may obtain a copy of the license at +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at # -# http://www.apache.org/licenses/license-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# unless required by applicable law or agreed to in writing, software -# distributed under the license is distributed on an "as is" basis, -# without warranties or conditions of any kind, either express or implied. -# see the license for the specific language governing permissions and -# limitations under the license. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. __version__ = "3.0.0.dev0" From ffb1a605ff97c705785d06990d282b7d495ed9f7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Oct 2019 19:55:44 +0800 Subject: [PATCH 5/7] abstract --- .../aggregate/bitwiseAggregates.scala | 101 +++++++----------- 1 file changed, 37 insertions(+), 64 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala index e0b9c03a68d9..5a201d8f579a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala @@ -20,17 +20,9 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, BitwiseXor, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal} import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegralType} -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns the bitwise AND of all non-null input values, 
or null if none.", - examples = """ - Examples: - > SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col); - 1 - """, - since = "3.0.0") -case class BitAndAgg(child: Expression) extends DeclarativeAggregate with ExpectsInputTypes { +abstract class BitAggregate extends DeclarativeAggregate with ExpectsInputTypes { - override def nodeName: String = "bit_and" + val child: Expression override def children: Seq[Expression] = child :: Nil @@ -40,23 +32,36 @@ case class BitAndAgg(child: Expression) extends DeclarativeAggregate with Expect override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType) - private lazy val bitAnd = AttributeReference("bit_and", child.dataType)() - - override lazy val aggBufferAttributes: Seq[AttributeReference] = bitAnd :: Nil + protected lazy val bitAgg = AttributeReference(nodeName, child.dataType)() override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil + override lazy val aggBufferAttributes: Seq[AttributeReference] = bitAgg :: Nil + + override lazy val evaluateExpression: AttributeReference = bitAgg +} + +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the bitwise AND of all non-null input values, or null if none.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col); + 1 + """, + since = "3.0.0") +case class BitAndAgg(child: Expression) extends BitAggregate { + + override def nodeName: String = "bit_and" + override lazy val updateExpressions: Seq[Expression] = - If(IsNull(bitAnd), + If(IsNull(bitAgg), child, - If(IsNull(child), bitAnd, BitwiseAnd(bitAnd, child))) :: Nil + If(IsNull(child), bitAgg, BitwiseAnd(bitAgg, child))) :: Nil override lazy val mergeExpressions: Seq[Expression] = - If(IsNull(bitAnd.left), - bitAnd.right, - If(IsNull(bitAnd.right), bitAnd.left, BitwiseAnd(bitAnd.left, bitAnd.right))) :: Nil - - override lazy val evaluateExpression: AttributeReference = bitAnd + If(IsNull(bitAgg.left), + bitAgg.right, + If(IsNull(bitAgg.right), bitAgg.left, 
BitwiseAnd(bitAgg.left, bitAgg.right))) :: Nil } @ExpressionDescription( @@ -67,35 +72,19 @@ case class BitAndAgg(child: Expression) extends DeclarativeAggregate with Expect 7 """, since = "3.0.0") -case class BitOrAgg(child: Expression) extends DeclarativeAggregate with ExpectsInputTypes { +case class BitOrAgg(child: Expression) extends BitAggregate { override def nodeName: String = "bit_or" - override def children: Seq[Expression] = child :: Nil - - override def nullable: Boolean = true - - override def dataType: DataType = child.dataType - - override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType) - - private lazy val bitOr = AttributeReference("bit_or", child.dataType)() - - override lazy val aggBufferAttributes: Seq[AttributeReference] = bitOr :: Nil - - override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil - override lazy val updateExpressions: Seq[Expression] = - If(IsNull(bitOr), + If(IsNull(bitAgg), child, - If(IsNull(child), bitOr, BitwiseOr(bitOr, child))) :: Nil + If(IsNull(child), bitAgg, BitwiseOr(bitAgg, child))) :: Nil override lazy val mergeExpressions: Seq[Expression] = - If(IsNull(bitOr.left), - bitOr.right, - If(IsNull(bitOr.right), bitOr.left, BitwiseOr(bitOr.left, bitOr.right))) :: Nil - - override lazy val evaluateExpression: AttributeReference = bitOr + If(IsNull(bitAgg.left), + bitAgg.right, + If(IsNull(bitAgg.right), bitAgg.left, BitwiseOr(bitAgg.left, bitAgg.right))) :: Nil } @ExpressionDescription( @@ -106,33 +95,17 @@ case class BitOrAgg(child: Expression) extends DeclarativeAggregate with Expects 6 """, since = "3.0.0") -case class BitXorAgg(child: Expression) extends DeclarativeAggregate with ExpectsInputTypes { +case class BitXorAgg(child: Expression) extends BitAggregate { override def nodeName: String = "bit_xor" - override def children: Seq[Expression] = child :: Nil - - override def nullable: Boolean = true - - override def dataType: DataType = child.dataType - - override def inputTypes: 
Seq[AbstractDataType] = Seq(IntegralType) - - private lazy val bitXOr = AttributeReference("bit_xor", child.dataType)() - - override lazy val aggBufferAttributes: Seq[AttributeReference] = bitXOr :: Nil - - override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil - override lazy val updateExpressions: Seq[Expression] = - If(IsNull(bitXOr), + If(IsNull(bitAgg), child, - If(IsNull(child), bitXOr, BitwiseXor(bitXOr, child))) :: Nil + If(IsNull(child), bitAgg, BitwiseXor(bitAgg, child))) :: Nil override lazy val mergeExpressions: Seq[Expression] = - If(IsNull(bitXOr.left), - bitXOr.right, - If(IsNull(bitXOr.right), bitXOr.left, BitwiseXor(bitXOr.left, bitXOr.right))) :: Nil - - override lazy val evaluateExpression: AttributeReference = bitXOr + If(IsNull(bitAgg.left), + bitAgg.right, + If(IsNull(bitAgg.right), bitAgg.left, BitwiseXor(bitAgg.left, bitAgg.right))) :: Nil } From 97eeab22de325d24383043923b3f45280bbe83e2 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Oct 2019 22:53:22 +0800 Subject: [PATCH 6/7] refine --- .../aggregate/bitwiseAggregates.scala | 52 ++++++++----------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala index 5a201d8f579a..b77c3bd9cbde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala @@ -17,13 +17,15 @@ package org.apache.spark.sql.catalyst.expressions.aggregate -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, BitwiseXor, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, 
BinaryArithmetic, BitwiseAnd, BitwiseOr, BitwiseXor, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal} import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegralType} abstract class BitAggregate extends DeclarativeAggregate with ExpectsInputTypes { val child: Expression + def bitOperator(left: Expression, right: Expression): BinaryArithmetic + override def children: Seq[Expression] = child :: Nil override def nullable: Boolean = true @@ -32,13 +34,23 @@ abstract class BitAggregate extends DeclarativeAggregate with ExpectsInputTypes override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType) - protected lazy val bitAgg = AttributeReference(nodeName, child.dataType)() + private lazy val bitAgg = AttributeReference(nodeName, child.dataType)() override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil override lazy val aggBufferAttributes: Seq[AttributeReference] = bitAgg :: Nil override lazy val evaluateExpression: AttributeReference = bitAgg + + override lazy val updateExpressions: Seq[Expression] = + If(IsNull(bitAgg), + child, + If(IsNull(child), bitAgg, bitOperator(bitAgg, child))) :: Nil + + override lazy val mergeExpressions: Seq[Expression] = + If(IsNull(bitAgg.left), + bitAgg.right, + If(IsNull(bitAgg.right), bitAgg.left, bitOperator(bitAgg.left, bitAgg.right))) :: Nil } @ExpressionDescription( @@ -53,15 +65,9 @@ case class BitAndAgg(child: Expression) extends BitAggregate { override def nodeName: String = "bit_and" - override lazy val updateExpressions: Seq[Expression] = - If(IsNull(bitAgg), - child, - If(IsNull(child), bitAgg, BitwiseAnd(bitAgg, child))) :: Nil - - override lazy val mergeExpressions: Seq[Expression] = - If(IsNull(bitAgg.left), - bitAgg.right, - If(IsNull(bitAgg.right), bitAgg.left, BitwiseAnd(bitAgg.left, bitAgg.right))) :: Nil + override def bitOperator(left: Expression, right: Expression): BinaryArithmetic = { + BitwiseAnd(left, right) + } } @ExpressionDescription( @@ 
-76,15 +82,9 @@ case class BitOrAgg(child: Expression) extends BitAggregate { override def nodeName: String = "bit_or" - override lazy val updateExpressions: Seq[Expression] = - If(IsNull(bitAgg), - child, - If(IsNull(child), bitAgg, BitwiseOr(bitAgg, child))) :: Nil - - override lazy val mergeExpressions: Seq[Expression] = - If(IsNull(bitAgg.left), - bitAgg.right, - If(IsNull(bitAgg.right), bitAgg.left, BitwiseOr(bitAgg.left, bitAgg.right))) :: Nil + override def bitOperator(left: Expression, right: Expression): BinaryArithmetic = { + BitwiseOr(left, right) + } } @ExpressionDescription( @@ -99,13 +99,7 @@ case class BitXorAgg(child: Expression) extends BitAggregate { override def nodeName: String = "bit_xor" - override lazy val updateExpressions: Seq[Expression] = - If(IsNull(bitAgg), - child, - If(IsNull(child), bitAgg, BitwiseXor(bitAgg, child))) :: Nil - - override lazy val mergeExpressions: Seq[Expression] = - If(IsNull(bitAgg.left), - bitAgg.right, - If(IsNull(bitAgg.right), bitAgg.left, BitwiseXor(bitAgg.left, bitAgg.right))) :: Nil + override def bitOperator(left: Expression, right: Expression): BinaryArithmetic = { + BitwiseXor(left, right) + } } From 1481aa89454ab1b3ce0b19ee67e4bac698543206 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 25 Oct 2019 11:09:06 +0800 Subject: [PATCH 7/7] mv tests --- .../resources/sql-tests/inputs/bitwise.sql | 31 ++++ .../inputs/postgreSQL/aggregates_part2.sql | 15 +- .../sql-tests/results/bitwise.sql.out | 71 +++++++- .../postgreSQL/aggregates_part2.sql.out | 153 ++++++++---------- 4 files changed, 174 insertions(+), 96 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql b/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql index 993eecf0f89b..5e665e4c0c38 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql @@ -37,3 +37,34 @@ select bit_count(-9223372036854775808L); -- other illegal arguments select 
bit_count("bit count"); select bit_count('a'); + +-- test for bit_xor +-- +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (2, 3, 4, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4); + +-- empty case +SELECT BIT_XOR(b3) AS n1 FROM bitwise_test where 1 = 0; + +-- null case +SELECT BIT_XOR(b4) AS n1 FROM bitwise_test where b4 is null; + +-- the suffix numbers show the expected answer +SELECT + BIT_XOR(cast(b1 as tinyint)) AS a4, + BIT_XOR(cast(b2 as smallint)) AS b5, + BIT_XOR(b3) AS c2, + BIT_XOR(b4) AS d2, + BIT_XOR(distinct b4) AS e2 +FROM bitwise_test; + +-- group by +SELECT bit_xor(b3) FROM bitwise_test GROUP BY b1 & 1; + +--having +SELECT b1, bit_xor(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7; + +-- window +SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql index ef40b42c3033..ba91366014e1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql @@ -47,12 +47,11 @@ CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4); -- empty case -SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2, BIT_XOR(b3) AS n3 FROM bitwise_test where 1 = 0; +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0; -- null case -SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2, BIT_XOR(b4) AS n3 FROM bitwise_test where b4 is null; +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null; --- the suffix numbers show the expected answer SELECT BIT_AND(cast(b1 as tinyint)) AS a1, BIT_AND(cast(b2 as smallint)) AS b1, @@ -61,16 +60,11 @@ SELECT BIT_OR(cast(b1 as tinyint)) AS e7, BIT_OR(cast(b2 as smallint)) AS f7, BIT_OR(b3) AS g7, - 
BIT_OR(b4) AS h3, - BIT_XOR(cast(b1 as tinyint)) AS i5, - BIT_XOR(cast(b2 as smallint)) AS j5, - BIT_XOR(b3) AS k5, - BIT_XOR(b4) AS l2, - BIT_XOR(distinct b4) AS m2 + BIT_OR(b4) AS h3 FROM bitwise_test; -- group by -SELECT b1 , bit_and(b2), bit_or(b4), bit_xor(b3) FROM bitwise_test GROUP BY b1; +SELECT b1 , bit_and(b2), bit_or(b4) FROM bitwise_test GROUP BY b1; --having SELECT b1, bit_and(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7; @@ -78,7 +72,6 @@ SELECT b1, bit_and(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7; -- window SELECT b1, b2, bit_and(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; SELECT b1, b2, bit_or(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; -SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; -- -- test boolean aggregates diff --git a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out index 7cbd26e87bd2..42c22a317eb4 100644 --- a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 20 +-- Number of queries: 27 -- !query 0 @@ -162,3 +162,72 @@ struct<> -- !query 19 output org.apache.spark.sql.AnalysisException cannot resolve 'bit_count('a')' due to data type mismatch: argument 1 requires (integral or boolean) type, however, ''a'' is of string type.; line 1 pos 7 + + +-- !query 20 +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (2, 3, 4, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4) +-- !query 20 schema +struct<> +-- !query 20 output + + + +-- !query 21 +SELECT BIT_XOR(b3) AS n1 FROM bitwise_test where 1 = 0 +-- !query 21 schema +struct +-- !query 21 output +NULL + + +-- !query 22 +SELECT BIT_XOR(b4) AS n1 FROM bitwise_test where b4 is null +-- !query 22 schema +struct +-- !query 22 
output +NULL + + +-- !query 23 +SELECT + BIT_XOR(cast(b1 as tinyint)) AS a4, + BIT_XOR(cast(b2 as smallint)) AS b5, + BIT_XOR(b3) AS c2, + BIT_XOR(b4) AS d2, + BIT_XOR(distinct b4) AS e2 +FROM bitwise_test +-- !query 23 schema +struct +-- !query 23 output +4 5 2 2 2 + + +-- !query 24 +SELECT bit_xor(b3) FROM bitwise_test GROUP BY b1 & 1 +-- !query 24 schema +struct +-- !query 24 output +4 +6 + + +-- !query 25 +SELECT b1, bit_xor(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7 +-- !query 25 schema +struct +-- !query 25 output +1 1 +2 3 + + +-- !query 26 +SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test +-- !query 26 schema +struct +-- !query 26 output +1 1 1 +2 3 3 +7 7 7 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out index 586c7a641130..fdca51ff1325 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 30 +-- Number of queries: 29 -- !query 0 @@ -28,19 +28,19 @@ struct<> -- !query 2 -SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2, BIT_XOR(b3) AS n3 FROM bitwise_test where 1 = 0 +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0 -- !query 2 schema -struct +struct -- !query 2 output -NULL NULL NULL +NULL NULL -- !query 3 -SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2, BIT_XOR(b4) AS n3 FROM bitwise_test where b4 is null +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null -- !query 3 schema -struct +struct -- !query 3 output -NULL NULL NULL +NULL NULL -- !query 4 @@ -52,27 +52,22 @@ SELECT BIT_OR(cast(b1 as tinyint)) AS e7, BIT_OR(cast(b2 as smallint)) AS f7, BIT_OR(b3) AS g7, - BIT_OR(b4) AS h3, - BIT_XOR(cast(b1 as tinyint)) AS i5, - 
BIT_XOR(cast(b2 as smallint)) AS j5, - BIT_XOR(b3) AS k5, - BIT_XOR(b4) AS l2, - BIT_XOR(distinct b4) AS m2 + BIT_OR(b4) AS h3 FROM bitwise_test -- !query 4 schema -struct +struct -- !query 4 output -1 1 1 1 7 7 7 3 5 5 5 2 2 +1 1 1 1 7 7 7 3 -- !query 5 -SELECT b1 , bit_and(b2), bit_or(b4), bit_xor(b3) FROM bitwise_test GROUP BY b1 +SELECT b1 , bit_and(b2), bit_or(b4) FROM bitwise_test GROUP BY b1 -- !query 5 schema -struct +struct -- !query 5 output -1 1 1 1 -3 3 NULL 3 -7 7 3 7 +1 1 1 +3 3 NULL +7 7 3 -- !query 6 @@ -105,16 +100,6 @@ struct --- !query 9 output -1 1 1 -3 3 3 -7 7 7 - - --- !query 10 SELECT (NULL AND NULL) IS NULL AS `t`, (TRUE AND NULL) IS NULL AS `t`, @@ -125,13 +110,13 @@ SELECT NOT (TRUE AND FALSE) AS `t`, NOT (FALSE AND TRUE) AS `t`, NOT (FALSE AND FALSE) AS `t` --- !query 10 schema +-- !query 9 schema struct --- !query 10 output +-- !query 9 output true true false true false true true true true --- !query 11 +-- !query 10 SELECT (NULL OR NULL) IS NULL AS `t`, (TRUE OR NULL) IS NULL AS `t`, @@ -142,32 +127,32 @@ SELECT (TRUE OR FALSE) AS `t`, (FALSE OR TRUE) AS `t`, NOT (FALSE OR FALSE) AS `t` --- !query 11 schema +-- !query 10 schema struct --- !query 11 output +-- !query 10 output true false true false true true true true true --- !query 12 +-- !query 11 CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES (TRUE, null, FALSE, null), (FALSE, TRUE, null, null), (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) --- !query 12 schema +-- !query 11 schema struct<> --- !query 12 output +-- !query 11 output --- !query 13 +-- !query 12 SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 --- !query 13 schema +-- !query 12 schema struct --- !query 13 output +-- !query 12 output NULL NULL --- !query 14 +-- !query 13 SELECT BOOL_AND(b1) AS f1, BOOL_AND(b2) AS t2, @@ -176,13 +161,13 @@ SELECT BOOL_AND(NOT b2) AS f5, BOOL_AND(NOT b3) AS t6 FROM bool_test --- !query 14 schema +-- !query 13 schema struct --- !query 14 
output +-- !query 13 output false true false NULL false true --- !query 15 +-- !query 14 SELECT EVERY(b1) AS f1, EVERY(b2) AS t2, @@ -191,13 +176,13 @@ SELECT EVERY(NOT b2) AS f5, EVERY(NOT b3) AS t6 FROM bool_test --- !query 15 schema +-- !query 14 schema struct --- !query 15 output +-- !query 14 output false true false NULL false true --- !query 16 +-- !query 15 SELECT BOOL_OR(b1) AS t1, BOOL_OR(b2) AS t2, @@ -206,70 +191,78 @@ SELECT BOOL_OR(NOT b2) AS f5, BOOL_OR(NOT b3) AS t6 FROM bool_test --- !query 16 schema +-- !query 15 schema struct --- !query 16 output +-- !query 15 output true true false NULL false true --- !query 17 +-- !query 16 select min(unique1) from tenk1 --- !query 17 schema +-- !query 16 schema struct --- !query 17 output +-- !query 16 output 0 --- !query 18 +-- !query 17 select max(unique1) from tenk1 +-- !query 17 schema +struct +-- !query 17 output +9999 + + +-- !query 18 +select max(unique1) from tenk1 where unique1 < 42 -- !query 18 schema struct -- !query 18 output -9999 +41 -- !query 19 -select max(unique1) from tenk1 where unique1 < 42 +select max(unique1) from tenk1 where unique1 > 42 -- !query 19 schema struct -- !query 19 output -41 +9999 -- !query 20 -select max(unique1) from tenk1 where unique1 > 42 +select max(unique1) from tenk1 where unique1 > 42000 -- !query 20 schema struct -- !query 20 output -9999 +NULL -- !query 21 -select max(unique1) from tenk1 where unique1 > 42000 +select max(tenthous) from tenk1 where thousand = 33 -- !query 21 schema -struct +struct -- !query 21 output -NULL +9033 -- !query 22 -select max(tenthous) from tenk1 where thousand = 33 +select min(tenthous) from tenk1 where thousand = 33 -- !query 22 schema -struct +struct -- !query 22 output -9033 +33 -- !query 23 -select min(tenthous) from tenk1 where thousand = 33 +select distinct max(unique2) from tenk1 -- !query 23 schema -struct +struct -- !query 23 output -33 +9999 -- !query 24 -select distinct max(unique2) from tenk1 +select max(unique2) from tenk1 
order by 1 -- !query 24 schema struct -- !query 24 output @@ -277,7 +270,7 @@ struct -- !query 25 -select max(unique2) from tenk1 order by 1 +select max(unique2) from tenk1 order by max(unique2) -- !query 25 schema struct -- !query 25 output @@ -285,7 +278,7 @@ struct -- !query 26 -select max(unique2) from tenk1 order by max(unique2) +select max(unique2) from tenk1 order by max(unique2)+1 -- !query 26 schema struct -- !query 26 output @@ -293,26 +286,18 @@ struct -- !query 27 -select max(unique2) from tenk1 order by max(unique2)+1 --- !query 27 schema -struct --- !query 27 output -9999 - - --- !query 28 select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc --- !query 28 schema +-- !query 27 schema struct --- !query 28 output +-- !query 27 output 9999 3 9999 2 9999 1 --- !query 29 +-- !query 28 select max(100) from tenk1 --- !query 29 schema +-- !query 28 schema struct --- !query 29 output +-- !query 28 output 100