Skip to content

Commit ec5f157

Browse files
petermaxlee authored and hvanhovell committed
[SPARK-17117][SQL] 1 / NULL should not fail analysis
## What changes were proposed in this pull request?

This patch fixes the problem described in SPARK-17117, i.e. "SELECT 1 / NULL" throws an analysis exception:

```
org.apache.spark.sql.AnalysisException: cannot resolve '(1 / NULL)' due to data type mismatch: differing types in '(1 / NULL)' (int and null).
```

The problem is that division type coercion did not take null type into account.

## How was this patch tested?

A unit test for the type coercion, and a few end-to-end test cases using SQLQueryTestSuite.

Author: petermaxlee <[email protected]>

Closes #14695 from petermaxlee/SPARK-17117.

(cherry picked from commit 68f5087)
Signed-off-by: Herman van Hovell <[email protected]>
1 parent 5735b8b commit ec5f157

File tree

4 files changed

+89
-23
lines changed

4 files changed

+89
-23
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -543,11 +543,14 @@ object TypeCoercion {
543543
// Decimal and Double remain the same
544544
case d: Divide if d.dataType == DoubleType => d
545545
case d: Divide if d.dataType.isInstanceOf[DecimalType] => d
546-
case Divide(left, right) if isNumeric(left) && isNumeric(right) =>
546+
case Divide(left, right) if isNumericOrNull(left) && isNumericOrNull(right) =>
547547
Divide(Cast(left, DoubleType), Cast(right, DoubleType))
548548
}
549549

550-
private def isNumeric(ex: Expression): Boolean = ex.dataType.isInstanceOf[NumericType]
550+
private def isNumericOrNull(ex: Expression): Boolean = {
551+
// We need to handle null types in case a query contains null literals.
552+
ex.dataType.isInstanceOf[NumericType] || ex.dataType == NullType
553+
}
551554
}
552555

553556
/**

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
1919

2020
import java.sql.Timestamp
2121

22-
import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{Division, FunctionArgumentConversion}
22+
import org.apache.spark.sql.catalyst.analysis.TypeCoercion._
2323
import org.apache.spark.sql.catalyst.dsl.expressions._
2424
import org.apache.spark.sql.catalyst.expressions._
2525
import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -730,6 +730,13 @@ class TypeCoercionSuite extends PlanTest {
730730
// the right expression to Decimal.
731731
ruleTest(rules, sum(Divide(Decimal(4.0), 3)), sum(Divide(Decimal(4.0), 3)))
732732
}
733+
734+
test("SPARK-17117 null type coercion in divide") {
735+
val rules = Seq(FunctionArgumentConversion, Division, ImplicitTypeCasts)
736+
val nullLit = Literal.create(null, NullType)
737+
ruleTest(rules, Divide(1L, nullLit), Divide(Cast(1L, DoubleType), Cast(nullLit, DoubleType)))
738+
ruleTest(rules, Divide(nullLit, 1L), Divide(Cast(nullLit, DoubleType), Cast(1L, DoubleType)))
739+
}
733740
}
734741

735742

sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,19 @@ select + + 100;
1616
select - - max(key) from testdata;
1717
select + - key from testdata where key = 33;
1818

19+
-- div
20+
select 5 / 2;
21+
select 5 / 0;
22+
select 5 / null;
23+
select null / 5;
24+
select 5 div 2;
25+
select 5 div 0;
26+
select 5 div null;
27+
select null div 5;
28+
1929
-- other arithmetics
2030
select 1 + 2;
2131
select 1 - 2;
2232
select 2 * 5;
23-
select 5 / 2;
24-
select 5 div 2;
2533
select 5 % 3;
2634
select pmod(-7, 3);

sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out

Lines changed: 66 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 22
2+
-- Number of queries: 28
33

44

55
-- !query 0
@@ -123,35 +123,35 @@ struct<(- key):int>
123123

124124

125125
-- !query 15
126-
select 1 + 2
126+
select 5 / 2
127127
-- !query 15 schema
128-
struct<(1 + 2):int>
128+
struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double>
129129
-- !query 15 output
130-
3
130+
2.5
131131

132132

133133
-- !query 16
134-
select 1 - 2
134+
select 5 / 0
135135
-- !query 16 schema
136-
struct<(1 - 2):int>
136+
struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double>
137137
-- !query 16 output
138-
-1
138+
NULL
139139

140140

141141
-- !query 17
142-
select 2 * 5
142+
select 5 / null
143143
-- !query 17 schema
144-
struct<(2 * 5):int>
144+
struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double>
145145
-- !query 17 output
146-
10
146+
NULL
147147

148148

149149
-- !query 18
150-
select 5 / 2
150+
select null / 5
151151
-- !query 18 schema
152-
struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double>
152+
struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double>
153153
-- !query 18 output
154-
2.5
154+
NULL
155155

156156

157157
-- !query 19
@@ -163,16 +163,64 @@ struct<CAST((CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)) AS BIGINT):bigint>
163163

164164

165165
-- !query 20
166-
select 5 % 3
166+
select 5 div 0
167167
-- !query 20 schema
168-
struct<(5 % 3):int>
168+
struct<CAST((CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)) AS BIGINT):bigint>
169169
-- !query 20 output
170-
2
170+
NULL
171171

172172

173173
-- !query 21
174-
select pmod(-7, 3)
174+
select 5 div null
175175
-- !query 21 schema
176-
struct<pmod(-7, 3):int>
176+
struct<CAST((CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)) AS BIGINT):bigint>
177177
-- !query 21 output
178+
NULL
179+
180+
181+
-- !query 22
182+
select null div 5
183+
-- !query 22 schema
184+
struct<CAST((CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)) AS BIGINT):bigint>
185+
-- !query 22 output
186+
NULL
187+
188+
189+
-- !query 23
190+
select 1 + 2
191+
-- !query 23 schema
192+
struct<(1 + 2):int>
193+
-- !query 23 output
194+
3
195+
196+
197+
-- !query 24
198+
select 1 - 2
199+
-- !query 24 schema
200+
struct<(1 - 2):int>
201+
-- !query 24 output
202+
-1
203+
204+
205+
-- !query 25
206+
select 2 * 5
207+
-- !query 25 schema
208+
struct<(2 * 5):int>
209+
-- !query 25 output
210+
10
211+
212+
213+
-- !query 26
214+
select 5 % 3
215+
-- !query 26 schema
216+
struct<(5 % 3):int>
217+
-- !query 26 output
218+
2
219+
220+
221+
-- !query 27
222+
select pmod(-7, 3)
223+
-- !query 27 schema
224+
struct<pmod(-7, 3):int>
225+
-- !query 27 output
178226
2

0 commit comments

Comments
 (0)