Skip to content

Commit 11ac443

Browse files
author
Nathan Howell
committed
[SPARK-18772][SQL] NaN/Infinite float parsing in JSON is inconsistent
1 parent dbf3e29 commit 11ac443

File tree

2 files changed: +52 −20 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,17 @@ class JacksonParser(
155155
case _ => makeConverter(dataType)
156156
}
157157

158+
/**
 * Extractor for the special floating-point spellings accepted in JSON string
 * values: NaN and positive/negative infinity (with the "inf" shorthand and an
 * optional explicit sign), matched case-insensitively.
 *
 * Returns `Some(Double)` for a recognized spelling, `None` otherwise.
 */
private object SpecialDouble {
  def unapply(value: String): Option[Double] = {
    // Lowercase with Locale.ROOT so matching does not depend on the JVM's
    // default locale: e.g. under a Turkish locale "Inf".toLowerCase yields a
    // dotless ı ("ınf") and would fail to match "inf"/"infinity".
    value.toLowerCase(java.util.Locale.ROOT) match {
      case "nan" => Some(Double.NaN)
      case "infinity" | "+infinity" | "inf" | "+inf" => Some(Double.PositiveInfinity)
      case "-infinity" | "-inf" => Some(Double.NegativeInfinity)
      case _ => None
    }
  }
}
168+
158169
/**
159170
* Create a converter which converts the JSON documents held by the `JsonParser`
160171
* to a value according to a desired schema.
@@ -193,16 +204,10 @@ class JacksonParser(
193204

194205
case VALUE_STRING =>
195206
// Special case handling for NaN and Infinity.
196-
val value = parser.getText
197-
val lowerCaseValue = value.toLowerCase
198-
if (lowerCaseValue.equals("nan") ||
199-
lowerCaseValue.equals("infinity") ||
200-
lowerCaseValue.equals("-infinity") ||
201-
lowerCaseValue.equals("inf") ||
202-
lowerCaseValue.equals("-inf")) {
203-
value.toFloat
204-
} else {
205-
throw new SparkSQLJsonProcessingException(s"Cannot parse $value as FloatType.")
207+
parser.getText match {
208+
case SpecialDouble(value) => value.toFloat
209+
case _ => throw new SparkSQLJsonProcessingException(
210+
s"Cannot parse ${parser.getText} as FloatType.")
206211
}
207212
}
208213

@@ -213,16 +218,10 @@ class JacksonParser(
213218

214219
case VALUE_STRING =>
215220
// Special case handling for NaN and Infinity.
216-
val value = parser.getText
217-
val lowerCaseValue = value.toLowerCase
218-
if (lowerCaseValue.equals("nan") ||
219-
lowerCaseValue.equals("infinity") ||
220-
lowerCaseValue.equals("-infinity") ||
221-
lowerCaseValue.equals("inf") ||
222-
lowerCaseValue.equals("-inf")) {
223-
value.toDouble
224-
} else {
225-
throw new SparkSQLJsonProcessingException(s"Cannot parse $value as DoubleType.")
221+
parser.getText match {
222+
case SpecialDouble(value) => value
223+
case _ => throw new SparkSQLJsonProcessingException(
224+
s"Cannot parse ${parser.getText} as DoubleType.")
226225
}
227226
}
228227

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1764,4 +1764,37 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
17641764
val df2 = spark.read.option("PREfersdecimaL", "true").json(records)
17651765
assert(df2.schema == schema)
17661766
}
1767+
1768+
// Regression test for SPARK-18772: special floating-point spellings (NaN,
// Inf/Infinity with optional sign, in any letter case) must parse
// consistently for both FloatType and DoubleType schemas.
test("SPARK-18772: Special floats") {
1769+
// One JSON record per accepted spelling, plus "+Infi", which is invalid
// and is expected to produce a null value (presumably via permissive-mode
// error handling — confirm against JacksonParser).
val records = sparkContext
1770+
.parallelize(
1771+
"""{"a": "NaN"}""" ::
1772+
"""{"a": "nAn"}""" ::
1773+
"""{"a": "-iNf"}""" ::
1774+
"""{"a": "inF"}""" ::
1775+
"""{"a": "+Inf"}""" ::
1776+
"""{"a": "-iNfInity"}""" ::
1777+
"""{"a": "InFiNiTy"}""" ::
1778+
"""{"a": "+InfiNitY"}""" ::
1779+
"""{"a": "+Infi"}""" ::
1780+
Nil)
1781+
1782+
// Run the same assertions for both float and double columns; casting the
// result to DoubleType lets one set of checks cover both schema types.
for (dt <- Seq(FloatType, DoubleType)) {
1783+
val res = spark.read
1784+
.schema(StructType(Seq(StructField("a", dt))))
1785+
.json(records)
1786+
.select($"a".cast(DoubleType).as[java.lang.Double])
1787+
.collect()
1788+
// All nine records survive; order matches the input above.
assert(res.length === 9)
1789+
assert(res(0).isNaN)
1790+
assert(res(1).isNaN)
1791+
assert(res(2).toDouble.isNegInfinity)
1792+
assert(res(3).toDouble.isPosInfinity)
1793+
assert(res(4).toDouble.isPosInfinity)
1794+
assert(res(5).toDouble.isNegInfinity)
1795+
assert(res(6).toDouble.isPosInfinity)
1796+
assert(res(7).toDouble.isPosInfinity)
1797+
// The malformed spelling yields null rather than a parsed value.
assert(res(8) eq null)
1798+
}
1799+
}
17671800
}

0 commit comments

Comments (0)