diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index c3be6b2fee99..054561ff5169 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -72,12 +72,13 @@ hk2-utils-2.4.0-b34.jar
httpclient-4.5.2.jar
httpcore-4.4.4.jar
ivy-2.4.0.jar
-jackson-annotations-2.5.3.jar
-jackson-core-2.5.3.jar
+jackson-annotations-2.7.3.jar
+jackson-core-2.7.3.jar
jackson-core-asl-1.9.13.jar
-jackson-databind-2.5.3.jar
+jackson-databind-2.7.3.jar
jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.11-2.5.3.jar
+jackson-module-paranamer-2.7.3.jar
+jackson-module-scala_2.11-2.7.3.jar
janino-2.7.8.jar
javassist-3.18.1-GA.jar
javax.annotation-api-1.2.jar
@@ -127,7 +128,7 @@ objenesis-2.1.jar
opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
-paranamer-2.6.jar
+paranamer-2.8.jar
parquet-column-1.7.0.jar
parquet-common-1.7.0.jar
parquet-encoding-1.7.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index 61ed4c0889b8..7e60a313ae8f 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -74,12 +74,13 @@ hk2-utils-2.4.0-b34.jar
httpclient-4.5.2.jar
httpcore-4.4.4.jar
ivy-2.4.0.jar
-jackson-annotations-2.5.3.jar
-jackson-core-2.5.3.jar
+jackson-annotations-2.7.3.jar
+jackson-core-2.7.3.jar
jackson-core-asl-1.9.13.jar
-jackson-databind-2.5.3.jar
+jackson-databind-2.7.3.jar
jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.11-2.5.3.jar
+jackson-module-paranamer-2.7.3.jar
+jackson-module-scala_2.11-2.7.3.jar
janino-2.7.8.jar
java-xmlbuilder-1.0.jar
javassist-3.18.1-GA.jar
@@ -134,7 +135,7 @@ objenesis-2.1.jar
opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
-paranamer-2.6.jar
+paranamer-2.8.jar
parquet-column-1.7.0.jar
parquet-common-1.7.0.jar
parquet-encoding-1.7.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index fb014921765f..70d33b4f4812 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -74,12 +74,13 @@ hk2-utils-2.4.0-b34.jar
httpclient-4.5.2.jar
httpcore-4.4.4.jar
ivy-2.4.0.jar
-jackson-annotations-2.5.3.jar
-jackson-core-2.5.3.jar
+jackson-annotations-2.7.3.jar
+jackson-core-2.7.3.jar
jackson-core-asl-1.9.13.jar
-jackson-databind-2.5.3.jar
+jackson-databind-2.7.3.jar
jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.11-2.5.3.jar
+jackson-module-paranamer-2.7.3.jar
+jackson-module-scala_2.11-2.7.3.jar
janino-2.7.8.jar
java-xmlbuilder-1.0.jar
javassist-3.18.1-GA.jar
@@ -134,7 +135,7 @@ objenesis-2.1.jar
opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
-paranamer-2.6.jar
+paranamer-2.8.jar
parquet-column-1.7.0.jar
parquet-common-1.7.0.jar
parquet-encoding-1.7.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 0baf4e84fff0..a80f6bc2a406 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -80,13 +80,14 @@ htrace-core-3.0.4.jar
httpclient-4.5.2.jar
httpcore-4.4.4.jar
ivy-2.4.0.jar
-jackson-annotations-2.5.3.jar
-jackson-core-2.5.3.jar
+jackson-annotations-2.7.3.jar
+jackson-core-2.7.3.jar
jackson-core-asl-1.9.13.jar
-jackson-databind-2.5.3.jar
+jackson-databind-2.7.3.jar
jackson-jaxrs-1.9.13.jar
jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.11-2.5.3.jar
+jackson-module-paranamer-2.7.3.jar
+jackson-module-scala_2.11-2.7.3.jar
jackson-xc-1.9.13.jar
janino-2.7.8.jar
java-xmlbuilder-1.0.jar
@@ -142,7 +143,7 @@ objenesis-2.1.jar
opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
-paranamer-2.6.jar
+paranamer-2.8.jar
parquet-column-1.7.0.jar
parquet-common-1.7.0.jar
parquet-encoding-1.7.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 8be218cd68d9..c0b53f73cd49 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -80,13 +80,14 @@ htrace-core-3.1.0-incubating.jar
httpclient-4.5.2.jar
httpcore-4.4.4.jar
ivy-2.4.0.jar
-jackson-annotations-2.5.3.jar
-jackson-core-2.5.3.jar
+jackson-annotations-2.7.3.jar
+jackson-core-2.7.3.jar
jackson-core-asl-1.9.13.jar
-jackson-databind-2.5.3.jar
+jackson-databind-2.7.3.jar
jackson-jaxrs-1.9.13.jar
jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.11-2.5.3.jar
+jackson-module-paranamer-2.7.3.jar
+jackson-module-scala_2.11-2.7.3.jar
jackson-xc-1.9.13.jar
janino-2.7.8.jar
java-xmlbuilder-1.0.jar
@@ -143,7 +144,7 @@ objenesis-2.1.jar
opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
-paranamer-2.6.jar
+paranamer-2.8.jar
parquet-column-1.7.0.jar
parquet-common-1.7.0.jar
parquet-encoding-1.7.0.jar
diff --git a/pom.xml b/pom.xml
index 40d9bf5ccccb..bce553d45de3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -161,7 +161,7 @@
    <jline.version>${scala.version}</jline.version>
    <jline.groupid>org.scala-lang</jline.groupid>
    <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
-    <fasterxml.jackson.version>2.5.3</fasterxml.jackson.version>
+    <fasterxml.jackson.version>2.7.3</fasterxml.jackson.version>
    <snappy.version>1.1.2.4</snappy.version>
    <netlib.java.version>1.1.2</netlib.java.version>
    <calcite.version>1.2.0-incubating</calcite.version>
@@ -180,6 +180,7 @@
    <antlr4.version>4.5.2-1</antlr4.version>
    <jpam.version>1.1</jpam.version>
    <selenium.version>2.52.0</selenium.version>
+    <paranamer.version>2.8</paranamer.version>
    <test.java.home>${java.home}</test.java.home>
@@ -1821,6 +1822,11 @@
        <artifactId>antlr4-runtime</artifactId>
        <version>${antlr4.version}</version>
      </dependency>
+      <dependency>
+        <groupId>com.thoughtworks.paranamer</groupId>
+        <artifactId>paranamer</artifactId>
+        <version>${paranamer.version}</version>
+      </dependency>
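Note: jackson-module-scala recovers Scala constructor parameter names through paranamer at runtime, which appears to be why paranamer moves from 2.6 to 2.8 in the dependency manifests above and why its version is now managed explicitly alongside the Jackson 2.7.3 upgrade. A minimal sketch of that dependency, outside the patch itself; the `Person` case class and `ParanamerSketch` object are illustrative names only and assume the Jackson and paranamer jars above are on the classpath:

    import com.fasterxml.jackson.databind.ObjectMapper
    import com.fasterxml.jackson.module.scala.DefaultScalaModule

    // Hypothetical example, not part of this patch: binding JSON to a Scala case
    // class goes through jackson-module-scala, which reads the constructor
    // parameter names ("name", "age") via paranamer at runtime.
    case class Person(name: String, age: Int)

    object ParanamerSketch {
      def main(args: Array[String]): Unit = {
        val mapper = new ObjectMapper()
        mapper.registerModule(DefaultScalaModule)
        val p = mapper.readValue("""{"name": "a", "age": 1}""", classOf[Person])
        println(p)  // Person(a,1)
      }
    }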
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index c98aef1a0e69..4e1e0c8d4296 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -193,6 +193,9 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
set, it uses the default value, ``true``.
:param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is
set, it uses the default value, ``false``.
+ :param allowNonNumericNumbers: allows using non-numeric numbers such as "NaN", "Infinity",
+ "-Infinity", "INF", "-INF", which are converted to floating
+ point numbers. If None is set, it uses the default value, ``true``.
:param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character
using backslash quoting mechanism. If None is
set, it uses the default value, ``false``.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index e1a64dfc5e7b..8da10c9c7abb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -293,6 +293,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
*
*
* `allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
* (e.g. 00012)
+ * `allowNonNumericNumbers` (default `true`): allows using non-numeric numbers such as "NaN",
+ * "Infinity", "-Infinity", "INF", "-INF", which are convertd to floating point numbers.
* `allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
* character using backslash quoting mechanism
* `mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
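For context, a minimal usage sketch of the new reader option from the Scala API (not part of the patch; the input path is hypothetical and a `spark` SparkSession is assumed to be in scope, as in the tests below):

    import org.apache.spark.sql.types.{DoubleType, StructField, StructType}

    // With allowNonNumericNumbers left at its default (true), unquoted NaN/Infinity/INF
    // tokens in the input parse into the corresponding Double special values; setting it
    // to "false" turns such records into _corrupt_record instead.
    val schema = StructType(StructField("age", DoubleType, nullable = true) :: Nil)
    val df = spark.read
      .option("allowNonNumericNumbers", "true")
      .schema(schema)
      .json("/path/to/non-standard.json")  // hypothetical path
    df.show()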
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
index aeee2600a19e..cafca323187d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
@@ -129,13 +129,15 @@ object JacksonParser extends Logging {
case (VALUE_STRING, FloatType) =>
// Special case handling for NaN and Infinity.
val value = parser.getText
- val lowerCaseValue = value.toLowerCase()
- if (lowerCaseValue.equals("nan") ||
- lowerCaseValue.equals("infinity") ||
- lowerCaseValue.equals("-infinity") ||
- lowerCaseValue.equals("inf") ||
- lowerCaseValue.equals("-inf")) {
+ if (value.equals("NaN") ||
+ value.equals("Infinity") ||
+ value.equals("+Infinity") ||
+ value.equals("-Infinity")) {
value.toFloat
+ } else if (value.equals("+INF") || value.equals("INF")) {
+ Float.PositiveInfinity
+ } else if (value.equals("-INF")) {
+ Float.NegativeInfinity
} else {
throw new SparkSQLJsonProcessingException(s"Cannot parse $value as FloatType.")
}
@@ -146,13 +148,15 @@ object JacksonParser extends Logging {
case (VALUE_STRING, DoubleType) =>
// Special case handling for NaN and Infinity.
val value = parser.getText
- val lowerCaseValue = value.toLowerCase()
- if (lowerCaseValue.equals("nan") ||
- lowerCaseValue.equals("infinity") ||
- lowerCaseValue.equals("-infinity") ||
- lowerCaseValue.equals("inf") ||
- lowerCaseValue.equals("-inf")) {
+ if (value.equals("NaN") ||
+ value.equals("Infinity") ||
+ value.equals("+Infinity") ||
+ value.equals("-Infinity")) {
value.toDouble
+ } else if (value.equals("+INF") || value.equals("INF")) {
+ Double.PositiveInfinity
+ } else if (value.equals("-INF")) {
+ Double.NegativeInfinity
} else {
throw new SparkSQLJsonProcessingException(s"Cannot parse $value as DoubleType.")
}
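The asymmetry above (`toFloat`/`toDouble` for the "NaN"/"Infinity" spellings, explicit constants for the "INF" spellings) exists because java.lang's parseFloat/parseDouble accept "NaN", "Infinity", "+Infinity" and "-Infinity" but reject Jackson's "INF"/"+INF"/"-INF" tokens. A small standalone sketch of that behaviour, not part of the patch:

    import scala.util.Try

    object NonNumericTokenSketch {
      def main(args: Array[String]): Unit = {
        // String#toFloat / #toDouble delegate to java.lang.Float/Double parse methods.
        println("Infinity".toDouble)   // Infinity
        println("+Infinity".toDouble)  // Infinity
        println("-Infinity".toDouble)  // -Infinity
        println("NaN".toDouble)        // NaN
        // The INF spellings are not recognised, hence the explicit mapping in the parser.
        println(Try("INF".toDouble))   // Failure(java.lang.NumberFormatException: ...)
      }
    }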
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
index c31dffedbdf6..2aab955c1ecb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.json
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
/**
* Test cases for various [[JSONOptions]].
@@ -93,23 +94,51 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
assert(df.first().getLong(0) == 18)
}
- // The following two tests are not really working - need to look into Jackson's
- // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS.
- ignore("allowNonNumericNumbers off") {
- val str = """{"age": NaN}"""
- val rdd = spark.sparkContext.parallelize(Seq(str))
- val df = spark.read.json(rdd)
-
- assert(df.schema.head.name == "_corrupt_record")
+ test("allowNonNumericNumbers off") {
+ // Non-quoted non-numeric numbers cannot be parsed when allowNonNumericNumbers is off.
+ var testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
+ """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": INF}""",
+ """{"age": +INF}""", """{"age": -INF}""")
+ testCases.foreach { str =>
+ val rdd = spark.sparkContext.parallelize(Seq(str))
+ val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
+
+ assert(df.schema.head.name == "_corrupt_record")
+ }
+
+ // Quoted non-numeric numbers should still be parsed even when allowNonNumericNumbers is off.
+ testCases = Seq("""{"age": "NaN"}""", """{"age": "Infinity"}""", """{"age": "+Infinity"}""",
+ """{"age": "-Infinity"}""", """{"age": "INF"}""", """{"age": "+INF"}""",
+ """{"age": "-INF"}""")
+ val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
+ _.isNegInfinity, _.isPosInfinity, _.isPosInfinity, _.isNegInfinity)
+ val schema = StructType(StructField("age", DoubleType, true) :: Nil)
+
+ testCases.zipWithIndex.foreach { case (str, idx) =>
+ val rdd = spark.sparkContext.parallelize(Seq(str))
+ val df = spark.read.option("allowNonNumericNumbers", "false").schema(schema).json(rdd)
+
+ assert(df.schema.head.name == "age")
+ assert(tests(idx)(df.first().getDouble(0)))
+ }
}
- ignore("allowNonNumericNumbers on") {
- val str = """{"age": NaN}"""
- val rdd = spark.sparkContext.parallelize(Seq(str))
- val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
-
- assert(df.schema.head.name == "age")
- assert(df.first().getDouble(0).isNaN)
+ test("allowNonNumericNumbers on") {
+ val testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
+ """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": +INF}""",
+ """{"age": -INF}""", """{"age": "NaN"}""", """{"age": "Infinity"}""",
+ """{"age": "-Infinity"}""")
+ val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
+ _.isNegInfinity, _.isPosInfinity, _.isNegInfinity, _.isNaN, _.isPosInfinity,
+ _.isNegInfinity)
+ val schema = StructType(StructField("age", DoubleType, true) :: Nil)
+ testCases.zipWithIndex.foreach { case (str, idx) =>
+ val rdd = spark.sparkContext.parallelize(Seq(str))
+ val df = spark.read.option("allowNonNumericNumbers", "true").schema(schema).json(rdd)
+
+ assert(df.schema.head.name == "age")
+ assert(tests(idx)(df.first().getDouble(0)))
+ }
}
test("allowBackslashEscapingAnyCharacter off") {