
Commit 5fc800d

Revert "[SPARK-11753][SQL][TEST-HADOOP2.2] Make allowNonNumericNumbers option work"
This reverts commit c24b6b6.
1 parent: d67c82e

10 files changed: 53 additions, 102 deletions

dev/deps/spark-deps-hadoop-2.2

Lines changed: 5 additions & 6 deletions
@@ -72,13 +72,12 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.5.3.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-scala_2.11-2.5.3.jar
 janino-2.7.8.jar
 javassist-3.18.1-GA.jar
 javax.annotation-api-1.2.jar
@@ -128,7 +127,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.6.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar

dev/deps/spark-deps-hadoop-2.3

Lines changed: 5 additions & 6 deletions
@@ -74,13 +74,12 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.5.3.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-scala_2.11-2.5.3.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
 javassist-3.18.1-GA.jar
@@ -135,7 +134,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.6.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar

dev/deps/spark-deps-hadoop-2.4

Lines changed: 5 additions & 6 deletions
@@ -74,13 +74,12 @@ hk2-utils-2.4.0-b34.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.5.3.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-scala_2.11-2.5.3.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
 javassist-3.18.1-GA.jar
@@ -135,7 +134,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.6.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar

dev/deps/spark-deps-hadoop-2.6

Lines changed: 5 additions & 6 deletions
@@ -80,14 +80,13 @@ htrace-core-3.0.4.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.5.3.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-scala_2.11-2.5.3.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
@@ -143,7 +142,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.6.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 5 additions & 6 deletions
@@ -80,14 +80,13 @@ htrace-core-3.1.0-incubating.jar
 httpclient-4.5.2.jar
 httpcore-4.4.4.jar
 ivy-2.4.0.jar
-jackson-annotations-2.7.3.jar
-jackson-core-2.7.3.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.7.3.jar
+jackson-databind-2.5.3.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-paranamer-2.7.3.jar
-jackson-module-scala_2.11-2.7.3.jar
+jackson-module-scala_2.11-2.5.3.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 java-xmlbuilder-1.0.jar
@@ -144,7 +143,7 @@ objenesis-2.1.jar
 opencsv-2.3.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
-paranamer-2.8.jar
+paranamer-2.6.jar
 parquet-column-1.8.1.jar
 parquet-common-1.8.1.jar
 parquet-encoding-1.8.1.jar

pom.xml

Lines changed: 1 addition & 7 deletions
@@ -160,7 +160,7 @@
     <jline.version>${scala.version}</jline.version>
     <jline.groupid>org.scala-lang</jline.groupid>
     <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
-    <fasterxml.jackson.version>2.7.3</fasterxml.jackson.version>
+    <fasterxml.jackson.version>2.5.3</fasterxml.jackson.version>
     <snappy.version>1.1.2.4</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
     <calcite.version>1.2.0-incubating</calcite.version>
@@ -180,7 +180,6 @@
     <antlr4.version>4.5.3</antlr4.version>
     <jpam.version>1.1</jpam.version>
     <selenium.version>2.52.0</selenium.version>
-    <paranamer.version>2.8</paranamer.version>

     <test.java.home>${java.home}</test.java.home>
     <test.exclude.tags></test.exclude.tags>
@@ -1826,11 +1825,6 @@
       <artifactId>antlr4-runtime</artifactId>
       <version>${antlr4.version}</version>
     </dependency>
-    <dependency>
-      <groupId>com.thoughtworks.paranamer</groupId>
-      <artifactId>paranamer</artifactId>
-      <version>${paranamer.version}</version>
-    </dependency>
   </dependencies>
 </dependencyManagement>
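
Note: the Jackson jars that change in the dependency manifests above are all driven by this single `fasterxml.jackson.version` property; each managed Jackson artifact references it in its `<version>` element. A sketch of what such an entry looks like (the exact entries in Spark's pom may differ):

<dependency>
  <groupId>com.fasterxml.jackson.core</groupId>
  <artifactId>jackson-databind</artifactId>
  <version>${fasterxml.jackson.version}</version>
</dependency>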

python/pyspark/sql/readwriter.py

Lines changed: 0 additions & 3 deletions
@@ -193,9 +193,6 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
             set, it uses the default value, ``true``.
         :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is
             set, it uses the default value, ``false``.
-        :param allowNonNumericNumbers: allows using non-numeric numbers such as "NaN", "Infinity",
-            "-Infinity", "INF", "-INF", which are converted to floating
-            point numbers, ``true``.
         :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character
             using backslash quoting mechanism. If None is
             set, it uses the default value, ``false``.

sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

Lines changed: 0 additions & 2 deletions
@@ -293,8 +293,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * </li>
    * <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
    * (e.g. 00012)</li>
-   * <li>`allowNonNumericNumbers` (default `true`): allows using non-numeric numbers such as "NaN",
-   * "Infinity", "-Infinity", "INF", "-INF", which are converted to floating point numbers.</li>
    * <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
    * character using backslash quoting mechanism</li>
    * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
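
Note: these reader options are plain string key/value pairs supplied through `DataFrameReader.option` before calling `json`. A minimal sketch of the call pattern, assuming a local SparkSession and a hypothetical input path:

import org.apache.spark.sql.SparkSession

object JsonReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("JsonReadSketch")
      .master("local[*]")
      .getOrCreate()

    // Options not set explicitly fall back to the defaults documented above.
    val df = spark.read
      .option("allowNumericLeadingZeros", "true")
      .option("mode", "PERMISSIVE")
      .json("/tmp/people.json") // hypothetical path

    df.printSchema()
    spark.stop()
  }
}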

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala

Lines changed: 12 additions & 16 deletions
@@ -129,15 +129,13 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, FloatType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        if (value.equals("NaN") ||
-          value.equals("Infinity") ||
-          value.equals("+Infinity") ||
-          value.equals("-Infinity")) {
+        val lowerCaseValue = value.toLowerCase()
+        if (lowerCaseValue.equals("nan") ||
+          lowerCaseValue.equals("infinity") ||
+          lowerCaseValue.equals("-infinity") ||
+          lowerCaseValue.equals("inf") ||
+          lowerCaseValue.equals("-inf")) {
           value.toFloat
-        } else if (value.equals("+INF") || value.equals("INF")) {
-          Float.PositiveInfinity
-        } else if (value.equals("-INF")) {
-          Float.NegativeInfinity
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as FloatType.")
         }
@@ -148,15 +146,13 @@ object JacksonParser extends Logging {
       case (VALUE_STRING, DoubleType) =>
         // Special case handling for NaN and Infinity.
         val value = parser.getText
-        if (value.equals("NaN") ||
-          value.equals("Infinity") ||
-          value.equals("+Infinity") ||
-          value.equals("-Infinity")) {
+        val lowerCaseValue = value.toLowerCase()
+        if (lowerCaseValue.equals("nan") ||
+          lowerCaseValue.equals("infinity") ||
+          lowerCaseValue.equals("-infinity") ||
+          lowerCaseValue.equals("inf") ||
+          lowerCaseValue.equals("-inf")) {
           value.toDouble
-        } else if (value.equals("+INF") || value.equals("INF")) {
-          Double.PositiveInfinity
-        } else if (value.equals("-INF")) {
-          Double.NegativeInfinity
         } else {
           throw new SparkSQLJsonProcessingException(s"Cannot parse $value as DoubleType.")
         }
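
Note: the restored branch lower-cases the token only for the comparison and still hands the original string to `toFloat`/`toDouble`, which Java's number parser rejects for spellings like "INF"; that corner case appears to be part of what the reverted commit addressed. A self-contained sketch of the case-insensitive idea, mapping the special tokens to explicit constants instead (names are illustrative, not Spark's):

object NonNumericTokens {
  // Case-insensitive handling of the special floating-point tokens.
  // Illustrative only; not Spark's implementation.
  def parseDouble(value: String): Double = value.toLowerCase match {
    case "nan"                    => Double.NaN
    case "infinity" | "+infinity" => Double.PositiveInfinity
    case "inf" | "+inf"           => Double.PositiveInfinity // "INF".toDouble would throw
    case "-infinity" | "-inf"     => Double.NegativeInfinity
    case _                        => value.toDouble // ordinary numeric strings
  }

  def main(args: Array[String]): Unit = {
    println(parseDouble("NaN").isNaN)          // true
    println(parseDouble("-INF").isNegInfinity) // true
    println(parseDouble("3.14"))               // 3.14
  }
}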

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala

Lines changed: 15 additions & 44 deletions
@@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.json

 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{DoubleType, StructField, StructType}

 /**
  * Test cases for various [[JSONOptions]].
@@ -94,51 +93,23 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
     assert(df.first().getLong(0) == 18)
   }

-  test("allowNonNumericNumbers off") {
-    // non-quoted non-numeric numbers don't work if allowNonNumericNumbers is off.
-    var testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
-      """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": INF}""",
-      """{"age": +INF}""", """{"age": -INF}""")
-    testCases.foreach { str =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)
-
-      assert(df.schema.head.name == "_corrupt_record")
-    }
-
-    // quoted non-numeric numbers should still work even allowNonNumericNumbers is off.
-    testCases = Seq("""{"age": "NaN"}""", """{"age": "Infinity"}""", """{"age": "+Infinity"}""",
-      """{"age": "-Infinity"}""", """{"age": "INF"}""", """{"age": "+INF"}""",
-      """{"age": "-INF"}""")
-    val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isPosInfinity, _.isNegInfinity)
-    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
-
-    testCases.zipWithIndex.foreach { case (str, idx) =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "false").schema(schema).json(rdd)
-
-      assert(df.schema.head.name == "age")
-      assert(tests(idx)(df.first().getDouble(0)))
-    }
+  // The following two tests are not really working - need to look into Jackson's
+  // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS.
+  ignore("allowNonNumericNumbers off") {
+    val str = """{"age": NaN}"""
+    val rdd = spark.sparkContext.parallelize(Seq(str))
+    val df = spark.read.json(rdd)
+
+    assert(df.schema.head.name == "_corrupt_record")
   }

-  test("allowNonNumericNumbers on") {
-    val testCases: Seq[String] = Seq("""{"age": NaN}""", """{"age": Infinity}""",
-      """{"age": +Infinity}""", """{"age": -Infinity}""", """{"age": +INF}""",
-      """{"age": -INF}""", """{"age": "NaN"}""", """{"age": "Infinity"}""",
-      """{"age": "-Infinity"}""")
-    val tests: Seq[Double => Boolean] = Seq(_.isNaN, _.isPosInfinity, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isNegInfinity, _.isNaN, _.isPosInfinity,
-      _.isNegInfinity, _.isPosInfinity, _.isNegInfinity)
-    val schema = StructType(StructField("age", DoubleType, true) :: Nil)
-    testCases.zipWithIndex.foreach { case (str, idx) =>
-      val rdd = spark.sparkContext.parallelize(Seq(str))
-      val df = spark.read.option("allowNonNumericNumbers", "true").schema(schema).json(rdd)
-
-      assert(df.schema.head.name == "age")
-      assert(tests(idx)(df.first().getDouble(0)))
-    }
+  ignore("allowNonNumericNumbers on") {
+    val str = """{"age": NaN}"""
+    val rdd = spark.sparkContext.parallelize(Seq(str))
+    val df = spark.read.option("allowNonNumericNumbers", "true").json(rdd)
+
+    assert(df.schema.head.name == "age")
+    assert(df.first().getDouble(0).isNaN)
   }

   test("allowBackslashEscapingAnyCharacter off") {
