From ef71599130291aa6bb684afb47e88307b38238f1 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Thu, 5 May 2016 14:47:07 +0900 Subject: [PATCH 1/5] Add CSV tests with HadoopFsRelationTest and support for nullValue for other types --- .../datasources/csv/CSVInferSchema.scala | 4 + .../execution/datasources/csv/CSVSuite.scala | 3 +- .../sources/CSVHadoopFsRelationSuite.scala | 74 +++++++++++++++++++ .../sql/sources/HadoopFsRelationTest.scala | 67 ++++++++++++++--- 4 files changed, 136 insertions(+), 12 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index cfd66af18892..5f927269fefe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -223,6 +223,7 @@ private[csv] object CSVTypeCast { Try(datum.toDouble) .getOrElse(NumberFormat.getInstance(Locale.getDefault).parse(datum).doubleValue()) } + case _: BooleanType if datum == options.nullValue && nullable => null case _: BooleanType => datum.toBoolean case dt: DecimalType => if (datum == options.nullValue && nullable) { @@ -231,6 +232,7 @@ private[csv] object CSVTypeCast { val value = new BigDecimal(datum.replaceAll(",", "")) Decimal(value, dt.precision, dt.scale) } + case _: TimestampType if datum == options.nullValue && nullable => null case _: TimestampType if options.dateFormat != null => // This one will lose microseconds parts. // See https://issues.apache.org/jira/browse/SPARK-10681. @@ -239,10 +241,12 @@ private[csv] object CSVTypeCast { // This one will lose microseconds parts. // See https://issues.apache.org/jira/browse/SPARK-10681. 
DateTimeUtils.stringToTime(datum).getTime * 1000L + case _: DateType if datum == options.nullValue && nullable => null case _: DateType if options.dateFormat != null => DateTimeUtils.millisToDays(options.dateFormat.parse(datum).getTime) case _: DateType => DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(datum).getTime) + case _: StringType if datum == options.nullValue && nullable => null case _: StringType => UTF8String.fromString(datum) case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 28e59055fa1c..aafe12ad3b40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -431,7 +431,6 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } test("nullable fields with user defined null value of \"null\"") { - // year,make,model,comment,blank val dataSchema = StructType(List( StructField("year", IntegerType, nullable = true), @@ -447,7 +446,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { verifyCars(cars, withHeader = true, checkValues = false) val results = cars.collect() - assert(results(0).toSeq === Array(2012, "Tesla", "S", "null", "null")) + assert(results(0).toSeq === Array(2012, "Tesla", "S", null, null)) assert(results(2).toSeq === Array(null, "Chevy", "Volt", null, null)) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala new file mode 100644 index 000000000000..f517b1250168 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.sources
+
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.sql.types._
+
+class CSVHadoopFsRelationSuite extends HadoopFsRelationTest {
+  override val dataSourceName: String = "csv"
+
+  override val extraReadOptions: Map[String, String] =
+    Map("header" -> "true", "inferSchema" -> "true")
+
+  override val extraWriteOptions: Map[String, String] = Map("header" -> "true")
+
+  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
+    case _: NullType => false
+    case _: BinaryType => false
+    case _: CalendarIntervalType => false
+    case _: ArrayType => false
+    case _: MapType => false
+    case _: StructType => false
+    // Currently, this writes `DateType` and `TimestampType` as long values.
+    // Since `dateFormat` is not yet supported for writing, these are disabled for now.
+    case _: DateType => false
+    case _: TimestampType => false
+    case _: UserDefinedType[_] => false
+    case _ => true
+  }
+
+  test("save()/load() - partitioned table - simple queries - partition columns in data") {
+    withTempDir { file =>
+      val basePath = new Path(file.getCanonicalPath)
+      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
+      val qualifiedBasePath = fs.makeQualified(basePath)
+
+      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
+        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
+        val header = Seq("a,b")
+        val data = (1 to 3).map(i => s"""$i,val_$i""")
+        sparkContext
+          .parallelize(header ++ data)
+          .saveAsTextFile(partitionDir.toString)
+      }
+
+      val dataSchemaWithPartition =
+        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))
+
+      checkQueries(
+        hiveContext.read.format(dataSourceName)
+          .option("dataSchema", dataSchemaWithPartition.json)
+          .option("inferSchema", "true")
+          .option("header", "true")
+          .load(file.getCanonicalPath))
+    }
+  }
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
index 67b403a9bd3a..c817093ef3a1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
@@ -39,6 +39,12 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
 
   val dataSourceName: String
 
+  // These options will be applied to the reads performed by the tests in `HadoopFsRelationTest`.
+  val extraReadOptions = Map.empty[String, String]
+
+  // These options will be applied to the writes performed by the tests in `HadoopFsRelationTest`.
+ val extraWriteOptions = Map.empty[String, String] + protected def supportsDataType(dataType: DataType): Boolean = true val dataSchema = @@ -168,26 +174,44 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes test("save()/load() - non-partitioned table - Overwrite") { withTempPath { file => - testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) - testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) + testDF.write + .mode(SaveMode.Overwrite) + .format(dataSourceName) + .options(extraWriteOptions) + .save(file.getCanonicalPath) + testDF.write + .mode(SaveMode.Overwrite) + .format(dataSourceName) + .options(extraWriteOptions) + .save(file.getCanonicalPath) checkAnswer( sqlContext.read.format(dataSourceName) .option("path", file.getCanonicalPath) .option("dataSchema", dataSchema.json) + .options(extraReadOptions) .load(), - testDF.collect()) + testDF) } } test("save()/load() - non-partitioned table - Append") { withTempPath { file => - testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) - testDF.write.mode(SaveMode.Append).format(dataSourceName).save(file.getCanonicalPath) + testDF.write + .mode(SaveMode.Overwrite) + .format(dataSourceName) + .options(extraWriteOptions) + .save(file.getCanonicalPath) + testDF.write + .mode(SaveMode.Append) + .format(dataSourceName) + .options(extraWriteOptions) + .save(file.getCanonicalPath) checkAnswer( sqlContext.read.format(dataSourceName) .option("dataSchema", dataSchema.json) + .options(extraReadOptions) .load(file.getCanonicalPath).orderBy("a"), testDF.union(testDF).orderBy("a").collect()) } @@ -215,6 +239,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes withTempPath { file => partitionedTestDF.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.ErrorIfExists) .partitionBy("p1", "p2") .save(file.getCanonicalPath) @@ -222,6 +247,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes checkQueries( sqlContext.read.format(dataSourceName) .option("dataSchema", dataSchema.json) + .options(extraReadOptions) .load(file.getCanonicalPath)) } } @@ -230,12 +256,14 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes withTempPath { file => partitionedTestDF.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.Overwrite) .partitionBy("p1", "p2") .save(file.getCanonicalPath) partitionedTestDF.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.Overwrite) .partitionBy("p1", "p2") .save(file.getCanonicalPath) @@ -243,6 +271,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes checkAnswer( sqlContext.read.format(dataSourceName) .option("dataSchema", dataSchema.json) + .options(extraReadOptions) .load(file.getCanonicalPath), partitionedTestDF.collect()) } @@ -252,12 +281,14 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes withTempPath { file => partitionedTestDF.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.Overwrite) .partitionBy("p1", "p2") .save(file.getCanonicalPath) partitionedTestDF.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.Append) .partitionBy("p1", "p2") .save(file.getCanonicalPath) @@ -265,6 +296,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes checkAnswer( sqlContext.read.format(dataSourceName) 
.option("dataSchema", dataSchema.json) + .options(extraReadOptions) .load(file.getCanonicalPath), partitionedTestDF.union(partitionedTestDF).collect()) } @@ -274,12 +306,14 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes withTempPath { file => partitionedTestDF1.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.Overwrite) .partitionBy("p1", "p2") .save(file.getCanonicalPath) partitionedTestDF2.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.Append) .partitionBy("p1", "p2") .save(file.getCanonicalPath) @@ -287,6 +321,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes checkAnswer( sqlContext.read.format(dataSourceName) .option("dataSchema", dataSchema.json) + .options(extraReadOptions) .load(file.getCanonicalPath), partitionedTestDF.collect()) } @@ -490,6 +525,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes withTempPath { file => partitionedTestDF.write .format(dataSourceName) + .options(extraWriteOptions) .mode(SaveMode.Overwrite) .partitionBy("p1", "p2") .save(file.getCanonicalPath) @@ -498,6 +534,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes .format(dataSourceName) .option("dataSchema", dataSchema.json) .option("basePath", file.getCanonicalPath) + .options(extraReadOptions) .load(s"${file.getCanonicalPath}/p1=*/p2=???") val expectedPaths = Set( @@ -612,23 +649,33 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes val df = sqlContext.range(1, 10).toDF("i") withTempPath { dir => - df.write.mode("append").format(dataSourceName).save(dir.getCanonicalPath) + df.write + .mode("append") + .format(dataSourceName) + .options(extraWriteOptions) + .save(dir.getCanonicalPath) // Because there data already exists, // this append should succeed because we will use the output committer associated // with file format and AlwaysFailOutputCommitter will not be used. - df.write.mode("append").format(dataSourceName).save(dir.getCanonicalPath) + df.write + .mode("append") + .format(dataSourceName) + .options(extraWriteOptions) + .save(dir.getCanonicalPath) checkAnswer( sqlContext.read .format(dataSourceName) .option("dataSchema", df.schema.json) - .options(extraOptions) + .options(extraOptions ++ extraReadOptions) .load(dir.getCanonicalPath), df.union(df)) // This will fail because AlwaysFailOutputCommitter is used when we do append. intercept[Exception] { df.write.mode("overwrite") - .options(extraOptions).format(dataSourceName).save(dir.getCanonicalPath) + .options(extraOptions ++ extraWriteOptions) + .format(dataSourceName) + .save(dir.getCanonicalPath) } } withTempPath { dir => @@ -637,7 +684,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes // and there is no existing data. 
      intercept[Exception] {
        df.write.mode("append")
-          .options(extraOptions)
+          .options(extraOptions ++ extraWriteOptions)
          .format(dataSourceName)
          .save(dir.getCanonicalPath)
      }

From f80df8cec6079cb8eb34b9893065b329847249f0 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Thu, 5 May 2016 15:06:40 +0900
Subject: [PATCH 2/5] Add some more unit tests

---
 .../sql/execution/datasources/csv/CSVTypeCastSuite.scala | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala
index 26b33b24efc3..868d5575a42d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala
@@ -180,5 +180,13 @@ class CSVTypeCastSuite extends SparkFunSuite {
       CSVTypeCast.castTo("-", DoubleType, nullable = true, CSVOptions("nullValue", "-")))
     assertNull(
       CSVTypeCast.castTo("-", DecimalType.DoubleDecimal, true, CSVOptions("nullValue", "-")))
+    assertNull(
+      CSVTypeCast.castTo("-", StringType, nullable = true, CSVOptions("nullValue", "-")))
+    assertNull(
+      CSVTypeCast.castTo("-", TimestampType, nullable = true, CSVOptions("nullValue", "-")))
+    assertNull(
+      CSVTypeCast.castTo("-", DateType, nullable = true, CSVOptions("nullValue", "-")))
+    assertNull(
+      CSVTypeCast.castTo("-", BooleanType, nullable = true, CSVOptions("nullValue", "-")))
   }
 }

From 2ea702cdfd4732db5c1da6f750f1d15a1ba30396 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Thu, 5 May 2016 15:09:43 +0900
Subject: [PATCH 3/5] Leave the test assert as it is

---
 .../org/apache/spark/sql/sources/HadoopFsRelationTest.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
index c817093ef3a1..a5a48ea9bb40 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
@@ -191,7 +191,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
           .option("dataSchema", dataSchema.json)
           .options(extraReadOptions)
           .load(),
-        testDF)
+        testDF.collect())
     }
   }

From 33288c8580c7aa3f217b195fc4b32f2d066edd11 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Thu, 5 May 2016 16:32:58 +0900
Subject: [PATCH 4/5] Ignore flaky string tests for now and optimize the casting logic

---
 .../datasources/csv/CSVInferSchema.scala | 112 +++++++++--------
 .../sources/CSVHadoopFsRelationSuite.scala | 3 +
 2 files changed, 57 insertions(+), 58 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
index 5f927269fefe..aa6c9dca7a04 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.datasources.csv
 
 import java.math.BigDecimal
-import java.text.{NumberFormat, SimpleDateFormat}
+import java.text.NumberFormat
 import java.util.Locale
 
 import scala.util.control.Exception._
@@ -192,63 +192,59 @@ private[csv] object
CSVTypeCast { nullable: Boolean = true, options: CSVOptions = CSVOptions()): Any = { - castType match { - case _: ByteType => if (datum == options.nullValue && nullable) null else datum.toByte - case _: ShortType => if (datum == options.nullValue && nullable) null else datum.toShort - case _: IntegerType => if (datum == options.nullValue && nullable) null else datum.toInt - case _: LongType => if (datum == options.nullValue && nullable) null else datum.toLong - case _: FloatType => - if (datum == options.nullValue && nullable) { - null - } else if (datum == options.nanValue) { - Float.NaN - } else if (datum == options.negativeInf) { - Float.NegativeInfinity - } else if (datum == options.positiveInf) { - Float.PositiveInfinity - } else { - Try(datum.toFloat) - .getOrElse(NumberFormat.getInstance(Locale.getDefault).parse(datum).floatValue()) - } - case _: DoubleType => - if (datum == options.nullValue && nullable) { - null - } else if (datum == options.nanValue) { - Double.NaN - } else if (datum == options.negativeInf) { - Double.NegativeInfinity - } else if (datum == options.positiveInf) { - Double.PositiveInfinity - } else { - Try(datum.toDouble) - .getOrElse(NumberFormat.getInstance(Locale.getDefault).parse(datum).doubleValue()) - } - case _: BooleanType if datum == options.nullValue && nullable => null - case _: BooleanType => datum.toBoolean - case dt: DecimalType => - if (datum == options.nullValue && nullable) { - null - } else { - val value = new BigDecimal(datum.replaceAll(",", "")) - Decimal(value, dt.precision, dt.scale) - } - case _: TimestampType if datum == options.nullValue && nullable => null - case _: TimestampType if options.dateFormat != null => - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681. - options.dateFormat.parse(datum).getTime * 1000L - case _: TimestampType => - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681. 
-        DateTimeUtils.stringToTime(datum).getTime * 1000L
-      case _: DateType if datum == options.nullValue && nullable => null
-      case _: DateType if options.dateFormat != null =>
-        DateTimeUtils.millisToDays(options.dateFormat.parse(datum).getTime)
-      case _: DateType =>
-        DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(datum).getTime)
-      case _: StringType if datum == options.nullValue && nullable => null
-      case _: StringType => UTF8String.fromString(datum)
-      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
+    if (datum == null || (datum == options.nullValue && nullable)) {
+      null
+    } else {
+      castType match {
+        case _: ByteType => datum.toByte
+        case _: ShortType => datum.toShort
+        case _: IntegerType => datum.toInt
+        case _: LongType => datum.toLong
+        case _: FloatType =>
+          if (datum == options.nanValue) {
+            Float.NaN
+          } else if (datum == options.negativeInf) {
+            Float.NegativeInfinity
+          } else if (datum == options.positiveInf) {
+            Float.PositiveInfinity
+          } else {
+            Try(datum.toFloat)
+              .getOrElse(NumberFormat.getInstance(Locale.getDefault).parse(datum).floatValue())
+          }
+        case _: DoubleType =>
+          if (datum == options.nanValue) {
+            Double.NaN
+          } else if (datum == options.negativeInf) {
+            Double.NegativeInfinity
+          } else if (datum == options.positiveInf) {
+            Double.PositiveInfinity
+          } else {
+            Try(datum.toDouble)
+              .getOrElse(NumberFormat.getInstance(Locale.getDefault).parse(datum).doubleValue())
+          }
+        case _: BooleanType => datum.toBoolean
+        case dt: DecimalType =>
+          if (datum == options.nullValue && nullable) {
+            null
+          } else {
+            val value = new BigDecimal(datum.replaceAll(",", ""))
+            Decimal(value, dt.precision, dt.scale)
+          }
+        case _: TimestampType if options.dateFormat != null =>
+          // This one will lose microseconds parts.
+          // See https://issues.apache.org/jira/browse/SPARK-10681.
+          options.dateFormat.parse(datum).getTime * 1000L
+        case _: TimestampType =>
+          // This one will lose microseconds parts.
+          // See https://issues.apache.org/jira/browse/SPARK-10681.
+          DateTimeUtils.stringToTime(datum).getTime * 1000L
+        case _: DateType if options.dateFormat != null =>
+          DateTimeUtils.millisToDays(options.dateFormat.parse(datum).getTime)
+        case _: DateType =>
+          DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(datum).getTime)
+        case _: StringType => UTF8String.fromString(datum)
+        case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
+      }
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala
index f517b1250168..f82f9671ffe0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/CSVHadoopFsRelationSuite.scala
@@ -32,6 +32,9 @@ class CSVHadoopFsRelationSuite extends HadoopFsRelationTest {
 
   override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
     case _: NullType => false
+    // The `StringType` test is too flaky. The randomly generated data seems to interfere with
+    // the delimiter used for writing, and the CSV parser does not recognize it.
+ case _: StringType => false case _: BinaryType => false case _: CalendarIntervalType => false case _: ArrayType => false From f31f69ef44f7505d2483b5a98175cf94fc29b0ff Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Thu, 5 May 2016 18:09:23 +0900 Subject: [PATCH 5/5] Fix existing tests for null --- .../sql/execution/datasources/csv/CSVTypeCastSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala index 868d5575a42d..af95411817a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala @@ -73,10 +73,10 @@ class CSVTypeCastSuite extends SparkFunSuite { test("String type should always return the same as the input") { assert( - CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions()) == + CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions("nullValue", null)) == UTF8String.fromString("")) assert( - CSVTypeCast.castTo("", StringType, nullable = false, CSVOptions()) == + CSVTypeCast.castTo("", StringType, nullable = false, CSVOptions("nullValue", null)) == UTF8String.fromString("")) }
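
For reference, a minimal sketch of the end-to-end behavior this series enables. The file path,
schema, and column names below are hypothetical and do not come from the patches; the sketch
assumes a `sqlContext` in scope and the data source registered under the short name "csv", as in
the suites above:

import org.apache.spark.sql.types._

// Hypothetical input /tmp/people.csv, where "-" is the user-defined null token
// (matching CSVOptions("nullValue", "-") in the tests above):
//   id,active,name
//   1,true,alice
//   2,-,-
val schema = StructType(Seq(
  StructField("id", IntegerType, nullable = true),
  StructField("active", BooleanType, nullable = true),
  StructField("name", StringType, nullable = true)))

val df = sqlContext.read
  .format("csv")
  .schema(schema)
  .option("header", "true")
  .option("nullValue", "-")
  .load("/tmp/people.csv")

// Previously only the numeric branches of CSVTypeCast.castTo honored
// options.nullValue, so the second row would fail in datum.toBoolean or keep
// the literal "-" for the string column; with these patches it comes back as
// Row(2, null, null).
df.collect()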