From 36920996e42ccda514e69b9aae0cf3bfe13242ce Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 4 Nov 2016 22:41:07 +0900 Subject: [PATCH 1/7] Take the case of null into account --- .../datasources/csv/CSVInferSchema.scala | 2 +- .../sql/execution/datasources/csv/CSVSuite.scala | 16 ++++++++++++++++ .../datasources/csv/CSVTypeCastSuite.scala | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index 1981d8607c0c6..78d1186538dbe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -232,7 +232,7 @@ private[csv] object CSVTypeCast { nullable: Boolean = true, options: CSVOptions = CSVOptions()): Any = { - if (nullable && datum == options.nullValue) { + if (datum == null || nullable && datum == options.nullValue) { null } else { castType match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 8209b5bd7f9de..73abcefa21818 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -890,4 +890,20 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } } } + + test("load null when the schema is larger than parsed tokens ") { + withTempPath { path => + val schema = StructType(Array( + StructField("a", IntegerType, nullable = true), + StructField("b", IntegerType, nullable = true) + )) + Seq("1").toDF().write.text(path.getAbsolutePath) + val df = spark.read + .schema(schema) + .option("header", "false") + .csv(path.getAbsolutePath) + + checkAnswer(df, Row(1, null)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala index c74406b9cbfbb..e26962350dc40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala @@ -88,6 +88,8 @@ class CSVTypeCastSuite extends SparkFunSuite { CSVTypeCast.castTo("-", DateType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( CSVTypeCast.castTo("-", StringType, nullable = true, CSVOptions("nullValue", "-"))) + assertNull( + CSVTypeCast.castTo(null, IntegerType, nullable = true, CSVOptions("nullValue", "-"))) } test("String type should also respect `nullValue`") { From e1c58c1f5b2885578defa2fd978836c36e6bab73 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 4 Nov 2016 23:04:34 +0900 Subject: [PATCH 2/7] minimise the change --- .../spark/sql/execution/datasources/csv/CSVSuite.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 73abcefa21818..1e9ccc9c94209 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -893,11 +893,10 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { test("load null when the schema is larger than parsed tokens ") { withTempPath { path => - val schema = StructType(Array( - StructField("a", IntegerType, nullable = true), - StructField("b", IntegerType, nullable = true) - )) Seq("1").toDF().write.text(path.getAbsolutePath) + val schema = StructType( + StructField("a", IntegerType, true) :: + StructField("b", IntegerType, true) :: Nil) val df = spark.read .schema(schema) .option("header", "false") From 41320754f198278535fd09e6f30f38280c0ec578 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 4 Nov 2016 23:20:57 +0900 Subject: [PATCH 3/7] Indentation --- .../apache/spark/sql/execution/datasources/csv/CSVSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 1e9ccc9c94209..491ff72337a81 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -896,7 +896,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { Seq("1").toDF().write.text(path.getAbsolutePath) val schema = StructType( StructField("a", IntegerType, true) :: - StructField("b", IntegerType, true) :: Nil) + StructField("b", IntegerType, true) :: Nil) val df = spark.read .schema(schema) .option("header", "false") From c0667d1e31e6eed04bb0be1be0eef0f86fba1bb7 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sat, 5 Nov 2016 14:10:18 +0900 Subject: [PATCH 4/7] Make the condition better --- .../spark/sql/execution/datasources/csv/CSVInferSchema.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index 78d1186538dbe..511aab3579d2a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -231,8 +231,8 @@ private[csv] object CSVTypeCast { castType: DataType, nullable: Boolean = true, options: CSVOptions = CSVOptions()): Any = { - - if (datum == null || nullable && datum == options.nullValue) { + val isNull = datum == options.nullValue || datum == null + if (nullable && isNull) { null } else { castType match { From b913eac5f5e3559ed26c02c61f62b556b86413f4 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sat, 5 Nov 2016 14:11:53 +0900 Subject: [PATCH 5/7] No extra change --- .../spark/sql/execution/datasources/csv/CSVInferSchema.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index 511aab3579d2a..4bb2a09d4de44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -231,6 +231,7 @@ private[csv] object CSVTypeCast { castType: DataType, nullable: Boolean = true, options: CSVOptions = CSVOptions()): Any = { + val isNull = datum == options.nullValue || datum == null if (nullable && isNull) { null From e5146e39cdaadec55109c90ed74671322eed7421 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sat, 5 Nov 2016 18:19:33 +0900 Subject: [PATCH 6/7] address the comment for condition and exception --- .../datasources/csv/CSVInferSchema.scala | 7 +++++-- .../datasources/csv/CSVTypeCastSuite.scala | 18 +++++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index 4bb2a09d4de44..0bf14e126658c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -232,8 +232,11 @@ private[csv] object CSVTypeCast { nullable: Boolean = true, options: CSVOptions = CSVOptions()): Any = { - val isNull = datum == options.nullValue || datum == null - if (nullable && isNull) { + // datum can be null if the number of fields found is less than the length of the schema + if (datum == options.nullValue || datum == null) { + if (!nullable) { + throw new RuntimeException("null value found but the field is not nullable.") + } null } else { castType match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala index e26962350dc40..7f04064fdd7c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala @@ -90,14 +90,22 @@ class CSVTypeCastSuite extends SparkFunSuite { CSVTypeCast.castTo("-", StringType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( CSVTypeCast.castTo(null, IntegerType, nullable = true, CSVOptions("nullValue", "-"))) + + // casting a null to not nullable field should throw an exception. + var message = intercept[RuntimeException] { + CSVTypeCast.castTo(null, IntegerType, nullable = false, CSVOptions("nullValue", "-")) + }.getMessage + assert(message.contains("null value found but the field is not nullable.")) + + message = intercept[RuntimeException] { + CSVTypeCast.castTo("-", StringType, nullable = false, CSVOptions("nullValue", "-")) + }.getMessage + assert(message.contains("null value found but the field is not nullable.")) } test("String type should also respect `nullValue`") { assertNull( CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions())) - assert( - CSVTypeCast.castTo("", StringType, nullable = false, CSVOptions()) == - UTF8String.fromString("")) assert( CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions("nullValue", "null")) == @@ -111,10 +119,10 @@ class CSVTypeCastSuite extends SparkFunSuite { } test("Throws exception for empty string with non null type") { - val exception = intercept[NumberFormatException]{ + val exception = intercept[RuntimeException]{ CSVTypeCast.castTo("", IntegerType, nullable = false, CSVOptions()) } - assert(exception.getMessage.contains("For input string: \"\"")) + assert(exception.getMessage.contains("null value found but the field is not nullable.")) } test("Types are cast correctly") { From aa89171ecd888ef0010ef079cfe6439509e8fa6a Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 6 Nov 2016 18:39:48 +0900 Subject: [PATCH 7/7] Print field name in the exception message --- .../datasources/csv/CSVInferSchema.scala | 13 ++- .../datasources/csv/CSVRelation.scala | 1 + .../datasources/csv/CSVTypeCastSuite.scala | 85 ++++++++++--------- 3 files changed, 53 insertions(+), 46 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index 0bf14e126658c..c63aae9d83855 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -221,13 +221,18 @@ private[csv] object CSVTypeCast { * Currently we do not support complex types (ArrayType, MapType, StructType). * * For string types, this is simply the datum. For other types. - * For other nullable types, this is null if the string datum is empty. + * For other nullable types, returns null if it is null or equals to the value specified + * in `nullValue` option. * * @param datum string value - * @param castType SparkSQL type + * @param name field name in schema. + * @param castType data type to cast `datum` into. + * @param nullable nullability for the field. + * @param options CSV options. */ def castTo( datum: String, + name: String, castType: DataType, nullable: Boolean = true, options: CSVOptions = CSVOptions()): Any = { @@ -235,7 +240,7 @@ private[csv] object CSVTypeCast { // datum can be null if the number of fields found is less than the length of the schema if (datum == options.nullValue || datum == null) { if (!nullable) { - throw new RuntimeException("null value found but the field is not nullable.") + throw new RuntimeException(s"null value found but field $name is not nullable.") } null } else { @@ -285,7 +290,7 @@ private[csv] object CSVTypeCast { DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(datum).getTime) } case _: StringType => UTF8String.fromString(datum) - case udt: UserDefinedType[_] => castTo(datum, udt.sqlType, nullable, options) + case udt: UserDefinedType[_] => castTo(datum, name, udt.sqlType, nullable, options) case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala index a249b9d9d59b8..a47b4141531fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala @@ -124,6 +124,7 @@ object CSVRelation extends Logging { // value is not stored in the row. val value = CSVTypeCast.castTo( indexSafeTokens(index), + field.name, field.dataType, field.nullable, params) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala index 7f04064fdd7c8..46333d12138fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala @@ -36,7 +36,7 @@ class CSVTypeCastSuite extends SparkFunSuite { stringValues.zip(decimalValues).foreach { case (strVal, decimalVal) => val decimalValue = new BigDecimal(decimalVal.toString) - assert(CSVTypeCast.castTo(strVal, decimalType) === + assert(CSVTypeCast.castTo(strVal, "_1", decimalType) === Decimal(decimalValue, decimalType.precision, decimalType.scale)) } } @@ -67,90 +67,91 @@ class CSVTypeCastSuite extends SparkFunSuite { test("Nullable types are handled") { assertNull( - CSVTypeCast.castTo("-", ByteType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", ByteType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", ShortType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", ShortType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", IntegerType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", IntegerType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", LongType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", LongType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", FloatType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", FloatType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", DoubleType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", DoubleType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", BooleanType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", BooleanType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", DecimalType.DoubleDecimal, true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", DecimalType.DoubleDecimal, true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", TimestampType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", TimestampType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", DateType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", DateType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo("-", StringType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo("-", "_1", StringType, nullable = true, CSVOptions("nullValue", "-"))) assertNull( - CSVTypeCast.castTo(null, IntegerType, nullable = true, CSVOptions("nullValue", "-"))) + CSVTypeCast.castTo(null, "_1", IntegerType, nullable = true, CSVOptions("nullValue", "-"))) // casting a null to not nullable field should throw an exception. var message = intercept[RuntimeException] { - CSVTypeCast.castTo(null, IntegerType, nullable = false, CSVOptions("nullValue", "-")) + CSVTypeCast.castTo(null, "_1", IntegerType, nullable = false, CSVOptions("nullValue", "-")) }.getMessage - assert(message.contains("null value found but the field is not nullable.")) + assert(message.contains("null value found but field _1 is not nullable.")) message = intercept[RuntimeException] { - CSVTypeCast.castTo("-", StringType, nullable = false, CSVOptions("nullValue", "-")) + CSVTypeCast.castTo("-", "_1", StringType, nullable = false, CSVOptions("nullValue", "-")) }.getMessage - assert(message.contains("null value found but the field is not nullable.")) + assert(message.contains("null value found but field _1 is not nullable.")) } test("String type should also respect `nullValue`") { assertNull( - CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions())) + CSVTypeCast.castTo("", "_1", StringType, nullable = true, CSVOptions())) assert( - CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions("nullValue", "null")) == + CSVTypeCast.castTo("", "_1", StringType, nullable = true, CSVOptions("nullValue", "null")) == UTF8String.fromString("")) assert( - CSVTypeCast.castTo("", StringType, nullable = false, CSVOptions("nullValue", "null")) == + CSVTypeCast.castTo("", "_1", StringType, nullable = false, CSVOptions("nullValue", "null")) == UTF8String.fromString("")) assertNull( - CSVTypeCast.castTo(null, StringType, nullable = true, CSVOptions("nullValue", "null"))) + CSVTypeCast.castTo(null, "_1", StringType, nullable = true, CSVOptions("nullValue", "null"))) } test("Throws exception for empty string with non null type") { val exception = intercept[RuntimeException]{ - CSVTypeCast.castTo("", IntegerType, nullable = false, CSVOptions()) + CSVTypeCast.castTo("", "_1", IntegerType, nullable = false, CSVOptions()) } - assert(exception.getMessage.contains("null value found but the field is not nullable.")) + assert(exception.getMessage.contains("null value found but field _1 is not nullable.")) } test("Types are cast correctly") { - assert(CSVTypeCast.castTo("10", ByteType) == 10) - assert(CSVTypeCast.castTo("10", ShortType) == 10) - assert(CSVTypeCast.castTo("10", IntegerType) == 10) - assert(CSVTypeCast.castTo("10", LongType) == 10) - assert(CSVTypeCast.castTo("1.00", FloatType) == 1.0) - assert(CSVTypeCast.castTo("1.00", DoubleType) == 1.0) - assert(CSVTypeCast.castTo("true", BooleanType) == true) + assert(CSVTypeCast.castTo("10", "_1", ByteType) == 10) + assert(CSVTypeCast.castTo("10", "_1", ShortType) == 10) + assert(CSVTypeCast.castTo("10", "_1", IntegerType) == 10) + assert(CSVTypeCast.castTo("10", "_1", LongType) == 10) + assert(CSVTypeCast.castTo("1.00", "_1", FloatType) == 1.0) + assert(CSVTypeCast.castTo("1.00", "_1", DoubleType) == 1.0) + assert(CSVTypeCast.castTo("true", "_1", BooleanType) == true) val timestampsOptions = CSVOptions("timestampFormat", "dd/MM/yyyy hh:mm") val customTimestamp = "31/01/2015 00:00" val expectedTime = timestampsOptions.timestampFormat.parse(customTimestamp).getTime val castedTimestamp = - CSVTypeCast.castTo(customTimestamp, TimestampType, nullable = true, timestampsOptions) + CSVTypeCast.castTo(customTimestamp, "_1", TimestampType, nullable = true, timestampsOptions) assert(castedTimestamp == expectedTime * 1000L) val customDate = "31/01/2015" val dateOptions = CSVOptions("dateFormat", "dd/MM/yyyy") val expectedDate = dateOptions.dateFormat.parse(customDate).getTime - val castedDate = CSVTypeCast.castTo(customTimestamp, DateType, nullable = true, dateOptions) + val castedDate = + CSVTypeCast.castTo(customTimestamp, "_1", DateType, nullable = true, dateOptions) assert(castedDate == DateTimeUtils.millisToDays(expectedDate)) val timestamp = "2015-01-01 00:00:00" - assert(CSVTypeCast.castTo(timestamp, TimestampType) == + assert(CSVTypeCast.castTo(timestamp, "_1", TimestampType) == DateTimeUtils.stringToTime(timestamp).getTime * 1000L) - assert(CSVTypeCast.castTo("2015-01-01", DateType) == + assert(CSVTypeCast.castTo("2015-01-01", "_1", DateType) == DateTimeUtils.millisToDays(DateTimeUtils.stringToTime("2015-01-01").getTime)) } @@ -158,8 +159,8 @@ class CSVTypeCastSuite extends SparkFunSuite { val originalLocale = Locale.getDefault try { Locale.setDefault(new Locale("fr", "FR")) - assert(CSVTypeCast.castTo("1,00", FloatType) == 100.0) // Would parse as 1.0 in fr-FR - assert(CSVTypeCast.castTo("1,00", DoubleType) == 100.0) + assert(CSVTypeCast.castTo("1,00", "_1", FloatType) == 100.0) // Would parse as 1.0 in fr-FR + assert(CSVTypeCast.castTo("1,00", "_1", DoubleType) == 100.0) } finally { Locale.setDefault(originalLocale) } @@ -167,7 +168,7 @@ class CSVTypeCastSuite extends SparkFunSuite { test("Float NaN values are parsed correctly") { val floatVal: Float = CSVTypeCast.castTo( - "nn", FloatType, nullable = true, CSVOptions("nanValue", "nn")).asInstanceOf[Float] + "nn", "_1", FloatType, nullable = true, CSVOptions("nanValue", "nn")).asInstanceOf[Float] // Java implements the IEEE-754 floating point standard which guarantees that any comparison // against NaN will return false (except != which returns true) @@ -176,32 +177,32 @@ class CSVTypeCastSuite extends SparkFunSuite { test("Double NaN values are parsed correctly") { val doubleVal: Double = CSVTypeCast.castTo( - "-", DoubleType, nullable = true, CSVOptions("nanValue", "-")).asInstanceOf[Double] + "-", "_1", DoubleType, nullable = true, CSVOptions("nanValue", "-")).asInstanceOf[Double] assert(doubleVal.isNaN) } test("Float infinite values can be parsed") { val floatVal1 = CSVTypeCast.castTo( - "max", FloatType, nullable = true, CSVOptions("negativeInf", "max")).asInstanceOf[Float] + "max", "_1", FloatType, nullable = true, CSVOptions("negativeInf", "max")).asInstanceOf[Float] assert(floatVal1 == Float.NegativeInfinity) val floatVal2 = CSVTypeCast.castTo( - "max", FloatType, nullable = true, CSVOptions("positiveInf", "max")).asInstanceOf[Float] + "max", "_1", FloatType, nullable = true, CSVOptions("positiveInf", "max")).asInstanceOf[Float] assert(floatVal2 == Float.PositiveInfinity) } test("Double infinite values can be parsed") { val doubleVal1 = CSVTypeCast.castTo( - "max", DoubleType, nullable = true, CSVOptions("negativeInf", "max") + "max", "_1", DoubleType, nullable = true, CSVOptions("negativeInf", "max") ).asInstanceOf[Double] assert(doubleVal1 == Double.NegativeInfinity) val doubleVal2 = CSVTypeCast.castTo( - "max", DoubleType, nullable = true, CSVOptions("positiveInf", "max") + "max", "_1", DoubleType, nullable = true, CSVOptions("positiveInf", "max") ).asInstanceOf[Double] assert(doubleVal2 == Double.PositiveInfinity)