Skip to content

Commit aa89171

Browse files
committed
Print field name in the exception message
1 parent e5146e3 commit aa89171

File tree

3 files changed

+53
-46
lines changed

3 files changed

+53
-46
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,21 +221,26 @@ private[csv] object CSVTypeCast {
221221
* Currently we do not support complex types (ArrayType, MapType, StructType).
222222
*
223223
* For string types, this is simply the datum.
224-
* For other nullable types, this is null if the string datum is empty.
224+
* For other nullable types, returns null if the datum is null or equal to the value specified
225+
* in `nullValue` option.
225226
*
226227
* @param datum string value
227-
* @param castType SparkSQL type
228+
* @param name field name in schema.
229+
* @param castType data type to cast `datum` into.
230+
* @param nullable nullability for the field.
231+
* @param options CSV options.
228232
*/
229233
def castTo(
230234
datum: String,
235+
name: String,
231236
castType: DataType,
232237
nullable: Boolean = true,
233238
options: CSVOptions = CSVOptions()): Any = {
234239

235240
// datum can be null if the number of fields found is less than the length of the schema
236241
if (datum == options.nullValue || datum == null) {
237242
if (!nullable) {
238-
throw new RuntimeException("null value found but the field is not nullable.")
243+
throw new RuntimeException(s"null value found but field $name is not nullable.")
239244
}
240245
null
241246
} else {
@@ -285,7 +290,7 @@ private[csv] object CSVTypeCast {
285290
DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(datum).getTime)
286291
}
287292
case _: StringType => UTF8String.fromString(datum)
288-
case udt: UserDefinedType[_] => castTo(datum, udt.sqlType, nullable, options)
293+
case udt: UserDefinedType[_] => castTo(datum, name, udt.sqlType, nullable, options)
289294
case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
290295
}
291296
}

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ object CSVRelation extends Logging {
124124
// value is not stored in the row.
125125
val value = CSVTypeCast.castTo(
126126
indexSafeTokens(index),
127+
field.name,
127128
field.dataType,
128129
field.nullable,
129130
params)

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala

Lines changed: 43 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class CSVTypeCastSuite extends SparkFunSuite {
3636

3737
stringValues.zip(decimalValues).foreach { case (strVal, decimalVal) =>
3838
val decimalValue = new BigDecimal(decimalVal.toString)
39-
assert(CSVTypeCast.castTo(strVal, decimalType) ===
39+
assert(CSVTypeCast.castTo(strVal, "_1", decimalType) ===
4040
Decimal(decimalValue, decimalType.precision, decimalType.scale))
4141
}
4242
}
@@ -67,107 +67,108 @@ class CSVTypeCastSuite extends SparkFunSuite {
6767

6868
test("Nullable types are handled") {
6969
assertNull(
70-
CSVTypeCast.castTo("-", ByteType, nullable = true, CSVOptions("nullValue", "-")))
70+
CSVTypeCast.castTo("-", "_1", ByteType, nullable = true, CSVOptions("nullValue", "-")))
7171
assertNull(
72-
CSVTypeCast.castTo("-", ShortType, nullable = true, CSVOptions("nullValue", "-")))
72+
CSVTypeCast.castTo("-", "_1", ShortType, nullable = true, CSVOptions("nullValue", "-")))
7373
assertNull(
74-
CSVTypeCast.castTo("-", IntegerType, nullable = true, CSVOptions("nullValue", "-")))
74+
CSVTypeCast.castTo("-", "_1", IntegerType, nullable = true, CSVOptions("nullValue", "-")))
7575
assertNull(
76-
CSVTypeCast.castTo("-", LongType, nullable = true, CSVOptions("nullValue", "-")))
76+
CSVTypeCast.castTo("-", "_1", LongType, nullable = true, CSVOptions("nullValue", "-")))
7777
assertNull(
78-
CSVTypeCast.castTo("-", FloatType, nullable = true, CSVOptions("nullValue", "-")))
78+
CSVTypeCast.castTo("-", "_1", FloatType, nullable = true, CSVOptions("nullValue", "-")))
7979
assertNull(
80-
CSVTypeCast.castTo("-", DoubleType, nullable = true, CSVOptions("nullValue", "-")))
80+
CSVTypeCast.castTo("-", "_1", DoubleType, nullable = true, CSVOptions("nullValue", "-")))
8181
assertNull(
82-
CSVTypeCast.castTo("-", BooleanType, nullable = true, CSVOptions("nullValue", "-")))
82+
CSVTypeCast.castTo("-", "_1", BooleanType, nullable = true, CSVOptions("nullValue", "-")))
8383
assertNull(
84-
CSVTypeCast.castTo("-", DecimalType.DoubleDecimal, true, CSVOptions("nullValue", "-")))
84+
CSVTypeCast.castTo("-", "_1", DecimalType.DoubleDecimal, true, CSVOptions("nullValue", "-")))
8585
assertNull(
86-
CSVTypeCast.castTo("-", TimestampType, nullable = true, CSVOptions("nullValue", "-")))
86+
CSVTypeCast.castTo("-", "_1", TimestampType, nullable = true, CSVOptions("nullValue", "-")))
8787
assertNull(
88-
CSVTypeCast.castTo("-", DateType, nullable = true, CSVOptions("nullValue", "-")))
88+
CSVTypeCast.castTo("-", "_1", DateType, nullable = true, CSVOptions("nullValue", "-")))
8989
assertNull(
90-
CSVTypeCast.castTo("-", StringType, nullable = true, CSVOptions("nullValue", "-")))
90+
CSVTypeCast.castTo("-", "_1", StringType, nullable = true, CSVOptions("nullValue", "-")))
9191
assertNull(
92-
CSVTypeCast.castTo(null, IntegerType, nullable = true, CSVOptions("nullValue", "-")))
92+
CSVTypeCast.castTo(null, "_1", IntegerType, nullable = true, CSVOptions("nullValue", "-")))
9393

9494
// casting a null to not nullable field should throw an exception.
9595
var message = intercept[RuntimeException] {
96-
CSVTypeCast.castTo(null, IntegerType, nullable = false, CSVOptions("nullValue", "-"))
96+
CSVTypeCast.castTo(null, "_1", IntegerType, nullable = false, CSVOptions("nullValue", "-"))
9797
}.getMessage
98-
assert(message.contains("null value found but the field is not nullable."))
98+
assert(message.contains("null value found but field _1 is not nullable."))
9999

100100
message = intercept[RuntimeException] {
101-
CSVTypeCast.castTo("-", StringType, nullable = false, CSVOptions("nullValue", "-"))
101+
CSVTypeCast.castTo("-", "_1", StringType, nullable = false, CSVOptions("nullValue", "-"))
102102
}.getMessage
103-
assert(message.contains("null value found but the field is not nullable."))
103+
assert(message.contains("null value found but field _1 is not nullable."))
104104
}
105105

106106
test("String type should also respect `nullValue`") {
107107
assertNull(
108-
CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions()))
108+
CSVTypeCast.castTo("", "_1", StringType, nullable = true, CSVOptions()))
109109

110110
assert(
111-
CSVTypeCast.castTo("", StringType, nullable = true, CSVOptions("nullValue", "null")) ==
111+
CSVTypeCast.castTo("", "_1", StringType, nullable = true, CSVOptions("nullValue", "null")) ==
112112
UTF8String.fromString(""))
113113
assert(
114-
CSVTypeCast.castTo("", StringType, nullable = false, CSVOptions("nullValue", "null")) ==
114+
CSVTypeCast.castTo("", "_1", StringType, nullable = false, CSVOptions("nullValue", "null")) ==
115115
UTF8String.fromString(""))
116116

117117
assertNull(
118-
CSVTypeCast.castTo(null, StringType, nullable = true, CSVOptions("nullValue", "null")))
118+
CSVTypeCast.castTo(null, "_1", StringType, nullable = true, CSVOptions("nullValue", "null")))
119119
}
120120

121121
test("Throws exception for empty string with non null type") {
122122
val exception = intercept[RuntimeException]{
123-
CSVTypeCast.castTo("", IntegerType, nullable = false, CSVOptions())
123+
CSVTypeCast.castTo("", "_1", IntegerType, nullable = false, CSVOptions())
124124
}
125-
assert(exception.getMessage.contains("null value found but the field is not nullable."))
125+
assert(exception.getMessage.contains("null value found but field _1 is not nullable."))
126126
}
127127

128128
test("Types are cast correctly") {
129-
assert(CSVTypeCast.castTo("10", ByteType) == 10)
130-
assert(CSVTypeCast.castTo("10", ShortType) == 10)
131-
assert(CSVTypeCast.castTo("10", IntegerType) == 10)
132-
assert(CSVTypeCast.castTo("10", LongType) == 10)
133-
assert(CSVTypeCast.castTo("1.00", FloatType) == 1.0)
134-
assert(CSVTypeCast.castTo("1.00", DoubleType) == 1.0)
135-
assert(CSVTypeCast.castTo("true", BooleanType) == true)
129+
assert(CSVTypeCast.castTo("10", "_1", ByteType) == 10)
130+
assert(CSVTypeCast.castTo("10", "_1", ShortType) == 10)
131+
assert(CSVTypeCast.castTo("10", "_1", IntegerType) == 10)
132+
assert(CSVTypeCast.castTo("10", "_1", LongType) == 10)
133+
assert(CSVTypeCast.castTo("1.00", "_1", FloatType) == 1.0)
134+
assert(CSVTypeCast.castTo("1.00", "_1", DoubleType) == 1.0)
135+
assert(CSVTypeCast.castTo("true", "_1", BooleanType) == true)
136136

137137
val timestampsOptions = CSVOptions("timestampFormat", "dd/MM/yyyy hh:mm")
138138
val customTimestamp = "31/01/2015 00:00"
139139
val expectedTime = timestampsOptions.timestampFormat.parse(customTimestamp).getTime
140140
val castedTimestamp =
141-
CSVTypeCast.castTo(customTimestamp, TimestampType, nullable = true, timestampsOptions)
141+
CSVTypeCast.castTo(customTimestamp, "_1", TimestampType, nullable = true, timestampsOptions)
142142
assert(castedTimestamp == expectedTime * 1000L)
143143

144144
val customDate = "31/01/2015"
145145
val dateOptions = CSVOptions("dateFormat", "dd/MM/yyyy")
146146
val expectedDate = dateOptions.dateFormat.parse(customDate).getTime
147-
val castedDate = CSVTypeCast.castTo(customTimestamp, DateType, nullable = true, dateOptions)
147+
val castedDate =
148+
CSVTypeCast.castTo(customTimestamp, "_1", DateType, nullable = true, dateOptions)
148149
assert(castedDate == DateTimeUtils.millisToDays(expectedDate))
149150

150151
val timestamp = "2015-01-01 00:00:00"
151-
assert(CSVTypeCast.castTo(timestamp, TimestampType) ==
152+
assert(CSVTypeCast.castTo(timestamp, "_1", TimestampType) ==
152153
DateTimeUtils.stringToTime(timestamp).getTime * 1000L)
153-
assert(CSVTypeCast.castTo("2015-01-01", DateType) ==
154+
assert(CSVTypeCast.castTo("2015-01-01", "_1", DateType) ==
154155
DateTimeUtils.millisToDays(DateTimeUtils.stringToTime("2015-01-01").getTime))
155156
}
156157

157158
test("Float and Double Types are cast without respect to platform default Locale") {
158159
val originalLocale = Locale.getDefault
159160
try {
160161
Locale.setDefault(new Locale("fr", "FR"))
161-
assert(CSVTypeCast.castTo("1,00", FloatType) == 100.0) // Would parse as 1.0 in fr-FR
162-
assert(CSVTypeCast.castTo("1,00", DoubleType) == 100.0)
162+
assert(CSVTypeCast.castTo("1,00", "_1", FloatType) == 100.0) // Would parse as 1.0 in fr-FR
163+
assert(CSVTypeCast.castTo("1,00", "_1", DoubleType) == 100.0)
163164
} finally {
164165
Locale.setDefault(originalLocale)
165166
}
166167
}
167168

168169
test("Float NaN values are parsed correctly") {
169170
val floatVal: Float = CSVTypeCast.castTo(
170-
"nn", FloatType, nullable = true, CSVOptions("nanValue", "nn")).asInstanceOf[Float]
171+
"nn", "_1", FloatType, nullable = true, CSVOptions("nanValue", "nn")).asInstanceOf[Float]
171172

172173
// Java implements the IEEE-754 floating point standard which guarantees that any comparison
173174
// against NaN will return false (except != which returns true)
@@ -176,32 +177,32 @@ class CSVTypeCastSuite extends SparkFunSuite {
176177

177178
test("Double NaN values are parsed correctly") {
178179
val doubleVal: Double = CSVTypeCast.castTo(
179-
"-", DoubleType, nullable = true, CSVOptions("nanValue", "-")).asInstanceOf[Double]
180+
"-", "_1", DoubleType, nullable = true, CSVOptions("nanValue", "-")).asInstanceOf[Double]
180181

181182
assert(doubleVal.isNaN)
182183
}
183184

184185
test("Float infinite values can be parsed") {
185186
val floatVal1 = CSVTypeCast.castTo(
186-
"max", FloatType, nullable = true, CSVOptions("negativeInf", "max")).asInstanceOf[Float]
187+
"max", "_1", FloatType, nullable = true, CSVOptions("negativeInf", "max")).asInstanceOf[Float]
187188

188189
assert(floatVal1 == Float.NegativeInfinity)
189190

190191
val floatVal2 = CSVTypeCast.castTo(
191-
"max", FloatType, nullable = true, CSVOptions("positiveInf", "max")).asInstanceOf[Float]
192+
"max", "_1", FloatType, nullable = true, CSVOptions("positiveInf", "max")).asInstanceOf[Float]
192193

193194
assert(floatVal2 == Float.PositiveInfinity)
194195
}
195196

196197
test("Double infinite values can be parsed") {
197198
val doubleVal1 = CSVTypeCast.castTo(
198-
"max", DoubleType, nullable = true, CSVOptions("negativeInf", "max")
199+
"max", "_1", DoubleType, nullable = true, CSVOptions("negativeInf", "max")
199200
).asInstanceOf[Double]
200201

201202
assert(doubleVal1 == Double.NegativeInfinity)
202203

203204
val doubleVal2 = CSVTypeCast.castTo(
204-
"max", DoubleType, nullable = true, CSVOptions("positiveInf", "max")
205+
"max", "_1", DoubleType, nullable = true, CSVOptions("positiveInf", "max")
205206
).asInstanceOf[Double]
206207

207208
assert(doubleVal2 == Double.PositiveInfinity)

0 commit comments

Comments
 (0)