Skip to content

Commit 5214e8a

Browse files
committed
NullPointerException in schema inference for CSV when the first line is empty
1 parent b8666fd commit 5214e8a

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,6 @@ private[sql] class CSVOptions(
6262
val ignoreLeadingWhiteSpaceFlag = getBool("ignoreLeadingWhiteSpace")
6363
val ignoreTrailingWhiteSpaceFlag = getBool("ignoreTrailingWhiteSpace")
6464

65-
// Limit the number of lines we'll search for a header row that isn't comment-prefixed
66-
val MAX_COMMENT_LINES_IN_HEADER = 10
67-
6865
// Parse mode flags
6966
if (!ParseModes.isValidMode(parseMode)) {
7067
logWarning(s"$parseMode is not a valid parse mode. Using ${ParseModes.DEFAULT}.")

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,14 @@ private[csv] class CSVRelation(
154154
*/
155155
private def findFirstLine(rdd: RDD[String]): String = {
156156
if (params.isCommentSet) {
157-
rdd.take(params.MAX_COMMENT_LINES_IN_HEADER)
158-
.find(!_.startsWith(params.comment.toString))
159-
.getOrElse(sys.error(s"No uncommented header line in " +
160-
s"first ${params.MAX_COMMENT_LINES_IN_HEADER} lines"))
157+
val comment = params.comment.toString
158+
rdd.filter { line =>
159+
line.trim.nonEmpty && !line.startsWith(comment)
160+
}.first()
161161
} else {
162-
rdd.first()
162+
rdd.filter { line =>
163+
line.trim.nonEmpty
164+
}.first()
163165
}
164166
}
165167
}

sql/core/src/test/resources/cars.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
year,make,model,comment,blank
23
"2012","Tesla","S","No comment",
34

0 commit comments

Comments
 (0)