Skip to content

Commit 7eb83fe

Browse files
HyukjinKwon authored and rxin committed
[SPARK-13137][SQL] NullPointerException in schema inference for CSV when the first line is empty
https://issues.apache.org/jira/browse/SPARK-13137 This PR adds a filter in schema inference so that it does not throw a NullPointerException when the first line is empty. Also, I removed `MAX_COMMENT_LINES_IN_HEADER` and instead used monadic chaining with `filter()` and `first()`. Lastly, I simply added a leading newline to the existing test resource rather than adding a new file, so that this case is covered by the original tests. Author: hyukjinkwon <[email protected]> Closes #11023 from HyukjinKwon/SPARK-13137.
1 parent b6a873d commit 7eb83fe

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,6 @@ private[sql] class CSVOptions(
7575
val ignoreLeadingWhiteSpaceFlag = getBool("ignoreLeadingWhiteSpace")
7676
val ignoreTrailingWhiteSpaceFlag = getBool("ignoreTrailingWhiteSpace")
7777

78-
// Limit the number of lines we'll search for a header row that isn't comment-prefixed
79-
val MAX_COMMENT_LINES_IN_HEADER = 10
80-
8178
// Parse mode flags
8279
if (!ParseModes.isValidMode(parseMode)) {
8380
logWarning(s"$parseMode is not a valid parse mode. Using ${ParseModes.DEFAULT}.")

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,14 @@ private[csv] class CSVRelation(
154154
*/
155155
private def findFirstLine(rdd: RDD[String]): String = {
156156
if (params.isCommentSet) {
157-
rdd.take(params.MAX_COMMENT_LINES_IN_HEADER)
158-
.find(!_.startsWith(params.comment.toString))
159-
.getOrElse(sys.error(s"No uncommented header line in " +
160-
s"first ${params.MAX_COMMENT_LINES_IN_HEADER} lines"))
157+
val comment = params.comment.toString
158+
rdd.filter { line =>
159+
line.trim.nonEmpty && !line.startsWith(comment)
160+
}.first()
161161
} else {
162-
rdd.first()
162+
rdd.filter { line =>
163+
line.trim.nonEmpty
164+
}.first()
163165
}
164166
}
165167
}

sql/core/src/test/resources/cars.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
year,make,model,comment,blank
23
"2012","Tesla","S","No comment",
34

0 commit comments

Comments
 (0)