From 8bf5eadc6904ec52902d5165930a8c13320de67e Mon Sep 17 00:00:00 2001 From: Daniel van der Ende Date: Wed, 14 Jun 2017 20:19:50 +0200 Subject: [PATCH] [SPARK-21098] Set lineseparator csv multiline and csv write This commit explicitly sets the line separator to \n when reading a multiline CSV file and when writing a CSV file. We cannot make this configurable for reading, because reading depends on Hadoop's LineReader, which hardcodes \n as the line ending. --- .../spark/sql/execution/datasources/csv/CSVOptions.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala index 78c16b75ee684..6fb6528226517 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala @@ -90,6 +90,7 @@ class CSVOptions( val quote = getChar("quote", '\"') val escape = getChar("escape", '\\') val comment = getChar("comment", '\u0000') + val lineSeparator = "\n" val headerFlag = getBool("header") val inferSchemaFlag = getBool("inferSchema") @@ -149,6 +150,7 @@ class CSVOptions( format.setQuote(quote) format.setQuoteEscape(escape) format.setComment(comment) + format.setLineSeparator(lineSeparator) writerSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhiteSpaceFlagInWrite) writerSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhiteSpaceFlagInWrite) writerSettings.setNullValue(nullValue) @@ -166,6 +168,7 @@ class CSVOptions( format.setQuote(quote) format.setQuoteEscape(escape) format.setComment(comment) + format.setLineSeparator(lineSeparator) settings.setIgnoreLeadingWhitespaces(ignoreLeadingWhiteSpaceInRead) settings.setIgnoreTrailingWhitespaces(ignoreTrailingWhiteSpaceInRead) settings.setReadInputOnSeparateThread(false)