Skip to content

Commit bbff402

Browse files
committed
Fix text source tests and javadoc comments
1 parent d632706 commit bbff402

File tree

5 files changed

+11
-6
lines changed

5 files changed

+11
-6
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ private[sql] class JSONOptions(
131131

132132
def getTextOptions: Map[String, String] = {
133133
lineSeparatorInRead.map{ bytes =>
134-
"lineSep" -> bytes.map("%02x".format(_)).mkString
134+
"lineSep" -> bytes.map("x%02x".format(_)).mkString
135135
}.toMap
136136
}
137137
}

sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -366,7 +366,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
366366
* `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
367367
* <li>`multiLine` (default `false`): parse one record, which may span multiple lines,
368368
* per file</li>
369-
* </ul>
370369
* <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
371370
* that should be used for parsing.</li>
372371
* </ul>

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -518,7 +518,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
518518
* <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
519519
* indicates a timestamp format. Custom date formats follow the formats at
520520
* `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
521-
* </ul>
522521
* <li>`lineSep` (default `\n`): defines the line separator that should
523522
* be used for writing.</li>
524523
* </ul>

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,16 @@ private[text] class TextOptions(@transient private val parameters: CaseInsensiti
4141
*/
4242
val wholeText = parameters.getOrElse(WHOLETEXT, "false").toBoolean
4343

44-
val lineSeparator: Option[Array[Byte]] = parameters.get(LINE_SEPARATOR).map { hex =>
45-
hex.sliding(2, 2).toArray.map(Integer.parseInt(_, 16).toByte)
44+
val charset: Option[String] = Some("UTF-8")
45+
46+
val lineSeparator: Option[Array[Byte]] = parameters.get("lineSep").collect {
47+
case hexs if hexs.startsWith("x") =>
48+
hexs.replaceAll("[^0-9A-Fa-f]", "").sliding(2, 2).toArray
49+
.map(Integer.parseInt(_, 16).toByte)
50+
case reserved if reserved.startsWith("r") || reserved.startsWith("/") =>
51+
throw new NotImplementedError(s"the $reserved selector has not supported yet")
52+
case delim => delim.getBytes(charset.getOrElse(
53+
throw new IllegalArgumentException("Please, set the charset option for the delimiter")))
4654
}
4755

4856
// Note that the option 'lineSep' uses a different default value in read and write.

sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,6 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
268268
* `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
269269
* <li>`multiLine` (default `false`): parse one record, which may span multiple lines,
270270
* per file</li>
271-
* </ul>
272271
* <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
273272
* that should be used for parsing.</li>
274273
* </ul>

0 commit comments

Comments
 (0)