Skip to content
This repository was archived by the owner on Mar 24, 2025. It is now read-only.
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 8 additions & 16 deletions src/main/scala/com/databricks/spark/xml/util/XmlFile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import com.databricks.spark.xml.{XmlOptions, XmlInputFormat}

private[xml] object XmlFile {
val DEFAULT_INDENT = " "
val DEFAULT_ROW_SEPARATOR = "\n"

def withCharset(
context: SparkContext,
Expand Down Expand Up @@ -80,11 +79,8 @@ private[xml] object XmlFile {
parameters: Map[String, String] = Map()): Unit = {
val options = XmlOptions(parameters.toMap)
val codecClass = CompressionCodecs.getCodecClass(options.codec)
val startElement = s"<${options.rootTag}>"
val endElement = s"</${options.rootTag}>"
val rowSchema = dataFrame.schema
val indent = XmlFile.DEFAULT_INDENT
val rowSeparator = XmlFile.DEFAULT_ROW_SEPARATOR

val xmlRDD = dataFrame.rdd.mapPartitions { iter =>
val factory = XMLOutputFactory.newInstance()
Expand All @@ -101,6 +97,10 @@ private[xml] object XmlFile {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pwoody, would you mind removing `val startElement = s"<${options.rootTag}>"` and `val endElement = s"</${options.rootTag}>"` above? They seem to be unused now.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, done.

override def next: String = {
if (iter.nonEmpty) {
if (firstRow) {
indentingXmlWriter.writeStartElement(options.rootTag)
firstRow = false
}
val xml = {
StaxXmlGenerator(
rowSchema,
Expand All @@ -109,21 +109,13 @@ private[xml] object XmlFile {
writer.toString
}
writer.reset()

// Here it needs to add indentations for the start of each line,
// in order to insert the start element and end element.
val indentedXml = indent + xml.replaceAll(rowSeparator, rowSeparator + indent)
if (firstRow) {
firstRow = false
startElement + rowSeparator + indentedXml
} else {
indentedXml
}
xml
} else {
indentingXmlWriter.close()
if (!firstRow) {
lastRow = false
endElement
indentingXmlWriter.writeEndElement()
indentingXmlWriter.close()
writer.toString
} else {
// This means the iterator was initially empty.
firstRow = false
Expand Down