50 changes: 3 additions & 47 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -165,20 +165,19 @@ class DataFrame private[sql](
* @param _numRows Number of rows to show
* @param truncate Whether to truncate long strings and align cells right
*/
private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
override private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
val numRows = _numRows.max(0)
val sb = new StringBuilder
val takeResult = take(numRows + 1)
val hasMoreData = takeResult.length > numRows
val data = takeResult.take(numRows)
val numCols = schema.fieldNames.length

// For array values, replace Seq and Array with square brackets
// For cells longer than 20 characters, replace them with the first 17 characters and "..."
val rows: Seq[Seq[String]] = schema.fieldNames.toSeq +: data.map { row =>
row.toSeq.map { cell =>
val str = cell match {
case null => "null"
case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
case array: Array[_] => array.mkString("[", ", ", "]")
case seq: Seq[_] => seq.mkString("[", ", ", "]")
case _ => cell.toString
@@ -187,50 +186,7 @@ class DataFrame private[sql](
}: Seq[String]
}

// Initialise the width of each column to a minimum value of 3
val colWidths = Array.fill(numCols)(3)

// Compute the width of each column
for (row <- rows) {
for ((cell, i) <- row.zipWithIndex) {
colWidths(i) = math.max(colWidths(i), cell.length)
}
}

// Create the separator line
val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

// column names
rows.head.zipWithIndex.map { case (cell, i) =>
if (truncate) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")

sb.append(sep)

// data
rows.tail.map {
_.zipWithIndex.map { case (cell, i) =>
if (truncate) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
}

sb.append(sep)

// For data with more than numRows records
if (hasMoreData) {
val rowsString = if (numRows == 1) "row" else "rows"
sb.append(s"only showing top $numRows $rowsString\n")
}

sb.toString()
formatString(rows, numRows, hasMoreData, truncate)
}

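After this change, the DataFrame side keeps only the cell-rendering logic and hands layout off to the shared `formatString` (defined in Queryable.scala below). A quick hedged sketch of the truncation rule the comment above describes; the data and column names here are illustrative, not from this PR:

```scala
import sqlContext.implicits._  // assumes a spark-shell style SQLContext in scope

// A 25-character string exceeds the 20-character cell limit, so with the
// default truncate = true it renders as its first 17 characters + "..."
val df = Seq((1, "a" * 25)).toDF("id", "text")

df.show()                      // truncated, cells right-aligned via leftPad
df.show(20, truncate = false)  // full strings, cells left-aligned via rightPad
```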
37 changes: 36 additions & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -225,7 +225,42 @@ class Dataset[T] private[sql](
*
* @since 1.6.0
*/
def show(numRows: Int, truncate: Boolean): Unit = toDF().show(numRows, truncate)
// scalastyle:off println
def show(numRows: Int, truncate: Boolean): Unit = println(showString(numRows, truncate))
// scalastyle:on println

/**
* Compose the string representing rows for output
* @param _numRows Number of rows to show
* @param truncate Whether to truncate long strings and align cells right
*/
override private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
val numRows = _numRows.max(0)
val takeResult = take(numRows + 1)
val hasMoreData = takeResult.length > numRows
val data = takeResult.take(numRows)

// For array values, replace Seq and Array with square brackets
// For cells longer than 20 characters, replace them with the first 17 characters and "..."
val rows: Seq[Seq[String]] = schema.fieldNames.toSeq +: (data.map {
case r: Row => r
case tuple: Product => Row.fromTuple(tuple)
case o => Row(o)
} map { row =>
row.toSeq.map { cell =>
val str = cell match {
case null => "null"
case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
case array: Array[_] => array.mkString("[", ", ", "]")
case seq: Seq[_] => seq.mkString("[", ", ", "]")
case _ => cell.toString
}
if (truncate && str.length > 20) str.substring(0, 17) + "..." else str
}: Seq[String]
})

formatString(rows, numRows, hasMoreData, truncate)
}

/**
* Returns a new [[Dataset]] that has exactly `numPartitions` partitions.
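The key difference from the DataFrame version is the extra match in the new `Dataset.showString` that wraps typed elements into `Row`s before rendering. A hedged sketch of which branch each element type hits (example data and names are mine, not the PR's):

```scala
import sqlContext.implicits._  // assumes an existing SQLContext

case class Person(name: String, age: Int)

// Case classes are Products, so they take the Row.fromTuple branch above
Seq(Person("Ann", 30), Person("Bob", 25)).toDS().show()

// A primitive element is neither a Row nor a Product, so it takes the
// Row(o) branch and renders as a single column
Seq(1, 2, 3).toDS().show()
```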
sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution

import scala.util.control.NonFatal

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.StructType

@@ -42,4 +43,68 @@ private[sql] trait Queryable {
def explain(extended: Boolean): Unit

def explain(): Unit

private[sql] def showString(_numRows: Int, truncate: Boolean = true): String

/**
* Format the string representing rows for output
* @param rows The rows to show
* @param numRows Number of rows to show
* @param hasMoreData Whether some rows are not shown due to the limit
* @param truncate Whether to truncate long strings and align cells right
*/
private[sql] def formatString(
rows: Seq[Seq[String]],
numRows: Int,
hasMoreData: Boolean,
truncate: Boolean = true): String = {
val sb = new StringBuilder
val numCols = schema.fieldNames.length

// Initialise the width of each column to a minimum value of 3
val colWidths = Array.fill(numCols)(3)

// Compute the width of each column
for (row <- rows) {
for ((cell, i) <- row.zipWithIndex) {
colWidths(i) = math.max(colWidths(i), cell.length)
}
}

// Create the separator line
val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

// column names
rows.head.zipWithIndex.map { case (cell, i) =>
if (truncate) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")

sb.append(sep)

// data
rows.tail.map {
_.zipWithIndex.map { case (cell, i) =>
if (truncate) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
}.addString(sb, "|", "|", "|\n")
}

sb.append(sep)

// For data with more than numRows records
if (hasMoreData) {
val rowsString = if (numRows == 1) "row" else "rows"
sb.append(s"only showing top $numRows $rowsString\n")
}

sb.toString()
}
}
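Since `formatString` is now the single place that lays out `show` output for both DataFrame and Dataset, it may help to see the algorithm in isolation: widths start at a minimum of 3, grow to the widest cell per column, and `truncate` selects right alignment (`leftPad`) versus left alignment (`rightPad`). A self-contained sketch of the same idea, not the trait code itself:

```scala
// Requires org.apache.commons:commons-lang3 on the classpath, as in Spark
import org.apache.commons.lang3.StringUtils

object FormatDemo extends App {
  val rows = Seq(Seq("id", "name"), Seq("1", "Ann"), Seq("2", "Bob"))

  // Same rule as formatString: minimum width 3, then the widest cell wins
  val widths = Array.fill(rows.head.length)(3)
  for (row <- rows; (cell, i) <- row.zipWithIndex) {
    widths(i) = math.max(widths(i), cell.length)
  }

  val sep = widths.map("-" * _).mkString("+", "+", "+")
  def fmt(row: Seq[String]): String =
    row.zipWithIndex.map { case (c, i) => StringUtils.leftPad(c, widths(i)) }
      .mkString("|", "|", "|")

  println(sep)
  println(fmt(rows.head))  // header row
  println(sep)
  rows.tail.foreach(r => println(fmt(r)))
  println(sep)
}
// Prints:
// +---+----+
// | id|name|
// +---+----+
// |  1| Ann|
// |  2| Bob|
// +---+----+
```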
15 changes: 15 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -585,6 +585,21 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(df.showString(10) === expectedAnswer)
}

test("showString: binary") {
val df = Seq(
("12".getBytes, "ABC.".getBytes),
("34".getBytes, "12346".getBytes)
).toDF()
val expectedAnswer = """+-------+----------------+
|| _1| _2|
|+-------+----------------+
||[31 32]| [41 42 43 2E]|
||[33 34]|[31 32 33 34 36]|
|+-------+----------------+
|""".stripMargin
assert(df.showString(10) === expectedAnswer)
}

test("showString: minimum column width") {
val df = Seq(
(1, 1),
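The binary test above pins down the `Array[Byte]` branch of `showString`: each byte becomes two uppercase hex digits, space-separated inside brackets. A one-line sanity check of that expression (same format string as in the method):

```scala
// "12".getBytes is Array(49, 50) in ASCII, i.e. 0x31 and 0x32
val rendered = "12".getBytes.map("%02X".format(_)).mkString("[", " ", "]")
assert(rendered == "[31 32]")
```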
14 changes: 14 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -426,6 +426,20 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
assert(ds.toString == "[_1: int, _2: int]")
}

test("showString: Kryo encoder") {
implicit val kryoEncoder = Encoders.kryo[KryoData]
val ds = Seq(KryoData(1), KryoData(2)).toDS()

val expectedAnswer = """+-----------+
|| value|
|+-----------+
||KryoData(1)|
||KryoData(2)|
|+-----------+
|""".stripMargin
assert(ds.showString(10) === expectedAnswer)
}

test("Kryo encoder") {
implicit val kryoEncoder = Encoders.kryo[KryoData]
val ds = Seq(KryoData(1), KryoData(2)).toDS()
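This test captures the motivation for giving Dataset its own `showString`: with a Kryo encoder, going through `toDF()` exposes a single binary column, so the old `show` (which delegated to `toDF().show`) printed raw bytes, whereas the new typed path prints each deserialized object's `toString`. A hedged sketch of the contrast (my reading of the 1.6-era behavior, not asserted by the PR):

```scala
implicit val kryoEncoder = Encoders.kryo[KryoData]
val ds = Seq(KryoData(1), KryoData(2)).toDS()

ds.show()        // typed path: |KryoData(1)| and |KryoData(2)|
ds.toDF().show() // single binary "value" column, hex-rendered and unreadable
```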