@@ -231,16 +231,17 @@ class Dataset[T] private[sql](
231231 }
232232
233233 /**
234- * Compose the string representing rows for output
234+ * Get the rows as a sequence of string sequences, formatted according to the given truncate and vertical settings.
235235 *
236- * @param _numRows Number of rows to show
236+ * @param numRows Number of rows to return; one additional row is fetched if available.
237237 * @param truncate If set to more than 0, truncates strings to `truncate` characters and
238238 * all cells will be aligned right.
239- * @param vertical If set to true, prints output rows vertically (one line per column value) .
239+ * @param vertical If set to true, the returned rows do not need to be truncated.
240240 */
241- private [sql] def showString (
242- _numRows : Int , truncate : Int = 20 , vertical : Boolean = false ): String = {
243- val numRows = _numRows.max(0 ).min(Int .MaxValue - 1 )
241+ private [sql] def getRows (
242+ numRows : Int ,
243+ truncate : Int ,
244+ vertical : Boolean ): Seq [Seq [String ]] = {
244245 val newDf = toDF()
245246 val castCols = newDf.logicalPlan.output.map { col =>
246247 // Since binary types in top-level schema fields have a specific format to print,
@@ -251,14 +252,12 @@ class Dataset[T] private[sql](
251252 Column (col).cast(StringType )
252253 }
253254 }
254- val takeResult = newDf.select(castCols : _* ).take(numRows + 1 )
255- val hasMoreData = takeResult.length > numRows
256- val data = takeResult.take(numRows)
255+ val data = newDf.select(castCols : _* ).take(numRows + 1 )
257256
258257 // For array values, replace Seq and Array with square brackets
259258 // For cells that are beyond `truncate` characters, replace it with the
260259 // first `truncate-3` and "..."
261- val rows : Seq [ Seq [ String ]] = schema.fieldNames.toSeq +: data.map { row =>
260+ schema.fieldNames.toSeq +: data.map { row =>
262261 row.toSeq.map { cell =>
263262 val str = cell match {
264263 case null => " null"
@@ -274,6 +273,26 @@ class Dataset[T] private[sql](
274273 }
275274 }: Seq [String ]
276275 }
276+ }
277+
278+ /**
279+ * Compose the string representing rows for output
280+ *
281+ * @param _numRows Number of rows to show
282+ * @param truncate If set to more than 0, truncates strings to `truncate` characters and
283+ * all cells will be aligned right.
284+ * @param vertical If set to true, prints output rows vertically (one line per column value).
285+ */
286+ private [sql] def showString (
287+ _numRows : Int ,
288+ truncate : Int = 20 ,
289+ vertical : Boolean = false ): String = {
290+ val numRows = _numRows.max(0 ).min(Int .MaxValue - 1 )
291+ // Get the rows as a Seq[Seq[String]]; the result may contain one extra row if more data is available.
292+ val tmpRows = getRows(numRows, truncate, vertical)
293+
294+ val hasMoreData = tmpRows.length - 1 > numRows
295+ val rows = tmpRows.take(numRows + 1 )
277296
278297 val sb = new StringBuilder
279298 val numCols = schema.fieldNames.length
@@ -291,31 +310,25 @@ class Dataset[T] private[sql](
291310 }
292311 }
293312
313+ val paddedRows = rows.map { row =>
314+ row.zipWithIndex.map { case (cell, i) =>
315+ if (truncate > 0 ) {
316+ StringUtils .leftPad(cell, colWidths(i))
317+ } else {
318+ StringUtils .rightPad(cell, colWidths(i))
319+ }
320+ }
321+ }
322+
294323 // Create SeparateLine
295324 val sep : String = colWidths.map(" -" * _).addString(sb, " +" , " +" , " +\n " ).toString()
296325
297326 // column names
298- rows.head.zipWithIndex.map { case (cell, i) =>
299- if (truncate > 0 ) {
300- StringUtils .leftPad(cell, colWidths(i))
301- } else {
302- StringUtils .rightPad(cell, colWidths(i))
303- }
304- }.addString(sb, " |" , " |" , " |\n " )
305-
327+ paddedRows.head.addString(sb, " |" , " |" , " |\n " )
306328 sb.append(sep)
307329
308330 // data
309- rows.tail.foreach {
310- _.zipWithIndex.map { case (cell, i) =>
311- if (truncate > 0 ) {
312- StringUtils .leftPad(cell.toString, colWidths(i))
313- } else {
314- StringUtils .rightPad(cell.toString, colWidths(i))
315- }
316- }.addString(sb, " |" , " |" , " |\n " )
317- }
318-
331+ paddedRows.tail.foreach(_.addString(sb, " |" , " |" , " |\n " ))
319332 sb.append(sep)
320333 } else {
321334 // Extended display mode enabled
@@ -346,7 +359,7 @@ class Dataset[T] private[sql](
346359 }
347360
348361 // Print a footer
349- if (vertical && data .isEmpty) {
362+ if (vertical && rows.tail .isEmpty) {
350363 // In a vertical mode, print an empty row set explicitly
351364 sb.append(" (0 rows)\n " )
352365 } else if (hasMoreData) {
@@ -3209,6 +3222,19 @@ class Dataset[T] private[sql](
32093222 }
32103223 }
32113224
/**
 * Render rows of this Dataset as strings via `getRows`, pickle them in batches, and hand
 * the resulting iterator to `PythonRDD.serveIterator`.
 *
 * @param _numRows Requested number of rows; clamped to the range [0, Int.MaxValue - 1].
 * @param truncate Passed through unchanged to `getRows`.
 * @param vertical Passed through unchanged to `getRows`.
 * @return Whatever `PythonRDD.serveIterator` returns for the served iterator.
 */
private[sql] def getRowsToPython(
    _numRows: Int,
    truncate: Int,
    vertical: Boolean): Array[Any] = {
  EvaluatePython.registerPicklers()
  // Clamp below Int.MaxValue because getRows fetches one row more than requested.
  val rowLimit = math.min(math.max(_numRows, 0), Int.MaxValue - 1)
  val stringRows = getRows(rowLimit, truncate, vertical).map(_.toArray).toArray
  val rowType = ArrayType(ArrayType(StringType))
  val pickled: Iterator[Array[Byte]] = new SerDeUtil.AutoBatchedPickler(
    stringRows.iterator.map(row => EvaluatePython.toJava(row, rowType)))
  PythonRDD.serveIterator(pickled, " serve-GetRows")
}
3237+
32123238 /**
32133239 * Collect a Dataset as ArrowPayload byte arrays and serve to PySpark.
32143240 */
0 commit comments