
Commit f49a0b3

more improvement

1 parent f479221

File tree: 4 files changed, 26 insertions(+), 21 deletions(-)

R/pkg/R/DataFrame.R

Lines changed: 2 additions & 3 deletions
@@ -213,12 +213,11 @@ setMethod("showDF",
           signature(x = "SparkDataFrame"),
           function(x, numRows = 20, truncate = TRUE, vertical = FALSE) {
             if (is.logical(truncate) && truncate) {
-              s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20),
-                               vertical, FALSE)
+              s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(20), vertical)
             } else {
               truncate2 <- as.numeric(truncate)
               s <- callJMethod(x@sdf, "showString", numToInt(numRows), numToInt(truncate2),
-                               vertical, FALSE)
+                               vertical)
             }
             cat(s)
           })
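
Both branches above implement the same coercion that the Python front end below performs with `isinstance(truncate, bool)` and `int(truncate)`: a logical TRUE selects the JVM-side default width of 20 characters, while any other value is treated as an explicit column width, with 0 meaning no truncation. A minimal Scala sketch of that rule, assuming an Either encoding of the two input shapes; the helper name is illustrative, not Spark API:

// Illustrative helper, not Spark code: normalize the user-facing truncate
// argument to the integer width expected by the JVM-side showString.
object TruncateRule {
  def truncateWidth(truncate: Either[Boolean, Int]): Int = truncate match {
    case Left(flag) => if (flag) 20 else 0 // TRUE -> default width, FALSE -> 0 (no truncation)
    case Right(n)   => n                   // numeric input is used as the width directly
  }

  def main(args: Array[String]): Unit = {
    println(truncateWidth(Left(true)))  // 20, matches numToInt(20) above
    println(truncateWidth(Right(10)))   // 10, matches numToInt(truncate2) above
  }
}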

python/pyspark/sql/dataframe.py

Lines changed: 5 additions & 5 deletions
@@ -209,7 +209,7 @@ def writeStream(self):
         Interface for saving the content of the streaming :class:`DataFrame` out into external
         storage.

-        .. note:: Experimental.
+        .. note:: Evolving.

         :return: :class:`DataStreamWriter`
         """
@@ -285,7 +285,7 @@ def isStreaming(self):
         :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming
         source present.

-        .. note:: Experimental
+        .. note:: Evolving
         """
         return self._jdf.isStreaming()

@@ -325,9 +325,9 @@ def show(self, n=20, truncate=True, vertical=False):
         name | Bob
         """
         if isinstance(truncate, bool) and truncate:
-            print(self._jdf.showString(n, 20, vertical, False))
+            print(self._jdf.showString(n, 20, vertical))
         else:
-            print(self._jdf.showString(n, int(truncate), vertical, False))
+            print(self._jdf.showString(n, int(truncate), vertical))

     def __repr__(self):
         return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
@@ -368,7 +368,7 @@ def withWatermark(self, eventTime, delayThreshold):
         latest record that has been processed in the form of an interval
         (e.g. "1 minute" or "5 hours").

-        .. note:: Experimental
+        .. note:: Evolving

         >>> sdf.select('name', sdf.time.cast('timestamp')).withWatermark('time', '10 minutes')
         DataFrame[name: string, time: timestamp]
sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 18 additions & 13 deletions
@@ -242,14 +242,16 @@ class Dataset[T] private[sql](
    * @param vertical If set to true, prints output rows vertically (one line per column value).
    */
   private[sql] def showString(
-      _numRows: Int,
-      truncate: Int = 20,
-      vertical: Boolean = false,
-      isInternal: Boolean = false): String = {
+      _numRows: Int, truncate: Int = 20, vertical: Boolean = false): String = {
     val numRows = _numRows.max(0)
-    val takeResult = if (isInternal) toDF().takeInternal(numRows + 1) else toDF().take(numRows + 1)
-    val hasMoreData = takeResult.length > numRows
-    val data = takeResult.take(numRows)
+    val takeResult = toDF().take(numRows + 1)
+    showString(takeResult, numRows, truncate, vertical)
+  }
+
+  private def showString(
+      dataWithOneMoreRow: Array[Row], numRows: Int, truncate: Int, vertical: Boolean): String = {
+    val hasMoreData = dataWithOneMoreRow.length > numRows
+    val data = dataWithOneMoreRow.take(numRows)

     lazy val timeZone =
       DateTimeUtils.getTimeZone(sparkSession.sessionState.conf.sessionLocalTimeZone)
@@ -684,14 +686,17 @@ class Dataset[T] private[sql](
   } else {
     println(showString(numRows, truncate = 0))
   }
-  // scalastyle:on println

-  // scalastyle:off println
   // An internal version of `show`, which won't set execution id and trigger listeners.
-  private[sql] def showInternal(numRows: Int, truncate: Boolean): Unit = if (truncate) {
-    println(showString(numRows, truncate = 20, isInternal = true))
-  } else {
-    println(showString(numRows, truncate = 0, isInternal = true))
+  private[sql] def showInternal(_numRows: Int, truncate: Boolean): Unit = {
+    val numRows = _numRows.max(0)
+    val takeResult = toDF().takeInternal(numRows + 1)
+
+    if (truncate) {
+      println(showString(takeResult, numRows, truncate = 20, vertical = false))
+    } else {
+      println(showString(takeResult, numRows, truncate = 0, vertical = false))
+    }
   }
   // scalastyle:on println

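The heart of the Scala change is splitting `showString` into a fetch step and a pure formatting step: the public entry point collects numRows + 1 rows, and the new private overload inspects that extra row to decide whether a truncation footer is needed. That separation lets `showInternal` reuse the formatter while substituting `takeInternal` (which avoids setting an execution id and triggering listeners) for `take`. A minimal, self-contained sketch of the idiom; all names and the footer text are illustrative, not Spark's:

// Sketch of the "fetch numRows + 1" idiom: collecting one extra row lets a
// pure formatter detect truncation without running a separate count job.
object ShowStringSketch {
  // Stand-in for toDF().take(numRows + 1) or toDF().takeInternal(numRows + 1).
  def fetchWithOneMore(all: Seq[String], numRows: Int): Seq[String] =
    all.take(numRows + 1)

  // Stand-in for the new private showString(dataWithOneMoreRow, ...).
  def render(dataWithOneMoreRow: Seq[String], numRows: Int): String = {
    val hasMoreData = dataWithOneMoreRow.length > numRows
    val body = dataWithOneMoreRow.take(numRows).mkString("\n")
    if (hasMoreData) body + s"\nonly showing top $numRows rows" else body
  }

  def main(args: Array[String]): Unit = {
    val rows = (1 to 25).map(i => s"row $i")
    println(render(fetchWithOneMore(rows, 20), 20)) // prints 20 rows plus the footer
  }
}
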
sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala

Lines changed: 1 addition & 0 deletions
@@ -102,6 +102,7 @@ case class AnalyzeColumnCommand(

     val rowCount = statsRow.getLong(0)
     val columnStats = attributesToAnalyze.zipWithIndex.map { case (attr, i) =>
+      // According to `ColumnStat.statExprs`, the stats struct always has 6 fields.
       (attr.name, ColumnStat.rowToColumnStat(statsRow.getStruct(i + 1, 6), attr))
     }.toMap
     (rowCount, columnStats)
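
The added comment ties the hard-coded 6 in `getStruct(i + 1, 6)` to `ColumnStat.statExprs`. For context, a hedged sketch of the six per-column statistics that struct is assumed to carry; the field names and order follow the `ColumnStat` case class of this era of Spark and should be read as an assumption, not a spec:

// Assumed shape of the 6-field stats struct consumed by rowToColumnStat;
// illustrative only, not the actual Spark definition.
case class ColumnStatSketch(
    distinctCount: BigInt,  // approximate number of distinct values
    min: Option[Any],       // minimum value, when defined for the column type
    max: Option[Any],       // maximum value, when defined for the column type
    nullCount: BigInt,      // number of null values
    avgLen: Long,           // average value length in bytes
    maxLen: Long)           // maximum value length in bytes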
