Skip to content

Commit 6805df8

Browse files
committed
addressed comments and fixed test
1 parent 939b7c4 commit 6805df8

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,9 +82,9 @@ private[sql] object StatFunctions {
8282
/** Generate a table of frequencies for the elements of two columns. */
8383
private[sql] def crossTabulate(df: DataFrame, col1: String, col2: String): DataFrame = {
8484
val tableName = s"${col1}_$col2"
85-
val distinctCol2 = df.select(col2).distinct.orderBy(col2).collect()
85+
val distinctCol2 = df.select(col2).distinct.collect().sortBy(_.get(0).toString)
8686
val columnSize = distinctCol2.size
87-
require(columnSize < 1e5, s"The number of distinct values for $col2, can't " +
87+
require(columnSize < 1e4, s"The number of distinct values for $col2, can't " +
8888
s"exceed 1e5. Currently $columnSize")
8989
var i = 0
9090
val col2Map = distinctCol2.map { r =>

sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,10 @@ class DataFrameStatSuite extends FunSuite {
3333
val crosstab = df.stat.crosstab("a", "b")
3434
val columnNames = crosstab.schema.fieldNames
3535
assert(columnNames(0) === "a_b")
36-
assert(columnNames(1) === "1")
37-
assert(columnNames(2) === "2")
36+
assert(columnNames(1) === "0")
37+
assert(columnNames(2) === "1")
3838
val rows: Array[Row] = crosstab.collect()
39-
var count: Integer = 1
39+
var count: Integer = 0
4040
rows.foreach { row =>
4141
assert(row.get(0).toString === count.toString)
4242
assert(row.getLong(1) === 1L)

0 commit comments

Comments
 (0)