File tree Expand file tree Collapse file tree 2 files changed +5
-5
lines changed
main/scala/org/apache/spark/sql/execution/stat
test/scala/org/apache/spark/sql Expand file tree Collapse file tree 2 files changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -82,9 +82,9 @@ private[sql] object StatFunctions {
8282 /** Generate a table of frequencies for the elements of two columns. */
8383 private [sql] def crossTabulate (df : DataFrame , col1 : String , col2 : String ): DataFrame = {
8484 val tableName = s " ${col1}_ $col2"
85- val distinctCol2 = df.select(col2).distinct.orderBy(col2).collect( )
85+ val distinctCol2 = df.select(col2).distinct.collect().sortBy(_.get( 0 ).toString )
8686 val columnSize = distinctCol2.size
87- require(columnSize < 1e5 , s " The number of distinct values for $col2, can't " +
87+ require(columnSize < 1e4 , s " The number of distinct values for $col2, can't " +
8888 s " exceed 1e5. Currently $columnSize" )
8989 var i = 0
9090 val col2Map = distinctCol2.map { r =>
Original file line number Diff line number Diff line change @@ -33,10 +33,10 @@ class DataFrameStatSuite extends FunSuite {
3333 val crosstab = df.stat.crosstab(" a" , " b" )
3434 val columnNames = crosstab.schema.fieldNames
3535 assert(columnNames(0 ) === " a_b" )
36- assert(columnNames(1 ) === " 1 " )
37- assert(columnNames(2 ) === " 2 " )
36+ assert(columnNames(1 ) === " 0 " )
37+ assert(columnNames(2 ) === " 1 " )
3838 val rows : Array [Row ] = crosstab.collect()
39- var count : Integer = 1
39+ var count : Integer = 0
4040 rows.foreach { row =>
4141 assert(row.get(0 ).toString === count.toString)
4242 assert(row.getLong(1 ) === 1L )
You can’t perform that action at this time.
0 commit comments