diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index 5a0c9a66b858a..3c680282a13dc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -113,7 +113,7 @@ private[sql] object StatFunctions extends Logging {
       if (element == null) "null" else element.toString
     }
     // get the distinct values of column 2, so that we can make them the column names
-    val distinctCol2: Map[Any, Int] =
+    val distinctCol2: Map[String, Int] =
       counts.map(e => cleanElement(e.get(1))).distinct.zipWithIndex.toMap
     val columnSize = distinctCol2.size
     require(columnSize < 1e4, s"The number of distinct values for $col2, can't " +
@@ -128,7 +128,7 @@ private[sql] object StatFunctions extends Logging {
         countsRow.setLong(columnIndex + 1, row.getLong(2))
       }
       // the value of col1 is the first value, the rest are the counts
-      countsRow.setString(0, cleanElement(col1Item.toString))
+      countsRow.setString(0, cleanElement(col1Item))
       countsRow
     }.toSeq
     // Back ticks can't exist in DataFrame column names, therefore drop them. To be able to accept
@@ -139,7 +139,7 @@ private[sql] object StatFunctions extends Logging {
     // In the map, the column names (._1) are not ordered by the index (._2). This was the bug in
     // SPARK-8681. We need to explicitly sort by the column index and assign the column names.
     val headerNames = distinctCol2.toSeq.sortBy(_._2).map { r =>
-      StructField(cleanColumnName(r._1.toString), LongType)
+      StructField(cleanColumnName(r._1), LongType)
     }
     val schema = StructType(StructField(tableName, StringType) +: headerNames)
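
For context, a minimal self-contained sketch of the two facts this diff relies on. This is plain Scala, not the Spark source; the object name CrosstabSketch and the sample values are invented for illustration. It shows (1) that cleanElement always returns a String, so Map[String, Int] is the precise key type and the dropped .toString calls were no-ops, and (2) that Map iteration order is unspecified in general, which is why the SPARK-8681 fix sorts by the stored index before assigning column names.

object CrosstabSketch {
  def main(args: Array[String]): Unit = {
    // Same shape as the helper in the diff: always produces a String.
    def cleanElement(element: Any): String =
      if (element == null) "null" else element.toString

    // (1) Because cleanElement returns String, the distinct-value index map
    // can be typed Map[String, Int]; calling .toString on its keys later
    // (as the removed code did) is redundant.
    val rawCol2Values: Seq[Any] = Seq("b", null, 1, "a", 2.5, "b")
    val distinctCol2: Map[String, Int] =
      rawCol2Values.map(cleanElement).distinct.zipWithIndex.toMap

    // (2) Iterating a Map does not, in general, yield entries in insertion
    // order (the SPARK-8681 bug); sorting by the stored index (._2) restores
    // the first-seen order before the names are used as columns.
    val headerNames: Seq[String] = distinctCol2.toSeq.sortBy(_._2).map(_._1)
    println(headerNames) // List(b, null, 1, a, 2.5) -- first-seen order
  }
}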