@@ -110,8 +110,12 @@ private[sql] object StatFunctions extends Logging {
110110 logWarning(" The maximum limit of 1e6 pairs have been collected, which may not be all of " +
111111 " the pairs. Please try reducing the amount of distinct items in your columns." )
112112 }
/**
 * Renders a cell value as a string so it can be used as a map key or a row label.
 *
 * @param element the raw value read from a row; may be null
 * @return the placeholder string for a null value, otherwise `element.toString`
 */
def cleanElement(element: Any): String = element match {
  // A null cell has no toString; substitute the fixed placeholder instead.
  case null => " "
  case value => value.toString
}
113116 // get the distinct values of column 2, so that we can make them the column names
114- val distinctCol2 : Map [Any , Int ] = counts.map(_.get(1 )).distinct.zipWithIndex.toMap
117+ val distinctCol2 : Map [Any , Int ] =
118+ counts.map(e => cleanElement(e.get(1 ))).distinct.zipWithIndex.toMap
115119 val columnSize = distinctCol2.size
116120 require(columnSize < 1e4 , s " The number of distinct values for $col2, can't " +
117121 s " exceed 1e4. Currently $columnSize" )
@@ -121,15 +125,23 @@ private[sql] object StatFunctions extends Logging {
121125 // row.get(0) is column 1
122126 // row.get(1) is column 2
123127 // row.get(2) is the frequency
124- countsRow.setLong(distinctCol2.get(row.get(1 )).get + 1 , row.getLong(2 ))
128+ val columnIndex = distinctCol2.get(cleanElement(row.get(1 ))).get
129+ countsRow.setLong(columnIndex + 1 , row.getLong(2 ))
125130 }
126131 // the value of col1 is the first value, the rest are the counts
127- countsRow.update(0 , UTF8String .fromString(col1Item.toString ))
132+ countsRow.update(0 , UTF8String .fromString(cleanElement( col1Item) ))
128133 countsRow
129134 }.toSeq
/**
 * Sanitizes a value so it can serve as a DataFrame column name.
 *
 * Back ticks can't exist in DataFrame column names, so they are replaced here. Per the
 * original note, names containing special keywords or `.` are meant to be wrapped in
 * back ticks when referenced, which is why the name itself must not carry any.
 *
 * @param name the candidate column name
 * @return `name` with every occurrence of the back-tick pattern substituted
 */
def cleanColumnName(name: String): String = {
  val backTick = " `"
  val substitute = " "
  // String.replace treats both arguments literally (no regex interpretation).
  name.replace(backTick, substitute)
}
130140 // In the map, the column names (._1) are not ordered by the index (._2). This was the bug in
131141 // SPARK-8681. We need to explicitly sort by the column index and assign the column names.
132- val headerNames = distinctCol2.toSeq.sortBy(_._2).map(r => StructField (r._1.toString, LongType ))
142+ val headerNames = distinctCol2.toSeq.sortBy(_._2).map { r =>
143+ StructField (cleanColumnName(r._1.toString), LongType )
144+ }
133145 val schema = StructType (StructField (tableName, StringType ) +: headerNames)
134146
135147 new DataFrame (df.sqlContext, LocalRelation (schema.toAttributes, table)).na.fill(0.0 )
0 commit comments