Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,37 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
}

/**
* Checks the validity of data column names. Hive metastore disallows the table to use comma in
* data column names. Partition columns do not have such a restriction. Views do not have such
* a restriction.
* Checks the validity of data column names. Hive metastore disallows commas in top-level data
* column names, and some special characters (',', ':', and ';') in nested column names.
* Partition columns do not have such a restriction. Views do not have such a restriction.
*/
private def verifyDataSchema(
tableName: TableIdentifier, tableType: CatalogTableType, dataSchema: StructType): Unit = {
if (tableType != VIEW) {
dataSchema.map(_.name).foreach { colName =>
if (colName.contains(",")) {
throw new AnalysisException("Cannot create a table having a column whose name contains " +
s"commas in Hive metastore. Table: $tableName; Column: $colName")
val invalidChars = Seq(",", ":", ";")
// Recursively validates the field names of a nested struct schema, throwing an
// AnalysisException on the first field whose name contains one of `invalidChars`.
def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { f =>
  // Validate the name before dispatching on the type. Matching on StructType first would
  // skip the name check for fields that are themselves structs, letting an invalid name on
  // an intermediate struct field slip through.
  if (invalidChars.exists(f.name.contains)) {
    val invalidCharsString = invalidChars.map(c => s"'$c'").mkString(", ")
    val errMsg = "Cannot create a table having a nested column whose name contains " +
      s"invalid characters ($invalidCharsString) in Hive metastore. Table: $tableName; " +
      s"Column: ${f.name}"
    throw new AnalysisException(errMsg)
  }
  f.dataType match {
    // Descend into deeper struct levels so their field names are validated as well.
    case st: StructType => verifyNestedColumnNames(st)
    case _ =>
  }
}

// Top-level column names are only rejected when they contain a comma; for struct-typed
// columns the nested field names are validated separately.
dataSchema.foreach { field =>
  if (field.name.contains(",")) {
    throw new AnalysisException("Cannot create a table having a column whose name " +
      s"contains commas in Hive metastore. Table: $tableName; Column: ${field.name}")
  }
  field.dataType match {
    case nested: StructType => verifyNestedColumnNames(nested)
    case _ =>
  }
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.HiveExternalCatalog
import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, CONVERT_METASTORE_PARQUET}
import org.apache.spark.sql.hive.orc.OrcFileOperator
Expand Down Expand Up @@ -2248,4 +2249,22 @@ class HiveDDLSuite
checkAnswer(spark.table("t4"), Row(0, 0))
}
}

// Saving a Hive table must fail when a nested struct field name contains ',', ':' or ';'.
test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") {
  val expectedMsg = "Cannot create a table having a nested column whose name contains invalid " +
    "characters (',', ':', ';') in Hive metastore."

  for (badName <- Seq("nested,column", "nested:column", "nested;column")) {
    withTable("t") {
      // Wrap a single literal in a struct so the offending name sits one level below the
      // top-level column.
      val error = intercept[AnalysisException] {
        val nestedField = lit(0).as(badName)
        spark.range(1)
          .select(struct(nestedField).as("toplevel"))
          .write
          .format("hive")
          .saveAsTable("t")
      }
      assert(error.getMessage.contains(expectedMsg))
    }
  }
}
}