From dbc300edb56b6e813c926b061e780378ee564778 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 4 Jul 2018 13:07:04 +0900 Subject: [PATCH 1/3] Fix --- .../spark/sql/hive/HiveExternalCatalog.scala | 32 +++++++++++++++---- .../sql/hive/execution/SQLQuerySuite.scala | 18 +++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 011a3ba553cb2..49e9fff95ad2b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -138,17 +138,35 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat } /** - * Checks the validity of data column names. Hive metastore disallows the table to use comma in - * data column names. Partition columns do not have such a restriction. Views do not have such - * a restriction. + * Checks the validity of data column names. Hive metastore disallows the table to use some + * special characters (',', ':', and ';') in data column names. Partition columns do not have + * such a restriction. Views do not have such a restriction. */ private def verifyDataSchema( tableName: TableIdentifier, tableType: CatalogTableType, dataSchema: StructType): Unit = { if (tableType != VIEW) { - dataSchema.map(_.name).foreach { colName => - if (colName.contains(",")) { - throw new AnalysisException("Cannot create a table having a column whose name contains " + - s"commas in Hive metastore. Table: $tableName; Column: $colName") + val invalidChars = Seq(",", ":", ";") + def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { f => + f.dataType match { + case st: StructType => verifyNestedColumnNames(st) + case _ if invalidChars.exists(f.name.contains) => + throw new AnalysisException("Cannot create a table having a nested column whose name " + + s"contains invalid characters (${invalidChars.map(c => s"'$c'").mkString(", ")}) " + + s"in Hive metastore. Table: $tableName; Column: ${f.name}") + case _ => + } + } + + dataSchema.foreach { f => + f.dataType match { + // Checks top-level column names + case _ if f.name.contains(",") => + throw new AnalysisException("Cannot create a table having a column whose name " + + s"contains commas in Hive metastore. Table: $tableName; Column: ${f.name}") + // Checks nested column names + case st: StructType => + verifyNestedColumnNames(st) + case _ => } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 828c18a770c80..db4e880671649 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2005,6 +2005,24 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } + test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") { + val expectedMsg = "Cannot create a table having a nested column whose name contains invalid " + + "characters (',', ':', ';') in Hive metastore." + + Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName => + withTable("t") { + val e = intercept[AnalysisException] { + spark.range(1) + .select(struct(lit(0).as(nestedColumnName)).as("toplevel")) + .write + .format("hive") + .saveAsTable("t") + }.getMessage + assert(e.contains(expectedMsg)) + } + } + } + test("SPARK-19912 String literals should be escaped for Hive metastore partition pruning") { withTable("spark_19912") { Seq( From b298522947fc70337131cdb6b8d0c1e6299eedd3 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sun, 15 Jul 2018 18:47:42 +0900 Subject: [PATCH 2/3] Fix --- .../spark/sql/hive/HiveExternalCatalog.scala | 11 ++++++----- .../sql/hive/execution/HiveDDLSuite.scala | 19 +++++++++++++++++++ .../sql/hive/execution/SQLQuerySuite.scala | 18 ------------------ 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 49e9fff95ad2b..876eab61b46a4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -139,8 +139,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat /** * Checks the validity of data column names. Hive metastore disallows the table to use some - * special characters (',', ':', and ';') in data column names. Partition columns do not have - * such a restriction. Views do not have such a restriction. + * special characters (',', ':', and ';') in data column names, including nested column names. + * Partition columns do not have such a restriction. Views do not have such a restriction. */ private def verifyDataSchema( tableName: TableIdentifier, tableType: CatalogTableType, dataSchema: StructType): Unit = { @@ -150,9 +150,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat f.dataType match { case st: StructType => verifyNestedColumnNames(st) case _ if invalidChars.exists(f.name.contains) => - throw new AnalysisException("Cannot create a table having a nested column whose name " + - s"contains invalid characters (${invalidChars.map(c => s"'$c'").mkString(", ")}) " + - s"in Hive metastore. Table: $tableName; Column: ${f.name}") + val errMsg = "Cannot create a table having a nested column whose name contains " + + s"invalid characters (${invalidChars.map(c => s"'$c'").mkString(", ")}) " + + s"in Hive metastore. Table: $tableName; Column: ${f.name}" + throw new AnalysisException(errMsg) case _ => } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 0341c3b378918..31fd4c5a1f996 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} +import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, CONVERT_METASTORE_PARQUET} import org.apache.spark.sql.hive.orc.OrcFileOperator @@ -2248,4 +2249,22 @@ class HiveDDLSuite checkAnswer(spark.table("t4"), Row(0, 0)) } } + + test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") { + val expectedMsg = "Cannot create a table having a nested column whose name contains invalid " + + "characters (',', ':', ';') in Hive metastore." + + Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName => + withTable("t") { + val e = intercept[AnalysisException] { + spark.range(1) + .select(struct(lit(0).as(nestedColumnName)).as("toplevel")) + .write + .format("hive") + .saveAsTable("t") + }.getMessage + assert(e.contains(expectedMsg)) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index db4e880671649..828c18a770c80 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2005,24 +2005,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } - test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") { - val expectedMsg = "Cannot create a table having a nested column whose name contains invalid " + - "characters (',', ':', ';') in Hive metastore." - - Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName => - withTable("t") { - val e = intercept[AnalysisException] { - spark.range(1) - .select(struct(lit(0).as(nestedColumnName)).as("toplevel")) - .write - .format("hive") - .saveAsTable("t") - }.getMessage - assert(e.contains(expectedMsg)) - } - } - } - test("SPARK-19912 String literals should be escaped for Hive metastore partition pruning") { withTable("spark_19912") { Seq( From bcdae885df053959cccf6cfc28269b87603c8b58 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 16 Jul 2018 12:41:54 +0900 Subject: [PATCH 3/3] Fix --- .../org/apache/spark/sql/hive/HiveExternalCatalog.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 876eab61b46a4..68f6da5d93d93 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -150,9 +150,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat f.dataType match { case st: StructType => verifyNestedColumnNames(st) case _ if invalidChars.exists(f.name.contains) => + val invalidCharsString = invalidChars.map(c => s"'$c'").mkString(", ") val errMsg = "Cannot create a table having a nested column whose name contains " + - s"invalid characters (${invalidChars.map(c => s"'$c'").mkString(", ")}) " + - s"in Hive metastore. Table: $tableName; Column: ${f.name}" + s"invalid characters ($invalidCharsString) in Hive metastore. Table: $tableName; " + + s"Column: ${f.name}" throw new AnalysisException(errMsg) case _ => }