From bb8bbef0dad2f79b57db1a9c810dbc918e01763c Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 18 Jun 2014 12:09:43 -0700 Subject: [PATCH 1/9] Split every string in the result of a describe command. --- .../spark/sql/hive/execution/hiveOperators.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala index a83923144916..4eb72e31cbc8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala @@ -445,7 +445,19 @@ case class NativeCommand( if (sideEffectResult.size == 0) { context.emptyResult } else { - val rows = sideEffectResult.map(r => new GenericRow(Array[Any](r))) + // TODO: Need a better way to handle the result of a native command. + // We may want to consider to use JsonMetaDataFormatter in Hive. + val isDescribe = sql.trim.startsWith("describe") + val rows = if (isDescribe) { + // TODO: If we upgrade Hive to 0.13, we need to check the results of + // context.sessionState.isHiveServerQuery() to determine how to split the result. + // This method is introduced by https://issues.apache.org/jira/browse/HIVE-4545. + // Right now, we split every string by any number of consecutive spaces. + sideEffectResult.map( + r => r.split("\\s+")).map(r => new GenericRow(r.asInstanceOf[Array[Any]])) + } else { + sideEffectResult.map(r => new GenericRow(Array[Any](r))) + } context.sparkContext.parallelize(rows, 1) } } From 342fdf74768d7fdbc50692bf04e6ab3a972b9602 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 18 Jun 2014 13:12:48 -0700 Subject: [PATCH 2/9] Split to up to 3 parts. --- .../org/apache/spark/sql/hive/execution/hiveOperators.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala index 4eb72e31cbc8..0eef87b032db 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala @@ -454,7 +454,7 @@ case class NativeCommand( // This method is introduced by https://issues.apache.org/jira/browse/HIVE-4545. // Right now, we split every string by any number of consecutive spaces. sideEffectResult.map( - r => r.split("\\s+")).map(r => new GenericRow(r.asInstanceOf[Array[Any]])) + r => r.trim.split("\\s+", 3)).map(r => new GenericRow(r.asInstanceOf[Array[Any]])) } else { sideEffectResult.map(r => new GenericRow(Array[Any](r))) } From 366f8911af35d3ce7eebdc0bdab1fac8eb245cb3 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 18 Jun 2014 22:54:15 -0700 Subject: [PATCH 3/9] Add describe command. 
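
This replaces the approach of the first two patches (running DESCRIBE as
a Hive native command and splitting its textual output) with a dedicated
DescribeCommand logical plan whose output is three string columns. A
rough sketch of the intended flow, using the names introduced below (the
table name and result rows are illustrative, not from a real run):

    // Hypothetical session against a partitioned Hive table.
    hql("DESCRIBE some_table").collect()
    // MetastoreRelation   => planned as DescribeHiveTableCommand
    // other logical plans => planned as execution.DescribeCommand
    // => Row("key", "int", ...), Row("value", "string", ...), ...

Describing a single column (e.g. "DESCRIBE t col") still falls back to
the native Hive path via NativePlaceholder.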
--- .../sql/catalyst/plans/logical/commands.scala | 13 +++ .../apache/spark/sql/execution/commands.scala | 19 ++++ .../org/apache/spark/sql/hive/HiveQl.scala | 64 +++++++++---- .../spark/sql/hive/HiveStrategies.scala | 16 +++- .../sql/hive/execution/hiveOperators.scala | 61 ++++++++++--- .../sql/hive/execution/HiveQuerySuite.scala | 89 +++++++++++++++++-- 6 files changed, 225 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index 3299e86b8594..77f154db6aec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -60,3 +60,16 @@ case class ExplainCommand(plan: LogicalPlan) extends Command { * Returned for the "CACHE TABLE tableName" and "UNCACHE TABLE tableName" command. */ case class CacheCommand(tableName: String, doCache: Boolean) extends Command + +/** + * Returned for the "Describe tableName" command. [Extended|Formatted|Pretty] is not handled. + */ +case class DescribeCommand( + table: LogicalPlan, + isFormatted: Boolean, + isExtended: Boolean) extends Command { + override def output = Seq( + BoundReference(0, AttributeReference("name", StringType, nullable = false)()), + BoundReference(1, AttributeReference("type", StringType, nullable = false)()), + BoundReference(2, AttributeReference("comment", StringType, nullable = false)())) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 39b3246c875d..04c7199a4e11 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -113,3 +113,22 @@ case class CacheCommand(tableName: String, doCache: Boolean)(@transient context: override def output: Seq[Attribute] = Seq.empty } + +/** + * :: DeveloperApi :: + */ +@DeveloperApi +case class DescribeCommand(child: SparkPlan, output: Seq[Attribute])( + @transient context: SQLContext) + extends LeafNode with Command { + + override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = + child.output.map(field => (field.name, field.dataType.toString, None.toString)) + + override def execute(): RDD[Row] = { + val rows = sideEffectResult.map { + case (name, dataType, comment) => new GenericRow(Array[Any](name, dataType, comment)) + } + context.sparkContext.parallelize(rows, 1) + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 844673f66d10..ca0978c9379a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -52,7 +52,6 @@ private[hive] case class AddFile(filePath: String) extends Command private[hive] object HiveQl { protected val nativeCommands = Seq( "TOK_DESCFUNCTION", - "TOK_DESCTABLE", "TOK_DESCDATABASE", "TOK_SHOW_TABLESTATUS", "TOK_SHOWDATABASES", @@ -120,6 +119,12 @@ private[hive] object HiveQl { "TOK_SWITCHDATABASE" ) + // Commands that we do not need to explain. 
+ protected val noExplainCommands = Seq( + "TOK_CREATETABLE", + "TOK_DESCTABLE" + ) ++ nativeCommands + /** * A set of implicit transformations that allow Hive ASTNodes to be rewritten by transformations * similar to [[catalyst.trees.TreeNode]]. @@ -362,13 +367,19 @@ private[hive] object HiveQl { } } + protected def extractDbNameTableName(tableNameParts: Node): (Option[String], String) = { + val (db, tableName) = + tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { + case Seq(tableOnly) => (None, tableOnly) + case Seq(databaseName, table) => (Some(databaseName), table) + } + + (db, tableName) + } + protected def nodeToPlan(node: Node): LogicalPlan = node match { // Just fake explain for any of the native commands. - case Token("TOK_EXPLAIN", explainArgs) if nativeCommands contains explainArgs.head.getText => - ExplainCommand(NoRelation) - // Create tables aren't native commands due to CTAS queries, but we still don't need to - // explain them. - case Token("TOK_EXPLAIN", explainArgs) if explainArgs.head.getText == "TOK_CREATETABLE" => + case Token("TOK_EXPLAIN", explainArgs) if noExplainCommands contains explainArgs.head.getText => ExplainCommand(NoRelation) case Token("TOK_EXPLAIN", explainArgs) => // Ignore FORMATTED if present. @@ -377,6 +388,34 @@ private[hive] object HiveQl { // TODO: support EXTENDED? ExplainCommand(nodeToPlan(query)) + case Token("TOK_DESCTABLE", describeArgs) => + // Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL + val Some(tableType) :: formatted :: extended :: _ :: Nil = + getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs) + // TODO: support PRETTY? + tableType match { + case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => { + nameParts.head match { + case Token(".", dbName :: tableName :: Nil) => + // It is describing a table with the format like "describe db.table". + val (db, tableName) = extractDbNameTableName(nameParts.head) + DescribeCommand( + UnresolvedRelation(db, tableName, None), formatted.isDefined, extended.isDefined) + case Token(".", dbName :: tableName :: colName :: Nil) => + // It is describing a column with the format like "describe db.table column". + NativePlaceholder + case tableName => + // It is describing a table with the format like "describe table". + DescribeCommand( + UnresolvedRelation(None, tableName.getText, None), + formatted.isDefined, + extended.isDefined) + } + } + // All other cases. + case _ => NativePlaceholder + } + case Token("TOK_CREATETABLE", children) if children.collect { case t@Token("TOK_QUERY", _) => t }.nonEmpty => // TODO: Parse other clauses. @@ -414,11 +453,8 @@ private[hive] object HiveQl { s"Unhandled clauses: ${notImplemented.flatten.map(dumpTree(_)).mkString("\n")}") } - val (db, tableName) = - tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { - case Seq(tableOnly) => (None, tableOnly) - case Seq(databaseName, table) => (Some(databaseName), table) - } + val (db, tableName) = extractDbNameTableName(tableNameParts) + InsertIntoCreatedTable(db, tableName, nodeToPlan(query)) // If its not a "CREATE TABLE AS" like above then just pass it back to hive as a native command. 
@@ -736,11 +772,7 @@ private[hive] object HiveQl { val Some(tableNameParts) :: partitionClause :: Nil = getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs) - val (db, tableName) = - tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { - case Seq(tableOnly) => (None, tableOnly) - case Seq(databaseName, table) => (Some(databaseName), table) - } + val (db, tableName) = extractDbNameTableName(tableNameParts) val partitionKeys = partitionClause.map(_.getChildren.map { // Parse partitions. We also make keys case insensitive. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 0ac0ee9071f3..90d4286da988 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.{SQLContext} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans._ @@ -81,6 +81,20 @@ private[hive] trait HiveStrategies { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case logical.NativeCommand(sql) => NativeCommand(sql, plan.output)(context) :: Nil + case describe: logical.DescribeCommand => { + val resolvedTable = context.executePlan(describe.table).analyzed + resolvedTable match { + case t: MetastoreRelation => + Seq(DescribeHiveTableCommand( + t, describe.output, describe.isFormatted, describe.isExtended)(context)) + case o: LogicalPlan => + if (describe.isFormatted) + logger.info("Formatted is ignored because it is not defined for non-Hive tables.") + if (describe.isExtended) + logger.info("Extended is ignored because it is not defined for non-Hive tables.") + Seq(DescribeCommand(planLater(o), describe.output)(context)) + } + } case _ => Nil } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala index 0eef87b032db..1535c284585c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala @@ -19,8 +19,10 @@ package org.apache.spark.sql.hive.execution import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.metastore.MetaStoreUtils import org.apache.hadoop.hive.ql.Context +import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Hive} import org.apache.hadoop.hive.ql.plan.{TableDesc, FileSinkDesc} import org.apache.hadoop.hive.serde.serdeConstants @@ -445,22 +447,55 @@ case class NativeCommand( if (sideEffectResult.size == 0) { context.emptyResult } else { - // TODO: Need a better way to handle the result of a native command. - // We may want to consider to use JsonMetaDataFormatter in Hive. - val isDescribe = sql.trim.startsWith("describe") - val rows = if (isDescribe) { - // TODO: If we upgrade Hive to 0.13, we need to check the results of - // context.sessionState.isHiveServerQuery() to determine how to split the result. 
- // This method is introduced by https://issues.apache.org/jira/browse/HIVE-4545. - // Right now, we split every string by any number of consecutive spaces. - sideEffectResult.map( - r => r.trim.split("\\s+", 3)).map(r => new GenericRow(r.asInstanceOf[Array[Any]])) - } else { - sideEffectResult.map(r => new GenericRow(Array[Any](r))) - } + val rows = sideEffectResult.map(r => new GenericRow(Array[Any](r))) context.sparkContext.parallelize(rows, 1) } } override def otherCopyArgs = context :: Nil } + +/** + * :: DeveloperApi :: + */ +@DeveloperApi +case class DescribeHiveTableCommand( + table: MetastoreRelation, + output: Seq[Attribute], + isFormatted: Boolean, + isExtended: Boolean)( + @transient context: HiveContext) + extends LeafNode with Command { + + override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = { + val cols: Seq[FieldSchema] = table.hiveQlTable.getCols + val parCols: Seq[FieldSchema] = table.hiveQlTable.getPartCols + val columnInfo = cols.map(field => (field.getName, field.getType, field.getComment)) + val partColumnInfo = parCols.map(field => (field.getName, field.getType, field.getComment)) + + val formattedPart = if (isFormatted) { + (MetaDataFormatUtils.getTableInformation(table.hiveQlTable), null, null) :: Nil + } else { + Nil + } + + val extendedPart = if (isExtended) { + ("Detailed Table Information", table.hiveQlTable.getTTable.toString, null) :: Nil + } else { + Nil + } + + // Trying to mimic the format of Hive's output. But not 100% the same. + columnInfo ++ partColumnInfo ++ Seq(("# Partition Information", null, null)) ++ + partColumnInfo ++ formattedPart ++ extendedPart + } + + override def execute(): RDD[Row] = { + val rows = sideEffectResult.map { + case (name, dataType, comment) => new GenericRow(Array[Any](name, dataType, comment)) + } + context.sparkContext.parallelize(rows, 1) + } + + override def otherCopyArgs = context :: Nil +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index fe698f0fc57b..1e27d0c57473 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -237,13 +237,6 @@ class HiveQuerySuite extends HiveComparisonTest { .map(_.getString(0)) .contains(tableName)) - assertResult(Array(Array("key", "int", "None"), Array("value", "string", "None"))) { - hql(s"DESCRIBE $tableName") - .select('result) - .collect() - .map(_.getString(0).split("\t").map(_.trim)) - } - assert(isExplanation(hql(s"EXPLAIN SELECT key, COUNT(*) FROM $tableName GROUP BY key"))) TestHive.reset() @@ -260,6 +253,88 @@ class HiveQuerySuite extends HiveComparisonTest { assert(Try(q0.count()).isSuccess) } + test("Describe commands") { + hql(s"CREATE TABLE test_describe_commands (key INT, value STRING) PARTITIONED BY (dt STRING)") + + hql( + """FROM src INSERT OVERWRITE TABLE test_describe_commands PARTITION (dt='2008-06-08') + |SELECT key, value + """.stripMargin) + + // Describe a table + assertResult( + Array( + Array("key", "int", null), + Array("value", "string", null), + Array("dt", "string", null), + Array("# Partition Information", null, null), + Array("dt", "string", null)) + ) { + hql("DESCRIBE test_describe_commands") + .select('name, 'type, 'comment) + .collect() + } + + // Describe a table with keyword FORMATTED + // We only + assertResult(6) { + hql("DESCRIBE FORMATTED 
test_describe_commands").count() + } + + // Describe a table + assertResult(6) { + hql("DESCRIBE EXTENDED test_describe_commands").count() + } + + // Describe a table with a fully qualified table name + assertResult( + Array( + Array("key", "int", null), + Array("value", "string", null), + Array("dt", "string", null), + Array("# Partition Information", null, null), + Array("dt", "string", null)) + ) { + hql("DESCRIBE default.test_describe_commands") + .select('name, 'type, 'comment) + .collect() + } + + // Describe a column is a native command + assertResult(Array(Array("value", "string", "from deserializer"))) { + hql("DESCRIBE test_describe_commands value") + .select('result) + .collect() + .map(_.getString(0).split("\t").map(_.trim)) + } + + // Describe a column is a native command + assertResult(Array(Array("value", "string", "from deserializer"))) { + hql("DESCRIBE default.test_describe_commands value") + .select('result) + .collect() + .map(_.getString(0).split("\t").map(_.trim)) + } + + // Describe a partition is a native command + assertResult( + Array( + Array("key", "int", "None"), + Array("value", "string", "None"), + Array("dt", "string", "None"), + Array("", "", ""), + Array("# Partition Information", "", ""), + Array("# col_name", "data_type", "comment"), + Array("", "", ""), + Array("dt", "string", "None")) + ) { + hql("DESCRIBE test_describe_commands PARTITION (dt='2008-06-08')") + .select('result) + .collect() + .map(_.getString(0).split("\t").map(_.trim)) + } + } + test("parse HQL set commands") { // Adapted from its SQL counterpart. val testKey = "spark.sql.key.usedfortestonly" From f1a417ea87b48164cbe47a9ad12c40b7421c0c88 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 18 Jun 2014 23:00:19 -0700 Subject: [PATCH 4/9] Update doc. --- .../org/apache/spark/sql/catalyst/plans/logical/commands.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index 77f154db6aec..5b8c88d9dfb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -62,7 +62,7 @@ case class ExplainCommand(plan: LogicalPlan) extends Command { case class CacheCommand(tableName: String, doCache: Boolean) extends Command /** - * Returned for the "Describe tableName" command. [Extended|Formatted|Pretty] is not handled. + * Returned for the "Describe tableName" command. */ case class DescribeCommand( table: LogicalPlan, From 440c5afd314693faaa840bfa7cdf52a0725eb206 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 18 Jun 2014 23:42:09 -0700 Subject: [PATCH 5/9] rxin's comments. 
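
Review feedback from rxin: document the DescribeCommand parameters, tidy
style (noExplainCommands.contains(...), import cleanup), drop the
logger.info notes from the strategy, and prefix the describe output of a
registered temporary table with a "# Registered as a temporary table"
row. A sketch of the expected shape, mirroring the new test case below
(rows rendered as tuples for brevity):

    // Hypothetical: a case class RDD registered as a temporary table.
    testData.registerAsTable("test_describe_commands2")
    hql("DESCRIBE test_describe_commands2").collect()
    // => ("# Registered as a temporary table", null, null)
    //    ("a", "IntegerType", null)
    //    ("b", "StringType", null)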
--- .../sql/catalyst/plans/logical/commands.scala | 11 ++++- .../apache/spark/sql/execution/commands.scala | 6 ++- .../org/apache/spark/sql/hive/HiveQl.scala | 5 ++- .../spark/sql/hive/HiveStrategies.scala | 6 +-- .../sql/hive/execution/hiveOperators.scala | 4 +- .../sql/hive/execution/HiveQuerySuite.scala | 42 ++++++++++++++----- 6 files changed, 51 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index 5b8c88d9dfb3..25ddfdfab30d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -62,7 +62,16 @@ case class ExplainCommand(plan: LogicalPlan) extends Command { case class CacheCommand(tableName: String, doCache: Boolean) extends Command /** - * Returned for the "Describe tableName" command. + * Returned for the "DESCRIBE tableName" command. + */ + +/** + * Returned for the "DESCRIBE tableName" command. + * @param table The table to be described. + * @param isFormatted True if "DESCRIBE FORMATTED" is used. Otherwise, false. + * It is effective only when the table is a Hive table. + * @param isExtended True if "DESCRIBE EXTENDED" is used. Otherwise, false. + * It is effective only when the table is a Hive table. */ case class DescribeCommand( table: LogicalPlan, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 219a9050903f..acb1b0f4dc22 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -130,8 +130,10 @@ case class DescribeCommand(child: SparkPlan, output: Seq[Attribute])( @transient context: SQLContext) extends LeafNode with Command { - override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = - child.output.map(field => (field.name, field.dataType.toString, None.toString)) + override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = { + Seq(("# Registered as a temporary table", null, null)) ++ + child.output.map(field => (field.name, field.dataType.toString, null)) + } override def execute(): RDD[Row] = { val rows = sideEffectResult.map { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index ca0978c9379a..31b615ce1cae 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -369,7 +369,7 @@ private[hive] object HiveQl { protected def extractDbNameTableName(tableNameParts: Node): (Option[String], String) = { val (db, tableName) = - tableNameParts.getChildren.map{ case Token(part, Nil) => cleanIdentifier(part)} match { + tableNameParts.getChildren.map { case Token(part, Nil) => cleanIdentifier(part) } match { case Seq(tableOnly) => (None, tableOnly) case Seq(databaseName, table) => (Some(databaseName), table) } @@ -379,7 +379,8 @@ private[hive] object HiveQl { protected def nodeToPlan(node: Node): LogicalPlan = node match { // Just fake explain for any of the native commands. 
- case Token("TOK_EXPLAIN", explainArgs) if noExplainCommands contains explainArgs.head.getText => + case Token("TOK_EXPLAIN", explainArgs) + if noExplainCommands.contains(explainArgs.head.getText) => ExplainCommand(NoRelation) case Token("TOK_EXPLAIN", explainArgs) => // Ignore FORMATTED if present. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 90d4286da988..f663b44a944b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive -import org.apache.spark.sql.{SQLContext} +import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans._ @@ -88,10 +88,6 @@ private[hive] trait HiveStrategies { Seq(DescribeHiveTableCommand( t, describe.output, describe.isFormatted, describe.isExtended)(context)) case o: LogicalPlan => - if (describe.isFormatted) - logger.info("Formatted is ignored because it is not defined for non-Hive tables.") - if (describe.isExtended) - logger.info("Extended is ignored because it is not defined for non-Hive tables.") Seq(DescribeCommand(planLater(o), describe.output)(context)) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala index 1535c284585c..2ab47c42fb98 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.hive.execution import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.metastore.MetaStoreUtils +import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.ql.Context -import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Hive} +import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils import org.apache.hadoop.hive.ql.plan.{TableDesc, FileSinkDesc} import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 73b451a54d73..a20f20af8db3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -21,7 +21,9 @@ import scala.util.Try import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ -import org.apache.spark.sql.{SchemaRDD, execution, Row} +import org.apache.spark.sql.{SchemaRDD, Row} + +case class TestData(a: Int, b: String) /** * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution. 
@@ -250,11 +252,11 @@ class HiveQuerySuite extends HiveComparisonTest { assert(Try(q0.count()).isSuccess) } - test("Describe commands") { - hql(s"CREATE TABLE test_describe_commands (key INT, value STRING) PARTITIONED BY (dt STRING)") + test("DESCRIBE commands") { + hql(s"CREATE TABLE test_describe_commands1 (key INT, value STRING) PARTITIONED BY (dt STRING)") hql( - """FROM src INSERT OVERWRITE TABLE test_describe_commands PARTITION (dt='2008-06-08') + """FROM src INSERT OVERWRITE TABLE test_describe_commands1 PARTITION (dt='2008-06-08') |SELECT key, value """.stripMargin) @@ -267,7 +269,7 @@ class HiveQuerySuite extends HiveComparisonTest { Array("# Partition Information", null, null), Array("dt", "string", null)) ) { - hql("DESCRIBE test_describe_commands") + hql("DESCRIBE test_describe_commands1") .select('name, 'type, 'comment) .collect() } @@ -275,12 +277,12 @@ class HiveQuerySuite extends HiveComparisonTest { // Describe a table with keyword FORMATTED // We only assertResult(6) { - hql("DESCRIBE FORMATTED test_describe_commands").count() + hql("DESCRIBE FORMATTED test_describe_commands1").count() } // Describe a table assertResult(6) { - hql("DESCRIBE EXTENDED test_describe_commands").count() + hql("DESCRIBE EXTENDED test_describe_commands1").count() } // Describe a table with a fully qualified table name @@ -292,14 +294,14 @@ class HiveQuerySuite extends HiveComparisonTest { Array("# Partition Information", null, null), Array("dt", "string", null)) ) { - hql("DESCRIBE default.test_describe_commands") + hql("DESCRIBE default.test_describe_commands1") .select('name, 'type, 'comment) .collect() } // Describe a column is a native command assertResult(Array(Array("value", "string", "from deserializer"))) { - hql("DESCRIBE test_describe_commands value") + hql("DESCRIBE test_describe_commands1 value") .select('result) .collect() .map(_.getString(0).split("\t").map(_.trim)) @@ -307,7 +309,7 @@ class HiveQuerySuite extends HiveComparisonTest { // Describe a column is a native command assertResult(Array(Array("value", "string", "from deserializer"))) { - hql("DESCRIBE default.test_describe_commands value") + hql("DESCRIBE default.test_describe_commands1 value") .select('result) .collect() .map(_.getString(0).split("\t").map(_.trim)) @@ -325,11 +327,29 @@ class HiveQuerySuite extends HiveComparisonTest { Array("", "", ""), Array("dt", "string", "None")) ) { - hql("DESCRIBE test_describe_commands PARTITION (dt='2008-06-08')") + hql("DESCRIBE test_describe_commands1 PARTITION (dt='2008-06-08')") .select('result) .collect() .map(_.getString(0).split("\t").map(_.trim)) } + + // Describe a registered temporary table. + val testData: SchemaRDD = + TestHive.sparkContext.parallelize( + TestData(1, "str1") :: + TestData(1, "str2") :: Nil) + testData.registerAsTable("test_describe_commands2") + + assertResult( + Array( + Array("# Registered as a temporary table", null, null), + Array("a", "IntegerType", null), + Array("b", "StringType", null)) + ) { + hql("DESCRIBE test_describe_commands2") + .select('name, 'type, 'comment) + .collect() + } } test("parse HQL set commands") { From 8003cf3a5b4c4b2154a96aa4f89ec101c7bebe0c Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 19 Jun 2014 12:59:54 -0700 Subject: [PATCH 6/9] Generate strings with the format like Hive for unit tests. 
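
Hive prints describe results as left-justified, tab-separated columns.
To reuse Hive's golden answers in HiveComparisonTest, the new
DescribeHiveTableCommand.hiveString renders rows the same way, while
non-deterministic lines and Hive-only header lines ("# Partition
Information", "# col_name") are filtered out before comparison. A
minimal, self-contained sketch of the formatting convention (the object
and method names here are illustrative, not part of the patch):

    object HiveStringFormat extends App {
      val alignment = 20
      val delim = "\t"
      // Left-justify each field to `alignment` characters, join the
      // fields with tabs, and print a null comment as "None", matching
      // the layout of the Hive CLI.
      def row(name: String, dataType: String, comment: String): String =
        String.format("%-" + alignment + "s", name) + delim +
          String.format("%-" + alignment + "s", dataType) + delim +
          String.format("%-" + alignment + "s", Option(comment).getOrElse("None"))
      println(row("key", "int", null))
      println(row("value", "string", null))
    }

The describe_*_json compatibility tests are excluded because returning
describe results as JSON is not supported.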
--- .../sql/catalyst/plans/logical/commands.scala | 8 ++--
 .../apache/spark/sql/hive/HiveContext.scala | 5 ++
 .../org/apache/spark/sql/hive/HiveQl.scala | 46 ++++++++++---------
 .../spark/sql/hive/HiveStrategies.scala | 2 +-
 .../sql/hive/execution/hiveOperators.scala | 45 +++++++++++-------
 .../hive/execution/HiveComparisonTest.scala | 22 ++++++++-
 .../execution/HiveCompatibilitySuite.scala | 15 ++++--
 .../sql/hive/execution/HiveQuerySuite.scala | 6 +--
 8 files changed, 96 insertions(+), 53 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
index 25ddfdfab30d..d04160196300 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
@@ -68,17 +68,15 @@ case class CacheCommand(tableName: String, doCache: Boolean) extends Command
 /**
  * Returned for the "DESCRIBE tableName" command.
  * @param table The table to be described.
- * @param isFormatted True if "DESCRIBE FORMATTED" is used. Otherwise, false.
- *                    It is effective only when the table is a Hive table.
  * @param isExtended True if "DESCRIBE EXTENDED" is used. Otherwise, false.
 *                    It is effective only when the table is a Hive table.
  */
 case class DescribeCommand(
     table: LogicalPlan,
-    isFormatted: Boolean,
     isExtended: Boolean) extends Command {
   override def output = Seq(
-    BoundReference(0, AttributeReference("name", StringType, nullable = false)()),
-    BoundReference(1, AttributeReference("type", StringType, nullable = false)()),
+    // Column names are based on Hive.
+    BoundReference(0, AttributeReference("col_name", StringType, nullable = false)()),
+    BoundReference(1, AttributeReference("data_type", StringType, nullable = false)()),
     BoundReference(2, AttributeReference("comment", StringType, nullable = false)()))
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index cc95b7af0abf..7695242a8160 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.execution.QueryExecutionException
 import org.apache.spark.sql.execution.{Command => PhysicalCommand}
+import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand

 /**
  * Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
@@ -291,6 +292,10 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    * execution is simply passed back to Hive.
    */
  def stringResult(): Seq[String] = executedPlan match {
+    case describeHiveTableCommand: DescribeHiveTableCommand =>
+      // If it is a describe command for a Hive table, we want the output format
+      // to be similar to Hive's.
+ describeHiveTableCommand.hiveString case command: PhysicalCommand => command.sideEffectResult.map(_.toString) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 31b615ce1cae..dbcf892dce58 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -391,30 +391,34 @@ private[hive] object HiveQl { case Token("TOK_DESCTABLE", describeArgs) => // Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL - val Some(tableType) :: formatted :: extended :: _ :: Nil = + val Some(tableType) :: formatted :: extended :: pretty :: Nil = getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs) - // TODO: support PRETTY? - tableType match { - case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => { - nameParts.head match { - case Token(".", dbName :: tableName :: Nil) => - // It is describing a table with the format like "describe db.table". - val (db, tableName) = extractDbNameTableName(nameParts.head) - DescribeCommand( - UnresolvedRelation(db, tableName, None), formatted.isDefined, extended.isDefined) - case Token(".", dbName :: tableName :: colName :: Nil) => - // It is describing a column with the format like "describe db.table column". - NativePlaceholder - case tableName => - // It is describing a table with the format like "describe table". - DescribeCommand( - UnresolvedRelation(None, tableName.getText, None), - formatted.isDefined, - extended.isDefined) + if (formatted.isDefined || pretty.isDefined) { + // FORMATTED and PRETTY are not supported and this statement will be treated as + // a Hive native command. + NativePlaceholder + } else { + tableType match { + case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => { + nameParts.head match { + case Token(".", dbName :: tableName :: Nil) => + // It is describing a table with the format like "describe db.table". + val (db, tableName) = extractDbNameTableName(nameParts.head) + DescribeCommand( + UnresolvedRelation(db, tableName, None), extended.isDefined) + case Token(".", dbName :: tableName :: colName :: Nil) => + // It is describing a column with the format like "describe db.table column". + NativePlaceholder + case tableName => + // It is describing a table with the format like "describe table". + DescribeCommand( + UnresolvedRelation(None, tableName.getText, None), + extended.isDefined) + } } + // All other cases. + case _ => NativePlaceholder } - // All other cases. 
- case _ => NativePlaceholder } case Token("TOK_CREATETABLE", children) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index f663b44a944b..af7687b40429 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -86,7 +86,7 @@ private[hive] trait HiveStrategies { resolvedTable match { case t: MetastoreRelation => Seq(DescribeHiveTableCommand( - t, describe.output, describe.isFormatted, describe.isExtended)(context)) + t, describe.output, describe.isExtended)(context)) case o: LogicalPlan => Seq(DescribeCommand(planLater(o), describe.output)(context)) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala index 2ab47c42fb98..6a4f14edee22 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala @@ -462,32 +462,43 @@ case class NativeCommand( case class DescribeHiveTableCommand( table: MetastoreRelation, output: Seq[Attribute], - isFormatted: Boolean, isExtended: Boolean)( @transient context: HiveContext) extends LeafNode with Command { - override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = { - val cols: Seq[FieldSchema] = table.hiveQlTable.getCols - val parCols: Seq[FieldSchema] = table.hiveQlTable.getPartCols - val columnInfo = cols.map(field => (field.getName, field.getType, field.getComment)) - val partColumnInfo = parCols.map(field => (field.getName, field.getType, field.getComment)) + // Strings with the format like Hive. It is used for result comparison in our unit tests. + lazy val hiveString: Seq[String] = { + val alignment = 20 + val delim = "\t" - val formattedPart = if (isFormatted) { - (MetaDataFormatUtils.getTableInformation(table.hiveQlTable), null, null) :: Nil - } else { - Nil + sideEffectResult.map { + case (name, dataType, comment) => + String.format("%-" + alignment + "s", name) + delim + + String.format("%-" + alignment + "s", dataType) + delim + + String.format("%-" + alignment + "s", Option(comment).getOrElse("None")) } + } - val extendedPart = if (isExtended) { - ("Detailed Table Information", table.hiveQlTable.getTTable.toString, null) :: Nil - } else { - Nil + override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = { + // Trying to mimic the format of Hive's output. But not exactly the same. + var results: Seq[(String, String, String)] = Nil + + val columns: Seq[FieldSchema] = table.hiveQlTable.getCols + val partitionColumns: Seq[FieldSchema] = table.hiveQlTable.getPartCols + results ++= columns.map(field => (field.getName, field.getType, field.getComment)) + if (!partitionColumns.isEmpty) { + val partColumnInfo = + partitionColumns.map(field => (field.getName, field.getType, field.getComment)) + results ++= + partColumnInfo ++ Seq(("# Partition Information", "", "")) ++ + Seq((s"# ${output.get(0).name}", output.get(1).name, output.get(2).name)) ++ partColumnInfo + } + + if (isExtended) { + results ++= Seq(("Detailed Table Information", table.hiveQlTable.getTTable.toString, "")) } - // Trying to mimic the format of Hive's output. But not 100% the same. 
-    columnInfo ++ partColumnInfo ++ Seq(("# Partition Information", null, null)) ++
-      partColumnInfo ++ formattedPart ++ extendedPart
+    results
   }

   override def execute(): RDD[Row] = {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 24c929ff7430..0c9248ae55e6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -144,6 +144,10 @@ abstract class HiveComparisonTest
       case _: SetCommand => Seq("0")
       case _: LogicalNativeCommand => answer.filterNot(nonDeterministicLine).filterNot(_ == "")
       case _: ExplainCommand => answer
+      case _: DescribeCommand =>
+        answer.filterNot(
+          r => nonDeterministicLine(r) || ignoredLine(r)).map(_.trim).filterNot(
+          r => r == "" || r == "\n")
       case plan => if (isSorted(plan)) answer else answer.sorted
     }
     orderedAnswer.map(cleanPaths)
@@ -169,6 +173,16 @@ abstract class HiveComparisonTest
   protected def nonDeterministicLine(line: String) =
     nonDeterministicLineIndicators.exists(line contains _)

+  // This list contains indicators for those lines which do not have actual results and which we
+  // want to ignore.
+  lazy val ignoredLineIndicators = Seq(
+    "# Partition Information",
+    "# col_name"
+  )
+
+  protected def ignoredLine(line: String) =
+    ignoredLineIndicators.exists(line contains _)
+
   /**
    * Removes non-deterministic paths from `str` so cached answers will compare correctly.
    */
@@ -329,11 +343,17 @@ abstract class HiveComparisonTest

         if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && preparedHive != catalyst) {

-          val hivePrintOut = s"== HIVE - ${hive.size} row(s) ==" +: preparedHive
+          val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive
           val catalystPrintOut = s"== CATALYST - ${catalyst.size} row(s) ==" +: catalyst

           val resultComparison = sideBySide(hivePrintOut, catalystPrintOut).mkString("\n")

+          println("hive output")
+          hive.foreach(println)
+
+          println("catalyst printout")
+          catalyst.foreach(println)
+
           if (recomputeCache) {
             logger.warn(s"Clearing cache files for failed test $testCaseName")
             hiveCacheFiles.foreach(_.delete())
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index ee194dbcb77b..0bbfbac2fc3f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -177,7 +177,16 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     // After stop taking the `stringOrError` route, exceptions are thrown from these cases.
     // See SPARK-2129 for details.
     "join_view",
-    "mergejoins_mixed"
+    "mergejoins_mixed",
+
+    // Returning the result of a describe statement as a JSON object is not supported.
+    "describe_table_json",
+    "describe_database_json",
+    "describe_formatted_view_partitioned_json",
+
+    // Hive returns the results of describe as plain text. Comments with multiple lines
+    // introduce extra in the Hive results, which make the result comparison fail.
+ "describe_comment_indent" ) /** @@ -292,11 +301,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "default_partition_name", "delimiter", "desc_non_existent_tbl", - "describe_comment_indent", - "describe_database_json", "describe_formatted_view_partitioned", - "describe_formatted_view_partitioned_json", - "describe_table_json", "diff_part_input_formats", "disable_file_format_check", "drop_function", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index a20f20af8db3..d7966d3e5c08 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -270,7 +270,7 @@ class HiveQuerySuite extends HiveComparisonTest { Array("dt", "string", null)) ) { hql("DESCRIBE test_describe_commands1") - .select('name, 'type, 'comment) + .select('col_name, 'data_type, 'comment) .collect() } @@ -295,7 +295,7 @@ class HiveQuerySuite extends HiveComparisonTest { Array("dt", "string", null)) ) { hql("DESCRIBE default.test_describe_commands1") - .select('name, 'type, 'comment) + .select('col_name, 'data_type, 'comment) .collect() } @@ -347,7 +347,7 @@ class HiveQuerySuite extends HiveComparisonTest { Array("b", "StringType", null)) ) { hql("DESCRIBE test_describe_commands2") - .select('name, 'type, 'comment) + .select('col_name, 'data_type, 'comment) .collect() } } From 656b068edf96528125489f241f54e439252ee332 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 19 Jun 2014 13:13:32 -0700 Subject: [PATCH 7/9] 100 characters. --- .../org/apache/spark/sql/hive/execution/hiveOperators.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala index 6a4f14edee22..797262d69f6e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala @@ -490,8 +490,10 @@ case class DescribeHiveTableCommand( val partColumnInfo = partitionColumns.map(field => (field.getName, field.getType, field.getComment)) results ++= - partColumnInfo ++ Seq(("# Partition Information", "", "")) ++ - Seq((s"# ${output.get(0).name}", output.get(1).name, output.get(2).name)) ++ partColumnInfo + partColumnInfo ++ + Seq(("# Partition Information", "", "")) ++ + Seq((s"# ${output.get(0).name}", output.get(1).name, output.get(2).name)) ++ + partColumnInfo } if (isExtended) { From e7c4e726cfb7840ff466653b0c871c2855b9e081 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 19 Jun 2014 15:10:57 -0700 Subject: [PATCH 8/9] Fix unit test. 
--- .../sql/hive/execution/HiveQuerySuite.scala | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index d7966d3e5c08..f216d8b8dd92 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -266,7 +266,8 @@ class HiveQuerySuite extends HiveComparisonTest { Array("key", "int", null), Array("value", "string", null), Array("dt", "string", null), - Array("# Partition Information", null, null), + Array("# Partition Information", "", ""), + Array("# col_name", "data_type", "comment"), Array("dt", "string", null)) ) { hql("DESCRIBE test_describe_commands1") @@ -274,24 +275,14 @@ class HiveQuerySuite extends HiveComparisonTest { .collect() } - // Describe a table with keyword FORMATTED - // We only - assertResult(6) { - hql("DESCRIBE FORMATTED test_describe_commands1").count() - } - - // Describe a table - assertResult(6) { - hql("DESCRIBE EXTENDED test_describe_commands1").count() - } - // Describe a table with a fully qualified table name assertResult( Array( Array("key", "int", null), Array("value", "string", null), Array("dt", "string", null), - Array("# Partition Information", null, null), + Array("# Partition Information", "", ""), + Array("# col_name", "data_type", "comment"), Array("dt", "string", null)) ) { hql("DESCRIBE default.test_describe_commands1") From b9b9aa50e9c8378f363609ede7c00d57a1653244 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 19 Jun 2014 17:45:39 -0700 Subject: [PATCH 9/9] rxin's comments. --- .../apache/spark/sql/catalyst/plans/logical/commands.scala | 6 +----- .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 1 + .../spark/sql/hive/execution/HiveComparisonTest.scala | 6 ++++-- .../spark/sql/hive/execution/HiveCompatibilitySuite.scala | 4 ++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index d04160196300..1d5f033f0d27 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -62,11 +62,7 @@ case class ExplainCommand(plan: LogicalPlan) extends Command { case class CacheCommand(tableName: String, doCache: Boolean) extends Command /** - * Returned for the "DESCRIBE tableName" command. - */ - -/** - * Returned for the "DESCRIBE tableName" command. + * Returned for the "DESCRIBE [EXTENDED] [dbName.]tableName" command. * @param table The table to be described. * @param isExtended True if "DESCRIBE EXTENDED" is used. Otherwise, false. * It is effective only when the table is a Hive table. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index dbcf892dce58..df761b073a75 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -403,6 +403,7 @@ private[hive] object HiveQl { nameParts.head match { case Token(".", dbName :: tableName :: Nil) => // It is describing a table with the format like "describe db.table". 
+ // TODO: Actually, a user may mean tableName.columnName. Need to resolve this issue. val (db, tableName) = extractDbNameTableName(nameParts.head) DescribeCommand( UnresolvedRelation(db, tableName, None), extended.isDefined) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 0c9248ae55e6..08ef4d9b6bb9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -145,9 +145,11 @@ abstract class HiveComparisonTest case _: LogicalNativeCommand => answer.filterNot(nonDeterministicLine).filterNot(_ == "") case _: ExplainCommand => answer case _: DescribeCommand => + // Filter out non-deterministic lines and lines which do not have actual results but + // can introduce problems because of the way Hive formats these lines. + // Then, remove empty lines. Do not sort the results. answer.filterNot( - r => nonDeterministicLine(r) || ignoredLine(r)).map(_.trim).filterNot( - r => r == "" || r == "\n") + r => nonDeterministicLine(r) || ignoredLine(r)).map(_.trim).filterNot(_ == "") case plan => if (isSorted(plan)) answer else answer.sorted } orderedAnswer.map(cleanPaths) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 0bbfbac2fc3f..cdfc2d0c1738 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -78,7 +78,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "alter_merge", "alter_concatenate_indexed_table", "protectmode2", - "describe_table", + //"describe_table", "describe_comment_nonascii", "udf5", "udf_java_method", @@ -185,7 +185,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "describe_formatted_view_partitioned_json", // Hive returns the results of describe as plain text. Comments with multiple lines - // introduce extra in the Hive results, which make the result comparison fail. + // introduce extra lines in the Hive results, which make the result comparison fail. "describe_comment_indent" )