From ae1d05927abc88d0ccc2c3b85640853635e4dc75 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Dec 2020 18:59:04 +0800 Subject: [PATCH 1/5] [SPARK-33892][SQL] Display char/varchar in DESC and SHOW CREATE TABLE --- .../spark/sql/execution/command/tables.scala | 12 +++++---- .../datasources/v2/DescribeTableExec.scala | 5 +++- .../spark/sql/CharVarcharTestSuite.scala | 27 +++++++++++++++++++ .../spark/sql/HiveCharVarcharTestSuite.scala | 9 +++++++ 4 files changed, 47 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index cf2a6ffb2c68..0fcf8f271704 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -631,7 +631,7 @@ case class DescribeTableCommand( } describeSchema(catalog.lookupRelation(table).schema, result, header = false) } else { - val metadata = catalog.getTableMetadata(table) + val metadata = catalog.getTableRawMetadata(table) if (metadata.schema.isEmpty) { // In older version(prior to 2.1) of Spark, the table schema can be empty and should be // inferred at runtime. 
We should still support it. @@ -782,9 +782,11 @@ case class DescribeColumnCommand( None } + val dataType = CharVarcharUtils.getRawType(field.metadata) + .getOrElse(field.dataType).catalogString val buffer = ArrayBuffer[Row]( Row("col_name", field.name), - Row("data_type", field.dataType.catalogString), + Row("data_type", dataType), Row("comment", comment.getOrElse("NULL")) ) if (isExtended) { @@ -1111,7 +1113,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) throw new AnalysisException( s"SHOW CREATE TABLE is not supported on a temporary view: ${table.identifier}") } else { - val tableMetadata = catalog.getTableMetadata(table) + val tableMetadata = catalog.getTableRawMetadata(table) // TODO: [SPARK-28692] unify this after we unify the // CREATE TABLE syntax for hive serde and data source table. @@ -1262,7 +1264,7 @@ case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val tableMetadata = catalog.getTableMetadata(table) + val tableMetadata = catalog.getTableRawMetadata(table) val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { throw new AnalysisException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 0ca442baeea2..3ae89ad6bbac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, 
GenericRowWithSchema} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table} import org.apache.spark.sql.types.StructType @@ -31,6 +31,9 @@ case class DescribeTableExec( table: Table, isExtended: Boolean) extends V2CommandExec { + + override def producedAttributes: AttributeSet = outputSet + private val toRow = { RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 8ab8c37d5e79..bc8d35889705 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -443,6 +443,14 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { ("c1 IN (c2)", true))) } } + + test("DESCRIBE TABLE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + checkAnswer(sql("desc t").selectExpr("data_type").where("data_type like '%char%'"), + Seq(Row("char(5)"), Row("varchar(3)"))) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. 
@@ -603,6 +611,25 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } } + + test("DESCRIBE COLUMN w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + checkAnswer(sql("desc t v").selectExpr("info_value").where("info_value like '%char%'"), + Row("varchar(3)")) + checkAnswer(sql("desc t c").selectExpr("info_value").where("info_value like '%char%'"), + Row("char(5)")) + } + } + + test("SHOW CREATE TABLE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + val rest = sql("SHOW CREATE TABLE t").head().getString(0) + assert(rest.contains("VARCHAR(3)")) + assert(rest.contains("CHAR(5)")) + } + } } class DSV2CharVarcharTestSuite extends CharVarcharTestSuite diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala index f48cfb8dfb89..3968039bc1e7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -41,6 +41,15 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet } super.afterAll() } + + test("SHOW CREATE TABLE AS SERDE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + val rest = sql("SHOW CREATE TABLE t AS SERDE").head().getString(0) + assert(rest.contains("VARCHAR(3)")) + assert(rest.contains("CHAR(5)")) + } + } } class HiveCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with TestHiveSingleton { From bf6a173d2a281e63abf0f202b4e52f330147e64c Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 01:02:15 +0800 Subject: [PATCH 2/5] addr comments --- .../sql/execution/datasources/v2/DescribeTableExec.scala | 3 --- .../execution/datasources/v2/ShowTablePropertiesExec.scala | 2 -- 
.../spark/sql/execution/datasources/v2/V2CommandExec.scala | 4 ++++ .../scala/org/apache/spark/sql/CharVarcharTestSuite.scala | 1 + 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 3ae89ad6bbac..018b8ecb47c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -31,9 +31,6 @@ case class DescribeTableExec( table: Table, isExtended: Boolean) extends V2CommandExec { - - override def producedAttributes: AttributeSet = outputSet - private val toRow = { RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 7ceee1edee18..70c8ffc1e7a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -30,8 +30,6 @@ case class ShowTablePropertiesExec( catalogTable: Table, propertyKey: Option[String]) extends V2CommandExec { - override def producedAttributes: AttributeSet = AttributeSet(output) - override protected def run(): Seq[InternalRow] = { import scala.collection.JavaConverters._ val toRow = RowEncoder(schema).resolveAndBind().createSerializer() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index 7738f26dfd26..6b193674cc71 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.AttributeSet import org.apache.spark.sql.execution.SparkPlan /** @@ -55,4 +56,7 @@ abstract class V2CommandExec extends SparkPlan { } override def children: Seq[SparkPlan] = Nil + + override def producedAttributes: AttributeSet = outputSet + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index bc8d35889705..4677012dc1a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -612,6 +612,7 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } + // TODO: Move this test to super after SPARK-33875 implements DESCRIBE COLUMN for v2 test("DESCRIBE COLUMN w/ char/varchar") { withTable("t") { sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") From 13d955e2f3fb717880b69e649128bbcba415934c Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 10:16:08 +0800 Subject: [PATCH 3/5] build --- .../sql/execution/datasources/v2/DescribeTableExec.scala | 2 +- .../scala/org/apache/spark/sql/CharVarcharTestSuite.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 018b8ecb47c1..0ca442baeea2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table} import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 4677012dc1a1..bb8e07d525a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -612,8 +612,8 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } - // TODO: Move this test to super after SPARK-33875 implements DESCRIBE COLUMN for v2 - test("DESCRIBE COLUMN w/ char/varchar") { + // TODO(SPARK-33875): Move these tests to super after these statements for v2 implemented + test("SPARK-33892: DESCRIBE COLUMN w/ char/varchar") { withTable("t") { sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") checkAnswer(sql("desc t v").selectExpr("info_value").where("info_value like '%char%'"), @@ -623,7 +623,7 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } - test("SHOW CREATE TABLE w/ char/varchar") { + test("SPARK-33892: SHOW CREATE TABLE w/ char/varchar") { withTable("t") { sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") val rest = sql("SHOW CREATE TABLE t").head().getString(0) From 2adf7f94a1b0b2f64525f0062c4916e916176afb Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 10:23:55 +0800 Subject: [PATCH 4/5] build --- 
.../sql/execution/datasources/v2/ShowTablePropertiesExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 70c8ffc1e7a2..6d3a94ef1563 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** From 057f1ffe9002df782555b6d7d7eebc5b50521c86 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 15:20:48 +0800 Subject: [PATCH 5/5] todo --- .../scala/org/apache/spark/sql/CharVarcharTestSuite.scala | 5 +++-- .../org/apache/spark/sql/HiveCharVarcharTestSuite.scala | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index bb8e07d525a2..9d4b7c4f82ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -444,7 +444,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } - test("DESCRIBE TABLE w/ char/varchar") { + test("SPARK-33892: DESCRIBE TABLE w/ char/varchar") { withTable("t") { sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") checkAnswer(sql("desc 
t").selectExpr("data_type").where("data_type like '%char%'"), @@ -612,7 +612,7 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } - // TODO(SPARK-33875): Move these tests to super after these statements for v2 implemented + // TODO(SPARK-33875): Move these tests to super after DESCRIBE COLUMN for v2 is implemented test("SPARK-33892: DESCRIBE COLUMN w/ char/varchar") { withTable("t") { sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") @@ -623,6 +623,7 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } + // TODO(SPARK-33898): Move these tests to super after SHOW CREATE TABLE for v2 is implemented test("SPARK-33892: SHOW CREATE TABLE w/ char/varchar") { withTable("t") { sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala index 3968039bc1e7..bb7918c881c7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -42,7 +42,7 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet super.afterAll() } - test("SHOW CREATE TABLE AS SERDE w/ char/varchar") { + test("SPARK-33892: SHOW CREATE TABLE AS SERDE w/ char/varchar") { withTable("t") { sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") val rest = sql("SHOW CREATE TABLE t AS SERDE").head().getString(0)