
Commit 22d6ac4

gatorsmile authored and cmonkey committed
[SPARK-19229][SQL] Disallow Creating Hive Source Tables when Hive Support is Not Enabled
### What changes were proposed in this pull request?

It is odd to allow creating Hive source tables when using InMemoryCatalog, since we are unable to operate on them afterwards. This PR blocks users from creating Hive source tables when Hive support is not enabled.

### How was this patch tested?

Fixed the existing test cases.

Author: gatorsmile <[email protected]>

Closes apache#16587 from gatorsmile/blockHiveTable.
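For illustration, here is a minimal sketch of the behavior this patch enforces, assuming a Spark build without Hive support (so the session falls back to InMemoryCatalog); the session setup and table names are hypothetical:

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

// Assumption: this build has no Hive support, so the catalog is InMemoryCatalog.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("block-hive-tables-sketch")
  .getOrCreate()

// Data source tables still work fine with the in-memory catalog.
spark.sql("CREATE TABLE ok_tab (name STRING, age INT) USING parquet")

// Without a USING clause, CREATE TABLE parses as a Hive source table.
// This patch rejects it at analysis time instead of recording a table
// that can never be read or written.
try {
  spark.sql("CREATE TABLE hive_tab (name STRING, age INT)")
} catch {
  case e: AnalysisException =>
    println(e.getMessage) // expected to mention: Hive support is required to CREATE Hive TABLE
}
```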
1 parent 6ee7b90 commit 22d6ac4

18 files changed (+72, -93 lines)

python/pyspark/sql/tests.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -1717,8 +1717,8 @@ def test_list_tables(self):
         self.assertEquals(spark.catalog.listTables(), [])
         self.assertEquals(spark.catalog.listTables("some_db"), [])
         spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab")
-        spark.sql("CREATE TABLE tab1 (name STRING, age INT)")
-        spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT)")
+        spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
+        spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT) USING parquet")
         tables = sorted(spark.catalog.listTables(), key=lambda t: t.name)
         tablesDefault = sorted(spark.catalog.listTables("default"), key=lambda t: t.name)
         tablesSomeDb = sorted(spark.catalog.listTables("some_db"), key=lambda t: t.name)
@@ -1796,8 +1796,8 @@ def test_list_columns(self):
         spark = self.spark
         spark.catalog._reset()
         spark.sql("CREATE DATABASE some_db")
-        spark.sql("CREATE TABLE tab1 (name STRING, age INT)")
-        spark.sql("CREATE TABLE some_db.tab2 (nickname STRING, tolerance FLOAT)")
+        spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
+        spark.sql("CREATE TABLE some_db.tab2 (nickname STRING, tolerance FLOAT) USING parquet")
         columns = sorted(spark.catalog.listColumns("tab1"), key=lambda c: c.name)
         columnsDefault = sorted(spark.catalog.listColumns("tab1", "default"), key=lambda c: c.name)
         self.assertEquals(columns, columnsDefault)
```

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 0 additions & 16 deletions

```diff
@@ -370,22 +370,6 @@ trait CheckAnalysis extends PredicateHelper {
              |Conflicting attributes: ${conflictingAttributes.mkString(",")}
            """.stripMargin)
 
-      case s: SimpleCatalogRelation =>
-        failAnalysis(
-          s"""
-             |Hive support is required to select over the following tables:
-             |${s.catalogTable.identifier}
-           """.stripMargin)
-
-      // TODO: We need to consolidate this kind of checks for InsertIntoTable
-      // with the rule of PreWriteCheck defined in extendedCheckRules.
-      case InsertIntoTable(s: SimpleCatalogRelation, _, _, _, _) =>
-        failAnalysis(
-          s"""
-             |Hive support is required to insert into the following tables:
-             |${s.catalogTable.identifier}
-           """.stripMargin)
-
       case InsertIntoTable(t, _, _, _, _)
         if !t.isInstanceOf[LeafNode] ||
           t.isInstanceOf[Range] ||
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala

Lines changed: 10 additions & 10 deletions

```diff
@@ -156,8 +156,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
 
   test("the table type of an external table should be EXTERNAL_TABLE") {
     val catalog = newBasicCatalog()
-    val table =
-      newTable("external_table1", "db2").copy(tableType = CatalogTableType.EXTERNAL)
+    val table = newTable("external_table1", "db2").copy(tableType = CatalogTableType.EXTERNAL)
     catalog.createTable(table, ignoreIfExists = false)
     val actual = catalog.getTable("db2", "external_table1")
     assert(actual.tableType === CatalogTableType.EXTERNAL)
@@ -278,7 +277,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
       schema = new StructType()
        .add("HelLo", "int", nullable = false)
        .add("WoRLd", "int", nullable = true),
-      provider = Some("hive"),
+      provider = Some(defaultProvider),
      partitionColumnNames = Seq("WoRLd"),
      bucketSpec = Some(BucketSpec(4, Seq("HelLo"), Nil)))
    catalog.createTable(tbl, ignoreIfExists = false)
@@ -330,7 +329,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
       .add("col2", "string")
       .add("partCol1", "int")
       .add("partCol2", "string"),
-      provider = Some("hive"),
+      provider = Some(defaultProvider),
      partitionColumnNames = Seq("partCol1", "partCol2"))
    catalog.createTable(table, ignoreIfExists = false)
 
@@ -357,7 +356,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
       .add("col2", "string")
       .add("partCol1", "int")
       .add("partCol2", "string"),
-      provider = Some("hive"),
+      provider = Some(defaultProvider),
      partitionColumnNames = Seq("partCol1", "partCol2"))
    catalog.createTable(table, ignoreIfExists = false)
 
@@ -505,7 +504,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
       .add("col2", "string")
       .add("partCol1", "int")
       .add("partCol2", "string"),
-      provider = Some("hive"),
+      provider = Some(defaultProvider),
      partitionColumnNames = Seq("partCol1", "partCol2"))
    catalog.createTable(table, ignoreIfExists = false)
 
@@ -726,7 +725,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
      tableType = CatalogTableType.MANAGED,
      storage = CatalogStorageFormat.empty,
      schema = new StructType().add("a", "int").add("b", "string"),
-      provider = Some("hive")
+      provider = Some(defaultProvider)
    )
 
    catalog.createTable(table, ignoreIfExists = false)
@@ -746,7 +745,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
        Some(Utils.createTempDir().getAbsolutePath),
        None, None, None, false, Map.empty),
      schema = new StructType().add("a", "int").add("b", "string"),
-      provider = Some("hive")
+      provider = Some(defaultProvider)
    )
    catalog.createTable(externalTable, ignoreIfExists = false)
    assert(!exists(db.locationUri, "external_table"))
@@ -763,7 +762,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
       .add("col2", "string")
       .add("partCol1", "int")
       .add("partCol2", "string"),
-      provider = Some("hive"),
+      provider = Some(defaultProvider),
      partitionColumnNames = Seq("partCol1", "partCol2"))
    catalog.createTable(table, ignoreIfExists = false)
 
@@ -829,6 +828,7 @@ abstract class CatalogTestUtils {
   // Unimplemented methods
   val tableInputFormat: String
   val tableOutputFormat: String
+  val defaultProvider: String
   def newEmptyCatalog(): ExternalCatalog
 
   // These fields must be lazy because they rely on fields that are not implemented yet
@@ -901,7 +901,7 @@
       .add("col2", "string")
       .add("a", "int")
       .add("b", "string"),
-      provider = Some("hive"),
+      provider = Some(defaultProvider),
      partitionColumnNames = Seq("a", "b"),
      bucketSpec = Some(BucketSpec(4, Seq("col1"), Nil)))
  }
```
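The new abstract `defaultProvider` keeps the shared suite provider-agnostic: the in-memory suites below plug in `parquet`, while a Hive-backed suite could keep exercising Hive-format tables. A hedged sketch of such a counterpart (this suite is not part of the diff; the class names and formats here are assumptions):

```scala
// Hypothetical Hive-side override, not in this commit's diff: a suite backed by
// a Hive catalog would keep testing Hive tables just by overriding defaultProvider.
protected override val utils: CatalogTestUtils = new CatalogTestUtils {
  override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat"
  override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
  override val defaultProvider: String = "hive" // assumption: Hive suites keep the hive provider
  override def newEmptyCatalog(): ExternalCatalog = ??? // e.g. a Hive-backed ExternalCatalog
}
```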

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalogSuite.scala

Lines changed: 1 addition & 0 deletions

```diff
@@ -24,6 +24,7 @@ class InMemoryCatalogSuite extends ExternalCatalogSuite {
   protected override val utils: CatalogTestUtils = new CatalogTestUtils {
     override val tableInputFormat: String = "org.apache.park.SequenceFileInputFormat"
     override val tableOutputFormat: String = "org.apache.park.SequenceFileOutputFormat"
+    override val defaultProvider: String = "parquet"
     override def newEmptyCatalog(): ExternalCatalog = new InMemoryCatalog
   }
 
```
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala

Lines changed: 1 addition & 0 deletions

```diff
@@ -37,6 +37,7 @@ class SessionCatalogSuite extends PlanTest {
   private val utils = new CatalogTestUtils {
     override val tableInputFormat: String = "com.fruit.eyephone.CameraInputFormat"
     override val tableOutputFormat: String = "com.fruit.eyephone.CameraOutputFormat"
+    override val defaultProvider: String = "parquet"
     override def newEmptyCatalog(): ExternalCatalog = new InMemoryCatalog
   }
 
```
sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala

Lines changed: 3 additions & 4 deletions

```diff
@@ -60,7 +60,7 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
       identifier = table.identifier.copy(
         database = Some(
           table.identifier.database.getOrElse(sessionState.catalog.getCurrentDatabase))),
-      tracksPartitionsInCatalog = sparkSession.sessionState.conf.manageFilesourcePartitions)
+      tracksPartitionsInCatalog = sessionState.conf.manageFilesourcePartitions)
     val dataSource: BaseRelation =
       DataSource(
         sparkSession = sparkSession,
@@ -89,7 +89,7 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo
       // partition provider hive, but no partitions in the metastore. The user has to call
       // `msck repair table` to populate the table partitions.
       tracksPartitionsInCatalog = partitionColumnNames.nonEmpty &&
-        sparkSession.sessionState.conf.manageFilesourcePartitions)
+        sessionState.conf.manageFilesourcePartitions)
     // We will return Nil or throw exception at the beginning if the table already exists, so when
     // we reach here, the table should not exist and we should set `ignoreIfExists` to false.
     sessionState.catalog.createTable(newTable, ignoreIfExists = false)
@@ -163,8 +163,7 @@ case class CreateDataSourceTableAsSelectCommand(
       case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty &&
           sparkSession.sqlContext.conf.manageFilesourcePartitions =>
         // Need to recover partitions into the metastore so our saved data is visible.
-        sparkSession.sessionState.executePlan(
-          AlterTableRecoverPartitionsCommand(table.identifier)).toRdd
+        sessionState.executePlan(AlterTableRecoverPartitionsCommand(table.identifier)).toRdd
       case _ =>
     }
   }
```

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala

Lines changed: 2 additions & 1 deletion

```diff
@@ -409,7 +409,8 @@ object HiveOnlyCheck extends (LogicalPlan => Unit) {
     plan.foreach {
       case CreateTable(tableDesc, _, Some(_)) if DDLUtils.isHiveTable(tableDesc) =>
         throw new AnalysisException("Hive support is required to use CREATE Hive TABLE AS SELECT")
-
+      case CreateTable(tableDesc, _, _) if DDLUtils.isHiveTable(tableDesc) =>
+        throw new AnalysisException("Hive support is required to CREATE Hive TABLE")
       case _ => // OK
     }
   }
```
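With the added case, `HiveOnlyCheck` now rejects plain CREATE TABLE as well as CTAS for Hive source tables. A test-style sketch of the two error paths, assuming ScalaTest and a SparkSession `spark` without Hive support in scope:

```scala
import org.apache.spark.sql.AnalysisException
import org.scalatest.Assertions.intercept

// Plain CREATE of a Hive source table (no USING clause): the new case in this patch.
val e1 = intercept[AnalysisException] {
  spark.sql("CREATE TABLE h1 (a INT)")
}
assert(e1.getMessage.contains("Hive support is required to CREATE Hive TABLE"))

// CTAS of a Hive source table: already rejected before this patch.
val e2 = intercept[AnalysisException] {
  spark.sql("CREATE TABLE h2 AS SELECT 1 AS a")
}
assert(e2.getMessage.contains("CREATE Hive TABLE AS SELECT"))
```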

sql/core/src/test/resources/sql-tests/inputs/change-column.sql

Lines changed: 2 additions & 2 deletions

```diff
@@ -1,5 +1,5 @@
 -- Create the origin table
-CREATE TABLE test_change(a INT, b STRING, c INT);
+CREATE TABLE test_change(a INT, b STRING, c INT) using parquet;
 DESC test_change;
 
 -- Change column name (not supported yet)
@@ -47,7 +47,7 @@ CREATE GLOBAL TEMPORARY VIEW global_temp_view(a, b) AS SELECT 1, "one";
 ALTER TABLE global_temp.global_temp_view CHANGE a a INT COMMENT 'this is column a';
 
 -- Change column in partition spec (not supported yet)
-CREATE TABLE partition_table(a INT, b STRING) PARTITIONED BY (c INT, d STRING);
+CREATE TABLE partition_table(a INT, b STRING, c INT, d STRING) USING parquet PARTITIONED BY (c, d);
 ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT;
 
 -- DROP TEST TABLE
```
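Note the syntax shift in the updated tests: Hive DDL declares partition column types inside PARTITIONED BY, while data source tables declare every column in the schema and reference partition columns by name only. A hedged sketch of the equivalent call (table name hypothetical, `spark` assumed in scope):

```scala
// Hive-format DDL (now requires Hive support):
//   CREATE TABLE pt (a INT, b STRING) PARTITIONED BY (c INT, d STRING)

// Data source DDL: all columns in the schema; PARTITIONED BY lists names only.
spark.sql(
  """CREATE TABLE pt (a INT, b STRING, c INT, d STRING)
    |USING parquet
    |PARTITIONED BY (c, d)""".stripMargin)
```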

sql/core/src/test/resources/sql-tests/inputs/describe.sql

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,4 +1,4 @@
-CREATE TABLE t (a STRING, b INT) PARTITIONED BY (c STRING, d STRING);
+CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet PARTITIONED BY (c, d);
 
 ALTER TABLE t ADD PARTITION (c='Us', d=1);
 
```

sql/core/src/test/resources/sql-tests/inputs/show-tables.sql

Lines changed: 2 additions & 2 deletions

```diff
@@ -1,9 +1,9 @@
 -- Test data.
 CREATE DATABASE showdb;
 USE showdb;
-CREATE TABLE show_t1(a String, b Int) PARTITIONED BY (c String, d String);
+CREATE TABLE show_t1(a String, b Int, c String, d String) USING parquet PARTITIONED BY (c, d);
 ALTER TABLE show_t1 ADD PARTITION (c='Us', d=1);
-CREATE TABLE show_t2(b String, d Int);
+CREATE TABLE show_t2(b String, d Int) USING parquet;
 CREATE TEMPORARY VIEW show_t3(e int) USING parquet;
 CREATE GLOBAL TEMP VIEW show_t4 AS SELECT 1 as col1;
 
```
99
