
Commit 03a91f8

address comments
1 parent 9004fba commit 03a91f8

11 files changed, +119 −35 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 32 additions & 17 deletions

@@ -847,9 +847,9 @@ class Analyzer(
    */
   object ResolveTempViews extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
-      case u @ UnresolvedRelation(ident, _, _) =>
-        lookupTempView(ident).getOrElse(u)
-      case i @ InsertIntoStatement(UnresolvedRelation(ident, _, _), _, _, _, _) =>
+      case u @ UnresolvedRelation(ident, _, isStreaming) =>
+        lookupTempView(ident, isStreaming).getOrElse(u)
+      case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _, _, _) =>
         lookupTempView(ident)
           .map(view => i.copy(table = view))
           .getOrElse(i)

@@ -862,15 +862,22 @@ class Analyzer(
         lookupTempView(ident).map(_ => ResolvedView(ident.asIdentifier)).getOrElse(u)
     }

-    def lookupTempView(identifier: Seq[String]): Option[LogicalPlan] = {
+    def lookupTempView(
+        identifier: Seq[String], isStreaming: Boolean = false): Option[LogicalPlan] = {
       // Permanent View can't refer to temp views, no need to lookup at all.
       if (isResolvingView) return None

-      identifier match {
+      val tmpView = identifier match {
         case Seq(part1) => v1SessionCatalog.lookupTempView(part1)
         case Seq(part1, part2) => v1SessionCatalog.lookupGlobalTempView(part1, part2)
         case _ => None
       }
+
+      if (isStreaming && tmpView.nonEmpty && !tmpView.get.isStreaming) {
+        throw new AnalysisException("The temp view related to non-streaming relation is " +
+          "not supported in readStream.table().")
+      }
+      tmpView
     }
   }

@@ -897,11 +904,12 @@ class Analyzer(
     def apply(plan: LogicalPlan): LogicalPlan = ResolveTempViews(plan).resolveOperatorsUp {
       case u: UnresolvedRelation =>
         lookupV2Relation(u.multipartIdentifier, u.options, u.isStreaming)
-          .map {
-            case rel: DataSourceV2Relation =>
-              val ident = rel.identifier.get
-              SubqueryAlias(rel.catalog.get.name +: ident.namespace :+ ident.name, rel)
-            case o => o
+          .map { relation =>
+            val (catalog, ident) = relation match {
+              case ds: DataSourceV2Relation => (ds.catalog, ds.identifier.get)
+              case s: StreamingRelationV2 => (s.catalog, s.identifier.get)
+            }
+            SubqueryAlias(catalog.get.name +: ident.namespace :+ ident.name, relation)
           }.getOrElse(u)

       case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident)) =>

@@ -941,8 +949,8 @@ class Analyzer(
       CatalogV2Util.loadTable(catalog, ident) match {
         case Some(table) =>
           if (isStreaming) {
-            Some(StreamingRelationV2(
-              None, table.name, table, options, table.schema.toAttributes, None))
+            Some(StreamingRelationV2(None, table.name, table, options,
+              table.schema.toAttributes, Some(catalog), Some(ident), None))
           } else {
             Some(DataSourceV2Relation.create(table, Some(catalog), Some(ident), options))
           }

@@ -1038,16 +1046,23 @@ class Analyzer(
       lazy val loaded = CatalogV2Util.loadTable(catalog, ident).map {
         case v1Table: V1Table =>
           if (isStreaming) {
-            UnresolvedCatalogRelation(v1Table.v1Table, options, isStreaming = true)
+            SubqueryAlias(
+              catalog.name +: ident.asMultipartIdentifier,
+              UnresolvedCatalogRelation(v1Table.v1Table, options, isStreaming = true))
           } else {
             v1SessionCatalog.getRelation(v1Table.v1Table, options)
           }
         case table =>
           if (isStreaming) {
-            val tableMeta = v1SessionCatalog.getTableMetadata(ident.asTableIdentifier)
-            StreamingRelationV2(
-              None, table.name, table, options, table.schema.toAttributes,
-              Some(UnresolvedCatalogRelation(tableMeta, isStreaming = true)))
+            val v1Fallback = table match {
+              case withFallback: V2TableWithV1Fallback =>
+                Some(UnresolvedCatalogRelation(withFallback.v1Table, isStreaming = true))
+              case _ => None
+            }
+            SubqueryAlias(
+              catalog.name +: ident.asMultipartIdentifier,
+              StreamingRelationV2(None, table.name, table, options, table.schema.toAttributes,
+                Some(catalog), Some(ident), v1Fallback))
           } else {
             SubqueryAlias(
               catalog.name +: ident.asMultipartIdentifier,
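
A minimal usage sketch of the new guard in lookupTempView, assuming a hypothetical SparkSession named spark (the view name is illustrative, not part of this commit):

// A temp view backed by a batch (non-streaming) relation.
spark.range(10).createOrReplaceTempView("batch_view")

// Reading it through the streaming path now fails at analysis time with:
//   org.apache.spark.sql.AnalysisException: The temp view related to
//   non-streaming relation is not supported in readStream.table().
spark.readStream.table("batch_view")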

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala

Lines changed: 9 additions & 1 deletion

@@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.quoteIdentifier
-import org.apache.spark.sql.connector.catalog.CatalogManager
+import org.apache.spark.sql.connector.catalog.{CatalogManager, Table}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.CaseInsensitiveStringMap

@@ -695,3 +695,11 @@ case class HiveTableRelation(
     dataCols = dataCols.map(_.newInstance()),
     partitionCols = partitionCols.map(_.newInstance()))
 }
+
+/**
+ * A V2 table with V1 fallback support. This is used to fall back to the V1 table when the
+ * V2 one doesn't implement specific capabilities that V1 already has.
+ */
+trait V2TableWithV1Fallback extends Table {
+  def v1Table: CatalogTable
+}
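
A minimal implementation sketch of the new trait; the class name and the empty V2 capability set are assumptions for illustration, not part of this commit:

import java.util

import org.apache.spark.sql.catalyst.catalog.{CatalogTable, V2TableWithV1Fallback}
import org.apache.spark.sql.connector.catalog.TableCapability
import org.apache.spark.sql.types.StructType

// Hypothetical connector table: it advertises no V2 capabilities of its own,
// but exposes the V1 CatalogTable so the analyzer can build the
// UnresolvedCatalogRelation fallback shown in Analyzer.scala above.
class ExampleV1BackedTable(override val v1Table: CatalogTable)
  extends V2TableWithV1Fallback {
  override def name(): String = v1Table.identifier.unquotedString
  override def schema(): StructType = v1Table.schema
  override def capabilities(): util.Set[TableCapability] = util.Collections.emptySet()
}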

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala

Lines changed: 3 additions & 1 deletion

@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.streaming
 import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics}
-import org.apache.spark.sql.connector.catalog.{Table, TableProvider}
+import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableProvider}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.util.CaseInsensitiveStringMap

@@ -37,6 +37,8 @@ case class StreamingRelationV2(
     table: Table,
     extraOptions: CaseInsensitiveStringMap,
     output: Seq[Attribute],
+    catalog: Option[CatalogPlugin],
+    identifier: Option[Identifier],
     v1Relation: Option[LogicalPlan])
   extends LeafNode with MultiInstanceRelation {
   override lazy val resolved = v1Relation.forall(_.resolved)
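
With the two extra fields, every extractor of StreamingRelationV2 gains two slots (as the call-site updates below show), and rules can read the resolved catalog path straight off the relation. A small hypothetical helper, just to illustrate the shape:

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2

// Hypothetical helper: recover the fully qualified table name, mirroring
// the SubqueryAlias the analyzer builds in Analyzer.scala above.
def qualifiedName(plan: LogicalPlan): Option[String] = plan.collectFirst {
  case StreamingRelationV2(_, _, _, _, _, Some(catalog), Some(ident), _) =>
    (catalog.name +: ident.namespace :+ ident.name).mkString(".")
}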

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala

Lines changed: 1 addition & 1 deletion

@@ -296,7 +296,7 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan]
       getStreamingRelation(tableMeta, extraOptions)

     case s @ StreamingRelationV2(
-        _, _, table, extraOptions, _, Some(UnresolvedCatalogRelation(tableMeta, _, true))) =>
+        _, _, table, extraOptions, _, _, _, Some(UnresolvedCatalogRelation(tableMeta, _, true))) =>
       import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._
       if (table.isInstanceOf[SupportsRead]
           && table.supportsAny(MICRO_BATCH_READ, CONTINUOUS_READ)) {

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala

Lines changed: 2 additions & 1 deletion

@@ -43,7 +43,8 @@ object TableCapabilityCheck extends (LogicalPlan => Unit) {
     case r: DataSourceV2Relation if !r.table.supports(BATCH_READ) =>
       failAnalysis(s"Table ${r.table.name()} does not support batch scan.")

-    case r: StreamingRelationV2 if !r.table.supportsAny(MICRO_BATCH_READ, CONTINUOUS_READ) =>
+    case r: StreamingRelationV2
+        if !r.table.supportsAny(MICRO_BATCH_READ, CONTINUOUS_READ) && r.v1Relation.isEmpty =>
       throw new AnalysisException(s"Table ${r.table.name()} does not support either " +
         "micro-batch or continuous scan.")
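
Design note: a streaming relation that supports neither MICRO_BATCH_READ nor CONTINUOUS_READ is now allowed through this check as long as it carries a v1Relation fallback, so the V1 streaming path (see the FindDataSourceTable change above) can take over instead of failing analysis.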

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala

Lines changed: 1 addition & 1 deletion

@@ -90,7 +90,7 @@ class MicroBatchExecution(
         StreamingExecutionRelation(source, output)(sparkSession)
       })

-    case s @ StreamingRelationV2(src, srcName, table: SupportsRead, options, output, v1) =>
+    case s @ StreamingRelationV2(src, srcName, table: SupportsRead, options, output, _, _, v1) =>
       val dsStr = if (src.nonEmpty) s"[${src.get}]" else ""
       val v2Disabled = disabledSources.contains(src.getOrElse(None).getClass.getCanonicalName)
       if (!v2Disabled && table.supports(TableCapability.MICRO_BATCH_READ)) {

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala

Lines changed: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ class ContinuousExecution(
     var nextSourceId = 0
     import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._
     val _logicalPlan = analyzedPlan.transform {
-      case s @ StreamingRelationV2(ds, sourceName, table: SupportsRead, options, output, _) =>
+      case s @ StreamingRelationV2(ds, sourceName, table: SupportsRead, options, output, _, _, _) =>
        val dsStr = if (ds.nonEmpty) s"[${ds.get}]" else ""
        if (!table.supports(TableCapability.CONTINUOUS_READ)) {
          throw new UnsupportedOperationException(

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala

Lines changed: 2 additions & 0 deletions

@@ -83,6 +83,8 @@ abstract class MemoryStreamBase[A : Encoder](sqlContext: SQLContext) extends Spa
       new MemoryStreamTable(this),
       CaseInsensitiveStringMap.empty(),
       attributes,
+      None,
+      None,
       None)
   }

sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala

Lines changed: 2 additions & 1 deletion

@@ -232,7 +232,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
     Dataset.ofRows(
       sparkSession,
       StreamingRelationV2(
-        Some(provider), source, table, dsOptions, table.schema.toAttributes, v1Relation))
+        Some(provider), source, table, dsOptions,
+        table.schema.toAttributes, None, None, v1Relation))

     // fallback to v1
     // TODO (SPARK-27483): we should move this fallback logic to an analyzer rule.

sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala

Lines changed: 2 additions & 0 deletions

@@ -46,6 +46,8 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession {
       table,
       CaseInsensitiveStringMap.empty(),
       TableCapabilityCheckSuite.schema.toAttributes,
+      None,
+      None,
       v1Relation)
   }
