/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.sources.v2

import java.util
import java.util.concurrent.ConcurrentHashMap

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.spark.sql.catalog.v2.{CatalogV2Implicits, Identifier, TableCatalog, TableChange, TestTableCatalog}
import org.apache.spark.sql.catalog.v2.expressions.Transform
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException}
import org.apache.spark.sql.sources.v2.reader.{Batch, InputPartition, PartitionReader, PartitionReaderFactory, Scan, ScanBuilder}
import org.apache.spark.sql.sources.v2.writer.{BatchWrite, DataWriter, DataWriterFactory, SupportsTruncate, WriteBuilder, WriterCommitMessage}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
// This is currently in the spark-sql module because the read and write APIs are not in catalyst.
// TODO(rdblue): when the v2 source API is in catalyst, merge with TestTableCatalog/InMemoryTable
class TestInMemoryTableCatalog extends TableCatalog {
  import CatalogV2Implicits._

  private val tables: util.Map[Identifier, InMemoryTable] =
    new ConcurrentHashMap[Identifier, InMemoryTable]()
  private var _name: Option[String] = None

  override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
    _name = Some(name)
  }

  override def name: String = _name.get

  override def listTables(namespace: Array[String]): Array[Identifier] = {
    tables.keySet.asScala.filter(_.namespace.sameElements(namespace)).toArray
  }

  override def loadTable(ident: Identifier): Table = {
    Option(tables.get(ident)) match {
      case Some(table) =>
        table
      case _ =>
        throw new NoSuchTableException(ident)
    }
  }

  override def createTable(
      ident: Identifier,
      schema: StructType,
      partitions: Array[Transform],
      properties: util.Map[String, String]): Table = {

    if (tables.containsKey(ident)) {
      throw new TableAlreadyExistsException(ident)
    }

    if (partitions.nonEmpty) {
      throw new UnsupportedOperationException(
        s"Catalog $name: Partitioned tables are not supported")
    }

    val table = new InMemoryTable(s"$name.${ident.quoted}", schema, properties)

    tables.put(ident, table)

    table
  }

  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
    Option(tables.get(ident)) match {
      case Some(table) =>
        val properties = TestTableCatalog.applyPropertiesChanges(table.properties, changes)
        val schema = TestTableCatalog.applySchemaChanges(table.schema, changes)
        val newTable = new InMemoryTable(table.name, schema, properties, table.data)

        tables.put(ident, newTable)

        newTable
      case _ =>
        throw new NoSuchTableException(ident)
    }
  }

  override def dropTable(ident: Identifier): Boolean = Option(tables.remove(ident)).isDefined

  def clearTables(): Unit = {
    tables.clear()
  }
}
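
// A minimal usage sketch, not part of the change above, showing the expected catalog
// lifecycle: initialize, create, load, drop. The catalog name "test_catalog", the
// identifier "ns1.test_table", and the schema are illustrative assumptions.
private object TestInMemoryTableCatalogUsageExample {
  def run(): Unit = {
    val catalog = new TestInMemoryTableCatalog
    catalog.initialize("test_catalog", CaseInsensitiveStringMap.empty())

    val ident = Identifier.of(Array("ns1"), "test_table")
    val schema = new StructType().add("id", "long").add("data", "string")

    // createTable registers the table; calling it again with the same identifier
    // would throw TableAlreadyExistsException.
    val table = catalog.createTable(
      ident, schema, Array.empty[Transform], new util.HashMap[String, String]())

    assert(catalog.loadTable(ident) eq table)
    assert(catalog.listTables(Array("ns1")).contains(ident))

    // dropTable returns true when the identifier was present.
    assert(catalog.dropTable(ident))
  }
}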

/**
 * A simple in-memory table. Rows are stored as a buffered group produced by each output task.
 */
private class InMemoryTable(
    val name: String,
    val schema: StructType,
    override val properties: util.Map[String, String])
  extends Table with SupportsRead with SupportsWrite {

  def this(
      name: String,
      schema: StructType,
      properties: util.Map[String, String],
      data: Array[BufferedRows]) = {
    this(name, schema, properties)
    replaceData(data)
  }

  @volatile var data: Array[BufferedRows] = Array.empty

  def replaceData(buffers: Array[BufferedRows]): Unit = synchronized {
    data = buffers
  }

  override def capabilities: util.Set[TableCapability] = Set(
    TableCapability.BATCH_READ, TableCapability.BATCH_WRITE, TableCapability.TRUNCATE).asJava

  override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
    // ScanBuilder is a SAM interface, so this lambda implements build(). The map call
    // snapshots the current buffers; later writes do not affect an already-built scan.
    () => new InMemoryBatchScan(data.map(_.asInstanceOf[InputPartition]))
  }

  class InMemoryBatchScan(data: Array[InputPartition]) extends Scan with Batch {
    override def readSchema(): StructType = schema

    override def toBatch: Batch = this

    override def planInputPartitions(): Array[InputPartition] = data

    override def createReaderFactory(): PartitionReaderFactory = BufferedRowsReaderFactory
  }

  override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = {
    new WriteBuilder with SupportsTruncate {
      private var shouldTruncate: Boolean = false

      // SupportsTruncate: overwrite the whole table instead of appending.
      override def truncate(): WriteBuilder = {
        shouldTruncate = true
        this
      }

      override def buildForBatch(): BatchWrite = {
        if (shouldTruncate) TruncateAndAppend else Append
      }
    }
  }

  // Replaces all existing data with the buffers produced by this write.
  private object TruncateAndAppend extends BatchWrite {
    override def createBatchWriterFactory(): DataWriterFactory = {
      BufferedRowsWriterFactory
    }

    override def commit(messages: Array[WriterCommitMessage]): Unit = {
      replaceData(messages.map(_.asInstanceOf[BufferedRows]))
    }

    override def abort(messages: Array[WriterCommitMessage]): Unit = {
    }
  }

  // Concatenates the buffers produced by this write onto the existing data.
  private object Append extends BatchWrite {
    override def createBatchWriterFactory(): DataWriterFactory = {
      BufferedRowsWriterFactory
    }

    override def commit(messages: Array[WriterCommitMessage]): Unit = {
      replaceData(data ++ messages.map(_.asInstanceOf[BufferedRows]))
    }

    override def abort(messages: Array[WriterCommitMessage]): Unit = {
    }
  }
}
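
// A hedged sketch, not part of the change above, of how the scan path is exercised:
// build a Scan, plan its input partitions, and drain each partition through the reader
// factory, the same sequence Spark's batch read path follows.
private object InMemoryScanExample {
  def readAll(table: InMemoryTable): Seq[InternalRow] = {
    val batch = table.newScanBuilder(CaseInsensitiveStringMap.empty()).build().toBatch
    val readerFactory = batch.createReaderFactory()
    batch.planInputPartitions().flatMap { partition =>
      val reader = readerFactory.createReader(partition)
      val rows = new mutable.ArrayBuffer[InternalRow]()
      // PartitionReader contract: next() advances and reports availability,
      // get() returns the current row.
      while (reader.next()) {
        rows += reader.get()
      }
      reader.close()
      rows
    }.toSeq
  }
}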

private class BufferedRows extends WriterCommitMessage with InputPartition with Serializable {
  val rows = new mutable.ArrayBuffer[InternalRow]()
}

private object BufferedRowsReaderFactory extends PartitionReaderFactory {
  override def createReader(partition: InputPartition): PartitionReader[InternalRow] = {
    new BufferedRowsReader(partition.asInstanceOf[BufferedRows])
  }
}

private class BufferedRowsReader(partition: BufferedRows) extends PartitionReader[InternalRow] {
  // Starts before the first row so that the first next() call advances to index 0.
  private var index: Int = -1

  override def next(): Boolean = {
    index += 1
    index < partition.rows.length
  }

  override def get(): InternalRow = partition.rows(index)

  override def close(): Unit = {}
}

private object BufferedRowsWriterFactory extends DataWriterFactory {
  override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] = {
    new BufferWriter
  }
}

private class BufferWriter extends DataWriter[InternalRow] {
  private val buffer = new BufferedRows

  // Copy each row before buffering: Spark may reuse the same InternalRow instance
  // across write() calls.
  override def write(row: InternalRow): Unit = buffer.rows.append(row.copy())

  // The buffered rows double as the commit message sent back to the driver.
  override def commit(): WriterCommitMessage = buffer

  override def abort(): Unit = {}
}
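
// A hedged sketch, not part of the change above, of the write round trip: a task-side
// writer buffers copies of rows, its commit message carries them to the driver, and
// the BatchWrite installs them in the table. The single-row payload is an illustrative
// assumption; the in-memory table does not validate rows against its schema.
private object InMemoryWriteExample {
  def overwriteWithOneRow(table: InMemoryTable): Unit = {
    val builder = table.newWriteBuilder(CaseInsensitiveStringMap.empty())
    // Request truncate semantics so the commit replaces any existing data.
    val batchWrite = builder.asInstanceOf[SupportsTruncate].truncate().buildForBatch()

    val writer = batchWrite.createBatchWriterFactory().createWriter(0, 0L)
    writer.write(InternalRow(1L))
    val message = writer.commit()

    batchWrite.commit(Array(message))
    assert(table.data.map(_.rows.size).sum == 1)
  }
}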