@@ -43,10 +43,56 @@ import org.apache.spark.sql.internal.SQLConf
4343import org .apache .spark .sql .test .SharedSparkSession
4444import org .apache .spark .sql .types ._
4545
/**
 * Shared test cases checking that file-based data sources report duplicate
 * columns consistently, whether the duplicates are top-level or nested inside
 * a struct. Concrete suites supply the formats to exercise via
 * [[nestedDataSources]].
 */
trait NestedDataSourceSuiteBase extends QueryTest with SharedSparkSession {
  // File formats (e.g. "orc", "parquet", "json") this suite runs against.
  protected val nestedDataSources: Seq[String]

  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
    Seq(
      // Top-level duplicates: read schema contains both "camelcase" and "CamelCase".
      Seq("id AS lowercase", "id + 1 AS camelCase") ->
        new StructType()
          .add("LowerCase", LongType)
          .add("camelcase", LongType)
          .add("CamelCase", LongType),
      // Nested duplicates: the same clash inside a struct column.
      Seq("NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn") ->
        new StructType().add("StructColumn",
          new StructType()
            .add("LowerCase", LongType)
            .add("camelcase", LongType)
            .add("CamelCase", LongType))
    ).foreach { case (selectExpr: Seq[String], caseInsensitiveSchema: StructType) =>
      withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
        // Side-effecting iteration: use foreach, not map (the mapped result
        // was discarded in the original).
        nestedDataSources.foreach { format =>
          withClue(s"format = $format select = ${selectExpr.mkString(",")}") {
            withTempPath { dir =>
              val path = dir.getCanonicalPath
              // Write valid data with case-distinct column names...
              spark
                .range(1L)
                .selectExpr(selectExpr: _*)
                .write.mode("overwrite")
                .format(format)
                .save(path)
              // ...then read it back with a user schema whose fields collide
              // under case-insensitive resolution; this must fail analysis.
              val e = intercept[AnalysisException] {
                spark
                  .read
                  .schema(caseInsensitiveSchema)
                  .format(format)
                  .load(path)
                  .show
              }
              assert(e.getMessage.contains(
                "Found duplicate column(s) in the data schema: `camelcase`"))
            }
          }
        }
      }
    }
  }
}
4691
4792class FileBasedDataSourceSuite extends QueryTest
4893 with SharedSparkSession
49- with AdaptiveSparkPlanHelper {
94+ with AdaptiveSparkPlanHelper
95+ with NestedDataSourceSuiteBase {
5096 import testImplicits ._
5197
5298 override def beforeAll (): Unit = {
@@ -62,6 +108,7 @@ class FileBasedDataSourceSuite extends QueryTest
62108 }
63109 }
64110
111+ override val nestedDataSources = Seq (" orc" , " parquet" , " json" )
65112 private val allFileBasedDataSources = Seq (" orc" , " parquet" , " csv" , " json" , " text" )
66113 private val nameWithSpecialChars = " sp&cial%c hars"
67114
@@ -902,48 +949,6 @@ class FileBasedDataSourceSuite extends QueryTest
902949 }
903950 }
904951 }
905-
906- test(" SPARK-32431: consistent error for nested and top-level duplicate columns" ) {
907- Seq (
908- Seq (" id AS lowercase" , " id + 1 AS camelCase" ) ->
909- new StructType ()
910- .add(" LowerCase" , LongType )
911- .add(" camelcase" , LongType )
912- .add(" CamelCase" , LongType ),
913- Seq (" NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn" ) ->
914- new StructType ().add(" StructColumn" ,
915- new StructType ()
916- .add(" LowerCase" , LongType )
917- .add(" camelcase" , LongType )
918- .add(" CamelCase" , LongType ))
919- ).foreach { case (selectExpr : Seq [String ], caseInsensitiveSchema : StructType ) =>
920- withSQLConf(SQLConf .CASE_SENSITIVE .key -> " false" ) {
921- Seq (" parquet" , " orc" , " json" ).map { format =>
922- withClue(s " format = $format select = ${selectExpr.mkString(" ," )}" ) {
923- withTempPath { dir =>
924- val path = dir.getCanonicalPath
925- spark
926- .range(1L )
927- .selectExpr(selectExpr : _* )
928- .write.mode(" overwrite" )
929- .format(format)
930- .save(path)
931- val e = intercept[AnalysisException ] {
932- spark
933- .read
934- .schema(caseInsensitiveSchema)
935- .format(format)
936- .load(path)
937- .show
938- }
939- assert(e.getMessage.contains(
940- " Found duplicate column(s) in the data schema: `camelcase`" ))
941- }
942- }
943- }
944- }
945- }
946- }
947952}
948953
949954object TestingUDT {
0 commit comments