Enable tests in AvroSuite and in FileBasedDataSourceSuite

MaxGekk · MaxGekk · commit e66b03cd171e · 2020-07-28T10:04:21.000+03:00
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -1801,7 +1801,7 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession {
     }
   }
 
-  ignore("SPARK-32431: consistent error for nested and top-level duplicate columns") {
+  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
     Seq(
       Seq("id AS lowercase", "id + 1 AS camelCase") ->
         new StructType()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
@@ -54,6 +54,19 @@ private[spark] object SchemaUtils {
     } while (queue.nonEmpty)
   }
 
+  /**
+   * Checks whether an input schema has duplicate column names and throws an exception if
+   * any duplication is found.
+   *
+   * @param schema schema to check
+   * @param colType column type name, used in an exception message
+   * @param resolver resolver used to determine if two identifiers are equal
+   */
+  def checkSchemaColumnNameDuplication(
+      schema: StructType, colType: String, resolver: Resolver): Unit = {
+    checkSchemaColumnNameDuplication(schema, colType, isCaseSensitiveAnalysis(resolver))
+  }
+
   // Returns true if a given resolver is case-sensitive
   private def isCaseSensitiveAnalysis(resolver: Resolver): Boolean = {
     if (resolver == caseSensitiveResolution) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -421,18 +421,18 @@ case class DataSource(
 
     relation match {
       case hs: HadoopFsRelation =>
-        SchemaUtils.checkColumnNameDuplication(
-          hs.dataSchema.map(_.name),
+        SchemaUtils.checkSchemaColumnNameDuplication(
+          hs.dataSchema,
           "in the data schema",
           equality)
-        SchemaUtils.checkColumnNameDuplication(
-          hs.partitionSchema.map(_.name),
+        SchemaUtils.checkSchemaColumnNameDuplication(
+          hs.partitionSchema,
           "in the partition schema",
           equality)
         DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema)
       case _ =>
-        SchemaUtils.checkColumnNameDuplication(
-          relation.schema.map(_.name),
+        SchemaUtils.checkSchemaColumnNameDuplication(
+          relation.schema,
           "in the data schema",
           equality)
     }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala
@@ -79,7 +79,7 @@ abstract class FileTable(
 
   override lazy val schema: StructType = {
     val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
-    SchemaUtils.checkColumnNameDuplication(dataSchema.fieldNames,
+    SchemaUtils.checkSchemaColumnNameDuplication(dataSchema,
       "in the data schema", caseSensitive)
     dataSchema.foreach { field =>
       if (!supportsDataType(field.dataType)) {
@@ -88,7 +88,7 @@ abstract class FileTable(
       }
     }
     val partitionSchema = fileIndex.partitionSchema
-    SchemaUtils.checkColumnNameDuplication(partitionSchema.fieldNames,
+    SchemaUtils.checkSchemaColumnNameDuplication(partitionSchema,
       "in the partition schema", caseSensitive)
     val partitionNameSet: Set[String] =
       partitionSchema.fields.map(PartitioningUtils.getColName(_, caseSensitive)).toSet
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -903,7 +903,7 @@ class FileBasedDataSourceSuite extends QueryTest
     }
   }
 
-  ignore("SPARK-32431: consistent error for nested and top-level duplicate columns") {
+  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
     Seq(
       Seq("id AS lowercase", "id + 1 AS camelCase") ->
         new StructType()

Original file line number	Diff line number	Diff line change
`@@ -1801,7 +1801,7 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession {`
`1801`	`1801`	`}`
`1802`	`1802`	`}`
`1803`	`1803`
`1804`		`- ignore("SPARK-32431: consistent error for nested and top-level duplicate columns") {`
	`1804`	`+ test("SPARK-32431: consistent error for nested and top-level duplicate columns") {`
`1805`	`1805`	`Seq(`
`1806`	`1806`	`Seq("id AS lowercase", "id + 1 AS camelCase") ->`
`1807`	`1807`	`new StructType()`
Original file line number	Diff line number	Diff line change
`@@ -903,7 +903,7 @@ class FileBasedDataSourceSuite extends QueryTest`
`903`	`903`	`}`
`904`	`904`	`}`
`905`	`905`
`906`		`- ignore("SPARK-32431: consistent error for nested and top-level duplicate columns") {`
	`906`	`+ test("SPARK-32431: consistent error for nested and top-level duplicate columns") {`
`907`	`907`	`Seq(`
`908`	`908`	`Seq("id AS lowercase", "id + 1 AS camelCase") ->`
`909`	`909`	`new StructType()`