Skip to content

Commit e66b03c

Browse files
committed
Enable tests in AvroSuite and in FileBasedDataSourceSuite
1 parent 8906732 commit e66b03c

File tree

5 files changed

+23
-10
lines changed

5 files changed

+23
-10
lines changed

external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1801,7 +1801,7 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession {
18011801
}
18021802
}
18031803

1804-
ignore("SPARK-32431: consistent error for nested and top-level duplicate columns") {
1804+
test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
18051805
Seq(
18061806
Seq("id AS lowercase", "id + 1 AS camelCase") ->
18071807
new StructType()

sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,19 @@ private[spark] object SchemaUtils {
5454
} while (queue.nonEmpty)
5555
}
5656

57+
/**
58+
* Checks if an input schema has duplicate column names. This throws an exception if the
59+
* duplication exists.
60+
*
61+
* @param schema schema to check
62+
* @param colType column type name, used in an exception message
63+
* @param resolver resolver used to determine if two identifiers are equal
64+
*/
65+
def checkSchemaColumnNameDuplication(
66+
schema: StructType, colType: String, resolver: Resolver): Unit = {
67+
checkSchemaColumnNameDuplication(schema, colType, isCaseSensitiveAnalysis(resolver))
68+
}
69+
5770
// Returns true if a given resolver is case-sensitive
5871
private def isCaseSensitiveAnalysis(resolver: Resolver): Boolean = {
5972
if (resolver == caseSensitiveResolution) {

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -421,18 +421,18 @@ case class DataSource(
421421

422422
relation match {
423423
case hs: HadoopFsRelation =>
424-
SchemaUtils.checkColumnNameDuplication(
425-
hs.dataSchema.map(_.name),
424+
SchemaUtils.checkSchemaColumnNameDuplication(
425+
hs.dataSchema,
426426
"in the data schema",
427427
equality)
428-
SchemaUtils.checkColumnNameDuplication(
429-
hs.partitionSchema.map(_.name),
428+
SchemaUtils.checkSchemaColumnNameDuplication(
429+
hs.partitionSchema,
430430
"in the partition schema",
431431
equality)
432432
DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema)
433433
case _ =>
434-
SchemaUtils.checkColumnNameDuplication(
435-
relation.schema.map(_.name),
434+
SchemaUtils.checkSchemaColumnNameDuplication(
435+
relation.schema,
436436
"in the data schema",
437437
equality)
438438
}

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ abstract class FileTable(
7979

8080
override lazy val schema: StructType = {
8181
val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
82-
SchemaUtils.checkColumnNameDuplication(dataSchema.fieldNames,
82+
SchemaUtils.checkSchemaColumnNameDuplication(dataSchema,
8383
"in the data schema", caseSensitive)
8484
dataSchema.foreach { field =>
8585
if (!supportsDataType(field.dataType)) {
@@ -88,7 +88,7 @@ abstract class FileTable(
8888
}
8989
}
9090
val partitionSchema = fileIndex.partitionSchema
91-
SchemaUtils.checkColumnNameDuplication(partitionSchema.fieldNames,
91+
SchemaUtils.checkSchemaColumnNameDuplication(partitionSchema,
9292
"in the partition schema", caseSensitive)
9393
val partitionNameSet: Set[String] =
9494
partitionSchema.fields.map(PartitioningUtils.getColName(_, caseSensitive)).toSet

sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@ class FileBasedDataSourceSuite extends QueryTest
903903
}
904904
}
905905

906-
ignore("SPARK-32431: consistent error for nested and top-level duplicate columns") {
906+
test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
907907
Seq(
908908
Seq("id AS lowercase", "id + 1 AS camelCase") ->
909909
new StructType()

0 commit comments

Comments (0)