File tree: 5 files changed (+23 -10 lines changed)
external/avro/src/test/scala/org/apache/spark/sql/avro
catalyst/src/main/scala/org/apache/spark/sql/util
main/scala/org/apache/spark/sql/execution/datasources
test/scala/org/apache/spark/sql
Original file line number Diff line number Diff line change @@ -1801,7 +1801,7 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession {
18011801 }
18021802 }
18031803
1804- ignore (" SPARK-32431: consistent error for nested and top-level duplicate columns" ) {
1804+ test (" SPARK-32431: consistent error for nested and top-level duplicate columns" ) {
18051805 Seq (
18061806 Seq (" id AS lowercase" , " id + 1 AS camelCase" ) ->
18071807 new StructType ()
Original file line number Diff line number Diff line change @@ -54,6 +54,19 @@ private[spark] object SchemaUtils {
5454 } while (queue.nonEmpty)
5555 }
5656
57+ /**
58+ * Checks if an input schema has duplicate column names, throwing an exception if any exist.
59+ * Delegates to the flag-based overload via `isCaseSensitiveAnalysis(resolver)`.
60+ *
61+ * @param schema schema to check
62+ * @param colType column type name, used in an exception message
63+ * @param resolver resolver used to determine if two identifiers are equal
64+ */
65+ def checkSchemaColumnNameDuplication (
66+ schema : StructType , colType : String , resolver : Resolver ): Unit = {
67+ checkSchemaColumnNameDuplication(schema, colType, isCaseSensitiveAnalysis(resolver))
68+ }
69+
5770 // Returns true if a given resolver is case-sensitive
5871 private def isCaseSensitiveAnalysis (resolver : Resolver ): Boolean = {
5972 if (resolver == caseSensitiveResolution) {
Original file line number Diff line number Diff line change @@ -421,18 +421,18 @@ case class DataSource(
421421
422422 relation match {
423423 case hs : HadoopFsRelation =>
424- SchemaUtils .checkColumnNameDuplication (
425- hs.dataSchema.map(_.name) ,
424+ SchemaUtils .checkSchemaColumnNameDuplication (
425+ hs.dataSchema,
426426 " in the data schema" ,
427427 equality)
428- SchemaUtils .checkColumnNameDuplication (
429- hs.partitionSchema.map(_.name) ,
428+ SchemaUtils .checkSchemaColumnNameDuplication (
429+ hs.partitionSchema,
430430 " in the partition schema" ,
431431 equality)
432432 DataSourceUtils .verifySchema(hs.fileFormat, hs.dataSchema)
433433 case _ =>
434- SchemaUtils .checkColumnNameDuplication (
435- relation.schema.map(_.name) ,
434+ SchemaUtils .checkSchemaColumnNameDuplication (
435+ relation.schema,
436436 " in the data schema" ,
437437 equality)
438438 }
Original file line number Diff line number Diff line change @@ -79,7 +79,7 @@ abstract class FileTable(
7979
8080 override lazy val schema : StructType = {
8181 val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
82- SchemaUtils .checkColumnNameDuplication (dataSchema.fieldNames ,
82+ SchemaUtils .checkSchemaColumnNameDuplication (dataSchema,
8383 " in the data schema" , caseSensitive)
8484 dataSchema.foreach { field =>
8585 if (! supportsDataType(field.dataType)) {
@@ -88,7 +88,7 @@ abstract class FileTable(
8888 }
8989 }
9090 val partitionSchema = fileIndex.partitionSchema
91- SchemaUtils .checkColumnNameDuplication (partitionSchema.fieldNames ,
91+ SchemaUtils .checkSchemaColumnNameDuplication (partitionSchema,
9292 " in the partition schema" , caseSensitive)
9393 val partitionNameSet : Set [String ] =
9494 partitionSchema.fields.map(PartitioningUtils .getColName(_, caseSensitive)).toSet
Original file line number Diff line number Diff line change @@ -903,7 +903,7 @@ class FileBasedDataSourceSuite extends QueryTest
903903 }
904904 }
905905
906- ignore (" SPARK-32431: consistent error for nested and top-level duplicate columns" ) {
906+ test (" SPARK-32431: consistent error for nested and top-level duplicate columns" ) {
907907 Seq (
908908 Seq (" id AS lowercase" , " id + 1 AS camelCase" ) ->
909909 new StructType ()
You can’t perform that action at this time.
0 commit comments