@@ -43,10 +43,56 @@ import org.apache.spark.sql.internal.SQLConf
4343import org .apache .spark .sql .test .SharedSparkSession
4444import org .apache .spark .sql .types ._
4545
/**
 * Shared test cases checking that file-based data sources report duplicate
 * columns consistently, whether the duplicates are top-level or nested inside
 * a struct. Concrete suites supply the formats to exercise via
 * [[nestedDataSources]].
 */
trait NestedDataSourceSuiteBase extends QueryTest with SharedSparkSession {
  // File formats (e.g. "orc", "parquet", "json") this suite runs against.
  protected val nestedDataSources: Seq[String]

  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
    Seq(
      // Top-level duplicates: read schema contains both "camelcase" and "CamelCase".
      Seq("id AS lowercase", "id + 1 AS camelCase") ->
        new StructType()
          .add("LowerCase", LongType)
          .add("camelcase", LongType)
          .add("CamelCase", LongType),
      // Nested duplicates: the same clash inside a struct column.
      Seq("NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn") ->
        new StructType().add("StructColumn",
          new StructType()
            .add("LowerCase", LongType)
            .add("camelcase", LongType)
            .add("CamelCase", LongType))
    ).foreach { case (selectExpr: Seq[String], caseInsensitiveSchema: StructType) =>
      withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
        // Side-effecting iteration: use foreach, not map (the mapped result
        // was discarded in the original).
        nestedDataSources.foreach { format =>
          withClue(s"format = $format select = ${selectExpr.mkString(",")}") {
            withTempPath { dir =>
              val path = dir.getCanonicalPath
              // Write valid data with case-distinct column names...
              spark
                .range(1L)
                .selectExpr(selectExpr: _*)
                .write.mode("overwrite")
                .format(format)
                .save(path)
              // ...then read it back with a user schema whose fields collide
              // under case-insensitive resolution; this must fail analysis.
              val e = intercept[AnalysisException] {
                spark
                  .read
                  .schema(caseInsensitiveSchema)
                  .format(format)
                  .load(path)
                  .show
              }
              assert(e.getMessage.contains(
                "Found duplicate column(s) in the data schema: `camelcase`"))
            }
          }
        }
      }
    }
  }
}
4691
4792class FileBasedDataSourceSuite extends QueryTest
4893 with SharedSparkSession
49- with AdaptiveSparkPlanHelper {
94+ with AdaptiveSparkPlanHelper
95+ with NestedDataSourceSuiteBase {
5096 import testImplicits ._
5197
5298 override def beforeAll (): Unit = {
@@ -62,6 +108,7 @@ class FileBasedDataSourceSuite extends QueryTest
62108 }
63109 }
64110
111+ override val nestedDataSources = Seq (" orc" , " parquet" , " json" )
65112 private val allFileBasedDataSources = Seq (" orc" , " parquet" , " csv" , " json" , " text" )
66113 private val nameWithSpecialChars = " sp&cial%c hars"
67114
@@ -902,48 +949,6 @@ class FileBasedDataSourceSuite extends QueryTest
902949 }
903950 }
904951 }
905-
906- test(" SPARK-32431: consistent error for nested and top-level duplicate columns" ) {
907- Seq (
908- Seq (" id AS lowercase" , " id + 1 AS camelCase" ) ->
909- new StructType ()
910- .add(" LowerCase" , LongType )
911- .add(" camelcase" , LongType )
912- .add(" CamelCase" , LongType ),
913- Seq (" NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn" ) ->
914- new StructType ().add(" StructColumn" ,
915- new StructType ()
916- .add(" LowerCase" , LongType )
917- .add(" camelcase" , LongType )
918- .add(" CamelCase" , LongType ))
919- ).foreach { case (selectExpr : Seq [String ], caseInsensitiveSchema : StructType ) =>
920- withSQLConf(SQLConf .CASE_SENSITIVE .key -> " false" ) {
921- Seq (" parquet" , " orc" , " json" ).map { format =>
922- withClue(s " format = $format select = ${selectExpr.mkString(" ," )}" ) {
923- withTempPath { dir =>
924- val path = dir.getCanonicalPath
925- spark
926- .range(1L )
927- .selectExpr(selectExpr : _* )
928- .write.mode(" overwrite" )
929- .format(format)
930- .save(path)
931- val e = intercept[AnalysisException ] {
932- spark
933- .read
934- .schema(caseInsensitiveSchema)
935- .format(format)
936- .load(path)
937- .show
938- }
939- assert(e.getMessage.contains(
940- " Found duplicate column(s) in the data schema: `camelcase`" ))
941- }
942- }
943- }
944- }
945- }
946- }
947952}
948953
949954object TestingUDT {
0 commit comments