
Commit bd03de5

Put common code to NestedDataSourceSuiteBase
1 parent e66b03c commit bd03de5

File tree: 2 files changed, +50 -82 lines

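The commit extracts the SPARK-32431 duplicate-column test into a shared trait with an abstract list of formats, which each suite overrides. Below is a condensed sketch of the resulting shape, based only on the diffs that follow; imports and the shared test body are elided here and shown in full in the diff.

trait NestedDataSourceSuiteBase extends QueryTest with SharedSparkSession {
  // Formats to exercise; supplied by each concrete suite.
  protected val nestedDataSources: Seq[String]

  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
    // Shared round trip: write with each format in nestedDataSources, read back
    // with a schema whose columns collide case-insensitively, and expect an
    // AnalysisException (full body in the diff below).
  }
}

abstract class AvroSuite extends QueryTest with SharedSparkSession
  with NestedDataSourceSuiteBase {
  override val nestedDataSources = Seq("avro")
}

class FileBasedDataSourceSuite extends QueryTest
  with SharedSparkSession
  with AdaptiveSparkPlanHelper
  with NestedDataSourceSuiteBase {
  override val nestedDataSources = Seq("orc", "parquet", "json")
}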

external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala

Lines changed: 2 additions & 39 deletions
@@ -50,9 +50,10 @@ import org.apache.spark.sql.types._
 import org.apache.spark.sql.v2.avro.AvroScan
 import org.apache.spark.util.Utils
 
-abstract class AvroSuite extends QueryTest with SharedSparkSession {
+abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDataSourceSuiteBase {
   import testImplicits._
 
+  override val nestedDataSources = Seq("avro")
   val episodesAvro = testFile("episodes.avro")
   val testAvro = testFile("test.avro")
 
@@ -1800,44 +1801,6 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession {
       assert(version === SPARK_VERSION_SHORT)
     }
   }
-
-  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
-    Seq(
-      Seq("id AS lowercase", "id + 1 AS camelCase") ->
-        new StructType()
-          .add("LowerCase", LongType)
-          .add("camelcase", LongType)
-          .add("CamelCase", LongType),
-      Seq("NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn") ->
-        new StructType().add("StructColumn",
-          new StructType()
-            .add("LowerCase", LongType)
-            .add("camelcase", LongType)
-            .add("CamelCase", LongType))
-    ).foreach { case (selectExpr: Seq[String], caseInsensitiveSchema: StructType) =>
-      withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
-        withTempPath { dir =>
-          val path = dir.getCanonicalPath
-          spark
-            .range(1L)
-            .selectExpr(selectExpr: _*)
-            .write.mode("overwrite")
-            .format("avro")
-            .save(path)
-          val e = intercept[AnalysisException] {
-            spark
-              .read
-              .schema(caseInsensitiveSchema)
-              .format("avro")
-              .load(path)
-              .show
-          }
-          assert(e.getMessage.contains(
-            "Found duplicate column(s) in the data schema: `camelcase`"))
-        }
-      }
-    }
-  }
 }
 
 class AvroV1Suite extends AvroSuite {

sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala

Lines changed: 48 additions & 43 deletions
@@ -43,10 +43,56 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types._
 
+trait NestedDataSourceSuiteBase extends QueryTest with SharedSparkSession{
+  protected val nestedDataSources: Seq[String]
+
+  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
+    Seq(
+      Seq("id AS lowercase", "id + 1 AS camelCase") ->
+        new StructType()
+          .add("LowerCase", LongType)
+          .add("camelcase", LongType)
+          .add("CamelCase", LongType),
+      Seq("NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn") ->
+        new StructType().add("StructColumn",
+          new StructType()
+            .add("LowerCase", LongType)
+            .add("camelcase", LongType)
+            .add("CamelCase", LongType))
+    ).foreach { case (selectExpr: Seq[String], caseInsensitiveSchema: StructType) =>
+      withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+        nestedDataSources.map { format =>
+          withClue(s"format = $format select = ${selectExpr.mkString(",")}") {
+            withTempPath { dir =>
+              val path = dir.getCanonicalPath
+              spark
+                .range(1L)
+                .selectExpr(selectExpr: _*)
+                .write.mode("overwrite")
+                .format(format)
+                .save(path)
+              val e = intercept[AnalysisException] {
+                spark
+                  .read
+                  .schema(caseInsensitiveSchema)
+                  .format(format)
+                  .load(path)
+                  .show
+              }
+              assert(e.getMessage.contains(
+                "Found duplicate column(s) in the data schema: `camelcase`"))
+            }
+          }
+        }
+      }
+    }
+  }
+}
 
 class FileBasedDataSourceSuite extends QueryTest
   with SharedSparkSession
-  with AdaptiveSparkPlanHelper {
+  with AdaptiveSparkPlanHelper
+  with NestedDataSourceSuiteBase {
   import testImplicits._
 
   override def beforeAll(): Unit = {
@@ -62,6 +108,7 @@ class FileBasedDataSourceSuite extends QueryTest
     }
   }
 
+  override val nestedDataSources = Seq("orc", "parquet", "json")
   private val allFileBasedDataSources = Seq("orc", "parquet", "csv", "json", "text")
   private val nameWithSpecialChars = "sp&cial%c hars"
 
@@ -902,48 +949,6 @@ class FileBasedDataSourceSuite extends QueryTest
       }
     }
   }
-
-  test("SPARK-32431: consistent error for nested and top-level duplicate columns") {
-    Seq(
-      Seq("id AS lowercase", "id + 1 AS camelCase") ->
-        new StructType()
-          .add("LowerCase", LongType)
-          .add("camelcase", LongType)
-          .add("CamelCase", LongType),
-      Seq("NAMED_STRUCT('lowercase', id, 'camelCase', id + 1) AS StructColumn") ->
-        new StructType().add("StructColumn",
-          new StructType()
-            .add("LowerCase", LongType)
-            .add("camelcase", LongType)
-            .add("CamelCase", LongType))
-    ).foreach { case (selectExpr: Seq[String], caseInsensitiveSchema: StructType) =>
-      withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
-        Seq("parquet", "orc", "json").map { format =>
-          withClue(s"format = $format select = ${selectExpr.mkString(",")}") {
-            withTempPath { dir =>
-              val path = dir.getCanonicalPath
-              spark
-                .range(1L)
-                .selectExpr(selectExpr: _*)
-                .write.mode("overwrite")
-                .format(format)
-                .save(path)
-              val e = intercept[AnalysisException] {
-                spark
-                  .read
-                  .schema(caseInsensitiveSchema)
-                  .format(format)
-                  .load(path)
-                  .show
-              }
-              assert(e.getMessage.contains(
-                "Found duplicate column(s) in the data schema: `camelcase`"))
-            }
-          }
-        }
-      }
-    }
-  }
 }
 
 object TestingUDT {
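With the common code in one place, any other suite can opt into the same check by mixing in the trait and listing the formats it supports. A hypothetical example, not part of this commit (the suite name and format choice are made up for illustration):

// Hypothetical suite: inherits the SPARK-32431 test from NestedDataSourceSuiteBase
// and runs it against the listed built-in format.
class NestedJsonSourceCheckSuite extends NestedDataSourceSuiteBase {
  override val nestedDataSources = Seq("json")
}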
