Skip to content

Commit fef8c68

Browse files
committed
address comments
1 parent d851169 commit fef8c68

File tree

2 files changed

+182
-76
lines changed

2 files changed

+182
-76
lines changed

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala

Lines changed: 96 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -224,56 +224,110 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
224224
}
225225

226226
test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
227-
withTempPath { path =>
228-
withTable("tbl1", "tbl2", "tbl3") {
229-
val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
230-
toDF("c1", "c2", "c3").repartition(1)
231-
val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/"
232-
val parentDir = s"${path.getCanonicalPath}/l3/l2/"
233-
val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI
234-
val wildcardL3Dir = new File(s"${path}/l3/*").toURI
235-
someDF1.write.parquet(dataDir)
236-
val parentDirStatement =
237-
s"""
238-
|CREATE EXTERNAL TABLE tbl1(
239-
| c1 int,
240-
| c2 int,
241-
| c3 string)
242-
|STORED AS parquet
243-
|LOCATION '${parentDir}'""".stripMargin
244-
sql(parentDirStatement)
245-
val wildcardStatement =
246-
s"""
247-
|CREATE EXTERNAL TABLE tbl2(
248-
| c1 int,
249-
| c2 int,
250-
| c3 string)
251-
|STORED AS parquet
252-
|LOCATION '${wildcardParentDir}'""".stripMargin
253-
sql(wildcardStatement)
254-
val wildcardL3Statement =
255-
s"""
256-
|CREATE EXTERNAL TABLE tbl3(
257-
| c1 int,
258-
| c2 int,
259-
| c3 string)
260-
|STORED AS parquet
261-
|LOCATION '${wildcardL3Dir}'""".stripMargin
262-
sql(wildcardL3Statement)
263-
264-
Seq("true", "false").foreach { parquetConversion =>
265-
withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
227+
Seq("true", "false").foreach { parquetConversion =>
228+
withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
229+
withTempPath { path =>
230+
withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
231+
val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
232+
toDF("c1", "c2", "c3").repartition(1)
233+
val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")).
234+
toDF("c1", "c2", "c3").repartition(1)
235+
val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")).
236+
toDF("c1", "c2", "c3").repartition(1)
237+
someDF1.write.parquet(s"${path.getCanonicalPath}/l1/")
238+
someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/")
239+
someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/")
240+
241+
val topDirStatement =
242+
s"""
243+
|CREATE EXTERNAL TABLE tbl1(
244+
| c1 int,
245+
| c2 int,
246+
| c3 string)
247+
|STORED AS parquet
248+
|LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin
249+
sql(topDirStatement)
266250
if (parquetConversion == "true") {
267251
checkAnswer(sql("select * from tbl1"), Nil)
268-
checkAnswer(sql("select * from tbl2"),
269-
(1 to 2).map(i => Row(i, i, s"parq$i")))
270-
checkAnswer(sql("select * from tbl3"), Nil)
271252
} else {
272253
intercept[IOException](sql("select * from tbl1").show())
254+
}
255+
256+
val l1DirStatement =
257+
s"""
258+
|CREATE EXTERNAL TABLE tbl2(
259+
| c1 int,
260+
| c2 int,
261+
| c3 string)
262+
|STORED AS parquet
263+
|LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin
264+
sql(l1DirStatement)
265+
if (parquetConversion == "true") {
273266
checkAnswer(sql("select * from tbl2"),
274267
(1 to 2).map(i => Row(i, i, s"parq$i")))
268+
} else {
269+
intercept[IOException](sql("select * from tbl2").show())
270+
}
271+
272+
val l2DirStatement =
273+
s"""
274+
|CREATE EXTERNAL TABLE tbl3(
275+
| c1 int,
276+
| c2 int,
277+
| c3 string)
278+
|STORED AS parquet
279+
|LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin
280+
sql(l2DirStatement)
281+
if (parquetConversion == "true") {
282+
checkAnswer(sql("select * from tbl3"),
283+
(3 to 4).map(i => Row(i, i, s"parq$i")))
284+
} else {
275285
intercept[IOException](sql("select * from tbl3").show())
276286
}
287+
288+
val wildcardTopDirStatement =
289+
s"""
290+
|CREATE EXTERNAL TABLE tbl4(
291+
| c1 int,
292+
| c2 int,
293+
| c3 string)
294+
|STORED AS parquet
295+
|LOCATION '${new File(s"${path}/*").toURI}'""".stripMargin
296+
sql(wildcardTopDirStatement)
297+
if (parquetConversion == "true") {
298+
checkAnswer(sql("select * from tbl4"),
299+
(1 to 2).map(i => Row(i, i, s"parq$i")))
300+
} else {
301+
intercept[IOException](sql("select * from tbl4").show())
302+
}
303+
304+
val wildcardL1DirStatement =
305+
s"""
306+
|CREATE EXTERNAL TABLE tbl5(
307+
| c1 int,
308+
| c2 int,
309+
| c3 string)
310+
|STORED AS parquet
311+
|LOCATION '${new File(s"${path}/l1/*").toURI}'""".stripMargin
312+
sql(wildcardL1DirStatement)
313+
if (parquetConversion == "true") {
314+
checkAnswer(sql("select * from tbl5"),
315+
(1 to 4).map(i => Row(i, i, s"parq$i")))
316+
} else {
317+
intercept[IOException](sql("select * from tbl5").show())
318+
}
319+
320+
val wildcardL2DirStatement =
321+
s"""
322+
|CREATE EXTERNAL TABLE tbl6(
323+
| c1 int,
324+
| c2 int,
325+
| c3 string)
326+
|STORED AS parquet
327+
|LOCATION '${new File(s"${path}/l1/l2/*").toURI}'""".stripMargin
328+
sql(wildcardL2DirStatement)
329+
checkAnswer(sql("select * from tbl6"),
330+
(3 to 6).map(i => Row(i, i, s"parq$i")))
277331
}
278332
}
279333
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala

Lines changed: 86 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -195,12 +195,6 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
195195
Seq(true, false).foreach { convertMetastore =>
196196
withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
197197
withTempDir { dir =>
198-
val dataDir = new File(s"${dir.getCanonicalPath}/l3/l2/l1/").toURI
199-
val parentDir = s"${dir.getCanonicalPath}/l3/l2/"
200-
val l3Dir = s"${dir.getCanonicalPath}/l3/"
201-
val wildcardParentDir = new File(s"${dir}/l3/l2/*").toURI
202-
val wildcardL3Dir = new File(s"${dir}/l3/*").toURI
203-
204198
try {
205199
hiveClient.runSqlHive("USE default")
206200
hiveClient.runSqlHive(
@@ -212,75 +206,133 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
212206
|STORED AS orc""".stripMargin)
213207
// Hive throws an exception if I assign the location in the create table statement.
214208
hiveClient.runSqlHive(
215-
s"ALTER TABLE hive_orc SET LOCATION '$dataDir'")
209+
s"ALTER TABLE hive_orc SET LOCATION " +
210+
s"'${new File(s"${dir.getCanonicalPath}/l1/").toURI}'")
216211
hiveClient.runSqlHive(
217212
"""
218213
|INSERT INTO TABLE hive_orc
219214
|VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin)
220215

221-
withTable("tbl1", "tbl2", "tbl3", "tbl4") {
222-
val parentDirStatement =
216+
hiveClient.runSqlHive(
217+
s"ALTER TABLE hive_orc SET LOCATION " +
218+
s"'${new File(s"${dir.getCanonicalPath}/l1/l2/").toURI}'")
219+
hiveClient.runSqlHive(
220+
"""
221+
|INSERT INTO TABLE hive_orc
222+
|VALUES (3, 3, 'orc3'), (4, 4, 'orc4')""".stripMargin)
223+
224+
hiveClient.runSqlHive(
225+
s"ALTER TABLE hive_orc SET LOCATION " +
226+
s"'${new File(s"${dir.getCanonicalPath}/l1/l2/l3/").toURI}'")
227+
hiveClient.runSqlHive(
228+
"""
229+
|INSERT INTO TABLE hive_orc
230+
|VALUES (5, 5, 'orc5'), (6, 6, 'orc6')""".stripMargin)
231+
232+
withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
233+
val topDirStatement =
223234
s"""
224235
|CREATE EXTERNAL TABLE tbl1(
225236
| c1 int,
226237
| c2 int,
227238
| c3 string)
228239
|STORED AS orc
229-
|LOCATION '${parentDir}'""".stripMargin
230-
sql(parentDirStatement)
231-
val parentDirSqlStatement = s"select * from tbl1"
240+
|LOCATION '${s"${dir.getCanonicalPath}"}'""".stripMargin
241+
sql(topDirStatement)
242+
val topDirSqlStatement = s"select * from tbl1"
232243
if (convertMetastore) {
233-
checkAnswer(sql(parentDirSqlStatement), Nil)
244+
checkAnswer(sql(topDirSqlStatement), Nil)
234245
} else {
235-
checkAnswer(sql(parentDirSqlStatement),
236-
(1 to 2).map(i => Row(i, i, s"orc$i")))
246+
checkAnswer(sql(topDirSqlStatement),
247+
(1 to 6).map(i => Row(i, i, s"orc$i")))
237248
}
238249

239-
val l3DirStatement =
250+
val l1DirStatement =
240251
s"""
241252
|CREATE EXTERNAL TABLE tbl2(
242253
| c1 int,
243254
| c2 int,
244255
| c3 string)
245256
|STORED AS orc
246-
|LOCATION '${l3Dir}'""".stripMargin
247-
sql(l3DirStatement)
248-
val l3DirSqlStatement = s"select * from tbl2"
257+
|LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin
258+
sql(l1DirStatement)
259+
val l1DirSqlStatement = s"select * from tbl2"
249260
if (convertMetastore) {
250-
checkAnswer(sql(l3DirSqlStatement), Nil)
251-
} else {
252-
checkAnswer(sql(l3DirSqlStatement),
261+
checkAnswer(sql(l1DirSqlStatement),
253262
(1 to 2).map(i => Row(i, i, s"orc$i")))
263+
} else {
264+
checkAnswer(sql(l1DirSqlStatement),
265+
(1 to 6).map(i => Row(i, i, s"orc$i")))
254266
}
255267

256-
val wildcardStatement =
268+
val l2DirStatement =
257269
s"""
258270
|CREATE EXTERNAL TABLE tbl3(
259271
| c1 int,
260272
| c2 int,
261273
| c3 string)
262274
|STORED AS orc
263-
|LOCATION '$wildcardParentDir'""".stripMargin
264-
sql(wildcardStatement)
265-
val wildcardSqlStatement = s"select * from tbl3"
275+
|LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin
276+
sql(l2DirStatement)
277+
val l2DirSqlStatement = s"select * from tbl3"
266278
if (convertMetastore) {
267-
checkAnswer(sql(wildcardSqlStatement),
268-
(1 to 2).map(i => Row(i, i, s"orc$i")))
279+
checkAnswer(sql(l2DirSqlStatement),
280+
(3 to 4).map(i => Row(i, i, s"orc$i")))
269281
} else {
270-
checkAnswer(sql(wildcardSqlStatement), Nil)
282+
checkAnswer(sql(l2DirSqlStatement),
283+
(3 to 6).map(i => Row(i, i, s"orc$i")))
271284
}
272285

273-
val wildcardL3Statement =
286+
val wildcardTopDirStatement =
274287
s"""
275288
|CREATE EXTERNAL TABLE tbl4(
276289
| c1 int,
277290
| c2 int,
278291
| c3 string)
279292
|STORED AS orc
280-
|LOCATION '$wildcardL3Dir'""".stripMargin
281-
sql(wildcardL3Statement)
282-
val wildcardL3SqlStatement = s"select * from tbl4"
283-
checkAnswer(sql(wildcardL3SqlStatement), Nil)
293+
|LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin
294+
sql(wildcardTopDirStatement)
295+
val wildcardTopDirSqlStatement = s"select * from tbl4"
296+
if (convertMetastore) {
297+
checkAnswer(sql(wildcardTopDirSqlStatement),
298+
(1 to 2).map(i => Row(i, i, s"orc$i")))
299+
} else {
300+
checkAnswer(sql(wildcardTopDirSqlStatement), Nil)
301+
}
302+
303+
val wildcardL1DirStatement =
304+
s"""
305+
|CREATE EXTERNAL TABLE tbl5(
306+
| c1 int,
307+
| c2 int,
308+
| c3 string)
309+
|STORED AS orc
310+
|LOCATION '${new File(s"${dir}/l1/*").toURI}'""".stripMargin
311+
sql(wildcardL1DirStatement)
312+
val wildcardL1DirSqlStatement = s"select * from tbl5"
313+
if (convertMetastore) {
314+
checkAnswer(sql(wildcardL1DirSqlStatement),
315+
(1 to 4).map(i => Row(i, i, s"orc$i")))
316+
} else {
317+
checkAnswer(sql(wildcardL1DirSqlStatement), Nil)
318+
}
319+
320+
val wildcardL2Statement =
321+
s"""
322+
|CREATE EXTERNAL TABLE tbl6(
323+
| c1 int,
324+
| c2 int,
325+
| c3 string)
326+
|STORED AS orc
327+
|LOCATION '${new File(s"${dir}/l1/l2/*").toURI}'""".stripMargin
328+
sql(wildcardL2Statement)
329+
val wildcardL2SqlStatement = s"select * from tbl6"
330+
if (convertMetastore) {
331+
checkAnswer(sql(wildcardL2SqlStatement),
332+
(3 to 6).map(i => Row(i, i, s"orc$i")))
333+
} else {
334+
checkAnswer(sql(wildcardL2SqlStatement), Nil)
335+
}
284336
}
285337
} finally {
286338
hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")

0 commit comments

Comments
 (0)