Commit 2ac94f7

Update tests.
1 parent 3db3d20 commit 2ac94f7

File tree: 2 files changed (+86 additions, -29 deletions)


sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala

Lines changed: 38 additions & 0 deletions
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive

 import java.io.File
+
 import org.scalatest.BeforeAndAfterEach

 import org.apache.commons.io.FileUtils
@@ -30,6 +31,8 @@ import org.apache.spark.util.Utils
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.parquet.ParquetRelation2
+import org.apache.spark.sql.sources.LogicalRelation

 /**
  * Tests for persisting tables created through the data sources API into the metastore.
@@ -553,4 +556,39 @@ class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach {
     sql("DROP TABLE savedJsonTable")
     conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, originalDefaultSource)
   }
+
+  if (HiveShim.version == "0.13.1") {
+    test("scan a parquet table created through a CTAS statement") {
+      val originalConvertMetastore = getConf("spark.sql.hive.convertMetastoreParquet", "true")
+      val originalUseDataSource = getConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true")
+      setConf("spark.sql.hive.convertMetastoreParquet", "true")
+      setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true")
+
+      val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""))
+      jsonRDD(rdd).registerTempTable("jt")
+      sql(
+        """
+          |create table test_parquet_ctas STORED AS parquET
+          |AS select tmp.a from jt tmp where tmp.a < 5
+        """.stripMargin)
+
+      checkAnswer(
+        sql(s"SELECT a FROM test_parquet_ctas WHERE a > 2"),
+        Row(3) :: Row(4) :: Nil
+      )
+
+      table("test_parquet_ctas").queryExecution.analyzed match {
+        case LogicalRelation(p: ParquetRelation2) => // OK
+        case _ =>
+          fail(
+            s"test_parquet_ctas should be converted to ${classOf[ParquetRelation2].getCanonicalName}")
+      }
+
+      // Cleanup and reset confs.
+      sql("DROP TABLE IF EXISTS jt")
+      sql("DROP TABLE IF EXISTS test_parquet_ctas")
+      setConf("spark.sql.hive.convertMetastoreParquet", originalConvertMetastore)
+      setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalUseDataSource)
+    }
+  }
 }
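
Both new tests follow the same save/set/restore pattern around spark.sql.hive.convertMetastoreParquet and SQLConf.PARQUET_USE_DATA_SOURCE_API, and the mixed-case STORED AS parquET in the CTAS statement looks deliberate, exercising case-insensitive handling of the format name. As a minimal sketch of how the conf handling could be factored out (a hypothetical withSQLConf helper, not part of this commit, reusing only the suite's own getConf/setConf calls seen above):

// Hypothetical helper, not in this commit: saves the current conf values,
// applies the overrides, runs the body, and restores the originals afterwards.
def withSQLConf(pairs: (String, String)*)(body: => Unit): Unit = {
  val originals = pairs.map { case (key, value) => key -> getConf(key, value) }
  pairs.foreach { case (key, value) => setConf(key, value) }
  try body finally originals.foreach { case (key, value) => setConf(key, value) }
}

With such a helper, the two getConf saves at the top of each test and the two setConf resets at the bottom collapse into one withSQLConf(...) { ... } wrapper, and a failing assertion can no longer leave the confs in a modified state.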

sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala

Lines changed: 48 additions & 29 deletions
@@ -20,15 +20,15 @@ package org.apache.spark.sql.parquet

 import java.io.File

-import org.apache.spark.sql.catalyst.expressions.Row
 import org.scalatest.BeforeAndAfterAll

 import org.apache.spark.sql.{SQLConf, QueryTest}
+import org.apache.spark.sql.catalyst.expressions.Row
 import org.apache.spark.sql.execution.PhysicalRDD
 import org.apache.spark.sql.hive.execution.HiveTableScan
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
-
+import org.apache.spark.sql.sources.LogicalRelation

 // The data where the partitioning key exists only in the directory structure.
 case class ParquetData(intField: Int, stringField: String)
@@ -122,52 +122,71 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase {
   override def beforeAll(): Unit = {
     super.beforeAll()

-    sql(s"""
-      create table test_parquet
-      (
-        intField INT,
-        stringField STRING
-      )
-      ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
-      STORED AS
-      INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
-      OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
-    """)
-
-    val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""))
-    jsonRDD(rdd).registerTempTable("jt")
-    sql("""
-      create table test_parquet_jt ROW FORMAT
-      | SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
-      | STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
-      | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
-      | AS select * from jt""".stripMargin)
+    sql(
+      """
+        |create table test_parquet
+        |(
+        |  intField INT,
+        |  stringField STRING
+        |)
+        |ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+        |STORED AS
+        |  INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+        |  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+      """.stripMargin)

     conf.setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true")
   }

   override def afterAll(): Unit = {
     super.afterAll()
-    sql("DROP TABLE test_parquet")
-    sql("DROP TABLE jt")
-    sql("DROP TABLE test_parquet_jt")
+    sql("DROP TABLE IF EXISTS test_parquet")

     setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalConf.toString)
   }

-  test("scan from an empty parquet table") {
+  test("scan an empty parquet table") {
     checkAnswer(sql("SELECT count(*) FROM test_parquet"), Row(0))
   }

-  test("scan from an empty parquet table with upper case") {
+  test("scan an empty parquet table with upper case") {
     checkAnswer(sql("SELECT count(INTFIELD) FROM TEST_parquet"), Row(0))
   }

-  test("scan from an non empty parquet table #1") {
+  test("scan a parquet table created through a CTAS statement") {
+    val originalConvertMetastore = getConf("spark.sql.hive.convertMetastoreParquet", "true")
+    val originalUseDataSource = getConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true")
+    setConf("spark.sql.hive.convertMetastoreParquet", "true")
+    setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true")
+
+    val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""))
+    jsonRDD(rdd).registerTempTable("jt")
+    sql(
+      """
+        |create table test_parquet_ctas ROW FORMAT
+        |SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+        |STORED AS
+        |  INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+        |  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+        |AS select * from jt
+      """.stripMargin)
+
     checkAnswer(
-      sql(s"SELECT a, b FROM test_parquet_jt WHERE a = '1'"),
+      sql(s"SELECT a, b FROM test_parquet_ctas WHERE a = 1"),
       Seq(Row(1, "str1"))
     )
+
+    table("test_parquet_ctas").queryExecution.analyzed match {
+      case LogicalRelation(p: ParquetRelation2) => // OK
+      case _ =>
+        fail(
+          s"test_parquet_ctas should be converted to ${classOf[ParquetRelation2].getCanonicalName}")
+    }
+
+    sql("DROP TABLE IF EXISTS jt")
+    sql("DROP TABLE IF EXISTS test_parquet_ctas")
+    setConf("spark.sql.hive.convertMetastoreParquet", originalConvertMetastore)
+    setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalUseDataSource)
   }
 }
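
The pattern match on queryExecution.analyzed is the core assertion in both suites: with both confs enabled, the analyzed plan of the CTAS table should be a LogicalRelation wrapping the data source API's ParquetRelation2, rather than a Hive MetastoreRelation read through HiveTableScan. A minimal sketch of that check as a reusable helper (a hypothetical assertConverted name, not part of this commit; it assumes the suite's table and fail methods plus the imports added above):

// Hypothetical assertion helper, not in this commit: verifies that a
// metastore parquet table was converted to the data source ParquetRelation2.
def assertConverted(tableName: String): Unit =
  table(tableName).queryExecution.analyzed match {
    case LogicalRelation(_: ParquetRelation2) => // Converted as expected.
    case other =>
      fail(s"$tableName should be converted to " +
        s"${classOf[ParquetRelation2].getCanonicalName}, but the plan was:\n$other")
  }

Either test could then call assertConverted("test_parquet_ctas") in place of the inline match.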
