Skip to content

Commit ef4ec48

Browse files
committed
Hive module.
1 parent 7e0db5e commit ef4ec48

15 files changed

+139
-68
lines changed

sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ class ErrorPositionSuite extends QueryTest with BeforeAndAfter {
136136
* @param query the query to analyze
137137
* @param token a unique token in the string that should be indicated by the exception
138138
*/
139-
def positionTest(name: String, query: String, token: String) = {
139+
def positionTest(name: String, query: String, token: String): Unit = {
140140
def parseTree =
141141
Try(quietly(HiveQl.dumpTree(HiveQl.getAst(query)))).getOrElse("<failed to parse>")
142142

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,21 +116,20 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors {
116116
}
117117

118118
def checkDataType(dt1: Seq[DataType], dt2: Seq[DataType]): Unit = {
119-
dt1.zip(dt2).map {
120-
case (dd1, dd2) =>
121-
assert(dd1.getClass === dd2.getClass) // DecimalType doesn't has the default precision info
119+
dt1.zip(dt2).foreach { case (dd1, dd2) =>
120+
assert(dd1.getClass === dd2.getClass) // DecimalType doesn't has the default precision info
122121
}
123122
}
124123

125124
def checkValues(row1: Seq[Any], row2: Seq[Any]): Unit = {
126-
row1.zip(row2).map {
127-
case (r1, r2) => checkValue(r1, r2)
125+
row1.zip(row2).foreach { case (r1, r2) =>
126+
checkValue(r1, r2)
128127
}
129128
}
130129

131130
def checkValues(row1: Seq[Any], row2: Row): Unit = {
132-
row1.zip(row2.toSeq).map {
133-
case (r1, r2) => checkValue(r1, r2)
131+
row1.zip(row2.toSeq).foreach { case (r1, r2) =>
132+
checkValue(r1, r2)
134133
}
135134
}
136135

@@ -141,7 +140,7 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors {
141140
assert(r1.compare(r2) === 0)
142141
case (r1: Array[Byte], r2: Array[Byte])
143142
if r1 != null && r2 != null && r1.length == r2.length =>
144-
r1.zip(r2).map { case (b1, b2) => assert(b1 === b2) }
143+
r1.zip(r2).foreach { case (b1, b2) => assert(b1 === b2) }
145144
case (r1, r2) => assert(r1 === r2)
146145
}
147146
}
@@ -166,7 +165,8 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors {
166165
val constantData = constantExprs.map(_.eval())
167166
val constantNullData = constantData.map(_ => null)
168167
val constantWritableOIs = constantExprs.map(e => toWritableInspector(e.dataType))
169-
val constantNullWritableOIs = constantExprs.map(e => toInspector(Literal.create(null, e.dataType)))
168+
val constantNullWritableOIs =
169+
constantExprs.map(e => toInspector(Literal.create(null, e.dataType)))
170170

171171
checkValues(constantData, constantData.zip(constantWritableOIs).map {
172172
case (d, oi) => unwrap(wrap(d, oi), oi)
@@ -202,7 +202,8 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors {
202202
case (t, idx) => StructField(s"c_$idx", t)
203203
})
204204

205-
checkValues(row, unwrap(wrap(Row.fromSeq(row), toInspector(dt)), toInspector(dt)).asInstanceOf[Row])
205+
checkValues(row,
206+
unwrap(wrap(Row.fromSeq(row), toInspector(dt)), toInspector(dt)).asInstanceOf[Row])
206207
checkValue(null, unwrap(wrap(null, toInspector(dt)), toInspector(dt)))
207208
}
208209

@@ -212,8 +213,10 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors {
212213
val d = row(0) :: row(0) :: Nil
213214
checkValue(d, unwrap(wrap(d, toInspector(dt)), toInspector(dt)))
214215
checkValue(null, unwrap(wrap(null, toInspector(dt)), toInspector(dt)))
215-
checkValue(d, unwrap(wrap(d, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
216-
checkValue(d, unwrap(wrap(null, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
216+
checkValue(d,
217+
unwrap(wrap(d, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
218+
checkValue(d,
219+
unwrap(wrap(null, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
217220
}
218221

219222
test("wrap / unwrap Map Type") {
@@ -222,7 +225,9 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors {
222225
val d = Map(row(0) -> row(1))
223226
checkValue(d, unwrap(wrap(d, toInspector(dt)), toInspector(dt)))
224227
checkValue(null, unwrap(wrap(null, toInspector(dt)), toInspector(dt)))
225-
checkValue(d, unwrap(wrap(d, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
226-
checkValue(d, unwrap(wrap(null, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
228+
checkValue(d,
229+
unwrap(wrap(d, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
230+
checkValue(d,
231+
unwrap(wrap(null, toInspector(Literal.create(d, dt))), toInspector(Literal.create(d, dt))))
227232
}
228233
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,36 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter {
115115
test("SPARK-4203:random partition directory order") {
116116
sql("CREATE TABLE tmp_table (key int, value string)")
117117
val tmpDir = Utils.createTempDir()
118-
sql(s"CREATE TABLE table_with_partition(c1 string) PARTITIONED by (p1 string,p2 string,p3 string,p4 string,p5 string) location '${tmpDir.toURI.toString}' ")
119-
sql("INSERT OVERWRITE TABLE table_with_partition partition (p1='a',p2='b',p3='c',p4='c',p5='1') SELECT 'blarr' FROM tmp_table")
120-
sql("INSERT OVERWRITE TABLE table_with_partition partition (p1='a',p2='b',p3='c',p4='c',p5='2') SELECT 'blarr' FROM tmp_table")
121-
sql("INSERT OVERWRITE TABLE table_with_partition partition (p1='a',p2='b',p3='c',p4='c',p5='3') SELECT 'blarr' FROM tmp_table")
122-
sql("INSERT OVERWRITE TABLE table_with_partition partition (p1='a',p2='b',p3='c',p4='c',p5='4') SELECT 'blarr' FROM tmp_table")
118+
sql(
119+
s"""
120+
|CREATE TABLE table_with_partition(c1 string)
121+
|PARTITIONED by (p1 string,p2 string,p3 string,p4 string,p5 string)
122+
|location '${tmpDir.toURI.toString}'
123+
""".stripMargin)
124+
sql(
125+
"""
126+
|INSERT OVERWRITE TABLE table_with_partition
127+
|partition (p1='a',p2='b',p3='c',p4='c',p5='1')
128+
|SELECT 'blarr' FROM tmp_table
129+
""".stripMargin)
130+
sql(
131+
"""
132+
|INSERT OVERWRITE TABLE table_with_partition
133+
|partition (p1='a',p2='b',p3='c',p4='c',p5='2')
134+
|SELECT 'blarr' FROM tmp_table
135+
""".stripMargin)
136+
sql(
137+
"""
138+
|INSERT OVERWRITE TABLE table_with_partition
139+
|partition (p1='a',p2='b',p3='c',p4='c',p5='3')
140+
|SELECT 'blarr' FROM tmp_table
141+
""".stripMargin)
142+
sql(
143+
"""
144+
|INSERT OVERWRITE TABLE table_with_partition
145+
|partition (p1='a',p2='b',p3='c',p4='c',p5='4')
146+
|SELECT 'blarr' FROM tmp_table
147+
""".stripMargin)
123148
def listFolders(path: File, acc: List[String]): List[List[String]] = {
124149
val dir = path.listFiles()
125150
val folders = dir.filter(_.isDirectory).toList
@@ -196,34 +221,42 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter {
196221
testData.registerTempTable("testData")
197222

198223
val testDatawithNull = TestHive.sparkContext.parallelize(
199-
(1 to 10).map(i => ThreeCloumntable(i, i.toString,null))).toDF()
224+
(1 to 10).map(i => ThreeCloumntable(i, i.toString, null))).toDF()
200225

201226
val tmpDir = Utils.createTempDir()
202-
sql(s"CREATE TABLE table_with_partition(key int,value string) PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ")
203-
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') SELECT key,value FROM testData")
227+
sql(
228+
s"""
229+
|CREATE TABLE table_with_partition(key int,value string)
230+
|PARTITIONED by (ds string) location '${tmpDir.toURI.toString}'
231+
""".stripMargin)
232+
sql(
233+
"""
234+
|INSERT OVERWRITE TABLE table_with_partition
235+
|partition (ds='1') SELECT key,value FROM testData
236+
""".stripMargin)
204237

205238
// test schema the same between partition and table
206239
sql("ALTER TABLE table_with_partition CHANGE COLUMN key key BIGINT")
207240
checkAnswer(sql("select key,value from table_with_partition where ds='1' "),
208-
testData.collect.toSeq
241+
testData.collect().toSeq
209242
)
210243

211244
// test difference type of field
212245
sql("ALTER TABLE table_with_partition CHANGE COLUMN key key BIGINT")
213246
checkAnswer(sql("select key,value from table_with_partition where ds='1' "),
214-
testData.collect.toSeq
247+
testData.collect().toSeq
215248
)
216249

217250
// add column to table
218251
sql("ALTER TABLE table_with_partition ADD COLUMNS(key1 string)")
219252
checkAnswer(sql("select key,value,key1 from table_with_partition where ds='1' "),
220-
testDatawithNull.collect.toSeq
253+
testDatawithNull.collect().toSeq
221254
)
222255

223256
// change column name to table
224257
sql("ALTER TABLE table_with_partition CHANGE COLUMN key keynew BIGINT")
225258
checkAnswer(sql("select keynew,value from table_with_partition where ds='1' "),
226-
testData.collect.toSeq
259+
testData.collect().toSeq
227260
)
228261

229262
sql("DROP TABLE table_with_partition")

sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll {
142142
after: () => Unit,
143143
query: String,
144144
expectedAnswer: Seq[Row],
145-
ct: ClassTag[_]) = {
145+
ct: ClassTag[_]): Unit = {
146146
before()
147147

148148
var df = sql(query)

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import org.apache.spark.sql.hive.test.TestHive._
2828
class BigDataBenchmarkSuite extends HiveComparisonTest {
2929
val testDataDirectory = new File("target" + File.separator + "big-data-benchmark-testdata")
3030

31+
val userVisitPath = new File(testDataDirectory, "uservisits").getCanonicalPath
3132
val testTables = Seq(
3233
TestTable(
3334
"rankings",
@@ -63,7 +64,7 @@ class BigDataBenchmarkSuite extends HiveComparisonTest {
6364
| searchWord STRING,
6465
| duration INT)
6566
| ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
66-
| STORED AS TEXTFILE LOCATION "${new File(testDataDirectory, "uservisits").getCanonicalPath}"
67+
| STORED AS TEXTFILE LOCATION "$userVisitPath"
6768
""".stripMargin.cmd),
6869
TestTable(
6970
"documents",
@@ -83,7 +84,10 @@ class BigDataBenchmarkSuite extends HiveComparisonTest {
8384
"SELECT pageURL, pageRank FROM rankings WHERE pageRank > 1")
8485

8586
createQueryTest("query2",
86-
"SELECT SUBSTR(sourceIP, 1, 10), SUM(adRevenue) FROM uservisits GROUP BY SUBSTR(sourceIP, 1, 10)")
87+
"""
88+
|SELECT SUBSTR(sourceIP, 1, 10), SUM(adRevenue) FROM uservisits
89+
|GROUP BY SUBSTR(sourceIP, 1, 10)
90+
""".stripMargin)
8791

8892
createQueryTest("query3",
8993
"""
@@ -113,8 +117,8 @@ class BigDataBenchmarkSuite extends HiveComparisonTest {
113117
|CREATE TABLE url_counts_total AS
114118
| SELECT SUM(count) AS totalCount, destpage
115119
| FROM url_counts_partial GROUP BY destpage
116-
|-- The following queries run, but generate different results in HIVE likely because the UDF is not deterministic
117-
|-- given different input splits.
120+
|-- The following queries run, but generate different results in HIVE
121+
|-- likely because the UDF is not deterministic given different input splits.
118122
|-- SELECT CAST(SUM(count) AS INT) FROM url_counts_partial
119123
|-- SELECT COUNT(*) FROM url_counts_partial
120124
|-- SELECT * FROM url_counts_partial

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,9 @@ abstract class HiveComparisonTest
255255
.filterNot(_ contains "hive.outerjoin.supports.filters")
256256
.filterNot(_ contains "hive.exec.post.hooks")
257257

258-
if (allQueries != queryList)
258+
if (allQueries != queryList) {
259259
logWarning(s"Simplifications made on unsupported operations for test $testCaseName")
260+
}
260261

261262
lazy val consoleTestCase = {
262263
val quotes = "\"\"\""
@@ -305,13 +306,16 @@ abstract class HiveComparisonTest
305306
try {
306307
// Hooks often break the harness and don't really affect our test anyway, don't
307308
// even try running them.
308-
if (installHooksCommand.findAllMatchIn(queryString).nonEmpty)
309+
if (installHooksCommand.findAllMatchIn(queryString).nonEmpty) {
309310
sys.error("hive exec hooks not supported for tests.")
311+
}
310312

311-
logWarning(s"Running query ${i+1}/${queryList.size} with hive.")
313+
logWarning(s"Running query ${i + 1}/${queryList.size} with hive.")
312314
// Analyze the query with catalyst to ensure test tables are loaded.
313315
val answer = hiveQuery.analyzed match {
314-
case _: ExplainCommand => Nil // No need to execute EXPLAIN queries as we don't check the output.
316+
case _: ExplainCommand =>
317+
// No need to execute EXPLAIN queries as we don't check the output.
318+
Nil
315319
case _ => TestHive.runSqlHive(queryString)
316320
}
317321

@@ -394,21 +398,24 @@ abstract class HiveComparisonTest
394398
case tf: org.scalatest.exceptions.TestFailedException => throw tf
395399
case originalException: Exception =>
396400
if (System.getProperty("spark.hive.canarytest") != null) {
397-
// When we encounter an error we check to see if the environment is still okay by running a simple query.
398-
// If this fails then we halt testing since something must have gone seriously wrong.
401+
// When we encounter an error we check to see if the environment is still
402+
// okay by running a simple query. If this fails then we halt testing since
403+
// something must have gone seriously wrong.
399404
try {
400405
new TestHive.HiveQLQueryExecution("SELECT key FROM src").stringResult()
401406
TestHive.runSqlHive("SELECT key FROM src")
402407
} catch {
403408
case e: Exception =>
404-
logError(s"FATAL ERROR: Canary query threw $e This implies that the testing environment has likely been corrupted.")
405-
// The testing setup traps exits so wait here for a long time so the developer can see when things started
406-
// to go wrong.
409+
logError(s"FATAL ERROR: Canary query threw $e This implies that the " +
410+
"testing environment has likely been corrupted.")
411+
// The testing setup traps exits so wait here for a long time so the developer
412+
// can see when things started to go wrong.
407413
Thread.sleep(1000000)
408414
}
409415
}
410416

411-
// If the canary query didn't fail then the environment is still okay, so just throw the original exception.
417+
// If the canary query didn't fail then the environment is still okay,
418+
// so just throw the original exception.
412419
throw originalException
413420
}
414421
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ import org.apache.spark.sql.catalyst.util._
2424
/**
2525
* A framework for running the query tests that are listed as a set of text files.
2626
*
27-
* TestSuites that derive from this class must provide a map of testCaseName -> testCaseFiles that should be included.
28-
* Additionally, there is support for whitelisting and blacklisting tests as development progresses.
27+
* TestSuites that derive from this class must provide a map of testCaseName -> testCaseFiles
28+
* that should be included. Additionally, there is support for whitelisting and blacklisting
29+
* tests as development progresses.
2930
*/
3031
abstract class HiveQueryFileTest extends HiveComparisonTest {
3132
/** A list of tests deemed out of scope and thus completely disregarded */
@@ -54,15 +55,17 @@ abstract class HiveQueryFileTest extends HiveComparisonTest {
5455
case (testCaseName, testCaseFile) =>
5556
if (blackList.map(_.r.pattern.matcher(testCaseName).matches()).reduceLeft(_||_)) {
5657
logDebug(s"Blacklisted test skipped $testCaseName")
57-
} else if (realWhiteList.map(_.r.pattern.matcher(testCaseName).matches()).reduceLeft(_||_) || runAll) {
58+
} else if (realWhiteList.map(_.r.pattern.matcher(testCaseName).matches()).reduceLeft(_||_) ||
59+
runAll) {
5860
// Build a test case and submit it to scala test framework...
5961
val queriesString = fileToString(testCaseFile)
6062
createQueryTest(testCaseName, queriesString)
6163
} else {
6264
// Only output warnings for the built in whitelist as this clutters the output when the user
6365
// trying to execute a single test from the commandline.
64-
if(System.getProperty(whiteListProperty) == null && !runAll)
66+
if (System.getProperty(whiteListProperty) == null && !runAll) {
6567
ignore(testCaseName) {}
68+
}
6669
}
6770
}
6871
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ import org.apache.spark.sql.hive.test.TestHive._
3737
case class TestData(a: Int, b: String)
3838

3939
/**
40-
* A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
40+
* A set of test cases expressed in Hive QL that are not covered by the tests
41+
* included in the hive distribution.
4142
*/
4243
class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
4344
private val originalTimeZone = TimeZone.getDefault
@@ -237,7 +238,11 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
237238
}
238239

239240
createQueryTest("modulus",
240-
"SELECT 11 % 10, IF((101.1 % 100.0) BETWEEN 1.01 AND 1.11, \"true\", \"false\"), (101 / 2) % 10 FROM src LIMIT 1")
241+
"""
242+
|SELECT 11 % 10,
243+
| IF((101.1 % 100.0) BETWEEN 1.01 AND 1.11, "true", "false"), (101 / 2) % 10
244+
|FROM src LIMIT 1
245+
""".stripMargin)
241246

242247
test("Query expressed in SQL") {
243248
setConf("spark.sql.dialect", "sql")
@@ -309,7 +314,10 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
309314
"SELECT * FROM src a JOIN src b ON a.key = b.key")
310315

311316
createQueryTest("small.cartesian",
312-
"SELECT a.key, b.key FROM (SELECT key FROM src WHERE key < 1) a JOIN (SELECT key FROM src WHERE key = 2) b")
317+
"""
318+
|SELECT a.key, b.key FROM (SELECT key FROM src WHERE key < 1) a
319+
|JOIN (SELECT key FROM src WHERE key = 2) b
320+
""".stripMargin)
313321

314322
createQueryTest("length.udf",
315323
"SELECT length(\"test\") FROM src LIMIT 1")
@@ -463,8 +471,10 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
463471
|create table src_lv2 (key string, value string);
464472
|
465473
|FROM src
466-
|insert overwrite table src_lv1 SELECT key, D.* lateral view explode(array(key+3, key+4)) D as CX
467-
|insert overwrite table src_lv2 SELECT key, D.* lateral view explode(array(key+3, key+4)) D as CX
474+
|insert overwrite table src_lv1
475+
| SELECT key, D.* lateral view explode(array(key+3, key+4)) D as CX
476+
|insert overwrite table src_lv2
477+
| SELECT key, D.* lateral view explode(array(key+3, key+4)) D as CX
468478
""".stripMargin)
469479

470480
createQueryTest("lateral view5",
@@ -584,7 +594,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
584594
}
585595
}
586596

587-
def isExplanation(result: DataFrame) = {
597+
def isExplanation(result: DataFrame): Boolean = {
588598
val explanation = result.select('plan).collect().map { case Row(plan: String) => plan }
589599
explanation.contains("== Physical Plan ==")
590600
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ case class Nested(a: Int, B: Int)
2525
case class Data(a: Int, B: Int, n: Nested, nestedArray: Seq[Nested])
2626

2727
/**
28-
* A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
28+
* A set of test cases expressed in Hive QL that are not covered by the tests
29+
* included in the hive distribution.
2930
*/
3031
class HiveResolutionSuite extends HiveComparisonTest {
3132

0 commit comments

Comments
 (0)