Commit b14b83e
In a HiveContext, make SQLConf a subset of HiveConf.

- Any settings that go to SQLConf will go to HiveConf as well.
- Add tests for the Hive case.

1 parent: 6983180
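
In sketch form, the write-through pattern this commit introduces looks like the
following. The names sqlConf, hiveconf, and settings come from the diff below;
the stub classes are illustrative stand-ins, not Spark's real SQLConf and
HiveConf:

import scala.collection.mutable

object WriteThroughSketch extends App {
  // Minimal stand-ins for the real conf classes.
  class SQLConf {
    protected val settings = new mutable.HashMap[String, String]()
    def set(key: String, value: String): SQLConf = { settings(key) = value; this }
    def getOption(key: String): Option[String] = settings.get(key)
  }
  class HiveConfStub {
    private val props = new mutable.HashMap[String, String]()
    def set(key: String, value: String): Unit = props(key) = value
    def get(key: String): Option[String] = props.get(key)
  }

  val hiveconf = new HiveConfStub

  // Core idea: every write into SQLConf is mirrored into the HiveConf first,
  // so the SQLConf stays a subset of the HiveConf for the whole session.
  val sqlConf: SQLConf = new SQLConf {
    override def set(key: String, value: String): SQLConf = {
      hiveconf.set(key, value) // write through to the Hive side
      settings(key) = value    // possible because `settings` is now protected
      this
    }
  }

  sqlConf.set("hive.metastore.warehouse.dir", "/tmp/warehouse")
  assert(hiveconf.get("hive.metastore.warehouse.dir").contains("/tmp/warehouse"))
}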

5 files changed: +81, −17

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ import scala.collection.mutable
  */
 class SQLConf {
 
-  private val settings = new mutable.HashMap[String, String]()
+  protected val settings = new mutable.HashMap[String, String]()
 
   private[spark] def clear() {
     settings.clear()
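
The one-word visibility change above is what the HiveContext change further
down relies on: its HiveConf-backed SQLConf overrides set() and writes into
settings directly, which a private member would forbid. A minimal illustration
of the Scala rule (stub names, not Spark code):

object VisibilitySketch extends App {
  class Base {
    private val hidden    = scala.collection.mutable.Map.empty[String, String]
    protected val visible = scala.collection.mutable.Map.empty[String, String]
  }
  new Base {
    visible("k") = "v" // fine: protected members are open to subclass bodies
    // hidden("k") = "v" // would not compile: `hidden` is private to Base
    println(visible)
  }
}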

sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala

Lines changed: 2 additions & 4 deletions
@@ -33,8 +33,7 @@ case class SetCommandPhysical(
   extends LeafNode {
 
   def execute(): RDD[Row] = (key, value) match {
-    case (Some(k), Some(v)) =>
-      context.emptyResult
+    case (Some(k), Some(v)) => context.emptyResult
     case (Some(k), None) =>
       val resultString = context.sqlConf.getOption(k) match {
         case Some(v) => s"$k=$v"
@@ -48,8 +47,7 @@ case class SetCommandPhysical(
       }.toSeq
       // Assume config parameters can fit into one split (machine) ;)
       context.sparkContext.parallelize(rows, 1)
-    case _ =>
-      context.emptyResult
+    case _ => context.emptyResult
   }
 
   def output: Seq[Attribute] = Seq.empty // TODO: right thing?
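
The three SET forms this node distinguishes are easiest to see with simplified
types: in the sketch below, a mutable.Map stands in for SQLConf and Seq[String]
for RDD[Row]. Note that the (Some(k), Some(v)) branch produces no rows because,
as the diff shows, the actual assignment happens elsewhere in the planner:

import scala.collection.mutable

object SetSemanticsSketch extends App {
  def setOutput(key: Option[String], value: Option[String],
                conf: mutable.Map[String, String]): Seq[String] =
    (key, value) match {
      case (Some(k), Some(v)) => Seq.empty      // "SET key=val": side effect only
      case (Some(k), None) =>                   // "SET key": echo or report undefined
        Seq(conf.get(k).map(v => s"$k=$v").getOrElse(s"$k is undefined"))
      case (None, None) =>                      // bare "SET": dump all pairs
        conf.map { case (k, v) => s"$k=$v" }.toSeq
      case _ => Seq.empty
    }

  val conf = mutable.Map("spark.sql.key.example" -> "v") // hypothetical key
  assert(setOutput(Some("spark.sql.key.example"), None, conf) == Seq("spark.sql.key.example=v"))
  assert(setOutput(Some("nonexistent"), None, conf) == Seq("nonexistent is undefined"))
}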

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 2 additions & 1 deletion
@@ -336,7 +336,7 @@ class SQLQuerySuite extends QueryTest {
       (6, "f")))
   }
 
-  test("SET commands using sql()") {
+  test("SET commands semantics using sql()") {
     sqlConf.clear()
     val testKey = "test.key.0"
     val testVal = "test.val.0"
@@ -345,6 +345,7 @@ class SQLQuerySuite extends QueryTest {
     // "set" itself returns all config variables currently specified in SQLConf.
     assert(sql("set").collect().size == 0)
 
+    // "set key=val"
     sql(s"SET $testKey=$testVal")
     checkAnswer(
       sql("SET"),

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 13 additions & 8 deletions
@@ -59,10 +59,8 @@ class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
 
   /** Sets up the system initially or after a RESET command */
   protected def configure() {
-    // TODO: refactor this so we can work with other databases.
-    runSqlHive(
-      s"set javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=$metastorePath;create=true")
-    runSqlHive("set hive.metastore.warehouse.dir=" + warehousePath)
+    sqlConf.set("javax.jdo.option.ConnectionURL", s"jdbc:derby:;databaseName=$metastorePath;create=true")
+    sqlConf.set("hive.metastore.warehouse.dir", warehousePath)
   }
 
   configure() // Must be called before initializing the catalog below.
@@ -134,12 +132,19 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   }
 
   /**
-   * Contract: after initialization of this HiveContext, these two confs should
-   * contain exactly the same key-value pairs throughout the life time of `this`.
+   * Any properties set by sqlConf.set() or a SET command inside hql() or sql()
+   * will be set in the SQLConf, *as well as* getting set in the HiveConf. In
+   * other words, the SQLConf properties will be a subset of the HiveConf properties
+   * throughout the life time of this session.
    */
   @transient protected[hive] lazy val hiveconf = new HiveConf(classOf[SessionState])
-  @transient override lazy val sqlConf: SQLConf = new SQLConf(hiveconf.getAllProperties)
-
+  @transient override lazy val sqlConf: SQLConf = new SQLConf(hiveconf.getAllProperties) {
+    override def set(key: String, value: String): SQLConf = {
+      hiveconf.set(key, value)
+      settings(key) = value
+      this
+    }
+  }
   @transient protected[hive] lazy val sessionState = new SessionState(hiveconf)
 
   sessionState.err = new PrintStream(outputBuffer, true, "UTF-8")
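
With that override in place, every route into the SQLConf also lands in the
HiveConf. A sketch of a session-level check, written as it would appear inside
HiveQuerySuite below (TestHive._ supplies hql, sqlConf, and hiveconf; the test
name is hypothetical, the key is borrowed from the tests below):

  test("sqlConf writes reach the HiveConf") {
    val key = "spark.sql.key.usedfortestonly"

    sqlConf.set(key, "v0")                // programmatic route
    assert(hiveconf.get(key, "") == "v0")

    hql(s"SET $key=v1")                   // SET-statement route
    assert(hiveconf.get(key, "") == "v1")
  }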

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 63 additions & 3 deletions
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.hive.test.TestHive._
+import org.apache.spark.sql.Row
 
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
@@ -158,10 +159,17 @@ class HiveQuerySuite extends HiveComparisonTest {
     hql("SELECT * FROM src").toString
   }
 
+  test("HiveContext initializes its SQLConf to pick up its HiveConf's current params") {
+    import scala.collection.JavaConversions._ // implicits for java.util.Properties
+    val hiveconfSet: Set[(String, String)] = hiveconf.getAllProperties.toSeq.toSet
+    val sqlconfSet: Set[(String, String)] = sqlConf.getAll.toSet
+    assert(sqlconfSet.intersect(hiveconfSet) === sqlconfSet)
+    assert(sqlConf.getAll.size > 0, "The two confs should not be empty")
+  }
+
   test("parse HQL set commands") {
-    // Adapted from SQLConfSuite.
-    sqlConf.clear()
-    val testKey = "spark.sql.key"
+    // Adapted from its SQL counterpart.
+    val testKey = "spark.sql.key.usedfortestonly"
     val testVal = "val0,val_1,val2.3,my_table"
 
     hql(s"set $testKey=$testVal")
@@ -177,8 +185,60 @@ class HiveQuerySuite extends HiveComparisonTest {
 
     hql(s"set $testKey=")
     assert(sqlConf.get(testKey, "0") == "")
+  }
+
+  test("SET commands semantics for a HiveContext") {
+    // Adapted from its SQL counterpart.
+    val testKey = "spark.sql.key.usedfortestonly"
+    var testVal = "test.val.0"
+    val nonexistentKey = "nonexistent"
+    def fromRows(row: Array[Row]): Array[String] = row.map(_.getString(0))
+
+    sqlConf.clear()
+
+    // "set" itself returns all config variables currently specified in SQLConf.
+    assert(hql("set").collect().size == 0)
+
+    // "set key=val"
+    hql(s"SET $testKey=$testVal")
+    assert(fromRows(hql("SET").collect()) sameElements Array(s"$testKey=$testVal"))
+    assert(hiveconf.get(testKey, "") == testVal)
 
+    hql(s"SET ${testKey + testKey}=${testVal + testVal}")
+    assert(fromRows(hql("SET").collect()) sameElements
+      Array(
+        s"$testKey=$testVal",
+        s"${testKey + testKey}=${testVal + testVal}"))
+    assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
+
+    // "set key"
+    assert(fromRows(hql(s"SET $testKey").collect()) sameElements
+      Array(s"$testKey=$testVal"))
+    assert(fromRows(hql(s"SET $nonexistentKey").collect()) sameElements
+      Array(s"$nonexistentKey is undefined"))
+
+    // Assert that sql() should have the same effects as hql() by repeating the above using sql().
     sqlConf.clear()
+    assert(sql("set").collect().size == 0)
+
+    sql(s"SET $testKey=$testVal")
+    assert(fromRows(sql("SET").collect()) sameElements Array(s"$testKey=$testVal"))
+    assert(hiveconf.get(testKey, "") == testVal)
+
+    sql(s"SET ${testKey + testKey}=${testVal + testVal}")
+    assert(fromRows(sql("SET").collect()) sameElements
+      Array(
+        s"$testKey=$testVal",
+        s"${testKey + testKey}=${testVal + testVal}"))
+    assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
+
+    assert(fromRows(sql(s"SET $testKey").collect()) sameElements
+      Array(s"$testKey=$testVal"))
+    assert(fromRows(sql(s"SET $nonexistentKey").collect()) sameElements
+      Array(s"$nonexistentKey is undefined"))
   }
 
+  // Put tests that depend on specific Hive settings before these last two tests,
+  // since they modify/clear state.
+
 }
