[SPARK-2087] [SQL] Multiple thriftserver sessions with single HiveContext instance #4885
@@ -63,8 +63,10 @@ class SQLContext(@transient val sparkContext: SparkContext)
 
   def this(sparkContext: JavaSparkContext) = this(sparkContext.sc)
 
-  // Note that this is a lazy val so we can override the default value in subclasses.
-  protected[sql] lazy val conf: SQLConf = new SQLConf
+  /**
+   * @return Spark SQL configuration
+   */
+  protected[sql] def conf = tlSession.get().conf
 
   /**
    * Set Spark SQL configuration properties.
@@ -103,9 +105,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
    */
   def getAllConfs: immutable.Map[String, String] = conf.getAllConfs
 
+  // TODO how to handle the temp table per user session?
   @transient
   protected[sql] lazy val catalog: Catalog = new SimpleCatalog(true)
 
+  // TODO how to handle the temp function per user session?
Contributor: Same here. But this one should be simpler; we don't handle persisted UDFs in Spark SQL for now.

Contributor (author): Yea, the same with …
   @transient
   protected[sql] lazy val functionRegistry: FunctionRegistry = new SimpleFunctionRegistry(true)
 
@@ -138,6 +142,14 @@ class SQLContext(@transient val sparkContext: SparkContext)
 
   protected[sql] def executePlan(plan: LogicalPlan) = new this.QueryExecution(plan)
 
+  @transient
+  protected[sql] val tlSession = new ThreadLocal[SQLSession]() {
+    override def initialValue = defaultSession
+  }
+
+  @transient
+  protected[sql] val defaultSession = createSession()
+
   sparkContext.getConf.getAll.foreach {
     case (key, value) if key.startsWith("spark.sql") => setConf(key, value)
     case _ =>
@@ -194,6 +206,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * }}}
    *
    * @group basic
+   * TODO move to SQLSession?
    */
   @transient
   val udf: UDFRegistration = new UDFRegistration(this)
@@ -1059,6 +1072,32 @@ class SQLContext(@transient val sparkContext: SparkContext)
     )
   }
 
+  protected[sql] def openSession(): SQLSession = {
+    detachSession()
+    val session = createSession()
+    tlSession.set(session)
+
+    session
+  }
+
+  protected[sql] def currentSession(): SQLSession = {
+    tlSession.get()
+  }
+
+  protected[sql] def createSession(): SQLSession = {
+    new this.SQLSession()
+  }
+
+  protected[sql] def detachSession(): Unit = {
+    tlSession.remove()
+  }
+
+  protected[sql] class SQLSession {
+    // Note that this is a lazy val so we can override the default value in subclasses.
+    protected[sql] lazy val conf: SQLConf = new SQLConf
+  }
+
   /**
    * :: DeveloperApi ::
    * The primary workflow for executing relational queries using Spark. Designed to allow easy
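The hunk above is the core of the change: per-thread state now lives in a SQLSession bound through the tlSession ThreadLocal, while defaultSession preserves the old single-session behaviour for code that never opens a session of its own. As a rough illustration of how a caller (for example, a per-connection handler in the thriftserver) might drive this API, here is a minimal sketch. It is not part of the PR; the object and method names (SessionUsageSketch, withIsolatedSession, example) are invented, and it assumes it compiles inside the org.apache.spark.sql package so the protected[sql] members are visible.

```scala
package org.apache.spark.sql

// Hypothetical helper, not part of this PR: run `body` against a fresh
// thread-local SQLSession and always detach it afterwards, so the thread
// falls back to the shared defaultSession.
object SessionUsageSketch {

  def withIsolatedSession[T](ctx: SQLContext)(body: => T): T = {
    ctx.openSession()              // detaches any previous session, binds a new one to this thread
    try body
    finally ctx.detachSession()    // this thread reverts to defaultSession
  }

  def example(ctx: SQLContext): Unit = {
    withIsolatedSession(ctx) {
      // Only this thread's SQLSession (and hence its SQLConf) sees the change.
      ctx.setConf("spark.sql.shuffle.partitions", "291")
    }
    // Outside the block the default session's value is unchanged.
  }
}
```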
This file was deleted.
@@ -195,6 +195,146 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest
      }
    }
  }

  test("test multiple session") {
Contributor: Indentations are off in this test case.
    import org.apache.spark.sql.SQLConf
    var defaultV1: String = null
    var defaultV2: String = null

    withMultipleConnectionJdbcStatement(
      // create table
      { statement =>

        val queries = Seq(
          "DROP TABLE IF EXISTS test_map",
          "CREATE TABLE test_map(key INT, value STRING)",
          s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map",
          "CACHE TABLE test_table AS SELECT key FROM test_map ORDER BY key DESC")

        queries.foreach(statement.execute)

        val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC")
        val buf1 = new collection.mutable.ArrayBuffer[Int]()
        while (rs1.next()) {
          buf1 += rs1.getInt(1)
        }
        rs1.close()

        val rs2 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC")
        val buf2 = new collection.mutable.ArrayBuffer[Int]()
        while (rs2.next()) {
          buf2 += rs2.getInt(1)
        }
        rs2.close()

        assert(buf1 === buf2)
      },

      // first session, we get the default value of the session status
      { statement =>

        val rs1 = statement.executeQuery(s"SET ${SQLConf.SHUFFLE_PARTITIONS}")
        rs1.next()
        defaultV1 = rs1.getString(1)
        assert(defaultV1 != "200")
Contributor: Would be nice to add a comment to indicate that the expected value should be …
        rs1.close()

        val rs2 = statement.executeQuery("SET hive.cli.print.header")
        rs2.next()

        defaultV2 = rs2.getString(1)
        assert(defaultV1 != "true")
        rs2.close()
      },

      // second session, we update the session status
      { statement =>

        val queries = Seq(
          s"SET ${SQLConf.SHUFFLE_PARTITIONS}=291",
          "SET hive.cli.print.header=true"
        )

        queries.map(statement.execute)
        val rs1 = statement.executeQuery(s"SET ${SQLConf.SHUFFLE_PARTITIONS}")
        rs1.next()
        assert("spark.sql.shuffle.partitions=291" === rs1.getString(1))
        rs1.close()

        val rs2 = statement.executeQuery("SET hive.cli.print.header")
        rs2.next()
        assert("hive.cli.print.header=true" === rs2.getString(1))
        rs2.close()
      },

      // third session, we get the latest session status, supposed to be the
      // default value
      { statement =>

        val rs1 = statement.executeQuery(s"SET ${SQLConf.SHUFFLE_PARTITIONS}")
        rs1.next()
        assert(defaultV1 === rs1.getString(1))
        rs1.close()

        val rs2 = statement.executeQuery("SET hive.cli.print.header")
        rs2.next()
        assert(defaultV2 === rs2.getString(1))
        rs2.close()
      },

      // accessing the cached data in another session
      { statement =>

        val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC")
        val buf1 = new collection.mutable.ArrayBuffer[Int]()
        while (rs1.next()) {
          buf1 += rs1.getInt(1)
        }
        rs1.close()

        val rs2 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC")
        val buf2 = new collection.mutable.ArrayBuffer[Int]()
        while (rs2.next()) {
          buf2 += rs2.getInt(1)
        }
        rs2.close()

        assert(buf1 === buf2)
        statement.executeQuery("UNCACHE TABLE test_table")

        // TODO need to figure out how to determine if the data loaded from cache
Contributor: We may check the result of …
        val rs3 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC")
        val buf3 = new collection.mutable.ArrayBuffer[Int]()
        while (rs3.next()) {
          buf3 += rs3.getInt(1)
        }
        rs3.close()

        assert(buf1 === buf3)
      },

      // accessing the uncached table
      { statement =>

        // TODO need to figure out how to determine if the data loaded from cache
        val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC")
        val buf1 = new collection.mutable.ArrayBuffer[Int]()
        while (rs1.next()) {
          buf1 += rs1.getInt(1)
        }
        rs1.close()

        val rs2 = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC")
        val buf2 = new collection.mutable.ArrayBuffer[Int]()
        while (rs2.next()) {
          buf2 += rs2.getInt(1)
        }
        rs2.close()

        assert(buf1 === buf2)
      }
    )
  }
}

class HiveThriftHttpServerSuite extends HiveThriftJdbcTest {
@@ -245,15 +385,22 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test
     s"jdbc:hive2://localhost:$serverPort/"
   }
 
-  protected def withJdbcStatement(f: Statement => Unit): Unit = {
-    val connection = DriverManager.getConnection(jdbcUri, user, "")
-    val statement = connection.createStatement()
-
-    try f(statement) finally {
-      statement.close()
-      connection.close()
+  def withMultipleConnectionJdbcStatement(fs: (Statement => Unit)*) {
+    val user = System.getProperty("user.name")
+    val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") }
+    val statements = connections.map(_.createStatement())
+
+    try {
+      statements.zip(fs).map { case (s, f) => f(s) }
+    } finally {
+      statements.map(_.close())
+      connections.map(_.close())
     }
   }
+
+  def withJdbcStatement(f: Statement => Unit) {
+    withMultipleConnectionJdbcStatement(f)
+  }
 }
 
 abstract class HiveThriftServer2Test extends FunSuite with BeforeAndAfterAll with Logging {
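The new withMultipleConnectionJdbcStatement helper pairs each supplied function with its own JDBC connection and Statement and runs them in order, so every closure exercises a separate thriftserver session; withJdbcStatement is kept as the single-connection special case. A hypothetical usage sketch follows; it assumes it runs inside a suite extending HiveThriftJdbcTest, and the SET value and assertion are illustrative only.

```scala
// Hypothetical example, not part of this PR: the two closures run on two
// different connections, and therefore on two different server-side sessions,
// so a SET issued by the first must not be visible to the second.
withMultipleConnectionJdbcStatement(
  { statement =>
    statement.execute("SET spark.sql.shuffle.partitions=10")
  },
  { statement =>
    val rs = statement.executeQuery("SET spark.sql.shuffle.partitions")
    rs.next()
    // The second session still reports its own (default) value, not 10.
    assert(rs.getString(1) != "spark.sql.shuffle.partitions=10")
    rs.close()
  }
)
```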
Comment: This is a good question. Ideally we may want session isolation for temporary tables. However, we can leave this for another PR if you think it makes this PR too complicated, especially since HiveMetastoreCatalog handles both persisted and temporary tables.

Reply: Yea, we can keep it as a separate PR. But for Spark SQL, the temp table is managed by Catalog, so we probably also need to refactor the Catalog code a little bit.
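To make the follow-up discussed here a little more concrete, below is a toy sketch of the direction: per-user state beyond SQLConf, such as the temporary-table registry, would also live on the thread-local session, and the context would delegate to it the same way conf already does in this PR. This is not Spark code and not part of the PR; every name in it is invented.

```scala
import scala.collection.mutable

// Toy model of "move per-session state behind the ThreadLocal":
// ToySession stands in for SQLSession, ToyContext for SQLContext.
class ToySession {
  val conf = mutable.Map.empty[String, String]        // stands in for SQLConf
  val tempTables = mutable.Map.empty[String, AnyRef]  // stands in for the temp-table Catalog
}

class ToyContext {
  private val defaultSession = new ToySession
  private val tlSession = new ThreadLocal[ToySession] {
    override def initialValue: ToySession = defaultSession
  }

  def openSession(): ToySession = { val s = new ToySession; tlSession.set(s); s }
  def detachSession(): Unit = tlSession.remove()

  // Both the conf and the temporary tables resolve through the current
  // thread's session, so a SET or a temp-table registration made by one
  // JDBC connection cannot leak into another.
  def conf: mutable.Map[String, String] = tlSession.get().conf
  def tempTables: mutable.Map[String, AnyRef] = tlSession.get().tempTables
}
```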