@@ -21,16 +21,13 @@ import java.io.File
 import java.net.{URL, URLClassLoader}
 import java.sql.Timestamp
 
-import org.apache.hadoop.hive.common.StatsSetupConst
-import org.apache.hadoop.hive.common.`type`.HiveDecimal
-import org.apache.spark.sql.SQLConf.SQLConfEntry
-import org.apache.spark.sql.catalyst.ParserDialect
-
 import scala.collection.JavaConversions._
 import scala.collection.mutable.HashMap
 import scala.language.implicitConversions
 
 import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.hive.common.StatsSetupConst
+import org.apache.hadoop.hive.common.`type`.HiveDecimal
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.ql.metadata.Table
 import org.apache.hadoop.hive.ql.parse.VariableSubstitution
@@ -40,6 +37,9 @@ import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable}
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql._
+import org.apache.spark.sql.SQLConf.SQLConfEntry
+import org.apache.spark.sql.SQLConf.SQLConfEntry._
+import org.apache.spark.sql.catalyst.ParserDialect
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, SetCommand}
@@ -70,13 +70,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
   import HiveContext._
 
+  println("create HiveContext")
+
   /**
    * When true, enables an experimental feature where metastore tables that use the parquet SerDe
    * are automatically converted to use the Spark SQL parquet table scan, instead of the Hive
    * SerDe.
    */
-  protected[sql] def convertMetastoreParquet: Boolean =
-    getConf("spark.sql.hive.convertMetastoreParquet", "true") == "true"
+  protected[sql] def convertMetastoreParquet: Boolean = getConf(CONVERT_METASTORE_PARQUET)
 
   /**
    * When true, also tries to merge possibly different but compatible Parquet schemas in different
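The hunk above swaps the stringly-typed read (`getConf(key, default) == "true"`) for a typed entry. A small, self-contained Scala illustration of what that buys at call sites — this is a sketch of the idea, not Spark's actual `SQLConfEntry` internals:

```scala
object StringlyVsTyped extends App {
  val raw = "True" // e.g. a capitalization slip in spark-defaults.conf

  // Old call-site pattern: anything other than exactly "true" silently
  // disables the feature, including "True" or a typo like "ture".
  val oldStyle: Boolean = raw == "true" // false, with no warning

  // Typed pattern: parsing happens once in the entry's converter, which
  // accepts "True"/"FALSE" and fails loudly on garbage like "ture".
  val typed: Boolean = raw.toBoolean // true

  println(s"oldStyle=$oldStyle typed=$typed")
}
```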
@@ -85,7 +86,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    * This configuration is only effective when "spark.sql.hive.convertMetastoreParquet" is true.
    */
   protected[sql] def convertMetastoreParquetWithSchemaMerging: Boolean =
-    getConf("spark.sql.hive.convertMetastoreParquet.mergeSchema", "false") == "true"
+    getConf(CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING)
 
   /**
    * When true, a table created by a Hive CTAS statement (no USING clause) will be
@@ -99,8 +100,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    *  - The CTAS statement specifies SequenceFile (STORED AS SEQUENCEFILE) as the file format
    *    and no SerDe is specified (no ROW FORMAT SERDE clause).
    */
-  protected[sql] def convertCTAS: Boolean =
-    getConf("spark.sql.hive.convertCTAS", "false").toBoolean
+  protected[sql] def convertCTAS: Boolean = getConf(CONVERT_CTAS)
 
   /**
    * The version of the hive client that will be used to communicate with the metastore. Note that
@@ -118,8 +118,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    *              option is only valid when using the execution version of Hive.
    *  - maven - download the correct version of hive on demand from maven.
    */
-  protected[hive] def hiveMetastoreJars: String =
-    getConf(HIVE_METASTORE_JARS, "builtin")
+  protected[hive] def hiveMetastoreJars: String = getConf(HIVE_METASTORE_JARS)
 
   /**
    * A comma separated list of class prefixes that should be loaded using the classloader that
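For context, the three accepted values of `spark.sql.hive.metastore.jars` are documented in the entry added further down. A hedged usage sketch (assumes an existing `SparkContext`; `setConf(key, value)` is the public SQLContext API, and both keys appear in this patch):

```scala
import org.apache.spark.SparkContext
import org.apache.spark.sql.hive.HiveContext

object MetastoreSetup {
  def apply(sc: SparkContext): HiveContext = {
    val hiveContext = new HiveContext(sc)
    // "builtin" (the default), "maven", or an explicit classpath are accepted.
    hiveContext.setConf("spark.sql.hive.metastore.jars", "builtin")
    // With "builtin", the version must stay at the bundled 0.13.1 (or be unset).
    hiveContext.setConf("spark.sql.hive.metastore.version", "0.13.1")
    hiveContext
  }
}
```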
@@ -129,26 +128,20 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    * custom appenders that are used by log4j.
    */
   protected[hive] def hiveMetastoreSharedPrefixes: Seq[String] =
-    getConf("spark.sql.hive.metastore.sharedPrefixes", jdbcPrefixes)
-      .split(",").filterNot(_ == "")
-
-  private def jdbcPrefixes = Seq(
-    "com.mysql.jdbc", "org.postgresql", "com.microsoft.sqlserver", "oracle.jdbc").mkString(",")
+    getConf(HIVE_METASTORE_SHARED_PREFIXES).filterNot(_ == "")
 
   /**
    * A comma separated list of class prefixes that should explicitly be reloaded for each version
    * of Hive that Spark SQL is communicating with. For example, Hive UDFs that are declared in a
    * prefix that typically would be shared (i.e. org.apache.spark.*)
    */
   protected[hive] def hiveMetastoreBarrierPrefixes: Seq[String] =
-    getConf("spark.sql.hive.metastore.barrierPrefixes", "")
-      .split(",").filterNot(_ == "")
+    getConf(HIVE_METASTORE_BARRIER_PREFIXES).filterNot(_ == "")
 
   /*
    * hive thrift server use background spark sql thread pool to execute sql queries
    */
-  protected[hive] def hiveThriftServerAsync: Boolean =
-    getConf("spark.sql.hive.thriftServer.async", "true").toBoolean
+  protected[hive] def hiveThriftServerAsync: Boolean = getConf(HIVE_THRIFT_SERVER_ASYNC)
 
   @transient
   protected[sql] lazy val substitutor = new VariableSubstitution()
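The prefix lists now come back as `Seq[String]` from `stringSeqConf`, but the `filterNot(_ == "")` guard is kept: interior empty segments survive a comma split. A minimal demonstration of that edge case:

```scala
object SplitDemo extends App {
  // Note the empty segment produced by the doubled comma.
  val raw = "com.mysql.jdbc,org.postgresql,,oracle.jdbc"
  val prefixes = raw.split(",").toList.filterNot(_ == "")
  println(prefixes) // List(com.mysql.jdbc, org.postgresql, oracle.jdbc)
}
```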
@@ -525,7 +518,50 @@ private[hive] object HiveContext {
   val hiveExecutionVersion: String = "0.13.1"
 
   val HIVE_METASTORE_VERSION: String = "spark.sql.hive.metastore.version"
-  val HIVE_METASTORE_JARS: String = "spark.sql.hive.metastore.jars"
+  val HIVE_METASTORE_JARS = stringConf("spark.sql.hive.metastore.jars",
+    defaultValue = Some("builtin"),
+    doc = "Location of the jars that should be used to instantiate the HiveMetastoreClient. This" +
+      " property can be one of three options: " +
+      "1. \"builtin\" Use Hive 0.13.1, which is bundled with the Spark assembly jar when " +
+      "<code>-Phive</code> is enabled. When this option is chosen, " +
+      "spark.sql.hive.metastore.version must be either <code>0.13.1</code> or not defined. " +
+      "2. \"maven\" Use Hive jars of specified version downloaded from Maven repositories. " +
+      "3. A classpath in the standard format for both Hive and Hadoop.")
+
+  val CONVERT_METASTORE_PARQUET = booleanConf("spark.sql.hive.convertMetastoreParquet",
+    defaultValue = Some(true),
+    doc = "When set to false, Spark SQL will use the Hive SerDe for parquet tables instead of " +
+      "the built in support.")
+
+  val CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING = booleanConf(
+    "spark.sql.hive.convertMetastoreParquet.mergeSchema",
+    defaultValue = Some(false),
+    doc = "TODO")
+
+  val CONVERT_CTAS = booleanConf("spark.sql.hive.convertCTAS",
+    defaultValue = Some(false),
+    doc = "TODO")
+
+  val HIVE_METASTORE_SHARED_PREFIXES = stringSeqConf("spark.sql.hive.metastore.sharedPrefixes",
+    defaultValue = Some(jdbcPrefixes),
+    doc = "A comma separated list of class prefixes that should be loaded using the classloader " +
+      "that is shared between Spark SQL and a specific version of Hive. An example of classes " +
+      "that should be shared is JDBC drivers that are needed to talk to the metastore. Other " +
+      "classes that need to be shared are those that interact with classes that are already " +
+      "shared. For example, custom appenders that are used by log4j.")
+
+  private def jdbcPrefixes = Seq(
+    "com.mysql.jdbc", "org.postgresql", "com.microsoft.sqlserver", "oracle.jdbc")
+
+  val HIVE_METASTORE_BARRIER_PREFIXES = stringSeqConf("spark.sql.hive.metastore.barrierPrefixes",
+    defaultValue = Some(Seq()),
+    doc = "A comma separated list of class prefixes that should explicitly be reloaded for each " +
+      "version of Hive that Spark SQL is communicating with. For example, Hive UDFs that are " +
+      "declared in a prefix that typically would be shared (i.e. <code>org.apache.spark.*</code>).")
+
+  val HIVE_THRIFT_SERVER_ASYNC = booleanConf("spark.sql.hive.thriftServer.async",
+    defaultValue = Some(true),
+    doc = "TODO")
 
   /** Constructs a configuration for hive, where the metastore is located in a temp directory. */
   def newTemporaryConfiguration(): Map[String, String] = {
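The `stringConf`/`booleanConf`/`stringSeqConf` builders used above bundle key, default value, user-facing doc, and parsing into one definition. A hypothetical, self-contained sketch of that style (not Spark's actual `SQLConfEntry` implementation, which lives in `org.apache.spark.sql.SQLConf` and differs in detail):

```scala
object ConfEntrySketch extends App {
  // Key, default, doc, and converter travel together, so a bad value can be
  // reported with the key that caused it.
  case class ConfEntry[T](
      key: String,
      defaultValue: Option[T],
      doc: String,
      valueConverter: String => T)

  def booleanConf(key: String, defaultValue: Option[Boolean], doc: String): ConfEntry[Boolean] =
    ConfEntry(key, defaultValue, doc, raw =>
      try raw.toBoolean
      catch {
        case _: IllegalArgumentException =>
          throw new IllegalArgumentException(s"$key should be boolean, but was: $raw")
      })

  def stringSeqConf(
      key: String,
      defaultValue: Option[Seq[String]],
      doc: String): ConfEntry[Seq[String]] =
    ConfEntry(key, defaultValue, doc, _.split(",").toSeq)

  // Usage mirroring a definition from this patch:
  val convertCTAS = booleanConf("spark.sql.hive.convertCTAS",
    defaultValue = Some(false), doc = "TODO")

  println(convertCTAS.valueConverter("true")) // true
  println(convertCTAS.valueConverter("TRUE")) // true (case-insensitive)
  // convertCTAS.valueConverter("yes")        // throws, naming the key
}
```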