From a26122a2110f625eadec2385367779d7b6e4c3b1 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 3 Aug 2016 21:06:28 -0700 Subject: [PATCH] remove hash --- .../execution/HiveCompatibilitySuite.scala | 80 +++++++++---------- .../spark/sql/hive/HiveSessionCatalog.scala | 1 - .../apache/spark/sql/hive/test/TestHive.scala | 28 ------- 3 files changed, 36 insertions(+), 73 deletions(-) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 13d18fdec0e9d..a1f254916ff1e 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -57,8 +57,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, 5) // Enable in-memory partition pruning for testing purposes TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) - // Use Hive hash expression instead of the native one - TestHive.sessionState.functionRegistry.unregisterFunction("hash") // Ensures that the plans generation use metastore relation and not OrcRelation // Was done because SqlBuilder does not work with plans having logical relation TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false) @@ -76,7 +74,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) - TestHive.sessionState.functionRegistry.restore() // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) @@ -553,7 
+550,42 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "union31", "union_date", "varchar_2", - "varchar_join1" + "varchar_join1", + + // These tests rely on Hive's hash function, whose results differ from Spark's native hash + "auto_join_nulls", + "auto_join0", + "auto_join1", + "auto_join2", + "auto_join3", + "auto_join4", + "auto_join5", + "auto_join6", + "auto_join7", + "auto_join8", + "auto_join9", + "auto_join10", + "auto_join11", + "auto_join12", + "auto_join13", + "auto_join14", + "auto_join14_hadoop20", + "auto_join15", + "auto_join17", + "auto_join18", + "auto_join19", + "auto_join20", + "auto_join22", + "auto_join25", + "auto_join30", + "auto_join31", + "correlationoptimizer1", + "correlationoptimizer2", + "correlationoptimizer3", + "correlationoptimizer4", + "multiMapJoin1", + "orc_dictionary_threshold", + "udf_hash" ) /** @@ -573,39 +605,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "annotate_stats_part", "annotate_stats_table", "annotate_stats_union", - "auto_join0", - "auto_join1", - "auto_join10", - "auto_join11", - "auto_join12", - "auto_join13", - "auto_join14", - "auto_join14_hadoop20", - "auto_join15", - "auto_join17", - "auto_join18", - "auto_join19", - "auto_join2", - "auto_join20", - "auto_join21", - "auto_join22", - "auto_join23", - "auto_join24", - "auto_join25", - "auto_join26", - "auto_join27", - "auto_join28", - "auto_join3", - "auto_join30", - "auto_join31", - "auto_join4", - "auto_join5", - "auto_join6", - "auto_join7", - "auto_join8", - "auto_join9", - "auto_join_nulls", - "auto_join_reordering_values", "binary_constant", "binarysortable_1", "cast1", @@ -618,15 +617,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "compute_stats_long", "compute_stats_string", "convert_enum_to_string", - "correlationoptimizer1", "correlationoptimizer10", "correlationoptimizer11", "correlationoptimizer13", "correlationoptimizer14", "correlationoptimizer15", - 
"correlationoptimizer2", - "correlationoptimizer3", - "correlationoptimizer4", "correlationoptimizer6", "correlationoptimizer7", "correlationoptimizer8", @@ -867,7 +862,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "merge2", "merge4", "mergejoins", - "multiMapJoin1", "multiMapJoin2", "multi_insert_gby", "multi_insert_gby3", @@ -889,7 +883,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "nullinput2", "nullscript", "optional_outer", - "orc_dictionary_threshold", "order", "order2", "outer_join_ppr", @@ -1022,7 +1015,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_from_unixtime", "udf_greaterthan", "udf_greaterthanorequal", - "udf_hash", "udf_hex", "udf_if", "udf_index", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index c59ac3dcafea4..df836cb55f003 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -235,7 +235,6 @@ private[sql] class HiveSessionCatalog( // parse_url_tuple, posexplode, reflect2, // str_to_map, windowingtablefunction. 
private val hiveFunctions = Seq( - "hash", "histogram_numeric", "percentile", "percentile_approx" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index cdc8d610d378f..21a2c1f2d81e6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -487,24 +487,6 @@ private[hive] class TestHiveQueryExecution( } } - -private[hive] class TestHiveFunctionRegistry extends SimpleFunctionRegistry { - - private val removedFunctions = - collection.mutable.ArrayBuffer.empty[(String, (ExpressionInfo, FunctionBuilder))] - - def unregisterFunction(name: String): Unit = synchronized { - functionBuilders.remove(name).foreach(f => removedFunctions += name -> f) - } - - def restore(): Unit = synchronized { - removedFunctions.foreach { - case (name, (info, builder)) => registerFunction(name, info, builder) - } - } -} - - private[hive] class TestHiveSessionState( sparkSession: TestHiveSparkSession) extends HiveSessionState(sparkSession) { self => @@ -520,16 +502,6 @@ private[hive] class TestHiveSessionState( } } - override lazy val functionRegistry: TestHiveFunctionRegistry = { - // We use TestHiveFunctionRegistry at here to track functions that have been explicitly - // unregistered (through TestHiveFunctionRegistry.unregisterFunction method). - val fr = new TestHiveFunctionRegistry - org.apache.spark.sql.catalyst.analysis.FunctionRegistry.expressions.foreach { - case (name, (info, builder)) => fr.registerFunction(name, info, builder) - } - fr - } - override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = { new TestHiveQueryExecution(sparkSession, plan) }