Skip to content

Commit f3ddd6f

Browse files
wangyum authored and gatorsmile committed
[SPARK-27402][SQL][TEST-HADOOP3.2][TEST-MAVEN] Fix hadoop-3.2 test issues (except the hive-thriftserver module)
## What changes were proposed in this pull request? This PR fixes the hadoop-3.2 test issues (except those in the `hive-thriftserver` module): 1. Add `hive.metastore.schema.verification` and `datanucleus.schema.autoCreateAll` to HiveConf. 2. Make the hadoop-3.2 profile support accessing Hive metastore versions 0.12 through 2.2. After [SPARK-27176](https://issues.apache.org/jira/browse/SPARK-27176) and this PR, the built-in Hive is upgraded to 2.3 when the Hadoop 3.2+ profile is enabled. This upgrade fixes the following issues: - [HIVE-6727](https://issues.apache.org/jira/browse/HIVE-6727): Table level stats for external tables are set incorrectly. - [HIVE-15653](https://issues.apache.org/jira/browse/HIVE-15653): Some ALTER TABLE commands drop table stats. - [SPARK-12014](https://issues.apache.org/jira/browse/SPARK-12014): Spark SQL query containing semicolon is broken in Beeline. - [SPARK-25193](https://issues.apache.org/jira/browse/SPARK-25193): insert overwrite doesn't throw exception when drop old data fails. - [SPARK-25919](https://issues.apache.org/jira/browse/SPARK-25919): Date value corrupts when tables are "ParquetHiveSerDe" formatted and target table is Partitioned. - [SPARK-26332](https://issues.apache.org/jira/browse/SPARK-26332): Spark sql write orc table on viewFS throws exception. - [SPARK-26437](https://issues.apache.org/jira/browse/SPARK-26437): Decimal data becomes bigint to query, unable to query. ## How was this patch tested? This PR tests Spark’s Hadoop 3.2 profile on Jenkins, and #24591 tests Spark’s Hadoop 2.7 profile on Jenkins. This PR closes #24591. Closes #24391 from wangyum/SPARK-27402. Authored-by: Yuming Wang <[email protected]> Signed-off-by: gatorsmile <[email protected]>
1 parent d169b0a commit f3ddd6f

File tree

15 files changed

+154
-42
lines changed

15 files changed

+154
-42
lines changed

dev/sparktestsupport/modules.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,17 @@
1515
# limitations under the License.
1616
#
1717

18+
from __future__ import print_function
1819
from functools import total_ordering
1920
import itertools
2021
import re
22+
import os
23+
24+
if os.environ.get("AMPLAB_JENKINS"):
25+
hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
26+
else:
27+
hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
28+
print("[info] Choosing supported modules with Hadoop profile", hadoop_version)
2129

2230
all_modules = []
2331

@@ -72,7 +80,11 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=
7280
self.dependent_modules = set()
7381
for dep in dependencies:
7482
dep.dependent_modules.add(self)
75-
all_modules.append(self)
83+
# TODO: Skip hive-thriftserver module for hadoop-3.2. remove this once hadoop-3.2 support it
84+
if name == "hive-thriftserver" and hadoop_version == "hadoop3.2":
85+
print("[info] Skip unsupported module:", name)
86+
else:
87+
all_modules.append(self)
7688

7789
def contains_file(self, filename):
7890
return any(re.match(p, filename) for p in self.source_file_prefixes)

sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1371,7 +1371,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
13711371
// if (isUsingHiveMetastore) {
13721372
// assert(storageFormat.properties.get("path") === expected)
13731373
// }
1374-
assert(storageFormat.locationUri === Some(expected))
1374+
assert(storageFormat.locationUri.map(_.getPath) === Some(expected.getPath))
13751375
}
13761376
// set table location
13771377
sql("ALTER TABLE dbx.tab1 SET LOCATION '/path/to/your/lovely/heart'")

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
118118
}
119119
}
120120

121-
protected def testSelectiveDictionaryEncoding(isSelective: Boolean) {
121+
protected def testSelectiveDictionaryEncoding(isSelective: Boolean, isHive23: Boolean = false) {
122122
val tableName = "orcTable"
123123

124124
withTempDir { dir =>
@@ -171,7 +171,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
171171
// Hive 0.11 and RLE v2 is introduced in Hive 0.12 ORC with more improvements.
172172
// For more details, see https://orc.apache.org/specification/
173173
assert(stripe.getColumns(1).getKind === DICTIONARY_V2)
174-
if (isSelective) {
174+
if (isSelective || isHive23) {
175175
assert(stripe.getColumns(2).getKind === DIRECT_V2)
176176
} else {
177177
assert(stripe.getColumns(2).getKind === DICTIONARY_V2)

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
5252
import org.apache.spark.sql.execution.QueryExecutionException
5353
import org.apache.spark.sql.execution.command.DDLUtils
5454
import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX}
55+
import org.apache.spark.sql.hive.HiveUtils
5556
import org.apache.spark.sql.hive.client.HiveClientImpl._
5657
import org.apache.spark.sql.types._
5758
import org.apache.spark.util.{CircularBuffer, Utils}
@@ -191,7 +192,29 @@ private[hive] class HiveClientImpl(
191192
}
192193

193194
/** Returns the configuration for the current session. */
194-
def conf: HiveConf = state.getConf
195+
def conf: HiveConf = if (!HiveUtils.isHive23) {
196+
state.getConf
197+
} else {
198+
val hiveConf = state.getConf
199+
// Hive changed the default of datanucleus.schema.autoCreateAll from true to false
200+
// and hive.metastore.schema.verification from false to true since Hive 2.0.
201+
// For details, see the JIRA HIVE-6113, HIVE-12463 and HIVE-1841.
202+
// isEmbeddedMetaStore should not be true in the production environment.
203+
// We hard-code hive.metastore.schema.verification and datanucleus.schema.autoCreateAll to allow
204+
// bin/spark-shell, bin/spark-sql and sbin/start-thriftserver.sh to automatically create the
205+
// Derby Metastore when running Spark in the non-production environment.
206+
val isEmbeddedMetaStore = {
207+
val msUri = hiveConf.getVar(ConfVars.METASTOREURIS)
208+
val msConnUrl = hiveConf.getVar(ConfVars.METASTORECONNECTURLKEY)
209+
(msUri == null || msUri.trim().isEmpty) &&
210+
(msConnUrl != null && msConnUrl.startsWith("jdbc:derby"))
211+
}
212+
if (isEmbeddedMetaStore) {
213+
hiveConf.setBoolean("hive.metastore.schema.verification", false)
214+
hiveConf.setBoolean("datanucleus.schema.autoCreateAll", true)
215+
}
216+
hiveConf
217+
}
195218

196219
private val userName = conf.getUser
197220

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import org.apache.commons.io.{FileUtils, IOUtils}
2828
import org.apache.commons.lang3.{JavaVersion, SystemUtils}
2929
import org.apache.hadoop.conf.Configuration
3030
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
31+
import org.apache.hadoop.hive.shims.ShimLoader
3132

3233
import org.apache.spark.SparkConf
3334
import org.apache.spark.deploy.SparkSubmitUtils
@@ -196,6 +197,7 @@ private[hive] class IsolatedClientLoader(
196197
protected def isBarrierClass(name: String): Boolean =
197198
name.startsWith(classOf[HiveClientImpl].getName) ||
198199
name.startsWith(classOf[Shim].getName) ||
200+
name.startsWith(classOf[ShimLoader].getName) ||
199201
barrierPrefixes.exists(name.startsWith)
200202

201203
protected def classToPath(name: String): String =

sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ object TestHive
6363
// SPARK-8910
6464
.set(UI_ENABLED, false)
6565
.set(config.UNSAFE_EXCEPTION_ON_MEMORY_LEAK, true)
66+
// Hive changed the default of hive.metastore.disallow.incompatible.col.type.changes
67+
// from false to true. For details, see the JIRA HIVE-12320 and HIVE-17764.
68+
.set("spark.hadoop.hive.metastore.disallow.incompatible.col.type.changes", "false")
6669
// Disable ConvertToLocalRelation for better test coverage. Test cases built on
6770
// LocalRelation will exercise the optimization rules better by disabling it as
6871
// this rule may potentially block testing of other optimization rules such as
@@ -120,8 +123,10 @@ class TestHiveContext(
120123
@transient override val sparkSession: TestHiveSparkSession)
121124
extends SQLContext(sparkSession) {
122125

123-
val HIVE_CONTRIB_JAR: String = "hive-contrib-0.13.1.jar"
124-
val HIVE_HCATALOG_CORE_JAR: String = "hive-hcatalog-core-0.13.1.jar"
126+
val HIVE_CONTRIB_JAR: String =
127+
if (HiveUtils.isHive23) "hive-contrib-2.3.4.jar" else "hive-contrib-0.13.1.jar"
128+
val HIVE_HCATALOG_CORE_JAR: String =
129+
if (HiveUtils.isHive23) "hive-hcatalog-core-2.3.4.jar" else "hive-hcatalog-core-0.13.1.jar"
125130

126131
/**
127132
* If loadTestTables is false, no test tables are loaded. Note that this flag can only be true
123 KB
Binary file not shown.
258 KB
Binary file not shown.

sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,19 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
5858
}
5959

6060
test("shaded Protobuf") {
61-
assertLoads("org.apache.hive.com.google.protobuf.ServiceException")
61+
if (HiveUtils.isHive23) {
62+
assertLoads("com.google.protobuf.ServiceException")
63+
} else {
64+
assertLoads("org.apache.hive.com.google.protobuf.ServiceException")
65+
}
6266
}
6367

6468
test("shaded Kryo") {
65-
assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo")
69+
if (HiveUtils.isHive23) {
70+
assertLoads("com.esotericsoftware.kryo.Kryo")
71+
} else {
72+
assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo")
73+
}
6674
}
6775

6876
test("hive-common") {
@@ -81,7 +89,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
8189
}
8290

8391
test("parquet-hadoop-bundle") {
84-
assertLoads("parquet.hadoop.ParquetOutputFormat")
85-
assertLoads("parquet.hadoop.ParquetInputFormat")
92+
if (HiveUtils.isHive23) {
93+
assertLoads("org.apache.parquet.hadoop.ParquetOutputFormat")
94+
assertLoads("org.apache.parquet.hadoop.ParquetInputFormat")
95+
} else {
96+
assertLoads("parquet.hadoop.ParquetOutputFormat")
97+
assertLoads("parquet.hadoop.ParquetInputFormat")
98+
}
8699
}
87100
}

sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils {
186186
"--master", "local[2]",
187187
"--conf", "spark.ui.enabled=false",
188188
"--conf", "spark.master.rest.enabled=false",
189+
"--conf", "spark.sql.hive.metastore.version=1.2.1",
190+
"--conf", "spark.sql.hive.metastore.jars=maven",
189191
"--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}",
190192
"--conf", s"spark.sql.test.version.index=$index",
191193
"--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}",
@@ -203,6 +205,8 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils {
203205
"--master", "local[2]",
204206
"--conf", "spark.ui.enabled=false",
205207
"--conf", "spark.master.rest.enabled=false",
208+
"--conf", "spark.sql.hive.metastore.version=1.2.1",
209+
"--conf", "spark.sql.hive.metastore.jars=maven",
206210
"--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}",
207211
"--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}",
208212
unusedJar.toString)

0 commit comments

Comments
 (0)