
Commit 2135955

Support test-hive1.2 and test-hive2.3 (default) in PR builder
1 parent 158af66 commit 2135955

File tree: 4 files changed (+57, -33 lines): appveyor.yml, dev/run-tests-jenkins.py, dev/run-tests.py, project/SparkBuild.scala


appveyor.yml

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ install:
 build_script:
   # '-Djna.nosys=true' is required to avoid kernel32.dll load failure.
   # See SPARK-28759.
-  - cmd: mvn -DskipTests -Psparkr -Phive -Phive-1.2 -Djna.nosys=true package
+  - cmd: mvn -DskipTests -Psparkr -Phive -Djna.nosys=true package
 
 environment:
   NOT_CRAN: true

dev/run-tests-jenkins.py

Lines changed: 5 additions & 0 deletions
@@ -182,6 +182,11 @@ def main():
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
     if "test-hadoop3.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
+    # Switch the Hive profile based on the PR title:
+    if "test-hive1.2" in ghprb_pull_title:
+        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2"
+    if "test-hive2.3" in ghprb_pull_title:
+        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3"
 
     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]
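For reference, the title-based switch above can be read as a small standalone rule; the sketch below mirrors it (the helper name hive_profile_from_title and the sample titles are illustrative, not part of the change), including what happens when no tag or both tags appear:

def hive_profile_from_title(ghprb_pull_title):
    # Illustrative mirror of the PR-title switch above: "test-hive2.3" wins if
    # both tags appear, and None is returned when neither tag is present
    # (dev/run-tests.py then falls back to its "hive2.3" default).
    profile = None
    if "test-hive1.2" in ghprb_pull_title:
        profile = "hive1.2"
    if "test-hive2.3" in ghprb_pull_title:
        profile = "hive2.3"
    return profile

hive_profile_from_title("[SPARK-XXXXX][test-hive1.2] Fix something")  # -> "hive1.2"
hive_profile_from_title("[SPARK-XXXXX] Fix something")                # -> None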

dev/run-tests.py

Lines changed: 49 additions & 31 deletions
@@ -179,7 +179,8 @@ def run_apache_rat_checks():
     run_cmd([os.path.join(SPARK_HOME, "dev", "check-license")])
 
 
-def run_scala_style_checks(build_profiles):
+def run_scala_style_checks(extra_profiles):
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     set_title_and_block("Running Scala style checks", "BLOCK_SCALA_STYLE")
     profiles = " ".join(build_profiles)
     print("[info] Checking Scala style using SBT with these profiles: ", profiles)
@@ -288,20 +289,35 @@ def get_hadoop_profiles(hadoop_version):
     }
 
     if hadoop_version in sbt_maven_hadoop_profiles:
-        if ("ghprbPullTitle" in os.environ and
-                "test-hive1.2" in os.environ["ghprbPullTitle"].lower()):
-            return sbt_maven_hadoop_profiles[hadoop_version] + ["-Phive-1.2"]
-        else:
-            return sbt_maven_hadoop_profiles[hadoop_version]
+        return sbt_maven_hadoop_profiles[hadoop_version]
     else:
         print("[error] Could not find", hadoop_version, "in the list. Valid options",
               " are", sbt_maven_hadoop_profiles.keys())
         sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
 
 
-def build_spark_maven(hadoop_version):
+def get_hive_profiles(hive_version):
+    """
+    For the given Hive version tag, return a list of Maven/SBT profile flags for
+    building and testing against that Hive version.
+    """
+
+    sbt_maven_hive_profiles = {
+        "hive1.2": ["-Phive-1.2"],
+        "hive2.3": ["-Phive-2.3"],
+    }
+
+    if hive_version in sbt_maven_hive_profiles:
+        return sbt_maven_hive_profiles[hive_version]
+    else:
+        print("[error] Could not find", hive_version, "in the list. Valid options",
+              " are", sbt_maven_hive_profiles.keys())
+        sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
+
+
+def build_spark_maven(extra_profiles):
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     mvn_goals = ["clean", "package", "-DskipTests"]
     profiles_and_goals = build_profiles + mvn_goals
 
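Read against the dictionaries above, the new helper behaves roughly as follows (illustrative values, not captured script output; the -Phadoop-2.7 flag is assumed from the unchanged get_hadoop_profiles table):

get_hive_profiles("hive1.2")   # -> ["-Phive-1.2"]
get_hive_profiles("hive2.3")   # -> ["-Phive-2.3"]
get_hive_profiles("hive3.0")   # prints "[error] Could not find hive3.0 ..." and exits

# Later in main(), the Hadoop and Hive flags are simply concatenated:
extra_profiles = get_hadoop_profiles("hadoop2.7") + get_hive_profiles("hive2.3")
# -> ["-Phadoop-2.7", "-Phive-2.3"]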
@@ -310,9 +326,9 @@ def build_spark_maven(hadoop_version):
     exec_maven(profiles_and_goals)
 
 
-def build_spark_sbt(hadoop_version):
+def build_spark_sbt(extra_profiles):
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     sbt_goals = ["test:package",  # Build test jars as some tests depend on them
                  "streaming-kinesis-asl-assembly/assembly"]
     profiles_and_goals = build_profiles + sbt_goals
@@ -322,10 +338,10 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)
 
 
-def build_spark_unidoc_sbt(hadoop_version):
+def build_spark_unidoc_sbt(extra_profiles):
     set_title_and_block("Building Unidoc API Documentation", "BLOCK_DOCUMENTATION")
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     sbt_goals = ["unidoc"]
     profiles_and_goals = build_profiles + sbt_goals
 
@@ -335,9 +351,9 @@ def build_spark_unidoc_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)
 
 
-def build_spark_assembly_sbt(hadoop_version, checkstyle=False):
+def build_spark_assembly_sbt(extra_profiles, checkstyle=False):
     # Enable all of the profiles for the build:
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     sbt_goals = ["assembly/package"]
     profiles_and_goals = build_profiles + sbt_goals
     print("[info] Building Spark assembly using SBT with these arguments: ",
@@ -347,25 +363,25 @@ def build_spark_assembly_sbt(hadoop_version, checkstyle=False):
     if checkstyle:
         run_java_style_checks(build_profiles)
 
-    build_spark_unidoc_sbt(hadoop_version)
+    build_spark_unidoc_sbt(extra_profiles)
 
 
-def build_apache_spark(build_tool, hadoop_version):
-    """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
-    `maven`). Defaults to using `sbt`."""
+def build_apache_spark(build_tool, extra_profiles):
+    """Will build Spark with the extra profiles and the passed in build tool
+    (either `sbt` or `maven`). Defaults to using `sbt`."""
 
     set_title_and_block("Building Spark", "BLOCK_BUILD")
 
     rm_r("lib_managed")
 
     if build_tool == "maven":
-        build_spark_maven(hadoop_version)
+        build_spark_maven(extra_profiles)
     else:
-        build_spark_sbt(hadoop_version)
+        build_spark_sbt(extra_profiles)
 
 
-def detect_binary_inop_with_mima(hadoop_version):
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+def detect_binary_inop_with_mima(extra_profiles):
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA")
     profiles = " ".join(build_profiles)
     print("[info] Detecting binary incompatibilities with MiMa using SBT with these profiles: ",
@@ -399,14 +415,14 @@ def run_scala_tests_sbt(test_modules, test_profiles):
     exec_sbt(profiles_and_goals)
 
 
-def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
+def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags):
     """Function to properly execute all tests passed in as a set from the
     `determine_test_suites` function"""
     set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")
 
     test_modules = set(test_modules)
 
-    test_profiles = get_hadoop_profiles(hadoop_version) + \
+    test_profiles = extra_profiles + \
         list(set(itertools.chain.from_iterable(m.build_profile_flags for m in test_modules)))
 
     if excluded_tags:
@@ -559,6 +575,7 @@ def main():
         # to reflect the environment settings
         build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
         hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
+        hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")
         test_env = "amplab_jenkins"
         # add path for Python3 in Jenkins if we're calling from a Jenkins machine
         # TODO(sknapp): after all builds are ported to the ubuntu workers, change this to be:
@@ -568,10 +585,12 @@ def main():
         # else we're running locally and can use local settings
         build_tool = "sbt"
         hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
+        hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
         test_env = "local"
 
     print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
-          "under environment", test_env)
+          "and Hive profile", hive_version, "under environment", test_env)
+    extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version)
 
     changed_modules = None
     changed_files = None
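For local runs this means the Hive profile can be switched the same way as the Hadoop one, via an environment variable; a minimal sketch of such a run (the subprocess invocation itself is illustrative, only HADOOP_PROFILE/HIVE_PROFILE and their defaults come from the change above):

import os
import subprocess

# Run the test script locally against Hive 1.2; without HIVE_PROFILE it
# falls back to the "hive2.3" default introduced above.
env = dict(os.environ, HADOOP_PROFILE="hadoop2.7", HIVE_PROFILE="hive1.2")
subprocess.check_call(["python3", "./dev/run-tests.py"], env=env)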
@@ -605,8 +624,7 @@ def main():
     if not changed_files or any(f.endswith(".scala")
                                 or f.endswith("scalastyle-config.xml")
                                 for f in changed_files):
-        build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-        run_scala_style_checks(build_profiles)
+        run_scala_style_checks(extra_profiles)
     should_run_java_style_checks = False
     if not changed_files or any(f.endswith(".java")
                                 or f.endswith("checkstyle.xml")
@@ -634,18 +652,18 @@ def main():
         run_build_tests()
 
     # spark build
-    build_apache_spark(build_tool, hadoop_version)
+    build_apache_spark(build_tool, extra_profiles)
 
     # backwards compatibility checks
     if build_tool == "sbt":
         # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima(hadoop_version)
+        detect_binary_inop_with_mima(extra_profiles)
         # Since we did not build assembly/package before running dev/mima, we need to
         # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks)
+        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
 
     # run the test suites
-    run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
+    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags)
 
     modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
     if modules_with_python_tests:

project/SparkBuild.scala

Lines changed: 2 additions & 1 deletion
@@ -476,7 +476,8 @@ object SparkParallelTestGrouping {
     "org.apache.spark.ml.classification.LinearSVCSuite",
     "org.apache.spark.sql.SQLQueryTestSuite",
     "org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite",
-    "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite"
+    "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite",
+    "org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite"
   )
 
   private val DEFAULT_TEST_GROUP = "default_test_group"
