From 158af667188299d4e7b70ae59286312396cec716 Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Fri, 29 Nov 2019 15:23:32 +0900
Subject: [PATCH 1/2] Revert "Revert "[SPARK-29991][INFRA] Support `test-hive1.2` in PR Builder""

This reverts commit 9351e3e76fb11e9fdaf39aef5aea86fdeccd6f28.
---
 dev/run-tests.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/dev/run-tests.py b/dev/run-tests.py
index 2d52ead06a041..13a2960049a54 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -283,12 +283,16 @@ def get_hadoop_profiles(hadoop_version):
     """
 
     sbt_maven_hadoop_profiles = {
-        "hadoop2.7": ["-Phadoop-2.7", "-Phive-1.2"],
-        "hadoop3.2": ["-Phadoop-3.2", "-Phive-2.3"],
+        "hadoop2.7": ["-Phadoop-2.7"],
+        "hadoop3.2": ["-Phadoop-3.2"],
     }
 
     if hadoop_version in sbt_maven_hadoop_profiles:
-        return sbt_maven_hadoop_profiles[hadoop_version]
+        if ("ghprbPullTitle" in os.environ and
+                "test-hive1.2" in os.environ["ghprbPullTitle"].lower()):
+            return sbt_maven_hadoop_profiles[hadoop_version] + ["-Phive-1.2"]
+        else:
+            return sbt_maven_hadoop_profiles[hadoop_version]
     else:
         print("[error] Could not find", hadoop_version, "in the list. Valid options",
               " are", sbt_maven_hadoop_profiles.keys())

From 2135955c8e2362ae48ff443df249e55f801e0c63 Mon Sep 17 00:00:00 2001
From: HyukjinKwon
Date: Fri, 29 Nov 2019 16:19:39 +0900
Subject: [PATCH 2/2] Support `test-hive1.2` and `test-hive2.3` (default) in PR builder

---
 appveyor.yml             |  2 +-
 dev/run-tests-jenkins.py |  5 +++
 dev/run-tests.py         | 80 ++++++++++++++++++++++++----------------
 project/SparkBuild.scala |  3 +-
 4 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 325fd67abc674..00c688ba18eb6 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -53,7 +53,7 @@ install:
 build_script:
   # '-Djna.nosys=true' is required to avoid kernel32.dll load failure.
   # See SPARK-28759.
-  - cmd: mvn -DskipTests -Psparkr -Phive -Phive-1.2 -Djna.nosys=true package
+  - cmd: mvn -DskipTests -Psparkr -Phive -Djna.nosys=true package
 
 environment:
   NOT_CRAN: true
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 5429aeba8ea13..18aabc3f63abe 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -182,6 +182,11 @@ def main():
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
     if "test-hadoop3.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
+    # Switch the Hive profile based on the PR title:
+    if "test-hive1.2" in ghprb_pull_title:
+        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2"
+    if "test-hive2.3" in ghprb_pull_title:
+        os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3"
 
     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 13a2960049a54..5255a77ec2081 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -179,7 +179,8 @@ def run_apache_rat_checks():
     run_cmd([os.path.join(SPARK_HOME, "dev", "check-license")])
 
 
-def run_scala_style_checks(build_profiles):
+def run_scala_style_checks(extra_profiles):
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     set_title_and_block("Running Scala style checks", "BLOCK_SCALA_STYLE")
     profiles = " ".join(build_profiles)
     print("[info] Checking Scala style using SBT with these profiles: ", profiles)
@@ -288,20 +289,35 @@ def get_hadoop_profiles(hadoop_version):
     }
 
     if hadoop_version in sbt_maven_hadoop_profiles:
-        if ("ghprbPullTitle" in os.environ and
-                "test-hive1.2" in os.environ["ghprbPullTitle"].lower()):
-            return sbt_maven_hadoop_profiles[hadoop_version] + ["-Phive-1.2"]
-        else:
-            return sbt_maven_hadoop_profiles[hadoop_version]
+        return sbt_maven_hadoop_profiles[hadoop_version]
     else:
         print("[error] Could not find", hadoop_version, "in the list. Valid options",
               " are", sbt_maven_hadoop_profiles.keys())
         sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
 
 
-def build_spark_maven(hadoop_version):
+def get_hive_profiles(hive_version):
+    """
+    For the given Hive version tag, return a list of Maven/SBT profile flags for
+    building and testing against that Hive version.
+    """
+
+    sbt_maven_hive_profiles = {
+        "hive1.2": ["-Phive-1.2"],
+        "hive2.3": ["-Phive-2.3"],
+    }
+
+    if hive_version in sbt_maven_hive_profiles:
+        return sbt_maven_hive_profiles[hive_version]
+    else:
+        print("[error] Could not find", hive_version, "in the list. Valid options",
Valid options", + " are", sbt_maven_hive_profiles.keys()) + sys.exit(int(os.environ.get("CURRENT_BLOCK", 255))) + + +def build_spark_maven(extra_profiles): # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags mvn_goals = ["clean", "package", "-DskipTests"] profiles_and_goals = build_profiles + mvn_goals @@ -310,9 +326,9 @@ def build_spark_maven(hadoop_version): exec_maven(profiles_and_goals) -def build_spark_sbt(hadoop_version): +def build_spark_sbt(extra_profiles): # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags sbt_goals = ["test:package", # Build test jars as some tests depend on them "streaming-kinesis-asl-assembly/assembly"] profiles_and_goals = build_profiles + sbt_goals @@ -322,10 +338,10 @@ def build_spark_sbt(hadoop_version): exec_sbt(profiles_and_goals) -def build_spark_unidoc_sbt(hadoop_version): +def build_spark_unidoc_sbt(extra_profiles): set_title_and_block("Building Unidoc API Documentation", "BLOCK_DOCUMENTATION") # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags sbt_goals = ["unidoc"] profiles_and_goals = build_profiles + sbt_goals @@ -335,9 +351,9 @@ def build_spark_unidoc_sbt(hadoop_version): exec_sbt(profiles_and_goals) -def build_spark_assembly_sbt(hadoop_version, checkstyle=False): +def build_spark_assembly_sbt(extra_profiles, checkstyle=False): # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags sbt_goals = ["assembly/package"] profiles_and_goals = build_profiles + sbt_goals print("[info] Building Spark assembly using SBT with these arguments: ", @@ -347,25 +363,25 @@ def build_spark_assembly_sbt(hadoop_version, checkstyle=False): if checkstyle: run_java_style_checks(build_profiles) - build_spark_unidoc_sbt(hadoop_version) + build_spark_unidoc_sbt(extra_profiles) -def build_apache_spark(build_tool, hadoop_version): - """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or - `maven`). Defaults to using `sbt`.""" +def build_apache_spark(build_tool, extra_profiles): + """Will build Spark with the extra profiles and the passed in build tool + (either `sbt` or `maven`). 
 
     set_title_and_block("Building Spark", "BLOCK_BUILD")
 
     rm_r("lib_managed")
 
     if build_tool == "maven":
-        build_spark_maven(hadoop_version)
+        build_spark_maven(extra_profiles)
     else:
-        build_spark_sbt(hadoop_version)
+        build_spark_sbt(extra_profiles)
 
 
-def detect_binary_inop_with_mima(hadoop_version):
-    build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+def detect_binary_inop_with_mima(extra_profiles):
+    build_profiles = extra_profiles + modules.root.build_profile_flags
     set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA")
     profiles = " ".join(build_profiles)
     print("[info] Detecting binary incompatibilities with MiMa using SBT with these profiles: ",
@@ -399,14 +415,14 @@ def run_scala_tests_sbt(test_modules, test_profiles):
     exec_sbt(profiles_and_goals)
 
 
-def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
+def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags):
     """Function to properly execute all tests passed in as a set from the
     `determine_test_suites` function"""
     set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")
 
     test_modules = set(test_modules)
 
-    test_profiles = get_hadoop_profiles(hadoop_version) + \
+    test_profiles = extra_profiles + \
         list(set(itertools.chain.from_iterable(m.build_profile_flags for m in test_modules)))
 
     if excluded_tags:
@@ -559,6 +575,7 @@ def main():
         # to reflect the environment settings
         build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
         hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
+        hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")
         test_env = "amplab_jenkins"
         # add path for Python3 in Jenkins if we're calling from a Jenkins machine
         # TODO(sknapp): after all builds are ported to the ubuntu workers, change this to be:
@@ -568,10 +585,12 @@ def main():
         # else we're running locally and can use local settings
         build_tool = "sbt"
         hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
+        hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
         test_env = "local"
 
     print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
-          "under environment", test_env)
+          "and Hive profile", hive_version, "under environment", test_env)
+    extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version)
 
     changed_modules = None
     changed_files = None
@@ -605,8 +624,7 @@ def main():
     if not changed_files or any(f.endswith(".scala")
                                 or f.endswith("scalastyle-config.xml")
                                 for f in changed_files):
-        build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
-        run_scala_style_checks(build_profiles)
+        run_scala_style_checks(extra_profiles)
     should_run_java_style_checks = False
     if not changed_files or any(f.endswith(".java")
                                 or f.endswith("checkstyle.xml")
@@ -634,18 +652,18 @@ def main():
         run_build_tests()
 
     # spark build
-    build_apache_spark(build_tool, hadoop_version)
+    build_apache_spark(build_tool, extra_profiles)
 
     # backwards compatibility checks
     if build_tool == "sbt":
         # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima(hadoop_version)
+        detect_binary_inop_with_mima(extra_profiles)
         # Since we did not build assembly/package before running dev/mima, we need to
        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks)
+        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
 
     # run the test suites
-    run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
+    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags)
 
     modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
     if modules_with_python_tests:
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 8dda5809fa374..5a9cf11f94f11 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -476,7 +476,8 @@ object SparkParallelTestGrouping {
     "org.apache.spark.ml.classification.LinearSVCSuite",
     "org.apache.spark.sql.SQLQueryTestSuite",
     "org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite",
-    "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite"
+    "org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite",
+    "org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite"
   )
 
   private val DEFAULT_TEST_GROUP = "default_test_group"
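
For reference, the Hive profile selection introduced above can also be tried outside Jenkins. The snippet below is an illustrative standalone sketch, not part of the patch: it mirrors the sbt_maven_hive_profiles mapping and error handling added to dev/run-tests.py, and uses the HIVE_PROFILE environment variable that the patch reads in the local (non-Jenkins) branch of main(); on Jenkins the switch is instead AMPLAB_JENKINS_BUILD_HIVE_PROFILE, set when "test-hive1.2" or "test-hive2.3" appears in the PR title.

    import os
    import sys

    # Mirrors the mapping added to dev/run-tests.py; "hive2.3" is the default tag.
    SBT_MAVEN_HIVE_PROFILES = {
        "hive1.2": ["-Phive-1.2"],
        "hive2.3": ["-Phive-2.3"],
    }


    def get_hive_profiles(hive_version):
        # Same behaviour as the patched helper: return the profile flags,
        # or fail fast when an unknown Hive version tag is given.
        if hive_version in SBT_MAVEN_HIVE_PROFILES:
            return SBT_MAVEN_HIVE_PROFILES[hive_version]
        print("[error] Could not find", hive_version, "in the list. Valid options",
              " are", SBT_MAVEN_HIVE_PROFILES.keys())
        sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))


    if __name__ == "__main__":
        # Locally: HIVE_PROFILE=hive1.2 ./dev/run-tests picks -Phive-1.2;
        # with no variable set the default hive2.3 profile is used.
        print(get_hive_profiles(os.environ.get("HIVE_PROFILE", "hive2.3")))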