From f793e604acaf2f4e1635ff7e78583e4394a52264 Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Thu, 5 Dec 2019 18:31:04 +0900
Subject: [PATCH 01/17] HADOOP-16747. Support Python 3 in dev-support scripts.

---
 dev-support/bin/checkcompatibility.py       | 504 ++++++++++----------
 dev-support/determine-flaky-tests-hadoop.py | 320 ++++++-------
 2 files changed, 402 insertions(+), 422 deletions(-)

diff --git a/dev-support/bin/checkcompatibility.py b/dev-support/bin/checkcompatibility.py
index ad1e9cbe47ff2..a98cf7634b0ca 100755
--- a/dev-support/bin/checkcompatibility.py
+++ b/dev-support/bin/checkcompatibility.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -30,316 +30,308 @@
 import shutil
 import subprocess
 import sys
-import urllib2
-try:
-  import argparse
-except ImportError:
-  sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.")
-  sys.exit(2)
+import urllib.request
+import argparse
 
 # Various relative paths
 REPO_DIR = os.getcwd()
 
+
 def check_output(*popenargs, **kwargs):
-  r"""Run command with arguments and return its output as a byte string.
-  Backported from Python 2.7 as it's implemented as pure python on stdlib.
-  >>> check_output(['/usr/bin/python', '--version'])
-  Python 2.6.2
-  """
-  process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
-  output, _ = process.communicate()
-  retcode = process.poll()
-  if retcode:
-    cmd = kwargs.get("args")
-    if cmd is None:
-      cmd = popenargs[0]
-    error = subprocess.CalledProcessError(retcode, cmd)
-    error.output = output
-    raise error
-  return output
+    """ Run command with arguments and return its output as a string. """
+    return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8')
+
 
 def get_repo_dir():
-  """ Return the path to the top of the repo. """
-  dirname, _ = os.path.split(os.path.abspath(__file__))
-  return os.path.join(dirname, "../..")
+    """ Return the path to the top of the repo. """
+    dirname, _ = os.path.split(os.path.abspath(__file__))
+    return os.path.join(dirname, "../..")
+
 
 def get_scratch_dir():
-  """ Return the path to the scratch dir that we build within. """
-  scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
-  if not os.path.exists(scratch_dir):
-    os.makedirs(scratch_dir)
-  return scratch_dir
+    """ Return the path to the scratch dir that we build within. """
+    scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
+    if not os.path.exists(scratch_dir):
+        os.makedirs(scratch_dir)
+    return scratch_dir
+
 
 def get_java_acc_dir():
-  """ Return the path where we check out the Java API Compliance Checker. """
-  return os.path.join(get_repo_dir(), "target", "java-acc")
+    """ Return the path where we check out the Java API Compliance Checker. """
+    return os.path.join(get_repo_dir(), "target", "java-acc")
 
 
 def clean_scratch_dir(scratch_dir):
-  """ Clean up and re-create the scratch directory. """
-  if os.path.exists(scratch_dir):
-    logging.info("Removing scratch dir %s...", scratch_dir)
-    shutil.rmtree(scratch_dir)
-  logging.info("Creating empty scratch dir %s...", scratch_dir)
-  os.makedirs(scratch_dir)
+    """ Clean up and re-create the scratch directory. """
+    if os.path.exists(scratch_dir):
+        logging.info("Removing scratch dir %s...", scratch_dir)
+        shutil.rmtree(scratch_dir)
+    logging.info("Creating empty scratch dir %s...", scratch_dir)
+    os.makedirs(scratch_dir)
 
 
 def checkout_java_tree(rev, path):
-  """ Check out the Java source tree for the given revision into
-  the given path. """
-  logging.info("Checking out %s in %s", rev, path)
-  os.makedirs(path)
-  # Extract java source
-  subprocess.check_call(["bash", '-o', 'pipefail', "-c",
-                         ("git archive --format=tar %s | " +
-                          "tar -C \"%s\" -xf -") % (rev, path)],
-                        cwd=get_repo_dir())
+    """ Check out the Java source tree for the given revision into
+    the given path. """
+    logging.info("Checking out %s in %s", rev, path)
+    os.makedirs(path)
+    # Extract java source
+    subprocess.check_call(["bash", '-o', 'pipefail', "-c",
+                           ("git archive --format=tar %s | " +
+                            "tar -C \"%s\" -xf -") % (rev, path)],
+                          cwd=get_repo_dir())
+
 
 def get_git_hash(revname):
-  """ Convert 'revname' to its SHA-1 hash. """
-  return check_output(["git", "rev-parse", revname],
-                      cwd=get_repo_dir()).strip()
+    """ Convert 'revname' to its SHA-1 hash. """
+    return check_output(["git", "rev-parse", revname],
+                        cwd=get_repo_dir()).strip()
+
 
 def get_repo_name():
-  """Get the name of the repo based on the git remote."""
-  remotes = check_output(["git", "remote", "-v"],
-                         cwd=get_repo_dir()).strip().split("\n")
-  # Example output:
-  # origin https://github.com/apache/hadoop.git (fetch)
-  # origin https://github.com/apache/hadoop.git (push)
-  remote_url = remotes[0].split("\t")[1].split(" ")[0]
-  remote = remote_url.split("/")[-1]
-  if remote.endswith(".git"):
-    remote = remote[:-4]
-  return remote
+    """Get the name of the repo based on the git remote."""
+    remotes = check_output(["git", "remote", "-v"],
+                           cwd=get_repo_dir()).strip().split("\n")
+    # Example output:
+    # origin https://github.com/apache/hadoop.git (fetch)
+    # origin https://github.com/apache/hadoop.git (push)
+    remote_url = remotes[0].split("\t")[1].split(" ")[0]
+    remote = remote_url.split("/")[-1]
+    if remote.endswith(".git"):
+        remote = remote[:-4]
+    return remote
+
 
 def build_tree(java_path):
-  """ Run the Java build within 'path'. """
-  logging.info("Building in %s...", java_path)
-  subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true",
-                         "package"],
-                        cwd=java_path)
+    """ Run the Java build within 'path'. """
+    logging.info("Building in %s...", java_path)
+    subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true",
+                           "package"],
+                          cwd=java_path)
 
 
 def checkout_java_acc(force):
-  """
-  Check out the Java API Compliance Checker.
-  If 'force' is true, will re-download even if the directory exists.
-  """
-  acc_dir = get_java_acc_dir()
-  if os.path.exists(acc_dir):
-    logging.info("Java ACC is already downloaded.")
-    if not force:
-      return
-    logging.info("Forcing re-download.")
-    shutil.rmtree(acc_dir)
-
-  logging.info("Downloading Java ACC...")
-
-  url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
-  scratch_dir = get_scratch_dir()
-  path = os.path.join(scratch_dir, os.path.basename(url))
-  jacc = urllib2.urlopen(url)
-  with open(path, 'wb') as w:
-    w.write(jacc.read())
-
-  subprocess.check_call(["tar", "xzf", path],
-                        cwd=scratch_dir)
-
-  shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"),
-              os.path.join(acc_dir))
+    """
+    Check out the Java API Compliance Checker.
+    If 'force' is true, will re-download even if the directory exists.
+    """
+    acc_dir = get_java_acc_dir()
+    if os.path.exists(acc_dir):
+        logging.info("Java ACC is already downloaded.")
+        if not force:
+            return
+        logging.info("Forcing re-download.")
+        shutil.rmtree(acc_dir)
 
-def find_jars(path):
-  """ Return a list of jars within 'path' to be checked for compatibility. """
-  all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())
+    logging.info("Downloading Java ACC...")
 
-  return [j for j in all_jars if (
-      "-tests" not in j and
-      "-sources" not in j and
-      "-with-dependencies" not in j)]
+    url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
+    scratch_dir = get_scratch_dir()
+    path = os.path.join(scratch_dir, os.path.basename(url))
+    jacc = urllib.request.urlopen(url)
+    with open(path, 'wb') as w:
+        w.write(jacc.read())
+
+    subprocess.check_call(["tar", "xzf", path],
+                          cwd=scratch_dir)
+
+    shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"),
+                os.path.join(acc_dir))
+
+
+def find_jars(path):
+    """ Return a list of jars within 'path' to be checked for compatibility. """
+    all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())
+
+    return [j for j in all_jars if (
+        "-tests" not in j and
+        "-sources" not in j and
+        "-with-dependencies" not in j)]
+
 
 def write_xml_file(path, version, jars):
-  """Write the XML manifest file for JACC."""
-  with open(path, "wt") as f:
-    f.write("<version>" + version + "</version>\n")
-    f.write("<archives>")
-    for j in jars:
-      f.write(j + "\n")
-    f.write("</archives>")
+    """Write the XML manifest file for JACC."""
+    with open(path, "wt") as f:
+        f.write("<version>" + version + "</version>\n")
+        f.write("<archives>")
+        for j in jars:
+            f.write(j + "\n")
+        f.write("</archives>")
+
 
 def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
-  """ Run the compliance checker to compare 'src' and 'dst'. """
-  logging.info("Will check compatibility between original jars:\n\t%s\n" +
-               "and new jars:\n\t%s",
-               "\n\t".join(src_jars),
-               "\n\t".join(dst_jars))
-
-  java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl")
-
-  src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
-  dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
-  write_xml_file(src_xml_path, src_name, src_jars)
-  write_xml_file(dst_xml_path, dst_name, dst_jars)
-
-  out_path = os.path.join(get_scratch_dir(), "report.html")
-
-  args = ["perl", java_acc_path,
-          "-l", get_repo_name(),
-          "-d1", src_xml_path,
-          "-d2", dst_xml_path,
-          "-report-path", out_path]
-
-  if annotations is not None:
-    annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
-    with file(annotations_path, "w") as f:
-      for ann in annotations:
-        print >>f, ann
-    args += ["-annotations-list", annotations_path]
-
-  subprocess.check_call(args)
+    """ Run the compliance checker to compare 'src' and 'dst'. """
+    logging.info("Will check compatibility between original jars:\n\t%s\n" +
+                 "and new jars:\n\t%s",
+                 "\n\t".join(src_jars),
+                 "\n\t".join(dst_jars))
+
+    java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl")
+
+    src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
+    dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
+    write_xml_file(src_xml_path, src_name, src_jars)
+    write_xml_file(dst_xml_path, dst_name, dst_jars)
+
+    out_path = os.path.join(get_scratch_dir(), "report.html")
+
+    args = ["perl", java_acc_path,
+            "-l", get_repo_name(),
+            "-d1", src_xml_path,
+            "-d2", dst_xml_path,
+            "-report-path", out_path]
+
+    if annotations is not None:
+        annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
+        with open(annotations_path, "w") as f:
+            for ann in annotations:
+                print(ann, file=f)
+        args += ["-annotations-list", annotations_path]
+
+    subprocess.check_call(args)
+
 
 def filter_jars(jars, include_filters, exclude_filters):
-  """Filter the list of JARs based on include and exclude filters."""
-  filtered = []
-  # Apply include filters
-  for j in jars:
-    found = False
-    basename = os.path.basename(j)
-    for f in include_filters:
-      if f.match(basename):
-        found = True
-        break
-    if found:
-      filtered += [j]
-    else:
-      logging.debug("Ignoring JAR %s", j)
-  # Apply exclude filters
-  exclude_filtered = []
-  for j in filtered:
-    basename = os.path.basename(j)
-    found = False
-    for f in exclude_filters:
-      if f.match(basename):
-        found = True
-        break
-    if found:
-      logging.debug("Ignoring JAR %s", j)
-    else:
-      exclude_filtered += [j]
-
-  return exclude_filtered
+    """Filter the list of JARs based on include and exclude filters."""
+    filtered = []
+    # Apply include filters
+    for j in jars:
+        found = False
+        basename = os.path.basename(j)
+        for f in include_filters:
+            if f.match(basename):
+                found = True
+                break
+        if found:
+            filtered += [j]
+        else:
+            logging.debug("Ignoring JAR %s", j)
+    # Apply exclude filters
+    exclude_filtered = []
+    for j in filtered:
+        basename = os.path.basename(j)
+        found = False
+        for f in exclude_filters:
+            if f.match(basename):
+                found = True
+                break
+        if found:
+            logging.debug("Ignoring JAR %s", j)
+        else:
+            exclude_filtered += [j]
+
+    return exclude_filtered
+
 
 def main():
-  """Main function."""
-  logging.basicConfig(level=logging.INFO)
-  parser = argparse.ArgumentParser(
-      description="Run Java API Compliance Checker.")
-  parser.add_argument("-f", "--force-download",
-                      action="store_true",
-                      help="Download dependencies (i.e. Java JAVA_ACC) " +
-                      "even if they are already present")
-  parser.add_argument("-i", "--include-file",
-                      action="append",
-                      dest="include_files",
-                      help="Regex filter for JAR files to be included. " +
-                      "Applied before the exclude filters. " +
-                      "Can be specified multiple times.")
-  parser.add_argument("-e", "--exclude-file",
-                      action="append",
-                      dest="exclude_files",
-                      help="Regex filter for JAR files to be excluded. " +
-                      "Applied after the include filters. " +
-                      "Can be specified multiple times.")
-  parser.add_argument("-a", "--annotation",
-                      action="append",
-                      dest="annotations",
-                      help="Fully-qualified Java annotation. " +
-                      "Java ACC will only check compatibility of " +
-                      "annotated classes. Can be specified multiple times.")
-  parser.add_argument("--skip-clean",
-                      action="store_true",
-                      help="Skip cleaning the scratch directory.")
-  parser.add_argument("--skip-build",
-                      action="store_true",
-                      help="Skip building the projects.")
-  parser.add_argument("src_rev", nargs=1, help="Source revision.")
-  parser.add_argument("dst_rev", nargs="?", default="HEAD",
-                      help="Destination revision. " +
-                      "If not specified, will use HEAD.")
-
-  if len(sys.argv) == 1:
-    parser.print_help()
-    sys.exit(1)
-
-  args = parser.parse_args()
-
-  src_rev, dst_rev = args.src_rev[0], args.dst_rev
-
-  logging.info("Source revision: %s", src_rev)
-  logging.info("Destination revision: %s", dst_rev)
-
-  # Construct the JAR regex patterns for filtering.
-  include_filters = []
-  if args.include_files is not None:
-    for f in args.include_files:
-      logging.info("Applying JAR filename include filter: %s", f)
-      include_filters += [re.compile(f)]
-  else:
-    include_filters = [re.compile(".*")]
-
-  exclude_filters = []
-  if args.exclude_files is not None:
-    for f in args.exclude_files:
-      logging.info("Applying JAR filename exclude filter: %s", f)
-      exclude_filters += [re.compile(f)]
-
-  # Construct the annotation list
-  annotations = args.annotations
-  if annotations is not None:
-    logging.info("Filtering classes using %d annotation(s):", len(annotations))
-    for a in annotations:
-      logging.info("\t%s", a)
-
-  # Download deps.
-  checkout_java_acc(args.force_download)
-
-  # Set up the build.
-  scratch_dir = get_scratch_dir()
-  src_dir = os.path.join(scratch_dir, "src")
-  dst_dir = os.path.join(scratch_dir, "dst")
-
-  if args.skip_clean:
-    logging.info("Skipping cleaning the scratch directory")
-  else:
-    clean_scratch_dir(scratch_dir)
-  # Check out the src and dst source trees.
-  checkout_java_tree(get_git_hash(src_rev), src_dir)
-  checkout_java_tree(get_git_hash(dst_rev), dst_dir)
-
-  # Run the build in each.
-  if args.skip_build:
-    logging.info("Skipping the build")
-  else:
-    build_tree(src_dir)
-    build_tree(dst_dir)
-
-  # Find the JARs.
-  src_jars = find_jars(src_dir)
-  dst_jars = find_jars(dst_dir)
-
-  # Filter the JARs.
-  src_jars = filter_jars(src_jars, include_filters, exclude_filters)
-  dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)
-
-  if len(src_jars) == 0 or len(dst_jars) == 0:
-    logging.error("No JARs found! Are your filters too strong?")
-    sys.exit(1)
-
-  run_java_acc(src_rev, src_jars,
-               dst_rev, dst_jars, annotations)
+    """Main function."""
+    logging.basicConfig(level=logging.INFO)
+    parser = argparse.ArgumentParser(
+        description="Run Java API Compliance Checker.")
+    parser.add_argument("-f", "--force-download",
+                        action="store_true",
+                        help="Download dependencies (i.e. Java JAVA_ACC) " +
+                             "even if they are already present")
+    parser.add_argument("-i", "--include-file",
+                        action="append",
+                        dest="include_files",
+                        help="Regex filter for JAR files to be included. " +
+                             "Applied before the exclude filters. " +
+                             "Can be specified multiple times.")
+    parser.add_argument("-e", "--exclude-file",
+                        action="append",
+                        dest="exclude_files",
+                        help="Regex filter for JAR files to be excluded. " +
+                             "Applied after the include filters. " +
+                             "Can be specified multiple times.")
+    parser.add_argument("-a", "--annotation",
+                        action="append",
+                        dest="annotations",
+                        help="Fully-qualified Java annotation. " +
+                             "Java ACC will only check compatibility of " +
+                             "annotated classes. Can be specified multiple times.")
+    parser.add_argument("--skip-clean",
+                        action="store_true",
+                        help="Skip cleaning the scratch directory.")
+    parser.add_argument("--skip-build",
+                        action="store_true",
+                        help="Skip building the projects.")
+    parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.")
+    parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD",
+                        help="Destination revision. " +
+                             "If not specified, will use HEAD.")
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args()
+
+    src_rev, dst_rev = args.src_rev[0], args.dst_rev
+
+    logging.info("Source revision: %s", src_rev)
+    logging.info("Destination revision: %s", dst_rev)
+
+    # Construct the JAR regex patterns for filtering.
+    include_filters = []
+    if args.include_files is not None:
+        for f in args.include_files:
+            logging.info("Applying JAR filename include filter: %s", f)
+            include_filters += [re.compile(f)]
+    else:
+        include_filters = [re.compile(".*")]
+
+    exclude_filters = []
+    if args.exclude_files is not None:
+        for f in args.exclude_files:
+            logging.info("Applying JAR filename exclude filter: %s", f)
+            exclude_filters += [re.compile(f)]
+
+    # Construct the annotation list
+    annotations = args.annotations
+    if annotations is not None:
+        logging.info("Filtering classes using %d annotation(s):", len(annotations))
+        for a in annotations:
+            logging.info("\t%s", a)
+
+    # Download deps.
+    checkout_java_acc(args.force_download)
+
+    # Set up the build.
+    scratch_dir = get_scratch_dir()
+    src_dir = os.path.join(scratch_dir, "src")
+    dst_dir = os.path.join(scratch_dir, "dst")
+
+    if args.skip_clean:
+        logging.info("Skipping cleaning the scratch directory")
+    else:
+        clean_scratch_dir(scratch_dir)
+    # Check out the src and dst source trees.
+    checkout_java_tree(get_git_hash(src_rev), src_dir)
+    checkout_java_tree(get_git_hash(dst_rev), dst_dir)
+
+    # Run the build in each.
+    if args.skip_build:
+        logging.info("Skipping the build")
+    else:
+        build_tree(src_dir)
+        build_tree(dst_dir)
+
+    # Find the JARs.
+    src_jars = find_jars(src_dir)
+    dst_jars = find_jars(dst_dir)
+
+    # Filter the JARs.
+    src_jars = filter_jars(src_jars, include_filters, exclude_filters)
+    dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)
+
+    if len(src_jars) == 0 or len(dst_jars) == 0:
+        logging.error("No JARs found! Are your filters too strong?")
+        sys.exit(1)
+
+    run_java_acc(src_rev, src_jars,
+                 dst_rev, dst_jars, annotations)
 
 
 if __name__ == "__main__":
-  main()
+    main()

diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py
index 8644299bba4a2..8436da85b4c58 100755
--- a/dev-support/determine-flaky-tests-hadoop.py
+++ b/dev-support/determine-flaky-tests-hadoop.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -35,22 +35,8 @@
 # at the failed test for the specific run is necessary.
 #
 import sys
-import platform
-sysversion = sys.hexversion
-onward30 = False
-if sysversion < 0x020600F0:
-  sys.exit("Minimum supported python version is 2.6, the current version is " +
-           "Python" + platform.python_version())
-
-if sysversion == 0x030000F0:
-  sys.exit("There is a known bug with Python" + platform.python_version() +
-           ", please try a different version");
-
-if sysversion < 0x03000000:
-  import urllib2
-else:
-  onward30 = True
-  import urllib.request
+
+import urllib.request
 
 import datetime
 import json as simplejson
@@ -60,7 +46,7 @@
 
 # Configuration
 DEFAULT_JENKINS_URL = "https://builds.apache.org"
-DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
+DEFAULT_JOB_NAME = "hadoop-qbt-trunk-java8-linux-x86"
 DEFAULT_NUM_PREVIOUS_DAYS = 14
 DEFAULT_TOP_NUM_FAILED_TEST = -1
 
@@ -69,177 +55,179 @@
 # total number of runs to examine
 numRunsToExamine = 0
 
-#summary mode
+# summary mode
 summary_mode = False
 
-#total number of errors
-error_count = 0
+# total number of errors
+ERROR_COUNT = 0
+
 
-""" Parse arguments """
 def parse_args():
-  parser = OptionParser()
-  parser.add_option("-J", "--jenkins-url", type="string",
-                    dest="jenkins_url", help="Jenkins URL",
-                    default=DEFAULT_JENKINS_URL)
-  parser.add_option("-j", "--job-name", type="string",
-                    dest="job_name", help="Job name to look at",
-                    default=DEFAULT_JOB_NAME)
-  parser.add_option("-n", "--num-days", type="int",
-                    dest="num_prev_days", help="Number of days to examine",
-                    default=DEFAULT_NUM_PREVIOUS_DAYS)
-  parser.add_option("-t", "--top", type="int",
-                    dest="num_failed_tests",
-                    help="Summary Mode, only show top number of failed tests",
-                    default=DEFAULT_TOP_NUM_FAILED_TEST)
-
-  (options, args) = parser.parse_args()
-  if args:
-    parser.error("unexpected arguments: " + repr(args))
-  return options
+    """ Parse arguments """
+    parser = OptionParser()
+    parser.add_option("-J", "--jenkins-url", type="string",
+                      dest="jenkins_url", help="Jenkins URL",
+                      default=DEFAULT_JENKINS_URL)
+    parser.add_option("-j", "--job-name", type="string",
+                      dest="job_name", help="Job name to look at",
+                      default=DEFAULT_JOB_NAME)
+    parser.add_option("-n", "--num-days", type="int",
+                      dest="num_prev_days", help="Number of days to examine",
+                      default=DEFAULT_NUM_PREVIOUS_DAYS)
+    parser.add_option("-t", "--top", type="int",
+                      dest="num_failed_tests",
+                      help="Summary Mode, only show top number of failed tests",
+                      default=DEFAULT_TOP_NUM_FAILED_TEST)
+
+    (options, args) = parser.parse_args()
+    if args:
+        parser.error("unexpected arguments: " + repr(args))
+    return options
+
 
-""" Load data from specified url """
 def load_url_data(url):
-  if onward30:
+    """ Load data from specified url """
     ourl = urllib.request.urlopen(url)
     codec = ourl.info().get_param('charset')
     content = ourl.read().decode(codec)
     data = simplejson.loads(content, strict=False)
-  else:
-    ourl = urllib2.urlopen(url)
-    data = simplejson.load(ourl, strict=False)
-  return data
+    return data
+
 
-""" List all builds of the target project. """
 def list_builds(jenkins_url, job_name):
-  global summary_mode
-  url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
-      jenkins=jenkins_url,
-      job_name=job_name)
-
-  try:
-    data = load_url_data(url)
-
-  except:
-    if not summary_mode:
-      logging.error("Could not fetch: %s" % url)
-    error_count += 1
-    raise
-  return data['builds']
+    """ List all builds of the target project. """
+    global summary_mode, ERROR_COUNT
+    url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
+        jenkins=jenkins_url,
+        job_name=job_name)
+
+    try:
+        data = load_url_data(url)
+
+    except:
+        if not summary_mode:
+            logging.error("Could not fetch: %s" % url)
+        ERROR_COUNT += 1
+        raise
+    return data['builds']
+
 
-""" Find the names of any tests which failed in the given build output URL. """
 def find_failing_tests(testReportApiJson, jobConsoleOutput):
-  global summary_mode
-  global error_count
-  ret = set()
-  try:
-    data = load_url_data(testReportApiJson)
-
-  except:
-    if not summary_mode:
-      logging.error(" Could not open testReport, check " +
-                    jobConsoleOutput + " for why it was reported failed")
-    error_count += 1
-    return ret
-
-  for suite in data['suites']:
-    for cs in suite['cases']:
-      status = cs['status']
-      errDetails = cs['errorDetails']
-      if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
-        ret.add(cs['className'] + "." + cs['name'])
-
-  if len(ret) == 0 and (not summary_mode):
-    logging.info(" No failed tests in testReport, check " +
-                 jobConsoleOutput + " for why it was reported failed.")
-  return ret
+    """ Find the names of any tests which failed in the given build output URL. """
+    global summary_mode, ERROR_COUNT
+    ret = set()
+    try:
+        data = load_url_data(testReportApiJson)
+
+    except:
+        if not summary_mode:
+            logging.error(" Could not open testReport, check " +
+                          jobConsoleOutput + " for why it was reported failed")
+        ERROR_COUNT += 1
+        return ret
+
+    for suite in data['suites']:
+        for cs in suite['cases']:
+            status = cs['status']
+            errDetails = cs['errorDetails']
+            if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
+                ret.add(cs['className'] + "." + cs['name'])
+
+    if len(ret) == 0 and (not summary_mode):
+        logging.info(" No failed tests in testReport, check " +
+                     jobConsoleOutput + " for why it was reported failed.")
+    return ret
+
 
-""" Iterate runs of specfied job within num_prev_days and collect results """
 def find_flaky_tests(jenkins_url, job_name, num_prev_days):
-  global numRunsToExamine
-  global summary_mode
-  all_failing = dict()
-  # First list all builds
-  builds = list_builds(jenkins_url, job_name)
-
-  # Select only those in the last N days
-  min_time = int(time.time()) - SECONDS_PER_DAY * num_prev_days
-  builds = [b for b in builds if (int(b['timestamp']) / 1000) > min_time]
-
-  # Filter out only those that failed
-  failing_build_urls = [(b['url'] , b['timestamp']) for b in builds
-      if (b['result'] in ('UNSTABLE', 'FAILURE'))]
-
-  tnum = len(builds)
-  num = len(failing_build_urls)
-  numRunsToExamine = tnum
-  if not summary_mode:
-    logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
-        + ") that have failed tests in the past " + str(num_prev_days) + " days"
-        + ((".", ", as listed below:\n")[num > 0]))
-
-  for failed_build_with_time in failing_build_urls:
-    failed_build = failed_build_with_time[0];
-    jobConsoleOutput = failed_build + "Console";
-    testReport = failed_build + "testReport";
-    testReportApiJson = testReport + "/api/json";
-
-    ts = float(failed_build_with_time[1]) / 1000.
-    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
-    if not summary_mode:
-      logging.info("===>%s" % str(testReport) + " (" + st + ")")
-    failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
-    if failing:
-      for ftest in failing:
-        if not summary_mode:
-          logging.info(" Failed test: %s" % ftest)
-        all_failing[ftest] = all_failing.get(ftest,0)+1
-
-  return all_failing
+    """ Iterate runs of specified job within num_prev_days and collect results """
+    global numRunsToExamine
+    global summary_mode
+    all_failing = dict()
+    # First list all builds
+    builds = list_builds(jenkins_url, job_name)
+
+    # Select only those in the last N days
+    min_time = time.time() - SECONDS_PER_DAY * num_prev_days
+    builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time]
+
+    # Filter out only those that failed
+    failing_build_urls = [(b['url'], b['timestamp']) for b in builds
+                          if b['result'] in ('UNSTABLE', 'FAILURE')]
+
+    tnum = len(builds)
+    num = len(failing_build_urls)
+    numRunsToExamine = tnum
+    if not summary_mode:
+        logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
+                     + ") that have failed tests in the past " + str(num_prev_days) + " days"
+                     + ((".", ", as listed below:\n")[num > 0]))
+
+    for failed_build_with_time in failing_build_urls:
+        failed_build = failed_build_with_time[0]
+        jobConsoleOutput = failed_build + "Console"
+        testReport = failed_build + "testReport"
+        testReportApiJson = testReport + "/api/json"
+
+        ts = float(failed_build_with_time[1]) / 1000.
+        st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
+        if not summary_mode:
+            logging.info("===>%s" % str(testReport) + " (" + st + ")")
+        failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
+        if failing:
+            for ftest in failing:
+                if not summary_mode:
+                    logging.info(" Failed test: %s" % ftest)
+                all_failing[ftest] = all_failing.get(ftest, 0) + 1
+
+    return all_failing
+
 
 def main():
-  global numRunsToExamine
-  global summary_mode
-  logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
-
-  # set up logger to write to stdout
-  soh = logging.StreamHandler(sys.stdout)
-  soh.setLevel(logging.INFO)
-  logger = logging.getLogger()
-  logger.removeHandler(logger.handlers[0])
-  logger.addHandler(soh)
-
-  opts = parse_args()
-  logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
-      + "/job/" + opts.job_name + "")
-
-  if opts.num_failed_tests != -1:
-    summary_mode = True
-
-  all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
-      opts.num_prev_days)
-  if len(all_failing) == 0:
-    raise SystemExit(0)
-
-  if summary_mode and opts.num_failed_tests < len(all_failing):
-    logging.info("\nAmong " + str(numRunsToExamine) +
-        " runs examined, top " + str(opts.num_failed_tests) +
-        " failed tests <#failedRuns: testName>:")
-  else:
-    logging.info("\nAmong " + str(numRunsToExamine) +
-        " runs examined, all failed tests <#failedRuns: testName>:")
-
-  # print summary section: all failed tests sorted by how many times they failed
-  line_count = 0
-  for tn in sorted(all_failing, key=all_failing.get, reverse=True):
-    logging.info(" " + str(all_failing[tn])+ ": " + tn)
-    if summary_mode:
-      line_count += 1
-      if line_count == opts.num_failed_tests:
-        break
-
-  if summary_mode and error_count > 0:
-    logging.info("\n" + str(error_count) + " errors found, you may "
-        + "re-run in non summary mode to see error details.");
+    global numRunsToExamine
+    global summary_mode
+    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
+
+    # set up logger to write to stdout
+    soh = logging.StreamHandler(sys.stdout)
+    soh.setLevel(logging.INFO)
+    logger = logging.getLogger()
+    logger.removeHandler(logger.handlers[0])
+    logger.addHandler(soh)
+
+    opts = parse_args()
+    logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
+                 + "/job/" + opts.job_name + "")
+
+    if opts.num_failed_tests != -1:
+        summary_mode = True
+
+    all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
+                                   opts.num_prev_days)
+    if len(all_failing) == 0:
+        raise SystemExit(0)
+
+    if summary_mode and opts.num_failed_tests < len(all_failing):
+        logging.info("\nAmong " + str(numRunsToExamine) +
+                     " runs examined, top " + str(opts.num_failed_tests) +
+                     " failed tests <#failedRuns: testName>:")
+    else:
+        logging.info("\nAmong " + str(numRunsToExamine) +
+                     " runs examined, all failed tests <#failedRuns: testName>:")
+
+    # print summary section: all failed tests sorted by how many times they failed
+    line_count = 0
+    for tn in sorted(all_failing, key=all_failing.get, reverse=True):
+        logging.info(" " + str(all_failing[tn]) + ": " + tn)
+        if summary_mode:
+            line_count += 1
+            if line_count == opts.num_failed_tests:
+                break
+
+    if summary_mode and ERROR_COUNT > 0:
+        logging.info("\n" + str(ERROR_COUNT) + " errors found, you may "
+                     + "re-run in non summary mode to see error details.")
+
 
 if __name__ == "__main__":
-  main()
+    main()
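
The two interpreter-level changes this patch leans on can be seen in isolation in the short sketch below. This is not part of the patch itself; the command and URL are placeholders used only for illustration.

    #!/usr/bin/env python3
    import json
    import subprocess
    import urllib.request

    # With an encoding given, check_output() returns str instead of bytes,
    # which is why the hand-rolled Python 2 backport above could be deleted.
    sha = subprocess.check_output(["git", "rev-parse", "HEAD"], encoding="utf-8")
    print(sha.strip())

    # urllib.request.urlopen() replaces urllib2.urlopen(); the charset the
    # server declares is consulted before decoding, mirroring load_url_data().
    with urllib.request.urlopen("https://example.org/api/json") as resp:
        charset = resp.info().get_param("charset") or "utf-8"
        data = json.loads(resp.read().decode(charset))
    print(sorted(data.keys()))
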
From 08d55bfbfe8701a91cf594df1a4bd9c1b811211b Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 16 Nov 2020 11:07:15 +0900
Subject: [PATCH 02/17] Test YETUS-452. Remove python2; rewrite python bits for
 python 3

---
 dev-support/Jenkinsfile | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 1703d0153bfb6..31cd1a850a6a5 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -35,7 +35,7 @@ pipeline {
         DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
         YETUS='yetus'
         // Branch or tag name.  Yetus release tags are 'rel/X.Y.Z'
-        YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
+        YETUS_VERSION='yetus452'
     }
 
     parameters {
@@ -51,7 +51,7 @@ pipeline {
                 checkout([
                     $class: 'GitSCM',
                     branches: [[name: "${env.YETUS_VERSION}"]],
-                    userRemoteConfigs: [[ url: 'https://github.com/apache/yetus.git']]]
+                    userRemoteConfigs: [[ url: 'https://github.com/effectivemachines/buretoolbox.git']]]
                 )
             }
         }
@@ -60,7 +60,7 @@ pipeline {
         stage ('precommit-run') {
             steps {
                 withCredentials(
-                    [usernamePassword(credentialsId: 'apache-hadoop-at-github.com',
+                    [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
                                      passwordVariable: 'GITHUB_TOKEN',
                                      usernameVariable: 'GITHUB_USER'),
                     usernamePassword(credentialsId: 'hadoopqa-at-asf-jira',
@@ -152,9 +152,6 @@ pipeline {
                         # help keep the ASF boxes clean
                         YETUS_ARGS+=("--sentinel")
 
-                        # use emoji vote so it is easier to find the broken line
-                        YETUS_ARGS+=("--github-use-emoji-vote")
-
                         # test with Java 8 and 11
                         YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
                         YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")
@@ -174,6 +171,20 @@ pipeline {
     post {
         always {
           script {
+
+            // Publish status if it was missed (YETUS-1059)
+            withCredentials(
+                [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
+                                 passwordVariable: 'GITHUB_TOKEN',
+                                 usernameVariable: 'GITHUB_USER')]) {
+              sh '''#!/usr/bin/env bash
+                YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}")
+                YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
+                TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh"
+                /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true
+                '''
+            }
+
             // Yetus output
             archiveArtifacts "${env.PATCHDIR}/**"
             // Publish the HTML report so that it can be looked at

From c21861520f6194574cb6162d7951852d85b3dd1d Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 16 Nov 2020 11:12:06 +0900
Subject: [PATCH 03/17] Upgrade to Python 3 in Dockerfile

---
 dev-support/docker/Dockerfile | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile
index cf442902d3d9c..858e7657ebbf8 100644
--- a/dev-support/docker/Dockerfile
+++ b/dev-support/docker/Dockerfile
@@ -72,10 +72,11 @@ RUN apt-get -q update \
         openjdk-8-jdk \
         pinentry-curses \
         pkg-config \
-        python \
-        python2.7 \
-        python-pkg-resources \
-        python-setuptools \
+        python3 \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-wheel \
         rsync \
         shellcheck \
         software-properties-common \
@@ -108,18 +109,6 @@ RUN mkdir -p /opt/boost-library \
     && cd /root \
     && rm -rf /opt/boost-library
 
-####
-# Install pip (deprecated from Focal toolchain)
-####
-# hadolint ignore=DL3003
-RUN mkdir -p /opt/pip \
-    && curl -L https://bootstrap.pypa.io/2.7/get-pip.py > get-pip.py \
-    && mv get-pip.py /opt/pip \
-    && cd /opt/pip \
-    && python2.7 get-pip.py "pip < 21.0" \
-    && cd /root \
-    && rm -rf /opt/pip
-
 ######
 # Install Google Protobuf 3.7.1 (3.6.1 ships with Focal)
 ######
@@ -139,19 +128,9 @@ ENV PROTOBUF_HOME /opt/protobuf
 ENV PATH "${PATH}:/opt/protobuf/bin"
 
 ####
-# Install pylint at fixed version (2.0.0 removed python2 support)
-# https://github.com/PyCQA/pylint/issues/2294
-####
-RUN pip2 install \
-    astroid==1.6.6 \
-    isort==4.3.21 \
-    configparser==4.0.2 \
-    pylint==1.9.2
-
-####
-# Install dateutil.parser
+# Install pylint and python-dateutil
 ####
-RUN pip2 install python-dateutil==2.7.3
+RUN pip3 install pylint python-dateutil
 
 ####
 # Install bower
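
A cheap way to confirm the migration inside the rebuilt image is to byte-compile the scripts; this is an editor-suggested check rather than something the series itself runs, and it assumes invocation from the repository root.

    # Byte-compiling catches leftover Python 2 syntax (print statements,
    # urllib2 imports used at module scope) without executing anything.
    import compileall
    import sys

    ok = compileall.compile_dir("dev-support", quiet=1)
    sys.exit(0 if ok else 1)
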
From 2210431bb0cb7575bf38acd6c76b8ecd8275c8fb Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 25 Jan 2021 18:07:05 +0900
Subject: [PATCH 04/17] Revert "Test YETUS-452. Remove python2; rewrite python
 bits for python 3"

This reverts commit 7deb8e1d53d046820ddf3d54d2900c429cb87d6e.
---
 dev-support/Jenkinsfile | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 31cd1a850a6a5..1703d0153bfb6 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -35,7 +35,7 @@ pipeline {
         DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
         YETUS='yetus'
         // Branch or tag name.  Yetus release tags are 'rel/X.Y.Z'
-        YETUS_VERSION='yetus452'
+        YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
     }
 
     parameters {
@@ -51,7 +51,7 @@ pipeline {
                 checkout([
                     $class: 'GitSCM',
                     branches: [[name: "${env.YETUS_VERSION}"]],
-                    userRemoteConfigs: [[ url: 'https://github.com/effectivemachines/buretoolbox.git']]]
+                    userRemoteConfigs: [[ url: 'https://github.com/apache/yetus.git']]]
                )
            }
        }
@@ -60,7 +60,7 @@ pipeline {
         stage ('precommit-run') {
             steps {
                 withCredentials(
-                    [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
+                    [usernamePassword(credentialsId: 'apache-hadoop-at-github.com',
                                      passwordVariable: 'GITHUB_TOKEN',
                                      usernameVariable: 'GITHUB_USER'),
                    usernamePassword(credentialsId: 'hadoopqa-at-asf-jira',
@@ -152,6 +152,9 @@ pipeline {
                         # help keep the ASF boxes clean
                         YETUS_ARGS+=("--sentinel")
 
+                        # use emoji vote so it is easier to find the broken line
+                        YETUS_ARGS+=("--github-use-emoji-vote")
+
                         # test with Java 8 and 11
                         YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
                         YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")
@@ -171,20 +174,6 @@ pipeline {
     post {
         always {
           script {
-
-            // Publish status if it was missed (YETUS-1059)
-            withCredentials(
-                [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
-                                 passwordVariable: 'GITHUB_TOKEN',
-                                 usernameVariable: 'GITHUB_USER')]) {
-              sh '''#!/usr/bin/env bash
-                YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}")
-                YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
-                TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh"
-                /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true
-                '''
-            }
-
             // Yetus output
             archiveArtifacts "${env.PATCHDIR}/**"
             // Publish the HTML report so that it can be looked at

From 112fcf008d0f29ad46dbbf1e4c91716fdfc87b1c Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 25 Jan 2021 18:08:37 +0900
Subject: [PATCH 05/17] Use Yetus 0.13.0

---
 dev-support/Jenkinsfile       | 2 +-
 dev-support/bin/yetus-wrapper | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 1703d0153bfb6..7bcc1a7d55b36 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -35,7 +35,7 @@ pipeline {
         DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
         YETUS='yetus'
         // Branch or tag name.  Yetus release tags are 'rel/X.Y.Z'
-        YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
+        YETUS_VERSION='rel/0.13.0'
     }
 
     parameters {
diff --git a/dev-support/bin/yetus-wrapper b/dev-support/bin/yetus-wrapper
index bca2316ae6784..8532d1749701b 100755
--- a/dev-support/bin/yetus-wrapper
+++ b/dev-support/bin/yetus-wrapper
@@ -77,7 +77,7 @@ WANTED="$1"
 shift
 ARGV=("$@")
 
-HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.10.0}
+HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.13.0}
 
 BIN=$(yetus_abs "${BASH_SOURCE-$0}")
 BINDIR=$(dirname "${BIN}")

From b70e9e6ce5822bf1c8dd25c0a9590da03df20a49 Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 25 Jan 2021 18:43:39 +0900
Subject: [PATCH 06/17] Remove unsupported option

---
 dev-support/Jenkinsfile | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 7bcc1a7d55b36..6a085394aa1d7 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -152,9 +152,6 @@ pipeline {
                         # help keep the ASF boxes clean
                         YETUS_ARGS+=("--sentinel")
 
-                        # use emoji vote so it is easier to find the broken line
-                        YETUS_ARGS+=("--github-use-emoji-vote")
-
                         # test with Java 8 and 11
                         YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
                         YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")

From 8485aa1a703a89c0efd8398f962818fc21313fa1 Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Tue, 26 Jan 2021 08:46:13 +0900
Subject: [PATCH 07/17] Fix hadolint error

---
 dev-support/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile
index 858e7657ebbf8..90cbcc111919e 100644
--- a/dev-support/docker/Dockerfile
+++ b/dev-support/docker/Dockerfile
@@ -130,7 +130,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin"
 ####
 # Install pylint and python-dateutil
 ####
-RUN pip3 install pylint python-dateutil
+RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1
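
A quick sanity check for the pins, assuming the image's interpreter is Python 3.8 or newer so that importlib.metadata is available; the expected versions below are the ones pinned in the hunk above.

    from importlib.metadata import version

    expected = {"pylint": "2.6.0", "python-dateutil": "2.8.1"}
    for dist, want in expected.items():
        have = version(dist)
        assert have == want, f"{dist}: expected {want}, got {have}"
    print("pinned versions OK")
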
""" + scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check") + if not os.path.exists(scratch_dir): + os.makedirs(scratch_dir) + return scratch_dir def get_java_acc_dir(): - """ Return the path where we check out the Java API Compliance Checker. """ - return os.path.join(get_repo_dir(), "target", "java-acc") + """ Return the path where we check out the Java API Compliance Checker. """ + return os.path.join(get_repo_dir(), "target", "java-acc") def clean_scratch_dir(scratch_dir): - """ Clean up and re-create the scratch directory. """ - if os.path.exists(scratch_dir): - logging.info("Removing scratch dir %s...", scratch_dir) - shutil.rmtree(scratch_dir) - logging.info("Creating empty scratch dir %s...", scratch_dir) - os.makedirs(scratch_dir) + """ Clean up and re-create the scratch directory. """ + if os.path.exists(scratch_dir): + logging.info("Removing scratch dir %s...", scratch_dir) + shutil.rmtree(scratch_dir) + logging.info("Creating empty scratch dir %s...", scratch_dir) + os.makedirs(scratch_dir) def checkout_java_tree(rev, path): - """ Check out the Java source tree for the given revision into + """ Check out the Java source tree for the given revision into the given path. """ - logging.info("Checking out %s in %s", rev, path) - os.makedirs(path) - # Extract java source - subprocess.check_call(["bash", '-o', 'pipefail', "-c", - ("git archive --format=tar %s | " + - "tar -C \"%s\" -xf -") % (rev, path)], - cwd=get_repo_dir()) - + logging.info("Checking out %s in %s", rev, path) + os.makedirs(path) + # Extract java source + subprocess.check_call(["bash", '-o', 'pipefail', "-c", + ("git archive --format=tar %s | " + + "tar -C \"%s\" -xf -") % (rev, path)], + cwd=get_repo_dir()) def get_git_hash(revname): - """ Convert 'revname' to its SHA-1 hash. """ - return check_output(["git", "rev-parse", revname], - cwd=get_repo_dir()).strip() - + """ Convert 'revname' to its SHA-1 hash. """ + return check_output(["git", "rev-parse", revname], + cwd=get_repo_dir()).strip() def get_repo_name(): - """Get the name of the repo based on the git remote.""" - remotes = check_output(["git", "remote", "-v"], - cwd=get_repo_dir()).strip().split("\n") - # Example output: - # origin https://github.com/apache/hadoop.git (fetch) - # origin https://github.com/apache/hadoop.git (push) - remote_url = remotes[0].split("\t")[1].split(" ")[0] - remote = remote_url.split("/")[-1] - if remote.endswith(".git"): - remote = remote[:-4] - return remote - + """Get the name of the repo based on the git remote.""" + remotes = check_output(["git", "remote", "-v"], + cwd=get_repo_dir()).strip().split("\n") + # Example output: + # origin https://github.com/apache/hadoop.git (fetch) + # origin https://github.com/apache/hadoop.git (push) + remote_url = remotes[0].split("\t")[1].split(" ")[0] + remote = remote_url.split("/")[-1] + if remote.endswith(".git"): + remote = remote[:-4] + return remote def build_tree(java_path): - """ Run the Java build within 'path'. """ - logging.info("Building in %s...", java_path) - subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true", - "package"], - cwd=java_path) + """ Run the Java build within 'path'. """ + logging.info("Building in %s...", java_path) + subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true", + "package"], + cwd=java_path) def checkout_java_acc(force): - """ + """ Check out the Java API Compliance Checker. If 'force' is true, will re-download even if the directory exists. 
""" - acc_dir = get_java_acc_dir() - if os.path.exists(acc_dir): - logging.info("Java ACC is already downloaded.") - if not force: - return - logging.info("Forcing re-download.") - shutil.rmtree(acc_dir) + acc_dir = get_java_acc_dir() + if os.path.exists(acc_dir): + logging.info("Java ACC is already downloaded.") + if not force: + return + logging.info("Forcing re-download.") + shutil.rmtree(acc_dir) - logging.info("Downloading Java ACC...") + logging.info("Downloading Java ACC...") - url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz" - scratch_dir = get_scratch_dir() - path = os.path.join(scratch_dir, os.path.basename(url)) - jacc = urllib.request.urlopen(url) - with open(path, 'wb') as w: - w.write(jacc.read()) + url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz" + scratch_dir = get_scratch_dir() + path = os.path.join(scratch_dir, os.path.basename(url)) + jacc = urllib.request.urlopen(url) + with open(path, 'wb') as w: + w.write(jacc.read()) - subprocess.check_call(["tar", "xzf", path], - cwd=scratch_dir) + subprocess.check_call(["tar", "xzf", path], + cwd=scratch_dir) - shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"), - os.path.join(acc_dir)) + shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"), + os.path.join(acc_dir)) def find_jars(path): - """ Return a list of jars within 'path' to be checked for compatibility. """ - all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines()) - - return [j for j in all_jars if ( - "-tests" not in j and - "-sources" not in j and - "-with-dependencies" not in j)] + """ Return a list of jars within 'path' to be checked for compatibility. """ + all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines()) + return [j for j in all_jars if ( + "-tests" not in j and + "-sources" not in j and + "-with-dependencies" not in j)] def write_xml_file(path, version, jars): - """Write the XML manifest file for JACC.""" - with open(path, "wt") as f: - f.write("" + version + "\n") - f.write("") - for j in jars: - f.write(j + "\n") - f.write("") - + """Write the XML manifest file for JACC.""" + with open(path, "wt") as f: + f.write("" + version + "\n") + f.write("") + for j in jars: + f.write(j + "\n") + f.write("") def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations): - """ Run the compliance checker to compare 'src' and 'dst'. """ - logging.info("Will check compatibility between original jars:\n\t%s\n" + - "and new jars:\n\t%s", - "\n\t".join(src_jars), - "\n\t".join(dst_jars)) - - java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl") + """ Run the compliance checker to compare 'src' and 'dst'. 
""" + logging.info("Will check compatibility between original jars:\n\t%s\n" + + "and new jars:\n\t%s", + "\n\t".join(src_jars), + "\n\t".join(dst_jars)) - src_xml_path = os.path.join(get_scratch_dir(), "src.xml") - dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml") - write_xml_file(src_xml_path, src_name, src_jars) - write_xml_file(dst_xml_path, dst_name, dst_jars) + java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl") - out_path = os.path.join(get_scratch_dir(), "report.html") + src_xml_path = os.path.join(get_scratch_dir(), "src.xml") + dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml") + write_xml_file(src_xml_path, src_name, src_jars) + write_xml_file(dst_xml_path, dst_name, dst_jars) - args = ["perl", java_acc_path, - "-l", get_repo_name(), - "-d1", src_xml_path, - "-d2", dst_xml_path, - "-report-path", out_path] + out_path = os.path.join(get_scratch_dir(), "report.html") - if annotations is not None: - annotations_path = os.path.join(get_scratch_dir(), "annotations.txt") - with file(annotations_path, "w") as f: - for ann in annotations: - print(ann, file=f) - args += ["-annotations-list", annotations_path] + args = ["perl", java_acc_path, + "-l", get_repo_name(), + "-d1", src_xml_path, + "-d2", dst_xml_path, + "-report-path", out_path] - subprocess.check_call(args) + if annotations is not None: + annotations_path = os.path.join(get_scratch_dir(), "annotations.txt") + with file(annotations_path, "w") as f: + for ann in annotations: + print(ann, file=f) + args += ["-annotations-list", annotations_path] + subprocess.check_call(args) def filter_jars(jars, include_filters, exclude_filters): - """Filter the list of JARs based on include and exclude filters.""" - filtered = [] - # Apply include filters - for j in jars: - found = False - basename = os.path.basename(j) - for f in include_filters: - if f.match(basename): - found = True - break - if found: - filtered += [j] - else: - logging.debug("Ignoring JAR %s", j) - # Apply exclude filters - exclude_filtered = [] - for j in filtered: - basename = os.path.basename(j) - found = False - for f in exclude_filters: - if f.match(basename): - found = True - break - if found: - logging.debug("Ignoring JAR %s", j) - else: - exclude_filtered += [j] - - return exclude_filtered - - -def main(): - """Main function.""" - logging.basicConfig(level=logging.INFO) - parser = argparse.ArgumentParser( - description="Run Java API Compliance Checker.") - parser.add_argument("-f", "--force-download", - action="store_true", - help="Download dependencies (i.e. Java JAVA_ACC) " + - "even if they are already present") - parser.add_argument("-i", "--include-file", - action="append", - dest="include_files", - help="Regex filter for JAR files to be included. " + - "Applied before the exclude filters. " + - "Can be specified multiple times.") - parser.add_argument("-e", "--exclude-file", - action="append", - dest="exclude_files", - help="Regex filter for JAR files to be excluded. " + - "Applied after the include filters. " + - "Can be specified multiple times.") - parser.add_argument("-a", "--annotation", - action="append", - dest="annotations", - help="Fully-qualified Java annotation. " + - "Java ACC will only check compatibility of " + - "annotated classes. 
Can be specified multiple times.") - parser.add_argument("--skip-clean", - action="store_true", - help="Skip cleaning the scratch directory.") - parser.add_argument("--skip-build", - action="store_true", - help="Skip building the projects.") - parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.") - parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD", - help="Destination revision. " + - "If not specified, will use HEAD.") - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - - args = parser.parse_args() - - src_rev, dst_rev = args.src_rev[0], args.dst_rev - - logging.info("Source revision: %s", src_rev) - logging.info("Destination revision: %s", dst_rev) - - # Construct the JAR regex patterns for filtering. - include_filters = [] - if args.include_files is not None: - for f in args.include_files: - logging.info("Applying JAR filename include filter: %s", f) - include_filters += [re.compile(f)] - else: - include_filters = [re.compile(".*")] - - exclude_filters = [] - if args.exclude_files is not None: - for f in args.exclude_files: - logging.info("Applying JAR filename exclude filter: %s", f) - exclude_filters += [re.compile(f)] - - # Construct the annotation list - annotations = args.annotations - if annotations is not None: - logging.info("Filtering classes using %d annotation(s):", len(annotations)) - for a in annotations: - logging.info("\t%s", a) - - # Download deps. - checkout_java_acc(args.force_download) - - # Set up the build. - scratch_dir = get_scratch_dir() - src_dir = os.path.join(scratch_dir, "src") - dst_dir = os.path.join(scratch_dir, "dst") - - if args.skip_clean: - logging.info("Skipping cleaning the scratch directory") + """Filter the list of JARs based on include and exclude filters.""" + filtered = [] + # Apply include filters + for j in jars: + found = False + basename = os.path.basename(j) + for f in include_filters: + if f.match(basename): + found = True + break + if found: + filtered += [j] else: - clean_scratch_dir(scratch_dir) - # Check out the src and dst source trees. - checkout_java_tree(get_git_hash(src_rev), src_dir) - checkout_java_tree(get_git_hash(dst_rev), dst_dir) - - # Run the build in each. - if args.skip_build: - logging.info("Skipping the build") + logging.debug("Ignoring JAR %s", j) + # Apply exclude filters + exclude_filtered = [] + for j in filtered: + basename = os.path.basename(j) + found = False + for f in exclude_filters: + if f.match(basename): + found = True + break + if found: + logging.debug("Ignoring JAR %s", j) else: - build_tree(src_dir) - build_tree(dst_dir) + exclude_filtered += [j] - # Find the JARs. - src_jars = find_jars(src_dir) - dst_jars = find_jars(dst_dir) + return exclude_filtered - # Filter the JARs. - src_jars = filter_jars(src_jars, include_filters, exclude_filters) - dst_jars = filter_jars(dst_jars, include_filters, exclude_filters) - if len(src_jars) == 0 or len(dst_jars) == 0: - logging.error("No JARs found! Are your filters too strong?") - sys.exit(1) - - run_java_acc(src_rev, src_jars, - dst_rev, dst_jars, annotations) +def main(): + """Main function.""" + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser( + description="Run Java API Compliance Checker.") + parser.add_argument("-f", "--force-download", + action="store_true", + help="Download dependencies (i.e. 
Java JAVA_ACC) " + + "even if they are already present") + parser.add_argument("-i", "--include-file", + action="append", + dest="include_files", + help="Regex filter for JAR files to be included. " + + "Applied before the exclude filters. " + + "Can be specified multiple times.") + parser.add_argument("-e", "--exclude-file", + action="append", + dest="exclude_files", + help="Regex filter for JAR files to be excluded. " + + "Applied after the include filters. " + + "Can be specified multiple times.") + parser.add_argument("-a", "--annotation", + action="append", + dest="annotations", + help="Fully-qualified Java annotation. " + + "Java ACC will only check compatibility of " + + "annotated classes. Can be specified multiple times.") + parser.add_argument("--skip-clean", + action="store_true", + help="Skip cleaning the scratch directory.") + parser.add_argument("--skip-build", + action="store_true", + help="Skip building the projects.") + parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.") + parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD", + help="Destination revision. " + + "If not specified, will use HEAD.") + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + args = parser.parse_args() + + src_rev, dst_rev = args.src_rev[0], args.dst_rev + + logging.info("Source revision: %s", src_rev) + logging.info("Destination revision: %s", dst_rev) + + # Construct the JAR regex patterns for filtering. + include_filters = [] + if args.include_files is not None: + for f in args.include_files: + logging.info("Applying JAR filename include filter: %s", f) + include_filters += [re.compile(f)] + else: + include_filters = [re.compile(".*")] + + exclude_filters = [] + if args.exclude_files is not None: + for f in args.exclude_files: + logging.info("Applying JAR filename exclude filter: %s", f) + exclude_filters += [re.compile(f)] + + # Construct the annotation list + annotations = args.annotations + if annotations is not None: + logging.info("Filtering classes using %d annotation(s):", len(annotations)) + for a in annotations: + logging.info("\t%s", a) + + # Download deps. + checkout_java_acc(args.force_download) + + # Set up the build. + scratch_dir = get_scratch_dir() + src_dir = os.path.join(scratch_dir, "src") + dst_dir = os.path.join(scratch_dir, "dst") + + if args.skip_clean: + logging.info("Skipping cleaning the scratch directory") + else: + clean_scratch_dir(scratch_dir) + # Check out the src and dst source trees. + checkout_java_tree(get_git_hash(src_rev), src_dir) + checkout_java_tree(get_git_hash(dst_rev), dst_dir) + + # Run the build in each. + if args.skip_build: + logging.info("Skipping the build") + else: + build_tree(src_dir) + build_tree(dst_dir) + + # Find the JARs. + src_jars = find_jars(src_dir) + dst_jars = find_jars(dst_dir) + + # Filter the JARs. + src_jars = filter_jars(src_jars, include_filters, exclude_filters) + dst_jars = filter_jars(dst_jars, include_filters, exclude_filters) + + if len(src_jars) == 0 or len(dst_jars) == 0: + logging.error("No JARs found! 
Are your filters too strong?") + sys.exit(1) + + run_java_acc(src_rev, src_jars, + dst_rev, dst_jars, annotations) if __name__ == "__main__": - main() + main() From 82fe22d98fb08c5ac3df378154284f54ad3137cb Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Tue, 26 Jan 2021 17:27:24 +0900 Subject: [PATCH 09/17] Reverted indent changes from determine-flaky-tests-hadoop.py --- dev-support/determine-flaky-tests-hadoop.py | 303 ++++++++++---------- 1 file changed, 149 insertions(+), 154 deletions(-) diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py index 8436da85b4c58..1cf7d2830289d 100755 --- a/dev-support/determine-flaky-tests-hadoop.py +++ b/dev-support/determine-flaky-tests-hadoop.py @@ -46,7 +46,7 @@ # Configuration DEFAULT_JENKINS_URL = "https://builds.apache.org" -DEFAULT_JOB_NAME = "hadoop-qbt-trunk-java8-linux-x86" +DEFAULT_JOB_NAME = "Hadoop-Common-trunk" DEFAULT_NUM_PREVIOUS_DAYS = 14 DEFAULT_TOP_NUM_FAILED_TEST = -1 @@ -55,179 +55,174 @@ # total number of runs to examine numRunsToExamine = 0 -# summary mode +#summary mode summary_mode = False -# total number of errors -ERROR_COUNT = 0 - +#total number of errors +error_count = 0 +""" Parse arguments """ def parse_args(): - """ Parse arguments """ - parser = OptionParser() - parser.add_option("-J", "--jenkins-url", type="string", - dest="jenkins_url", help="Jenkins URL", - default=DEFAULT_JENKINS_URL) - parser.add_option("-j", "--job-name", type="string", - dest="job_name", help="Job name to look at", - default=DEFAULT_JOB_NAME) - parser.add_option("-n", "--num-days", type="int", - dest="num_prev_days", help="Number of days to examine", - default=DEFAULT_NUM_PREVIOUS_DAYS) - parser.add_option("-t", "--top", type="int", - dest="num_failed_tests", - help="Summary Mode, only show top number of failed tests", - default=DEFAULT_TOP_NUM_FAILED_TEST) - - (options, args) = parser.parse_args() - if args: - parser.error("unexpected arguments: " + repr(args)) - return options - - + parser = OptionParser() + parser.add_option("-J", "--jenkins-url", type="string", + dest="jenkins_url", help="Jenkins URL", + default=DEFAULT_JENKINS_URL) + parser.add_option("-j", "--job-name", type="string", + dest="job_name", help="Job name to look at", + default=DEFAULT_JOB_NAME) + parser.add_option("-n", "--num-days", type="int", + dest="num_prev_days", help="Number of days to examine", + default=DEFAULT_NUM_PREVIOUS_DAYS) + parser.add_option("-t", "--top", type="int", + dest="num_failed_tests", + help="Summary Mode, only show top number of failed tests", + default=DEFAULT_TOP_NUM_FAILED_TEST) + + (options, args) = parser.parse_args() + if args: + parser.error("unexpected arguments: " + repr(args)) + return options + +""" Load data from specified url """ def load_url_data(url): - """ Load data from specified url """ - ourl = urllib.request.urlopen(url) - codec = ourl.info().get_param('charset') - content = ourl.read().decode(codec) - data = simplejson.loads(content, strict=False) - return data - + ourl = urllib.request.urlopen(url) + codec = ourl.info().get_param('charset') + content = ourl.read().decode(codec) + data = simplejson.loads(content, strict=False) + return data +""" List all builds of the target project. """ def list_builds(jenkins_url, job_name): - """ List all builds of the target project. 
""" - global summary_mode, ERROR_COUNT - url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( - jenkins=jenkins_url, - job_name=job_name) - - try: - data = load_url_data(url) + global summary_mode + global error_count + url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( + jenkins=jenkins_url, + job_name=job_name) - except: - if not summary_mode: - logging.error("Could not fetch: %s" % url) - ERROR_COUNT += 1 - raise - return data['builds'] + try: + data = load_url_data(url) + except: + if not summary_mode: + logging.error("Could not fetch: %s" % url) + error_count += 1 + raise + return data['builds'] +""" Find the names of any tests which failed in the given build output URL. """ def find_failing_tests(testReportApiJson, jobConsoleOutput): - """ Find the names of any tests which failed in the given build output URL. """ - global summary_mode, ERROR_COUNT - ret = set() - try: - data = load_url_data(testReportApiJson) + global summary_mode + global error_count + ret = set() + try: + data = load_url_data(testReportApiJson) - except: - if not summary_mode: - logging.error(" Could not open testReport, check " + - jobConsoleOutput + " for why it was reported failed") - ERROR_COUNT += 1 - return ret - - for suite in data['suites']: - for cs in suite['cases']: - status = cs['status'] - errDetails = cs['errorDetails'] - if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): - ret.add(cs['className'] + "." + cs['name']) - - if len(ret) == 0 and (not summary_mode): - logging.info(" No failed tests in testReport, check " + - jobConsoleOutput + " for why it was reported failed.") + except: + if not summary_mode: + logging.error(" Could not open testReport, check " + + jobConsoleOutput + " for why it was reported failed") + error_count += 1 return ret + for suite in data['suites']: + for cs in suite['cases']: + status = cs['status'] + errDetails = cs['errorDetails'] + if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): + ret.add(cs['className'] + "." 
+ cs['name']) + if len(ret) == 0 and (not summary_mode): + logging.info(" No failed tests in testReport, check " + + jobConsoleOutput + " for why it was reported failed.") + return ret + +""" Iterate runs of specfied job within num_prev_days and collect results """ def find_flaky_tests(jenkins_url, job_name, num_prev_days): - """ Iterate runs of specfied job within num_prev_days and collect results """ - global numRunsToExamine - global summary_mode - all_failing = dict() - # First list all builds - builds = list_builds(jenkins_url, job_name) - - # Select only those in the last N days - min_time = time.time() - SECONDS_PER_DAY * num_prev_days - builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time] - - # Filter out only those that failed - failing_build_urls = [(b['url'], b['timestamp']) for b in builds - if b['result'] in ('UNSTABLE', 'FAILURE')] - - tnum = len(builds) - num = len(failing_build_urls) - numRunsToExamine = tnum + global numRunsToExamine + global summary_mode + all_failing = dict() + # First list all builds + builds = list_builds(jenkins_url, job_name) + + # Select only those in the last N days + min_time = time.time() - SECONDS_PER_DAY * num_prev_days + builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time] + + # Filter out only those that failed + failing_build_urls = [(b['url'] , b['timestamp']) for b in builds + if (b['result'] in ('UNSTABLE', 'FAILURE'))] + + tnum = len(builds) + num = len(failing_build_urls) + numRunsToExamine = tnum + if not summary_mode: + logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) + + ") that have failed tests in the past " + str(num_prev_days) + " days" + + ((".", ", as listed below:\n")[num > 0])) + + for failed_build_with_time in failing_build_urls: + failed_build = failed_build_with_time[0]; + jobConsoleOutput = failed_build + "Console"; + testReport = failed_build + "testReport"; + testReportApiJson = testReport + "/api/json"; + + ts = float(failed_build_with_time[1]) / 1000. + st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') if not summary_mode: - logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) - + ") that have failed tests in the past " + str(num_prev_days) + " days" - + ((".", ", as listed below:\n")[num > 0])) - - for failed_build_with_time in failing_build_urls: - failed_build = failed_build_with_time[0] - jobConsoleOutput = failed_build + "Console" - testReport = failed_build + "testReport" - testReportApiJson = testReport + "/api/json" - - ts = float(failed_build_with_time[1]) / 1000. 
- st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') + logging.info("===>%s" % str(testReport) + " (" + st + ")") + failing = find_failing_tests(testReportApiJson, jobConsoleOutput) + if failing: + for ftest in failing: if not summary_mode: - logging.info("===>%s" % str(testReport) + " (" + st + ")") - failing = find_failing_tests(testReportApiJson, jobConsoleOutput) - if failing: - for ftest in failing: - if not summary_mode: - logging.info(" Failed test: %s" % ftest) - all_failing[ftest] = all_failing.get(ftest, 0) + 1 - - return all_failing + logging.info(" Failed test: %s" % ftest) + all_failing[ftest] = all_failing.get(ftest,0)+1 + return all_failing def main(): - global numRunsToExamine - global summary_mode - logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) - - # set up logger to write to stdout - soh = logging.StreamHandler(sys.stdout) - soh.setLevel(logging.INFO) - logger = logging.getLogger() - logger.removeHandler(logger.handlers[0]) - logger.addHandler(soh) - - opts = parse_args() - logging.info("****Recently FAILED builds in url: " + opts.jenkins_url - + "/job/" + opts.job_name + "") - - if opts.num_failed_tests != -1: - summary_mode = True - - all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, - opts.num_prev_days) - if len(all_failing) == 0: - raise SystemExit(0) - - if summary_mode and opts.num_failed_tests < len(all_failing): - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, top " + str(opts.num_failed_tests) + - " failed tests <#failedRuns: testName>:") - else: - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, all failed tests <#failedRuns: testName>:") - - # print summary section: all failed tests sorted by how many times they failed - line_count = 0 - for tn in sorted(all_failing, key=all_failing.get, reverse=True): - logging.info(" " + str(all_failing[tn]) + ": " + tn) - if summary_mode: - line_count += 1 - if line_count == opts.num_failed_tests: - break - - if summary_mode and ERROR_COUNT > 0: - logging.info("\n" + str(ERROR_COUNT) + " errors found, you may " - + "re-run in non summary mode to see error details.") - + global numRunsToExamine + global summary_mode + logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) + + # set up logger to write to stdout + soh = logging.StreamHandler(sys.stdout) + soh.setLevel(logging.INFO) + logger = logging.getLogger() + logger.removeHandler(logger.handlers[0]) + logger.addHandler(soh) + + opts = parse_args() + logging.info("****Recently FAILED builds in url: " + opts.jenkins_url + + "/job/" + opts.job_name + "") + + if opts.num_failed_tests != -1: + summary_mode = True + + all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, + opts.num_prev_days) + if len(all_failing) == 0: + raise SystemExit(0) + + if summary_mode and opts.num_failed_tests < len(all_failing): + logging.info("\nAmong " + str(numRunsToExamine) + + " runs examined, top " + str(opts.num_failed_tests) + + " failed tests <#failedRuns: testName>:") + else: + logging.info("\nAmong " + str(numRunsToExamine) + + " runs examined, all failed tests <#failedRuns: testName>:") + + # print summary section: all failed tests sorted by how many times they failed + line_count = 0 + for tn in sorted(all_failing, key=all_failing.get, reverse=True): + logging.info(" " + str(all_failing[tn])+ ": " + tn) + if summary_mode: + line_count += 1 + if line_count == opts.num_failed_tests: + break + + if summary_mode and error_count > 0: + logging.info("\n" + 
str(error_count) + " errors found, you may " + + "re-run in non summary mode to see error details."); if __name__ == "__main__": - main() + main() From d4a4e1b221ae57ac82b107012bd79c1cb36c30d5 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Tue, 26 Jan 2021 18:06:47 +0900 Subject: [PATCH 10/17] Fix pip3 install command --- dev-support/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile index 90cbcc111919e..930d44a839c4f 100644 --- a/dev-support/docker/Dockerfile +++ b/dev-support/docker/Dockerfile @@ -130,7 +130,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin" #### # Install pylint and python-dateutil #### -RUN pip3 install pylint=2.6.0 python-dateutil=2.8.1 +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 #### # Install bower From 9a9032a62de6aa7bdc3d08be84d8ca59069e52f5 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Tue, 26 Jan 2021 18:08:02 +0900 Subject: [PATCH 11/17] Update Dockerfile_aarch64 --- dev-support/docker/Dockerfile_aarch64 | 35 ++++++--------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64 index bdf9e0c7e59f6..858c9b36ddf05 100644 --- a/dev-support/docker/Dockerfile_aarch64 +++ b/dev-support/docker/Dockerfile_aarch64 @@ -76,10 +76,11 @@ RUN apt-get -q update \ openjdk-8-jdk \ pinentry-curses \ pkg-config \ - python \ - python2.7 \ - python-pkg-resources \ - python-setuptools \ + python3 \ + python3-pip \ + python3-pkg-resources \ + python3-setuptools \ + python3-wheel \ rsync \ shellcheck \ software-properties-common \ @@ -112,18 +113,6 @@ RUN mkdir -p /opt/boost-library \ && cd /root \ && rm -rf /opt/boost-library -#### -# Install pip (deprecated from Focal toolchain) -#### -# hadolint ignore=DL3003 -RUN mkdir -p /opt/pip \ - && curl -L https://bootstrap.pypa.io/2.7/get-pip.py > get-pip.py \ - && mv get-pip.py /opt/pip \ - && cd /opt/pip \ - && python2.7 get-pip.py "pip < 21.0" \ - && cd /root \ - && rm -rf /opt/pip - ###### # Install Google Protobuf 3.7.1 (3.6.1 ships with Focal) ###### @@ -143,19 +132,9 @@ ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" #### -# Install pylint at fixed version (2.0.0 removed python2 support) -# https://github.com/PyCQA/pylint/issues/2294 -#### -RUN pip2 install \ - astroid==1.6.6 \ - isort==4.3.21 \ - configparser==4.0.2 \ - pylint==1.9.2 - -#### -# Install dateutil.parser +# Install pylint and python-dateutil #### -RUN pip2 install python-dateutil==2.7.3 +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 #### # Install bower From cc34617a098d18138888056a4fbcc67544152be5 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Sat, 13 Feb 2021 15:42:43 +0900 Subject: [PATCH 12/17] Apply YETUS-1099 --- dev-support/Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 6a085394aa1d7..18120e9171f7b 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -35,7 +35,8 @@ pipeline { DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile" YETUS='yetus' // Branch or tag name. 
Yetus release tags are 'rel/X.Y.Z' - YETUS_VERSION='rel/0.13.0' + // Yetus 0.13.0 + YETUS-1099 to fix shelldocs bug + YETUS_VERSION='94857fb27ccb3b3522eaaaff1abcb28f999c49ac' } parameters { From 2b316f0fb2e272b5025e42c7372467311488816f Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 11:35:09 +0900 Subject: [PATCH 13/17] Disable Hadoop's bundled (Yetus 0.13.0) shelldocs --- dev-support/Jenkinsfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 18120e9171f7b..77aad70bae657 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -134,9 +134,6 @@ pipeline { # plugins to enable YETUS_ARGS+=("--plugins=all") - # use Hadoop's bundled shelldocs - YETUS_ARGS+=("--shelldocs=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/shelldocs") - # don't let these tests cause -1s because we aren't really paying that # much attention to them YETUS_ARGS+=("--tests-filter=checkstyle") From ece917cdb5d75323ae93f11d3fd3e6448ac000ee Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 12:51:29 +0900 Subject: [PATCH 14/17] Revert "Apply YETUS-1099" This reverts commit cc34617a098d18138888056a4fbcc67544152be5. --- dev-support/Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 77aad70bae657..39450ec762aab 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -35,8 +35,7 @@ pipeline { DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile" YETUS='yetus' // Branch or tag name. Yetus release tags are 'rel/X.Y.Z' - // Yetus 0.13.0 + YETUS-1099 to fix shelldocs bug - YETUS_VERSION='94857fb27ccb3b3522eaaaff1abcb28f999c49ac' + YETUS_VERSION='rel/0.13.0' } parameters { From 200ed5874d2c9ee3fe3c896e6734af6aedfc1b9c Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 12:53:14 +0900 Subject: [PATCH 15/17] Publish build status in the post script --- dev-support/Jenkinsfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 39450ec762aab..5119bae189c34 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -168,6 +168,19 @@ pipeline { post { always { script { + // Publish status if it was missed (YETUS-1059) + withCredentials( + [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', + passwordVariable: 'GITHUB_TOKEN', + usernameVariable: 'GITHUB_USER')]) { + sh '''#!/usr/bin/env bash + YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}") + YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}") + TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh" + /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true + ''' + } + // Yetus output archiveArtifacts "${env.PATCHDIR}/**" // Publish the HTML report so that it can be looked at From 7a6bee776e917abacf3b7fc990ba54002155c706 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 13:45:36 +0900 Subject: [PATCH 16/17] Use strong token --- dev-support/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 5119bae189c34..6841ed30a79cc 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -60,7 +60,7 @@ pipeline { stage ('precommit-run') { steps { withCredentials( - [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', + [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd', passwordVariable: 
'GITHUB_TOKEN', usernameVariable: 'GITHUB_USER'), usernamePassword(credentialsId: 'hadoopqa-at-asf-jira', @@ -170,7 +170,7 @@ pipeline { script { // Publish status if it was missed (YETUS-1059) withCredentials( - [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', + [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd', passwordVariable: 'GITHUB_TOKEN', usernameVariable: 'GITHUB_USER')]) { sh '''#!/usr/bin/env bash From 9ebb78464dc8f2ed68d086817f9f496339b3941d Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Wed, 17 Feb 2021 10:10:00 +0900 Subject: [PATCH 17/17] Remove determine-flaky-tests-hadoop.py since it is not used --- dev-support/determine-flaky-tests-hadoop.py | 228 -------------------- 1 file changed, 228 deletions(-) delete mode 100755 dev-support/determine-flaky-tests-hadoop.py diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py deleted file mode 100755 index 1cf7d2830289d..0000000000000 --- a/dev-support/determine-flaky-tests-hadoop.py +++ /dev/null @@ -1,228 +0,0 @@ -#!/usr/bin/env python3 -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Given a jenkins test job, this script examines all runs of the job done -# within specified period of time (number of days prior to the execution -# time of this script), and reports all failed tests. -# -# The output of this script includes a section for each run that has failed -# tests, with each failed test name listed. -# -# More importantly, at the end, it outputs a summary section to list all failed -# tests within all examined runs, and indicate how many runs a same test -# failed, and sorted all failed tests by how many runs each test failed. -# -# This way, when we see failed tests in PreCommit build, we can quickly tell -# whether a failed test is a new failure, or it failed before and how often it -# failed, so to have idea whether it may just be a flaky test. -# -# Of course, to be 100% sure about the reason of a test failure, closer look -# at the failed test for the specific run is necessary. 
-# -import sys - -import urllib.request - -import datetime -import json as simplejson -import logging -from optparse import OptionParser -import time - -# Configuration -DEFAULT_JENKINS_URL = "https://builds.apache.org" -DEFAULT_JOB_NAME = "Hadoop-Common-trunk" -DEFAULT_NUM_PREVIOUS_DAYS = 14 -DEFAULT_TOP_NUM_FAILED_TEST = -1 - -SECONDS_PER_DAY = 86400 - -# total number of runs to examine -numRunsToExamine = 0 - -#summary mode -summary_mode = False - -#total number of errors -error_count = 0 - -""" Parse arguments """ -def parse_args(): - parser = OptionParser() - parser.add_option("-J", "--jenkins-url", type="string", - dest="jenkins_url", help="Jenkins URL", - default=DEFAULT_JENKINS_URL) - parser.add_option("-j", "--job-name", type="string", - dest="job_name", help="Job name to look at", - default=DEFAULT_JOB_NAME) - parser.add_option("-n", "--num-days", type="int", - dest="num_prev_days", help="Number of days to examine", - default=DEFAULT_NUM_PREVIOUS_DAYS) - parser.add_option("-t", "--top", type="int", - dest="num_failed_tests", - help="Summary Mode, only show top number of failed tests", - default=DEFAULT_TOP_NUM_FAILED_TEST) - - (options, args) = parser.parse_args() - if args: - parser.error("unexpected arguments: " + repr(args)) - return options - -""" Load data from specified url """ -def load_url_data(url): - ourl = urllib.request.urlopen(url) - codec = ourl.info().get_param('charset') - content = ourl.read().decode(codec) - data = simplejson.loads(content, strict=False) - return data - -""" List all builds of the target project. """ -def list_builds(jenkins_url, job_name): - global summary_mode - global error_count - url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( - jenkins=jenkins_url, - job_name=job_name) - - try: - data = load_url_data(url) - - except: - if not summary_mode: - logging.error("Could not fetch: %s" % url) - error_count += 1 - raise - return data['builds'] - -""" Find the names of any tests which failed in the given build output URL. """ -def find_failing_tests(testReportApiJson, jobConsoleOutput): - global summary_mode - global error_count - ret = set() - try: - data = load_url_data(testReportApiJson) - - except: - if not summary_mode: - logging.error(" Could not open testReport, check " + - jobConsoleOutput + " for why it was reported failed") - error_count += 1 - return ret - - for suite in data['suites']: - for cs in suite['cases']: - status = cs['status'] - errDetails = cs['errorDetails'] - if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): - ret.add(cs['className'] + "." 
+ cs['name']) - - if len(ret) == 0 and (not summary_mode): - logging.info(" No failed tests in testReport, check " + - jobConsoleOutput + " for why it was reported failed.") - return ret - -""" Iterate runs of specfied job within num_prev_days and collect results """ -def find_flaky_tests(jenkins_url, job_name, num_prev_days): - global numRunsToExamine - global summary_mode - all_failing = dict() - # First list all builds - builds = list_builds(jenkins_url, job_name) - - # Select only those in the last N days - min_time = time.time() - SECONDS_PER_DAY * num_prev_days - builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time] - - # Filter out only those that failed - failing_build_urls = [(b['url'] , b['timestamp']) for b in builds - if (b['result'] in ('UNSTABLE', 'FAILURE'))] - - tnum = len(builds) - num = len(failing_build_urls) - numRunsToExamine = tnum - if not summary_mode: - logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) - + ") that have failed tests in the past " + str(num_prev_days) + " days" - + ((".", ", as listed below:\n")[num > 0])) - - for failed_build_with_time in failing_build_urls: - failed_build = failed_build_with_time[0]; - jobConsoleOutput = failed_build + "Console"; - testReport = failed_build + "testReport"; - testReportApiJson = testReport + "/api/json"; - - ts = float(failed_build_with_time[1]) / 1000. - st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') - if not summary_mode: - logging.info("===>%s" % str(testReport) + " (" + st + ")") - failing = find_failing_tests(testReportApiJson, jobConsoleOutput) - if failing: - for ftest in failing: - if not summary_mode: - logging.info(" Failed test: %s" % ftest) - all_failing[ftest] = all_failing.get(ftest,0)+1 - - return all_failing - -def main(): - global numRunsToExamine - global summary_mode - logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) - - # set up logger to write to stdout - soh = logging.StreamHandler(sys.stdout) - soh.setLevel(logging.INFO) - logger = logging.getLogger() - logger.removeHandler(logger.handlers[0]) - logger.addHandler(soh) - - opts = parse_args() - logging.info("****Recently FAILED builds in url: " + opts.jenkins_url - + "/job/" + opts.job_name + "") - - if opts.num_failed_tests != -1: - summary_mode = True - - all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, - opts.num_prev_days) - if len(all_failing) == 0: - raise SystemExit(0) - - if summary_mode and opts.num_failed_tests < len(all_failing): - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, top " + str(opts.num_failed_tests) + - " failed tests <#failedRuns: testName>:") - else: - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, all failed tests <#failedRuns: testName>:") - - # print summary section: all failed tests sorted by how many times they failed - line_count = 0 - for tn in sorted(all_failing, key=all_failing.get, reverse=True): - logging.info(" " + str(all_failing[tn])+ ": " + tn) - if summary_mode: - line_count += 1 - if line_count == opts.num_failed_tests: - break - - if summary_mode and error_count > 0: - logging.info("\n" + str(error_count) + " errors found, you may " - + "re-run in non summary mode to see error details."); - -if __name__ == "__main__": - main()
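-- 
The conversion applied across this series follows one recurring Python 3
pattern: urllib2.urlopen() becomes urllib.request.urlopen(), and the response
bytes are decoded with the charset advertised by the server before the JSON
is parsed. A minimal standalone sketch of that pattern (the fetch_json name
and the 'utf-8' fallback for servers that omit a charset are illustrative
additions, not part of the patches):

    #!/usr/bin/env python3
    # Sketch of the urllib2 -> urllib.request migration used by these
    # scripts; mirrors load_url_data() above.
    import json
    import urllib.request

    def fetch_json(url):
        # urlopen() returns an http.client.HTTPResponse usable as a
        # context manager; info() exposes the Content-Type parameters.
        with urllib.request.urlopen(url) as resp:
            charset = resp.info().get_param('charset') or 'utf-8'
            return json.loads(resp.read().decode(charset))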