From f793e604acaf2f4e1635ff7e78583e4394a52264 Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Thu, 5 Dec 2019 18:31:04 +0900
Subject: [PATCH 01/17] HADOOP-16747. Support Python 3 in dev-support scripts.

---
 dev-support/bin/checkcompatibility.py       | 504 ++++++++++----------
 dev-support/determine-flaky-tests-hadoop.py | 320 ++++++-------
 2 files changed, 402 insertions(+), 422 deletions(-)

diff --git a/dev-support/bin/checkcompatibility.py b/dev-support/bin/checkcompatibility.py
index ad1e9cbe47ff2..a98cf7634b0ca 100755
--- a/dev-support/bin/checkcompatibility.py
+++ b/dev-support/bin/checkcompatibility.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -30,316 +30,308 @@
 import shutil
 import subprocess
 import sys
-import urllib2
-try:
-  import argparse
-except ImportError:
-  sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.")
-  sys.exit(2)
+import urllib.request
+import argparse
 
 # Various relative paths
 REPO_DIR = os.getcwd()
 
+
 def check_output(*popenargs, **kwargs):
-  r"""Run command with arguments and return its output as a byte string.
-  Backported from Python 2.7 as it's implemented as pure python on stdlib.
-  >>> check_output(['/usr/bin/python', '--version'])
-  Python 2.6.2
-  """
-  process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
-  output, _ = process.communicate()
-  retcode = process.poll()
-  if retcode:
-    cmd = kwargs.get("args")
-    if cmd is None:
-      cmd = popenargs[0]
-    error = subprocess.CalledProcessError(retcode, cmd)
-    error.output = output
-    raise error
-  return output
+    """ Run command with arguments and return its output as a string. """
+    return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8')
+
 
 def get_repo_dir():
-  """ Return the path to the top of the repo. """
-  dirname, _ = os.path.split(os.path.abspath(__file__))
-  return os.path.join(dirname, "../..")
+    """ Return the path to the top of the repo. """
+    dirname, _ = os.path.split(os.path.abspath(__file__))
+    return os.path.join(dirname, "../..")
+
 
 def get_scratch_dir():
-  """ Return the path to the scratch dir that we build within. """
-  scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
-  if not os.path.exists(scratch_dir):
-    os.makedirs(scratch_dir)
-  return scratch_dir
+    """ Return the path to the scratch dir that we build within. """
+    scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
+    if not os.path.exists(scratch_dir):
+        os.makedirs(scratch_dir)
+    return scratch_dir
+
 
 def get_java_acc_dir():
-  """ Return the path where we check out the Java API Compliance Checker. """
-  return os.path.join(get_repo_dir(), "target", "java-acc")
+    """ Return the path where we check out the Java API Compliance Checker. """
+    return os.path.join(get_repo_dir(), "target", "java-acc")
 
 
 def clean_scratch_dir(scratch_dir):
-  """ Clean up and re-create the scratch directory. """
-  if os.path.exists(scratch_dir):
-    logging.info("Removing scratch dir %s...", scratch_dir)
-    shutil.rmtree(scratch_dir)
-  logging.info("Creating empty scratch dir %s...", scratch_dir)
-  os.makedirs(scratch_dir)
+    """ Clean up and re-create the scratch directory. """
+    if os.path.exists(scratch_dir):
+        logging.info("Removing scratch dir %s...", scratch_dir)
+        shutil.rmtree(scratch_dir)
+    logging.info("Creating empty scratch dir %s...", scratch_dir)
+    os.makedirs(scratch_dir)
 
 
 def checkout_java_tree(rev, path):
-  """ Check out the Java source tree for the given revision into
-  the given path. """
-  logging.info("Checking out %s in %s", rev, path)
-  os.makedirs(path)
-  # Extract java source
-  subprocess.check_call(["bash", '-o', 'pipefail', "-c",
-                         ("git archive --format=tar %s | " +
-                          "tar -C \"%s\" -xf -") % (rev, path)],
-                        cwd=get_repo_dir())
+    """ Check out the Java source tree for the given revision into
+    the given path. """
+    logging.info("Checking out %s in %s", rev, path)
+    os.makedirs(path)
+    # Extract java source
+    subprocess.check_call(["bash", '-o', 'pipefail', "-c",
+                           ("git archive --format=tar %s | " +
+                            "tar -C \"%s\" -xf -") % (rev, path)],
+                          cwd=get_repo_dir())
+
 
 def get_git_hash(revname):
-  """ Convert 'revname' to its SHA-1 hash. """
-  return check_output(["git", "rev-parse", revname],
-                      cwd=get_repo_dir()).strip()
+    """ Convert 'revname' to its SHA-1 hash. """
+    return check_output(["git", "rev-parse", revname],
+                        cwd=get_repo_dir()).strip()
+
 
 def get_repo_name():
-  """Get the name of the repo based on the git remote."""
-  remotes = check_output(["git", "remote", "-v"],
-                         cwd=get_repo_dir()).strip().split("\n")
-  # Example output:
-  # origin https://github.com/apache/hadoop.git (fetch)
-  # origin https://github.com/apache/hadoop.git (push)
-  remote_url = remotes[0].split("\t")[1].split(" ")[0]
-  remote = remote_url.split("/")[-1]
-  if remote.endswith(".git"):
-    remote = remote[:-4]
-  return remote
+    """Get the name of the repo based on the git remote."""
+    remotes = check_output(["git", "remote", "-v"],
+                           cwd=get_repo_dir()).strip().split("\n")
+    # Example output:
+    # origin https://github.com/apache/hadoop.git (fetch)
+    # origin https://github.com/apache/hadoop.git (push)
+    remote_url = remotes[0].split("\t")[1].split(" ")[0]
+    remote = remote_url.split("/")[-1]
+    if remote.endswith(".git"):
+        remote = remote[:-4]
+    return remote
+
 
 def build_tree(java_path):
-  """ Run the Java build within 'path'. """
-  logging.info("Building in %s...", java_path)
-  subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true",
-                         "package"],
-                        cwd=java_path)
+    """ Run the Java build within 'path'. """
+    logging.info("Building in %s...", java_path)
+    subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true",
+                           "package"],
+                          cwd=java_path)
 
 
 def checkout_java_acc(force):
-  """
-  Check out the Java API Compliance Checker.
-  If 'force' is true, will re-download even if the directory exists.
-  """
-  acc_dir = get_java_acc_dir()
-  if os.path.exists(acc_dir):
-    logging.info("Java ACC is already downloaded.")
-    if not force:
-      return
-    logging.info("Forcing re-download.")
-    shutil.rmtree(acc_dir)
-
-  logging.info("Downloading Java ACC...")
-
-  url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
-  scratch_dir = get_scratch_dir()
-  path = os.path.join(scratch_dir, os.path.basename(url))
-  jacc = urllib2.urlopen(url)
-  with open(path, 'wb') as w:
-    w.write(jacc.read())
-
-  subprocess.check_call(["tar", "xzf", path],
-                        cwd=scratch_dir)
-
-  shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"),
-              os.path.join(acc_dir))
+    """
+    Check out the Java API Compliance Checker.
+    If 'force' is true, will re-download even if the directory exists.
+    """
+    acc_dir = get_java_acc_dir()
+    if os.path.exists(acc_dir):
+        logging.info("Java ACC is already downloaded.")
+        if not force:
+            return
+        logging.info("Forcing re-download.")
+        shutil.rmtree(acc_dir)
 
-def find_jars(path):
-  """ Return a list of jars within 'path' to be checked for compatibility. """
-  all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())
+    logging.info("Downloading Java ACC...")
 
-  return [j for j in all_jars if (
-      "-tests" not in j and
-      "-sources" not in j and
-      "-with-dependencies" not in j)]
+    url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
+    scratch_dir = get_scratch_dir()
+    path = os.path.join(scratch_dir, os.path.basename(url))
+    jacc = urllib.request.urlopen(url)
+    with open(path, 'wb') as w:
+        w.write(jacc.read())
+
+    subprocess.check_call(["tar", "xzf", path],
+                          cwd=scratch_dir)
+
+    shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"),
+                os.path.join(acc_dir))
+
+
+def find_jars(path):
+    """ Return a list of jars within 'path' to be checked for compatibility. """
+    all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines())
+
+    return [j for j in all_jars if (
+        "-tests" not in j and
+        "-sources" not in j and
+        "-with-dependencies" not in j)]
+
 
 def write_xml_file(path, version, jars):
-  """Write the XML manifest file for JACC."""
-  with open(path, "wt") as f:
-    f.write("<version>" + version + "</version>\n")
-    f.write("<archives>")
-    for j in jars:
-      f.write(j + "\n")
-    f.write("</archives>")
+    """Write the XML manifest file for JACC."""
+    with open(path, "wt") as f:
+        f.write("<version>" + version + "</version>\n")
+        f.write("<archives>")
+        for j in jars:
+            f.write(j + "\n")
+        f.write("</archives>")
+
 
 def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
-  """ Run the compliance checker to compare 'src' and 'dst'. """
-  logging.info("Will check compatibility between original jars:\n\t%s\n" +
-               "and new jars:\n\t%s",
-               "\n\t".join(src_jars),
-               "\n\t".join(dst_jars))
-
-  java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl")
-
-  src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
-  dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
-  write_xml_file(src_xml_path, src_name, src_jars)
-  write_xml_file(dst_xml_path, dst_name, dst_jars)
-
-  out_path = os.path.join(get_scratch_dir(), "report.html")
-
-  args = ["perl", java_acc_path,
-          "-l", get_repo_name(),
-          "-d1", src_xml_path,
-          "-d2", dst_xml_path,
-          "-report-path", out_path]
-
-  if annotations is not None:
-    annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
-    with file(annotations_path, "w") as f:
-      for ann in annotations:
-        print >>f, ann
-    args += ["-annotations-list", annotations_path]
-
-  subprocess.check_call(args)
+    """ Run the compliance checker to compare 'src' and 'dst'. """
+    logging.info("Will check compatibility between original jars:\n\t%s\n" +
+                 "and new jars:\n\t%s",
+                 "\n\t".join(src_jars),
+                 "\n\t".join(dst_jars))
+
+    java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl")
+
+    src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
+    dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
+    write_xml_file(src_xml_path, src_name, src_jars)
+    write_xml_file(dst_xml_path, dst_name, dst_jars)
+
+    out_path = os.path.join(get_scratch_dir(), "report.html")
+
+    args = ["perl", java_acc_path,
+            "-l", get_repo_name(),
+            "-d1", src_xml_path,
+            "-d2", dst_xml_path,
+            "-report-path", out_path]
+
+    if annotations is not None:
+        annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
+        with open(annotations_path, "w") as f:
+            for ann in annotations:
+                print(ann, file=f)
+        args += ["-annotations-list", annotations_path]
+
+    subprocess.check_call(args)
+
 
 def filter_jars(jars, include_filters, exclude_filters):
-  """Filter the list of JARs based on include and exclude filters."""
-  filtered = []
-  # Apply include filters
-  for j in jars:
-    found = False
-    basename = os.path.basename(j)
-    for f in include_filters:
-      if f.match(basename):
-        found = True
-        break
-    if found:
-      filtered += [j]
-    else:
-      logging.debug("Ignoring JAR %s", j)
-  # Apply exclude filters
-  exclude_filtered = []
-  for j in filtered:
-    basename = os.path.basename(j)
-    found = False
-    for f in exclude_filters:
-      if f.match(basename):
-        found = True
-        break
-    if found:
-      logging.debug("Ignoring JAR %s", j)
-    else:
-      exclude_filtered += [j]
-
-  return exclude_filtered
+    """Filter the list of JARs based on include and exclude filters."""
+    filtered = []
+    # Apply include filters
+    for j in jars:
+        found = False
+        basename = os.path.basename(j)
+        for f in include_filters:
+            if f.match(basename):
+                found = True
+                break
+        if found:
+            filtered += [j]
+        else:
+            logging.debug("Ignoring JAR %s", j)
+    # Apply exclude filters
+    exclude_filtered = []
+    for j in filtered:
+        basename = os.path.basename(j)
+        found = False
+        for f in exclude_filters:
+            if f.match(basename):
+                found = True
+                break
+        if found:
+            logging.debug("Ignoring JAR %s", j)
+        else:
+            exclude_filtered += [j]
+
+    return exclude_filtered
+
 
 def main():
-  """Main function."""
-  logging.basicConfig(level=logging.INFO)
-  parser = argparse.ArgumentParser(
-      description="Run Java API Compliance Checker.")
-  parser.add_argument("-f", "--force-download",
-                      action="store_true",
-                      help="Download dependencies (i.e. Java JAVA_ACC) " +
-                      "even if they are already present")
-  parser.add_argument("-i", "--include-file",
-                      action="append",
-                      dest="include_files",
-                      help="Regex filter for JAR files to be included. " +
-                      "Applied before the exclude filters. " +
-                      "Can be specified multiple times.")
-  parser.add_argument("-e", "--exclude-file",
-                      action="append",
-                      dest="exclude_files",
-                      help="Regex filter for JAR files to be excluded. " +
-                      "Applied after the include filters. " +
-                      "Can be specified multiple times.")
-  parser.add_argument("-a", "--annotation",
-                      action="append",
-                      dest="annotations",
-                      help="Fully-qualified Java annotation. " +
-                      "Java ACC will only check compatibility of " +
-                      "annotated classes. Can be specified multiple times.")
-  parser.add_argument("--skip-clean",
-                      action="store_true",
-                      help="Skip cleaning the scratch directory.")
-  parser.add_argument("--skip-build",
-                      action="store_true",
-                      help="Skip building the projects.")
-  parser.add_argument("src_rev", nargs=1, help="Source revision.")
-  parser.add_argument("dst_rev", nargs="?", default="HEAD",
-                      help="Destination revision. " +
-                      "If not specified, will use HEAD.")
-
-  if len(sys.argv) == 1:
-    parser.print_help()
-    sys.exit(1)
-
-  args = parser.parse_args()
-
-  src_rev, dst_rev = args.src_rev[0], args.dst_rev
-
-  logging.info("Source revision: %s", src_rev)
-  logging.info("Destination revision: %s", dst_rev)
-
-  # Construct the JAR regex patterns for filtering.
-  include_filters = []
-  if args.include_files is not None:
-    for f in args.include_files:
-      logging.info("Applying JAR filename include filter: %s", f)
-      include_filters += [re.compile(f)]
-  else:
-    include_filters = [re.compile(".*")]
-
-  exclude_filters = []
-  if args.exclude_files is not None:
-    for f in args.exclude_files:
-      logging.info("Applying JAR filename exclude filter: %s", f)
-      exclude_filters += [re.compile(f)]
-
-  # Construct the annotation list
-  annotations = args.annotations
-  if annotations is not None:
-    logging.info("Filtering classes using %d annotation(s):", len(annotations))
-    for a in annotations:
-      logging.info("\t%s", a)
-
-  # Download deps.
-  checkout_java_acc(args.force_download)
-
-  # Set up the build.
-  scratch_dir = get_scratch_dir()
-  src_dir = os.path.join(scratch_dir, "src")
-  dst_dir = os.path.join(scratch_dir, "dst")
-
-  if args.skip_clean:
-    logging.info("Skipping cleaning the scratch directory")
-  else:
-    clean_scratch_dir(scratch_dir)
-  # Check out the src and dst source trees.
-  checkout_java_tree(get_git_hash(src_rev), src_dir)
-  checkout_java_tree(get_git_hash(dst_rev), dst_dir)
-
-  # Run the build in each.
-  if args.skip_build:
-    logging.info("Skipping the build")
-  else:
-    build_tree(src_dir)
-    build_tree(dst_dir)
-
-  # Find the JARs.
-  src_jars = find_jars(src_dir)
-  dst_jars = find_jars(dst_dir)
-
-  # Filter the JARs.
-  src_jars = filter_jars(src_jars, include_filters, exclude_filters)
-  dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)
-
-  if len(src_jars) == 0 or len(dst_jars) == 0:
-    logging.error("No JARs found! Are your filters too strong?")
-    sys.exit(1)
-
-  run_java_acc(src_rev, src_jars,
-               dst_rev, dst_jars, annotations)
+    """Main function."""
+    logging.basicConfig(level=logging.INFO)
+    parser = argparse.ArgumentParser(
+        description="Run Java API Compliance Checker.")
+    parser.add_argument("-f", "--force-download",
+                        action="store_true",
+                        help="Download dependencies (i.e. Java JAVA_ACC) " +
+                             "even if they are already present")
+    parser.add_argument("-i", "--include-file",
+                        action="append",
+                        dest="include_files",
+                        help="Regex filter for JAR files to be included. " +
+                             "Applied before the exclude filters. " +
+                             "Can be specified multiple times.")
+    parser.add_argument("-e", "--exclude-file",
+                        action="append",
+                        dest="exclude_files",
+                        help="Regex filter for JAR files to be excluded. " +
+                             "Applied after the include filters. " +
+                             "Can be specified multiple times.")
+    parser.add_argument("-a", "--annotation",
+                        action="append",
+                        dest="annotations",
+                        help="Fully-qualified Java annotation. " +
+                             "Java ACC will only check compatibility of " +
+                             "annotated classes. Can be specified multiple times.")
+    parser.add_argument("--skip-clean",
+                        action="store_true",
+                        help="Skip cleaning the scratch directory.")
+    parser.add_argument("--skip-build",
+                        action="store_true",
+                        help="Skip building the projects.")
+    parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.")
+    parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD",
+                        help="Destination revision. " +
+                             "If not specified, will use HEAD.")
+
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+
+    args = parser.parse_args()
+
+    src_rev, dst_rev = args.src_rev[0], args.dst_rev
+
+    logging.info("Source revision: %s", src_rev)
+    logging.info("Destination revision: %s", dst_rev)
+
+    # Construct the JAR regex patterns for filtering.
+    include_filters = []
+    if args.include_files is not None:
+        for f in args.include_files:
+            logging.info("Applying JAR filename include filter: %s", f)
+            include_filters += [re.compile(f)]
+    else:
+        include_filters = [re.compile(".*")]
+
+    exclude_filters = []
+    if args.exclude_files is not None:
+        for f in args.exclude_files:
+            logging.info("Applying JAR filename exclude filter: %s", f)
+            exclude_filters += [re.compile(f)]
+
+    # Construct the annotation list
+    annotations = args.annotations
+    if annotations is not None:
+        logging.info("Filtering classes using %d annotation(s):", len(annotations))
+        for a in annotations:
+            logging.info("\t%s", a)
+
+    # Download deps.
+    checkout_java_acc(args.force_download)
+
+    # Set up the build.
+    scratch_dir = get_scratch_dir()
+    src_dir = os.path.join(scratch_dir, "src")
+    dst_dir = os.path.join(scratch_dir, "dst")
+
+    if args.skip_clean:
+        logging.info("Skipping cleaning the scratch directory")
+    else:
+        clean_scratch_dir(scratch_dir)
+    # Check out the src and dst source trees.
+    checkout_java_tree(get_git_hash(src_rev), src_dir)
+    checkout_java_tree(get_git_hash(dst_rev), dst_dir)
+
+    # Run the build in each.
+    if args.skip_build:
+        logging.info("Skipping the build")
+    else:
+        build_tree(src_dir)
+        build_tree(dst_dir)
+
+    # Find the JARs.
+    src_jars = find_jars(src_dir)
+    dst_jars = find_jars(dst_dir)
+
+    # Filter the JARs.
+    src_jars = filter_jars(src_jars, include_filters, exclude_filters)
+    dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)
+
+    if len(src_jars) == 0 or len(dst_jars) == 0:
+        logging.error("No JARs found! Are your filters too strong?")
+        sys.exit(1)
+
+    run_java_acc(src_rev, src_jars,
+                 dst_rev, dst_jars, annotations)
 
 
 if __name__ == "__main__":
-  main()
+    main()

diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py
index 8644299bba4a2..8436da85b4c58 100755
--- a/dev-support/determine-flaky-tests-hadoop.py
+++ b/dev-support/determine-flaky-tests-hadoop.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -35,22 +35,8 @@
 # at the failed test for the specific run is necessary.
 #
 import sys
-import platform
-sysversion = sys.hexversion
-onward30 = False
-if sysversion < 0x020600F0:
-  sys.exit("Minimum supported python version is 2.6, the current version is " +
-           "Python" + platform.python_version())
-
-if sysversion == 0x030000F0:
-  sys.exit("There is a known bug with Python" + platform.python_version() +
-           ", please try a different version");
-
-if sysversion < 0x03000000:
-  import urllib2
-else:
-  onward30 = True
-  import urllib.request
+
+import urllib.request
 
 import datetime
 import json as simplejson
@@ -60,7 +46,7 @@
 
 # Configuration
 DEFAULT_JENKINS_URL = "https://builds.apache.org"
-DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
+DEFAULT_JOB_NAME = "hadoop-qbt-trunk-java8-linux-x86"
 DEFAULT_NUM_PREVIOUS_DAYS = 14
 DEFAULT_TOP_NUM_FAILED_TEST = -1
 
@@ -69,177 +55,179 @@
 # total number of runs to examine
 numRunsToExamine = 0
 
-#summary mode
+# summary mode
 summary_mode = False
 
-#total number of errors
-error_count = 0
+# total number of errors
+ERROR_COUNT = 0
+
 
-""" Parse arguments """
 def parse_args():
-  parser = OptionParser()
-  parser.add_option("-J", "--jenkins-url", type="string",
-                    dest="jenkins_url", help="Jenkins URL",
-                    default=DEFAULT_JENKINS_URL)
-  parser.add_option("-j", "--job-name", type="string",
-                    dest="job_name", help="Job name to look at",
-                    default=DEFAULT_JOB_NAME)
-  parser.add_option("-n", "--num-days", type="int",
-                    dest="num_prev_days", help="Number of days to examine",
-                    default=DEFAULT_NUM_PREVIOUS_DAYS)
-  parser.add_option("-t", "--top", type="int",
-                    dest="num_failed_tests",
-                    help="Summary Mode, only show top number of failed tests",
-                    default=DEFAULT_TOP_NUM_FAILED_TEST)
-
-  (options, args) = parser.parse_args()
-  if args:
-    parser.error("unexpected arguments: " + repr(args))
-  return options
+    """ Parse arguments """
+    parser = OptionParser()
+    parser.add_option("-J", "--jenkins-url", type="string",
+                      dest="jenkins_url", help="Jenkins URL",
+                      default=DEFAULT_JENKINS_URL)
+    parser.add_option("-j", "--job-name", type="string",
+                      dest="job_name", help="Job name to look at",
+                      default=DEFAULT_JOB_NAME)
+    parser.add_option("-n", "--num-days", type="int",
+                      dest="num_prev_days", help="Number of days to examine",
+                      default=DEFAULT_NUM_PREVIOUS_DAYS)
+    parser.add_option("-t", "--top", type="int",
+                      dest="num_failed_tests",
+                      help="Summary Mode, only show top number of failed tests",
+                      default=DEFAULT_TOP_NUM_FAILED_TEST)
+
+    (options, args) = parser.parse_args()
+    if args:
+        parser.error("unexpected arguments: " + repr(args))
+    return options
+
 
-""" Load data from specified url """
 def load_url_data(url):
-  if onward30:
+    """ Load data from specified url """
     ourl = urllib.request.urlopen(url)
     codec = ourl.info().get_param('charset')
     content = ourl.read().decode(codec)
     data = simplejson.loads(content, strict=False)
-  else:
-    ourl = urllib2.urlopen(url)
-    data = simplejson.load(ourl, strict=False)
-  return data
+    return data
+
 
-""" List all builds of the target project. """
 def list_builds(jenkins_url, job_name):
-  global summary_mode
-  url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
-      jenkins=jenkins_url,
-      job_name=job_name)
-
-  try:
-    data = load_url_data(url)
-
-  except:
-    if not summary_mode:
-      logging.error("Could not fetch: %s" % url)
-    error_count += 1
-    raise
-  return data['builds']
+    """ List all builds of the target project. """
+    global summary_mode, ERROR_COUNT
+    url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
+        jenkins=jenkins_url,
+        job_name=job_name)
+
+    try:
+        data = load_url_data(url)
+
+    except:
+        if not summary_mode:
+            logging.error("Could not fetch: %s" % url)
+        ERROR_COUNT += 1
+        raise
+    return data['builds']
+
 
-""" Find the names of any tests which failed in the given build output URL. """
 def find_failing_tests(testReportApiJson, jobConsoleOutput):
-  global summary_mode
-  global error_count
-  ret = set()
-  try:
-    data = load_url_data(testReportApiJson)
-
-  except:
-    if not summary_mode:
-      logging.error(" Could not open testReport, check " +
-                    jobConsoleOutput + " for why it was reported failed")
-    error_count += 1
-    return ret
-
-  for suite in data['suites']:
-    for cs in suite['cases']:
-      status = cs['status']
-      errDetails = cs['errorDetails']
-      if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
-        ret.add(cs['className'] + "." + cs['name'])
-
-  if len(ret) == 0 and (not summary_mode):
-    logging.info(" No failed tests in testReport, check " +
-                 jobConsoleOutput + " for why it was reported failed.")
-  return ret
+    """ Find the names of any tests which failed in the given build output URL. """
+    global summary_mode, ERROR_COUNT
+    ret = set()
+    try:
+        data = load_url_data(testReportApiJson)
+
+    except:
+        if not summary_mode:
+            logging.error(" Could not open testReport, check " +
+                          jobConsoleOutput + " for why it was reported failed")
+        ERROR_COUNT += 1
+        return ret
+
+    for suite in data['suites']:
+        for cs in suite['cases']:
+            status = cs['status']
+            errDetails = cs['errorDetails']
+            if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
+                ret.add(cs['className'] + "." + cs['name'])
+
+    if len(ret) == 0 and (not summary_mode):
+        logging.info(" No failed tests in testReport, check " +
+                     jobConsoleOutput + " for why it was reported failed.")
+    return ret
+
 
-""" Iterate runs of specfied job within num_prev_days and collect results """
 def find_flaky_tests(jenkins_url, job_name, num_prev_days):
-  global numRunsToExamine
-  global summary_mode
-  all_failing = dict()
-  # First list all builds
-  builds = list_builds(jenkins_url, job_name)
-
-  # Select only those in the last N days
-  min_time = int(time.time()) - SECONDS_PER_DAY * num_prev_days
-  builds = [b for b in builds if (int(b['timestamp']) / 1000) > min_time]
-
-  # Filter out only those that failed
-  failing_build_urls = [(b['url'] , b['timestamp']) for b in builds
-      if (b['result'] in ('UNSTABLE', 'FAILURE'))]
-
-  tnum = len(builds)
-  num = len(failing_build_urls)
-  numRunsToExamine = tnum
-  if not summary_mode:
-    logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
-        + ") that have failed tests in the past " + str(num_prev_days) + " days"
-        + ((".", ", as listed below:\n")[num > 0]))
-
-  for failed_build_with_time in failing_build_urls:
-    failed_build = failed_build_with_time[0];
-    jobConsoleOutput = failed_build + "Console";
-    testReport = failed_build + "testReport";
-    testReportApiJson = testReport + "/api/json";
-
-    ts = float(failed_build_with_time[1]) / 1000.
-    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
-    if not summary_mode:
-      logging.info("===>%s" % str(testReport) + " (" + st + ")")
-    failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
-    if failing:
-      for ftest in failing:
-        if not summary_mode:
-          logging.info(" Failed test: %s" % ftest)
-        all_failing[ftest] = all_failing.get(ftest,0)+1
-
-  return all_failing
+    """ Iterate runs of specified job within num_prev_days and collect results """
+    global numRunsToExamine
+    global summary_mode
+    all_failing = dict()
+    # First list all builds
+    builds = list_builds(jenkins_url, job_name)
+
+    # Select only those in the last N days
+    min_time = time.time() - SECONDS_PER_DAY * num_prev_days
+    builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time]
+
+    # Filter out only those that failed
+    failing_build_urls = [(b['url'], b['timestamp']) for b in builds
+                          if b['result'] in ('UNSTABLE', 'FAILURE')]
+
+    tnum = len(builds)
+    num = len(failing_build_urls)
+    numRunsToExamine = tnum
+    if not summary_mode:
+        logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
+                     + ") that have failed tests in the past " + str(num_prev_days) + " days"
+                     + ((".", ", as listed below:\n")[num > 0]))
+
+    for failed_build_with_time in failing_build_urls:
+        failed_build = failed_build_with_time[0]
+        jobConsoleOutput = failed_build + "Console"
+        testReport = failed_build + "testReport"
+        testReportApiJson = testReport + "/api/json"
+
+        ts = float(failed_build_with_time[1]) / 1000.
+        st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
+        if not summary_mode:
+            logging.info("===>%s" % str(testReport) + " (" + st + ")")
+        failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
+        if failing:
+            for ftest in failing:
+                if not summary_mode:
+                    logging.info(" Failed test: %s" % ftest)
+                all_failing[ftest] = all_failing.get(ftest, 0) + 1
+
+    return all_failing
+
 
 def main():
-  global numRunsToExamine
-  global summary_mode
-  logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
-
-  # set up logger to write to stdout
-  soh = logging.StreamHandler(sys.stdout)
-  soh.setLevel(logging.INFO)
-  logger = logging.getLogger()
-  logger.removeHandler(logger.handlers[0])
-  logger.addHandler(soh)
-
-  opts = parse_args()
-  logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
-      + "/job/" + opts.job_name + "")
-
-  if opts.num_failed_tests != -1:
-    summary_mode = True
-
-  all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
-      opts.num_prev_days)
-  if len(all_failing) == 0:
-    raise SystemExit(0)
-
-  if summary_mode and opts.num_failed_tests < len(all_failing):
-    logging.info("\nAmong " + str(numRunsToExamine) +
-        " runs examined, top " + str(opts.num_failed_tests) +
-        " failed tests <#failedRuns: testName>:")
-  else:
-    logging.info("\nAmong " + str(numRunsToExamine) +
-        " runs examined, all failed tests <#failedRuns: testName>:")
-
-  # print summary section: all failed tests sorted by how many times they failed
-  line_count = 0
-  for tn in sorted(all_failing, key=all_failing.get, reverse=True):
-    logging.info(" " + str(all_failing[tn])+ ": " + tn)
-    if summary_mode:
-      line_count += 1
-      if line_count == opts.num_failed_tests:
-        break
-
-  if summary_mode and error_count > 0:
-    logging.info("\n" + str(error_count) + " errors found, you may "
-        + "re-run in non summary mode to see error details.");
+    global numRunsToExamine
+    global summary_mode
+    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
+
+    # set up logger to write to stdout
+    soh = logging.StreamHandler(sys.stdout)
+    soh.setLevel(logging.INFO)
+    logger = logging.getLogger()
+    logger.removeHandler(logger.handlers[0])
+    logger.addHandler(soh)
+
+    opts = parse_args()
+    logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
+                 + "/job/" + opts.job_name + "")
+
+    if opts.num_failed_tests != -1:
+        summary_mode = True
+
+    all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
+                                   opts.num_prev_days)
+    if len(all_failing) == 0:
+        raise SystemExit(0)
+
+    if summary_mode and opts.num_failed_tests < len(all_failing):
+        logging.info("\nAmong " + str(numRunsToExamine) +
+                     " runs examined, top " + str(opts.num_failed_tests) +
+                     " failed tests <#failedRuns: testName>:")
+    else:
+        logging.info("\nAmong " + str(numRunsToExamine) +
+                     " runs examined, all failed tests <#failedRuns: testName>:")
+
+    # print summary section: all failed tests sorted by how many times they failed
+    line_count = 0
+    for tn in sorted(all_failing, key=all_failing.get, reverse=True):
+        logging.info(" " + str(all_failing[tn]) + ": " + tn)
+        if summary_mode:
+            line_count += 1
+            if line_count == opts.num_failed_tests:
+                break
+
+    if summary_mode and ERROR_COUNT > 0:
+        logging.info("\n" + str(ERROR_COUNT) + " errors found, you may "
+                     + "re-run in non summary mode to see error details.")
+
 
 if __name__ == "__main__":
-  main()
+    main()
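
The two interpreter-level changes this patch leans on can be seen in isolation in the short sketch below. This is not part of the patch itself; the command and URL are placeholders used only for illustration.

    #!/usr/bin/env python3
    import json
    import subprocess
    import urllib.request

    # With an encoding given, check_output() returns str instead of bytes,
    # which is why the hand-rolled Python 2 backport above could be deleted.
    sha = subprocess.check_output(["git", "rev-parse", "HEAD"], encoding="utf-8")
    print(sha.strip())

    # urllib.request.urlopen() replaces urllib2.urlopen(); the charset the
    # server declares is consulted before decoding, mirroring load_url_data().
    with urllib.request.urlopen("https://example.org/api/json") as resp:
        charset = resp.info().get_param("charset") or "utf-8"
        data = json.loads(resp.read().decode(charset))
    print(sorted(data.keys()))
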
From 08d55bfbfe8701a91cf594df1a4bd9c1b811211b Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 16 Nov 2020 11:07:15 +0900
Subject: [PATCH 02/17] Test YETUS-452. Remove python2; rewrite python bits for
 python 3

---
 dev-support/Jenkinsfile | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 1703d0153bfb6..31cd1a850a6a5 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -35,7 +35,7 @@ pipeline {
         DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
         YETUS='yetus'
         // Branch or tag name.  Yetus release tags are 'rel/X.Y.Z'
-        YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
+        YETUS_VERSION='yetus452'
     }
 
     parameters {
@@ -51,7 +51,7 @@ pipeline {
                 checkout([
                     $class: 'GitSCM',
                     branches: [[name: "${env.YETUS_VERSION}"]],
-                    userRemoteConfigs: [[ url: 'https://github.com/apache/yetus.git']]]
+                    userRemoteConfigs: [[ url: 'https://github.com/effectivemachines/buretoolbox.git']]]
                 )
             }
         }
@@ -60,7 +60,7 @@ pipeline {
         stage ('precommit-run') {
             steps {
                 withCredentials(
-                    [usernamePassword(credentialsId: 'apache-hadoop-at-github.com',
+                    [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
                                      passwordVariable: 'GITHUB_TOKEN',
                                      usernameVariable: 'GITHUB_USER'),
                     usernamePassword(credentialsId: 'hadoopqa-at-asf-jira',
@@ -152,9 +152,6 @@ pipeline {
                         # help keep the ASF boxes clean
                         YETUS_ARGS+=("--sentinel")
 
-                        # use emoji vote so it is easier to find the broken line
-                        YETUS_ARGS+=("--github-use-emoji-vote")
-
                         # test with Java 8 and 11
                         YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
                         YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")
@@ -174,6 +171,20 @@ pipeline {
     post {
         always {
           script {
+
+            // Publish status if it was missed (YETUS-1059)
+            withCredentials(
+                [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
+                                 passwordVariable: 'GITHUB_TOKEN',
+                                 usernameVariable: 'GITHUB_USER')]) {
+              sh '''#!/usr/bin/env bash
+                YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}")
+                YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
+                TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh"
+                /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true
+                '''
+            }
+
             // Yetus output
             archiveArtifacts "${env.PATCHDIR}/**"
             // Publish the HTML report so that it can be looked at

From c21861520f6194574cb6162d7951852d85b3dd1d Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 16 Nov 2020 11:12:06 +0900
Subject: [PATCH 03/17] Upgrade to Python 3 in Dockerfile

---
 dev-support/docker/Dockerfile | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile
index cf442902d3d9c..858e7657ebbf8 100644
--- a/dev-support/docker/Dockerfile
+++ b/dev-support/docker/Dockerfile
@@ -72,10 +72,11 @@ RUN apt-get -q update \
         openjdk-8-jdk \
         pinentry-curses \
         pkg-config \
-        python \
-        python2.7 \
-        python-pkg-resources \
-        python-setuptools \
+        python3 \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-wheel \
         rsync \
         shellcheck \
         software-properties-common \
@@ -108,18 +109,6 @@ RUN mkdir -p /opt/boost-library \
     && cd /root \
     && rm -rf /opt/boost-library
 
-####
-# Install pip (deprecated from Focal toolchain)
-####
-# hadolint ignore=DL3003
-RUN mkdir -p /opt/pip \
-    && curl -L https://bootstrap.pypa.io/2.7/get-pip.py > get-pip.py \
-    && mv get-pip.py /opt/pip \
-    && cd /opt/pip \
-    && python2.7 get-pip.py "pip < 21.0" \
-    && cd /root \
-    && rm -rf /opt/pip
-
 ######
 # Install Google Protobuf 3.7.1 (3.6.1 ships with Focal)
 ######
@@ -139,19 +128,9 @@ ENV PROTOBUF_HOME /opt/protobuf
 ENV PATH "${PATH}:/opt/protobuf/bin"
 
 ####
-# Install pylint at fixed version (2.0.0 removed python2 support)
-# https://github.com/PyCQA/pylint/issues/2294
-####
-RUN pip2 install \
-    astroid==1.6.6 \
-    isort==4.3.21 \
-    configparser==4.0.2 \
-    pylint==1.9.2
-
-####
-# Install dateutil.parser
+# Install pylint and python-dateutil
 ####
-RUN pip2 install python-dateutil==2.7.3
+RUN pip3 install pylint python-dateutil
 
 ####
 # Install bower
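
A cheap way to confirm the migration inside the rebuilt image is to byte-compile the scripts; this is an editor-suggested check rather than something the series itself runs, and it assumes invocation from the repository root.

    # Byte-compiling catches leftover Python 2 syntax (print statements,
    # urllib2 imports used at module scope) without executing anything.
    import compileall
    import sys

    ok = compileall.compile_dir("dev-support", quiet=1)
    sys.exit(0 if ok else 1)
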
From 2210431bb0cb7575bf38acd6c76b8ecd8275c8fb Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 25 Jan 2021 18:07:05 +0900
Subject: [PATCH 04/17] Revert "Test YETUS-452. Remove python2; rewrite python
 bits for python 3"

This reverts commit 7deb8e1d53d046820ddf3d54d2900c429cb87d6e.
---
 dev-support/Jenkinsfile | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 31cd1a850a6a5..1703d0153bfb6 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -35,7 +35,7 @@ pipeline {
         DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
         YETUS='yetus'
         // Branch or tag name.  Yetus release tags are 'rel/X.Y.Z'
-        YETUS_VERSION='yetus452'
+        YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
     }
 
     parameters {
@@ -51,7 +51,7 @@ pipeline {
                 checkout([
                     $class: 'GitSCM',
                     branches: [[name: "${env.YETUS_VERSION}"]],
-                    userRemoteConfigs: [[ url: 'https://github.com/effectivemachines/buretoolbox.git']]]
+                    userRemoteConfigs: [[ url: 'https://github.com/apache/yetus.git']]]
                )
            }
        }
@@ -60,7 +60,7 @@ pipeline {
         stage ('precommit-run') {
             steps {
                 withCredentials(
-                    [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
+                    [usernamePassword(credentialsId: 'apache-hadoop-at-github.com',
                                      passwordVariable: 'GITHUB_TOKEN',
                                      usernameVariable: 'GITHUB_USER'),
                    usernamePassword(credentialsId: 'hadoopqa-at-asf-jira',
@@ -152,6 +152,9 @@ pipeline {
                         # help keep the ASF boxes clean
                         YETUS_ARGS+=("--sentinel")
 
+                        # use emoji vote so it is easier to find the broken line
+                        YETUS_ARGS+=("--github-use-emoji-vote")
+
                         # test with Java 8 and 11
                         YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
                         YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")
@@ -171,20 +174,6 @@ pipeline {
     post {
         always {
           script {
-
-            // Publish status if it was missed (YETUS-1059)
-            withCredentials(
-                [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
-                                 passwordVariable: 'GITHUB_TOKEN',
-                                 usernameVariable: 'GITHUB_USER')]) {
-              sh '''#!/usr/bin/env bash
-                YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}")
-                YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
-                TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh"
-                /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true
-                '''
-            }
-
             // Yetus output
             archiveArtifacts "${env.PATCHDIR}/**"
             // Publish the HTML report so that it can be looked at

From 112fcf008d0f29ad46dbbf1e4c91716fdfc87b1c Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 25 Jan 2021 18:08:37 +0900
Subject: [PATCH 05/17] Use Yetus 0.13.0

---
 dev-support/Jenkinsfile       | 2 +-
 dev-support/bin/yetus-wrapper | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 1703d0153bfb6..7bcc1a7d55b36 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -35,7 +35,7 @@ pipeline {
         DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
         YETUS='yetus'
         // Branch or tag name.  Yetus release tags are 'rel/X.Y.Z'
-        YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
+        YETUS_VERSION='rel/0.13.0'
     }
 
     parameters {
diff --git a/dev-support/bin/yetus-wrapper b/dev-support/bin/yetus-wrapper
index bca2316ae6784..8532d1749701b 100755
--- a/dev-support/bin/yetus-wrapper
+++ b/dev-support/bin/yetus-wrapper
@@ -77,7 +77,7 @@ WANTED="$1"
 shift
 ARGV=("$@")
 
-HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.10.0}
+HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.13.0}
 
 BIN=$(yetus_abs "${BASH_SOURCE-$0}")
 BINDIR=$(dirname "${BIN}")

From b70e9e6ce5822bf1c8dd25c0a9590da03df20a49 Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Mon, 25 Jan 2021 18:43:39 +0900
Subject: [PATCH 06/17] Remove unsupported option

---
 dev-support/Jenkinsfile | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 7bcc1a7d55b36..6a085394aa1d7 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -152,9 +152,6 @@ pipeline {
                         # help keep the ASF boxes clean
                         YETUS_ARGS+=("--sentinel")
 
-                        # use emoji vote so it is easier to find the broken line
-                        YETUS_ARGS+=("--github-use-emoji-vote")
-
                         # test with Java 8 and 11
                         YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
                         YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")

From 8485aa1a703a89c0efd8398f962818fc21313fa1 Mon Sep 17 00:00:00 2001
From: Akira Ajisaka
Date: Tue, 26 Jan 2021 08:46:13 +0900
Subject: [PATCH 07/17] Fix hadolint error

---
 dev-support/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile
index 858e7657ebbf8..90cbcc111919e 100644
--- a/dev-support/docker/Dockerfile
+++ b/dev-support/docker/Dockerfile
@@ -130,7 +130,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin"
 ####
 # Install pylint and python-dateutil
 ####
-RUN pip3 install pylint python-dateutil
+RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1
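
A quick sanity check for the pins, assuming the image's interpreter is Python 3.8 or newer so that importlib.metadata is available; the expected versions below are the ones pinned in the hunk above.

    from importlib.metadata import version

    expected = {"pylint": "2.6.0", "python-dateutil": "2.8.1"}
    for dist, want in expected.items():
        have = version(dist)
        assert have == want, f"{dist}: expected {want}, got {have}"
    print("pinned versions OK")
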
""" + scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check") + if not os.path.exists(scratch_dir): + os.makedirs(scratch_dir) + return scratch_dir def get_java_acc_dir(): - """ Return the path where we check out the Java API Compliance Checker. """ - return os.path.join(get_repo_dir(), "target", "java-acc") + """ Return the path where we check out the Java API Compliance Checker. """ + return os.path.join(get_repo_dir(), "target", "java-acc") def clean_scratch_dir(scratch_dir): - """ Clean up and re-create the scratch directory. """ - if os.path.exists(scratch_dir): - logging.info("Removing scratch dir %s...", scratch_dir) - shutil.rmtree(scratch_dir) - logging.info("Creating empty scratch dir %s...", scratch_dir) - os.makedirs(scratch_dir) + """ Clean up and re-create the scratch directory. """ + if os.path.exists(scratch_dir): + logging.info("Removing scratch dir %s...", scratch_dir) + shutil.rmtree(scratch_dir) + logging.info("Creating empty scratch dir %s...", scratch_dir) + os.makedirs(scratch_dir) def checkout_java_tree(rev, path): - """ Check out the Java source tree for the given revision into + """ Check out the Java source tree for the given revision into the given path. """ - logging.info("Checking out %s in %s", rev, path) - os.makedirs(path) - # Extract java source - subprocess.check_call(["bash", '-o', 'pipefail', "-c", - ("git archive --format=tar %s | " + - "tar -C \"%s\" -xf -") % (rev, path)], - cwd=get_repo_dir()) - + logging.info("Checking out %s in %s", rev, path) + os.makedirs(path) + # Extract java source + subprocess.check_call(["bash", '-o', 'pipefail', "-c", + ("git archive --format=tar %s | " + + "tar -C \"%s\" -xf -") % (rev, path)], + cwd=get_repo_dir()) def get_git_hash(revname): - """ Convert 'revname' to its SHA-1 hash. """ - return check_output(["git", "rev-parse", revname], - cwd=get_repo_dir()).strip() - + """ Convert 'revname' to its SHA-1 hash. """ + return check_output(["git", "rev-parse", revname], + cwd=get_repo_dir()).strip() def get_repo_name(): - """Get the name of the repo based on the git remote.""" - remotes = check_output(["git", "remote", "-v"], - cwd=get_repo_dir()).strip().split("\n") - # Example output: - # origin https://github.com/apache/hadoop.git (fetch) - # origin https://github.com/apache/hadoop.git (push) - remote_url = remotes[0].split("\t")[1].split(" ")[0] - remote = remote_url.split("/")[-1] - if remote.endswith(".git"): - remote = remote[:-4] - return remote - + """Get the name of the repo based on the git remote.""" + remotes = check_output(["git", "remote", "-v"], + cwd=get_repo_dir()).strip().split("\n") + # Example output: + # origin https://github.com/apache/hadoop.git (fetch) + # origin https://github.com/apache/hadoop.git (push) + remote_url = remotes[0].split("\t")[1].split(" ")[0] + remote = remote_url.split("/")[-1] + if remote.endswith(".git"): + remote = remote[:-4] + return remote def build_tree(java_path): - """ Run the Java build within 'path'. """ - logging.info("Building in %s...", java_path) - subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true", - "package"], - cwd=java_path) + """ Run the Java build within 'path'. """ + logging.info("Building in %s...", java_path) + subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true", + "package"], + cwd=java_path) def checkout_java_acc(force): - """ + """ Check out the Java API Compliance Checker. If 'force' is true, will re-download even if the directory exists. 
""" - acc_dir = get_java_acc_dir() - if os.path.exists(acc_dir): - logging.info("Java ACC is already downloaded.") - if not force: - return - logging.info("Forcing re-download.") - shutil.rmtree(acc_dir) + acc_dir = get_java_acc_dir() + if os.path.exists(acc_dir): + logging.info("Java ACC is already downloaded.") + if not force: + return + logging.info("Forcing re-download.") + shutil.rmtree(acc_dir) - logging.info("Downloading Java ACC...") + logging.info("Downloading Java ACC...") - url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz" - scratch_dir = get_scratch_dir() - path = os.path.join(scratch_dir, os.path.basename(url)) - jacc = urllib.request.urlopen(url) - with open(path, 'wb') as w: - w.write(jacc.read()) + url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz" + scratch_dir = get_scratch_dir() + path = os.path.join(scratch_dir, os.path.basename(url)) + jacc = urllib.request.urlopen(url) + with open(path, 'wb') as w: + w.write(jacc.read()) - subprocess.check_call(["tar", "xzf", path], - cwd=scratch_dir) + subprocess.check_call(["tar", "xzf", path], + cwd=scratch_dir) - shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"), - os.path.join(acc_dir)) + shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-1.8"), + os.path.join(acc_dir)) def find_jars(path): - """ Return a list of jars within 'path' to be checked for compatibility. """ - all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines()) - - return [j for j in all_jars if ( - "-tests" not in j and - "-sources" not in j and - "-with-dependencies" not in j)] + """ Return a list of jars within 'path' to be checked for compatibility. """ + all_jars = set(check_output(["find", path, "-name", "*.jar"]).splitlines()) + return [j for j in all_jars if ( + "-tests" not in j and + "-sources" not in j and + "-with-dependencies" not in j)] def write_xml_file(path, version, jars): - """Write the XML manifest file for JACC.""" - with open(path, "wt") as f: - f.write("" + version + "\n") - f.write("") - for j in jars: - f.write(j + "\n") - f.write("") - + """Write the XML manifest file for JACC.""" + with open(path, "wt") as f: + f.write("" + version + "\n") + f.write("") + for j in jars: + f.write(j + "\n") + f.write("") def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations): - """ Run the compliance checker to compare 'src' and 'dst'. """ - logging.info("Will check compatibility between original jars:\n\t%s\n" + - "and new jars:\n\t%s", - "\n\t".join(src_jars), - "\n\t".join(dst_jars)) - - java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl") + """ Run the compliance checker to compare 'src' and 'dst'. 
""" + logging.info("Will check compatibility between original jars:\n\t%s\n" + + "and new jars:\n\t%s", + "\n\t".join(src_jars), + "\n\t".join(dst_jars)) - src_xml_path = os.path.join(get_scratch_dir(), "src.xml") - dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml") - write_xml_file(src_xml_path, src_name, src_jars) - write_xml_file(dst_xml_path, dst_name, dst_jars) + java_acc_path = os.path.join(get_java_acc_dir(), "japi-compliance-checker.pl") - out_path = os.path.join(get_scratch_dir(), "report.html") + src_xml_path = os.path.join(get_scratch_dir(), "src.xml") + dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml") + write_xml_file(src_xml_path, src_name, src_jars) + write_xml_file(dst_xml_path, dst_name, dst_jars) - args = ["perl", java_acc_path, - "-l", get_repo_name(), - "-d1", src_xml_path, - "-d2", dst_xml_path, - "-report-path", out_path] + out_path = os.path.join(get_scratch_dir(), "report.html") - if annotations is not None: - annotations_path = os.path.join(get_scratch_dir(), "annotations.txt") - with file(annotations_path, "w") as f: - for ann in annotations: - print(ann, file=f) - args += ["-annotations-list", annotations_path] + args = ["perl", java_acc_path, + "-l", get_repo_name(), + "-d1", src_xml_path, + "-d2", dst_xml_path, + "-report-path", out_path] - subprocess.check_call(args) + if annotations is not None: + annotations_path = os.path.join(get_scratch_dir(), "annotations.txt") + with file(annotations_path, "w") as f: + for ann in annotations: + print(ann, file=f) + args += ["-annotations-list", annotations_path] + subprocess.check_call(args) def filter_jars(jars, include_filters, exclude_filters): - """Filter the list of JARs based on include and exclude filters.""" - filtered = [] - # Apply include filters - for j in jars: - found = False - basename = os.path.basename(j) - for f in include_filters: - if f.match(basename): - found = True - break - if found: - filtered += [j] - else: - logging.debug("Ignoring JAR %s", j) - # Apply exclude filters - exclude_filtered = [] - for j in filtered: - basename = os.path.basename(j) - found = False - for f in exclude_filters: - if f.match(basename): - found = True - break - if found: - logging.debug("Ignoring JAR %s", j) - else: - exclude_filtered += [j] - - return exclude_filtered - - -def main(): - """Main function.""" - logging.basicConfig(level=logging.INFO) - parser = argparse.ArgumentParser( - description="Run Java API Compliance Checker.") - parser.add_argument("-f", "--force-download", - action="store_true", - help="Download dependencies (i.e. Java JAVA_ACC) " + - "even if they are already present") - parser.add_argument("-i", "--include-file", - action="append", - dest="include_files", - help="Regex filter for JAR files to be included. " + - "Applied before the exclude filters. " + - "Can be specified multiple times.") - parser.add_argument("-e", "--exclude-file", - action="append", - dest="exclude_files", - help="Regex filter for JAR files to be excluded. " + - "Applied after the include filters. " + - "Can be specified multiple times.") - parser.add_argument("-a", "--annotation", - action="append", - dest="annotations", - help="Fully-qualified Java annotation. " + - "Java ACC will only check compatibility of " + - "annotated classes. 
Can be specified multiple times.") - parser.add_argument("--skip-clean", - action="store_true", - help="Skip cleaning the scratch directory.") - parser.add_argument("--skip-build", - action="store_true", - help="Skip building the projects.") - parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.") - parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD", - help="Destination revision. " + - "If not specified, will use HEAD.") - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - - args = parser.parse_args() - - src_rev, dst_rev = args.src_rev[0], args.dst_rev - - logging.info("Source revision: %s", src_rev) - logging.info("Destination revision: %s", dst_rev) - - # Construct the JAR regex patterns for filtering. - include_filters = [] - if args.include_files is not None: - for f in args.include_files: - logging.info("Applying JAR filename include filter: %s", f) - include_filters += [re.compile(f)] - else: - include_filters = [re.compile(".*")] - - exclude_filters = [] - if args.exclude_files is not None: - for f in args.exclude_files: - logging.info("Applying JAR filename exclude filter: %s", f) - exclude_filters += [re.compile(f)] - - # Construct the annotation list - annotations = args.annotations - if annotations is not None: - logging.info("Filtering classes using %d annotation(s):", len(annotations)) - for a in annotations: - logging.info("\t%s", a) - - # Download deps. - checkout_java_acc(args.force_download) - - # Set up the build. - scratch_dir = get_scratch_dir() - src_dir = os.path.join(scratch_dir, "src") - dst_dir = os.path.join(scratch_dir, "dst") - - if args.skip_clean: - logging.info("Skipping cleaning the scratch directory") + """Filter the list of JARs based on include and exclude filters.""" + filtered = [] + # Apply include filters + for j in jars: + found = False + basename = os.path.basename(j) + for f in include_filters: + if f.match(basename): + found = True + break + if found: + filtered += [j] else: - clean_scratch_dir(scratch_dir) - # Check out the src and dst source trees. - checkout_java_tree(get_git_hash(src_rev), src_dir) - checkout_java_tree(get_git_hash(dst_rev), dst_dir) - - # Run the build in each. - if args.skip_build: - logging.info("Skipping the build") + logging.debug("Ignoring JAR %s", j) + # Apply exclude filters + exclude_filtered = [] + for j in filtered: + basename = os.path.basename(j) + found = False + for f in exclude_filters: + if f.match(basename): + found = True + break + if found: + logging.debug("Ignoring JAR %s", j) else: - build_tree(src_dir) - build_tree(dst_dir) + exclude_filtered += [j] - # Find the JARs. - src_jars = find_jars(src_dir) - dst_jars = find_jars(dst_dir) + return exclude_filtered - # Filter the JARs. - src_jars = filter_jars(src_jars, include_filters, exclude_filters) - dst_jars = filter_jars(dst_jars, include_filters, exclude_filters) - if len(src_jars) == 0 or len(dst_jars) == 0: - logging.error("No JARs found! Are your filters too strong?") - sys.exit(1) - - run_java_acc(src_rev, src_jars, - dst_rev, dst_jars, annotations) +def main(): + """Main function.""" + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser( + description="Run Java API Compliance Checker.") + parser.add_argument("-f", "--force-download", + action="store_true", + help="Download dependencies (i.e. 
Java JAVA_ACC) " + + "even if they are already present") + parser.add_argument("-i", "--include-file", + action="append", + dest="include_files", + help="Regex filter for JAR files to be included. " + + "Applied before the exclude filters. " + + "Can be specified multiple times.") + parser.add_argument("-e", "--exclude-file", + action="append", + dest="exclude_files", + help="Regex filter for JAR files to be excluded. " + + "Applied after the include filters. " + + "Can be specified multiple times.") + parser.add_argument("-a", "--annotation", + action="append", + dest="annotations", + help="Fully-qualified Java annotation. " + + "Java ACC will only check compatibility of " + + "annotated classes. Can be specified multiple times.") + parser.add_argument("--skip-clean", + action="store_true", + help="Skip cleaning the scratch directory.") + parser.add_argument("--skip-build", + action="store_true", + help="Skip building the projects.") + parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.") + parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD", + help="Destination revision. " + + "If not specified, will use HEAD.") + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + args = parser.parse_args() + + src_rev, dst_rev = args.src_rev[0], args.dst_rev + + logging.info("Source revision: %s", src_rev) + logging.info("Destination revision: %s", dst_rev) + + # Construct the JAR regex patterns for filtering. + include_filters = [] + if args.include_files is not None: + for f in args.include_files: + logging.info("Applying JAR filename include filter: %s", f) + include_filters += [re.compile(f)] + else: + include_filters = [re.compile(".*")] + + exclude_filters = [] + if args.exclude_files is not None: + for f in args.exclude_files: + logging.info("Applying JAR filename exclude filter: %s", f) + exclude_filters += [re.compile(f)] + + # Construct the annotation list + annotations = args.annotations + if annotations is not None: + logging.info("Filtering classes using %d annotation(s):", len(annotations)) + for a in annotations: + logging.info("\t%s", a) + + # Download deps. + checkout_java_acc(args.force_download) + + # Set up the build. + scratch_dir = get_scratch_dir() + src_dir = os.path.join(scratch_dir, "src") + dst_dir = os.path.join(scratch_dir, "dst") + + if args.skip_clean: + logging.info("Skipping cleaning the scratch directory") + else: + clean_scratch_dir(scratch_dir) + # Check out the src and dst source trees. + checkout_java_tree(get_git_hash(src_rev), src_dir) + checkout_java_tree(get_git_hash(dst_rev), dst_dir) + + # Run the build in each. + if args.skip_build: + logging.info("Skipping the build") + else: + build_tree(src_dir) + build_tree(dst_dir) + + # Find the JARs. + src_jars = find_jars(src_dir) + dst_jars = find_jars(dst_dir) + + # Filter the JARs. + src_jars = filter_jars(src_jars, include_filters, exclude_filters) + dst_jars = filter_jars(dst_jars, include_filters, exclude_filters) + + if len(src_jars) == 0 or len(dst_jars) == 0: + logging.error("No JARs found! 
Are your filters too strong?") + sys.exit(1) + + run_java_acc(src_rev, src_jars, + dst_rev, dst_jars, annotations) if __name__ == "__main__": - main() + main() From 82fe22d98fb08c5ac3df378154284f54ad3137cb Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Tue, 26 Jan 2021 17:27:24 +0900 Subject: [PATCH 09/17] Reverted indent changes from determine-flaky-tests-hadoop.py --- dev-support/determine-flaky-tests-hadoop.py | 303 ++++++++++---------- 1 file changed, 149 insertions(+), 154 deletions(-) diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py index 8436da85b4c58..1cf7d2830289d 100755 --- a/dev-support/determine-flaky-tests-hadoop.py +++ b/dev-support/determine-flaky-tests-hadoop.py @@ -46,7 +46,7 @@ # Configuration DEFAULT_JENKINS_URL = "https://builds.apache.org" -DEFAULT_JOB_NAME = "hadoop-qbt-trunk-java8-linux-x86" +DEFAULT_JOB_NAME = "Hadoop-Common-trunk" DEFAULT_NUM_PREVIOUS_DAYS = 14 DEFAULT_TOP_NUM_FAILED_TEST = -1 @@ -55,179 +55,174 @@ # total number of runs to examine numRunsToExamine = 0 -# summary mode +#summary mode summary_mode = False -# total number of errors -ERROR_COUNT = 0 - +#total number of errors +error_count = 0 +""" Parse arguments """ def parse_args(): - """ Parse arguments """ - parser = OptionParser() - parser.add_option("-J", "--jenkins-url", type="string", - dest="jenkins_url", help="Jenkins URL", - default=DEFAULT_JENKINS_URL) - parser.add_option("-j", "--job-name", type="string", - dest="job_name", help="Job name to look at", - default=DEFAULT_JOB_NAME) - parser.add_option("-n", "--num-days", type="int", - dest="num_prev_days", help="Number of days to examine", - default=DEFAULT_NUM_PREVIOUS_DAYS) - parser.add_option("-t", "--top", type="int", - dest="num_failed_tests", - help="Summary Mode, only show top number of failed tests", - default=DEFAULT_TOP_NUM_FAILED_TEST) - - (options, args) = parser.parse_args() - if args: - parser.error("unexpected arguments: " + repr(args)) - return options - - + parser = OptionParser() + parser.add_option("-J", "--jenkins-url", type="string", + dest="jenkins_url", help="Jenkins URL", + default=DEFAULT_JENKINS_URL) + parser.add_option("-j", "--job-name", type="string", + dest="job_name", help="Job name to look at", + default=DEFAULT_JOB_NAME) + parser.add_option("-n", "--num-days", type="int", + dest="num_prev_days", help="Number of days to examine", + default=DEFAULT_NUM_PREVIOUS_DAYS) + parser.add_option("-t", "--top", type="int", + dest="num_failed_tests", + help="Summary Mode, only show top number of failed tests", + default=DEFAULT_TOP_NUM_FAILED_TEST) + + (options, args) = parser.parse_args() + if args: + parser.error("unexpected arguments: " + repr(args)) + return options + +""" Load data from specified url """ def load_url_data(url): - """ Load data from specified url """ - ourl = urllib.request.urlopen(url) - codec = ourl.info().get_param('charset') - content = ourl.read().decode(codec) - data = simplejson.loads(content, strict=False) - return data - + ourl = urllib.request.urlopen(url) + codec = ourl.info().get_param('charset') + content = ourl.read().decode(codec) + data = simplejson.loads(content, strict=False) + return data +""" List all builds of the target project. """ def list_builds(jenkins_url, job_name): - """ List all builds of the target project. 
""" - global summary_mode, ERROR_COUNT - url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( - jenkins=jenkins_url, - job_name=job_name) - - try: - data = load_url_data(url) + global summary_mode + global error_count + url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( + jenkins=jenkins_url, + job_name=job_name) - except: - if not summary_mode: - logging.error("Could not fetch: %s" % url) - ERROR_COUNT += 1 - raise - return data['builds'] + try: + data = load_url_data(url) + except: + if not summary_mode: + logging.error("Could not fetch: %s" % url) + error_count += 1 + raise + return data['builds'] +""" Find the names of any tests which failed in the given build output URL. """ def find_failing_tests(testReportApiJson, jobConsoleOutput): - """ Find the names of any tests which failed in the given build output URL. """ - global summary_mode, ERROR_COUNT - ret = set() - try: - data = load_url_data(testReportApiJson) + global summary_mode + global error_count + ret = set() + try: + data = load_url_data(testReportApiJson) - except: - if not summary_mode: - logging.error(" Could not open testReport, check " + - jobConsoleOutput + " for why it was reported failed") - ERROR_COUNT += 1 - return ret - - for suite in data['suites']: - for cs in suite['cases']: - status = cs['status'] - errDetails = cs['errorDetails'] - if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): - ret.add(cs['className'] + "." + cs['name']) - - if len(ret) == 0 and (not summary_mode): - logging.info(" No failed tests in testReport, check " + - jobConsoleOutput + " for why it was reported failed.") + except: + if not summary_mode: + logging.error(" Could not open testReport, check " + + jobConsoleOutput + " for why it was reported failed") + error_count += 1 return ret + for suite in data['suites']: + for cs in suite['cases']: + status = cs['status'] + errDetails = cs['errorDetails'] + if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): + ret.add(cs['className'] + "." 
+ cs['name']) + if len(ret) == 0 and (not summary_mode): + logging.info(" No failed tests in testReport, check " + + jobConsoleOutput + " for why it was reported failed.") + return ret + +""" Iterate runs of specfied job within num_prev_days and collect results """ def find_flaky_tests(jenkins_url, job_name, num_prev_days): - """ Iterate runs of specfied job within num_prev_days and collect results """ - global numRunsToExamine - global summary_mode - all_failing = dict() - # First list all builds - builds = list_builds(jenkins_url, job_name) - - # Select only those in the last N days - min_time = time.time() - SECONDS_PER_DAY * num_prev_days - builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time] - - # Filter out only those that failed - failing_build_urls = [(b['url'], b['timestamp']) for b in builds - if b['result'] in ('UNSTABLE', 'FAILURE')] - - tnum = len(builds) - num = len(failing_build_urls) - numRunsToExamine = tnum + global numRunsToExamine + global summary_mode + all_failing = dict() + # First list all builds + builds = list_builds(jenkins_url, job_name) + + # Select only those in the last N days + min_time = time.time() - SECONDS_PER_DAY * num_prev_days + builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time] + + # Filter out only those that failed + failing_build_urls = [(b['url'] , b['timestamp']) for b in builds + if (b['result'] in ('UNSTABLE', 'FAILURE'))] + + tnum = len(builds) + num = len(failing_build_urls) + numRunsToExamine = tnum + if not summary_mode: + logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) + + ") that have failed tests in the past " + str(num_prev_days) + " days" + + ((".", ", as listed below:\n")[num > 0])) + + for failed_build_with_time in failing_build_urls: + failed_build = failed_build_with_time[0]; + jobConsoleOutput = failed_build + "Console"; + testReport = failed_build + "testReport"; + testReportApiJson = testReport + "/api/json"; + + ts = float(failed_build_with_time[1]) / 1000. + st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') if not summary_mode: - logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) - + ") that have failed tests in the past " + str(num_prev_days) + " days" - + ((".", ", as listed below:\n")[num > 0])) - - for failed_build_with_time in failing_build_urls: - failed_build = failed_build_with_time[0] - jobConsoleOutput = failed_build + "Console" - testReport = failed_build + "testReport" - testReportApiJson = testReport + "/api/json" - - ts = float(failed_build_with_time[1]) / 1000. 
- st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') + logging.info("===>%s" % str(testReport) + " (" + st + ")") + failing = find_failing_tests(testReportApiJson, jobConsoleOutput) + if failing: + for ftest in failing: if not summary_mode: - logging.info("===>%s" % str(testReport) + " (" + st + ")") - failing = find_failing_tests(testReportApiJson, jobConsoleOutput) - if failing: - for ftest in failing: - if not summary_mode: - logging.info(" Failed test: %s" % ftest) - all_failing[ftest] = all_failing.get(ftest, 0) + 1 - - return all_failing + logging.info(" Failed test: %s" % ftest) + all_failing[ftest] = all_failing.get(ftest,0)+1 + return all_failing def main(): - global numRunsToExamine - global summary_mode - logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) - - # set up logger to write to stdout - soh = logging.StreamHandler(sys.stdout) - soh.setLevel(logging.INFO) - logger = logging.getLogger() - logger.removeHandler(logger.handlers[0]) - logger.addHandler(soh) - - opts = parse_args() - logging.info("****Recently FAILED builds in url: " + opts.jenkins_url - + "/job/" + opts.job_name + "") - - if opts.num_failed_tests != -1: - summary_mode = True - - all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, - opts.num_prev_days) - if len(all_failing) == 0: - raise SystemExit(0) - - if summary_mode and opts.num_failed_tests < len(all_failing): - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, top " + str(opts.num_failed_tests) + - " failed tests <#failedRuns: testName>:") - else: - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, all failed tests <#failedRuns: testName>:") - - # print summary section: all failed tests sorted by how many times they failed - line_count = 0 - for tn in sorted(all_failing, key=all_failing.get, reverse=True): - logging.info(" " + str(all_failing[tn]) + ": " + tn) - if summary_mode: - line_count += 1 - if line_count == opts.num_failed_tests: - break - - if summary_mode and ERROR_COUNT > 0: - logging.info("\n" + str(ERROR_COUNT) + " errors found, you may " - + "re-run in non summary mode to see error details.") - + global numRunsToExamine + global summary_mode + logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) + + # set up logger to write to stdout + soh = logging.StreamHandler(sys.stdout) + soh.setLevel(logging.INFO) + logger = logging.getLogger() + logger.removeHandler(logger.handlers[0]) + logger.addHandler(soh) + + opts = parse_args() + logging.info("****Recently FAILED builds in url: " + opts.jenkins_url + + "/job/" + opts.job_name + "") + + if opts.num_failed_tests != -1: + summary_mode = True + + all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, + opts.num_prev_days) + if len(all_failing) == 0: + raise SystemExit(0) + + if summary_mode and opts.num_failed_tests < len(all_failing): + logging.info("\nAmong " + str(numRunsToExamine) + + " runs examined, top " + str(opts.num_failed_tests) + + " failed tests <#failedRuns: testName>:") + else: + logging.info("\nAmong " + str(numRunsToExamine) + + " runs examined, all failed tests <#failedRuns: testName>:") + + # print summary section: all failed tests sorted by how many times they failed + line_count = 0 + for tn in sorted(all_failing, key=all_failing.get, reverse=True): + logging.info(" " + str(all_failing[tn])+ ": " + tn) + if summary_mode: + line_count += 1 + if line_count == opts.num_failed_tests: + break + + if summary_mode and error_count > 0: + logging.info("\n" + 
str(error_count) + " errors found, you may " + + "re-run in non summary mode to see error details."); if __name__ == "__main__": - main() + main() From d4a4e1b221ae57ac82b107012bd79c1cb36c30d5 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Tue, 26 Jan 2021 18:06:47 +0900 Subject: [PATCH 10/17] Fix pip3 install command --- dev-support/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile index 90cbcc111919e..930d44a839c4f 100644 --- a/dev-support/docker/Dockerfile +++ b/dev-support/docker/Dockerfile @@ -130,7 +130,7 @@ ENV PATH "${PATH}:/opt/protobuf/bin" #### # Install pylint and python-dateutil #### -RUN pip3 install pylint=2.6.0 python-dateutil=2.8.1 +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 #### # Install bower From 9a9032a62de6aa7bdc3d08be84d8ca59069e52f5 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Tue, 26 Jan 2021 18:08:02 +0900 Subject: [PATCH 11/17] Update Dockerfile_aarch64 --- dev-support/docker/Dockerfile_aarch64 | 35 ++++++--------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64 index bdf9e0c7e59f6..858c9b36ddf05 100644 --- a/dev-support/docker/Dockerfile_aarch64 +++ b/dev-support/docker/Dockerfile_aarch64 @@ -76,10 +76,11 @@ RUN apt-get -q update \ openjdk-8-jdk \ pinentry-curses \ pkg-config \ - python \ - python2.7 \ - python-pkg-resources \ - python-setuptools \ + python3 \ + python3-pip \ + python3-pkg-resources \ + python3-setuptools \ + python3-wheel \ rsync \ shellcheck \ software-properties-common \ @@ -112,18 +113,6 @@ RUN mkdir -p /opt/boost-library \ && cd /root \ && rm -rf /opt/boost-library -#### -# Install pip (deprecated from Focal toolchain) -#### -# hadolint ignore=DL3003 -RUN mkdir -p /opt/pip \ - && curl -L https://bootstrap.pypa.io/2.7/get-pip.py > get-pip.py \ - && mv get-pip.py /opt/pip \ - && cd /opt/pip \ - && python2.7 get-pip.py "pip < 21.0" \ - && cd /root \ - && rm -rf /opt/pip - ###### # Install Google Protobuf 3.7.1 (3.6.1 ships with Focal) ###### @@ -143,19 +132,9 @@ ENV PROTOBUF_HOME /opt/protobuf ENV PATH "${PATH}:/opt/protobuf/bin" #### -# Install pylint at fixed version (2.0.0 removed python2 support) -# https://github.com/PyCQA/pylint/issues/2294 -#### -RUN pip2 install \ - astroid==1.6.6 \ - isort==4.3.21 \ - configparser==4.0.2 \ - pylint==1.9.2 - -#### -# Install dateutil.parser +# Install pylint and python-dateutil #### -RUN pip2 install python-dateutil==2.7.3 +RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1 #### # Install bower From cc34617a098d18138888056a4fbcc67544152be5 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Sat, 13 Feb 2021 15:42:43 +0900 Subject: [PATCH 12/17] Apply YETUS-1099 --- dev-support/Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 6a085394aa1d7..18120e9171f7b 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -35,7 +35,8 @@ pipeline { DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile" YETUS='yetus' // Branch or tag name. 
Yetus release tags are 'rel/X.Y.Z' - YETUS_VERSION='rel/0.13.0' + // Yetus 0.13.0 + YETUS-1099 to fix shelldocs bug + YETUS_VERSION='94857fb27ccb3b3522eaaaff1abcb28f999c49ac' } parameters { From 2b316f0fb2e272b5025e42c7372467311488816f Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 11:35:09 +0900 Subject: [PATCH 13/17] Disable Hadoop's bundled (Yetus 0.13.0) shelldocs --- dev-support/Jenkinsfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 18120e9171f7b..77aad70bae657 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -134,9 +134,6 @@ pipeline { # plugins to enable YETUS_ARGS+=("--plugins=all") - # use Hadoop's bundled shelldocs - YETUS_ARGS+=("--shelldocs=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/shelldocs") - # don't let these tests cause -1s because we aren't really paying that # much attention to them YETUS_ARGS+=("--tests-filter=checkstyle") From ece917cdb5d75323ae93f11d3fd3e6448ac000ee Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 12:51:29 +0900 Subject: [PATCH 14/17] Revert "Apply YETUS-1099" This reverts commit cc34617a098d18138888056a4fbcc67544152be5. --- dev-support/Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 77aad70bae657..39450ec762aab 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -35,8 +35,7 @@ pipeline { DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile" YETUS='yetus' // Branch or tag name. Yetus release tags are 'rel/X.Y.Z' - // Yetus 0.13.0 + YETUS-1099 to fix shelldocs bug - YETUS_VERSION='94857fb27ccb3b3522eaaaff1abcb28f999c49ac' + YETUS_VERSION='rel/0.13.0' } parameters { From 200ed5874d2c9ee3fe3c896e6734af6aedfc1b9c Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 12:53:14 +0900 Subject: [PATCH 15/17] Publish build status in the post script --- dev-support/Jenkinsfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 39450ec762aab..5119bae189c34 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -168,6 +168,19 @@ pipeline { post { always { script { + // Publish status if it was missed (YETUS-1059) + withCredentials( + [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', + passwordVariable: 'GITHUB_TOKEN', + usernameVariable: 'GITHUB_USER')]) { + sh '''#!/usr/bin/env bash + YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}") + YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}") + TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh" + /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true + ''' + } + // Yetus output archiveArtifacts "${env.PATCHDIR}/**" // Publish the HTML report so that it can be looked at From 7a6bee776e917abacf3b7fc990ba54002155c706 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 15 Feb 2021 13:45:36 +0900 Subject: [PATCH 16/17] Use strong token --- dev-support/Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index 5119bae189c34..6841ed30a79cc 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -60,7 +60,7 @@ pipeline { stage ('precommit-run') { steps { withCredentials( - [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', + [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd', passwordVariable: 
'GITHUB_TOKEN', usernameVariable: 'GITHUB_USER'), usernamePassword(credentialsId: 'hadoopqa-at-asf-jira', @@ -170,7 +170,7 @@ pipeline { script { // Publish status if it was missed (YETUS-1059) withCredentials( - [usernamePassword(credentialsId: 'apache-hadoop-at-github.com', + [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd', passwordVariable: 'GITHUB_TOKEN', usernameVariable: 'GITHUB_USER')]) { sh '''#!/usr/bin/env bash From 9ebb78464dc8f2ed68d086817f9f496339b3941d Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Wed, 17 Feb 2021 10:10:00 +0900 Subject: [PATCH 17/17] Remove determine-flaky-tests-hadoop.py since it is not used --- dev-support/determine-flaky-tests-hadoop.py | 228 -------------------- 1 file changed, 228 deletions(-) delete mode 100755 dev-support/determine-flaky-tests-hadoop.py diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py deleted file mode 100755 index 1cf7d2830289d..0000000000000 --- a/dev-support/determine-flaky-tests-hadoop.py +++ /dev/null @@ -1,228 +0,0 @@ -#!/usr/bin/env python3 -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Given a jenkins test job, this script examines all runs of the job done -# within specified period of time (number of days prior to the execution -# time of this script), and reports all failed tests. -# -# The output of this script includes a section for each run that has failed -# tests, with each failed test name listed. -# -# More importantly, at the end, it outputs a summary section to list all failed -# tests within all examined runs, and indicate how many runs a same test -# failed, and sorted all failed tests by how many runs each test failed. -# -# This way, when we see failed tests in PreCommit build, we can quickly tell -# whether a failed test is a new failure, or it failed before and how often it -# failed, so to have idea whether it may just be a flaky test. -# -# Of course, to be 100% sure about the reason of a test failure, closer look -# at the failed test for the specific run is necessary. 
-# -import sys - -import urllib.request - -import datetime -import json as simplejson -import logging -from optparse import OptionParser -import time - -# Configuration -DEFAULT_JENKINS_URL = "https://builds.apache.org" -DEFAULT_JOB_NAME = "Hadoop-Common-trunk" -DEFAULT_NUM_PREVIOUS_DAYS = 14 -DEFAULT_TOP_NUM_FAILED_TEST = -1 - -SECONDS_PER_DAY = 86400 - -# total number of runs to examine -numRunsToExamine = 0 - -#summary mode -summary_mode = False - -#total number of errors -error_count = 0 - -""" Parse arguments """ -def parse_args(): - parser = OptionParser() - parser.add_option("-J", "--jenkins-url", type="string", - dest="jenkins_url", help="Jenkins URL", - default=DEFAULT_JENKINS_URL) - parser.add_option("-j", "--job-name", type="string", - dest="job_name", help="Job name to look at", - default=DEFAULT_JOB_NAME) - parser.add_option("-n", "--num-days", type="int", - dest="num_prev_days", help="Number of days to examine", - default=DEFAULT_NUM_PREVIOUS_DAYS) - parser.add_option("-t", "--top", type="int", - dest="num_failed_tests", - help="Summary Mode, only show top number of failed tests", - default=DEFAULT_TOP_NUM_FAILED_TEST) - - (options, args) = parser.parse_args() - if args: - parser.error("unexpected arguments: " + repr(args)) - return options - -""" Load data from specified url """ -def load_url_data(url): - ourl = urllib.request.urlopen(url) - codec = ourl.info().get_param('charset') - content = ourl.read().decode(codec) - data = simplejson.loads(content, strict=False) - return data - -""" List all builds of the target project. """ -def list_builds(jenkins_url, job_name): - global summary_mode - global error_count - url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict( - jenkins=jenkins_url, - job_name=job_name) - - try: - data = load_url_data(url) - - except: - if not summary_mode: - logging.error("Could not fetch: %s" % url) - error_count += 1 - raise - return data['builds'] - -""" Find the names of any tests which failed in the given build output URL. """ -def find_failing_tests(testReportApiJson, jobConsoleOutput): - global summary_mode - global error_count - ret = set() - try: - data = load_url_data(testReportApiJson) - - except: - if not summary_mode: - logging.error(" Could not open testReport, check " + - jobConsoleOutput + " for why it was reported failed") - error_count += 1 - return ret - - for suite in data['suites']: - for cs in suite['cases']: - status = cs['status'] - errDetails = cs['errorDetails'] - if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)): - ret.add(cs['className'] + "." 
+ cs['name']) - - if len(ret) == 0 and (not summary_mode): - logging.info(" No failed tests in testReport, check " + - jobConsoleOutput + " for why it was reported failed.") - return ret - -""" Iterate runs of specfied job within num_prev_days and collect results """ -def find_flaky_tests(jenkins_url, job_name, num_prev_days): - global numRunsToExamine - global summary_mode - all_failing = dict() - # First list all builds - builds = list_builds(jenkins_url, job_name) - - # Select only those in the last N days - min_time = time.time() - SECONDS_PER_DAY * num_prev_days - builds = [b for b in builds if (float(b['timestamp']) / 1000) > min_time] - - # Filter out only those that failed - failing_build_urls = [(b['url'] , b['timestamp']) for b in builds - if (b['result'] in ('UNSTABLE', 'FAILURE'))] - - tnum = len(builds) - num = len(failing_build_urls) - numRunsToExamine = tnum - if not summary_mode: - logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum) - + ") that have failed tests in the past " + str(num_prev_days) + " days" - + ((".", ", as listed below:\n")[num > 0])) - - for failed_build_with_time in failing_build_urls: - failed_build = failed_build_with_time[0]; - jobConsoleOutput = failed_build + "Console"; - testReport = failed_build + "testReport"; - testReportApiJson = testReport + "/api/json"; - - ts = float(failed_build_with_time[1]) / 1000. - st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') - if not summary_mode: - logging.info("===>%s" % str(testReport) + " (" + st + ")") - failing = find_failing_tests(testReportApiJson, jobConsoleOutput) - if failing: - for ftest in failing: - if not summary_mode: - logging.info(" Failed test: %s" % ftest) - all_failing[ftest] = all_failing.get(ftest,0)+1 - - return all_failing - -def main(): - global numRunsToExamine - global summary_mode - logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) - - # set up logger to write to stdout - soh = logging.StreamHandler(sys.stdout) - soh.setLevel(logging.INFO) - logger = logging.getLogger() - logger.removeHandler(logger.handlers[0]) - logger.addHandler(soh) - - opts = parse_args() - logging.info("****Recently FAILED builds in url: " + opts.jenkins_url - + "/job/" + opts.job_name + "") - - if opts.num_failed_tests != -1: - summary_mode = True - - all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name, - opts.num_prev_days) - if len(all_failing) == 0: - raise SystemExit(0) - - if summary_mode and opts.num_failed_tests < len(all_failing): - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, top " + str(opts.num_failed_tests) + - " failed tests <#failedRuns: testName>:") - else: - logging.info("\nAmong " + str(numRunsToExamine) + - " runs examined, all failed tests <#failedRuns: testName>:") - - # print summary section: all failed tests sorted by how many times they failed - line_count = 0 - for tn in sorted(all_failing, key=all_failing.get, reverse=True): - logging.info(" " + str(all_failing[tn])+ ": " + tn) - if summary_mode: - line_count += 1 - if line_count == opts.num_failed_tests: - break - - if summary_mode and error_count > 0: - logging.info("\n" + str(error_count) + " errors found, you may " - + "re-run in non summary mode to see error details."); - -if __name__ == "__main__": - main()
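-- 
The conversion applied across this series follows one recurring Python 3
pattern: urllib2.urlopen() becomes urllib.request.urlopen(), and the response
bytes are decoded with the charset advertised by the server before the JSON
is parsed. A minimal standalone sketch of that pattern (the fetch_json name
and the 'utf-8' fallback for servers that omit a charset are illustrative
additions, not part of the patches):

    #!/usr/bin/env python3
    # Sketch of the urllib2 -> urllib.request migration used by these
    # scripts; mirrors load_url_data() above.
    import json
    import urllib.request

    def fetch_json(url):
        # urlopen() returns an http.client.HTTPResponse usable as a
        # context manager; info() exposes the Content-Type parameters.
        with urllib.request.urlopen(url) as resp:
            charset = resp.info().get_param('charset') or 'utf-8'
            return json.loads(resp.read().decode(charset))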